// RegexCalcLexer.java

import java.io.IOException;
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.util.Map;
import java.util.regex.Pattern;
import java.util.regex.Matcher;

/** Lexer (aka scanner) for the simple calculator language using regular expressions.
 *
 * <p>Input is consumed one line at a time from standard input. Within a line,
 * whitespace is skipped and then every token pattern is tried at the current
 * position; the longest match becomes the next token (maximal munch).
 *
 * <p>NOTE(review): because the NUM pattern accepts a leading {@code -}, input
 * such as {@code 3-4} lexes as NUM {@code 3} followed by NUM {@code -4} rather
 * than as a subtraction. Confirm that the parser expects this.
 */
public class RegexCalcLexer extends CalcLexer {
    /** Pairing of token type to regular expression.
     *
     * <p>{@code Map.of} has no defined iteration order. That is harmless here:
     * the longest match is selected, and with these patterns no two token
     * types can match the same number of characters at the same position.
     */
    private static final Map<CalcToken.Type, Pattern> TOKEN_SPECS = Map.of(
        CalcToken.Type.OPA,  Pattern.compile("[+-]"),
        CalcToken.Type.OPM,  Pattern.compile("[*/]"),
        CalcToken.Type.NUM,  Pattern.compile("-?[0-9]+"),
        CalcToken.Type.LP,   Pattern.compile("\\("),
        CalcToken.Type.RP,   Pattern.compile("\\)"),
        CalcToken.Type.STOP, Pattern.compile(";")
    );

    /** Matches a (possibly empty) run of whitespace. */
    private static final Pattern WHITESPACE = Pattern.compile("\\s*");

    /** The source of input lines. */
    private final BufferedReader source;
    /** Stores progress in matching tokens from the most recent line of input.
     * The matcher's region start marks the next unread character of that line;
     * {@code null} means a fresh line must be read.
     */
    private Matcher curLine = null;

    /** Creates a lexer that reads lines from standard input.
     *
     * @throws IOException kept in the signature for compatibility with existing callers
     */
    public RegexCalcLexer() throws IOException {
        this.source = new BufferedReader(new InputStreamReader(System.in));
    }

    /** Reads, moves past, and returns a single token from the input stream.
     * This works by first trying to get one more token out of the current line,
     * and reading in more lines as necessary to get a new token.
     * Note that it does NOT throw an error on EOF, but rather just returns
     * the EOF token in that case.
     *
     * @return the next token, or a token of type EOF with empty text at end of input
     * @throws IOException if reading a line fails, or if no token pattern matches
     *         at the current position; the column in the message is the 0-based
     *         offset of the offending character within its line
     */
    @Override
    protected CalcToken readToken() throws IOException {
        if (!skipToNextToken()) {
            return new CalcToken(CalcToken.Type.EOF, "");
        }
        CalcToken token = matchLongestToken();
        if (token == null) {
            // NOTE(review): the position is not advanced here, so another call
            // would fail on the same character again. Confirm callers do not
            // attempt to resume lexing after this error.
            throw new IOException("Unrecognized token starting on column %d"
                .formatted(curLine.regionStart()));
        }
        // skip past the matched token on the current line
        curLine.region(curLine.regionStart() + token.getText().length(), curLine.regionEnd());
        return token;
    }

    /** Positions {@code curLine} on the next non-whitespace character, reading new lines as needed.
     *
     * @return {@code true} if a character is available, {@code false} on end of input
     * @throws IOException if reading from the underlying source fails
     */
    private boolean skipToNextToken() throws IOException {
        while (true) {
            if (curLine == null) {
                // no current line, so read a new one
                String line = source.readLine();
                if (line == null) {
                    return false;
                }
                curLine = WHITESPACE.matcher(line);
            }
            // skip past any whitespace before the next token
            if (curLine.usePattern(WHITESPACE).lookingAt()) {
                curLine.region(curLine.end(), curLine.regionEnd());
            }
            // stop this loop when there is something left on the line besides whitespace
            if (curLine.regionStart() < curLine.regionEnd()) {
                return true;
            }
            // the line is exhausted; force a fresh read on the next iteration
            curLine = null;
        }
    }

    /** Tries every token pattern at the current position and keeps the longest match.
     *
     * @return the longest matching token, or {@code null} if no pattern matches
     */
    private CalcToken matchLongestToken() {
        CalcToken best = null;
        for (Map.Entry<CalcToken.Type, Pattern> spec : TOKEN_SPECS.entrySet()) {
            if (curLine.usePattern(spec.getValue()).lookingAt()) {
                // this token could be a match - check if it's the longest
                String text = curLine.group();
                if (best == null || text.length() > best.getText().length()) {
                    best = new CalcToken(spec.getKey(), text);
                }
            }
        }
        return best;
    }
}