Skip to content

org.antlr.v4.runtime.atn.EpsilonTransition cannot be cast to org.antlr.v4.runtime.atn.RuleTransition #4843

Open
@Dr-Bluemond

Description

@Dr-Bluemond

I'm implementing a 'predicting the next token' function. I got this exception when I call parser.getExpectedTokens()

The grammar file is listed below, which I think is LL(1)

grammar Expr;

expr   : term ((PLUS | MINUS) term)* ;
term   : factor ((MUL | DIV) factor)* ;
factor : INT | LPAREN expr RPAREN ;

PLUS   : '+' ;
MINUS  : '-' ;
MUL    : '*' ;
DIV    : '/' ;
LPAREN : '(' ;
RPAREN : ')' ;
INT    : [0-9]+ ;

WS     : [ \t\r\n]+ -> skip ;

And I use this program to predict the next token. Although it's very tricky, I think it's irrelevent to the bug.

import expr.ExprParser;
import org.antlr.v4.runtime.*;
import org.antlr.v4.runtime.Vocabulary;
import org.antlr.v4.runtime.misc.Interval;
import java.util.Collections;
import java.util.List;
import java.util.ArrayList;
import java.util.Scanner;

public class Main {
    static ExprParser parser = new ExprParser(null);

    public static void main(String[] args) {
        Vocabulary vocabulary = parser.getVocabulary();
        List<Token> prefix = new ArrayList<>();
        Scanner scanner = new Scanner(System.in);

        while (true) {
            List<Integer> expectedTokens = getExpectedTokens(prefix);
            System.out.println("Expected tokens:");
            expectedTokens.forEach(token -> {
                System.out.println(token + ": " + vocabulary.getDisplayName(token));
            });

            System.out.print("Choose next token type: ");
            int chosenTokenType = scanner.nextInt();

            if (chosenTokenType == Token.EOF) {
                System.out.println("Exiting...");
                break;
            }

            Token nextToken = new CommonToken(chosenTokenType);
            prefix.add(nextToken);
        }

        scanner.close();
    }

    private static List<Integer> getExpectedTokens(List<Token> prefix) {
        DirectTokenStream tokenStream = new DirectTokenStream(prefix);
        parser.setTokenStream(tokenStream);
        tokenStream.setParser(parser);
        try {
            parser.expr();
        } catch (StopParseException e) { // 并非异常,而是合理跳出parser的手段。
            return tokenStream.getExpectedTokens();
        } catch (Exception e) {
            System.err.println(e.getMessage());
            throw e;
        }
       //Parser exited normally, return a EOF only list
        return Collections.singletonList(CommonToken.EOF);
    }

}

/**
 * 并非异常,而是推出parser的手段。
 */
class StopParseException extends RuntimeException {
    public StopParseException(String message) {
        super(message);
    }
}

class DirectTokenStream implements TokenStream {
    private final List<Token> tokens;
    private Parser parser;
    private List<Integer> expectedTokens;
    private int currentIndex = 0;

    public DirectTokenStream(List<Token> tokens) {
        this.tokens = tokens;
        this.expectedTokens = new ArrayList<>();
    }

    public void setParser(Parser parser) {
        this.parser = parser;
    }

    public List<Integer> getExpectedTokens() {
        return expectedTokens;
    }

    @Override
    public Token LT(int k) {
        // 只允许向前预查一个。consume之后和预查执行之前就正好是parser决定下一个可行Token的时机。
        if (k <= -1) { // 反向查询
            return tokens.get(currentIndex + k);
        }
        if (k == 1) { // 正向查询1个
            if (currentIndex < tokens.size()) {
                return tokens.get(currentIndex);
            } else {
                expectedTokens = parser.getExpectedTokens().toList();
                throw new StopParseException("Index out of bounds. Expected tokens updated.");
            }
        }
        throw new UnsupportedOperationException("Please check if the grammar is LL(1)!");

    }

    @Override
    public Token get(int i) { throw new UnsupportedOperationException(); }
    @Override
    public TokenSource getTokenSource() { throw new UnsupportedOperationException(); }
    @Override
    public String getText() { return null; }
    @Override
    public String getText(Interval interval) { return null; }
    @Override
    public String getText(RuleContext ctx) { return null; }
    @Override
    public String getText(Token start, Token stop) { return null; }

    @Override
    public void consume() {
        currentIndex++;
    }

    @Override
    public int LA(int i) { return LT(i).getType(); }
    @Override
    public int mark() { return -1; }
    @Override
    public void release(int marker) {}
    @Override
    public void seek(int index) {}
    @Override
    public int index() { return 0; }
    @Override
    public int size() { return 0; }

    @Override
    public String getSourceName() {
        return "DirectToken";
    }
}

Here is the running log:

Expected tokens:
5: '('
7: INT
Choose next token type: 7
org.antlr.v4.runtime.atn.EpsilonTransition cannot be cast to org.antlr.v4.runtime.atn.RuleTransition
Exception in thread "main" java.lang.ClassCastException: org.antlr.v4.runtime.atn.EpsilonTransition cannot be cast to org.antlr.v4.runtime.atn.RuleTransition
	at org.antlr.v4.runtime.atn.ATN.getExpectedTokens(ATN.java:183)
	at org.antlr.v4.runtime.Parser.getExpectedTokens(Parser.java:822)
	at DirectTokenStream.LT(Main.java:96)
	at DirectTokenStream.LA(Main.java:123)
	at org.antlr.v4.runtime.DefaultErrorStrategy.sync(DefaultErrorStrategy.java:239)
	at expr.ExprParser.term(ExprParser.java:192)
	at expr.ExprParser.expr(ExprParser.java:122)
	at Main.getExpectedTokens(Main.java:45)

Then, I tried to fix the problem by modifing ATN.java:181 to this

		while (ctx != null && ctx.invokingState >= 0 && following.contains(Token.EPSILON)) {
			ATNState invokingState = states.get(ctx.invokingState);
			if (invokingState.transition(0) instanceof RuleTransition) {
				RuleTransition rt = (RuleTransition)invokingState.transition(0);
				following = nextTokens(rt.followState);
				expected.addAll(following);
			}
			expected.remove(Token.EPSILON);
			ctx = ctx.parent;
		}

then it behaves normally

Expected tokens:
5: '('
7: INT
Choose next token type: 7
Expected tokens:
-1: EOF
1: '+'
2: '-'
3: '*'
4: '/'
Choose next token type: 

I'm not an expert of parser. Hope someone can tell me whether my modification is reasonable and whether there is a bug in ANTLR. Thank you!

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions