Open
Description
I'm implementing a 'predicting the next token' function. I got this exception when I call parser.getExpectedTokens()
The grammar file is listed below, which I think is LL(1)
grammar Expr;
expr : term ((PLUS | MINUS) term)* ;
term : factor ((MUL | DIV) factor)* ;
factor : INT | LPAREN expr RPAREN ;
PLUS : '+' ;
MINUS : '-' ;
MUL : '*' ;
DIV : '/' ;
LPAREN : '(' ;
RPAREN : ')' ;
INT : [0-9]+ ;
WS : [ \t\r\n]+ -> skip ;
And I use this program to predict the next token. Although it's very tricky, I think it's irrelevent to the bug.
import expr.ExprParser;
import org.antlr.v4.runtime.*;
import org.antlr.v4.runtime.Vocabulary;
import org.antlr.v4.runtime.misc.Interval;
import java.util.Collections;
import java.util.List;
import java.util.ArrayList;
import java.util.Scanner;
public class Main {
static ExprParser parser = new ExprParser(null);
public static void main(String[] args) {
Vocabulary vocabulary = parser.getVocabulary();
List<Token> prefix = new ArrayList<>();
Scanner scanner = new Scanner(System.in);
while (true) {
List<Integer> expectedTokens = getExpectedTokens(prefix);
System.out.println("Expected tokens:");
expectedTokens.forEach(token -> {
System.out.println(token + ": " + vocabulary.getDisplayName(token));
});
System.out.print("Choose next token type: ");
int chosenTokenType = scanner.nextInt();
if (chosenTokenType == Token.EOF) {
System.out.println("Exiting...");
break;
}
Token nextToken = new CommonToken(chosenTokenType);
prefix.add(nextToken);
}
scanner.close();
}
private static List<Integer> getExpectedTokens(List<Token> prefix) {
DirectTokenStream tokenStream = new DirectTokenStream(prefix);
parser.setTokenStream(tokenStream);
tokenStream.setParser(parser);
try {
parser.expr();
} catch (StopParseException e) { // 并非异常,而是合理跳出parser的手段。
return tokenStream.getExpectedTokens();
} catch (Exception e) {
System.err.println(e.getMessage());
throw e;
}
//Parser exited normally, return a EOF only list
return Collections.singletonList(CommonToken.EOF);
}
}
/**
* 并非异常,而是推出parser的手段。
*/
class StopParseException extends RuntimeException {
public StopParseException(String message) {
super(message);
}
}
class DirectTokenStream implements TokenStream {
private final List<Token> tokens;
private Parser parser;
private List<Integer> expectedTokens;
private int currentIndex = 0;
public DirectTokenStream(List<Token> tokens) {
this.tokens = tokens;
this.expectedTokens = new ArrayList<>();
}
public void setParser(Parser parser) {
this.parser = parser;
}
public List<Integer> getExpectedTokens() {
return expectedTokens;
}
@Override
public Token LT(int k) {
// 只允许向前预查一个。consume之后和预查执行之前就正好是parser决定下一个可行Token的时机。
if (k <= -1) { // 反向查询
return tokens.get(currentIndex + k);
}
if (k == 1) { // 正向查询1个
if (currentIndex < tokens.size()) {
return tokens.get(currentIndex);
} else {
expectedTokens = parser.getExpectedTokens().toList();
throw new StopParseException("Index out of bounds. Expected tokens updated.");
}
}
throw new UnsupportedOperationException("Please check if the grammar is LL(1)!");
}
@Override
public Token get(int i) { throw new UnsupportedOperationException(); }
@Override
public TokenSource getTokenSource() { throw new UnsupportedOperationException(); }
@Override
public String getText() { return null; }
@Override
public String getText(Interval interval) { return null; }
@Override
public String getText(RuleContext ctx) { return null; }
@Override
public String getText(Token start, Token stop) { return null; }
@Override
public void consume() {
currentIndex++;
}
@Override
public int LA(int i) { return LT(i).getType(); }
@Override
public int mark() { return -1; }
@Override
public void release(int marker) {}
@Override
public void seek(int index) {}
@Override
public int index() { return 0; }
@Override
public int size() { return 0; }
@Override
public String getSourceName() {
return "DirectToken";
}
}
Here is the running log:
Expected tokens:
5: '('
7: INT
Choose next token type: 7
org.antlr.v4.runtime.atn.EpsilonTransition cannot be cast to org.antlr.v4.runtime.atn.RuleTransition
Exception in thread "main" java.lang.ClassCastException: org.antlr.v4.runtime.atn.EpsilonTransition cannot be cast to org.antlr.v4.runtime.atn.RuleTransition
at org.antlr.v4.runtime.atn.ATN.getExpectedTokens(ATN.java:183)
at org.antlr.v4.runtime.Parser.getExpectedTokens(Parser.java:822)
at DirectTokenStream.LT(Main.java:96)
at DirectTokenStream.LA(Main.java:123)
at org.antlr.v4.runtime.DefaultErrorStrategy.sync(DefaultErrorStrategy.java:239)
at expr.ExprParser.term(ExprParser.java:192)
at expr.ExprParser.expr(ExprParser.java:122)
at Main.getExpectedTokens(Main.java:45)
Then, I tried to fix the problem by modifing ATN.java:181 to this
while (ctx != null && ctx.invokingState >= 0 && following.contains(Token.EPSILON)) {
ATNState invokingState = states.get(ctx.invokingState);
if (invokingState.transition(0) instanceof RuleTransition) {
RuleTransition rt = (RuleTransition)invokingState.transition(0);
following = nextTokens(rt.followState);
expected.addAll(following);
}
expected.remove(Token.EPSILON);
ctx = ctx.parent;
}
then it behaves normally
Expected tokens:
5: '('
7: INT
Choose next token type: 7
Expected tokens:
-1: EOF
1: '+'
2: '-'
3: '*'
4: '/'
Choose next token type:
I'm not an expert of parser. Hope someone can tell me whether my modification is reasonable and whether there is a bug in ANTLR. Thank you!
Metadata
Metadata
Assignees
Labels
No labels