-
-
Notifications
You must be signed in to change notification settings - Fork 453
Make lark.lark parse the same grammar as load_grammar.py, and make grammar.md document it more fully. #1388
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from 1 commit
db1a5a5
9493f81
7a2880f
83a374f
fdffb5f
95c5742
200d6b5
0fb28f9
2ec5ef3
e9c026e
9bf7ddf
7f02bd1
4f7a5eb
40576d2
daac65d
5f37365
697841b
654e102
33d7088
0d01fe2
20302ca
ff01d96
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -7,46 +7,66 @@ _item: rule | |
| | token | ||
| | statement | ||
|
|
||
| rule: RULE rule_params priority? ":" expansions | ||
| token: TOKEN token_params priority? ":" expansions | ||
| rule: RULE_MODIFIERS? RULE rule_params priority? ":" rule_expansions | ||
| token: TOKEN priority? ":" token_expansions | ||
|
|
||
| rule_params: ["{" RULE ("," RULE)* "}"] | ||
| token_params: ["{" TOKEN ("," TOKEN)* "}"] | ||
|
|
||
| priority: "." NUMBER | ||
|
|
||
| statement: "%ignore" expansions -> ignore | ||
| statement: "%ignore" ignore_token -> ignore | ||
| | "%import" import_path ["->" name] -> import | ||
| | "%import" import_path name_list -> multi_import | ||
| | "%override" rule -> override_rule | ||
| | "%override" token -> override_token | ||
| | "%declare" name+ -> declare | ||
| | "%extend" rule -> extend_rule | ||
| | "%extend" token -> extend_token | ||
|
|
||
| ignore_token: ignore_item [ OP | "~" NUMBER [".." NUMBER]] | ||
| ignore_item: STRING | TOKEN | REGEXP | ||
|
|
||
| !import_path: "."? name ("." name)* | ||
| name_list: "(" name ("," name)* ")" | ||
|
|
||
| ?expansions: alias (_VBAR alias)* | ||
| ?rule_expansions: rule_alias (_VBAR rule_alias)* | ||
|
|
||
| ?rule_inner_expansions: rule_expansion (_VBAR rule_expansion)* | ||
|
|
||
| ?rule_alias: rule_expansion ["->" RULE] | ||
|
|
||
| ?rule_expansion: rule_expr* | ||
|
|
||
| ?rule_expr: rule_atom [OP | "~" NUMBER [".." NUMBER]] | ||
| ?rule_atom: "(" rule_inner_expansions ")" | ||
| | "[" rule_inner_expansions "]" -> rule_maybe | ||
| | rule_value | ||
|
|
||
| ?rule_value: RULE "{" rule_value ("," rule_value)* "}" -> rule_template_usage | ||
| | RULE | ||
| | token_value | ||
|
|
||
| ?alias: expansion ["->" RULE] | ||
| ?token_expansions: token_expansion (_VBAR token_expansion)* | ||
|
|
||
| ?expansion: expr* | ||
| ?token_expansion: token_expr* | ||
|
|
||
| ?expr: atom [OP | "~" NUMBER [".." NUMBER]] | ||
| ?token_expr: token_atom [OP | "~" NUMBER [".." NUMBER]] | ||
|
|
||
| ?atom: "(" expansions ")" | ||
| | "[" expansions "]" -> maybe | ||
| | value | ||
| ?token_atom: "(" token_expansions ")" | ||
|
||
| | "[" token_expansions "]" -> token_maybe | ||
| | token_value | ||
|
|
||
| ?value: STRING ".." STRING -> literal_range | ||
| | name | ||
| | (REGEXP | STRING) -> literal | ||
| | name "{" value ("," value)* "}" -> template_usage | ||
| ?token_value: STRING ".." STRING -> literal_range | ||
| | TOKEN | ||
| | (REGEXP | STRING) -> literal | ||
|
|
||
| name: RULE | ||
| | TOKEN | ||
|
|
||
| _VBAR: _NL? "|" | ||
| OP: /[+*]|[?](?![a-z])/ | ||
| RULE: /!?[_?]?[a-z][_a-z0-9]*/ | ||
| RULE: /_?[a-z][_a-z0-9]*/ | ||
| RULE_MODIFIERS: /!||[?]!?(?=[a-z])/ | ||
|
||
| TOKEN: /_?[A-Z][_A-Z0-9]*/ | ||
| STRING: _STRING "i"? | ||
| REGEXP: /\/(?!\/)(\\\/|\\\\|[^\/])*?\/[imslux]*/ | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,169 @@ | ||
| from __future__ import absolute_import | ||
|
|
||
| import os | ||
| from unittest import TestCase, main | ||
|
|
||
| from lark import lark, Lark, UnexpectedToken | ||
| from lark.load_grammar import GrammarError | ||
|
|
||
|
|
||
| # Based on TestGrammar, with lots of tests that can't be run elided. | ||
class TestGrammarFormal(TestCase):
    """Syntax-level checks of ``grammars/lark.lark`` against malformed grammars.

    Each test parses a grammar *text* with a parser built from ``lark.lark``
    itself, so only syntax errors (``UnexpectedToken``) can be observed here.
    Tests from ``TestGrammar`` that need semantic analysis, or that need to
    parse input with the resulting grammar, are elided; their names and the
    reason for eliding them are kept as comments so the two suites can be
    compared side by side.
    """

    # (description, regex expected in the UnexpectedToken message, grammar text)
    #
    # This is an unrolled form of the test_grammar.py:GRAMMAR_ERRORS tests,
    # because the lark.lark messages vary.  Patterns are raw strings so that
    # regex escapes such as ``\?`` are not treated as string escapes.
    _SYNTAX_ERROR_CASES = (
        ('Incorrect type of value',
         r'Unexpected token Token..NUMBER., .1..', 'a: 1\n'),
        ('Unclosed parenthesis',
         r'Unexpected token Token.._NL.,', 'a: (\n'),
        ('Unmatched closing parenthesis',
         r'Unexpected token Token..RPAR.', 'a: )\n'),
        # NOTE(review): the next two cases previously repeated 'a: )\n' and
        # 'a: (\n' verbatim; they now cover mismatched bracket kinds instead.
        # Confirm the reported token names against the generated parser.
        ('Unmatched closing parenthesis',
         r'Unexpected token Token..RPAR.,', 'a: [)\n'),
        ('Unmatched closing parenthesis',
         r'Unexpected token Token..RSQB.,', 'a: (]\n'),
        ('Expecting rule or terminal definition (missing colon)',
         r'Unexpected token Token.._NL.,', 'a\n'),
        ('Expecting rule or terminal definition (missing colon)',
         r'Unexpected token Token.._NL.,', 'A\n'),
        ('Expecting rule or terminal definition (missing colon)',
         r'Unexpected token Token..__ANON_0., .->', 'a->\n'),
        ('Expecting rule or terminal definition (missing colon)',
         r'Unexpected token Token..__ANON_0., .->', 'A->\n'),
        ('Expecting rule or terminal definition (missing colon)',
         r'Unexpected token Token..TOKEN., .A..', 'a A\n'),
        ('Illegal name for rules or terminals',
         r'Unexpected token Token..RULE., .a..', 'Aa:\n'),
        ('Alias expects lowercase name',
         r'Unexpected token Token..STRING., ."a"..', 'a: -> "a"\n'),
        ('Unexpected colon',
         r'Unexpected token Token..COLON.,', 'a::\n'),
        ('Unexpected colon',
         r'Unexpected token Token..COLON.,', 'a: b:\n'),
        ('Unexpected colon',
         r'Unexpected token Token..COLON.,', 'a: B:\n'),
        ('Unexpected colon',
         r'Unexpected token Token..COLON.,', 'a: "a":\n'),
        ('Misplaced operator',
         r'Unexpected token Token..OP., .\?..', 'a: b??'),
        ('Misplaced operator',
         r'Unexpected token Token..OP., .\?..', 'a: b(?)'),
        ('Misplaced operator',
         r'Unexpected token Token..OP., .\+..', 'a:+\n'),
        ('Misplaced operator',
         r'Unexpected token Token..OP., .\?..', 'a:?\n'),
        ('Misplaced operator',
         r'Unexpected token Token..OP., .\*..', 'a:*\n'),
        ('Misplaced operator',
         r'Unexpected token Token..OP., .\*..', 'a:|*\n'),
        ('Expecting option ("|") or a new rule or terminal definition',
         r'Unexpected token Token..LPAR.,', 'a:a\n()\n'),
        ('Terminal names cannot contain dots',
         r'Unexpected token Token..TOKEN., .B..', 'A.B\n'),
        ('Expecting rule or terminal definition',
         r'Unexpected token Token..STRING., ."a"..', '"a"\n'),
        ('%import expects a name',
         r'Unexpected token Token..STRING., ."a"..', '%import "a"\n'),
        ('%ignore expects a value',
         r'Unexpected token Token..__ANON_2., .%import..', '%ignore %import\n'),
    )

    def setUp(self):
        """Load the text of ``grammars/lark.lark`` into ``self.lark_grammar``."""
        lark_path = os.path.join(os.path.dirname(lark.__file__), 'grammars/lark.lark')
        with open(lark_path, 'r', encoding='utf-8') as f:
            # read() keeps the file exactly as written.  Joining readlines()
            # with "\n" would double every newline, because readlines() keeps
            # each line's own terminator.
            self.lark_grammar = f.read()

    def _new_parser(self):
        """Return a fresh LALR parser built from the loaded lark.lark text."""
        return Lark(self.lark_grammar, parser="lalr")

    def test_errors(self):
        """Every malformed grammar must be rejected with the expected token."""
        l = self._new_parser()
        for description, expected, text in self._SYNTAX_ERROR_CASES:
            # subTest reports each failing case separately instead of
            # stopping at the first one.
            with self.subTest(description, grammar=text):
                self.assertRaisesRegex(UnexpectedToken, expected, l.parse, text)

    # def test_empty_literal(self):
    #     raise NotImplementedError("Breaks tests/test_parser.py:_TestParser:test_backslash2().")

    # def test_ignore_name(self):
    #     raise NotImplementedError("Can't parse using parsed grammar.")

    # def test_override_rule_1(self):
    #     raise NotImplementedError("Can't parse using parsed grammar.")

    # def test_override_rule_2(self):
    #     raise NotImplementedError("Can't test semantics of grammar, only syntax.")

    # def test_override_rule_3(self):
    #     raise NotImplementedError("Can't test semantics of grammar, only syntax.")

    # def test_override_terminal(self):
    #     raise NotImplementedError("Can't parse using parsed grammar.")

    # def test_extend_rule_1(self):
    #     raise NotImplementedError("Can't parse using parsed grammar.")

    # def test_extend_rule_2(self):
    #     raise NotImplementedError("Can't test semantics of grammar, only syntax.")

    # def test_extend_term(self):
    #     raise NotImplementedError("Can't parse using parsed grammar.")

    # def test_extend_twice(self):
    #     raise NotImplementedError("Can't parse using parsed grammar.")

    # def test_undefined_ignore(self):
    #     raise NotImplementedError("Can't parse using parsed grammar.")

    def test_alias_in_terminal(self):
        """An alias (``->``) after a terminal definition is a syntax error."""
        l = self._new_parser()
        g = 'start: TERM\nTERM: "a" -> alias\n'
        # The TestGrammar counterpart of this check is:
        # self.assertRaisesRegex(GrammarError, "Aliasing not allowed in terminals", Lark, g)
        self.assertRaisesRegex(UnexpectedToken, r"Unexpected token Token.'__ANON_0', '->'.", l.parse, g)

    # def test_undefined_rule(self):
    #     raise NotImplementedError("Can't test semantics of grammar, only syntax.")

    # def test_undefined_term(self):
    #     raise NotImplementedError("Can't test semantics of grammar, only syntax.")

    # def test_token_multiline_only_works_with_x_flag(self):
    #     raise NotImplementedError("Can't test regex flags in Lark grammar.")

    # def test_import_custom_sources(self):
    #     raise NotImplementedError("Can't parse using parsed grammar.")

    # def test_import_custom_sources2(self):
    #     raise NotImplementedError("Can't parse using parsed grammar.")

    # def test_import_custom_sources3(self):
    #     raise NotImplementedError("Can't parse using parsed grammar.")

    # def test_my_find_grammar_errors(self):
    #     raise NotImplementedError("Can't parse using parsed grammar.")

    # def test_ranged_repeat_terms(self):
    #     raise NotImplementedError("Can't parse using parsed grammar.")

    # def test_ranged_repeat_large(self):
    #     raise NotImplementedError("Can't parse using parsed grammar.")

    # def test_large_terminal(self):
    #     raise NotImplementedError("Can't parse using parsed grammar.")

    # def test_list_grammar_imports(self):
    #     raise NotImplementedError("Can't test semantics of grammar, only syntax.")

    def test_inline_with_expand_single(self):
        """Combining ``!?`` with an inlined (``_``-prefixed) rule is rejected."""
        l = self._new_parser()
        grammar = '\nstart: _a\n!?_a: "A"\n'
        # The TestGrammar counterpart of this check is:
        # self.assertRaisesRegex(GrammarError, "Inlined rules (_rule) cannot use the ?rule modifier.", l.parse, grammar)
        # TODO Is this really catching the right problem?
        # The "?" is escaped so it matches a literal question mark instead of
        # acting as a quantifier on the preceding quote.
        self.assertRaisesRegex(UnexpectedToken, r"Unexpected token Token.'OP', '\?'.", l.parse, grammar)

    # def test_line_breaks(self):
    #     raise NotImplementedError("Can't parse using parsed grammar.")
|
|
||
|
|
||
| if __name__ == '__main__': | ||
| main() | ||
Uh oh!
There was an error while loading. Please reload this page.