-
-
Notifications
You must be signed in to change notification settings - Fork 453
Make lark.lark parse the same grammar as load_grammar.py, and make grammar.md document it more fully. #1388
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from 6 commits
db1a5a5
9493f81
7a2880f
83a374f
fdffb5f
95c5742
200d6b5
0fb28f9
2ec5ef3
e9c026e
9bf7ddf
7f02bd1
4f7a5eb
40576d2
daac65d
5f37365
697841b
654e102
33d7088
0d01fe2
20302ca
ff01d96
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,25 +1,39 @@ | ||
| # Lark grammar of Lark's syntax | ||
| # Note: Lark is not bootstrapped, its parser is implemented in load_grammar.py | ||
| # This grammar matches that one, but does not enforce some rules that it does. | ||
RossPatterson marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| # If you want to enforce those, you can pass the "LarkValidatorVisitor" over | ||
| # the parse tree, like this: | ||
|
|
||
| # import os | ||
| # import lark | ||
| # from lark.lark_validator_visitor import LarkValidatorVisitor | ||
| # | ||
| # lark_path = os.path.join(os.path.dirname(lark.__file__), 'grammars/lark.lark') | ||
| # lark_parser = Lark.open(lark_path, parser="lalr") | ||
RossPatterson marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| # parse_tree = lark_parser.parse(my_grammar) | ||
| # LarkValidatorVisitor.validate(parse_tree) | ||
|
|
||
| start: (_item? _NL)* _item? | ||
|
|
||
| _item: rule | ||
| | token | ||
| | statement | ||
|
|
||
| rule: RULE rule_params priority? ":" expansions | ||
| token: TOKEN token_params priority? ":" expansions | ||
| rule: rule_modifiers? RULE rule_params priority? ":" expansions | ||
| token: TOKEN priority? ":" expansions | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yes, but it's different for
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @erezsh If my comment of 2024-06-20 is acceptable, let's resolve this point.
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think what I meant was that priority can already be an empty rule, so no point in making it optional. |
||
|
|
||
| rule_modifiers: RULE_MODIFIERS | ||
|
|
||
| rule_params: ["{" RULE ("," RULE)* "}"] | ||
| token_params: ["{" TOKEN ("," TOKEN)* "}"] | ||
|
|
||
| priority: "." NUMBER | ||
|
|
||
| statement: "%ignore" expansions -> ignore | ||
| | "%import" import_path ["->" name] -> import | ||
| | "%import" import_path name_list -> multi_import | ||
| | "%override" rule -> override_rule | ||
| | "%override" (rule | token) -> override | ||
| | "%declare" name+ -> declare | ||
| | "%extend" (rule | token) -> extend | ||
|
|
||
| !import_path: "."? name ("." name)* | ||
| name_list: "(" name ("," name)* ")" | ||
|
|
@@ -39,14 +53,15 @@ name_list: "(" name ("," name)* ")" | |
| ?value: STRING ".." STRING -> literal_range | ||
| | name | ||
| | (REGEXP | STRING) -> literal | ||
| | name "{" value ("," value)* "}" -> template_usage | ||
| | RULE "{" value ("," value)* "}" -> template_usage | ||
|
|
||
| name: RULE | ||
| | TOKEN | ||
|
|
||
| _VBAR: _NL? "|" | ||
| OP: /[+*]|[?](?![a-z])/ | ||
| RULE: /!?[_?]?[a-z][_a-z0-9]*/ | ||
| RULE_MODIFIERS: /(!|![?]?|[?]!?)(?=[_a-z])/ | ||
| RULE: /_?[a-z][_a-z0-9]*/ | ||
| TOKEN: /_?[A-Z][_A-Z0-9]*/ | ||
| STRING: _STRING "i"? | ||
| REGEXP: /\/(?!\/)(\\\/|\\\\|[^\/])*?\/[imslux]*/ | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,93 @@ | ||
| from .lexer import Token | ||
| from .load_grammar import GrammarError | ||
| from .visitors import Visitor | ||
| from .tree import Tree | ||
|
|
||
| class LarkValidatorVisitor(Visitor): | ||
|
|
||
| @classmethod | ||
| def validate(cls, tree: Tree): | ||
| visitor = cls() | ||
| visitor.visit(tree) | ||
| return tree | ||
|
|
||
| def alias(self, tree: Tree): | ||
| # Reject alias names in inner 'expansions'. | ||
| self._reject_aliases(tree.children[0], "Deep aliasing not allowed") | ||
|
|
||
| def ignore(self, tree: Tree): | ||
| # Reject everything except 'literal' and 'name' > 'TOKEN'. | ||
| assert len(tree.children) > 0 # The grammar should pass us some things to ignore. | ||
| if len(tree.children) > 1: | ||
| self._reject_bad_ignore() | ||
| node = tree.children[0] | ||
| if node.data == "expansions": | ||
| if len(node.children) > 1: | ||
| self._reject_bad_ignore() | ||
| node = node.children[0] | ||
| if node.data == "alias": | ||
| if len(node.children) > 1: | ||
| self._reject_bad_ignore() | ||
| node = node.children[0] | ||
| if node.data == "expansion": | ||
| if len(node.children) > 1: | ||
| self._reject_bad_ignore() | ||
| node = node.children[0] | ||
| if node.data == "expr": | ||
| if len(node.children) > 1: | ||
| self._reject_bad_ignore() | ||
| node = node.children[0] | ||
| if node.data == "atom": | ||
| if len(node.children) > 1: | ||
| self._reject_bad_ignore() | ||
| node = node.children[0] | ||
| if node.data == "literal": | ||
| return | ||
| elif node.data == "name": | ||
| if node.children[0].data == "TOKEN": | ||
| return | ||
| elif node.data == "value": | ||
| if node.children[0].data == "literal": | ||
| return | ||
| elif node.children[0].data == "name": | ||
| if node.children[0][0].data == "TOKEN": | ||
| return | ||
| self._reject_bad_ignore() | ||
|
|
||
| def token(self, tree: Tree): | ||
| assert len(tree.children) > 1 # The grammar should pass us at least a token name and an item. | ||
| first_item = 2 if tree.children[1].data == "priority" else 1 | ||
| # Reject alias names in token definitions. | ||
| for child in tree.children[first_item:]: | ||
| self._reject_aliases(child, "Aliasing not allowed in terminals (You used -> in the wrong place)") | ||
| # Reject template usage in token definitions. We do this before checking rules | ||
| # because rule usage looks like template usage, just without parameters. | ||
| for child in tree.children[first_item:]: | ||
| self._reject_templates(child, "Templates not allowed in terminals") | ||
| # Reject rule references in token definitions. | ||
| for child in tree.children[first_item:]: | ||
| self._reject_rules(child, "Rules aren't allowed inside terminals") | ||
|
|
||
| def _reject_aliases(self, item: Tree|Token, message: str): | ||
| if isinstance(item, Tree): | ||
| if item.data == "alias" and len(item.children) > 1 and item.children[1] is not None: | ||
| raise GrammarError(message) | ||
| for child in item.children: | ||
| self._reject_aliases(child, message) | ||
|
|
||
| def _reject_bad_ignore(self): | ||
| raise GrammarError("Bad %ignore - must have a Terminal or other value.") | ||
|
|
||
| def _reject_rules(self, item: Tree|Token, message: str): | ||
| if isinstance(item, Token) and item.type == "RULE": | ||
| raise GrammarError(message) | ||
| elif isinstance(item, Tree): | ||
| for child in item.children: | ||
| self._reject_rules(child, message) | ||
|
|
||
| def _reject_templates(self, item: Tree|Token, message: str): | ||
| if isinstance(item, Tree): | ||
| if item.data == "template_usage": | ||
| raise GrammarError(message) | ||
| for child in item.children: | ||
| self._reject_templates(child, message) |
Uh oh!
There was an error while loading. Please reload this page.