Skip to content

Commit c13b56a

Browse files
authored
Merge pull request #1006 from lark-parser/tree_templates
2 parents 46b03de + 3c6b131 commit c13b56a

File tree

5 files changed

+270
-4
lines changed

5 files changed

+270
-4
lines changed

examples/advanced/extend_python.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
from python_parser import PythonIndenter
1212

1313
GRAMMAR = r"""
14-
%import .python3 (compound_stmt, single_input, file_input, eval_input, test, suite, _NEWLINE, _INDENT, _DEDENT, COMMENT)
14+
%import python (compound_stmt, single_input, file_input, eval_input, test, suite, _NEWLINE, _INDENT, _DEDENT, COMMENT)
1515
1616
%extend compound_stmt: match_stmt
1717

examples/advanced/py3to2.py

+94
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
"""
2+
Python 3 to Python 2 converter (tree templates)
3+
===============================================
4+
5+
This example demonstrates how to translate between two trees using tree templates.
6+
It parses Python 3, translates it to a Python 2 AST, and then outputs the result as Python 2 code.
7+
8+
Uses reconstruct_python.py for generating the final Python 2 code.
9+
"""
10+
11+
12+
from lark import Lark
13+
from lark.tree_templates import TemplateConf, TemplateTranslator
14+
15+
from lark.indenter import PythonIndenter
16+
from reconstruct_python import PythonReconstructor
17+
18+
19+
#
20+
# 1. Define a Python parser that also accepts template vars in the code (in the form of $var)
21+
#
22+
TEMPLATED_PYTHON = r"""
23+
%import python (single_input, file_input, eval_input, atom, var, stmt, expr, testlist_star_expr, _NEWLINE, _INDENT, _DEDENT, COMMENT, NAME)
24+
25+
%extend atom: TEMPLATE_NAME -> var
26+
27+
TEMPLATE_NAME: "$" NAME
28+
29+
?template_start: (stmt | testlist_star_expr _NEWLINE)
30+
31+
%ignore /[\t \f]+/ // WS
32+
%ignore /\\[\t \f]*\r?\n/ // LINE_CONT
33+
%ignore COMMENT
34+
"""
35+
36+
parser = Lark(TEMPLATED_PYTHON, parser='lalr', start=['single_input', 'file_input', 'eval_input', 'template_start'], postlex=PythonIndenter(), maybe_placeholders=False)
37+
38+
39+
def parse_template(s):
    """Parse the template source ``s`` into a tree.

    A trailing newline is appended to ``s`` and the module-level ``parser``
    is invoked with the ``template_start`` start rule.
    """
    source = s + '\n'
    return parser.parse(source, start='template_start')
41+
42+
def parse_code(s):
    """Parse the source code ``s`` into a tree.

    A trailing newline is appended to ``s`` and the module-level ``parser``
    is invoked with the ``file_input`` start rule.
    """
    source = s + '\n'
    return parser.parse(source, start='file_input')
44+
45+
46+
#
47+
# 2. Define translations using templates (each template code is parsed to a template tree)
48+
#
49+
50+
# Template factory: calling it on template source parses the source with
# ``parse_template`` and wraps the result in a Template.
pytemplate = TemplateConf(parse=parse_template)

# Source pattern -> replacement pattern, both written as template source.
translations_3to2 = {
    'yield from $a': 'for _tmp in $a: yield _tmp',
    'raise $e from $x': 'raise $e',
    '$a / $b': 'float($a) / $b',
}
# Rebind the same name to the parsed form: Template -> Template.
translations_3to2 = {
    pytemplate(pattern): pytemplate(replacement)
    for pattern, replacement in translations_3to2.items()
}
63+
64+
#
65+
# 3. Translate and reconstruct Python 3 code into valid Python 2 code
66+
#
67+
68+
python_reconstruct = PythonReconstructor(parser)
69+
70+
def translate_py3to2(code):
    """Translate ``code`` using ``translations_3to2`` and return the result.

    The code is parsed with ``parse_code``, the translations are applied
    through a ``TemplateTranslator``, and the translated tree is handed to
    ``python_reconstruct.reconstruct``, whose return value is returned.
    """
    parsed = parse_code(code)
    translator = TemplateTranslator(translations_3to2)
    return python_reconstruct.reconstruct(translator.translate(parsed))
74+
75+
76+
#
77+
# Test Code
78+
#
79+
80+
_TEST_CODE = '''
81+
if a / 2 > 1:
82+
yield from [1,2,3]
83+
else:
84+
raise ValueError(a) from e
85+
86+
'''
87+
88+
def test():
    """Print the sample code, a separator line, and the translated sample."""
    for text in (_TEST_CODE, ' -----> '):
        print(text)
    print(translate_py3to2(_TEST_CODE))
92+
93+
if __name__ == '__main__':
94+
test()

examples/advanced/reconstruct_python.py

+12-3
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
from lark.reconstruct import Reconstructor
1313
from lark.indenter import PythonIndenter
1414

15-
15+
# Official Python grammar by Lark
1616
python_parser3 = Lark.open_from_package('lark', 'python.lark', ['grammars'],
1717
parser='lalr', postlex=PythonIndenter(), start='file_input',
1818
maybe_placeholders=False # Necessary for reconstructor
@@ -57,16 +57,25 @@ def postproc(items):
5757
yield "\n"
5858

5959

60-
python_reconstruct = Reconstructor(python_parser3, {'_NEWLINE': special, '_DEDENT': special, '_INDENT': special})
60+
class PythonReconstructor:
    """Wrapper around ``Reconstructor`` for trees produced by ``parser``.

    The wrapped ``Reconstructor`` is configured with the ``special`` handler
    for the ``_NEWLINE``, ``_DEDENT`` and ``_INDENT`` terminals.
    """

    def __init__(self, parser):
        handled_terminals = ('_NEWLINE', '_DEDENT', '_INDENT')
        term_subs = {name: special for name in handled_terminals}
        self._recons = Reconstructor(parser, term_subs)

    def reconstruct(self, tree):
        """Reconstruct ``tree``, passing ``postproc`` as the post-processor."""
        reconstructor = self._recons
        return reconstructor.reconstruct(tree, postproc)
6166

6267

6368
def test():
69+
python_reconstructor = PythonReconstructor(python_parser3)
70+
6471
self_contents = open(__file__).read()
6572

6673
tree = python_parser3.parse(self_contents+'\n')
67-
output = python_reconstruct.reconstruct(tree, postproc)
74+
output = python_reconstructor.reconstruct(tree)
6875

6976
tree_new = python_parser3.parse(output)
77+
print(tree.pretty())
78+
print(tree_new.pretty())
7079
# assert tree.pretty() == tree_new.pretty()
7180
assert tree == tree_new
7281

lark/indenter.py

+9
Original file line numberDiff line numberDiff line change
@@ -110,3 +110,12 @@ class PythonIndenter(Indenter):
110110
tab_len = 8
111111

112112
###}
113+
114+
115+
class PythonIndenter(Indenter):
    """Indenter configuration naming the token types used for Python code.

    NOTE(review): the diff context directly above this hunk already shows a
    ``class PythonIndenter(Indenter)`` ending in ``tab_len = 8``. Confirm
    that defining the class a second time in this module is intentional.
    """

    # Newline / indentation token type names.
    NL_type = '_NEWLINE'
    INDENT_type = '_INDENT'
    DEDENT_type = '_DEDENT'

    # Bracket token type names, listed in matching open/close order.
    OPEN_PAREN_types = ['LPAR', 'LSQB', 'LBRACE']
    CLOSE_PAREN_types = ['RPAR', 'RSQB', 'RBRACE']

    tab_len = 8

lark/tree_templates.py

+154
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,154 @@
1+
"""This module defines utilities for matching and translation tree templates.
2+
3+
A tree template is a tree that contains nodes that are template variables.
4+
5+
"""
6+
7+
from typing import Union, Optional, Mapping
8+
9+
from lark import Tree, Transformer
10+
11+
TreeOrCode = Union[Tree, str]
12+
13+
class TemplateConf:
    """Template Configuration

    Allows customization for different uses of Template
    """

    def __init__(self, parse=None):
        # Optional callable used by ``_get_tree`` to turn a template given
        # as a string into a Tree.
        self._parse = parse

    def test_var(self, var: Union[Tree, str]) -> Optional[str]:
        """Given a tree node, if it is a template variable return its name. Otherwise, return None.

        This method may be overridden for customization

        Parameters:
            var: Tree | str - The tree node to test

        Returns:
            The variable name with its leading ``$`` characters stripped,
            or None if ``var`` is not a template variable.
        """
        if isinstance(var, Tree):
            # A variable tree is a ``var`` node whose first child is the
            # ``$name`` string. Childless ``var`` nodes and nodes whose first
            # child is not a string are ordinary trees, not variables.
            if var.data != 'var' or not var.children:
                return None
            var = var.children[0]

        if isinstance(var, str) and var.startswith('$'):
            return var.lstrip('$')

        return None

    def _get_tree(self, template: TreeOrCode):
        """Return ``template`` as a Tree, parsing it first if it is a string.

        Raises:
            AssertionError: if ``template`` is a string and no ``parse``
                callable was configured, or if the result is not a Tree.
        """
        if isinstance(template, str):
            assert self._parse, "a parse callable is required to use string templates"
            template = self._parse(template)

        assert isinstance(template, Tree), template
        return template

    def __call__(self, template):
        """Create a Template from ``template`` bound to this configuration."""
        return Template(template, conf=self)

    def _match_tree_template(self, template, tree):
        """Match ``template`` against ``tree``.

        Returns:
            A dict mapping template variable names to the nodes they matched
            (empty when the template contains no variables), or None when
            the template does not match.
        """
        template_var = self.test_var(template)
        if template_var:
            return {template_var: tree}

        if isinstance(template, str):
            return {} if template == tree else None

        assert isinstance(template, Tree), template

        # A Tree template can never match a non-Tree node (such nodes have
        # no ``data``/``children``); report "no match" instead of failing.
        if not isinstance(tree, Tree):
            return None

        if template.data != tree.data or len(template.children) != len(tree.children):
            return None

        res = {}
        for t1, t2 in zip(template.children, tree.children):
            matches = self._match_tree_template(t1, t2)
            if matches is None:
                return None

            res.update(matches)

        return res
72+
73+
74+
75+
class _ReplaceVars(Transformer):
    """Transformer that substitutes template-variable nodes with bound values.

    Parameters:
        conf: configuration object whose ``test_var`` identifies variable nodes
        vars: mapping from variable name to the value that replaces it
    """

    def __init__(self, conf, vars):
        # Let the base Transformer set up its own state before adding ours.
        super().__init__()
        self._conf = conf
        self._vars = vars

    def __default__(self, data, children, meta):
        """Build the default node, then replace it if it is a template variable.

        A variable name that is missing from ``vars`` is looked up with
        ``vars[name]``, so a plain dict raises KeyError.
        """
        tree = super().__default__(data, children, meta)

        var = self._conf.test_var(tree)
        if var:
            return self._vars[var]
        return tree
87+
88+
89+
class Template:
    """Represents a tree template, tied to a specific configuration

    A tree template is a tree that contains nodes that are template variables.
    Those variables will match any tree.
    (future versions may support annotations on the variables, to allow more complex templates)
    """

    def __init__(self, tree: Tree, conf: TemplateConf = TemplateConf()):
        # ``tree`` is passed through ``conf._get_tree``, so it may also be
        # template source when ``conf`` has a parse callable.
        self.conf = conf
        self.tree = conf._get_tree(tree)

    def match(self, tree: TreeOrCode):
        """Match a tree template to a tree.

        A tree template without variables will only match ``tree`` if it is equal to the template.

        Parameters:
            tree (Tree): The tree to match to the template

        Returns:
            Optional[Dict[str, Tree]]: If match is found, returns a dictionary mapping
                template variable names to their matching tree nodes.
                If no match was found, returns None.
        """
        tree = self.conf._get_tree(tree)
        return self.conf._match_tree_template(self.tree, tree)

    def search(self, tree: TreeOrCode):
        """Search for all occurrences of the tree template inside ``tree``.

        Yields:
            ``(subtree, vars)`` pairs, one for every subtree of ``tree`` that
            matches the template, where ``vars`` is the result of ``match``.
        """
        tree = self.conf._get_tree(tree)
        for subtree in tree.iter_subtrees():
            res = self.match(subtree)
            # ``match`` returns an empty dict for a template without
            # variables; only None means "no match", so test for it explicitly.
            if res is not None:
                yield subtree, res

    def apply_vars(self, vars: Mapping[str, Tree]):
        """Apply vars to the template tree

        Parameters:
            vars: mapping from template variable name to its replacement

        Returns:
            The result of transforming the template tree with every
            variable node replaced by its value from ``vars``.
        """
        return _ReplaceVars(self.conf, vars).transform(self.tree)
130+
131+
132+
def translate(t1: Template, t2: Template, tree: TreeOrCode):
    """Search tree and translate each occurrence of t1 into t2.

    ``tree`` is first converted with ``t1.conf._get_tree`` (parsing it if it
    is a string). Every subtree reported by ``t1.search`` is overwritten in
    place with the data and children of ``t2`` applied to the matched
    variables, and the converted tree is returned.
    """
    root = t1.conf._get_tree(tree)
    for matched, bindings in t1.search(root):
        replacement = t2.apply_vars(bindings)
        matched.set(replacement.data, replacement.children)
    return root
140+
141+
142+
143+
class TemplateTranslator:
    """Utility class for translating a collection of patterns

    Parameters:
        translations: mapping from the Template to search for to the
            Template that replaces each of its occurrences
    """

    def __init__(self, translations: Mapping[Template, Template]):
        # Both keys and values must already be Template instances; raw trees
        # or template source are rejected here.
        assert all(
            isinstance(k, Template) and isinstance(v, Template)
            for k, v in translations.items()
        ), "translations must map Template instances to Template instances"
        self.translations = translations

    def translate(self, tree: Tree):
        """Apply every translation to ``tree`` in mapping order and return the result."""
        for k, v in self.translations.items():
            tree = translate(k, v, tree)
        return tree

0 commit comments

Comments
 (0)