Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Tree Templates (first draft) + py3to2 example #1006

Merged
merged 4 commits into from
Oct 18, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion examples/advanced/extend_python.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from python_parser import PythonIndenter

GRAMMAR = r"""
%import .python3 (compound_stmt, single_input, file_input, eval_input, test, suite, _NEWLINE, _INDENT, _DEDENT, COMMENT)
%import python (compound_stmt, single_input, file_input, eval_input, test, suite, _NEWLINE, _INDENT, _DEDENT, COMMENT)

%extend compound_stmt: match_stmt

Expand Down
94 changes: 94 additions & 0 deletions examples/advanced/py3to2.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
"""
Python 3 to Python 2 converter (tree templates)
===============================================

This example demonstrates how to translate between two trees using tree templates.
It parses Python 3, translates it to a Python 2 AST, and then outputs the result as Python 2 code.

Uses reconstruct_python.py for generating the final Python 2 code.
"""


from lark import Lark
from lark.tree_templates import TemplateConf, TemplateTranslator

from lark.indenter import PythonIndenter
from reconstruct_python import PythonReconstructor


#
# 1. Define a Python parser that also accepts template vars in the code (in the form of $var)
#
TEMPLATED_PYTHON = r"""
%import python (single_input, file_input, eval_input, atom, var, stmt, expr, testlist_star_expr, _NEWLINE, _INDENT, _DEDENT, COMMENT, NAME)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Seems like a perfect use case for an %include statement.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was thinking that too

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You didn't submit a PR for this, right?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is part of #998, but that is not a finished implementation.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm okay. Anyway, we can change it to %include when it's ready.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes.


%extend atom: TEMPLATE_NAME -> var

TEMPLATE_NAME: "$" NAME

?template_start: (stmt | testlist_star_expr _NEWLINE)

%ignore /[\t \f]+/ // WS
%ignore /\\[\t \f]*\r?\n/ // LINE_CONT
%ignore COMMENT
"""

# One LALR parser for every entry point: the three standard Python start rules
# plus `template_start`, the rule added by the grammar in TEMPLATED_PYTHON.
parser = Lark(
    TEMPLATED_PYTHON,
    parser='lalr',
    start=['single_input', 'file_input', 'eval_input', 'template_start'],
    postlex=PythonIndenter(),
    maybe_placeholders=False,
)


def parse_template(s):
    """Parse the template text ``s`` from the ``template_start`` rule.

    A trailing newline is appended to ``s`` before it is handed to the parser.
    """
    source = s + '\n'
    return parser.parse(source, start='template_start')

def parse_code(s):
    """Parse the source text ``s`` from the ``file_input`` rule.

    A trailing newline is appended to ``s`` before it is handed to the parser.
    """
    source = s + '\n'
    return parser.parse(source, start='file_input')


#
# 2. Define translations using templates (each template code is parsed to a template tree)
#

# Template texts are parsed through `parse_template`.
pytemplate = TemplateConf(parse=parse_template)

# Each (Python 3 pattern, Python 2 replacement) pair of texts is turned into a
# pair of templates. Names written as `$name` are template variables.
translations_3to2 = {
    pytemplate(py3_pattern): pytemplate(py2_replacement)
    for py3_pattern, py2_replacement in [
        ('yield from $a', 'for _tmp in $a: yield _tmp'),
        ('raise $e from $x', 'raise $e'),
        ('$a / $b', 'float($a) / $b'),
    ]
}

#
# 3. Translate and reconstruct Python 3 code into valid Python 2 code
#

# Reconstructor built on the same templated-Python parser used for parsing.
python_reconstruct = PythonReconstructor(parser)


def translate_py3to2(code):
    """Translate the Python 3 source text ``code`` and return the rebuilt text.

    The code is parsed, every pattern in ``translations_3to2`` is applied to the
    resulting tree, and the translated tree is turned back into source text.
    """
    py3_tree = parse_code(code)
    translator = TemplateTranslator(translations_3to2)
    py2_tree = translator.translate(py3_tree)
    return python_reconstruct.reconstruct(py2_tree)


#
# Test Code
#

# Sample input for test(): it contains a division, a `yield from` and a
# `raise ... from`, i.e. one occurrence of each pattern in `translations_3to2`.
# NOTE(review): the nested statements appear without indentation in this view;
# confirm against the original file before relying on the literal as shown.
_TEST_CODE = '''
if a / 2 > 1:
yield from [1,2,3]
else:
raise ValueError(a) from e

'''

def test():
    """Print the sample code, a separator line, and then its translation."""
    for chunk in (_TEST_CODE, ' -----> '):
        print(chunk)
    print(translate_py3to2(_TEST_CODE))


if __name__ == '__main__':
    test()
15 changes: 12 additions & 3 deletions examples/advanced/reconstruct_python.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from lark.reconstruct import Reconstructor
from lark.indenter import PythonIndenter


# Official Python grammar by Lark
python_parser3 = Lark.open_from_package('lark', 'python.lark', ['grammars'],
parser='lalr', postlex=PythonIndenter(), start='file_input',
maybe_placeholders=False # Necessary for reconstructor
Expand Down Expand Up @@ -57,16 +57,25 @@ def postproc(items):
yield "\n"


python_reconstruct = Reconstructor(python_parser3, {'_NEWLINE': special, '_DEDENT': special, '_INDENT': special})
class PythonReconstructor:
    """Rebuild source text from trees produced by the given ``parser``."""

    def __init__(self, parser):
        # Map each of the three indentation-related terminal names to `special`.
        term_subs = {name: special for name in ('_NEWLINE', '_DEDENT', '_INDENT')}
        self._recons = Reconstructor(parser, term_subs)

    def reconstruct(self, tree):
        """Return the reconstruction of ``tree``, passing ``postproc`` along."""
        return self._recons.reconstruct(tree, postproc)


def test():
    """Round-trip this file through the parser and the reconstructor.

    The file's own source is parsed, text is reconstructed from the tree, that
    text is parsed again, and the two trees are asserted to be equal. Both
    trees are pretty-printed before the comparison.
    """
    python_reconstructor = PythonReconstructor(python_parser3)

    # Use a context manager so the file handle is closed deterministically.
    with open(__file__) as f:
        self_contents = f.read()

    tree = python_parser3.parse(self_contents + '\n')
    output = python_reconstructor.reconstruct(tree)

    tree_new = python_parser3.parse(output)
    print(tree.pretty())
    print(tree_new.pretty())
    assert tree == tree_new

Expand Down
9 changes: 9 additions & 0 deletions lark/indenter.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,3 +110,12 @@ class PythonIndenter(Indenter):
tab_len = 8

###}


class PythonIndenter(Indenter):
    """Indenter settings for Python-style grammars.

    Only class-level constants are defined here; how they are interpreted is
    decided by the ``Indenter`` base class.
    """

    # Terminal names for the newline, indent and dedent tokens.
    NL_type = '_NEWLINE'
    INDENT_type = '_INDENT'
    DEDENT_type = '_DEDENT'

    # Terminal names of the opening brackets and of their closing counterparts.
    OPEN_PAREN_types = ['LPAR', 'LSQB', 'LBRACE']
    CLOSE_PAREN_types = ['RPAR', 'RSQB', 'RBRACE']

    # Presumably the number of columns a tab counts for; confirm in Indenter.
    tab_len = 8
154 changes: 154 additions & 0 deletions lark/tree_templates.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
"""This module defines utilities for matching and translation tree templates.

A tree template is a tree that contains nodes that are template variables.

"""

from typing import Union, Optional, Mapping

from lark import Tree, Transformer

TreeOrCode = Union[Tree, str]

class TemplateConf:
    """Template Configuration

    Allows customization for different uses of Template

    Parameters:
        parse: Optional callable that turns template/code text into a ``Tree``.
            It is only needed when templates or trees are given as text.
    """

    def __init__(self, parse=None):
        self._parse = parse

    def test_var(self, var: Union[Tree, str]) -> Optional[str]:
        """Given a tree node, if it is a template variable return its name. Otherwise, return None.

        This method may be overridden for customization

        Parameters:
            var: Tree | str - The tree node to test

        Returns:
            The variable name with its leading ``$`` removed, or None.
        """
        if isinstance(var, str):
            return var.lstrip('$') if var.startswith('$') else None

        # A `var` tree only counts as a template variable when its first child
        # is a string starting with '$'. Empty `var` trees and `var` trees that
        # wrap another tree are ordinary nodes, not variables.
        if isinstance(var, Tree) and var.data == 'var' and var.children:
            name = var.children[0]
            if isinstance(name, str) and name.startswith('$'):
                return name.lstrip('$')

        return None

    def _get_tree(self, template: TreeOrCode):
        """Return ``template`` as a ``Tree``, parsing it first if it is text."""
        if isinstance(template, str):
            assert self._parse, "Cannot parse text: no parse function was configured"
            template = self._parse(template)

        assert isinstance(template, Tree), template
        return template

    def __call__(self, template):
        """Create a ``Template`` from ``template``, bound to this configuration."""
        return Template(template, conf=self)

    def _match_tree_template(self, template, tree):
        """Match ``template`` against ``tree``.

        Returns:
            A dict mapping variable names to the nodes they matched (empty when
            the template has no variables), or None if there is no match.
        """
        template_var = self.test_var(template)
        if template_var:
            return {template_var: tree}

        if isinstance(template, str):
            return {} if template == tree else None

        assert isinstance(template, Tree), template

        # A sub-template that is a tree can never match a leaf (string/token).
        if not isinstance(tree, Tree):
            return None

        if template.data != tree.data or len(template.children) != len(tree.children):
            return None

        res = {}
        for t1, t2 in zip(template.children, tree.children):
            matches = self._match_tree_template(t1, t2)
            if matches is None:
                return None

            res.update(matches)

        return res



class _ReplaceVars(Transformer):
    """Transformer that replaces template-variable nodes with bound values.

    Parameters:
        conf: Configuration object whose ``test_var`` recognizes variable nodes.
        vars: Mapping from variable name to the node that replaces it.
    """

    def __init__(self, conf, vars):
        # Run the base-class initializer as well, instead of skipping it.
        super().__init__()
        self._conf = conf
        self._vars = vars

    def __default__(self, data, children, meta):
        """Build the node as usual, then swap it out if it is a variable.

        Raises:
            KeyError: If the node is a template variable with no bound value.
        """
        tree = super().__default__(data, children, meta)

        var = self._conf.test_var(tree)
        if not var:
            return tree

        try:
            return self._vars[var]
        except KeyError:
            raise KeyError("No value bound for template variable: %s" % var) from None


class Template:
    """Represents a tree template, tied to a specific configuration

    A tree template is a tree that contains nodes that are template variables.
    Those variables will match any tree.
    (future versions may support annotations on the variables, to allow more complex templates)

    Parameters:
        tree: The template, either as a ``Tree`` or as text to be parsed by ``conf``.
        conf: The ``TemplateConf`` used to parse text and to recognize variables.
    """

    # NOTE: the default `conf` is one instance, created when the class is
    # defined and shared by every Template that does not pass its own.
    def __init__(self, tree: TreeOrCode, conf = TemplateConf()):
        self.conf = conf
        self.tree = conf._get_tree(tree)

    def match(self, tree: TreeOrCode):
        """Match a tree template to a tree.

        A tree template without variables will only match ``tree`` if it is equal to the template.

        Parameters:
            tree (Tree): The tree to match to the template

        Returns:
            Optional[Dict[str, Tree]]: If match is found, returns a dictionary mapping
                template variable names to their matching tree nodes.
                If no match was found, returns None.
        """
        tree = self.conf._get_tree(tree)
        return self.conf._match_tree_template(self.tree, tree)

    def search(self, tree: TreeOrCode):
        """Search for all occurrences of the tree template inside ``tree``.

        Yields:
            ``(subtree, vars)`` pairs, one per matching subtree. ``vars`` is the
            result of ``match`` and is an empty dict when the template has no
            variables.
        """
        tree = self.conf._get_tree(tree)
        for subtree in tree.iter_subtrees():
            res = self.match(subtree)
            # Compare against None: a successful match of a template without
            # variables is an empty (falsy) dict and must still be reported.
            if res is not None:
                yield subtree, res

    def apply_vars(self, vars: Mapping[str, Tree]):
        """Apply vars to the template tree

        Returns the tree produced by replacing each template variable with the
        value bound to its name in ``vars``.
        """
        return _ReplaceVars(self.conf, vars).transform(self.tree)


def translate(t1: Template, t2: Template, tree: TreeOrCode):
    """Rewrite every occurrence of ``t1`` reported by ``t1.search`` into ``t2``.

    ``tree`` is first converted by ``t1.conf`` (text is parsed when possible).
    Each matching subtree is then updated through its ``set`` method with the
    data and children of ``t2`` after the matched variables are applied.
    The converted tree is returned.
    """
    root = t1.conf._get_tree(tree)
    for matched, bindings in t1.search(root):
        replacement = t2.apply_vars(bindings)
        matched.set(replacement.data, replacement.children)
    return root



class TemplateTranslator:
    """Utility class for translating a collection of patterns

    Parameters:
        translations: Mapping from the template to search for to the template
            that replaces it. Keys and values must be ``Template`` instances.

    Raises:
        TypeError: If any key or value of ``translations`` is not a ``Template``.
    """

    def __init__(self, translations: Mapping[Template, Template]):
        # Validate with an explicit raise: an `assert` is stripped under `-O`.
        for k, v in translations.items():
            if not (isinstance(k, Template) and isinstance(v, Template)):
                raise TypeError(
                    "translations must map Template to Template, got %r -> %r" % (k, v)
                )
        self.translations = translations

    def translate(self, tree: Tree):
        """Apply every translation to ``tree``, in mapping order, and return the result."""
        for k, v in self.translations.items():
            tree = translate(k, v, tree)
        return tree