Skip to content

Commit

Permalink
🖊️ Trim leading and trailing spaces in assignments (#5936)
Browse files Browse the repository at this point in the history
With this PR we explicitly trim leading and trailing spaces in assignments in levels 2-11. Note that in these levels the assignment and list assignment statements do not use quotes. The program `a is three spaces   ` used to be transpiled to `a = 'three spaces   '` but is now transpiled to `a = 'three spaces'`. Similarly, the list `a is  cat , dog ,  chicken ` is now transpiled to `a = ['cat', 'dog', 'chicken']`.

Fixes #5684

**How to test**
Ensure that the following snippet yields `[4]` twice in level 9. Ensure that there is no grammar ambiguity. 
```
i = 4   # met comment
print '[' i ']'
if 1 = 1
    j = 4    # met comment
    print '[' j ']'
```
  • Loading branch information
boryanagoncharenko authored Nov 13, 2024
1 parent 5b0b730 commit 49b9b5e
Show file tree
Hide file tree
Showing 11 changed files with 96 additions and 30 deletions.
3 changes: 2 additions & 1 deletion grammars/level2-Additions.lark
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,11 @@ sleep: _SLEEP (INT | var_access)?

//in level 2, forward may also depend on a variable
turtle: _FORWARD (NUMBER | textwithoutspaces)? -> forward | _TURN ((NUMBER | textwithoutspaces))? -> turn | _COLOR ((black | blue | brown | gray | green | orange | pink | purple | red | white | yellow | textwithoutspaces))? -> color
assign: var _IS text -> assign
assign: var _IS textwithinnerspaces _SPACE? -> assign

textwithoutspaces: /([^\n #]+)/ -> text
text: /([^\n#]+)/ -> text
textwithinnerspaces: /([^\n#]*[^\n# ])/ -> text

var: NAME // used for variable definitions, e.g. a = 1
var_access: NAME // used for variable references, e.g. for i in range. It parses the same as var, but does not result in a lookup table entry
Expand Down
7 changes: 4 additions & 3 deletions grammars/level3-Additions.lark
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
command:+= assign_list | add | remove | error_add_missing_to | error_remove_missing_from | error_add_missing_list | error_remove_missing_list >> error_invalid
_print_ask_argument: (_SPACE | (list_access textwithoutspaces?) | textwithoutspaces)*

assign: var _IS (list_access | text) -> assign
assign_list: var _IS text_list (_COMMA text_list)+
assign: var _IS (list_access | textwithinnerspaces) _SPACE? -> assign
assign_list: var _IS textwithinnerspaces (_COMMA textwithinnerspaces)+ _SPACE?
play: _PLAY (list_access | textwithoutspaces)

list_access: var_access _AT (INT | random)
Expand All @@ -11,7 +11,8 @@ turtle: _FORWARD ((NUMBER | list_access | textwithoutspaces))? -> forward | _TUR
sleep: _SLEEP (INT | list_access | var_access)?
// lists are introduced and list separators (comma and Arabic comma) have to be excluded from text.
text: /([^\n،,,、#]+)/ -> text
text_list: /([^\n,،,、#]+)/ -> text // list elements are another exception since they can contain punctuation but not list separators
// list elements are another exception since they can contain punctuation but not list separators. Also, they cannot end with a space.
textwithinnerspaces: /([^\n،,,、#]*[^\n،,,、# ])/ -> text

// Values which are added or removed from lists can contain spaces, so we need to escape the 'to_list' and 'from' keywords
// The part " (?!<expand_keyword to_list>|<expand_keyword from>)" will be processed to a space with a negative look ahead of all to_list and from values
Expand Down
3 changes: 0 additions & 3 deletions grammars/level5-Additions.lark
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,6 @@ _if_less_command: print | ask | play | turtle | assign_list | add | remove | sle
// just to be sure that if Lark tries to resolve ambiguity, error_invalid will be considered last
error_invalid.-100: textwithoutspaces _SPACE* (quoted_text | textwithspaces)?

assign_list: var _IS textwithspaces (_COMMA textwithspaces)+
assign: var _IS (list_access | textwithspaces)

error_print_no_quotes: _PRINT (textwithoutspaces | list_access | var_access) (_SPACE (textwithoutspaces | list_access | var_access))* -> error_print_nq

// new commands for level 5
Expand Down
4 changes: 2 additions & 2 deletions grammars/level6-Additions.lark
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@ play: _PLAY (list_access | expression | textwithoutspaces)
equality_check: (INT | textwithoutspaces) (_IS | _EQUALS) (INT | quoted_text | textwithoutspaces)
condition: >> equality_check

assign_list: var (_IS | _EQUALS) (INT | textwithspaces) (_COMMA (INT | textwithspaces))+
assign: var (_IS | _EQUALS) (INT | list_access | expression | textwithoutspaces | textwithspaces)
assign_list: var (_IS | _EQUALS) (INT | textwithinnerspaces) (_COMMA (INT | textwithinnerspaces))+ _SPACE?
assign: var (_IS | _EQUALS) (INT | list_access | expression | textwithoutspaces | textwithinnerspaces) _SPACE?

add: _ADD_LIST (INT | text_add_remove_list) _TO_LIST _SPACE var_access
remove: _REMOVE (INT | text_add_remove_list) _FROM _SPACE var_access
Expand Down
6 changes: 4 additions & 2 deletions tests/Tester.py
Original file line number Diff line number Diff line change
Expand Up @@ -388,10 +388,12 @@ def source_map_tester(self, code, expected_source_map: dict):
result = hedy.transpile(code, self.level, 'en')
self.assertDictEqual(result.source_map.get_compressed_mapping(), expected_source_map)

def assert_translated_code_equal(self, orignal, translation):
def assert_translated_code_equal(self, original, translation):
# When we translate a program we lose information about the whitespaces of the original program.
# So when comparing the original and the translated code, we compress multiple whitespaces into one.
self.assertEqual(re.sub('\\s+', ' ', orignal), re.sub('\\s+', ' ', translation))
# Also, we lose the trailing spaces, so we strip before comparing.
self.assertEqual(re.sub('\\s+', ' ', original).strip(),
re.sub('\\s+', ' ', translation).strip())

@staticmethod
def validate_Python_code(parseresult):
Expand Down
2 changes: 1 addition & 1 deletion tests/test_level/test_level_02.py
Original file line number Diff line number Diff line change
Expand Up @@ -242,7 +242,7 @@ def test_print_comment(self):

def test_assign_comment(self):
code = "test is Welkom bij Hedy # This is a comment"
expected = "test = 'Welkom bij Hedy '"
expected = "test = 'Welkom bij Hedy'"
self.multi_level_tester(
max_level=3,
code=code,
Expand Down
32 changes: 27 additions & 5 deletions tests/test_level/test_level_03.py
Original file line number Diff line number Diff line change
Expand Up @@ -324,6 +324,18 @@ def test_assign_var_to_var(self):

self.multi_level_tester(max_level=5, code=code, expected=expected)

def test_assign_var_trims_spaces(self):
code = "answer is This is long "
expected = "answer = 'This is long'"

self.multi_level_tester(max_level=5, code=code, expected=expected, unused_allowed=True)

def test_assign_var_trims_spaces_with_comment(self):
code = "answer is This is long # comment"
expected = "answer = 'This is long'"

self.multi_level_tester(max_level=5, code=code, expected=expected, unused_allowed=True)

def test_assign_list(self):
code = "dieren is Hond, Kat, Kangoeroe"
expected = "dieren = ['Hond', 'Kat', 'Kangoeroe']"
Expand Down Expand Up @@ -354,11 +366,21 @@ def test_assign_list_to_hungarian_var(self):

self.multi_level_tester(max_level=5, code=code, expected=expected)

def test_assign_list_with_spaces(self):
# spaces are parsed in the text here, that is fine (could be avoided if we say text
# can't *end* (or start) in a space but I find this ok for now
code = "dieren is Hond , Kat , Kangoeroe"
expected = "dieren = ['Hond ', 'Kat ', 'Kangoeroe']"
def test_assign_list_trims_elements_trailing_spaces(self):
code = "dieren is Hond , Kat , Kangoeroe "
expected = "dieren = ['Hond', 'Kat', 'Kangoeroe']"

self.multi_level_tester(max_level=5, code=code, expected=expected, unused_allowed=True)

def test_assign_list_trims_elements_leading_spaces(self):
code = "dieren is Hond, Kat, Kangoeroe"
expected = "dieren = ['Hond', 'Kat', 'Kangoeroe']"

self.multi_level_tester(max_level=5, code=code, expected=expected, unused_allowed=True)

def test_assign_list_trims_elements_spaces(self):
code = "dieren is I am , waiting for , the summer "
expected = "dieren = ['I am', 'waiting for', 'the summer']"

self.multi_level_tester(max_level=5, code=code, expected=expected, unused_allowed=True)

Expand Down
2 changes: 1 addition & 1 deletion tests/test_level/test_level_04.py
Original file line number Diff line number Diff line change
Expand Up @@ -517,7 +517,7 @@ def test_print_comment(self):

def test_assign_comment(self):
code = 'test is "Welkom bij Hedy" # This is a comment'
expected = 'test = \'"Welkom bij Hedy" \''
expected = """test = '"Welkom bij Hedy"'"""
self.multi_level_tester(
max_level=5,
unused_allowed=True,
Expand Down
34 changes: 28 additions & 6 deletions tests/test_level/test_level_06.py
Original file line number Diff line number Diff line change
Expand Up @@ -927,7 +927,7 @@ def test_assign_catalan_var_name(self):

def test_assign_comment(self):
code = 'test is "Welkom bij Hedy" # This is a comment'
expected = 'test = Value(\'"Welkom bij Hedy" \')'
expected = """test = Value('"Welkom bij Hedy"')"""
self.multi_level_tester(
max_level=11,
unused_allowed=True,
Expand All @@ -954,6 +954,18 @@ def test_assign_var_to_var(self):

self.multi_level_tester(max_level=11, code=code, expected=expected)

def test_assign_var_trims_spaces(self):
code = "answer is This is long "
expected = "answer = Value('This is long')"

self.multi_level_tester(max_level=11, code=code, expected=expected, unused_allowed=True)

def test_assign_var_trims_spaces_with_comment(self):
code = "answer is This is long # comment"
expected = "answer = Value('This is long')"

self.multi_level_tester(max_level=11, code=code, expected=expected, unused_allowed=True)

def test_assign_text_with_inner_single_quote(self):
code = "var is Hedy's"
expected = "var = Value('Hedy\\'s')"
Expand Down Expand Up @@ -1042,11 +1054,21 @@ def test_assign_list_with_arabic_comma_and_is(self):
lang='ar'
)

def test_assign_list_with_spaces(self):
# spaces are parsed in the text here, that is fine (could be avoided if we say text
# can't *end* (or start) in a space but I find this ok for now
code = "dieren is Hond , Kat , Kangoeroe"
expected = "dieren = Value([Value('Hond '), Value('Kat '), Value('Kangoeroe')])"
def test_assign_list_trims_elements_trailing_spaces(self):
code = "dieren is Hond , Kat , Kangoeroe "
expected = "dieren = Value([Value('Hond'), Value('Kat'), Value('Kangoeroe')])"

self.multi_level_tester(max_level=11, code=code, expected=expected, unused_allowed=True)

def test_assign_list_trims_elements_leading_spaces(self):
code = "dieren is Hond, Kat, Kangoeroe"
expected = "dieren = Value([Value('Hond'), Value('Kat'), Value('Kangoeroe')])"

self.multi_level_tester(max_level=11, code=code, expected=expected, unused_allowed=True)

def test_assign_list_trims_elements_spaces(self):
code = "dieren is I am , waiting for , the summer "
expected = "dieren = Value([Value('I am'), Value('waiting for'), Value('the summer')])"

self.multi_level_tester(max_level=11, code=code, expected=expected, unused_allowed=True)

Expand Down
26 changes: 26 additions & 0 deletions tests/test_level/test_level_08.py
Original file line number Diff line number Diff line change
Expand Up @@ -385,6 +385,32 @@ def test_equality_arabic_and_latin_vars(self):

self.multi_level_tester(code=code, expected=expected, max_level=11, output='jahoor!')

def test_assign_var_in_if_trims_spaces(self):
code = self.dedent(
"if 1 = 1",
("j = 4 ", " "),
(" print '[' j ']'", " "))

expected = textwrap.dedent(f"""\
if localize('1') == localize('1'):
j = Value('4', num_sys='Latin')
print(f'[{{j}}]')""")

self.multi_level_tester(code=code, expected=expected, max_level=11, output='[4]')

def test_assign_var_in_if_trims_spaces_with_comment(self):
code = textwrap.dedent("""\
if 1 = 1
j = 4 # met comment
print '[' j ']'""")

expected = textwrap.dedent(f"""\
if localize('1') == localize('1'):
j = Value('4', num_sys='Latin')
print(f'[{{j}}]')""")

self.multi_level_tester(code=code, expected=expected, max_level=11, output='[4]')

#
# in/not in list
#
Expand Down
7 changes: 1 addition & 6 deletions tests/test_level/test_level_12.py
Original file line number Diff line number Diff line change
Expand Up @@ -938,12 +938,7 @@ def test_print_comment(self):
def test_assign_comment(self):
code = 'test = "Welkom bij Hedy" # This is a comment'
expected = "test = Value('Welkom bij Hedy')"
self.multi_level_tester(
max_level=18,
code=code,
expected=expected,
unused_allowed=True
)
self.multi_level_tester(code=code, expected=expected, unused_allowed=True)

#
# ask tests
Expand Down

0 comments on commit 49b9b5e

Please sign in to comment.