diff --git a/dissect/cstruct/expression.py b/dissect/cstruct/expression.py index 1cc3c0a..fc0518e 100644 --- a/dissect/cstruct/expression.py +++ b/dissect/cstruct/expression.py @@ -142,7 +142,7 @@ def tokenize(self) -> list[str]: self.tokens.append(">>") elif self.match(expected="<", append=False) and self.match(expected="<", append=False): self.tokens.append("<<") - elif self.match(expected={" ", "\t"}, append=False): + elif self.match(expected={" ", "\n", "\t"}, append=False): continue else: raise ExpressionTokenizerError( diff --git a/dissect/cstruct/parser.py b/dissect/cstruct/parser.py index 7634113..77a8800 100644 --- a/dissect/cstruct/parser.py +++ b/dissect/cstruct/parser.py @@ -63,7 +63,7 @@ def _tokencollection() -> TokenCollection: "ENUM", ) TOK.add(r"(?<=})\s*(?P<defs>(?:[a-zA-Z0-9_]+\s*,\s*)+[a-zA-Z0-9_]+)\s*(?=;)", "DEFS") - TOK.add(r"(?P<name>\**?\s*[a-zA-Z0-9_]+)(?:\s*:\s*(?P<bits>\d+))?(?:\[(?P<count>[^;\n]*)\])?\s*(?=;)", "NAME") + TOK.add(r"(?P<name>\**?\s*[a-zA-Z0-9_]+)(?:\s*:\s*(?P<bits>\d+))?(?:\[(?P<count>[^;]*)\])?\s*(?=;)", "NAME") TOK.add(r"[a-zA-Z_][a-zA-Z0-9_]*", "IDENTIFIER") TOK.add(r"[{}]", "BLOCK") TOK.add(r"\$(?P<name>[^\s]+) = (?P<value>{[^}]+})\w*[\r\n]+", "LOOKUP") diff --git a/tests/test_types_structure.py b/tests/test_types_structure.py index 8d251eb..948e4bc 100644 --- a/tests/test_types_structure.py +++ b/tests/test_types_structure.py @@ -761,3 +761,37 @@ def __init__(self, _0 = None, _1 = None, _2 = None, _3 = None, _4 = None): cached = structure._make_structure__init__(5) assert structure._make_structure__init__.cache_info() == (1, 1, 128, 1) assert result is cached + + +def test_structure_definition_newline(cs: cstruct, compiled: bool) -> None: + cdef = """ + struct test { + char magic[4 + ]; + + wchar wmagic[4 + ]; + uint8 a; + uint16 b; + uint32 c; + char string[]; + wchar wstring[]; + }; + """ + cs.endian = ">" + cs.load(cdef, compiled=compiled) + + assert verify_compiled(cs.test, compiled) + + buf = 
b"test\x00t\x00e\x00s\x00t\x01\x02\x03\x04\x05\x06\x07lalala\x00\x00t\x00e\x00s\x00t\x00\x00" + + obj = cs.test() + obj.magic = "test" + obj.wmagic = "test" + obj.a = 0x01 + obj.b = 0x0203 + obj.c = 0x04050607 + obj.string = b"lalala" + obj.wstring = "test" + + assert obj.dumps() == buf