"""A small recursive-descent parsing framework: terminal token classes,
grammar rules, and a simple whitespace-skipping tokenizer."""


class ParseError(Exception):
    def __init__(self, message='', rule=None, stream=None):
        self.message = message
        self.rule = rule
        self.stream = stream
        # Callers may set this to the full token stream so that __str__
        # can report how far parsing got before failing.
        self.original_stream = None

    def __str__(self):
        msg = self.message
        if self.original_stream:
            msg += '\nError at token %d:' % (
                len(self.original_stream) - len(self.stream))
            msg += '\n' + ''.join(str(s) for s in self.stream)
        if self.rule:
            msg += '\nFailed rule: ' + str(self.rule)
        return msg


class Rule(object):
    # Subclasses set `rules` to a list where each entry is either a
    # sequence of rules (one production, matched left to right) or a
    # single rule (an alias for that rule).
    rules = None

    def __init__(self, val):
        self.val = val

    def __repr__(self):
        return ' '.join(str(v) for v in self.val)

    @classmethod
    def parse(cls, stream):
        for rule in cls.rules:
            try:
                if hasattr(rule, '__iter__'):
                    # Try each production in order and report the result
                    # of the first one that matches.
                    parse = []
                    new_stream = stream
                    for r in rule:
                        v, new_stream = r.parse(new_stream)
                        parse.append(v)
                    return cls(parse), new_stream
                else:
                    # The rule is an alias for another rule; just report
                    # its result.
                    return rule.parse(stream)
            except ParseError:
                pass
        # None of the productions matched. Fail.
        raise ParseError(message='No grammar rule matched', rule=cls,
                         stream=stream)
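
# A hedged sketch of how `rules` is meant to be populated; the names
# `Expr`, `Num`, and `PlusOp` below are hypothetical, not part of this
# module:
#
#     class Expr(Rule):
#         pass
#     # Expr -> Num PlusOp Num | Num
#     Expr.rules = [(Num, PlusOp, Num), Num]
#
# Assigning `rules` after the class statement lets a rule refer to
# itself recursively.
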
class Terminal(Rule):
    def __repr__(self):
        return str(self.val)

    @classmethod
    def tokenize(cls, string):
        raise NotImplementedError

    @classmethod
    def parse(cls, stream):
        # A terminal matches exactly one token of its own class.
        if not stream or not isinstance(stream[0], cls):
            raise ParseError(rule=cls, stream=stream)
        return stream[0], stream[1:]


class LiteralToken(Terminal):
    # Literal strings this token can match. Only iteration over the
    # literals is used, so any iterable works. Literals are tried in
    # iteration order; if one literal is a prefix of another, the longer
    # one should come first, since longest-match is not guaranteed.
    tokens = {}

    @classmethod
    def tokenize(cls, string):
        for token in cls.tokens:
            if string.startswith(token):
                return cls(token), string[len(token):]
        return None, string
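
# For instance (hypothetical, not defined in this module), an operator
# terminal could be declared by listing its literals:
#
#     class PlusOp(LiteralToken):
#         tokens = {'+'}
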

# Support for tokenizing the input.
def eat_whitespace(string):
    while string and string[0].isspace():
        string = string[1:]
    return string


def tokenize(string, acceptable_tokens):
    parsed_tokens = []
    while string:
        string = eat_whitespace(string)
        if not string:
            break
        token = None
        for tok in acceptable_tokens:
            token, string = tok.tokenize(string)
            if token:
                parsed_tokens.append(token)
                break
        if not token:
            raise ParseError(message='Unrecognized: ' + string,
                             stream=parsed_tokens)
    return parsed_tokens
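

# A minimal end-to-end sketch exercising the classes above. The `Op` and
# `Pair` grammar is hypothetical, purely for illustration.
if __name__ == '__main__':
    class Op(LiteralToken):
        tokens = {'+', '-'}

    class Pair(Rule):
        pass

    # Pair -> Op Op
    Pair.rules = [(Op, Op)]

    stream = tokenize('+ -', [Op])   # [Op('+'), Op('-')]
    tree, remaining = Pair.parse(stream)
    print(tree)       # prints: + -
    print(remaining)  # prints: []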