-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathinterpreter.py
799 lines (651 loc) · 22.8 KB
/
interpreter.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
#!/usr/bin/env python
# -*- coding: utf-8 -*-
'''
Support float division for py2
'''
from __future__ import division
from math import sin,cos,tan,radians,degrees
import sys
'''
DEBUG_LEVEL
0 -> NO_LOGGING
1 -> IMPORTANT
2 -> INFO
'''
DEBUG_LEVEL = 1
'''
Define tokens types that we can
use in our parser
'''
NUMBER = 'NUMBER'
PLUS = 'PLUS'
MINUS = 'MINUS'
MULTYPLY = 'MULTYPLY'
DIVIDE = 'DIVIDE'
OB = 'OB'
CB = 'CB'
EOF = 'EOF'
'''
Function returns result
for operations between numbers
x, y
@param x Number token value
@param y Number token value
'''
ops = {
'+':lambda x,y: x+y,
'-':lambda x,y: x-y,
'*':lambda x,y: x*y,
'/':lambda x,y: x/y,
'sin': lambda x: sin(x),
'cos': lambda x: cos(x),
'tan': lambda x: tan(x),
'ctg': lambda x: 1/tan(x)
}
'''
Logger function
Checks if level is less or equal
to current DEBUG_LEVEL, and prints
output to console
@param level Integer of level
@param message A message to be print
'''
def lprint(message, level):
if level <= DEBUG_LEVEL:
print(message)
def tokenListPrettyPrint(token_list,level):
if level <= DEBUG_LEVEL:
print('[')
for token in token_list:
print(' ' + str(token) + ',')
print(']')
class Token(object):
'''
Class constructor
@param t_type Some of the above mentioned token types
@param value Value of that token
'''
def __init__(self,t_type,value = None):
'''
Assing type and value to new object instance
'''
self.type = t_type
self.value = value
def __eq__(self, other):
'''
We'll compare tokens by their type
Then if we need we can compare the values
'''
return self.type == other.type
def __str__(self):
'''
String representation of the class
This is called when we print token out
@return string Token(type,value)
'''
return 'Token({type},{value})'.format(
type = self.type,
value = repr(self.value)
)
def __repr__(self):
'''
Class representation
@return String representation of class
'''
return self.__str__()
'''
Map for single char tokens
'''
sct = {
'(' : Token(OB,'('),
')' : Token(CB,')'),
'+' : Token(PLUS,'+'),
'-' : Token(MINUS,'-'),
'/' : Token(DIVIDE,'/'),
'*' : Token(MULTYPLY,'*')
}
'''
Map for string tokens
'''
mct = {
'number_tokens':{
'tan' : {
'parseMethod' : lambda parser: parser.parseMathFunction
},
'sin' : {
'parseMethod' : lambda parser: parser.parseMathFunction
},
'cos' : {
'parseMethod' : lambda parser: parser.parseMathFunction
},
'ctg' : {
'parseMethod' : lambda parser: parser.parseMathFunction
}
}
}
class ParsingError(Exception):
'''
Error while parsing
'''
def __init__(self,message):
super(ParsingError, self).__init__(message)
class Interpreter(object):
'''
Finds tokens in stream of
input
'''
def __init__(self,inp):
'''
Constructor function
@parm inp contains user input
'''
self.inp = inp
self.position = 0
self.current_token = None
self.current_char = self.inp[self.position]
lprint('Initializing interpreter with {u_inp}'.format(
u_inp = inp
), 2)
def error(self):
'''
Error handler for our interpreter
'''
error_message = 'Error parsing input'
raise ParsingError(error_message)
def advance(self):
'''
Advance position of cursor, and
set current_char, if we eneded
reading stream then set it to None
'''
self.position += 1
if self.position > len(self.inp) - 1:
self.current_char = None
else:
self.current_char = self.inp[self.position]
def number(self):
'''
Parse number in format
(-)x | x.y where x,y are
integers
'''
result = '';
while(
self.current_char is not None and
self.current_char.isdigit() or
self.current_char == '.' or
(self.current_char == '-' and
self.inp[self.position-1].isdigit() != True)
):
result += self.current_char
self.advance()
return float(result);
def parseMathFunction(self,function_name,arg_num = 2):
'''
This method is called as the parser
recognized the
@params function_name Name of a math function from mct
@params arg_num For possible future use
'''
# Consume all whitespaces before number of (
self.consume_whitespace()
#State machine for using brackets
useBrackets = False
if self.current_char == '(':
'''
Advance one position, and
consume whitespaces to number
'''
useBrackets = True
self.advance()
self.consume_whitespace()
'''
Take number argument and
create a number Token, set
token value to sin(rad) by
default
'''
argument = self.number()
token = Token(NUMBER)
token.value = ops[function_name](argument)
'''
If we use brackets, we can
specify second argument to
function, and that is, what
unit is argument expresses
in
'''
if useBrackets:
#This means we have second parameter
if self.current_char == ',':
'''
Skip , character
consume whitespace to argument
peek into it, evaluate and
set new token value if deg is
used
'''
self.advance()
self.consume_whitespace()
peek = self.inp[self.position:self.position+3].lower()
if peek in ('deg','rad'):
if peek == 'deg':
#Round thing up to couple of decimals
token.value = round(ops[function_name](
radians(round(argument, 2))
), 2)
'''
Skip three chars
we used for argument
'''
for i in range(3):
self.advance()
else:
self.error()
'''
Consume whitespace to )
and advance
'''
self.consume_whitespace()
if self.current_char != ')':
self.error()
self.advance()
return token
def multiCharToken(self):
#start by searching with current token
result = self.current_char
#go trough all token types
for token_type in mct:
#and through all tokenstr representations
for tokenstr in mct[token_type].keys():
'''
While result is in current
token string representation
and not equal to any, load
next token, and if it is
equal return parsed token
'''
while result in tokenstr:
if tokenstr == result:
self.advance()
return mct[token_type][tokenstr]['parseMethod'](self)(tokenstr)
break
self.advance()
result += self.current_char
return False
def get_next_token(self):
'''
This method extracts the
next token from user input
string. If token is invalid
error() is called.
@return Token
'''
# If we ended the stream return End of file token
if self.position > len(self.inp) - 1:
return Token(EOF,None);
# Number token
if self.current_char.isdigit():
token = Token(NUMBER,self.number())
return token
#Single char token
elif self.current_char in sct:
token = sct[self.current_char]
self.advance()
return token
else:
token = self.multiCharToken()
if token:
return token
self.error()
def consume_whitespace(self):
'''
Skip the whitespace characters
'''
while self.current_char == ' ' :
self.advance()
def eat(self, token_type):
'''
Eat function is used to confirm
the integrity of current token
and load the next token
'''
if self.current_token.type in token_type:
self.consume_whitespace()
self.current_token = self.get_next_token();
else:
self.error()
def unaryOperators(self,tokenList):
'''
Filter token list to recognize
unary operators.
Each char or (chars) in example
represents value of token:
@params tokenList List of tokens where
unary reducton should occur
tokenList = result
---+2+----(23) = (-2)+(23)
---*2+(32) = error
---2*(23) = (-2)*(23)
'''
#list for filtered tokens
result = []
#last token must be number
if tokenList[len(tokenList)-1].type == NUMBER:
#operators list
operators = []
#go through each token
for index,token in enumerate(tokenList):
if token.type in (PLUS,MINUS):
'''
If current token is + or -
append current token to
operators
'''
operators.append(token)
else:
'''
If not, we check if our
oprators list is not empty
'''
if operators:
#we can start with + sign
sign = Token(PLUS,'+')
'''
If we have uneven number
of - tokens in list. We can
assume operation is -
'''
if operators.count(
Token(MINUS,'-')
) % 2 == 1:
sign = Token(MINUS,'-')
'''
After combination of +|- tokens
we must have a number
'''
if token.type == NUMBER:
if not result:
'''
Since we can't have -|+ before
expression to solve it by our
methods append that sign directly
to the first number value itself
'''
if sign == Token(MINUS,'-'):
token.value = -token.value
result.append(token)
else:
'''
For all other values
append sign and number token
'''
result.append(sign)
result.append(token)
else:
self.error()
#clear operators list
operators = []
else:
'''
Since we have no + - operatiors
defined before non +|- token, we
just append that token to the
result.
Input '4*5' is example where this
occurs for each token.
'''
result.append(token)
else:
self.error()
lprint("After filtering unary ops, we have",2)
tokenListPrettyPrint(result,2)
return result
def doOperations(self,tokenList,operationToken):
'''
Single operation between 2 numbers.
Find first instance of opration token
in given list. Check indexes because
tokens can't be at beggining and end
of basic expressions. Check nearby
tokens for numbers, so we don't have
situation like // ++ or +( or anything
like that. Do operations on numbers,
insert at a place of the left number
in list, then delete tokens used including
both numbers and operation sign from right
to left, so we don't have problems with
indexes, and finish up.If anything is blown
raise Exception.
We don't need to return anything since
we're activly modifiying the current list
tokenList.
@params tokenList Token list where operation should
be done
@params operation Operation token
Current tokenList is beeing modified, so this
function has no return value
'''
index = tokenList.index(operationToken)
if index != 0 and index != len(tokenList)-1:
a = tokenList[index-1]
b = tokenList[index+1]
if a.type == NUMBER and b.type==NUMBER:
result = ops[tokenList[index].value](
a.value,
b.value
)
tokenList.insert(index-1,Token(NUMBER,result))
for i in range(index+2,index-1,-1):
del tokenList[i]
lprint("Token list after operation",2)
tokenListPrettyPrint(tokenList,2)
else:
self.error()
else:
self.error()
def calculateExpression(self,tokens):
'''
Do operations on tokens list.
First do multiplication and division,
and do them in order.
Then we can do addition and subtraction.
Since we pass tokens list to doOperations, after
all processes are finished we should have
a single token list, so we can just return
tokens[0], if we don't have a single token
raise Exception.
@params tokens Token list on which
operation should be done
'''
#PARSE UNARY OPERATORS
tokens = self.unaryOperators(tokens)
while (Token(MULTYPLY,'*') in tokens or
Token(DIVIDE,'/') in tokens):
m_index = len(tokens)
d_index = len(tokens)
if Token(MULTYPLY,'*') in tokens:
m_index = tokens.index(Token(MULTYPLY,'*'))
if Token(DIVIDE,'/') in tokens:
d_index = tokens.index(Token(DIVIDE,'/'))
if m_index > d_index:
self.doOperations(tokens,Token(DIVIDE,'/'))
else:
self.doOperations(tokens,Token(MULTYPLY,'*'))
while (Token(PLUS,'+') in tokens or
Token(MINUS,'-') in tokens):
p_index = len(tokens)
m_index = len(tokens)
if Token(PLUS,'+') in tokens:
p_index = tokens.index(Token(PLUS,'+'))
if Token(MINUS,'-') in tokens:
m_index = tokens.index(Token(MINUS,'-'))
if p_index > m_index:
self.doOperations(tokens,Token(MINUS,'-'))
else:
self.doOperations(tokens,Token(PLUS,'+'))
if len(tokens) == 1:
return tokens[0]
else:
self.error();
def expr(self):
self.consume_whitespace()
self.current_token = self.get_next_token()
tokens = []
#parse all tokens from the input stream
while self.current_token:
token = self.current_token
self.eat(self.current_token.type)
if token.type == EOF:
break
tokens.append(token)
#informatice things
lprint("Token list at beginning",2)
tokenListPrettyPrint(tokens,2)
'''
Now we have digged a bit deeper.
Our so called 'calculator' interpreter
can accept braces. We have baseexpr
list, and it serves to hold level 0
tokens. Token level is defined by
number of parent braces like this
0 (1 (2 ( 3 (4 (5) 4) 3) 2) 1) 0 (1) (1(2)1) 0
So at the end of our parsing we must have our
current level set to 0.
We go through each token, if token is opening
brace we increase current level, and continue
because we don't want it to be added to baseexpr
list. If token is closing bracket, we check
if there is a tree formed by bracketing. Tree
is a dict containg level : expr for each level
that has something in it. So if we have a formed
tree, we calculate expression at the highest level
because it has to be non malformed expression,
then append the result to level before it. After
we get to level 1 we have the final result of brackets
calulations.
Let's take for example
1+(2+3+(5*((3)+3)))
Since 1 and + are level 0 token it is appended
to baseexpr, then we are at level 1, we append
2,+,3,+ tokens to level 1 so tree looks like
{
1:[2,+,3,+]
}
then we go to next level where tree looks like
{
1:[2,+,3,+],
2:[5,*]
}
we go to next level, since it has no tokens yet, it's
not created, and we go to our final level
{
1:[2,+,3,+],
2:[5,*],
4:[3]
}
now we start closing braces
on brace closed, the expression of highest level is
executed, and result is appended to previous level,
if previous level isn't created, then we'll create
it at this moment.
After level 4 brace is closed:
{
1:[2,+,3,+],
2:[5,*],
3:[3]
}
then we get our + and 3 appended to third level, and
before level 3 brace is closed we have
{
1:[2,+,3,+],
2:[5,*],
3:[3+3]
}
now we evaluate and append to previous, and so on,
when we have our first level brace closed, we append
executed result to baseexpr, after that we have baseexpr
containg simple operations that can be executed with
calculateExpression.
'''
baseexpr = []
tree = {}
level = 0
for token in tokens:
if token == Token('OB','('):
level += 1
continue
elif token == Token('CB',')'):
if tree:
keys = sorted(tree.keys(),reverse=True)
result = self.calculateExpression(tree[keys[0]])
if level > 0:
if (level-1) in tree:
tree[level-1].append(result)
else:
tree[level-1] = [result]
del tree[level]
else:
lprint('Failing on braces evaluation',1)
self.error()
if level == 1:
baseexpr.append(result)
tree.clear()
level -= 1
continue
if level == 0:
baseexpr.append(token)
continue
if level not in tree:
tree[level]=[token]
else:
tree[level].append(token)
if level != 0:
self.error()
lprint('After braces reduction',2)
tokenListPrettyPrint(baseexpr,2)
result = self.calculateExpression(baseexpr).value
lprint('Result from interpretation is {res}'.format(
res=result),2)
return result
def main():
'''
Our program main entry point
'''
while True:
try:
v_major = sys.version_info.major
inp = ''
if v_major == 3:
inp = input('calc > ')
elif v_major == 2:
inp = raw_input('calc > ')
else:
raise Exception("Python version not supported")
except EOFError:
'''
exception EOFError
Raised when one of the built-in functions
(input() or raw_input()) hits an end-of-file condition (EOF)
without reading any data.
(N.B.: the file.read() and file.readline() methods return an empty string
when they hit EOF.)
'''
break;
except KeyboardInterrupt:
'''User exited program '''
print('\nBye bye')
break;
if not inp:
continue
interpreter = Interpreter(inp)
# Check for parsing errors
try:
result = interpreter.expr()
#If we end up here, we're cool :)
print(result)
except ParsingError as e:
#We'll print the exception
print(str(e))
if __name__ == '__main__':
main()