Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Showing 1 changed file with 147 additions and 120 deletions.
parser/parser.go
@@ -1,161 +1,188 @@

Removed: the previous parser/parser.go, an AST-based parser that depended on the external tokenizer package.

// parser/parser.go
package parser

import (
    "fmt"
    "strconv"

    "github.com/simplyYan/Wysb/src/tokenizer"
)

type Node interface {
    String() string
}

type LetStatement struct {
    Name  string
    Type  string
    Value Expression
}

func (ls *LetStatement) String() string {
    return fmt.Sprintf("let %s: %s = %s", ls.Name, ls.Type, ls.Value.String())
}

type IfStatement struct {
    Condition   Expression
    Consequence []Node
    Alternative []Node
}

func (is *IfStatement) String() string {
    return fmt.Sprintf("if %s { ... } else { ... }", is.Condition.String())
}

type ForStatement struct {
    Identifier string
    Range      Expression
    Body       []Node
}

func (fs *ForStatement) String() string {
    return fmt.Sprintf("for %s in %s { ... }", fs.Identifier, fs.Range.String())
}

type Expression interface {
    Node
}

type IntegerLiteral struct {
    Value int
}

func (il *IntegerLiteral) String() string {
    return fmt.Sprintf("%d", il.Value)
}

type Identifier struct {
    Name string
}

func (id *Identifier) String() string {
    return id.Name
}

type InfixExpression struct {
    Left     Expression
    Operator string
    Right    Expression
}

func (ie *InfixExpression) String() string {
    return fmt.Sprintf("(%s %s %s)", ie.Left.String(), ie.Operator, ie.Right.String())
}

func Parse(tokens []tokenizer.Token) []Node {
    var statements []Node
    i := 0

    for i < len(tokens) {
        switch tokens[i].Type {
        case tokenizer.LET:
            i++
            if tokens[i].Type != tokenizer.IDENT {
                panic("expected identifier after 'let'")
            }
            name := tokens[i].Literal
            i++
            if tokens[i].Type != tokenizer.COLON {
                panic("expected ':' after identifier")
            }
            i++
            if tokens[i].Type != tokenizer.IDENT {
                panic("expected type after ':'")
            }
            varType := tokens[i].Literal
            i++
            if tokens[i].Type != tokenizer.ASSIGN {
                panic("expected '=' after type")
            }
            i++
            value := parseExpression(tokens, &i)
            statements = append(statements, &LetStatement{Name: name, Type: varType, Value: value})

        case tokenizer.IF:
            i++
            condition := parseExpression(tokens, &i)
            i++ // Skip '{'
            consequence := parseBlock(tokens, &i)
            var alternative []Node
            if tokens[i].Type == tokenizer.ELSE {
                i++
                i++ // Skip '{'
                alternative = parseBlock(tokens, &i)
            }
            statements = append(statements, &IfStatement{Condition: condition, Consequence: consequence, Alternative: alternative})

        case tokenizer.FOR:
            i++
            identifier := tokens[i].Literal
            i++ // Skip 'in'
            rangeExpr := parseExpression(tokens, &i)
            i++ // Skip '{'
            body := parseBlock(tokens, &i)
            statements = append(statements, &ForStatement{Identifier: identifier, Range: rangeExpr, Body: body})
        }
        i++
    }

    return statements
}

func parseBlock(tokens []tokenizer.Token, i *int) []Node {
    var block []Node
    for tokens[*i].Type != tokenizer.RCURLY {
        block = append(block, Parse(tokens)[*i])
        *i++
    }
    return block
}

func parseExpression(tokens []tokenizer.Token, i *int) Expression {
    token := tokens[*i]

    switch token.Type {
    case tokenizer.INT:
        return &IntegerLiteral{Value: atoi(token.Literal)}
    case tokenizer.IDENT:
        left := &Identifier{Name: token.Literal}
        *i++
        operator := tokens[*i].Literal
        *i++
        right := parseExpression(tokens, i)
        return &InfixExpression{Left: left, Operator: operator, Right: right}
    }
    return nil
}

func atoi(str string) int {
    num, _ := strconv.Atoi(str)
    return num
}
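For reference, a minimal sketch of how these removed AST types rendered themselves through String(). It is not code from the repository: it assumes the package lives at github.com/simplyYan/Wysb/src/parser (mirroring the tokenizer import path above), and it only compiles against this removed version of the file.

// Hypothetical illustration of the removed AST types; not part of the commit.
package main

import (
    "fmt"

    "github.com/simplyYan/Wysb/src/parser" // assumed import path for this package
)

func main() {
    // Build the tree for `let x: int = a + 1` by hand, the way the removed
    // Parse and parseExpression would have, then print it via String().
    stmt := &parser.LetStatement{
        Name: "x",
        Type: "int",
        Value: &parser.InfixExpression{
            Left:     &parser.Identifier{Name: "a"},
            Operator: "+",
            Right:    &parser.IntegerLiteral{Value: 1},
        },
    }
    fmt.Println(stmt.String()) // let x: int = (a + 1)
}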
Added: the new parser/parser.go, which replaces that dependency with an in-package tokenizer and a stack-based statement parser.

// parser/parser.go
package parser

import (
    "strings"
    "unicode"
)

type TokenType int

const (
    ILLEGAL TokenType = iota
    EOF
    IDENTIFIER
    INT
    ASSIGN
    PLUS
    MINUS
    ASTERISK
    SLASH
    LPAREN
    RPAREN
    SEMICOLON
)

type Token struct {
    Type    TokenType
    Literal string
}

func Tokenize(input string) []Token {
    var tokens []Token
    runes := []rune(input)

    for i := 0; i < len(runes); {
        ch := runes[i]

        if unicode.IsSpace(ch) {
            i++
            continue
        }

        switch ch {
        case '=':
            tokens = append(tokens, Token{Type: ASSIGN, Literal: string(ch)})
        case '+':
            tokens = append(tokens, Token{Type: PLUS, Literal: string(ch)})
        case '-':
            tokens = append(tokens, Token{Type: MINUS, Literal: string(ch)})
        case '*':
            tokens = append(tokens, Token{Type: ASTERISK, Literal: string(ch)})
        case '/':
            tokens = append(tokens, Token{Type: SLASH, Literal: string(ch)})
        case '(':
            tokens = append(tokens, Token{Type: LPAREN, Literal: string(ch)})
        case ')':
            tokens = append(tokens, Token{Type: RPAREN, Literal: string(ch)})
        case ';':
            tokens = append(tokens, Token{Type: SEMICOLON, Literal: string(ch)})
        default:
            if isLetter(ch) {
                identifier := readIdentifier(runes, &i)
                tokens = append(tokens, Token{Type: IDENTIFIER, Literal: identifier})
                continue
            } else if isDigit(ch) {
                number := readNumber(runes, &i)
                tokens = append(tokens, Token{Type: INT, Literal: number})
                continue
            } else {
                tokens = append(tokens, Token{Type: ILLEGAL, Literal: string(ch)})
            }
        }

        i++
    }

    tokens = append(tokens, Token{Type: EOF, Literal: ""})
    return tokens
}

func isLetter(ch rune) bool {
    return unicode.IsLetter(ch)
}

func isDigit(ch rune) bool {
    return unicode.IsDigit(ch)
}

func readIdentifier(input []rune, start *int) string {
    var sb strings.Builder
    for *start < len(input) && isLetter(input[*start]) {
        sb.WriteRune(input[*start])
        *start++
    }
    return sb.String()
}

func readNumber(input []rune, start *int) string {
    var sb strings.Builder
    for *start < len(input) && isDigit(input[*start]) {
        sb.WriteRune(input[*start])
        *start++
    }
    return sb.String()
}
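Before the parser half of the new file, a sketch of what the Tokenize loop above emits for a small input. This in-package test is hypothetical; the file name and the sample input are assumptions, not part of the commit.

// parser/tokenizer_test.go (hypothetical): a sketch of Tokenize's output.
package parser

import "testing"

func TestTokenizeSketch(t *testing.T) {
    // For "x = 1 + 2;" the loop emits one token per lexeme, skips the spaces,
    // and appends a trailing EOF token.
    want := []Token{
        {Type: IDENTIFIER, Literal: "x"},
        {Type: ASSIGN, Literal: "="},
        {Type: INT, Literal: "1"},
        {Type: PLUS, Literal: "+"},
        {Type: INT, Literal: "2"},
        {Type: SEMICOLON, Literal: ";"},
        {Type: EOF, Literal: ""},
    }
    got := Tokenize("x = 1 + 2;")
    if len(got) != len(want) {
        t.Fatalf("got %d tokens, want %d", len(got), len(want))
    }
    for i := range want {
        if got[i] != want[i] {
            t.Errorf("token %d: got %+v, want %+v", i, got[i], want[i])
        }
    }
}

The new file continues below with the parse-tree types and Parse itself.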
type NodeType int

const (
    VARIABLE NodeType = iota
    NUMBER
    EXPRESSION
    ASSIGNMENT
    STATEMENT
)

type Node struct {
    Type  NodeType
    Value string
    Left  *Node
    Right *Node
}

type Statement struct {
    Node *Node
}

func Parse(tokens []Token) []Statement {
    var statements []Statement
    var current *Node
    var stack []*Node
    var node *Node

    for i := 0; i < len(tokens); i++ {
        token := tokens[i]

        switch token.Type {
        case IDENTIFIER:
            node = &Node{Type: VARIABLE, Value: token.Literal}
            if current != nil {
                current.Left = node
            }
            stack = append(stack, node)
            current = nil
        case INT:
            node = &Node{Type: NUMBER, Value: token.Literal}
            if current != nil {
                current.Right = node
            }
            stack = append(stack, node)
            current = nil
        case ASSIGN:
            node = &Node{Type: ASSIGNMENT, Value: token.Literal}
            if len(stack) > 0 {
                node.Left = stack[len(stack)-1]
                stack = stack[:len(stack)-1]
            }
            stack = append(stack, node)
        case PLUS, MINUS, ASTERISK, SLASH:
            node = &Node{Type: EXPRESSION, Value: token.Literal}
            if len(stack) > 0 {
                node.Left = stack[len(stack)-1]
                stack = stack[:len(stack)-1]
            }
            if len(stack) > 0 {
                node.Right = stack[len(stack)-1]
                stack = stack[:len(stack)-1]
            }
            stack = append(stack, node)
            current = node
        case SEMICOLON:
            if len(stack) > 0 {
                statements = append(statements, Statement{Node: stack[0]})
                stack = nil
            }
        case LPAREN, RPAREN:

        case EOF:
            if len(stack) > 0 {
                statements = append(statements, Statement{Node: stack[0]})
            }
        default:

        }
    }

    return statements
}
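A rough usage sketch of the new API as a whole. The program below is hypothetical: the file path, the import path, and the sample input are assumptions, not code from this commit.

// cmd/parse-sketch/main.go (hypothetical): exercises Tokenize and Parse from the new file.
package main

import (
    "fmt"

    "github.com/simplyYan/Wysb/src/parser" // assumed import path for this package
)

func main() {
    // Tokenize yields INT "1", PLUS "+", INT "2", SEMICOLON ";", EOF "".
    tokens := parser.Tokenize("1 + 2;")

    // Parse pops "1" as the left child of the "+" node, attaches "2" as its
    // right child via the current pointer, and closes the statement at the
    // semicolon, so the result is one statement whose root is EXPRESSION "+"
    // with NUMBER "1" on the left and NUMBER "2" on the right.
    statements := parser.Parse(tokens)
    for _, s := range statements {
        printNode(s.Node, 0)
    }
}

// printNode walks a Node tree depth-first, indenting children by depth.
func printNode(n *parser.Node, depth int) {
    if n == nil {
        return
    }
    fmt.Printf("%*s%v %q\n", depth*2, "", n.Type, n.Value)
    printNode(n.Left, depth+1)
    printNode(n.Right, depth+1)
}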