Skip to content

Commit

Permalink
Update parser.go
Browse files Browse the repository at this point in the history
  • Loading branch information
simplyYan authored Sep 1, 2024
1 parent 36071e7 commit 568e6db
Showing 1 changed file with 147 additions and 120 deletions.
267 changes: 147 additions & 120 deletions parser/parser.go
Original file line number Diff line number Diff line change
@@ -1,161 +1,188 @@
// parser/parser.go
package parser

import (
"fmt"
"strconv"
"strings"
"unicode"
)

// TokenType identifies the lexical category of a Token.
type TokenType int

// Token categories. Values are assigned sequentially from zero via iota, so
// ILLEGAL is the zero value of TokenType.
const (
ILLEGAL TokenType = iota // character that is not whitespace, an operator, a letter or a digit
EOF // end-of-input marker
IDENTIFIER // run of letters
INT // run of digits
ASSIGN // '='
PLUS // '+'
MINUS // '-'
ASTERISK // '*'
SLASH // '/'
LPAREN // '('
RPAREN // ')'
SEMICOLON // ';'

// NOTE(review): the import path below sits inside the const block and looks
// like a leftover from an import list — confirm and remove if so.
"github.com/simplyYan/Wysb/src/tokenizer"
)

type Node interface {
String() string
// Token is a single lexical unit: its category together with the source text
// it was read from.
type Token struct {
Type TokenType // category of the token
Literal string // source text of the token
}

// LetStatement holds the three parts of a let declaration: the declared name,
// its type annotation and the expression assigned to it.
type LetStatement struct {
Name string // identifier being declared
Type string // type annotation
Value Expression // assigned expression
}
// Tokenize scans input rune by rune and returns the tokens it finds, always
// terminated by a single EOF token. Whitespace is skipped; a character that is
// not whitespace, one of the handled operators, a letter or a digit is emitted
// as an ILLEGAL token.
//
// NOTE(review): declarations of LetStatement.String, IfStatement,
// ForStatement and their String methods are interleaved with this function's
// body below; they look like lines from another revision — confirm.
func Tokenize(input string) []Token {
var tokens []Token
// Work on runes rather than bytes so a multi-byte character is one unit.
runes := []rune(input)

// String formats the statement as "let <name>: <type> = <value>".
func (ls *LetStatement) String() string {
return fmt.Sprintf("let %s: %s = %s", ls.Name, ls.Type, ls.Value.String())
}
// The loop has no post statement: every path through the body advances i itself.
for i := 0; i < len(runes); {
ch := runes[i]

// IfStatement holds a condition and the node lists of its two branches.
type IfStatement struct {
Condition Expression
Consequence []Node
Alternative []Node
}
// Whitespace separates tokens but produces none.
if unicode.IsSpace(ch) {
i++
continue
}

// String prints only the condition; both branch bodies are shown as "{ ... }".
func (is *IfStatement) String() string {
return fmt.Sprintf("if %s { ... } else { ... }", is.Condition.String())
}
// Single-character tokens; i is advanced by the shared i++ after the switch.
switch ch {
case '=':
tokens = append(tokens, Token{Type: ASSIGN, Literal: string(ch)})
case '+':
tokens = append(tokens, Token{Type: PLUS, Literal: string(ch)})
case '-':
tokens = append(tokens, Token{Type: MINUS, Literal: string(ch)})
case '*':
tokens = append(tokens, Token{Type: ASTERISK, Literal: string(ch)})
case '/':
tokens = append(tokens, Token{Type: SLASH, Literal: string(ch)})
case '(':
tokens = append(tokens, Token{Type: LPAREN, Literal: string(ch)})
case ')':
tokens = append(tokens, Token{Type: RPAREN, Literal: string(ch)})
case ';':
tokens = append(tokens, Token{Type: SEMICOLON, Literal: string(ch)})
default:
// readIdentifier and readNumber move i past the whole run themselves,
// so those branches use continue to skip the trailing i++.
if isLetter(ch) {
identifier := readIdentifier(runes, &i)
tokens = append(tokens, Token{Type: IDENTIFIER, Literal: identifier})
continue
} else if isDigit(ch) {
number := readNumber(runes, &i)
tokens = append(tokens, Token{Type: INT, Literal: number})
continue
} else {
// Unrecognised character: record it and step over it.
tokens = append(tokens, Token{Type: ILLEGAL, Literal: string(ch)})
}
}

// ForStatement holds the loop variable name, the range expression and the body nodes.
type ForStatement struct {
Identifier string
Range Expression
Body []Node
}
i++
}

// String prints the loop header; the body is shown as "{ ... }".
func (fs *ForStatement) String() string {
return fmt.Sprintf("for %s in %s { ... }", fs.Identifier, fs.Range.String())
// Terminate the stream with an EOF token that has an empty literal.
tokens = append(tokens, Token{Type: EOF, Literal: ""})
return tokens
}

type Expression interface {
Node
// isLetter reports whether ch belongs to the Unicode letter category (L).
func isLetter(ch rune) bool {
	return unicode.Is(unicode.Letter, ch)
}

type IntegerLiteral struct {
Value int
// isDigit reports whether ch is an ASCII decimal digit ('0' through '9').
//
// Decimal digits from other scripts (for example Arabic-Indic or full-width
// digits) are deliberately rejected: the strconv parsing functions only
// understand ASCII digits, so accepting them would produce integer literals
// that cannot be converted to a number.
func isDigit(ch rune) bool {
	return '0' <= ch && ch <= '9'
}

func (il *IntegerLiteral) String() string {
return fmt.Sprintf("%d", il.Value)
// readIdentifier collects the run of letters in input beginning at index
// *start and returns it as a string. On return *start is the index of the
// first rune that is not a letter (or len(input) if the run reaches the end).
// If the rune at *start is not a letter, or *start is already past the end,
// the result is "" and *start is left unchanged.
func readIdentifier(input []rune, start *int) string {
	var name strings.Builder
	for pos := *start; pos < len(input); pos++ {
		if !isLetter(input[pos]) {
			break
		}
		name.WriteRune(input[pos])
		// Keep the caller's cursor one past the last rune consumed.
		*start = pos + 1
	}
	return name.String()
}

type Identifier struct {
Name string
// readNumber collects the run of digits in input beginning at index *start
// and returns it as a string. On return *start is the index of the first rune
// that is not a digit (or len(input) if the run reaches the end). If the rune
// at *start is not a digit, or *start is already past the end, the result is
// "" and *start is left unchanged.
func readNumber(input []rune, start *int) string {
	var digits strings.Builder
	for pos := *start; pos < len(input); pos++ {
		if !isDigit(input[pos]) {
			break
		}
		digits.WriteRune(input[pos])
		// Keep the caller's cursor one past the last rune consumed.
		*start = pos + 1
	}
	return digits.String()
}

// String returns the identifier's name unchanged.
func (id *Identifier) String() string {
return id.Name
}
// NodeType identifies what kind of syntax element a Node represents.
type NodeType int

// Node kinds. Values are assigned sequentially from zero via iota, so VARIABLE
// is the zero value of NodeType.
const (
VARIABLE NodeType = iota // node built from an IDENTIFIER token
NUMBER // node built from an INT token
EXPRESSION // node built from a '+', '-', '*' or '/' token
ASSIGNMENT // node built from an '=' token
STATEMENT // NOTE(review): not produced by any code visible in this file — confirm intended use
)

type InfixExpression struct {
Left Expression
Operator string
Right Expression
// Node is one element of a binary syntax tree: a kind, the token text it was
// built from, and optional left and right children.
type Node struct {
Type NodeType // kind of node
Value string // literal text of the token the node was created from
Left *Node // left child; nil when absent
Right *Node // right child; nil when absent
}

func (ie *InfixExpression) String() string {
return fmt.Sprintf("(%s %s %s)", ie.Left.String(), ie.Operator, ie.Right.String())
// Statement wraps the Node that Parse recorded for one statement.
type Statement struct {
Node *Node // node stored for the statement
}

// NOTE(review): two Parse implementations are interleaved in this span — one
// taking []tokenizer.Token and returning []Node (let/if/for handling that
// panics on unexpected tokens), and one taking []Token and returning
// []Statement (stack-based). They look like two revisions of the same
// function; confirm which lines are meant to remain. Comments below describe
// each line only as far as the line itself shows.
func Parse(tokens []tokenizer.Token) []Node {
var statements []Node
i := 0
// Parse walks tokens once, keeping a stack of pending nodes, and appends a
// Statement each time it meets a SEMICOLON or EOF token while the stack is
// non-empty.
func Parse(tokens []Token) []Statement {
var statements []Statement
var current *Node // most recent operator node still waiting for an operand; nil otherwise
var stack []*Node // nodes created since the last emitted statement
var node *Node

for i < len(tokens) {
switch tokens[i].Type {
case tokenizer.LET:
i++
if tokens[i].Type != tokenizer.IDENT {
panic("expected identifier after 'let'")
for i := 0; i < len(tokens); i++ {
token := tokens[i]

switch token.Type {
case IDENTIFIER:
node = &Node{Type: VARIABLE, Value: token.Literal}
// NOTE(review): an identifier following an operator is stored in the
// operator's Left field, while an INT is stored in Right (see the INT
// case) — confirm this asymmetry is intended.
if current != nil {
current.Left = node
}
name := tokens[i].Literal
i++
if tokens[i].Type != tokenizer.COLON {
panic("expected ':' after identifier")
stack = append(stack, node)
current = nil
case INT:
node = &Node{Type: NUMBER, Value: token.Literal}
if current != nil {
current.Right = node
}
i++
if tokens[i].Type != tokenizer.IDENT {
panic("expected type after ':'")
stack = append(stack, node)
current = nil
case ASSIGN:
node = &Node{Type: ASSIGNMENT, Value: token.Literal}
// The node on top of the stack becomes the assignment's left side.
if len(stack) > 0 {
node.Left = stack[len(stack)-1]
stack = stack[:len(stack)-1]
}
varType := tokens[i].Literal
i++
if tokens[i].Type != tokenizer.ASSIGN {
panic("expected '=' after type")
stack = append(stack, node)
case PLUS, MINUS, ASTERISK, SLASH:
node = &Node{Type: EXPRESSION, Value: token.Literal}
// The node on top of the stack becomes the operator's left operand.
if len(stack) > 0 {
node.Left = stack[len(stack)-1]
stack = stack[:len(stack)-1]
}
i++
value := parseExpression(tokens, &i)
statements = append(statements, &LetStatement{Name: name, Type: varType, Value: value})
// NOTE(review): a second pop takes whatever was beneath the left operand
// and stores it as Right, before the right-hand token has been read —
// confirm this is intended.
if len(stack) > 0 {
node.Right = stack[len(stack)-1]
stack = stack[:len(stack)-1]
}
stack = append(stack, node)
current = node
case SEMICOLON:
// Only the bottom stack entry is emitted; the remaining entries are dropped.
if len(stack) > 0 {
statements = append(statements, Statement{Node: stack[0]})
stack = nil
}
case LPAREN, RPAREN:

case tokenizer.IF:
i++
condition := parseExpression(tokens, &i)
i++ // Skip '{'
consequence := parseBlock(tokens, &i)
var alternative []Node
if tokens[i].Type == tokenizer.ELSE {
i++
i++ // Skip '{'
alternative = parseBlock(tokens, &i)
case EOF:
// Emit whatever is pending when the input ends without a ';'.
if len(stack) > 0 {
statements = append(statements, Statement{Node: stack[0]})
}
statements = append(statements, &IfStatement{Condition: condition, Consequence: consequence, Alternative: alternative})
default:

case tokenizer.FOR:
i++
identifier := tokens[i].Literal
i++ // Skip 'in'
rangeExpr := parseExpression(tokens, &i)
i++ // Skip '{'
body := parseBlock(tokens, &i)
statements = append(statements, &ForStatement{Identifier: identifier, Range: rangeExpr, Body: body})
}
i++
}

return statements
}

// parseBlock gathers nodes until the token at *i is an RCURLY, advancing *i by
// one per collected node. On return *i is the index of the RCURLY token.
//
// NOTE(review): every iteration re-runs Parse over the whole token slice and
// indexes its result with the token index *i, and the loop condition does not
// check *i against len(tokens) — confirm both are intended.
func parseBlock(tokens []tokenizer.Token, i *int) []Node {
	var nodes []Node
	for ; tokens[*i].Type != tokenizer.RCURLY; *i++ {
		nodes = append(nodes, Parse(tokens)[*i])
	}
	return nodes
}

// parseExpression builds an expression starting from the token at *i.
//
//   - INT: returns an IntegerLiteral; *i is not advanced.
//   - IDENT: treats the next token's literal as an operator, then recurses on
//     the token after it, returning an InfixExpression. *i is advanced by two
//     before the recursive call.
//   - anything else: returns nil.
//
// NOTE(review): the IDENT branch reads the following tokens without a bounds
// check, so a trailing identifier would index past the slice — confirm callers
// guarantee the shape.
func parseExpression(tokens []tokenizer.Token, i *int) Expression {
	first := tokens[*i]

	if first.Type == tokenizer.INT {
		return &IntegerLiteral{Value: atoi(first.Literal)}
	}
	if first.Type != tokenizer.IDENT {
		return nil
	}

	lhs := &Identifier{Name: first.Literal}
	*i++
	op := tokens[*i].Literal
	*i++
	rhs := parseExpression(tokens, i)
	return &InfixExpression{Left: lhs, Operator: op, Right: rhs}
}

// atoi converts a base-10 string to an int. The conversion error is
// deliberately discarded, so the caller receives whatever value the strconv
// parser produced alongside the error (0 for malformed input).
func atoi(str string) int {
	parsed, _ := strconv.ParseInt(str, 10, 0)
	return int(parsed)
}

0 comments on commit 568e6db

Please sign in to comment.