-
Notifications
You must be signed in to change notification settings - Fork 0
/
Tokenizer.hs
90 lines (85 loc) · 2.3 KB
/
Tokenizer.hs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
-- | Lexer that turns source text into a list of 'Token's: identifiers and
-- operators, parentheses, lambda heads, @let@/@in@, @if@/@then@/@else@,
-- and integer and boolean literals.
module Tokenizer
( Token(..)
, tokenize
, nameOf
) where
import qualified Data.Char as Char
-- | A single lexical unit produced by 'tokenize'.
data Token
  = TId String
    -- ^ An alphanumeric identifier, or an operator made of special characters.
  | TOpenParen
    -- ^ The character @(@.
  | TClosingParen
    -- ^ The character @)@.
  | TLambda String
    -- ^ A backslash immediately followed by a name; the name is the payload.
  | TArrow
    -- ^ The operator @->@.
  | TLet
    -- ^ The keyword @let@.
  | TEqual
    -- ^ The operator @=@.
  | TIn
    -- ^ The keyword @in@.
  | TIntLit Int
    -- ^ A run of decimal digits, already converted to a number.
  | TBoolLit Bool
    -- ^ The word @True@ or @False@.
  | TIf
    -- ^ The keyword @if@.
  | TThen
    -- ^ The keyword @then@.
  | TElse
    -- ^ The keyword @else@.
  deriving (Show, Eq)
-- | Short label describing which kind of token this is.
--
-- Any payload carried by the token (identifier text, literal value) is
-- ignored; punctuation and operator tokens are shown wrapped in single
-- quotes.
nameOf :: Token -> String
nameOf token =
  case token of
    TId _ -> "identifier"
    TOpenParen -> "'('"
    TClosingParen -> "')'"
    TLambda _ -> "function"
    TArrow -> "'->'"
    TLet -> "let"
    TEqual -> "'='"
    TIn -> "in"
    TIntLit _ -> "int literal"
    TBoolLit _ -> "bool literal"
    TIf -> "if"
    TThen -> "then"
    TElse -> "else"
-- | True for the characters that may make up an operator token:
-- @|@, @&@, @<@, @>@, @=@, @+@, @-@, @/@ and @*@.
isSpecialChar :: Char -> Bool
isSpecialChar = (`elem` operatorChars)
  where
    operatorChars = "|&<>=+-/*"
-- | Split source text into a flat list of 'Token's.
--
-- Lexical rules:
--
-- * Whitespace (anything 'Char.isSpace' accepts, so tabs and carriage
--   returns as well as spaces and newlines) separates tokens and is dropped.
-- * @(@ and @)@ become 'TOpenParen' and 'TClosingParen'.
-- * A backslash must be followed immediately by an alphanumeric name that
--   starts with a letter; together they form one 'TLambda'.
-- * A letter starts an alphanumeric word, which is a keyword, a boolean
--   literal, or otherwise a 'TId'.
-- * A run of operator characters (see 'isSpecialChar') is a line comment
--   when it is exactly @--@, 'TArrow' for @->@, 'TEqual' for @=@, and
--   otherwise a 'TId' carrying the operator text.
-- * A run of ASCII digits is a 'TIntLit'.
--
-- Returns 'Left' with a message for the first problem found, scanning left
-- to right: a character that starts no token, a backslash without a
-- parameter name, or an integer literal that does not fit in 'Int'.
tokenize :: String -> Either String [Token]
tokenize [] = pure []
tokenize ('(' : rest) = (TOpenParen :) <$> tokenize rest
tokenize (')' : rest) = (TClosingParen :) <$> tokenize rest
tokenize ('\\' : rest) =
  case rest of
    (c : _)
      | Char.isAlpha c ->
          let (param, remaining) = span Char.isAlphaNum rest
           in (TLambda param :) <$> tokenize remaining
      | otherwise ->
          Left $ "Expected identifier after \\ but found '" ++ [c] ++ "'"
    [] -> Left "Expected identifier after \\ but reached end of input"
tokenize input@(c : rest)
  | Char.isSpace c = tokenize rest
  | Char.isAlpha c =
      let (word, remaining) = span Char.isAlphaNum input
       in (wordToken word :) <$> tokenize remaining
  | isSpecialChar c =
      -- An operator run that is exactly "--" starts a line comment: skip
      -- everything up to the next newline (or the end of the input).
      let (op, remaining) = span isSpecialChar input
       in if op == "--"
            then tokenize (dropWhile (/= '\n') remaining)
            else (operatorToken op :) <$> tokenize remaining
  | Char.isDigit c = do
      let (digits, remaining) = span Char.isDigit input
      -- Validate the literal before recursing so the leftmost error wins.
      value <- intLiteral digits
      (TIntLit value :) <$> tokenize remaining
  | otherwise = Left $ "Unrecognized '" ++ [c] ++ "'"

-- | Classify an alphanumeric word as a keyword, a boolean literal, or a
-- plain identifier.
wordToken :: String -> Token
wordToken word =
  case word of
    "let" -> TLet
    "in" -> TIn
    "if" -> TIf
    "then" -> TThen
    "else" -> TElse
    "True" -> TBoolLit True
    "False" -> TBoolLit False
    _ -> TId word

-- | Classify a run of operator characters; anything other than @->@ and @=@
-- is kept as an identifier carrying the operator text.
operatorToken :: String -> Token
operatorToken op =
  case op of
    "->" -> TArrow
    "=" -> TEqual
    _ -> TId op

-- | Convert a non-empty run of ASCII digits to an 'Int', failing instead of
-- silently wrapping around when the value exceeds @maxBound :: Int@.
intLiteral :: String -> Either String Int
intLiteral digits
  | value > toInteger (maxBound :: Int) =
      Left $ "Integer literal out of range: " ++ digits
  | otherwise = Right (fromInteger value)
  where
    -- 'read' cannot fail here: the caller only passes a non-empty run of
    -- characters accepted by 'Char.isDigit', i.e. '0'..'9'.
    value = read digits :: Integer