Skip to content

Commit

Permalink
Increase test-coverage of lexer
Browse files Browse the repository at this point in the history
It turns out that an unterminated string caused an infinite loop,
so that has been fixed along with a test-case or two to confirm that.

Fixed up some other things so now we have 100% test-coverage of this
package, albeit some of it is fake.
  • Loading branch information
skx committed Nov 21, 2023
1 parent 77f5afc commit 9d572c7
Show file tree
Hide file tree
Showing 2 changed files with 118 additions and 28 deletions.
67 changes: 41 additions & 26 deletions lexer/lexer.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
package lexer

import (
"errors"
"fmt"
"strings"
"unicode"
Expand Down Expand Up @@ -237,11 +238,23 @@ func (l *Lexer) NextToken() token.Token {
}
}
case rune('"'):
tok.Type = token.STRING
tok.Literal = l.readString()
str, err := l.readString('"')
if err == nil {
tok.Literal = str
tok.Type = token.STRING
} else {
tok.Literal = err.Error()
tok.Type = token.ILLEGAL
}
case rune('`'):
tok.Type = token.BACKTICK
tok.Literal = l.readBacktick()
str, err := l.readString('`')
if err == nil {
tok.Literal = str
tok.Type = token.BACKTICK
} else {
tok.Literal = err.Error()
tok.Type = token.ILLEGAL
}
case rune('['):
tok = newToken(token.LBRACKET, l.ch)
case rune(']'):
Expand Down Expand Up @@ -284,16 +297,16 @@ func newToken(tokenType token.Type, ch rune) token.Token {
//
// So with input like this:
//
// a.blah();
// a.blah();
//
// Our identifier should be "a" (then we have a period, then a second
// identifier "blah", followed by opening & closing parenthesis).
//
// However we also have to cover the case of:
//
// string.toupper( "blah" );
// os.getenv( "PATH" );
// ..
// string.toupper( "blah" );
// os.getenv( "PATH" );
// ..
//
// So we have a horrid implementation..
func (l *Lexer) readIdentifier() string {
Expand Down Expand Up @@ -488,22 +501,36 @@ func (l *Lexer) readDecimal() token.Token {
return token.Token{Type: token.INT, Literal: integer}
}

// read string
func (l *Lexer) readString() string {
// read a string, delimited by the given character.
func (l *Lexer) readString(delim rune) (string, error) {
out := ""

for {
l.readChar()
if l.ch == '"' {

if l.ch == rune(0) {
return "", fmt.Errorf("unterminated string")
}
if l.ch == delim {
break
}

//
// Handle \n, \r, \t, \", etc.
//
if l.ch == '\\' {

// Line ending with "\" + newline
if l.peekChar() == '\n' {
// consume the newline.
l.readChar()
continue
}

l.readChar()

if l.ch == rune(0) {
return "", errors.New("unterminated string")
}
if l.ch == rune('n') {
l.ch = '\n'
}
Expand All @@ -521,9 +548,10 @@ func (l *Lexer) readString() string {
}
}
out = out + string(l.ch)

}

return out
return out, nil
}

// read a regexp, including flags.
Expand Down Expand Up @@ -576,19 +604,6 @@ func (l *Lexer) readRegexp() (string, error) {
return out, nil
}

// readBacktick reads the body of a backtick-quoted string.
//
// It is entered with the lexer positioned on the opening backtick and
// advances until the closing backtick is the current character.  The
// returned text is the raw slice of input between the two backticks; no
// escape processing is performed.
//
// If the input ends before a closing backtick is seen the loop stops at
// end of input instead of spinning forever, and whatever was read after
// the opening backtick is returned.
func (l *Lexer) readBacktick() string {
	// First character after the opening backtick.
	start := l.position + 1

	for {
		l.readChar()

		// NOTE(review): assumes readChar sets l.ch to rune(0) once the
		// input is exhausted - confirm against readChar.  Without this
		// check an unterminated backtick string never leaves the loop.
		if l.ch == '`' || l.ch == rune(0) {
			break
		}
	}

	// Keep the bounds inside the input so that the slice expression
	// below cannot panic when we stopped because of end of input.
	end := l.position
	if end > len(l.characters) {
		end = len(l.characters)
	}
	if start > end {
		start = end
	}

	return string(l.characters[start:end])
}

// peek character
func (l *Lexer) peekChar() rune {
if l.readPosition >= len(l.characters) {
Expand Down
79 changes: 77 additions & 2 deletions lexer/lexer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -244,15 +244,17 @@ func TestString(t *testing.T) {
t.Fatalf("tests[%d] - Literal wrong, expected=%q, got=%q", i, tt.expectedLiteral, tok.Literal)
}
}

}

func TestSimpleComment(t *testing.T) {
input := `=+// This is a comment
// This is still a comment
# I like comments
let a = 1; # This is a comment too.
// This is a final
// comment on two-lines`
// comment on two-lines
/*
`

tests := []struct {
expectedType token.Type
Expand Down Expand Up @@ -673,6 +675,11 @@ a = 3/4;
t.Fatalf("tests[%d] - Literal wrong, expected=%q, got=%q", i, tt.expectedLiteral, tok.Literal)
}
}

x := l.GetLine()
if x != 2 {
t.Fatalf("unexpected line. %d", x)
}
}

// TestDotDot is designed to ensure we get a ".." not an integer value.
Expand Down Expand Up @@ -702,3 +709,71 @@ func TestDotDot(t *testing.T) {
}
}
}

// TestIllegalString feeds the lexer inputs whose double-quoted string is
// never closed, and confirms the string is reported as an ILLEGAL token
// carrying the text "unterminated string", followed by EOF.
func TestIllegalString(t *testing.T) {

	// Inputs in which the opening quote is never matched.
	inputs := []string{
		`if ( f ~= "steve\
)`,
		`if ( f ~= "steve\`,
	}

	// Every input is expected to produce this same token sequence.
	type expectation struct {
		expectedType    token.Type
		expectedLiteral string
	}
	want := []expectation{
		{token.IF, "if"},
		{token.LPAREN, "("},
		{token.IDENT, "f"},
		{token.CONTAINS, "~="},
		{token.ILLEGAL, "unterminated string"},
		{token.EOF, ""},
	}

	for _, src := range inputs {

		// A fresh lexer per input.
		lex := New(src)

		for idx := range want {
			exp := want[idx]
			got := lex.NextToken()

			if got.Type != exp.expectedType {
				t.Fatalf("tests[%d] - tokentype wrong, expected=%q, got=%q", idx, exp.expectedType, got.Type)
			}
			if got.Literal != exp.expectedLiteral {
				t.Fatalf("tests[%d] - Literal wrong, expected=%q, got=%q", idx, exp.expectedLiteral, got.Literal)
			}
		}
	}
}

// TestIllegalBacktick is designed to look for an unterminated/illegal
// backtick-quoted string: the opening backtick is never closed, so the
// lexer should emit an ILLEGAL token carrying "unterminated string" and
// then EOF.
func TestIllegalBacktick(t *testing.T) {
	src := "if ( f ~= `steve )"

	// The token sequence the input above should produce.
	type expectation struct {
		expectedType    token.Type
		expectedLiteral string
	}
	want := []expectation{
		{token.IF, "if"},
		{token.LPAREN, "("},
		{token.IDENT, "f"},
		{token.CONTAINS, "~="},
		{token.ILLEGAL, "unterminated string"},
		{token.EOF, ""},
	}

	lex := New(src)
	for idx := range want {
		exp := want[idx]
		got := lex.NextToken()

		if got.Type != exp.expectedType {
			t.Fatalf("tests[%d] - tokentype wrong, expected=%q, got=%q", idx, exp.expectedType, got.Type)
		}
		if got.Literal != exp.expectedLiteral {
			t.Fatalf("tests[%d] - Literal wrong, expected=%q, got=%q", idx, exp.expectedLiteral, got.Literal)
		}
	}

	// The input holds no newline, and the test expects the lexer to
	// report line zero once everything has been consumed.
	if line := lex.GetLine(); line != 0 {
		t.Fatalf("unexpected line. %d", line)
	}
}

0 comments on commit 9d572c7

Please sign in to comment.