Skip to content

Commit a220d8b

Browse files
authored
Merge pull request #11 from nilium/allow-braces-in-barewords
Allow braces & comment runes in barewords
2 parents 807334d + f3b7cf5 commit a220d8b

File tree

3 files changed

+74
-40
lines changed

3 files changed

+74
-40
lines changed

lexer.go

Lines changed: 47 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -406,9 +406,23 @@ func isBarewordRune(r rune) bool {
406406
!isBarewordForbidden(r)
407407
}
408408

409+
// isBarewordTransition returns true if r is valid inside of a token that is not a bareword but
410+
// would become one by consuming r.
411+
func isBarewordTransition(r rune) bool {
412+
return unicode.In(r, barewordTables...) &&
413+
!isStatementSep(r)
414+
}
415+
409416
// isBarewordForbidden returns true if r is one of the characters that may not appear in a bareword.
410417
func isBarewordForbidden(r rune) bool {
411-
return isStatementSep(r) || unicode.IsControl(r)
418+
return isWordSep(r) || unicode.IsControl(r)
419+
}
420+
421+
func isWordSep(r rune) bool {
422+
return unicode.IsSpace(r) ||
423+
r == rSentinel || // End statement
424+
r == rDoubleQuote || // Quoted string
425+
r == rBackQuote // Raw string
412426
}
413427

414428
func isStatementSep(r rune) bool {
@@ -419,8 +433,7 @@ func isStatementSep(r rune) bool {
419433
r == rBracketOpen || // Open array
420434
r == rBracketClose || // Close array
421435
r == rDoubleQuote || // Quoted string
422-
r == rBackQuote || // Raw string
423-
r == rComment // Comment
436+
r == rBackQuote // Raw string
424437
}
425438

426439
func isLongIntervalInitial(r rune) bool {
@@ -561,8 +574,20 @@ func (l *Lexer) lexSegment(r rune) (Token, consumerFunc, error) {
561574

562575
func (l *Lexer) lexWordTail(next consumerFunc) consumerFunc {
563576
var wordConsumer consumerFunc
577+
var braces int
564578
wordConsumer = func(r rune) (Token, consumerFunc, error) {
565-
if isBarewordRune(r) {
579+
switch {
580+
case r == rCurlOpen || r == rBracketOpen:
581+
braces++
582+
l.buffer(r, r)
583+
return noToken, wordConsumer, nil
584+
case (r == rCurlClose || r == rBracketClose):
585+
if braces <= 0 {
586+
break
587+
}
588+
braces--
589+
fallthrough
590+
case isBarewordRune(r):
566591
l.buffer(r, r)
567592
return noToken, wordConsumer, nil
568593
}
@@ -603,7 +628,7 @@ func (l *Lexer) lexSignedNumber(r rune) (Token, consumerFunc, error) {
603628
case isStatementSep(r) || r == eof:
604629
l.unread()
605630
return l.lexBecomeWord(-1)
606-
case isBarewordRune(r):
631+
case isBarewordTransition(r):
607632
return l.lexBecomeWord(r)
608633
}
609634
return noToken, nil, fmt.Errorf("unexpected character %q: expected number after sign", r)
@@ -629,7 +654,7 @@ func (l *Lexer) lexOctalNumber(r rune) (Token, consumerFunc, error) {
629654
l.unread()
630655
tok, err := l.valueToken(TOctal, parseBaseInt(8))
631656
return tok, l.lexSegment, err
632-
case isBarewordRune(r):
657+
case isBarewordTransition(r):
633658
return l.lexBecomeWord(r)
634659
}
635660
return noToken, nil, fmt.Errorf("unexpected character %q: expected octal digit or separator", r)
@@ -656,7 +681,7 @@ func (l *Lexer) lexHexNum(r rune) (Token, consumerFunc, error) {
656681
l.unread()
657682
tok, err := l.valueToken(THex, parseBaseInt(16))
658683
return tok, l.lexSegment, err
659-
case isBarewordRune(r):
684+
case isBarewordTransition(r):
660685
return l.lexBecomeWord(r)
661686
}
662687
return noToken, nil, fmt.Errorf("unexpected character %q: expected hex digit or separator", r)
@@ -671,7 +696,7 @@ func (l *Lexer) lexBinNum(r rune) (Token, consumerFunc, error) {
671696
l.unread()
672697
tok, err := l.valueToken(TBinary, parseBaseInt(2))
673698
return tok, l.lexSegment, err
674-
case isBarewordRune(r):
699+
case isBarewordTransition(r):
675700
return l.lexBecomeWord(r)
676701
}
677702
return noToken, nil, fmt.Errorf("unexpected character %q: expected binary digit or separator", r)
@@ -695,7 +720,7 @@ func (l *Lexer) lexRationalDenomInitial(r rune) (Token, consumerFunc, error) {
695720
case isStatementSep(r) || r == eof:
696721
l.unread()
697722
return l.lexBecomeWord(-1)
698-
case isBarewordRune(r):
723+
case isBarewordTransition(r):
699724
return l.lexBecomeWord(r)
700725
}
701726
return noToken, nil, fmt.Errorf("unexpected character %q: expected positive number", r)
@@ -706,7 +731,7 @@ func (l *Lexer) lexRationalDenomTail(r rune) (Token, consumerFunc, error) {
706731
case isDecimal(r):
707732
l.buffer(r, r)
708733
return noToken, l.lexRationalDenomTail, nil
709-
case isBarewordRune(r):
734+
case isBarewordTransition(r):
710735
return l.lexBecomeWord(r)
711736
case isStatementSep(r) || r == eof:
712737
l.unread()
@@ -757,7 +782,7 @@ func (l *Lexer) lexFloatExponentUnsigned(r rune) (Token, consumerFunc, error) {
757782
case r == eof || isStatementSep(r):
758783
l.unread()
759784
return l.lexBecomeWord(-1)
760-
case isBarewordRune(r):
785+
case isBarewordTransition(r):
761786
return l.lexBecomeWord(r)
762787
}
763788
return noToken, nil, fmt.Errorf("unexpected character %q: expected sign or digit", r)
@@ -776,7 +801,7 @@ func (l *Lexer) lexFloatExponentSignedTail(r rune) (Token, consumerFunc, error)
776801
l.unread()
777802
tok, err := l.valueToken(TFloat, parseBigFloat(l.Precision))
778803
return tok, l.lexSegment, err
779-
case isBarewordRune(r):
804+
case isBarewordTransition(r):
780805
return l.lexBecomeWord(r)
781806
}
782807
return noToken, nil, fmt.Errorf("unexpected character %q: expected digit or separator", r)
@@ -816,7 +841,7 @@ func (l *Lexer) lexFloatEnd(r rune) (Token, consumerFunc, error) {
816841
l.unread()
817842
tok, err := l.valueToken(TFloat, parseBigFloat(l.Precision))
818843
return tok, l.lexSegment, err
819-
case isBarewordRune(r):
844+
case isBarewordTransition(r):
820845
return l.lexBecomeWord(r)
821846
}
822847
return noToken, nil, fmt.Errorf("unexpected character %q: expected separator", r)
@@ -835,7 +860,7 @@ func (l *Lexer) lexFloatPointInitial(r rune) (Token, consumerFunc, error) {
835860
case r == eof || isStatementSep(r):
836861
l.unread()
837862
return l.lexBecomeWord(-1)
838-
case isBarewordRune(r):
863+
case isBarewordTransition(r):
839864
return l.lexBecomeWord(r)
840865
}
841866
return l.lexFloatPoint(r)
@@ -865,7 +890,7 @@ func (l *Lexer) lexFloatPoint(r rune) (Token, consumerFunc, error) {
865890
l.unread()
866891
tok, err := l.valueToken(TFloat, parseBigFloat(l.Precision))
867892
return tok, l.lexSegment, err
868-
case isBarewordRune(r):
893+
case isBarewordTransition(r):
869894
return l.lexBecomeWord(r)
870895
}
871896
return noToken, nil, fmt.Errorf("unexpected character %q: expected digit, exponent, or separator", r)
@@ -909,7 +934,7 @@ func (l *Lexer) lexIntervalUnitMaybeLong(r rune) (Token, consumerFunc, error) {
909934
return noToken, l.lexIntervalInitial, nil
910935
case isStatementSep(r) || r == eof:
911936
return l.lexIntervalInitial(r)
912-
case isBarewordRune(r):
937+
case isBarewordTransition(r):
913938
return l.lexBecomeWord(r)
914939
}
915940
return noToken, nil, fmt.Errorf("unexpected character %q: expected digit or 's'", r)
@@ -931,7 +956,7 @@ func (l *Lexer) lexIntervalUnitLong(r rune) (Token, consumerFunc, error) {
931956
case isStatementSep(r) || r == eof:
932957
l.unread()
933958
return l.lexBecomeWord(-1)
934-
case isBarewordRune(r):
959+
case isBarewordTransition(r):
935960
return l.lexBecomeWord(r)
936961
}
937962
return noToken, nil, fmt.Errorf("unexpected character %q: expected 's'", r)
@@ -972,7 +997,7 @@ func (l *Lexer) lexIntervalFloatTail(r rune) (Token, consumerFunc, error) {
972997
case isStatementSep(r) || r == eof:
973998
l.unread()
974999
return l.lexBecomeWord(-1)
975-
case isBarewordRune(r):
1000+
case isBarewordTransition(r):
9761001
return l.lexBecomeWord(r)
9771002
}
9781003
return noToken, l.lexIntervalFloatTail, fmt.Errorf("unexpected character %s: expected digit or interval unit", TDuration)
@@ -1013,7 +1038,7 @@ func (l *Lexer) lexInterval(r rune) (Token, consumerFunc, error) {
10131038
case isStatementSep(r) || r == eof:
10141039
l.unread()
10151040
return l.lexBecomeWord(-1)
1016-
case isBarewordRune(r):
1041+
case isBarewordTransition(r):
10171042
return l.lexBecomeWord(r)
10181043
}
10191044
return noToken, nil, fmt.Errorf("unexpected character %q: expected number or interval unit", r)
@@ -1058,7 +1083,7 @@ func (l *Lexer) lexZero(r rune) (Token, consumerFunc, error) {
10581083
case r == 'E' || r == 'e':
10591084
l.buffer(r, r)
10601085
return noToken, l.lexFloatExponentUnsigned, nil
1061-
case isBarewordRune(r):
1086+
case isBarewordTransition(r):
10621087
return l.lexBecomeWord(r)
10631088
}
10641089
return noToken, nil, fmt.Errorf("unexpected character %q: expected b, x, X, octal, duration unit, or separator", r)
@@ -1117,7 +1142,7 @@ func (l *Lexer) lexNonZero(r rune) (Token, consumerFunc, error) {
11171142
return noToken, l.lexFloatExponentUnsigned, nil
11181143
}
11191144

1120-
if isBarewordRune(r) {
1145+
if isBarewordTransition(r) {
11211146
return l.lexBecomeWord(r)
11221147
}
11231148

@@ -1150,7 +1175,7 @@ func (l *Lexer) lexBaseNumber(neg bool, base int) (consumer consumerFunc) {
11501175
n++
11511176
l.buffer(r, r)
11521177
return noToken, consumer, nil
1153-
} else if isBarewordRune(r) {
1178+
} else if isBarewordTransition(r) {
11541179
return l.lexBecomeWord(r)
11551180
} else if n == 0 && (isStatementSep(r) || r == eof) {
11561181
l.unread()

lexer_test.go

Lines changed: 26 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -283,10 +283,9 @@ func TestBooleans(t *testing.T) {
283283
_ws, {Token: Token{Kind: TWord, Raw: []byte("true"), Value: "true"}},
284284
_ws, {Token: Token{Kind: TWord, Raw: []byte("Yes"), Value: "Yes"}},
285285
_ws, {Token: Token{Kind: TWord, Raw: []byte("FALSE"), Value: "FALSE"}},
286-
_curlopen,
287-
_curlclose,
286+
_ws, _curlopen, _curlclose,
288287
_eof,
289-
}.Run(t, "TRUE true Yes FALSE{}")
288+
}.Run(t, "TRUE true Yes FALSE {}")
290289
}
291290

292291
func TestStatement(t *testing.T) {
@@ -297,26 +296,35 @@ func TestStatement(t *testing.T) {
297296
_ws, {Token: Token{Kind: TOctal, Raw: []byte("+0600"), Value: big.NewInt(0600)}},
298297
_ws, {Token: Token{Kind: THex, Raw: []byte("-0xf"), Value: big.NewInt(-15)}},
299298
_ws, {Token: Token{Kind: THex, Raw: []byte("0x12f"), Value: big.NewInt(303)}},
299+
_ws, wordCase("${FOO:-${BAZ:-Default}}"),
300300
_semicolon,
301-
_ws, {Token: Token{Kind: TWord, Raw: []byte("stmt/2"), Value: "stmt/2"}},
302-
_semicolon,
303-
_ws, {Token: Token{Kind: TWord, Raw: []byte("sect"), Value: "sect"}},
304-
_curlopen, _curlclose,
305-
_ws, {Token: Token{Kind: TWord, Raw: []byte("a"), Value: "a"}},
306-
_semicolon,
307-
_ws, {Token: Token{Kind: TWord, Raw: []byte("b"), Value: "b"}},
308-
_curlopen, _curlclose,
309-
_ws, {Token: Token{Kind: TWord, Raw: []byte("c"), Value: "c"}},
310-
_comment,
301+
_ws, wordCase("stmt/2"), _semicolon,
302+
_ws, wordCase("stmt{}"), _semicolon,
303+
_ws, wordCase("invalid"), _curlclose,
304+
_ws, wordCase("sect"), _ws, _curlopen, _curlclose,
305+
_ws, wordCase("a"), _semicolon,
306+
_ws, wordCase("b{}"),
307+
_ws, wordCase("c'foo"),
308+
_ws, _comment,
309+
_ws, wordCase("#[foo]"),
310+
_ws, wordCase("$[foo]"),
311+
_ws, wordCase("${foo}"),
312+
_ws, wordCase("${{foo}}"),
313+
_ws, wordCase("${[foo}]"),
314+
_ws, wordCase("${foo}"), _curlclose,
315+
_ws, wordCase("${foo]"), _bracketclose,
311316
_ws, _semicolon, _semicolon,
312317
_ws, _eof,
313318
}.Run(t, `
314-
stmt -1234 +0600 -0xf 0x12f;
319+
stmt -1234 +0600 -0xf 0x12f ${FOO:-${BAZ:-Default}};
315320
stmt/2;
316-
sect{}
321+
stmt{};
322+
invalid}
323+
sect {}
317324
a;
318325
b{}
319-
c'foo
326+
c'foo 'foo
327+
#[foo] $[foo] ${foo} ${{foo}} ${[foo}] ${foo}} ${foo]]
320328
;;
321329
`)
322330
}
@@ -726,6 +734,7 @@ func TestFloats(t *testing.T) {
726734
{Token: Token{Kind: TWord, Value: "k"}},
727735
_ws, dec("0.5"),
728736
_curlclose,
737+
_ws, dec("0e123"),
729738
_ws, dec("0.0e0"),
730739
_ws, dec("0.0E0"),
731740
_ws, dec("1.2345"),
@@ -739,7 +748,7 @@ func TestFloats(t *testing.T) {
739748
-1.2345 -12345e-4 -1.2345e4 -1.2345e+4
740749
+0.0 +0.5 +0.0e0 +0.0E0
741750
+1.2345 +12345E-4 +1.2345E4 +1.2345E+4
742-
[0.0] #{k 0.5} 0.0e0 0.0E0
751+
[0.0] #{k 0.5} 0e123 0.0e0 0.0E0
743752
1.2345 12345e-4 1.2345e4 1.2345e+4
744753
;`)
745754

parser_test.go

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -99,7 +99,7 @@ func TestParseAST(t *testing.T) {
9999
},
100100
{
101101
Name: "MinimalSpace",
102-
Src: `sect[]#{}{stmt #{k[2]"p"#{}}true[false];}`,
102+
Src: `sect []#{}{stmt #{k [2]"p"#{}}true [false];}`,
103103
Doc: doc().section("sect", mkexprs(), mkmap()).
104104
statement("stmt", mkmap("k", mkexprs(2), "p", mkmap()), true, mkexprs(false)).
105105
Doc(),

0 commit comments

Comments
 (0)