Commit 1ba9e52

Merge pull request #10 from b3b00/bugfix/#9-error-column-and-line-numbers
Bugfix/#9 error column and line numbers
2 parents ff210bf + 5e4abfd commit 1ba9e52

File tree

  ParserTests/ErrorTests.cs
  ParserTests/LexerTests.cs
  sly/lexer/Lexer.cs
  sly/lexer/TokenDefinition.cs
  sly/parser/parser/llparser/RecursiveDescentSyntaxParser.cs

5 files changed: +172 additions, -82 deletions

ParserTests/ErrorTests.cs

Lines changed: 24 additions & 24 deletions
@@ -14,31 +14,31 @@ public class ErrorTests
 {


-        //[Fact]
-        //public void TestJsonSyntaxError()
-        //{
-        //    JSONParser jsonParser = new JSONParser();
-        //    ParserBuilder builder = new ParserBuilder();
-        //    Parser<JsonToken> parser = builder.BuildParser<JsonToken>(jsonParser, ParserType.LL_RECURSIVE_DESCENT, "root");
+        [Fact]
+        public void TestJsonSyntaxError()
+        {
+            JSONParser jsonParser = new JSONParser();
+            ParserBuilder builder = new ParserBuilder();
+            Parser<JsonToken> parser = builder.BuildParser<JsonToken>(jsonParser, ParserType.LL_RECURSIVE_DESCENT, "root");

-
-            // string source = @"{
-            // 'one': 1,
-            // 'bug':{,}
-            // }".Replace("'","\"");
-            // ParseResult<JsonToken> r = parser.Parse(source);
-            // Assert.True(r.IsError);
-            // Assert.Null(r.Result);
-            // Assert.NotNull(r.Errors);
-            // Assert.True(r.Errors.Count > 0);
-            // Assert.IsAssignableFrom(typeof(UnexpectedTokenSyntaxError<JsonToken>), r.Errors[0]);
-            // UnexpectedTokenSyntaxError<JsonToken> error = r.Errors[0] as UnexpectedTokenSyntaxError<JsonToken>;

-            // Assert.Equal(JsonToken.COMMA, error?.UnexpectedToken.TokenID);
-            // Assert.Equal(2, error?.Line);
-            // Assert.Equal(26, error?.Column);
+            string source = @"{
+                'one': 1,
+                'bug':{,}
+                }".Replace("'", "\"");
+            ParseResult<JsonToken> r = parser.Parse(source);
+            Assert.True(r.IsError);
+            Assert.Null(r.Result);
+            Assert.NotNull(r.Errors);
+            Assert.True(r.Errors.Count > 0);
+            Assert.IsAssignableFrom(typeof(UnexpectedTokenSyntaxError<JsonToken>), r.Errors[0]);
+            UnexpectedTokenSyntaxError<JsonToken> error = r.Errors[0] as UnexpectedTokenSyntaxError<JsonToken>;

-        //}
+            Assert.Equal(JsonToken.COMMA, error?.UnexpectedToken.TokenID);
+            Assert.Equal(3, error?.Line);
+            Assert.Equal(24, error?.Column);
+
+        }

         [Fact]
         public void TestExpressionSyntaxError()

@@ -58,7 +58,7 @@ public void TestExpressionSyntaxError()
             Assert.Equal(ExpressionToken.PLUS, error.UnexpectedToken.TokenID);

             Assert.Equal(1, error.Line);
-            Assert.Equal(7, error.Column);
+            Assert.Equal(10, error.Column);
         }

         [Fact]

@@ -76,7 +76,7 @@ public void TestLexicalError()
             Assert.IsAssignableFrom(typeof(LexicalError), r.Errors[0]);
             LexicalError error = r.Errors[0] as LexicalError;
             Assert.Equal(1, error.Line);
-            Assert.Equal(2, error.Column);
+            Assert.Equal(3, error.Column);
             Assert.Equal('@', error.UnexpectedChar);
         }
 }
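The re-enabled TestJsonSyntaxError and the adjusted column expectations all treat line and column numbers as 1-based. As a rough sketch of that convention (not part of this commit; the helper name and signature are hypothetical), the position of a character index in a source string can be derived like this:

    // Hypothetical helper, not part of the commit: illustrates the 1-based
    // line/column convention the updated assertions rely on.
    public static (int Line, int Column) PositionOf(string source, int index)
    {
        int line = 1;        // lines are counted from 1
        int lineStart = 0;   // index of the first character of the current line
        for (int i = 0; i < index; i++)
        {
            if (source[i] == '\n')
            {
                line++;
                lineStart = i + 1;
            }
        }
        return (line, index - lineStart + 1); // columns are counted from 1
    }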

ParserTests/LexerTests.cs

Lines changed: 111 additions & 0 deletions
@@ -0,0 +1,111 @@
+using sly.parser;
+using expressionparser;
+using jsonparser;
+using sly.lexer;
+using sly.parser.generator;
+using System.Linq;
+using System.Collections.Generic;
+using System.Text;
+using Xunit;
+
+namespace ParserTests
+{
+    public class LexerTests
+    {
+
+        private ILexer<JsonToken> GetJsonLexer()
+        {
+            JSONParser jsonParser = new JSONParser();
+            ParserBuilder builder = new ParserBuilder();
+            Parser<JsonToken> parser = builder.BuildParser<JsonToken>(jsonParser, ParserType.LL_RECURSIVE_DESCENT, "root");
+            return parser.Lexer;
+        }
+
+        private ILexer<ExpressionToken> GetExpressionLexer()
+        {
+            ExpressionParser exprParser = new ExpressionParser();
+            ParserBuilder builder = new ParserBuilder();
+            Parser<ExpressionToken> parser = builder.BuildParser<ExpressionToken>(exprParser, ParserType.LL_RECURSIVE_DESCENT, "expression");
+            return parser.Lexer;
+        }
+
+
+
+
+        [Fact]
+        public void TestSingleLineJsonLexing()
+        {
+            string json = "{ \"propi\": 12 , \"props\":\"val\" }";
+            ILexer<JsonToken> lexer = GetJsonLexer();
+            List<Token<JsonToken>> tokens = lexer.Tokenize(json).ToList<Token<JsonToken>>();
+            Assert.Equal(10, tokens.Count);
+            List<JsonToken> expectedTokensID = new List<JsonToken>()
+            {
+                JsonToken.ACCG, JsonToken.STRING,JsonToken.COLON,JsonToken.INT,
+                JsonToken.COMMA, JsonToken.STRING,JsonToken.COLON,JsonToken.STRING,
+                JsonToken.ACCD
+            };
+            List<JsonToken> tokensID = tokens.Take(9).Select((Token<JsonToken> tok) => tok.TokenID).ToList<JsonToken>();
+            Assert.Equal(expectedTokensID, tokensID);
+
+            List<int> expectedColumnPositions = new List<int>()
+            {
+                1,3,10,12,15,17,24,25,31
+            };
+
+            List<int> columnPositions = tokens.Take(9).Select((Token<JsonToken> tok) => tok.Position.Column).ToList<int>();
+            Assert.Equal(expectedColumnPositions, columnPositions);
+
+            ;
+        }
+
+        [Fact]
+        public void TestSingleLineExpressionLexing()
+        {
+            ILexer<ExpressionToken> lexer = GetExpressionLexer();
+        }
+
+        [Fact]
+        public void TestMultiLineJsonLexing()
+        {
+            string json = "{ \"propi\": 12 \n" +
+                ", \"props\":\"val\" }";
+            ILexer<JsonToken> lexer = GetJsonLexer();
+            List<Token<JsonToken>> tokens = lexer.Tokenize(json).ToList<Token<JsonToken>>();
+            Assert.Equal(10, tokens.Count);
+            List<JsonToken> expectedTokensID = new List<JsonToken>()
+            {
+                JsonToken.ACCG, JsonToken.STRING,JsonToken.COLON,JsonToken.INT,
+                JsonToken.COMMA, JsonToken.STRING,JsonToken.COLON,JsonToken.STRING,
+                JsonToken.ACCD
+            };
+            List<JsonToken> tokensID = tokens.Take(9).Select((Token<JsonToken> tok) => tok.TokenID).ToList<JsonToken>();
+            Assert.Equal(expectedTokensID, tokensID);
+
+            List<int> expectedColumnPositions = new List<int>()
+            {
+                1,3,10,12,
+                1,3,10,11,17
+            };
+
+            List<int> columnPositions = tokens.Take(9).Select((Token<JsonToken> tok) => tok.Position.Column).ToList<int>();
+            Assert.Equal(expectedColumnPositions, columnPositions);
+
+            List<int> expectedLinePositions = new List<int>()
+            {
+                1,1,1,1,2,2,2,2,2
+            };
+
+            List<int> linePositions = tokens.Take(9).Select((Token<JsonToken> tok) => tok.Position.Line).ToList<int>();
+            Assert.Equal(expectedLinePositions, linePositions);
+
+            ;
+        }
+
+        [Fact]
+        public void TestMultiLineExpressionLexing()
+        {
+            ILexer<ExpressionToken> lexer = GetExpressionLexer();
+        }
+    }
+}
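These tests pin down both the token stream and the 1-based Position.Line / Position.Column values. A minimal usage sketch, assuming only the API already exercised above (the test name and input are hypothetical; this snippet is not part of the commit):

    [Fact]
    public void SketchTokenPositionUsage()
    {
        // Hypothetical example, not part of the commit.
        ILexer<JsonToken> lexer = GetJsonLexer();
        foreach (Token<JsonToken> token in lexer.Tokenize("{ \"a\": 1 }"))
        {
            // Position.Line and Position.Column are the 1-based coordinates
            // asserted in TestSingleLineJsonLexing and TestMultiLineJsonLexing.
            Assert.True(token.Position.Line >= 1);
            Assert.True(token.Position.Column >= 1);
        }
    }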

sly/lexer/Lexer.cs

Lines changed: 10 additions & 52 deletions
@@ -9,7 +9,7 @@ namespace sly.lexer
     /// <summary>
     /// T is the token type
     /// </summary>
-    /// <typeparam name="T"></typeparam>
+    /// <typeparam name="T">T is the enum Token type</typeparam>
     public class Lexer<T> : ILexer<T>
     {


@@ -44,11 +44,6 @@ public void InitGlobalRegex()

         public IEnumerable<Token<T>> Tokenize(string source)
         {
-            if (GlobalRegex == null)
-            {
-                InitGlobalRegex();
-            }
-
             int currentIndex = 0;
             List<Token<T>> tokens = new List<Token<T>>();
             int currentLine = 1;

@@ -60,37 +55,10 @@ public IEnumerable<Token<T>> Tokenize(string source)

             while (currentIndex < source.Length)
             {
-                currentColumn = currentIndex - currentLineStartIndex;
+                currentColumn = currentIndex - currentLineStartIndex+1;
                 TokenDefinition<T> matchedDefinition = null;
                 int matchLength = 0;
-
-                T globTok =eol ;
-                var globMatch = GlobalRegex.Match(source,currentIndex);
-                bool globalFound = globMatch.Success;
-
-                if (globalFound)
-                {
-                    int index = -1;
-                    int i = 1;
-                    while (i < globMatch.Groups.Count && index < 0)
-                    {
-                        if (!string.IsNullOrEmpty(globMatch.Groups[i].Value))
-                        {
-                            index = i;
-                        }
-                        i++;
-                    }
-                    string tokenName = GlobalRegex.GroupNameFromNumber(index);
-                    globTok = (T)Enum.Parse(typeof(T), tokenName, false);
-                }
-                else
-                {
-                    ;
-                }

-
-
-
                 foreach (var rule in tokenDefinitions)
                 {
                     var match = rule.Regex.Match(source, currentIndex);

@@ -104,33 +72,23 @@ public IEnumerable<Token<T>> Tokenize(string source)
                 }

                 if (matchedDefinition == null)
-                {
-                    if (globalFound)
-                    {
-                        // dommage
-                        ;
-                    }
+                {
                     throw new LexerException<T>(new LexicalError(currentLine,currentColumn, source[currentIndex]));
                 }
                 else
                 {
                     var value = source.Substring(currentIndex, matchLength);

-                    if (!matchedDefinition.IsIgnored)
-                    {
-
-                        if (!matchedDefinition.TokenID.Equals(globTok))
-                        {
-                            // dommage
-                            ;
-                        }
-                        yield return new Token<T>(matchedDefinition.TokenID, value, new TokenPosition(currentIndex - matchLength, currentLine, currentColumn));
-                    }
                     if (matchedDefinition.IsEndOfLine)
-                    {
-                        currentLineStartIndex = currentIndex;
+                    {
+                        currentLineStartIndex = currentIndex+matchLength;
                         currentLine++;
                     }
+                    if (!matchedDefinition.IsIgnored)
+                    {
+
+                        yield return new Token<T>(matchedDefinition.TokenID, value, new TokenPosition(currentIndex, currentLine, currentColumn));
+                    }
                     currentIndex += matchLength;

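The heart of the fix is the changed position arithmetic: the column becomes 1-based (currentIndex - currentLineStartIndex + 1), after an end-of-line token the next line starts after the matched newline (currentIndex + matchLength) rather than at it, and the emitted TokenPosition now receives currentIndex, the start of the match. A condensed sketch of that bookkeeping, assuming a hypothetical nextMatch helper in place of the real regex matching and IsIgnored filtering (this is not the actual Lexer<T>.Tokenize):

    // Hypothetical condensation of the position bookkeeping after this commit.
    static IEnumerable<TokenPosition> Positions(string source,
        Func<string, int, (int Length, bool IsEndOfLine)> nextMatch)
    {
        int currentIndex = 0;
        int currentLine = 1;
        int currentLineStartIndex = 0;
        while (currentIndex < source.Length)
        {
            int currentColumn = currentIndex - currentLineStartIndex + 1; // 1-based column
            (int matchLength, bool isEndOfLine) = nextMatch(source, currentIndex);
            if (isEndOfLine)
            {
                currentLineStartIndex = currentIndex + matchLength; // next line starts after the newline
                currentLine++;
            }
            else
            {
                // end-of-line tokens are treated as ignored in this sketch
                yield return new TokenPosition(currentIndex, currentLine, currentColumn);
            }
            currentIndex += matchLength;
        }
    }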

sly/lexer/TokenDefinition.cs

Lines changed: 16 additions & 6 deletions
@@ -3,14 +3,24 @@

 namespace sly.lexer
 {
+    /// <summary>
+    /// defines a token assiating :
+    /// - a token identifier (which type is T)
+    /// - and a regular expression capturing the token
+    /// a token may be skipped and / or match an end of line
+    ///
+    /// </summary>
+    /// <typeparam name="T">T is the enum Token type</typeparam>
     public class TokenDefinition<T>
     {
-
-        public TokenDefinition(T token, string regex)
-            : this(token, regex, false,false)
-        {
-        }
-
+
+        /// <summary>
+        ///
+        /// </summary>
+        /// <param name="token"> the token ID</param>
+        /// <param name="regex"> the regular expression for the token</param>
+        /// <param name="isIgnored">true if the token must ignored (i.e the lexer does not return it, used for whitespaces for instance) </param>
+        /// <param name="isEndOfLine">true if the token matches an end of line (for line counting)</param>
         public TokenDefinition(T token, string regex,bool isIgnored= false,bool isEndOfLine = false)
         {
             TokenID = token;
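With the default parameters now documented, isIgnored and isEndOfLine can be passed by name. A hypothetical set of definitions, purely for illustration (MyToken and the regular expressions are examples, not taken from this repository):

    // Hypothetical token definitions showing the documented flags.
    public enum MyToken { WS, EOL, INT, ID }

    public static List<TokenDefinition<MyToken>> ExampleDefinitions()
    {
        return new List<TokenDefinition<MyToken>>()
        {
            new TokenDefinition<MyToken>(MyToken.WS,  "[ \\t]+",  isIgnored: true),                   // skipped: never returned by the lexer
            new TokenDefinition<MyToken>(MyToken.EOL, "\\r?\\n",  isIgnored: true, isEndOfLine: true), // skipped, but advances the line counter
            new TokenDefinition<MyToken>(MyToken.INT, "[0-9]+"),                                       // defaults: returned, not an end of line
            new TokenDefinition<MyToken>(MyToken.ID,  "[a-zA-Z]+")
        };
    }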

sly/parser/parser/llparser/RecursiveDescentSyntaxParser.cs

Lines changed: 11 additions & 0 deletions
@@ -194,6 +194,10 @@ public virtual SyntaxParseResult<T> Parse(IList<Token<T>> tokens, Rule<T> rule,
                 {
                     children.Add(nonTerminalResult.Root);
                     currentPosition = nonTerminalResult.EndingPosition;
+                    if (nonTerminalResult.Errors != null && nonTerminalResult.Errors.Any())
+                    {
+                        errors.AddRange(nonTerminalResult.Errors);
+                    }
                 }
                 else
                 {

@@ -241,6 +245,12 @@ public SyntaxParseResult<T> ParseTerminal(IList<Token<T>> tokens, TerminalClause
 
         public SyntaxParseResult<T> ParseNonTerminal(IList<Token<T>> tokens, NonTerminalClause<T> nonTermClause,int currentPosition)
         {
+
+            if (nonTermClause.NonTerminalName == "members")
+            {
+                ;
+            }
+
             NonTerminal<T> nt = Configuration.NonTerminals[nonTermClause.NonTerminalName];
             bool found = false;
             bool isError = false;

@@ -315,6 +325,7 @@ public SyntaxParseResult<T> ParseNonTerminal(IList<Token<T>> tokens, NonTerminal
             else
             {
                 result.IsError = true;
+                result.Errors = errors;
                 result.EndingPosition = greaterIndex;
             }
             return result;
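The first and last hunks are what surfaces positions to callers: errors collected while parsing a nonterminal are now copied into the enclosing result, which is what lets the assertions in ErrorTests.cs see a populated Errors list with the corrected line and column values. A minimal sketch of a consumer, using only members already exercised in ErrorTests.cs (parser and brokenJson are assumed locals; this snippet is not part of the commit):

    // Hypothetical error reporting, not part of the commit.
    ParseResult<JsonToken> r = parser.Parse(brokenJson);
    if (r.IsError)
    {
        foreach (var e in r.Errors)
        {
            if (e is UnexpectedTokenSyntaxError<JsonToken> syntaxError)
            {
                // Line and Column carry the 1-based position of the offending token
                Console.WriteLine($"unexpected {syntaxError.UnexpectedToken.TokenID} at line {syntaxError.Line}, column {syntaxError.Column}");
            }
        }
    }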

0 commit comments
