
Commit e9e0036
checkpoint on unified lexer
1 parent: 82bc133

1 file changed: src/text/text.c (58 additions, 38 deletions)
@@ -215,13 +215,12 @@ txt_token_array_from_lang_kind_string(Arena *arena, TXT_LangKind lang_kind, Stri
       next_escaped = 1;
     }

-    // rjf: take starter, push new token tasks
+    // rjf: take token starters
+    B32 new_token_needed = (top_task == 0);
+    TXT_TokenizerRule *new_rule = nil_rule;
+    TXT_TokenKind new_token_kind = TXT_TokenKind_Null;
     {
-      TXT_TokenizerRule *new_rule = nil_rule;
-      TXT_TokenKind new_token_kind = TXT_TokenKind_Null;
-
       // rjf: use next bytes to look up a rule from the table
-      if(top_task == 0)
       {
         TXT_TokenizerRule *active_rule = top_task ? top_task->rule : nil_rule;
         U64 hash_1byte = u64_hash_from_str8(string_1byte);
@@ -292,37 +291,11 @@ txt_token_array_from_lang_kind_string(Arena *arena, TXT_LangKind lang_kind, Stri
            byte == '>' || byte == '/' ||
            byte == '?' || byte == '|') { new_token_kind = TXT_TokenKind_Symbol; }
       }
-
-      // rjf: start new token
-      if(new_token_kind != TXT_TokenKind_Null)
-      {
-        TokenTask *task = free_task;
-        if(task != 0)
-        {
-          SLLStackPop(free_task);
-        }
-        else
-        {
-          task = push_array(scratch.arena, TokenTask, 1);
-        }
-        SLLStackPush(top_task, task);
-        top_task->rule = new_rule;
-        top_task->kind = new_token_kind;
-        top_task->start_idx = idx;
-      }
-
-      // rjf: invalid token kind -> emit error
-      else if(top_task == 0)
-      {
-        TXT_Token token = {TXT_TokenKind_Error, r1u64(idx, idx+1)};
-        txt_token_chunk_list_push(scratch.arena, &tokens, chunk_size, &token);
-      }
     }

     // rjf: look for ender based on rule's closing symbol
     U64 ender_pad = 0;
     B32 ender_found = 0;
-    B32 task_pop = 0;
     if(top_task != 0 && idx > top_task->start_idx)
     {
       TXT_TokenKind active_token_kind = top_task->kind;
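
The block removed above reappears after the emit stage in the final hunk: new token tasks are drawn from a free list when one is available, and only fall back to the arena otherwise. Below is a standalone sketch of that allocate-or-reuse stack, with a pared-down TokenTask, calloc standing in for push_array(scratch.arena, ...), and minimal stand-ins for the codebase's SLLStackPush/SLLStackPop helpers.

// Standalone sketch of the free-list-backed task stack used in this commit.
// TokenTask is simplified, calloc replaces arena allocation, and the SLL*
// macros below are minimal equivalents of the codebase's intrusive-stack helpers.
#include <stdlib.h>
#include <stdio.h>

typedef struct TokenTask TokenTask;
struct TokenTask
{
  TokenTask *next;
  int kind;          // stand-in for TXT_TokenKind
  size_t start_idx;
};

#define SLLStackPush(f, n) ((n)->next = (f), (f) = (n))
#define SLLStackPop(f)     ((f) = (f)->next)

// Reuse a task from the free list if one is available, otherwise allocate.
static TokenTask *
token_task_push(TokenTask **top_task, TokenTask **free_task, int kind, size_t start_idx)
{
  TokenTask *task = *free_task;
  if(task != 0)
  {
    SLLStackPop(*free_task);
  }
  else
  {
    task = calloc(1, sizeof(TokenTask));
  }
  SLLStackPush(*top_task, task);
  task->kind = kind;
  task->start_idx = start_idx;
  return task;
}

// Pop the active task and recycle its node onto the free list.
static void
token_task_pop(TokenTask **top_task, TokenTask **free_task)
{
  TokenTask *popped = *top_task;
  SLLStackPop(*top_task);
  SLLStackPush(*free_task, popped);
}

int main(void)
{
  TokenTask *top_task = 0, *free_task = 0;
  token_task_push(&top_task, &free_task, 1, 0);  // start a token at byte 0
  token_task_pop(&top_task, &free_task);         // finish it; node goes to the free list
  TokenTask *reused = token_task_push(&top_task, &free_task, 2, 5); // reuses the freed node
  printf("reused task kind=%d start=%zu\n", reused->kind, reused->start_idx);
  return 0;
}

Recycling through free_task keeps the number of task nodes bounded by the deepest stack ever reached rather than by the number of tokens lexed.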
@@ -373,13 +346,33 @@ txt_token_array_from_lang_kind_string(Arena *arena, TXT_LangKind lang_kind, Stri
       }
     }

-    // rjf: next byte is ender => emit token
+    // rjf: if we have a new token to start, but we have an active token, then
+    // end the current token (but keep the same stack)
+    B32 keep_top_task = 0;
+    if(top_task != 0 && new_token_kind != TXT_TokenKind_Null && idx > top_task->start_idx)
+    {
+      keep_top_task = (!ender_found);
+      ender_found = 1;
+    }
+
+    // rjf: if we have an ender => emit token(s) for current task
     if(ender_found)
     {
       TXT_Token token = {top_task->kind, r1u64(top_task->start_idx, idx+ender_pad)};
-      TokenTask *popped = top_task;
-      SLLStackPop(top_task);
-      SLLStackPush(free_task, popped);
+      if(!keep_top_task)
+      {
+        TokenTask *popped = top_task;
+        SLLStackPop(top_task);
+        SLLStackPush(free_task, popped);
+        if(top_task != 0)
+        {
+          top_task->start_idx = idx+ender_pad;
+        }
+      }
+      else
+      {
+        top_task->start_idx = idx+ender_pad;
+      }

       // rjf: trim \r's off of end
       {
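
The keep_top_task path added here reads as support for splitting an in-progress token around one that starts inside it: the active token is flushed up to the current byte, but its task stays on the stack with start_idx pushed forward so it can resume later; if the token ended on its own, the task is popped and recycled, and any task underneath is restarted at the same point. A compact, self-contained sketch of just that emit/keep/pop decision follows (simplified types, printed output, no ender_pad trimming or chunked-list handling).

// Isolated sketch of the emit/keep/pop decision added in this hunk.
// Token emission just prints the byte range; TokenTask is pared down.
#include <stdio.h>
#include <stddef.h>

typedef struct TokenTask TokenTask;
struct TokenTask
{
  TokenTask *next;
  int kind;
  size_t start_idx;
};

static void
emit(int kind, size_t min, size_t max)
{
  printf("token kind=%d range=[%zu,%zu)\n", kind, min, max);
}

// Mirrors the diff: if a starter arrived while a token is active, force an end
// but keep the same task; otherwise pop it and, if another task is underneath,
// restart that one at the end of the emitted range.
static void
end_active_token(TokenTask **top_task, int new_token_kind_is_set,
                 int ender_found, size_t idx, size_t ender_pad)
{
  int keep_top_task = 0;
  if(*top_task != 0 && new_token_kind_is_set && idx > (*top_task)->start_idx)
  {
    keep_top_task = (!ender_found);
    ender_found = 1;
  }
  if(ender_found && *top_task != 0)
  {
    emit((*top_task)->kind, (*top_task)->start_idx, idx + ender_pad);
    if(!keep_top_task)
    {
      *top_task = (*top_task)->next;       // pop (free-list recycling elided)
      if(*top_task != 0)
      {
        (*top_task)->start_idx = idx + ender_pad;
      }
    }
    else
    {
      (*top_task)->start_idx = idx + ender_pad;
    }
  }
}

int main(void)
{
  // An outer task covering bytes [0, ...) with a nested starter arriving at byte 4:
  TokenTask outer = {0, 1, 0};
  TokenTask *top_task = &outer;
  end_active_token(&top_task, 1 /*starter found*/, 0 /*no natural ender*/, 4, 0);
  // Prints: token kind=1 range=[0,4)
  // The outer task stays on the stack with start_idx moved to 4; in the full
  // lexer, a task for the token starting at byte 4 would now be pushed on top.
  printf("top start_idx is now %zu\n", top_task->start_idx);
  return 0;
}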
@@ -442,16 +435,43 @@ txt_token_array_from_lang_kind_string(Arena *arena, TXT_LangKind lang_kind, Stri
       {
         txt_token_chunk_list_push(scratch.arena, &tokens, chunk_size, &token);
       }
-
-      // rjf: increment by ender padding
-      idx += ender_pad;
+    }
+
+    // rjf: start new token
+    if(new_token_kind != TXT_TokenKind_Null)
+    {
+      TokenTask *task = free_task;
+      if(task != 0)
+      {
+        SLLStackPop(free_task);
+      }
+      else
+      {
+        task = push_array(scratch.arena, TokenTask, 1);
+      }
+      SLLStackPush(top_task, task);
+      top_task->rule = new_rule;
+      top_task->kind = new_token_kind;
+      top_task->start_idx = idx;
+    }
+
+    // rjf: invalid token kind -> emit error
+    else if(new_token_needed)
+    {
+      TXT_Token token = {TXT_TokenKind_Error, r1u64(idx, idx+1)};
+      txt_token_chunk_list_push(scratch.arena, &tokens, chunk_size, &token);
     }

     // rjf: advance by 1 byte if we haven't found an ender
     if(!ender_found)
     {
       idx += 1;
     }
+
+    // rjf: advance by ender padding
+    idx += ender_pad;
+
+    // rjf: advance escaping
     escaped = next_escaped;
   }
 }
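
Taken together, the commit reorganizes the per-byte loop into a fixed sequence of stages, with the starter lookup's results (new_token_needed, new_rule, new_token_kind) hoisted to loop scope so later stages can act on them: take starters, look for an ender, force an end when a starter arrives mid-token, emit, start the next task (or emit an error byte), then advance. The toy lexer below keeps that stage order but swaps the rule table for fixed byte classes and drops the task stack entirely, so it is a shape sketch rather than the real algorithm; every name in it is a placeholder.

// Toy sketch of the reworked per-byte pipeline. Stage order follows the diff;
// the rule table, TokenTask stack, escapes, and ender_pad handling are omitted.
#include <stdio.h>
#include <ctype.h>
#include <string.h>

typedef enum { Tok_Null, Tok_Identifier, Tok_Numeric, Tok_Symbol } TokKind;
static const char *tok_names[] = { "null", "identifier", "numeric", "symbol" };

int main(void)
{
  const char *string = "idx += ender_pad;";
  size_t size = strlen(string);
  TokKind active_kind  = Tok_Null; // stand-in for top_task->kind
  size_t  active_start = 0;        // stand-in for top_task->start_idx
  for(size_t idx = 0; idx <= size;)
  {
    unsigned char byte = (idx < size) ? (unsigned char)string[idx] : 0;

    // stage 1: take token starters (the real code hashes into a rule table here;
    // new_token_needed is captured before any task could be popped later in the pass)
    int new_token_needed = (active_kind == Tok_Null);
    TokKind new_token_kind = Tok_Null;
    if(new_token_needed || (ispunct(byte) && active_kind != Tok_Symbol))
    {
      if(isalpha(byte) || byte == '_') { new_token_kind = Tok_Identifier; }
      else if(isdigit(byte))           { new_token_kind = Tok_Numeric; }
      else if(ispunct(byte))           { new_token_kind = Tok_Symbol; }
    }

    // stage 2: look for an ender for the active token
    int ender_found = 0;
    if(active_kind != Tok_Null && idx > active_start)
    {
      if(active_kind == Tok_Identifier)   { ender_found = !(isalnum(byte) || byte == '_'); }
      else if(active_kind == Tok_Numeric) { ender_found = !isdigit(byte); }
      else if(active_kind == Tok_Symbol)  { ender_found = 1; } // single-byte symbols
    }

    // stage 3: a starter arriving mid-token also ends the active token
    // (with these simple byte classes stage 2 already catches every such case,
    // but the slot is kept to mirror the diff's ordering)
    if(active_kind != Tok_Null && new_token_kind != Tok_Null && idx > active_start)
    {
      ender_found = 1;
    }

    // stage 4: emit the active token on an ender
    if(ender_found)
    {
      printf("%s \"%.*s\"\n", tok_names[active_kind],
             (int)(idx - active_start), string + active_start);
      active_kind = Tok_Null;
    }

    // stage 5: start the next token, or flag a byte no rule matched
    if(new_token_kind != Tok_Null)
    {
      active_kind  = new_token_kind;
      active_start = idx;
    }
    else if(new_token_needed && idx < size && !isspace(byte))
    {
      printf("error byte at %zu\n", idx);
    }

    // stage 6: advance by one byte only when no ender was consumed this pass,
    // so the ending byte gets re-examined as a potential starter
    if(!ender_found)
    {
      idx += 1;
    }
  }
  return 0;
}

On the sample string this prints identifier "idx", symbol "+", symbol "=", identifier "ender_pad", and symbol ";".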
