@@ -484,20 +484,13 @@ LexerOutput nwtrees::lexer(const char* data, LexerOutput&& prev_output)
             std::stable_sort(matches, matches + match_count, &cmp);
         }

-        // -- Commit token.
-
         LexerMatch& selected_match = matches[0];
-        output.tokens.push_back(std::move(selected_match.token));
-
-        // -- Step stream forward, past the matched token length.
+        bool should_commit_match = true;

-        input.offset += selected_match.length;
-    }
+        // -- For tokens that need name buffers, prepare the buffer and update the name entry.

-    // -- For tokens that need name buffers, prepare the buffer and update the name entry.
+        Token& token = selected_match.token;

-    for (Token& token : output.tokens)
-    {
         const bool is_identifier = token.type == Token::Identifier;
         const bool is_str_literal = token.type == Token::Literal && token.literal == Literal::String;

@@ -509,11 +502,30 @@ LexerOutput nwtrees::lexer(const char* data, LexerOutput&& prev_output)
             std::memcpy(output.names.data() + new_idx, input.base + entry->idx, entry->len);
             entry->idx = (int)new_idx;
         }
-    }

-    // -- We will check for any string literals that are together and merge them into one token.
-    // This is quite easy: because they are next to each other, their contents are guaranteed to be next
-    // to each other in the buffer, so we just delete the ones at the end and increase the length of the first.
+        // -- If this is a string literal, merge it with the previous token if that was also a string literal.
+
+        if (is_str_literal && !output.tokens.empty())
+        {
+            Token& last_token = output.tokens[output.tokens.size() - 1];
+            if (last_token.type == Token::Literal && last_token.literal == Literal::String)
+            {
+                last_token.literal_data.str.len += token.literal_data.str.len;
+                should_commit_match = false;
+            }
+        }
+
+        // -- Commit token.
+
+        if (should_commit_match)
+        {
+            output.tokens.push_back(token);
+        }
+
+        // -- Step stream forward, past the matched token length.
+
+        input.offset += selected_match.length;
+    }

     return output;
 }
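
The effect of this change is that adjacent string literals are now merged inline, as each match is committed, rather than in a separate pass over output.tokens afterwards. Below is a minimal standalone sketch of the same merge rule; the Token, Literal, and commit names here are simplified stand-ins for illustration, not the actual nwtrees types.

// Sketch: merge adjacent string-literal tokens at commit time. Because
// adjacent literals are lexed back to back, their contents are contiguous
// in the shared name buffer, so extending the previous token's length is
// equivalent to concatenating them.
#include <cstdio>
#include <vector>

enum class Type { Identifier, Literal };
enum class Literal { String, Int };

struct StrData { int idx; int len; }; // offset/length into a name buffer

struct Token
{
    Type type;
    Literal literal;
    StrData str;
};

void commit(std::vector<Token>& tokens, const Token& token)
{
    const bool is_str_literal = token.type == Type::Literal && token.literal == Literal::String;
    if (is_str_literal && !tokens.empty())
    {
        Token& last_token = tokens.back();
        if (last_token.type == Type::Literal && last_token.literal == Literal::String)
        {
            last_token.str.len += token.str.len; // grow the previous token instead of committing
            return;
        }
    }
    tokens.push_back(token);
}

int main()
{
    std::vector<Token> tokens;
    commit(tokens, { Type::Literal, Literal::String, { 0, 3 } }); // "abc"
    commit(tokens, { Type::Literal, Literal::String, { 3, 3 } }); // "def"
    std::printf("%zu token(s), first len = %d\n", tokens.size(), tokens[0].str.len); // 1 token(s), first len = 6
    return 0;
}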