Skip to content

Commit 5a60906

Browse files
committed
Merge string literals that are adjacent.
This has the side effect of removing one of our passes (since we now fill the names buffer inside the main loop), improving performance by about 5%.
1 parent 091ef4d commit 5a60906

File tree

2 files changed

+33
-22
lines changed

2 files changed

+33
-22
lines changed

src/nwtrees/Lexer.cpp

+26-14
Original file line numberDiff line numberDiff line change
@@ -484,20 +484,13 @@ LexerOutput nwtrees::lexer(const char* data, LexerOutput&& prev_output)
484484
std::stable_sort(matches, matches + match_count, &cmp);
485485
}
486486

487-
// -- Commit token.
488-
489487
LexerMatch& selected_match = matches[0];
490-
output.tokens.push_back(std::move(selected_match.token));
491-
492-
// -- Step stream forward, past the matched token length.
488+
bool should_commit_match = true;
493489

494-
input.offset += selected_match.length;
495-
}
490+
// -- For tokens that need name buffers, prepare the buffer and update the name entry.
496491

497-
// -- For tokens that need name buffers, prepare the buffer and update the name entry.
492+
Token& token = selected_match.token;
498493

499-
for (Token& token : output.tokens)
500-
{
501494
const bool is_identifier = token.type == Token::Identifier;
502495
const bool is_str_literal = token.type == Token::Literal && token.literal == Literal::String;
503496

@@ -509,11 +502,30 @@ LexerOutput nwtrees::lexer(const char* data, LexerOutput&& prev_output)
509502
std::memcpy(output.names.data() + new_idx, input.base + entry->idx, entry->len);
510503
entry->idx = (int)new_idx;
511504
}
512-
}
513505

514-
// -- We will check for any string literals that are together and merge them into one token.
515-
// This is quite easy: because they are next to each other, their contents are guaranteed to be next
516-
// to each other in the buffer, so we just delete the ones at the end and increase the length of the first.
506+
// -- If this token is a string literal and the previous token was also a string literal, merge this one into the previous token.
507+
508+
if (is_str_literal && !output.tokens.empty())
509+
{
510+
Token& last_token = output.tokens[output.tokens.size() - 1];
511+
if (last_token.type == Token::Literal && last_token.literal == Literal::String)
512+
{
513+
last_token.literal_data.str.len += token.literal_data.str.len;
514+
should_commit_match = false;
515+
}
516+
}
517+
518+
// -- Commit token.
519+
520+
if (should_commit_match)
521+
{
522+
output.tokens.push_back(token);
523+
}
524+
525+
// -- Step stream forward, past the matched token length.
526+
527+
input.offset += selected_match.length;
528+
}
517529

518530
return output;
519531
}

tests/Lexer.cpp

+7-8
Original file line numberDiff line numberDiff line change
@@ -5,13 +5,13 @@
55
namespace
66
{
77
template <typename T>
8-
std::string concat(const T& collection)
8+
std::string concat(const T& collection, const char separator = ' ')
99
{
1010
std::string ret;
1111
for (const char* str : collection)
1212
{
1313
ret += str;
14-
ret += ' ';
14+
ret += separator;
1515
}
1616
return ret;
1717
}
@@ -99,11 +99,11 @@ TEST_CLASS(Lexer)
9999
TEST_METHOD(Literals_String)
100100
{
101101
static constexpr std::array literals { R"("test \" ")", R"("testnewline\n")" };
102-
nwtrees::LexerOutput lex = nwtrees::lexer(concat(literals).c_str());
103-
TEST_EXPECT(lex.tokens.size() == literals.size());
102+
nwtrees::LexerOutput lex = nwtrees::lexer(concat(literals, ';').c_str());
103+
TEST_EXPECT(lex.tokens.size() == literals.size() * 2);
104104
TEST_EXPECT(lex.errors.empty());
105105

106-
for (int i = 0; i < literals.size(); ++i)
106+
for (int i = 0; i < literals.size(); i += 2)
107107
{
108108
const nwtrees::Token& token = lex.tokens[i];
109109
TEST_EXPECT(token.type == nwtrees::Token::Literal);
@@ -115,9 +115,8 @@ TEST_CLASS(Lexer)
115115

116116
TEST_METHOD(Literals_String_Concat)
117117
{
118-
return;
119-
const char* input = R"("test" "test2" "test3")";
120-
nwtrees::LexerOutput lex = nwtrees::lexer(input);
118+
static constexpr std::array literals { R"("test")", R"("test2")", R"("test3")" };
119+
nwtrees::LexerOutput lex = nwtrees::lexer(concat(literals).c_str());
121120
TEST_EXPECT(lex.tokens.size() == 1);
122121

123122
const nwtrees::Token& token = lex.tokens[0];

0 commit comments

Comments
 (0)