@@ -102,6 +102,84 @@ df_fuzzy_match_find(Arena *arena, String8List needles, String8 haystack)
102
102
return result ;
103
103
}
104
104
105
+ internal DF_ScoredFuzzyMatchRangeList
106
+ df_scored_fuzzy_match_find (Arena * arena , String8List needles , String8 haystack )
107
+ {
108
+ Temp scratch = scratch_begin (0 , 0 );
109
+ // We're going to implement a very simple scoring mechanism similar to that described in
110
+ // https://www.forrestthewoods.com/blog/reverse_engineering_sublime_texts_fuzzy_match/.
111
+ #define df_scored_unmatched -1
112
+ #define df_scored_consecutive 5
113
+ #define df_scored_unmatched_leading -3
114
+ DF_ScoredFuzzyMatchRangeList invalid = {0 };
115
+ DF_ScoredFuzzyMatchRangeList result = {0 };
116
+ // Simplify to a single needle.
117
+ String8 needle = str8_list_join (scratch .arena , & needles , 0 );
118
+ if (needle .size == 0 )
119
+ {
120
+ scratch_end (scratch );
121
+ return invalid ;
122
+ }
123
+ String8 tmp_str = str8 (needle .str , 1 );
124
+ U64 find_pos = 0 ;
125
+ find_pos = str8_find_needle (haystack , find_pos , tmp_str , StringMatchFlag_CaseInsensitive );
126
+ if (find_pos >= haystack .size )
127
+ {
128
+ scratch_end (scratch );
129
+ return invalid ;
130
+ }
131
+ // Leading character penalty.
132
+ // Only go to a max of 3 based on the article.
133
+ result .score += Min (find_pos , 3 ) * df_scored_unmatched_leading ;
134
+ // We also want to deduct for additional unmatched characters between start and find_pos.
135
+ if (find_pos > 3 )
136
+ {
137
+ result .score += (find_pos - 3 ) * df_scored_unmatched ;
138
+ }
139
+ Rng1U64 range = r1u64 (find_pos , find_pos + 1 );
140
+ DF_FuzzyMatchRangeNode * n = push_array (arena , DF_FuzzyMatchRangeNode , 1 );
141
+ n -> range = range ;
142
+ SLLQueuePush (result .list .first , result .list .last , n );
143
+ result .list .count += 1 ;
144
+ // Match the rest.
145
+ U64 prev_found = find_pos ;
146
+ U64 search_start = 0 ;
147
+ find_pos += 1 ;
148
+ for (U64 idx = 1 ; idx < needle .size ; ++ idx )
149
+ {
150
+ tmp_str = str8 (needle .str + idx , 1 );
151
+ search_start = find_pos ;
152
+ find_pos = str8_find_needle (haystack , find_pos , tmp_str , StringMatchFlag_CaseInsensitive );
153
+ if (find_pos >= haystack .size )
154
+ {
155
+ scratch_end (scratch );
156
+ return invalid ;
157
+ }
158
+ // Compute consecutive bonus.
159
+ if (prev_found + 1 == find_pos )
160
+ {
161
+ result .score += df_scored_consecutive ;
162
+ // We can reuse the existing node and simply extend it.
163
+ result .list .last -> range .max = find_pos + 1 ;
164
+ }
165
+ else
166
+ {
167
+ result .score += (find_pos - search_start ) * df_scored_unmatched ;
168
+ Rng1U64 range = r1u64 (find_pos , find_pos + 1 );
169
+ DF_FuzzyMatchRangeNode * n = push_array (arena , DF_FuzzyMatchRangeNode , 1 );
170
+ n -> range = range ;
171
+ SLLQueuePush (result .list .first , result .list .last , n );
172
+ result .list .count += 1 ;
173
+ }
174
+ prev_found = find_pos ;
175
+ find_pos += 1 ;
176
+ }
177
+ // Compute final unmatched characters.
178
+ result .score += (haystack .size - find_pos ) * df_scored_unmatched ;
179
+ scratch_end (scratch );
180
+ return result ;
181
+ }
182
+
105
183
////////////////////////////////
106
184
//~ rjf: View Type Functions
107
185
0 commit comments