@@ -1863,6 +1863,85 @@ fuzzy_match_range_list_copy(Arena *arena, FuzzyMatchRangeList *src)
18631863 return dst ;
18641864}
18651865
1866+ internal ScoredFuzzyMatchRangeList
1867+ scored_fuzzy_match_find (Arena * arena , String8 needle , String8 haystack )
1868+ {
1869+ Temp scratch = scratch_begin (0 , 0 );
1870+ // We're going to implement a very simple scoring mechanism similar to that described in
1871+ // https://www.forrestthewoods.com/blog/reverse_engineering_sublime_texts_fuzzy_match/.
1872+ #define scored_fuzzy_match_unmatched -1
1873+ #define scored_fuzzy_match_consecutive 5
1874+ #define scored_fuzzy_match_unmatched_leading -3
1875+ ScoredFuzzyMatchRangeList invalid = {0 };
1876+ ScoredFuzzyMatchRangeList result = {0 };
1877+ // Simplify to a single needle which has common delimiters removed.
1878+ String8List needles = str8_split (scratch .arena , needle , (U8 * )" " , 1 , 0 );
1879+ needle = str8_list_join (scratch .arena , & needles , 0 );
1880+ if (needle .size == 0 )
1881+ {
1882+ scratch_end (scratch );
1883+ return invalid ;
1884+ }
1885+ String8 tmp_str = str8 (needle .str , 1 );
1886+ U64 find_pos = 0 ;
1887+ find_pos = str8_find_needle (haystack , find_pos , tmp_str , StringMatchFlag_CaseInsensitive );
1888+ if (find_pos >= haystack .size )
1889+ {
1890+ scratch_end (scratch );
1891+ return invalid ;
1892+ }
1893+ // Leading character penalty.
1894+ // Only go to a max of 3 based on the article.
1895+ result .score += Min (find_pos , 3 ) * scored_fuzzy_match_unmatched_leading ;
1896+ // We also want to deduct for additional unmatched characters between start and find_pos.
1897+ if (find_pos > 3 )
1898+ {
1899+ result .score += (find_pos - 3 ) * scored_fuzzy_match_unmatched ;
1900+ }
1901+ Rng1U64 range = r1u64 (find_pos , find_pos + 1 );
1902+ FuzzyMatchRangeNode * n = push_array (arena , FuzzyMatchRangeNode , 1 );
1903+ n -> range = range ;
1904+ SLLQueuePush (result .list .first , result .list .last , n );
1905+ result .list .count += 1 ;
1906+ // Match the rest.
1907+ U64 prev_found = find_pos ;
1908+ U64 search_start = 0 ;
1909+ find_pos += 1 ;
1910+ for (U64 idx = 1 ; idx < needle .size ; ++ idx )
1911+ {
1912+ tmp_str = str8 (needle .str + idx , 1 );
1913+ search_start = find_pos ;
1914+ find_pos = str8_find_needle (haystack , find_pos , tmp_str , StringMatchFlag_CaseInsensitive );
1915+ if (find_pos >= haystack .size )
1916+ {
1917+ scratch_end (scratch );
1918+ return invalid ;
1919+ }
1920+ // Compute consecutive bonus.
1921+ if (prev_found + 1 == find_pos )
1922+ {
1923+ result .score += scored_fuzzy_match_consecutive ;
1924+ // We can reuse the existing node and simply extend it.
1925+ result .list .last -> range .max = find_pos + 1 ;
1926+ }
1927+ else
1928+ {
1929+ result .score += (find_pos - search_start ) * scored_fuzzy_match_unmatched ;
1930+ Rng1U64 range = r1u64 (find_pos , find_pos + 1 );
1931+ FuzzyMatchRangeNode * n = push_array (arena , FuzzyMatchRangeNode , 1 );
1932+ n -> range = range ;
1933+ SLLQueuePush (result .list .first , result .list .last , n );
1934+ result .list .count += 1 ;
1935+ }
1936+ prev_found = find_pos ;
1937+ find_pos += 1 ;
1938+ }
1939+ // Compute final unmatched characters.
1940+ result .score += (haystack .size - find_pos ) * scored_fuzzy_match_unmatched ;
1941+ scratch_end (scratch );
1942+ return result ;
1943+ }
1944+
////////////////////////////////
//~ NOTE(allen): Serialization Helpers
18681947
0 commit comments