@@ -1855,6 +1855,85 @@ fuzzy_match_range_list_copy(Arena *arena, FuzzyMatchRangeList *src)
1855
1855
return dst ;
1856
1856
}
1857
1857
1858
+ internal ScoredFuzzyMatchRangeList
1859
+ scored_fuzzy_match_find (Arena * arena , String8 needle , String8 haystack )
1860
+ {
1861
+ Temp scratch = scratch_begin (0 , 0 );
1862
+ // We're going to implement a very simple scoring mechanism similar to that described in
1863
+ // https://www.forrestthewoods.com/blog/reverse_engineering_sublime_texts_fuzzy_match/.
1864
+ #define scored_fuzzy_match_unmatched -1
1865
+ #define scored_fuzzy_match_consecutive 5
1866
+ #define scored_fuzzy_match_unmatched_leading -3
1867
+ ScoredFuzzyMatchRangeList invalid = {0 };
1868
+ ScoredFuzzyMatchRangeList result = {0 };
1869
+ // Simplify to a single needle which has common delimiters removed.
1870
+ String8List needles = str8_split (scratch .arena , needle , (U8 * )" " , 1 , 0 );
1871
+ needle = str8_list_join (scratch .arena , & needles , 0 );
1872
+ if (needle .size == 0 )
1873
+ {
1874
+ scratch_end (scratch );
1875
+ return invalid ;
1876
+ }
1877
+ String8 tmp_str = str8 (needle .str , 1 );
1878
+ U64 find_pos = 0 ;
1879
+ find_pos = str8_find_needle (haystack , find_pos , tmp_str , StringMatchFlag_CaseInsensitive );
1880
+ if (find_pos >= haystack .size )
1881
+ {
1882
+ scratch_end (scratch );
1883
+ return invalid ;
1884
+ }
1885
+ // Leading character penalty.
1886
+ // Only go to a max of 3 based on the article.
1887
+ result .score += Min (find_pos , 3 ) * scored_fuzzy_match_unmatched_leading ;
1888
+ // We also want to deduct for additional unmatched characters between start and find_pos.
1889
+ if (find_pos > 3 )
1890
+ {
1891
+ result .score += (find_pos - 3 ) * scored_fuzzy_match_unmatched ;
1892
+ }
1893
+ Rng1U64 range = r1u64 (find_pos , find_pos + 1 );
1894
+ FuzzyMatchRangeNode * n = push_array (arena , FuzzyMatchRangeNode , 1 );
1895
+ n -> range = range ;
1896
+ SLLQueuePush (result .list .first , result .list .last , n );
1897
+ result .list .count += 1 ;
1898
+ // Match the rest.
1899
+ U64 prev_found = find_pos ;
1900
+ U64 search_start = 0 ;
1901
+ find_pos += 1 ;
1902
+ for (U64 idx = 1 ; idx < needle .size ; ++ idx )
1903
+ {
1904
+ tmp_str = str8 (needle .str + idx , 1 );
1905
+ search_start = find_pos ;
1906
+ find_pos = str8_find_needle (haystack , find_pos , tmp_str , StringMatchFlag_CaseInsensitive );
1907
+ if (find_pos >= haystack .size )
1908
+ {
1909
+ scratch_end (scratch );
1910
+ return invalid ;
1911
+ }
1912
+ // Compute consecutive bonus.
1913
+ if (prev_found + 1 == find_pos )
1914
+ {
1915
+ result .score += scored_fuzzy_match_consecutive ;
1916
+ // We can reuse the existing node and simply extend it.
1917
+ result .list .last -> range .max = find_pos + 1 ;
1918
+ }
1919
+ else
1920
+ {
1921
+ result .score += (find_pos - search_start ) * scored_fuzzy_match_unmatched ;
1922
+ Rng1U64 range = r1u64 (find_pos , find_pos + 1 );
1923
+ FuzzyMatchRangeNode * n = push_array (arena , FuzzyMatchRangeNode , 1 );
1924
+ n -> range = range ;
1925
+ SLLQueuePush (result .list .first , result .list .last , n );
1926
+ result .list .count += 1 ;
1927
+ }
1928
+ prev_found = find_pos ;
1929
+ find_pos += 1 ;
1930
+ }
1931
+ // Compute final unmatched characters.
1932
+ result .score += (haystack .size - find_pos ) * scored_fuzzy_match_unmatched ;
1933
+ scratch_end (scratch );
1934
+ return result ;
1935
+ }
1936
+
1858
1937
////////////////////////////////
1859
1938
//~ NOTE(allen): Serialization Helpers
1860
1939
0 commit comments