@@ -1754,6 +1754,85 @@ fuzzy_match_range_list_copy(Arena *arena, FuzzyMatchRangeList *src)
1754
1754
return dst ;
1755
1755
}
1756
1756
1757
+ internal ScoredFuzzyMatchRangeList
1758
+ scored_fuzzy_match_find (Arena * arena , String8 needle , String8 haystack )
1759
+ {
1760
+ Temp scratch = scratch_begin (0 , 0 );
1761
+ // We're going to implement a very simple scoring mechanism similar to that described in
1762
+ // https://www.forrestthewoods.com/blog/reverse_engineering_sublime_texts_fuzzy_match/.
1763
+ #define scored_fuzzy_match_unmatched -1
1764
+ #define scored_fuzzy_match_consecutive 5
1765
+ #define scored_fuzzy_match_unmatched_leading -3
1766
+ ScoredFuzzyMatchRangeList invalid = {0 };
1767
+ ScoredFuzzyMatchRangeList result = {0 };
1768
+ // Simplify to a single needle which has common delimiters removed.
1769
+ String8List needles = str8_split (scratch .arena , needle , (U8 * )" " , 1 , 0 );
1770
+ needle = str8_list_join (scratch .arena , & needles , 0 );
1771
+ if (needle .size == 0 )
1772
+ {
1773
+ scratch_end (scratch );
1774
+ return invalid ;
1775
+ }
1776
+ String8 tmp_str = str8 (needle .str , 1 );
1777
+ U64 find_pos = 0 ;
1778
+ find_pos = str8_find_needle (haystack , find_pos , tmp_str , StringMatchFlag_CaseInsensitive );
1779
+ if (find_pos >= haystack .size )
1780
+ {
1781
+ scratch_end (scratch );
1782
+ return invalid ;
1783
+ }
1784
+ // Leading character penalty.
1785
+ // Only go to a max of 3 based on the article.
1786
+ result .score += Min (find_pos , 3 ) * scored_fuzzy_match_unmatched_leading ;
1787
+ // We also want to deduct for additional unmatched characters between start and find_pos.
1788
+ if (find_pos > 3 )
1789
+ {
1790
+ result .score += (find_pos - 3 ) * scored_fuzzy_match_unmatched ;
1791
+ }
1792
+ Rng1U64 range = r1u64 (find_pos , find_pos + 1 );
1793
+ FuzzyMatchRangeNode * n = push_array (arena , FuzzyMatchRangeNode , 1 );
1794
+ n -> range = range ;
1795
+ SLLQueuePush (result .list .first , result .list .last , n );
1796
+ result .list .count += 1 ;
1797
+ // Match the rest.
1798
+ U64 prev_found = find_pos ;
1799
+ U64 search_start = 0 ;
1800
+ find_pos += 1 ;
1801
+ for (U64 idx = 1 ; idx < needle .size ; ++ idx )
1802
+ {
1803
+ tmp_str = str8 (needle .str + idx , 1 );
1804
+ search_start = find_pos ;
1805
+ find_pos = str8_find_needle (haystack , find_pos , tmp_str , StringMatchFlag_CaseInsensitive );
1806
+ if (find_pos >= haystack .size )
1807
+ {
1808
+ scratch_end (scratch );
1809
+ return invalid ;
1810
+ }
1811
+ // Compute consecutive bonus.
1812
+ if (prev_found + 1 == find_pos )
1813
+ {
1814
+ result .score += scored_fuzzy_match_consecutive ;
1815
+ // We can reuse the existing node and simply extend it.
1816
+ result .list .last -> range .max = find_pos + 1 ;
1817
+ }
1818
+ else
1819
+ {
1820
+ result .score += (find_pos - search_start ) * scored_fuzzy_match_unmatched ;
1821
+ Rng1U64 range = r1u64 (find_pos , find_pos + 1 );
1822
+ FuzzyMatchRangeNode * n = push_array (arena , FuzzyMatchRangeNode , 1 );
1823
+ n -> range = range ;
1824
+ SLLQueuePush (result .list .first , result .list .last , n );
1825
+ result .list .count += 1 ;
1826
+ }
1827
+ prev_found = find_pos ;
1828
+ find_pos += 1 ;
1829
+ }
1830
+ // Compute final unmatched characters.
1831
+ result .score += (haystack .size - find_pos ) * scored_fuzzy_match_unmatched ;
1832
+ scratch_end (scratch );
1833
+ return result ;
1834
+ }
1835
+
1757
1836
////////////////////////////////
1758
1837
//~ NOTE(allen): Serialization Helpers
1759
1838
0 commit comments