@@ -1716,6 +1716,85 @@ fuzzy_match_range_list_copy(Arena *arena, FuzzyMatchRangeList *src)
17161716 return dst ;
17171717}
17181718
1719+ internal ScoredFuzzyMatchRangeList
1720+ scored_fuzzy_match_find (Arena * arena , String8 needle , String8 haystack )
1721+ {
1722+ Temp scratch = scratch_begin (0 , 0 );
1723+ // We're going to implement a very simple scoring mechanism similar to that described in
1724+ // https://www.forrestthewoods.com/blog/reverse_engineering_sublime_texts_fuzzy_match/.
1725+ #define scored_fuzzy_match_unmatched -1
1726+ #define scored_fuzzy_match_consecutive 5
1727+ #define scored_fuzzy_match_unmatched_leading -3
1728+ ScoredFuzzyMatchRangeList invalid = {0 };
1729+ ScoredFuzzyMatchRangeList result = {0 };
1730+ // Simplify to a single needle which has common delimiters removed.
1731+ String8List needles = str8_split (scratch .arena , needle , (U8 * )" " , 1 , 0 );
1732+ needle = str8_list_join (scratch .arena , & needles , 0 );
1733+ if (needle .size == 0 )
1734+ {
1735+ scratch_end (scratch );
1736+ return invalid ;
1737+ }
1738+ String8 tmp_str = str8 (needle .str , 1 );
1739+ U64 find_pos = 0 ;
1740+ find_pos = str8_find_needle (haystack , find_pos , tmp_str , StringMatchFlag_CaseInsensitive );
1741+ if (find_pos >= haystack .size )
1742+ {
1743+ scratch_end (scratch );
1744+ return invalid ;
1745+ }
1746+ // Leading character penalty.
1747+ // Only go to a max of 3 based on the article.
1748+ result .score += Min (find_pos , 3 ) * scored_fuzzy_match_unmatched_leading ;
1749+ // We also want to deduct for additional unmatched characters between start and find_pos.
1750+ if (find_pos > 3 )
1751+ {
1752+ result .score += (find_pos - 3 ) * scored_fuzzy_match_unmatched ;
1753+ }
1754+ Rng1U64 range = r1u64 (find_pos , find_pos + 1 );
1755+ FuzzyMatchRangeNode * n = push_array (arena , FuzzyMatchRangeNode , 1 );
1756+ n -> range = range ;
1757+ SLLQueuePush (result .list .first , result .list .last , n );
1758+ result .list .count += 1 ;
1759+ // Match the rest.
1760+ U64 prev_found = find_pos ;
1761+ U64 search_start = 0 ;
1762+ find_pos += 1 ;
1763+ for (U64 idx = 1 ; idx < needle .size ; ++ idx )
1764+ {
1765+ tmp_str = str8 (needle .str + idx , 1 );
1766+ search_start = find_pos ;
1767+ find_pos = str8_find_needle (haystack , find_pos , tmp_str , StringMatchFlag_CaseInsensitive );
1768+ if (find_pos >= haystack .size )
1769+ {
1770+ scratch_end (scratch );
1771+ return invalid ;
1772+ }
1773+ // Compute consecutive bonus.
1774+ if (prev_found + 1 == find_pos )
1775+ {
1776+ result .score += scored_fuzzy_match_consecutive ;
1777+ // We can reuse the existing node and simply extend it.
1778+ result .list .last -> range .max = find_pos + 1 ;
1779+ }
1780+ else
1781+ {
1782+ result .score += (find_pos - search_start ) * scored_fuzzy_match_unmatched ;
1783+ Rng1U64 range = r1u64 (find_pos , find_pos + 1 );
1784+ FuzzyMatchRangeNode * n = push_array (arena , FuzzyMatchRangeNode , 1 );
1785+ n -> range = range ;
1786+ SLLQueuePush (result .list .first , result .list .last , n );
1787+ result .list .count += 1 ;
1788+ }
1789+ prev_found = find_pos ;
1790+ find_pos += 1 ;
1791+ }
1792+ // Compute final unmatched characters.
1793+ result .score += (haystack .size - find_pos ) * scored_fuzzy_match_unmatched ;
1794+ scratch_end (scratch );
1795+ return result ;
1796+ }
1797+
17191798////////////////////////////////
17201799//~ NOTE(allen): Serialization Helpers
17211800
0 commit comments