@@ -1658,6 +1658,85 @@ fuzzy_match_range_list_copy(Arena *arena, FuzzyMatchRangeList *src)
1658
1658
return dst ;
1659
1659
}
1660
1660
1661
+ internal ScoredFuzzyMatchRangeList
1662
+ scored_fuzzy_match_find (Arena * arena , String8 needle , String8 haystack )
1663
+ {
1664
+ Temp scratch = scratch_begin (0 , 0 );
1665
+ // We're going to implement a very simple scoring mechanism similar to that described in
1666
+ // https://www.forrestthewoods.com/blog/reverse_engineering_sublime_texts_fuzzy_match/.
1667
+ #define scored_fuzzy_match_unmatched -1
1668
+ #define scored_fuzzy_match_consecutive 5
1669
+ #define scored_fuzzy_match_unmatched_leading -3
1670
+ ScoredFuzzyMatchRangeList invalid = {0 };
1671
+ ScoredFuzzyMatchRangeList result = {0 };
1672
+ // Simplify to a single needle which has common delimiters removed.
1673
+ String8List needles = str8_split (scratch .arena , needle , (U8 * )" " , 1 , 0 );
1674
+ needle = str8_list_join (scratch .arena , & needles , 0 );
1675
+ if (needle .size == 0 )
1676
+ {
1677
+ scratch_end (scratch );
1678
+ return invalid ;
1679
+ }
1680
+ String8 tmp_str = str8 (needle .str , 1 );
1681
+ U64 find_pos = 0 ;
1682
+ find_pos = str8_find_needle (haystack , find_pos , tmp_str , StringMatchFlag_CaseInsensitive );
1683
+ if (find_pos >= haystack .size )
1684
+ {
1685
+ scratch_end (scratch );
1686
+ return invalid ;
1687
+ }
1688
+ // Leading character penalty.
1689
+ // Only go to a max of 3 based on the article.
1690
+ result .score += Min (find_pos , 3 ) * scored_fuzzy_match_unmatched_leading ;
1691
+ // We also want to deduct for additional unmatched characters between start and find_pos.
1692
+ if (find_pos > 3 )
1693
+ {
1694
+ result .score += (find_pos - 3 ) * scored_fuzzy_match_unmatched ;
1695
+ }
1696
+ Rng1U64 range = r1u64 (find_pos , find_pos + 1 );
1697
+ FuzzyMatchRangeNode * n = push_array (arena , FuzzyMatchRangeNode , 1 );
1698
+ n -> range = range ;
1699
+ SLLQueuePush (result .list .first , result .list .last , n );
1700
+ result .list .count += 1 ;
1701
+ // Match the rest.
1702
+ U64 prev_found = find_pos ;
1703
+ U64 search_start = 0 ;
1704
+ find_pos += 1 ;
1705
+ for (U64 idx = 1 ; idx < needle .size ; ++ idx )
1706
+ {
1707
+ tmp_str = str8 (needle .str + idx , 1 );
1708
+ search_start = find_pos ;
1709
+ find_pos = str8_find_needle (haystack , find_pos , tmp_str , StringMatchFlag_CaseInsensitive );
1710
+ if (find_pos >= haystack .size )
1711
+ {
1712
+ scratch_end (scratch );
1713
+ return invalid ;
1714
+ }
1715
+ // Compute consecutive bonus.
1716
+ if (prev_found + 1 == find_pos )
1717
+ {
1718
+ result .score += scored_fuzzy_match_consecutive ;
1719
+ // We can reuse the existing node and simply extend it.
1720
+ result .list .last -> range .max = find_pos + 1 ;
1721
+ }
1722
+ else
1723
+ {
1724
+ result .score += (find_pos - search_start ) * scored_fuzzy_match_unmatched ;
1725
+ Rng1U64 range = r1u64 (find_pos , find_pos + 1 );
1726
+ FuzzyMatchRangeNode * n = push_array (arena , FuzzyMatchRangeNode , 1 );
1727
+ n -> range = range ;
1728
+ SLLQueuePush (result .list .first , result .list .last , n );
1729
+ result .list .count += 1 ;
1730
+ }
1731
+ prev_found = find_pos ;
1732
+ find_pos += 1 ;
1733
+ }
1734
+ // Compute final unmatched characters.
1735
+ result .score += (haystack .size - find_pos ) * scored_fuzzy_match_unmatched ;
1736
+ scratch_end (scratch );
1737
+ return result ;
1738
+ }
1739
+
1661
1740
////////////////////////////////
1662
1741
//~ NOTE(allen): Serialization Helpers
1663
1742
0 commit comments