@@ -61,73 +61,3 @@ def predicate(metric: Metric) -> bool:
6161 return pattern .match (metric .name ) is not None
6262
6363 return predicate
64-
65-
def filter_ish(pattern: str) -> Predicate:
    """
    Build a predicate that keeps metrics whose name is roughly like ``pattern``.

    Each metric is checked in two stages:

    1. ``letter_mismatch`` is used as a cheap pre-filter: a metric is rejected
       when more than half of the pattern's characters are unaccounted for.
    2. ``query_levenshtein`` computes an edit distance between the metric name
       and the pattern, with an edit budget of a quarter of their combined
       length. The metric is accepted when the distance, relative to the
       pattern length, is within 10% of the pattern length or one character,
       whichever is larger.

    An empty pattern places no constraint on the name, so the returned
    predicate accepts every metric (previously this raised
    ``ZeroDivisionError``).

    args:
      - pattern: the approximate metric name to look for

    returns: a predicate taking a metric and returning whether it matches
    """
    pattern_len = len(pattern)

    def predicate(metric: Metric) -> bool:
        if pattern_len == 0:
            # Nothing to compare against; also guards the divisions below.
            return True

        mismatch = letter_mismatch(metric.name, pattern)
        if mismatch > pattern_len / 2:
            # Lazy %-formatting: the message is only built when debug is enabled.
            l.debug("%s letter mismatch too high: %s ", metric.name, mismatch)
            return False

        # Edit budget handed to the recursive distance search.
        budget = (len(metric.name) + pattern_len) / 4
        distance = query_levenshtein(metric.name, pattern, 0, 0, budget, False)
        ratio = distance / pattern_len
        # 10% of the length of the pattern or 1 character
        allowable_distance = max(0.1, 1 / pattern_len)
        # Epsilon comparison to absorb floating point inexactness.
        val = ratio - allowable_distance < EPSILON
        l.debug(
            "%s ratio: %s , allowable_distance: %s , val: %s ",
            metric.name,
            ratio,
            allowable_distance,
            val,
        )
        return val

    return predicate
85-
86-
def letter_mismatch(s: str, q: str) -> int:
    """
    Quickly eliminate metrics that will never match.

    Counts how many characters of ``q`` (with multiplicity) cannot be paired
    with a character of ``s``. Character order is ignored, and characters
    that ``s`` has in surplus do not count.

    args:
      - s: the string being searched (e.g. a metric name)
      - q: the query whose characters must be supplied by ``s``

    returns: the number of query characters missing from ``s``; 0 when ``s``
    contains every character of ``q`` at least as often as ``q`` does
    """
    # Function-scope import keeps this block self-contained.
    from collections import Counter

    # Counter subtraction keeps only positive counts, i.e. exactly the
    # characters the query needs more of than ``s`` can provide.
    shortfall = Counter(q) - Counter(s)
    return sum(shortfall.values())
106-
107-
def query_levenshtein(s, q, si, qi, badness: float, started):
    """
    Modified Levenshtein distance.
    Tries to not impose penalties for substring matches:
    - do not penalise advancing through ``s`` before the match has started
    - do not penalise differences after a complete match

    args:
      - s: the string being searched
      - q: the query to find inside ``s``
      - si: start index of s
      - qi: start index of q
      - badness: remaining edit budget; every non-matching step (including an
        unpenalised skip of a character of ``s``) consumes 1
      - started: whether a query character has already been consumed; once
        true, skipping a character of ``s`` costs 1

    returns: the cheapest cost found for aligning ``q[qi:]`` against
    ``s[si:]``, or 1e6 when the budget is exhausted
    """
    if badness <= 0:
        # Budget used up: prune this branch with a prohibitively large cost.
        return 1e6
    if qi == len(q):
        # Entire query is processed, so we're happy. This must be tested
        # before the end-of-``s`` case so that a query ending exactly at the
        # end of ``s`` (e.g. an exact match) costs nothing.
        return 0
    if si == len(s):
        # ``s`` ran out first: only the still-unprocessed part of the query
        # is missing.
        return len(q) - qi
    if s[si] == q[qi]:
        return query_levenshtein(s, q, si + 1, qi + 1, badness, True)

    # Characters differ: try skipping in s, skipping in q, or substituting.
    skip_s = query_levenshtein(s, q, si + 1, qi, badness - 1, started)
    return min(
        skip_s + 1 if started else skip_s,
        1 + query_levenshtein(s, q, si, qi + 1, badness - 1, True),
        1 + query_levenshtein(s, q, si + 1, qi + 1, badness - 1, True),
    )
0 commit comments