Skip to content

Commit df940e3

Browse files
authoredJan 7, 2021
Merge pull request #16 from luistrivelatto/master
Fixes #15, #17, #18, and does other improvements
2 parents 4e58f1c + f17fbbd commit df940e3

File tree

8 files changed

+394
-29
lines changed

8 files changed

+394
-29
lines changed
 

‎.github/actions/dart-test/Dockerfile

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
FROM cirrusci/flutter
1+
FROM cirrusci/flutter:beta
22

33
USER root
44

‎CHANGELOG.md

+11
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,16 @@
11
# CHANGELOG
22

3+
## 0.3.0
4+
- Improve search results on weighted search to combine scores from all keys
5+
- Improve search results on single-keyed search, making it consistent with non-weighted search
6+
- Add parameter to ignore tokens smaller than a certain length when searching
7+
- Add normalization of WeightedKey weights
8+
- Fix bug where results returned from search all had arrayIndex = -1
9+
- Fix bug where the token scores didn't count towards the result score
10+
11+
## 0.2.5
12+
- Fix bug for search that started or ended with whitespace when tokenize option is true
13+
314
## 0.2.4
415
- Bump dependencies, fix CI
516

‎lib/data/fuzzy_options.dart

+32-4
Original file line numberDiff line numberDiff line change
@@ -9,15 +9,17 @@ class WeightedKey<T> {
99
@required this.name,
1010
@required this.getter,
1111
@required this.weight,
12-
}) : assert(weight >= 0 && weight <= 1);
12+
}) : assert(weight > 0, 'Weight should be positive and non-zero');
1313

1414
/// Name of this getter
1515
final String name;
1616

1717
/// Getter to a specific string inside item
1818
final String Function(T obj) getter;
1919

20-
/// Weight of this getter
20+
/// Weight of this getter. When passing a list of WeightedKey to FuzzyOptions,
21+
/// the weight can be any positive number; FuzzyOptions normalizes it on
22+
/// construction.
2123
final double weight;
2224
}
2325

@@ -28,17 +30,20 @@ int _defaultSortFn<T>(Result<T> a, Result<T> b) => a.score.compareTo(b.score);
2830

2931
/// Options for performing a fuzzy search
3032
class FuzzyOptions<T> {
31-
/// Instantiate an options object
33+
/// Instantiate an options object.
34+
/// Every weight in the `keys` list must be a positive number (weights are
35+
/// normalized upon instantiation); WeightedKey asserts this on construction.
3236
FuzzyOptions({
3337
this.location = 0,
3438
this.distance = 100,
3539
this.threshold = 0.6,
3640
this.maxPatternLength = 32,
3741
this.isCaseSensitive = false,
3842
Pattern tokenSeparator,
43+
this.minTokenCharLength = 1,
3944
this.findAllMatches = false,
4045
this.minMatchCharLength = 1,
41-
this.keys = const [],
46+
List<WeightedKey<T>> keys = const [],
4247
this.shouldSort = true,
4348
SorterFn<T> sortFn,
4449
this.tokenize = false,
@@ -47,6 +52,7 @@ class FuzzyOptions<T> {
4752
this.shouldNormalize = false,
4853
}) : tokenSeparator =
4954
tokenSeparator ?? RegExp(r' +', caseSensitive: isCaseSensitive),
55+
keys = _normalizeWeights(keys),
5056
sortFn = sortFn ?? _defaultSortFn;
5157

5258
/// Approximately where in the text is the pattern expected to be found?
@@ -72,6 +78,9 @@ class FuzzyOptions<T> {
7278
/// Regex used to separate words when searching. Only applicable when `tokenize` is `true`.
7379
final Pattern tokenSeparator;
7480

81+
/// Ignore tokens with length smaller than this. Only applicable when `tokenize` is `true`.
82+
final int minTokenCharLength;
83+
7584
/// When true, the algorithm continues searching to the end of the input even if a perfect
7685
/// match is found before the end of the same input.
7786
final bool findAllMatches;
@@ -112,6 +121,7 @@ class FuzzyOptions<T> {
112121
maxPatternLength: options?.maxPatternLength ?? maxPatternLength,
113122
isCaseSensitive: options?.isCaseSensitive ?? isCaseSensitive,
114123
tokenSeparator: options?.tokenSeparator ?? tokenSeparator,
124+
minTokenCharLength: options?.minTokenCharLength ?? minTokenCharLength,
115125
findAllMatches: options?.findAllMatches ?? findAllMatches,
116126
minMatchCharLength: options?.minMatchCharLength ?? minMatchCharLength,
117127
keys: options?.keys ?? keys,
@@ -122,4 +132,22 @@ class FuzzyOptions<T> {
122132
verbose: options?.verbose ?? verbose,
123133
shouldNormalize: options?.shouldNormalize ?? shouldNormalize,
124134
);
135+
136+
static List<WeightedKey<T>> _normalizeWeights<T>(List<WeightedKey<T>> keys) {
137+
if (keys.isEmpty) {
138+
return [];
139+
}
140+
141+
var weightSum = keys
142+
.map((key) => key.weight)
143+
.fold<double>(0, (previousValue, element) => previousValue + element);
144+
145+
return keys
146+
.map((key) => WeightedKey<T>(
147+
name: key.name,
148+
getter: key.getter,
149+
weight: key.weight / weightSum,
150+
))
151+
.toList();
152+
}
125153
}

‎lib/data/result.dart

+2-1
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,8 @@ class ResultDetails<T> {
8181
/// Score of this result
8282
final double score;
8383

84-
/// nScore of this result (?)
84+
/// nScore of this result. It's the weighted score of the match, when it's
85+
/// a weighted search (i.e. uses WeightedKeys).
8586
double nScore;
8687

8788
/// Indexes of matched patterns on the value

‎lib/fuzzy.dart

+33-21
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,9 @@ class Fuzzy<T> {
6767

6868
if (options.tokenize) {
6969
// Tokenize on the separator
70-
final tokens = pattern.split(options.tokenSeparator);
70+
final tokens = pattern.split(options.tokenSeparator)
71+
..removeWhere((token) => token.isEmpty)
72+
..removeWhere((token) => token.length < options.minTokenCharLength);
7173
for (var i = 0, len = tokens.length; i < len; i += 1) {
7274
tokenSearchers.add(Bitap(tokens[i], options: options));
7375
}
@@ -136,7 +138,6 @@ class Fuzzy<T> {
136138

137139
List<Result<T>> _analyze({
138140
String key = '',
139-
int arrayIndex = -1,
140141
String value,
141142
T record,
142143
int index,
@@ -153,7 +154,7 @@ class Fuzzy<T> {
153154
}
154155

155156
var exists = false;
156-
var averageScore = -1;
157+
var averageScore = -1.0;
157158
var numTextMatches = 0;
158159

159160
final mainSearchResult = fullSearcher.search(value.toString());
@@ -190,8 +191,8 @@ class Fuzzy<T> {
190191
}
191192
}
192193

193-
final averageScore =
194-
scores.fold(0, (memo, score) => memo + score) / scores.length;
194+
averageScore =
195+
scores.fold<double>(0, (memo, score) => memo + score) / scores.length;
195196

196197
_log('Token score average: $averageScore');
197198
}
@@ -218,7 +219,7 @@ class Fuzzy<T> {
218219
// existingResult.score, bitapResult.score
219220
existingResult.matches.add(ResultDetails<T>(
220221
key: key,
221-
arrayIndex: arrayIndex,
222+
arrayIndex: index,
222223
value: value,
223224
score: finalScore,
224225
matchedIndices: mainSearchResult.matchedIndices,
@@ -230,7 +231,7 @@ class Fuzzy<T> {
230231
matches: [
231232
ResultDetails<T>(
232233
key: key,
233-
arrayIndex: arrayIndex,
234+
arrayIndex: index,
234235
value: value,
235236
score: finalScore,
236237
matchedIndices: mainSearchResult.matchedIndices,
@@ -254,29 +255,40 @@ class Fuzzy<T> {
254255
void _computeScore(Map<String, double> weights, List<Result<T>> results) {
255256
_log('\n\nComputing score:\n');
256257

258+
if (weights.length <= 1) {
259+
_computeScoreNoWeights(results);
260+
} else {
261+
_computeScoreWithWeights(weights, results);
262+
}
263+
}
264+
265+
void _computeScoreNoWeights(List<Result<T>> results) {
257266
for (var i = 0, len = results.length; i < len; i += 1) {
258267
final matches = results[i].matches;
259-
final scoreLen = matches.length;
268+
var bestScore = matches.map((m) => m.score).fold<double>(
269+
1.0, (previousValue, element) => min(previousValue, element));
270+
results[i].score = bestScore;
271+
}
272+
}
260273

274+
void _computeScoreWithWeights(
275+
Map<String, double> weights, List<Result<T>> results) {
276+
for (var i = 0, len = results.length; i < len; i += 1) {
261277
var currScore = 1.0;
262-
var bestScore = 1.0;
263278

264-
for (var j = 0; j < scoreLen; j += 1) {
265-
final weight = weights[matches[j].key] ?? 1.0;
266-
final score = weight == 1.0
267-
? matches[j].score
268-
: (matches[j].score == 0.0 ? 0.001 : matches[j].score);
279+
for (var match in results[i].matches) {
280+
var weight = weights[match.key];
281+
assert(weight != null);
282+
283+
// We don't use 0 so that the weight differences don't get zeroed out
284+
final score = match.score == 0.0 ? 0.001 : match.score;
269285
final nScore = score * weight;
270286

271-
if (weight != 1) {
272-
bestScore = min(bestScore, nScore);
273-
} else {
274-
matches[j].nScore = nScore;
275-
currScore *= nScore;
276-
}
287+
match.nScore = nScore;
288+
currScore *= nScore;
277289
}
278290

279-
results[i].score = bestScore == 1.0 ? currScore : bestScore;
291+
results[i].score = currScore;
280292
}
281293
}
282294

‎pubspec.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
name: fuzzy
2-
version: 0.2.4
2+
version: 0.3.0
33

44
description: >
55
Fuzzy search in Dart. initially translated from Fuse.js.

‎test/fixtures/games.dart

+36
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
class Game {
2+
Game({this.tournament, this.stage});
3+
4+
final String tournament;
5+
final String stage;
6+
7+
@override
8+
String toString() => '$tournament $stage';
9+
}
10+
11+
final customGameList = [
12+
Game(
13+
tournament: 'WorldCup',
14+
stage: 'Groups',
15+
),
16+
Game(
17+
tournament: 'WorldCup',
18+
stage: 'Semi-finals',
19+
),
20+
Game(
21+
tournament: 'WorldCup',
22+
stage: 'Final',
23+
),
24+
Game(
25+
tournament: 'ChampionsLeague',
26+
stage: 'Groups',
27+
),
28+
Game(
29+
tournament: 'ChampionsLeague',
30+
stage: 'Semi-finals',
31+
),
32+
Game(
33+
tournament: 'ChampionsLeague',
34+
stage: 'Final',
35+
),
36+
];

‎test/fuzzy_test.dart

+278-1
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ import 'package:fuzzy/fuzzy.dart';
22
import 'package:test/test.dart';
33

44
import 'fixtures/books.dart';
5+
import 'fixtures/games.dart';
56

67
final defaultList = ['Apple', 'Orange', 'Banana'];
78
final defaultOptions = FuzzyOptions(
@@ -11,6 +12,7 @@ final defaultOptions = FuzzyOptions(
1112
maxPatternLength: 32,
1213
isCaseSensitive: false,
1314
tokenSeparator: RegExp(r' +'),
15+
minTokenCharLength: 1,
1416
findAllMatches: false,
1517
minMatchCharLength: 1,
1618
shouldSort: true,
@@ -30,6 +32,16 @@ Fuzzy setup({
3032
);
3133
}
3234

35+
Fuzzy<T> setupGeneric<T>({
36+
List<T> itemList,
37+
FuzzyOptions<T> options,
38+
}) {
39+
return Fuzzy<T>(
40+
itemList,
41+
options: options,
42+
);
43+
}
44+
3345
void main() {
3446
group('Empty list of strings', () {
3547
Fuzzy fuse;
@@ -50,7 +62,6 @@ void main() {
5062
});
5163
test('empty result is returned', () {
5264
final result = fuse.search('Bla');
53-
print(result);
5465
expect(result.isEmpty, true);
5566
});
5667
});
@@ -128,6 +139,25 @@ void main() {
128139
});
129140
});
130141

142+
group('Include arrayIndex in result list', () {
143+
Fuzzy fuse;
144+
setUp(() {
145+
fuse = setup();
146+
});
147+
148+
test('When performing a fuzzy search for the term "ran"', () {
149+
final result = fuse.search('ran');
150+
151+
expect(result.length, 2, reason: 'we get a list of containing 2 items');
152+
153+
expect(result[0].item, equals('Orange'));
154+
expect(result[0].matches.single.arrayIndex, 1);
155+
156+
expect(result[1].item, equals('Banana'));
157+
expect(result[1].matches.single.arrayIndex, 2);
158+
});
159+
});
160+
131161
group('Weighted search on typed list', () {
132162
test('When searching for the term "John Smith" with author weighted higher',
133163
() {
@@ -210,6 +240,141 @@ void main() {
210240
});
211241
});
212242

243+
group('Weighted search considers all keys in score', () {
244+
Fuzzy<Game> getFuzzy({double tournamentWeight, double stageWeight}) {
245+
return Fuzzy<Game>(
246+
customGameList,
247+
options: FuzzyOptions(
248+
keys: [
249+
WeightedKey(
250+
getter: (i) => i.tournament,
251+
weight: tournamentWeight,
252+
name: 'tournament'),
253+
WeightedKey(
254+
getter: (i) => i.stage, weight: stageWeight, name: 'stage'),
255+
],
256+
tokenize: true,
257+
),
258+
);
259+
}
260+
261+
test('When searching for "WorldCup Final", where weights are equal', () {
262+
final fuse = getFuzzy(
263+
tournamentWeight: 0.5,
264+
stageWeight: 0.5,
265+
);
266+
final result = fuse.search('WorldCup Final');
267+
268+
void expectLess(String a, String b) {
269+
double scoreOf(String s) =>
270+
result.singleWhere((e) => e.item.toString() == s).score;
271+
expect(scoreOf(a), lessThan(scoreOf(b)));
272+
}
273+
274+
expectLess('WorldCup Final', 'WorldCup Semi-finals');
275+
expectLess('WorldCup Semi-finals', 'WorldCup Groups');
276+
expectLess('WorldCup Groups', 'ChampionsLeague Final');
277+
expectLess('ChampionsLeague Final', 'ChampionsLeague Semi-finals');
278+
});
279+
280+
test(
281+
'When searching for "WorldCup Final", where the tournament is weighted higher',
282+
() {
283+
final fuse = getFuzzy(
284+
tournamentWeight: 0.8,
285+
stageWeight: 0.2,
286+
);
287+
final result = fuse.search('WorldCup Final');
288+
289+
void expectLess(String a, String b) {
290+
double scoreOf(String s) =>
291+
result.singleWhere((e) => e.item.toString() == s).score;
292+
expect(scoreOf(a), lessThan(scoreOf(b)));
293+
}
294+
295+
expectLess('WorldCup Final', 'WorldCup Semi-finals');
296+
expectLess('WorldCup Semi-finals', 'WorldCup Groups');
297+
expectLess('WorldCup Groups', 'ChampionsLeague Final');
298+
expectLess('ChampionsLeague Final', 'ChampionsLeague Semi-finals');
299+
});
300+
301+
test(
302+
'When searching for "WorldCup Final", where the stage is weighted higher',
303+
() {
304+
final fuse = getFuzzy(
305+
tournamentWeight: 0.2,
306+
stageWeight: 0.8,
307+
);
308+
final result = fuse.search('WorldCup Final');
309+
310+
void expectLess(String a, String b) {
311+
double scoreOf(String s) =>
312+
result.singleWhere((e) => e.item.toString() == s).score;
313+
expect(scoreOf(a), lessThan(scoreOf(b)));
314+
}
315+
316+
expectLess('WorldCup Final', 'WorldCup Semi-finals');
317+
expectLess('WorldCup Semi-finals', 'WorldCup Groups');
318+
expectLess('ChampionsLeague Final', 'WorldCup Groups');
319+
expectLess('ChampionsLeague Final', 'ChampionsLeague Semi-finals');
320+
});
321+
});
322+
323+
group('Weighted search with a single key equals non-weighted search', () {
324+
String gameDescription(Game g) => '${g.tournament} ${g.stage}';
325+
326+
test('When searching for "WorldCup semi-final"', () {
327+
final fuseNoKeys = Fuzzy(
328+
customGameList.map((g) => gameDescription(g)).toList(),
329+
options: FuzzyOptions(),
330+
);
331+
Fuzzy fuseSingleKey = Fuzzy<Game>(
332+
customGameList,
333+
options: FuzzyOptions(
334+
keys: [
335+
WeightedKey(
336+
name: 'desc', getter: (g) => gameDescription(g), weight: 1),
337+
],
338+
),
339+
);
340+
final resultNoKeys = fuseNoKeys.search('WorldCup semi-final');
341+
final resultSingleKey = fuseSingleKey.search('WorldCup semi-final');
342+
343+
// Check for equality using 'toString()', otherwise it checks for
344+
// identity equality (i.e. same objects instead of same contents)
345+
expect(resultNoKeys.toString(), equals(resultSingleKey.toString()));
346+
347+
expect(resultNoKeys[0].item, 'WorldCup Semi-finals');
348+
expect(resultNoKeys[0].score, lessThan(resultNoKeys[1].score));
349+
});
350+
});
351+
352+
group('FuzzyOptions normalizes the keys weights', () {
353+
test("WeightedKey doesn't allow creating a non-positive weight", () {
354+
expect(
355+
() => WeightedKey<String>(name: 'name', getter: (i) => i, weight: -1),
356+
throwsA(isA<AssertionError>()));
357+
expect(
358+
() => WeightedKey<String>(name: 'name', getter: (i) => i, weight: 0),
359+
throwsA(isA<AssertionError>()));
360+
expect(
361+
() => WeightedKey<String>(name: 'name', getter: (i) => i, weight: 1),
362+
returnsNormally);
363+
});
364+
365+
test('Normalizes weights', () {
366+
var options = FuzzyOptions(keys: [
367+
WeightedKey<String>(name: 'name1', getter: (i) => i, weight: 0.5),
368+
WeightedKey<String>(name: 'name2', getter: (i) => i, weight: 0.5),
369+
WeightedKey<String>(name: 'name3', getter: (i) => i, weight: 3),
370+
]);
371+
372+
expect(options.keys[0].weight, 0.125);
373+
expect(options.keys[1].weight, 0.125);
374+
expect(options.keys[2].weight, 0.75);
375+
});
376+
});
377+
213378
group(
214379
'Search with match all tokens in a list of strings with leading and trailing whitespace',
215380
() {
@@ -232,6 +397,56 @@ void main() {
232397
});
233398
});
234399

400+
group(
401+
'Search with tokenize where the search pattern starts or ends with the tokenSeparator',
402+
() {
403+
group('With the default tokenSeparator, which is white space', () {
404+
Fuzzy fuse;
405+
setUp(() {
406+
fuse = setup(overwriteOptions: FuzzyOptions(tokenize: true));
407+
});
408+
409+
test('When the search pattern starts with white space', () {
410+
final result = fuse.search(' Apple');
411+
412+
expect(result.length, 1, reason: 'we get a list of exactly 1 item');
413+
expect(result[0].item, equals('Apple'));
414+
});
415+
416+
test('When the search pattern ends with white space', () {
417+
final result = fuse.search('Apple ');
418+
419+
expect(result.length, 1, reason: 'we get a list of exactly 1 item');
420+
expect(result[0].item, equals('Apple'));
421+
});
422+
423+
test('When the search pattern contains white space in the middle', () {
424+
final result = fuse.search('Apple Orange');
425+
426+
expect(result.length, 2, reason: 'we get a list of exactly 2 itens');
427+
expect(result[0].item, equals('Orange'));
428+
expect(result[1].item, equals('Apple'));
429+
});
430+
});
431+
432+
group('With a custom tokenSeparator', () {
433+
Fuzzy fuse;
434+
setUp(() {
435+
fuse = setup(
436+
overwriteOptions:
437+
FuzzyOptions(tokenize: true, tokenSeparator: RegExp(';')));
438+
});
439+
440+
test('When the search pattern ends with a tokenSeparator match', () {
441+
final result = fuse.search('Apple;Orange;');
442+
443+
expect(result.length, 2, reason: 'we get a list of exactly 2 itens');
444+
expect(result[0].item, equals('Orange'));
445+
expect(result[1].item, equals('Apple'));
446+
});
447+
});
448+
});
449+
235450
group('Search with match all tokens', () {
236451
Fuzzy fuse;
237452
setUp(() {
@@ -271,6 +486,29 @@ void main() {
271486
});
272487
});
273488

489+
group('Search with tokenize includes token average on result score', () {
490+
Fuzzy fuse;
491+
setUp(() {
492+
final customList = ['Apple and Orange Juice'];
493+
fuse = setup(
494+
itemList: customList,
495+
overwriteOptions: FuzzyOptions(threshold: 0.1, tokenize: true),
496+
);
497+
});
498+
499+
test('When searching for the term "Apple Juice"', () {
500+
final result = fuse.search('Apple Juice');
501+
502+
// By using a lower threshold, we guarantee that the full text score
503+
// ("apple juice" on "Apple and Orange Juice") returns a score of 1.0,
504+
// while the token searches return 0.0 (perfect matches) for "Apple" and
505+
// "Juice". Thus, the token score average is 0.0, and the result score
506+
// should be (1.0 + 0.0) / 2 = 0.5
507+
expect(result.length, 1);
508+
expect(result[0].score, 0.5);
509+
});
510+
});
511+
274512
group('Searching with default options', () {
275513
Fuzzy fuse;
276514
setUp(() {
@@ -316,6 +554,45 @@ void main() {
316554
});
317555
});
318556

557+
group('Searching with minTokenCharLength', () {
558+
Fuzzy<Book> setUp({int minTokenCharLength}) => setupGeneric<Book>(
559+
itemList: customBookList,
560+
options: FuzzyOptions(
561+
threshold: 0.3,
562+
tokenize: true,
563+
minTokenCharLength: minTokenCharLength,
564+
keys: [
565+
WeightedKey(getter: (i) => i.title, weight: 0.5, name: 'title'),
566+
WeightedKey(getter: (i) => i.author, weight: 0.5, name: 'author'),
567+
],
568+
),
569+
);
570+
571+
test('When searching for "Plants x Zombies" with min = 1', () {
572+
final fuse = setUp(minTokenCharLength: 1);
573+
final result = fuse.search('Plants x Zombies');
574+
575+
expect(result.length, 1, reason: 'We get a match with 1 item');
576+
expect(result.single.item.author, 'John X',
577+
reason: 'Due to the X on John X');
578+
});
579+
580+
test('When searching for "Plants x Zombies" with min = 2', () {
581+
final fuse = setUp(minTokenCharLength: 2);
582+
final result = fuse.search('Plants x Zombies');
583+
584+
expect(result.length, 0, reason: 'We get no matches');
585+
});
586+
587+
test('When searching for a pattern smaller than the length', () {
588+
final fuse = setUp(minTokenCharLength: 100);
589+
final result = fuse.search('John');
590+
591+
expect(result.length, 3,
592+
reason: 'We still get matches because of full text search');
593+
});
594+
});
595+
319596
group('Searching with minCharLength', () {
320597
Fuzzy fuse;
321598
setUp(() {

0 commit comments

Comments
 (0)
Please sign in to comment.