Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit 778f3db

Browse files
6543, silverwind
authored and committed Mar 23, 2024
Determine fuzziness of bleve indexer by keyword length (go-gitea#29706)
Also, bleve did a match query on fuzzy search and the other way around; this commit fixes that bug as well.
1 parent 54a709a commit 778f3db

File tree

4 files changed

+29
-37
lines changed

4 files changed

+29
-37
lines changed
 

‎modules/indexer/code/bleve/bleve.go

+7-8
Original file line number | Diff line number | Diff line change
@@ -39,6 +39,8 @@ import (
3939
const (
4040
unicodeNormalizeName = "unicodeNormalize"
4141
maxBatchSize = 16
42+
// fuzzyDenominator determines the levenshtein distance per each character of a keyword
43+
fuzzyDenominator = 4
4244
)
4345

4446
func addUnicodeNormalizeTokenFilter(m *mapping.IndexMappingImpl) error {
@@ -239,15 +241,12 @@ func (b *Indexer) Search(ctx context.Context, opts *internal.SearchOptions) (int
239241
keywordQuery query.Query
240242
)
241243

244+
phraseQuery := bleve.NewMatchPhraseQuery(opts.Keyword)
245+
phraseQuery.FieldVal = "Content"
246+
phraseQuery.Analyzer = repoIndexerAnalyzer
247+
keywordQuery = phraseQuery
242248
if opts.IsKeywordFuzzy {
243-
phraseQuery := bleve.NewMatchPhraseQuery(opts.Keyword)
244-
phraseQuery.FieldVal = "Content"
245-
phraseQuery.Analyzer = repoIndexerAnalyzer
246-
keywordQuery = phraseQuery
247-
} else {
248-
prefixQuery := bleve.NewPrefixQuery(opts.Keyword)
249-
prefixQuery.FieldVal = "Content"
250-
keywordQuery = prefixQuery
249+
phraseQuery.Fuzziness = len(opts.Keyword) / fuzzyDenominator
251250
}
252251

253252
if len(opts.RepoIDs) > 0 {

‎modules/indexer/internal/bleve/query.go

+2-8
Original file line number | Diff line number | Diff line change
@@ -20,17 +20,11 @@ func NumericEqualityQuery(value int64, field string) *query.NumericRangeQuery {
2020
}
2121

2222
// MatchPhraseQuery generates a match phrase query for the given phrase, field and analyzer
23-
func MatchPhraseQuery(matchPhrase, field, analyzer string) *query.MatchPhraseQuery {
23+
func MatchPhraseQuery(matchPhrase, field, analyzer string, fuzziness int) *query.MatchPhraseQuery {
2424
q := bleve.NewMatchPhraseQuery(matchPhrase)
2525
q.FieldVal = field
2626
q.Analyzer = analyzer
27-
return q
28-
}
29-
30-
// PrefixQuery generates a match prefix query for the given prefix and field
31-
func PrefixQuery(matchPrefix, field string) *query.PrefixQuery {
32-
q := bleve.NewPrefixQuery(matchPrefix)
33-
q.FieldVal = field
27+
q.Fuzziness = fuzziness
3428
return q
3529
}
3630

‎modules/indexer/issues/bleve/bleve.go

+13-12
Original file line number | Diff line number | Diff line change
@@ -35,7 +35,11 @@ func addUnicodeNormalizeTokenFilter(m *mapping.IndexMappingImpl) error {
3535
})
3636
}
3737

38-
const maxBatchSize = 16
38+
const (
39+
maxBatchSize = 16
40+
// fuzzyDenominator determines the levenshtein distance per each character of a keyword
41+
fuzzyDenominator = 4
42+
)
3943

4044
// IndexerData an update to the issue indexer
4145
type IndexerData internal.IndexerData
@@ -156,19 +160,16 @@ func (b *Indexer) Search(ctx context.Context, options *internal.SearchOptions) (
156160
var queries []query.Query
157161

158162
if options.Keyword != "" {
163+
fuzziness := 0
159164
if options.IsFuzzyKeyword {
160-
queries = append(queries, bleve.NewDisjunctionQuery([]query.Query{
161-
inner_bleve.MatchPhraseQuery(options.Keyword, "title", issueIndexerAnalyzer),
162-
inner_bleve.MatchPhraseQuery(options.Keyword, "content", issueIndexerAnalyzer),
163-
inner_bleve.MatchPhraseQuery(options.Keyword, "comments", issueIndexerAnalyzer),
164-
}...))
165-
} else {
166-
queries = append(queries, bleve.NewDisjunctionQuery([]query.Query{
167-
inner_bleve.PrefixQuery(options.Keyword, "title"),
168-
inner_bleve.PrefixQuery(options.Keyword, "content"),
169-
inner_bleve.PrefixQuery(options.Keyword, "comments"),
170-
}...))
165+
fuzziness = len(options.Keyword) / fuzzyDenominator
171166
}
167+
168+
queries = append(queries, bleve.NewDisjunctionQuery([]query.Query{
169+
inner_bleve.MatchPhraseQuery(options.Keyword, "title", issueIndexerAnalyzer, fuzziness),
170+
inner_bleve.MatchPhraseQuery(options.Keyword, "content", issueIndexerAnalyzer, fuzziness),
171+
inner_bleve.MatchPhraseQuery(options.Keyword, "comments", issueIndexerAnalyzer, fuzziness),
172+
}...))
172173
}
173174

174175
if len(options.RepoIDs) > 0 || options.AllPublic {

‎tests/integration/repo_search_test.go

+7-9
Original file line number | Diff line number | Diff line change
@@ -32,7 +32,7 @@ func TestSearchRepo(t *testing.T) {
3232
repo, err := repo_model.GetRepositoryByOwnerAndName(db.DefaultContext, "user2", "repo1")
3333
assert.NoError(t, err)
3434

35-
executeIndexer(t, repo, code_indexer.UpdateRepoIndexer)
35+
code_indexer.UpdateRepoIndexer(repo)
3636

3737
testSearch(t, "/user2/repo1/search?q=Description&page=1", []string{"README.md"})
3838

@@ -42,12 +42,14 @@ func TestSearchRepo(t *testing.T) {
4242
repo, err = repo_model.GetRepositoryByOwnerAndName(db.DefaultContext, "user2", "glob")
4343
assert.NoError(t, err)
4444

45-
executeIndexer(t, repo, code_indexer.UpdateRepoIndexer)
45+
code_indexer.UpdateRepoIndexer(repo)
4646

4747
testSearch(t, "/user2/glob/search?q=loren&page=1", []string{"a.txt"})
48-
testSearch(t, "/user2/glob/search?q=file3&page=1", []string{"x/b.txt"})
49-
testSearch(t, "/user2/glob/search?q=file4&page=1", []string{})
50-
testSearch(t, "/user2/glob/search?q=file5&page=1", []string{})
48+
testSearch(t, "/user2/glob/search?q=loren&page=1&t=match", []string{"a.txt"})
49+
testSearch(t, "/user2/glob/search?q=file3&page=1", []string{"x/b.txt", "a.txt"})
50+
testSearch(t, "/user2/glob/search?q=file3&page=1&t=match", []string{"x/b.txt", "a.txt"})
51+
testSearch(t, "/user2/glob/search?q=file4&page=1&t=match", []string{"x/b.txt", "a.txt"})
52+
testSearch(t, "/user2/glob/search?q=file5&page=1&t=match", []string{"x/b.txt", "a.txt"})
5153
}
5254

5355
func testSearch(t *testing.T, url string, expected []string) {
@@ -57,7 +59,3 @@ func testSearch(t *testing.T, url string, expected []string) {
5759
filenames := resultFilenames(t, NewHTMLParser(t, resp.Body))
5860
assert.EqualValues(t, expected, filenames)
5961
}
60-
61-
func executeIndexer(t *testing.T, repo *repo_model.Repository, op func(*repo_model.Repository)) {
62-
op(repo)
63-
}

0 commit comments

Comments
 (0)
Please sign in to comment.