diff --git a/modules/indexer/code/bleve/bleve.go b/modules/indexer/code/bleve/bleve.go index c607d780ef968..d7f735e957db9 100644 --- a/modules/indexer/code/bleve/bleve.go +++ b/modules/indexer/code/bleve/bleve.go @@ -39,8 +39,6 @@ import ( const ( unicodeNormalizeName = "unicodeNormalize" maxBatchSize = 16 - // fuzzyDenominator determines the levenshtein distance per each character of a keyword - fuzzyDenominator = 4 ) func addUnicodeNormalizeTokenFilter(m *mapping.IndexMappingImpl) error { @@ -241,12 +239,15 @@ func (b *Indexer) Search(ctx context.Context, opts *internal.SearchOptions) (int keywordQuery query.Query ) - phraseQuery := bleve.NewMatchPhraseQuery(opts.Keyword) - phraseQuery.FieldVal = "Content" - phraseQuery.Analyzer = repoIndexerAnalyzer - keywordQuery = phraseQuery if opts.IsKeywordFuzzy { - phraseQuery.Fuzziness = len(opts.Keyword) / fuzzyDenominator + phraseQuery := bleve.NewMatchPhraseQuery(opts.Keyword) + phraseQuery.FieldVal = "Content" + phraseQuery.Analyzer = repoIndexerAnalyzer + keywordQuery = phraseQuery + } else { + prefixQuery := bleve.NewPrefixQuery(opts.Keyword) + prefixQuery.FieldVal = "Content" + keywordQuery = prefixQuery } if len(opts.RepoIDs) > 0 { diff --git a/modules/indexer/internal/bleve/query.go b/modules/indexer/internal/bleve/query.go index 21422b281c498..b96875343e5ea 100644 --- a/modules/indexer/internal/bleve/query.go +++ b/modules/indexer/internal/bleve/query.go @@ -20,11 +20,17 @@ func NumericEqualityQuery(value int64, field string) *query.NumericRangeQuery { } // MatchPhraseQuery generates a match phrase query for the given phrase, field and analyzer -func MatchPhraseQuery(matchPhrase, field, analyzer string, fuzziness int) *query.MatchPhraseQuery { +func MatchPhraseQuery(matchPhrase, field, analyzer string) *query.MatchPhraseQuery { q := bleve.NewMatchPhraseQuery(matchPhrase) q.FieldVal = field q.Analyzer = analyzer - q.Fuzziness = fuzziness + return q +} + +// PrefixQuery generates a match prefix query for the given prefix and field +func PrefixQuery(matchPrefix, field string) *query.PrefixQuery { + q := bleve.NewPrefixQuery(matchPrefix) + q.FieldVal = field return q } diff --git a/modules/indexer/issues/bleve/bleve.go b/modules/indexer/issues/bleve/bleve.go index 1f54be721b37c..927ad58cd4c57 100644 --- a/modules/indexer/issues/bleve/bleve.go +++ b/modules/indexer/issues/bleve/bleve.go @@ -35,11 +35,7 @@ func addUnicodeNormalizeTokenFilter(m *mapping.IndexMappingImpl) error { }) } -const ( - maxBatchSize = 16 - // fuzzyDenominator determines the levenshtein distance per each character of a keyword - fuzzyDenominator = 4 -) +const maxBatchSize = 16 // IndexerData an update to the issue indexer type IndexerData internal.IndexerData @@ -160,16 +156,19 @@ func (b *Indexer) Search(ctx context.Context, options *internal.SearchOptions) ( var queries []query.Query if options.Keyword != "" { - fuzziness := 0 if options.IsFuzzyKeyword { - fuzziness = len(options.Keyword) / fuzzyDenominator + queries = append(queries, bleve.NewDisjunctionQuery([]query.Query{ + inner_bleve.MatchPhraseQuery(options.Keyword, "title", issueIndexerAnalyzer), + inner_bleve.MatchPhraseQuery(options.Keyword, "content", issueIndexerAnalyzer), + inner_bleve.MatchPhraseQuery(options.Keyword, "comments", issueIndexerAnalyzer), + }...)) + } else { + queries = append(queries, bleve.NewDisjunctionQuery([]query.Query{ + inner_bleve.PrefixQuery(options.Keyword, "title"), + inner_bleve.PrefixQuery(options.Keyword, "content"), + inner_bleve.PrefixQuery(options.Keyword, "comments"), + }...)) } - - queries = append(queries, bleve.NewDisjunctionQuery([]query.Query{ - inner_bleve.MatchPhraseQuery(options.Keyword, "title", issueIndexerAnalyzer, fuzziness), - inner_bleve.MatchPhraseQuery(options.Keyword, "content", issueIndexerAnalyzer, fuzziness), - inner_bleve.MatchPhraseQuery(options.Keyword, "comments", issueIndexerAnalyzer, fuzziness), - }...)) } if len(options.RepoIDs) > 0 || options.AllPublic { diff --git a/tests/integration/repo_search_test.go b/tests/integration/repo_search_test.go index 56cc45d9010e5..cf199e98c2895 100644 --- a/tests/integration/repo_search_test.go +++ b/tests/integration/repo_search_test.go @@ -32,7 +32,7 @@ func TestSearchRepo(t *testing.T) { repo, err := repo_model.GetRepositoryByOwnerAndName(db.DefaultContext, "user2", "repo1") assert.NoError(t, err) - code_indexer.UpdateRepoIndexer(repo) + executeIndexer(t, repo, code_indexer.UpdateRepoIndexer) testSearch(t, "/user2/repo1/search?q=Description&page=1", []string{"README.md"}) @@ -42,14 +42,12 @@ func TestSearchRepo(t *testing.T) { repo, err = repo_model.GetRepositoryByOwnerAndName(db.DefaultContext, "user2", "glob") assert.NoError(t, err) - code_indexer.UpdateRepoIndexer(repo) + executeIndexer(t, repo, code_indexer.UpdateRepoIndexer) testSearch(t, "/user2/glob/search?q=loren&page=1", []string{"a.txt"}) - testSearch(t, "/user2/glob/search?q=loren&page=1&t=match", []string{"a.txt"}) - testSearch(t, "/user2/glob/search?q=file3&page=1", []string{"x/b.txt", "a.txt"}) - testSearch(t, "/user2/glob/search?q=file3&page=1&t=match", []string{"x/b.txt", "a.txt"}) - testSearch(t, "/user2/glob/search?q=file4&page=1&t=match", []string{"x/b.txt", "a.txt"}) - testSearch(t, "/user2/glob/search?q=file5&page=1&t=match", []string{"x/b.txt", "a.txt"}) + testSearch(t, "/user2/glob/search?q=file3&page=1", []string{"x/b.txt"}) + testSearch(t, "/user2/glob/search?q=file4&page=1", []string{}) + testSearch(t, "/user2/glob/search?q=file5&page=1", []string{}) } func testSearch(t *testing.T, url string, expected []string) { @@ -59,3 +57,7 @@ func testSearch(t *testing.T, url string, expected []string) { filenames := resultFilenames(t, NewHTMLParser(t, resp.Body)) assert.EqualValues(t, expected, filenames) } + +func executeIndexer(t *testing.T, repo *repo_model.Repository, op func(*repo_model.Repository)) { + op(repo) +}