Skip to content

Commit

Permalink
Boost relevance scoring when search terms appear in close proximity. …
Browse files Browse the repository at this point in the history
…Followup to #532
  • Loading branch information
seanaery committed Dec 18, 2023
1 parent 7922386 commit 4e6d9a8
Show file tree
Hide file tree
Showing 3 changed files with 57 additions and 24 deletions.
53 changes: 29 additions & 24 deletions solr/conf/solrconfig.xml
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,11 @@
http://wiki.apache.org/solr/LocalParams
-->

<!-- In general, matches using the pf parameter should be boosted higher
than qf, so the proximity of multiple search terms in the matching
documents is significant in relevance scoring. See:
https://solr.apache.org/guide/solr/latest/query-guide/dismax-query-parser.html#pf-phrase-fields-parameter
-->
<str name="qf">
collection_title_tesim^150
title_tesim^100
Expand All @@ -110,27 +115,27 @@
text
</str>
<str name="pf">
collection_title_tesim^150
title_tesim^100
normalized_title_teim^100
unitid_identifier_match^40
name_teim^10
place_teim^10
subject_teim^2
id
ead_ssi
ref_ssm
unitid_ssm
container_teim
parent_unittitles_tesim
text
collection_title_tesim^300
title_tesim^200
normalized_title_teim^200
unitid_identifier_match^80
name_teim^20
place_teim^20
subject_teim^5
id^2
ead_ssi^2
ref_ssm^2
unitid_ssm^2
container_teim^2
parent_unittitles_tesim^2
text^2
</str>

<str name="qf_container">
container_teim
</str>
<str name="pf_container">
container_teim
container_teim^2
</str>
<str name="qf_identifier">
id
Expand All @@ -139,36 +144,36 @@
unitid_ssm
</str>
<str name="pf_identifier">
id
ead_ssi
ref_ssm
unitid_ssm
id^2
ead_ssi^2
ref_ssm^2
unitid_ssm^2
</str>
<str name="qf_name">
name_teim
</str>
<str name="pf_name">
name_teim
name_teim^2
</str>
<str name="qf_place">
place_teim
</str>
<str name="pf_place">
place_teim
place_teim^2
</str>
<str name="qf_subject">
subject_teim
</str>
<str name="pf_subject">
subject_teim
subject_teim^2
</str>
<str name="qf_title">
title_tesim
normalized_title_teim
</str>
<str name="pf_title">
title_tesim
normalized_title_teim
title_tesim^2
normalized_title_teim^2
</str>

<int name="ps">3</int>
Expand Down
18 changes: 18 additions & 0 deletions spec/features/search_query_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -69,4 +69,22 @@
end
end
end

# Phrase-fields (pf) relevance check. Both examples run the same two-term
# query against all fields and assert on result order: the fixture scope
# note that contains the terms side by side ("splendiferous escapades")
# is expected in position 1, and the scope note where the same two terms
# are separated by many words is expected in position 2.
context 'when two terms match two docs but proximity differs (pf test)' do
  it 'counts the doc where the terms are in close proximity as more relevant' do
    visit search_catalog_path q: 'splendiferous escapades', search_field: 'all_fields'

    # First hit: the scope note with the terms adjacent to each other.
    within('.document-position-1') do
      expect(page).to have_css '.al-document-abstract-or-scope',
                               text: /This will test the splendiferous escapades phrase/
    end
  end

  it 'counts the doc where the terms are far apart as less relevant' do
    visit search_catalog_path q: 'splendiferous escapades', search_field: 'all_fields'

    # Second hit: the scope note with the terms far apart from each other.
    within('.document-position-2') do
      expect(page).to have_css '.al-document-abstract-or-scope',
                               text: /This splendiferous test will help/
    end
  end
end
end
10 changes: 10 additions & 0 deletions spec/fixtures/ead/nlm/alphaomegaalpha.xml
Original file line number Diff line number Diff line change
Expand Up @@ -680,6 +680,11 @@
<container id="aspace_b60ff470b57285b4f198c8a7edcf2f61"
parent="aspace_1579b968f44866f4066a9387e4446ce1" type="folder">15</container>
</did>
<scopecontent id="aspace_7482a2ddff80393b0c12a926f9bd6ee1">
<head>Scope and Contents Phrase Query (pf) test 1</head>
<p>This will test the splendiferous escapades phrase to help ensure that relevance
is impacted by the proximity of search terms.</p>
</scopecontent>
</c>
<c id="aspace_d908ca557e8b2f6a4d13002616b086c1" level="otherlevel">
<did>
Expand All @@ -688,6 +693,11 @@
<container id="aspace_6d4dfa7db9a186576147009f3d5bd9ee" label="Mixed Materials"
type="folder">1</container>
</did>
<scopecontent id="aspace_98ae59df3f802993a01476bc133bae04">
<head>Scope and Contents Phrase Query (pf) test 2</head>
<p>This splendiferous test will help to ensure that the relevance
is impacted by the proximity of search term phrase escapades.</p>
</scopecontent>
</c>
<c id="aspace_a68ee517caa6d765f01eb923c177af2b" level="otherlevel">
<did>
Expand Down

0 comments on commit 4e6d9a8

Please sign in to comment.