#!/usr/bin/env python3
#
# Simple example script demonstrating how to re-rank using the trained model.
#
# Copyright (C) 2004,2005,2006,2007,2008,2009,2010,2015 Olly Betts
# Copyright (C) 2011 Parth Gupta
# Copyright (C) 2016 Ayush Tomar
# Copyright (C) 2019 Vaibhav Kansagara
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation; either version 2 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
# USA

import sys

import xapian
import xapianletor


def parse_msize(text):
    """Convert the MSIZE command line argument to a non-negative int.

    Command line arguments always arrive as strings, but the value is
    passed on to Enquire.get_mset() as the maximum number of matches, so
    it has to be converted (and rejected early if it is not a usable
    number).

    Raises ValueError with a user-presentable message if `text` is not a
    non-negative integer.
    """
    try:
        msize = int(text)
    except ValueError:
        raise ValueError("%r is not an integer" % text) from None
    if msize < 0:
        raise ValueError("%r is negative" % text)
    return msize


def build_query(db, query_string):
    """Parse `query_string` against `db` and return the combined Query.

    The string is parsed twice - once with no default prefix and once
    with "S" (the prefix mapped to "title" and "subject" below) as the
    default prefix - and the two parses are combined with OP_OR.
    """
    parser = xapian.QueryParser()
    parser.add_prefix("title", "S")
    parser.add_prefix("subject", "S")
    parser.set_database(db)
    parser.set_default_op(xapian.Query.OP_OR)
    parser.set_stemmer(xapian.Stem("en"))
    parser.set_stemming_strategy(xapian.QueryParser.STEM_SOME)

    query_no_prefix = parser.parse_query(query_string,
                                         parser.FLAG_DEFAULT)
    # query with title as default prefix
    query_default_prefix = parser.parse_query(query_string,
                                              parser.FLAG_DEFAULT,
                                              "S")
    # Combine queries
    return xapian.Query(xapian.Query.OP_OR,
                        query_no_prefix, query_default_prefix)


def print_mset(mset):
    """Print one line per match: 1-based rank, docid and document data."""
    for m in mset:
        print("%i: docid=%i [%s]" % (m.rank + 1, m.docid,
                                     m.document.get_data().decode('utf-8')))


def main(argv):
    """Run the search and re-ranking; return the process exit status.

    `argv` is the full argument vector (program name first).  Returns 0
    on success or when the query matches nothing, and 1 on a usage
    error, an invalid MSIZE, a query parse error or any other failure.
    """
    # We require three command line arguments.
    if len(argv) != 4:
        print("Usage: %s DATABASE MSIZE QUERY" % argv[0],
              "NB: QUERY should be quoted to protect it from the shell.",
              sep='\n', file=sys.stderr)
        return 1

    db_path, msize_arg, query_string = argv[1:]

    try:
        msize = parse_msize(msize_arg)
    except ValueError as e:
        print("Invalid MSIZE: %s" % str(e), file=sys.stderr)
        return 1

    # Top-level boundary of the script: report any failure from the
    # bindings on stderr and exit non-zero rather than dumping a traceback.
    try:
        db = xapian.Database(db_path)
        query = build_query(db, query_string)

        enquire = xapian.Enquire(db)
        enquire.set_query(query)

        mset = enquire.get_mset(0, msize)

        if mset.empty():
            print("Empty MSet. No documents could be retrieved with the given Query.")
            return 0

        print("Docids before re-ranking by LTR model:\n")
        print_mset(mset)

        # Initialise Ranker object with ListNETRanker instance, db path and
        # query.  See Ranker documentation for available Ranker subclass
        # options.
        ranker = xapianletor.ListNETRanker()
        ranker.set_database_path(db_path)
        ranker.set_query(query)

        # Re-rank the existing mset using the letor model.
        ranker.rank(mset)

        print("Docids after re-ranking by LTR model:\n")
        print_mset(mset)

    except xapian.QueryParserError as e:
        print("Couldn't parse query: %s" % str(e), file=sys.stderr)
        return 1

    except Exception as e:
        print("Exception: %s" % str(e), file=sys.stderr)
        return 1

    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))