From fb041ca20fdaa7a4d3074dbeeaa7255aac537942 Mon Sep 17 00:00:00 2001 From: Raphael Sourty Date: Thu, 16 Jun 2022 04:53:09 +0200 Subject: [PATCH] Allow to choose faiss index --- docs/api/rank/DPR.md | 2 +- docs/api/rank/Encoder.md | 2 +- docs/api/retrieve/DPR.md | 6 ++++-- docs/api/retrieve/Encoder.md | 6 ++++-- docs/api/retrieve/Fuzz.md | 2 +- docs/retrieve/dpr.md | 31 ++++++++++++++++--------------- docs/retrieve/encoder.md | 34 +++++++++++++++++++--------------- 7 files changed, 46 insertions(+), 37 deletions(-) diff --git a/docs/api/rank/DPR.md b/docs/api/rank/DPR.md index 83124bb..af025d7 100644 --- a/docs/api/rank/DPR.md +++ b/docs/api/rank/DPR.md @@ -30,7 +30,7 @@ DPR ranks documents using distinct models to encode the query and document conte Path to the file dedicated to storing the embeddings. The ranker will read this file if it already exists to load the embeddings and will update it when documents are added. -- **similarity** – defaults to `` +- **similarity** – defaults to `` Similarity measure to compare documents embeddings and query embedding (similarity.cosine or similarity.dot). diff --git a/docs/api/rank/Encoder.md b/docs/api/rank/Encoder.md index 4d7ad27..c9d1f35 100644 --- a/docs/api/rank/Encoder.md +++ b/docs/api/rank/Encoder.md @@ -26,7 +26,7 @@ SentenceBert Ranker. Path to the file dedicated to storing the embeddings. The ranker will read this file if it already exists to load the embeddings and will update it when documents are added. -- **similarity** – defaults to `` +- **similarity** – defaults to `` Similarity measure to compare documents embeddings and query embedding (similarity.cosine or similarity.dot). diff --git a/docs/api/retrieve/DPR.md b/docs/api/retrieve/DPR.md index 7e5e01d..28f4c52 100644 --- a/docs/api/retrieve/DPR.md +++ b/docs/api/retrieve/DPR.md @@ -24,6 +24,8 @@ DPR as a retriever using Faiss Index. 
- **path** (*str*) – defaults to `None` +- **index** (*faiss.swigfaiss.IndexFlatL2*) – defaults to `None` + ## Attributes @@ -100,7 +102,7 @@ DPR retriever ???- note "__call__" - Call self as a function. + Search for documents. **Parameters** @@ -120,7 +122,7 @@ DPR retriever **Parameters** - - **tree** (*faiss.swigfaiss.IndexFlatL2*) + - **index** (*faiss.swigfaiss.IndexFlatL2*) - **documents_embeddings** (*list*) ???- note "dump_embeddings" diff --git a/docs/api/retrieve/Encoder.md b/docs/api/retrieve/Encoder.md index 7fdd361..acf7049 100644 --- a/docs/api/retrieve/Encoder.md +++ b/docs/api/retrieve/Encoder.md @@ -22,6 +22,8 @@ Encoder as a retriever using Faiss Index. - **path** (*str*) – defaults to `None` +- **index** (*faiss.swigfaiss.IndexFlatL2*) – defaults to `None` + ## Attributes @@ -97,7 +99,7 @@ Encoder retriever ???- note "__call__" - Call self as a function. + Search for documents. **Parameters** @@ -117,7 +119,7 @@ Encoder retriever **Parameters** - - **tree** (*faiss.swigfaiss.IndexFlatL2*) + - **index** (*faiss.swigfaiss.IndexFlatL2*) - **documents_embeddings** (*list*) ???- note "dump_embeddings" diff --git a/docs/api/retrieve/Fuzz.md b/docs/api/retrieve/Fuzz.md index fbb131b..a8d0d27 100644 --- a/docs/api/retrieve/Fuzz.md +++ b/docs/api/retrieve/Fuzz.md @@ -18,7 +18,7 @@ Number of documents to retrieve. Default is `None`, i.e all documents that match the query will be retrieved. 
-- **fuzzer** – defaults to `` +- **fuzzer** – defaults to `` [RapidFuzz scorer](https://maxbachmann.github.io/RapidFuzz/Usage/fuzz.html): fuzz.ratio, fuzz.partial_ratio, fuzz.token_set_ratio, fuzz.partial_token_set_ratio, fuzz.token_sort_ratio, fuzz.partial_token_sort_ratio, fuzz.token_ratio, fuzz.partial_token_ratio, fuzz.WRatio, fuzz.QRatio, string_metric.levenshtein, string_metric.normalized_levenshtein diff --git a/docs/retrieve/dpr.md b/docs/retrieve/dpr.md index cb85077..c7b3cbb 100644 --- a/docs/retrieve/dpr.md +++ b/docs/retrieve/dpr.md @@ -47,24 +47,22 @@ If we want to deploy this retriever, we should rely on Pickle to serialize the r [{'id': 1, 'similarity': 0.01113}, {'id': 0, 'similarity': 0.01113}] ``` -## Retriever DPR on GPU +## Index -To speed up the search for the most relevant documents, we can: +The `retrieve.DPR` retriever is based on [faiss indexes](https://github.com/facebookresearch/faiss/wiki/Faiss-indexes) and is compatible with all the structures proposed by the library. By default, the index used is the `IndexFlatL2`. It is stored in memory and is called via the CPU. Faiss offers a wide range of algorithms that are suitable for different corpus sizes and speed constraints. -- Use the GPU to speed up the DPR model. -- Use the GPU to speed up faiss to retrieve documents. +[Here are the guidelines to choose an index](https://github.com/facebookresearch/faiss/wiki/Guidelines-to-choose-an-index). -To use faiss GPU, we need first to install faiss-gpu; we have to update the attribute `tree` of the retriever with the `faiss.index_cpu_to_gpu` method. After that, Faiss GPU significantly speeds up the search. +Let's create a faiss index stored in memory that runs on GPU, with the DPR model also running on GPU. ```sh pip install faiss-gpu ``` ```python ->>> import faiss - >>> from cherche import retrieve >>> from sentence_transformers import SentenceTransformer +>>> import faiss >>> documents = [ ... { @@ -87,9 +85,15 @@ pip install faiss-gpu ... } ... 
] +>>> encoder = SentenceTransformer('facebook-dpr-ctx_encoder-single-nq-base', device="cuda") + +>>> d = encoder.encode("Embeddings size.").shape[0] +>>> index = faiss.IndexFlatL2(d) +>>> index = faiss.index_cpu_to_gpu(faiss.StandardGpuResources(), 0, index) # 0 is the id of the GPU + >>> retriever = retrieve.DPR( -... encoder = SentenceTransformer('facebook-dpr-ctx_encoder-single-nq-base', device="cuda").encode, -... query_encoder = SentenceTransformer('facebook-dpr-question_encoder-single-nq-base', device="cuda").encode, +... encoder = encoder.encode, +... query_encoder = SentenceTransformer('facebook-dpr-question_encoder-single-nq-base').encode, ... key = "id", ... on = ["title", "article"], ... k = 2, @@ -98,12 +102,9 @@ pip install faiss-gpu >>> retriever.add(documents) -# 0 is the id of the GPU. ->>> retriever.tree = faiss.index_cpu_to_gpu(faiss.StandardGpuResources(), 0, retriever.tree) - >>> retriever("paris") -[{'id': 0, 'similarity': 0.9025790931437582}, - {'id': 2, 'similarity': 0.8160134832855334}] +[{'id': 1, 'similarity': 0.012779952697248447}, + {'id': 0, 'similarity': 0.012022932290377224}] ``` ## Map keys to documents @@ -155,7 +156,7 @@ class CustomDPR: model = CustomDPR() # Your model should pass these tests, i.e Sentence Bert API. 
-assert model.documents(["Paris", "France", "Bordeaux"]).shape[0] == 3 +assert model.documents(["Paris", "France", "Bordeaux"]).shape[0] == 3 assert isinstance(model.documents(["Paris", "France", "Bordeaux"]), np.ndarray) assert len(model.documents("Paris").shape) == 1 diff --git a/docs/retrieve/encoder.md b/docs/retrieve/encoder.md index c9ba214..7377014 100644 --- a/docs/retrieve/encoder.md +++ b/docs/retrieve/encoder.md @@ -55,24 +55,22 @@ If we want to deploy this retriever, we should move the pickle file that contain {'id': 2, 'similarity': 0.8160134832855334}] ``` -## Retriever Encoder on GPU +## Index -To speed up the search for the most relevant documents, we can: +The `retrieve.Encoder` retriever is based on [faiss indexes](https://github.com/facebookresearch/faiss/wiki/Faiss-indexes) and is compatible with all the structures proposed by the library. By default, the index used is the `IndexFlatL2`. It is stored in memory and is called via the CPU. Faiss offers a wide range of algorithms that are suitable for different corpus sizes and speed constraints. -- Use the GPU to speed up the encoder. -- Use the GPU to speed up faiss to retrieve documents. +[Here are the guidelines to choose an index](https://github.com/facebookresearch/faiss/wiki/Guidelines-to-choose-an-index). -To use faiss GPU, we need first to install faiss-gpu; we have to update the attribute `tree` of the retriever with the `faiss.index_cpu_to_gpu` method. After that, Faiss GPU significantly speeds up the search. +Let's create a faiss index stored in memory that runs on GPU, with a sentence transformer encoder that also runs on GPU. ```sh pip install faiss-gpu ``` ```python ->>> import faiss - >>> from cherche import retrieve >>> from sentence_transformers import SentenceTransformer +>>> import faiss >>> documents = [ ... { @@ -95,22 +93,28 @@ pip install faiss-gpu ... } ... 
] +>>> encoder = SentenceTransformer("sentence-transformers/all-mpnet-base-v2", device="cuda") + +>>> d = encoder.encode("Embeddings size.").shape[0] +>>> index = faiss.IndexFlatL2(d) +>>> index = faiss.index_cpu_to_gpu(faiss.StandardGpuResources(), 0, index) # 0 is the id of the GPU + >>> retriever = retrieve.Encoder( ... key = "id", ... on = ["title", "article"], -... encoder = SentenceTransformer("sentence-transformers/all-mpnet-base-v2", device="cuda").encode, +... encoder = encoder.encode, ... k = 2, -... path = "all-mpnet-base-v2.pkl" +... path = "all-mpnet-base-v2.pkl", +... index = index, ... ) >>> retriever.add(documents) -# 0 is the id of the GPU. ->>> retriever.tree = faiss.index_cpu_to_gpu(faiss.StandardGpuResources(), 0, retriever.tree) - >>> retriever("paris") -[{'id': 0, 'similarity': 0.9025790931437582}, - {'id': 2, 'similarity': 0.8160134832855334}] +[{'id': 0, + 'similarity': 0.9025790931437582}, + {'id': 2, + 'similarity': 0.8160134832855334}] ``` ## Map keys to documents @@ -154,7 +158,7 @@ class CustomEncoder: model = CustomEncoder() # Your model should pass these tests, i.e Sentence Bert API. -assert model.encode(["Paris", "France", "Bordeaux"]).shape[0] == 3 +assert model.encode(["Paris", "France", "Bordeaux"]).shape[0] == 3 assert isinstance(model.encode(["Paris", "France", "Bordeaux"]), np.ndarray) assert len(model.encode("Paris").shape) == 1