diff --git a/Makefile b/Makefile
index ab6a709..fe10090 100644
--- a/Makefile
+++ b/Makefile
@@ -1,20 +1,17 @@
 COMMIT_HASH := $(shell eval git rev-parse HEAD)
 
-cython:
-	python setup.py build_ext --inplace --force
-
 execute-notebooks:
 	jupyter nbconvert --execute --to notebook --inplace docs/*/*.ipynb --ExecutePreprocessor.timeout=-1
 
 render-notebooks:
 
-doc: render-notebooks
-	python docs/scripts/index.py
-	mkdocs build
-
-livedoc: doc
+livedoc:
+	mkdocs build --clean
 	mkdocs serve --dirtyreload
 
+deploydoc:
+	mkdocs gh-deploy --force
+
 .PHONY: bench
 bench:
 	asv run ${COMMIT_HASH} --config benchmarks/asv.conf.json --steps 1
diff --git a/README.md b/README.md
index 0751530..39ae11b 100644
--- a/README.md
+++ b/README.md
@@ -17,6 +17,8 @@
 
 Cherche enables the development of a neural search pipeline that employs retrievers and pre-trained language models both as retrievers and rankers. The primary advantage of Cherche lies in its capacity to construct end-to-end pipelines. Additionally, Cherche is well-suited for offline semantic search due to its compatibility with batch computation.
 
+Here are some of the features Cherche offers:
+
 [Live demo of a NLP search engine powered by Cherche](https://raphaelsty.github.io/knowledge/?query=cherche%20neural%20search)
 
 ![Alt text](docs/img/explain.png)
@@ -82,12 +84,18 @@ Here is an example of a neural search pipeline composed of a TF-IDF that quickly
 ```python
 from cherche import data, retrieve, rank
 from sentence_transformers import SentenceTransformer
+from lenlp import sparse
 
 # List of dicts
 documents = data.load_towns()
 
 # Retrieve on fields title and article
-retriever = retrieve.TfIdf(key="id", on=["title", "article"], documents=documents, k=30)
+retriever = retrieve.BM25(
+  key="id", 
+  on=["title", "article"], 
+  documents=documents, 
+  k=30
+)
 
 # Rank on fields title and article
 ranker = rank.Encoder(
@@ -163,6 +171,7 @@ search(["Bordeaux", "Paris", "Toulouse"])
 Cherche provides [retrievers](https://raphaelsty.github.io/cherche/retrieve/retrieve/) that filter input documents based on a query.
 
 - retrieve.TfIdf
+- retrieve.BM25
 - retrieve.Lunr
 - retrieve.Flash
 - retrieve.Encoder
@@ -193,7 +202,7 @@ We welcome all contributions.
 
 ## Acknowledgements 👏
 
-TfIdf retriever is a wrapper around [scikit-learn's TfidfVectorizer](https://scikit-learn.org/stable/modules/generated/sklearn.feature_extraction.text.TfidfVectorizer.html). Lunr retriever is a wrapper around [Lunr.py](https://github.com/yeraydiazdiaz/lunr.py). Flash retriever is a wrapper around [FlashText](https://github.com/vi3k6i5/flashtext). DPR, Encode and CrossEncoder rankers are wrappers dedicated to the use of the pre-trained models of [SentenceTransformers](https://www.sbert.net/docs/pretrained_models.html) in a neural search pipeline.
+TfIdf and BM25 retrievers are wrappers around the sparse vectorizers of [LeNLP](https://github.com/raphaelsty/LeNLP). Lunr retriever is a wrapper around [Lunr.py](https://github.com/yeraydiazdiaz/lunr.py). Flash retriever is a wrapper around [FlashText](https://github.com/vi3k6i5/flashtext). DPR, Encoder and CrossEncoder rankers are wrappers dedicated to the use of the pre-trained models of [SentenceTransformers](https://www.sbert.net/docs/pretrained_models.html) in a neural search pipeline.
 
 ## Citations
 
diff --git a/cherche/__version__.py b/cherche/__version__.py
index 5996f20..e6b021f 100644
--- a/cherche/__version__.py
+++ b/cherche/__version__.py
@@ -1,3 +1,3 @@
-VERSION = (2, 0, 6)
+VERSION = (2, 1, 0)
 
 __version__ = ".".join(map(str, VERSION))
diff --git a/cherche/evaluate/evaluate.py b/cherche/evaluate/evaluate.py
index 89b2f78..d0b6820 100644
--- a/cherche/evaluate/evaluate.py
+++ b/cherche/evaluate/evaluate.py
@@ -3,8 +3,6 @@
 import collections
 import typing
 
-import numpy as np
-
 __all__ = ["evaluation"]
 
 
@@ -56,7 +54,7 @@ def evaluation(
     --------
     >>> from pprint import pprint as print
     >>> from cherche import data, evaluate, retrieve
-    >>> from sklearn.feature_extraction.text import TfidfVectorizer
+    >>> from lenlp import sparse
 
     >>> documents, query_answers = data.arxiv_tags(
     ...    arxiv_title=True, arxiv_summary=False, comment=False
@@ -66,7 +64,7 @@ def evaluation(
     ...     key="uri",
     ...     on=["prefLabel_text", "altLabel_text"],
     ...     documents=documents,
-    ...     tfidf=TfidfVectorizer(lowercase=True, ngram_range=(3, 7), analyzer="char"),
+    ...     tfidf=sparse.TfidfVectorizer(normalize=True, ngram_range=(3, 7), analyzer="char"),
     ... ) + documents
 
     >>> scores = evaluate.evaluation(search=search, query_answers=query_answers, k=10)
diff --git a/cherche/query/prf.py b/cherche/query/prf.py
index 8cbefd3..875f1d0 100644
--- a/cherche/query/prf.py
+++ b/cherche/query/prf.py
@@ -2,7 +2,7 @@
 
 import numpy as np
 import sklearn
-from sklearn.feature_extraction.text import TfidfVectorizer
+from lenlp import sparse
 from sklearn.metrics.pairwise import cosine_similarity
 
 from ..utils import yield_batch_single
@@ -21,7 +21,7 @@ class PRF(Query):
     on
         Fields to use for fitting the spelling corrector on.
     tf
-        defaults to sklearn.feature_extraction.text.TfidfVectorizer.
+        defaults to lenlp.sparse.TfidfVectorizer.
         If you want to implement your own tf, it needs to follow the sklearn base API and provides the `transform`
         `fit_transform` and `get_feature_names_out` methods. See sklearn documentation for more information.
     nb_docs
@@ -65,7 +65,7 @@ def __init__(
         self,
         on: typing.Union[str, list],
         documents: list,
-        tf: sklearn.feature_extraction.text.CountVectorizer = TfidfVectorizer(),
+        tf: sklearn.feature_extraction.text.CountVectorizer = sparse.TfidfVectorizer(),
         nb_docs: int = 5,
         nb_terms_per_doc: int = 3,
     ) -> None:
diff --git a/cherche/retrieve/__init__.py b/cherche/retrieve/__init__.py
index 9a4a891..bf713ac 100644
--- a/cherche/retrieve/__init__.py
+++ b/cherche/retrieve/__init__.py
@@ -1,4 +1,5 @@
 from .base import Retriever
+from .bm25 import BM25
 from .dpr import DPR
 from .embedding import Embedding
 from .encoder import Encoder
@@ -9,6 +10,7 @@
 
 __all__ = [
     "Retriever",
+    "BM25",
     "DPR",
     "Embedding",
     "Encoder",
diff --git a/cherche/retrieve/bm25.py b/cherche/retrieve/bm25.py
new file mode 100644
index 0000000..5ee4511
--- /dev/null
+++ b/cherche/retrieve/bm25.py
@@ -0,0 +1,111 @@
+__all__ = ["BM25"]
+
+import typing
+
+from lenlp import sparse
+
+from .tfidf import TfIdf
+
+
+class BM25(TfIdf):
+    """BM25 retriever based on cosine similarities.
+
+    Parameters
+    ----------
+    key
+        Field identifier of each document.
+    on
+        Fields to use to match the query to the documents.
+    documents
+        Documents in BM25 retriever are static. The retriever must be reset to index new
+        documents.
+    k
+        Number of documents to retrieve. Default is `None`, i.e all documents that match the query
+        will be retrieved.
+    count_vectorizer
+        BM25Vectorizer class of LeNLP to create a custom BM25 retriever.
+
+    Examples
+    --------
+
+    >>> from pprint import pprint as print
+    >>> from cherche import retrieve
+
+    >>> documents = [
+    ...     {"id": 0, "title": "Paris", "article": "Eiffel tower"},
+    ...     {"id": 1, "title": "Montreal", "article": "Montreal is in Canada."},
+    ...     {"id": 2, "title": "Paris", "article": "Eiffel tower"},
+    ...     {"id": 3, "title": "Montreal", "article": "Montreal is in Canada."},
+    ... ]
+
+    >>> retriever = retrieve.BM25(
+    ...     key="id",
+    ...     on=["title", "article"],
+    ...     documents=documents,
+    ... )
+
+    >>> documents = [
+    ...     {"id": 4, "title": "Paris", "article": "Eiffel tower"},
+    ...     {"id": 5, "title": "Montreal", "article": "Montreal is in Canada."},
+    ...     {"id": 6, "title": "Paris", "article": "Eiffel tower"},
+    ...     {"id": 7, "title": "Montreal", "article": "Montreal is in Canada."},
+    ... ]
+
+    >>> retriever = retriever.add(documents)
+
+    >>> print(retriever(q=["paris", "canada"], k=4))
+    [[{'id': 6, 'similarity': 0.5404109029445249},
+      {'id': 0, 'similarity': 0.5404109029445249},
+      {'id': 2, 'similarity': 0.5404109029445249},
+      {'id': 4, 'similarity': 0.5404109029445249}],
+     [{'id': 7, 'similarity': 0.3157669764669935},
+      {'id': 5, 'similarity': 0.3157669764669935},
+      {'id': 3, 'similarity': 0.3157669764669935},
+      {'id': 1, 'similarity': 0.3157669764669935}]]
+
+    >>> print(retriever(["unknown", "montreal paris"], k=2))
+    [[],
+     [{'id': 7, 'similarity': 0.7391866872635209},
+      {'id': 5, 'similarity': 0.7391866872635209}]]
+
+
+    >>> print(retriever(q="paris"))
+    [{'id': 6, 'similarity': 0.5404109029445249},
+     {'id': 0, 'similarity': 0.5404109029445249},
+     {'id': 2, 'similarity': 0.5404109029445249},
+     {'id': 4, 'similarity': 0.5404109029445249}]
+
+    References
+    ----------
+    1. [lenlp.sparse.BM25Vectorizer](https://github.com/raphaelsty/LeNLP)
+    2. [Python: tf-idf-cosine: to find document similarity](https://stackoverflow.com/questions/12118720/python-tf-idf-cosine-to-find-document-similarity)
+
+    """
+
+    def __init__(
+        self,
+        key: str,
+        on: typing.Union[str, list],
+        documents: typing.List[typing.Dict[str, str]] = None,
+        count_vectorizer: sparse.BM25Vectorizer = None,
+        k: typing.Optional[int] = None,
+        batch_size: int = 1024,
+        fit: bool = True,
+    ) -> None:
+        count_vectorizer = (
+            sparse.BM25Vectorizer(
+                normalize=True, ngram_range=(3, 5), analyzer="char_wb"
+            )
+            if count_vectorizer is None
+            else count_vectorizer
+        )
+
+        super().__init__(
+            key=key,
+            on=on,
+            documents=documents,
+            tfidf=count_vectorizer,
+            k=k,
+            batch_size=batch_size,
+            fit=fit,
+        )
diff --git a/cherche/retrieve/tfidf.py b/cherche/retrieve/tfidf.py
index 511921e..6dd93c0 100644
--- a/cherche/retrieve/tfidf.py
+++ b/cherche/retrieve/tfidf.py
@@ -3,8 +3,8 @@
 import typing
 
 import numpy as np
+from lenlp import sparse
 from scipy.sparse import csc_matrix, hstack
-from sklearn.feature_extraction.text import TfidfVectorizer
 
 from ..utils import yield_batch
 from .base import Retriever
@@ -92,7 +92,7 @@ def __init__(
         key: str,
         on: typing.Union[str, list],
         documents: typing.List[typing.Dict[str, str]] = None,
-        tfidf: TfidfVectorizer = None,
+        tfidf: sparse.TfidfVectorizer = None,
         k: typing.Optional[int] = None,
         batch_size: int = 1024,
         fit: bool = True,
@@ -100,7 +100,9 @@ def __init__(
         super().__init__(key=key, on=on, k=k, batch_size=batch_size)
 
         self.tfidf = (
-            TfidfVectorizer(lowercase=True, ngram_range=(3, 7), analyzer="char_wb")
+            sparse.TfidfVectorizer(
+                normalize=True, ngram_range=(3, 7), analyzer="char_wb"
+            )
             if tfidf is None
             else tfidf
         )
@@ -207,7 +209,7 @@ def __call__(
         ranked = []
 
         for batch in yield_batch(
-            q,
+            array=q,
             batch_size=batch_size if batch_size is not None else self.batch_size,
             desc=f"{self.__class__.__name__} retriever",
             tqdm_bar=tqdm_bar,
diff --git a/docs/api/evaluate/evaluation.md b/docs/api/evaluate/evaluation.md
index 38a0d3b..4be42f4 100644
--- a/docs/api/evaluate/evaluation.md
+++ b/docs/api/evaluate/evaluation.md
@@ -33,7 +33,7 @@ Evaluation function
 ```python
 >>> from pprint import pprint as print
 >>> from cherche import data, evaluate, retrieve
->>> from sklearn.feature_extraction.text import TfidfVectorizer
+>>> from lenlp import sparse
 
 >>> documents, query_answers = data.arxiv_tags(
 ...    arxiv_title=True, arxiv_summary=False, comment=False
@@ -43,7 +43,7 @@ Evaluation function
 ...     key="uri",
 ...     on=["prefLabel_text", "altLabel_text"],
 ...     documents=documents,
-...     tfidf=TfidfVectorizer(lowercase=True, ngram_range=(3, 7), analyzer="char"),
+...     tfidf=sparse.TfidfVectorizer(normalize=True, ngram_range=(3, 7), analyzer="char"),
 ... ) + documents
 
 >>> scores = evaluate.evaluation(search=search, query_answers=query_answers, k=10)
diff --git a/docs/api/query/PRF.md b/docs/api/query/PRF.md
index 2bbaf28..a3fc4c4 100644
--- a/docs/api/query/PRF.md
+++ b/docs/api/query/PRF.md
@@ -12,9 +12,9 @@ Pseudo (or blind) Relevance-Feedback module. The Query-Augmentation method appli
 
 - **documents** (*list*)
 
-- **tf** (*sklearn.feature_extraction.text.CountVectorizer*) – defaults to `TfidfVectorizer()`
+- **tf** (*sklearn.feature_extraction.text.CountVectorizer*) – defaults to `sparse.TfidfVectorizer()`
 
-    defaults to sklearn.feature_extraction.text.TfidfVectorizer. If you want to implement your own tf, it needs to follow the sklearn base API and provides the `transform` `fit_transform` and `get_feature_names_out` methods. See sklearn documentation for more information.
+    defaults to lenlp.sparse.TfidfVectorizer. If you want to implement your own tf, it needs to follow the sklearn base API and provides the `transform` `fit_transform` and `get_feature_names_out` methods. See sklearn documentation for more information.
 
 - **nb_docs** (*int*) – defaults to `5`
 
diff --git a/docs/api/retrieve/TfIdf.md b/docs/api/retrieve/TfIdf.md
index 3235084..e88e948 100644
--- a/docs/api/retrieve/TfIdf.md
+++ b/docs/api/retrieve/TfIdf.md
@@ -18,9 +18,9 @@ TfIdf retriever based on cosine similarities.
 
     Documents in TFIdf retriever are static. The retriever must be reseted to index new documents.
 
-- **tfidf** (*sklearn.feature_extraction.text.TfidfVectorizer*) – defaults to `None`
+- **tfidf** (*lenlp.sparse.TfidfVectorizer*) – defaults to `None`
 
-    TfidfVectorizer class of Sklearn to create a custom TfIdf retriever.
+    sparse.TfidfVectorizer class of LeNLP to create a custom TfIdf retriever.
 
 - **k** (*Optional[int]*) – defaults to `None`
 
@@ -37,7 +37,7 @@ TfIdf retriever based on cosine similarities.
 ```python
 >>> from pprint import pprint as print
 >>> from cherche import retrieve
->>> from sklearn.feature_extraction.text import TfidfVectorizer
+>>> from lenlp import sparse
 
 >>> documents = [
 ...     {"id": 0, "title": "Paris", "article": "Eiffel tower"},
@@ -122,6 +122,6 @@ TfIdf retriever based on cosine similarities.
     
 ## References
 
-1. [sklearn.feature_extraction.text.TfidfVectorizer](https://scikit-learn.org/stable/modules/generated/sklearn.feature_extraction.text.TfidfVectorizer.html)
+1. [lenlp.sparse.TfidfVectorizer](https://github.com/raphaelsty/LeNLP)
 2. [Python: tf-idf-cosine: to find document similarity](https://stackoverflow.com/questions/12118720/python-tf-idf-cosine-to-find-document-similarity)
 
diff --git a/docs/pipeline/pipeline.md b/docs/pipeline/pipeline.md
index a99e627..cdc23a5 100644
--- a/docs/pipeline/pipeline.md
+++ b/docs/pipeline/pipeline.md
@@ -162,7 +162,7 @@ And here is the code:
 ```python
 >>> from cherche import retrieve, rank, data
 >>> from sentence_transformers import SentenceTransformer
->>> from sklearn.feature_extraction.text import TfidfVectorizer
+>>> from lenlp import sparse
 
 >>> documents, _ = data.arxiv_tags(arxiv_title=True, arxiv_summary=False, comment=False)
 
@@ -185,7 +185,7 @@ And here is the code:
 ...    key = "uri",
 ...    on = ["prefLabel_text", "altLabel_text"],
 ...    documents = documents,
-...    tfidf = TfidfVectorizer(lowercase=True, min_df=1, max_df=0.9, ngram_range=(3, 7), analyzer="char"),
+...    tfidf = sparse.TfidfVectorizer(normalize=True, min_df=1, max_df=0.9, ngram_range=(3, 7), analyzer="char"),
 ...    k = 100,
 ... ) + ranker
 
diff --git a/docs/retrieve/.pages b/docs/retrieve/.pages
index 89f9481..c9c1160 100644
--- a/docs/retrieve/.pages
+++ b/docs/retrieve/.pages
@@ -1,6 +1,7 @@
 title: Retrieve
 nav:
     - retrieve.md
+    - bm25.md
     - tfidf.md
     - flash.md
     - lunr.md
diff --git a/docs/retrieve/bm25.md b/docs/retrieve/bm25.md
new file mode 100644
index 0000000..5aca474
--- /dev/null
+++ b/docs/retrieve/bm25.md
@@ -0,0 +1,108 @@
+# BM25
+
+Our BM25 retriever relies on the [sparse.BM25Vectorizer](https://github.com/raphaelsty/LeNLP) of LeNLP.
+
+```python
+>>> from cherche import retrieve
+
+>>> documents = [
+...    {
+...        "id": 0,
+...        "article": "Paris is the capital and most populous city of France",
+...        "title": "Paris",
+...        "url": "https://en.wikipedia.org/wiki/Paris"
+...    },
+...    {
+...        "id": 1,
+...        "article": "Paris has been one of Europe major centres of finance, diplomacy , commerce , fashion , gastronomy , science , and arts.",
+...        "title": "Paris",
+...        "url": "https://en.wikipedia.org/wiki/Paris"
+...    },
+...    {
+...        "id": 2,
+...        "article": "The City of Paris is the centre and seat of government of the region and province of Île-de-France .",
+...        "title": "Paris",
+...        "url": "https://en.wikipedia.org/wiki/Paris"
+...    }
+... ]
+
+>>> retriever = retrieve.BM25(key="id", on=["title", "article"], documents=documents, k=30)
+
+>>> retriever("france")
+[{'id': 0, 'similarity': 0.1236413097778466},
+ {'id': 2, 'similarity': 0.08907655343363269},
+ {'id': 1, 'similarity': 0.0031730868527342104}]
+```
+
+We can also initialize the retriever with a custom [sparse.BM25Vectorizer](https://github.com/raphaelsty/LeNLP).
+
+
+
+```python
+>>> from cherche import retrieve
+>>> from lenlp import sparse
+
+>>> documents = [
+...    {
+...        "id": 0,
+...        "article": "Paris is the capital and most populous city of France",
+...        "title": "Paris",
+...        "url": "https://en.wikipedia.org/wiki/Paris"
+...    },
+...    {
+...        "id": 1,
+...        "article": "Paris has been one of Europe major centres of finance, diplomacy , commerce , fashion , gastronomy , science , and arts.",
+...        "title": "Paris",
+...        "url": "https://en.wikipedia.org/wiki/Paris"
+...    },
+...    {
+...        "id": 2,
+...        "article": "The City of Paris is the centre and seat of government of the region and province of Île-de-France .",
+...        "title": "Paris",
+...        "url": "https://en.wikipedia.org/wiki/Paris"
+...    }
+... ]
+
+>>> count_vectorizer = sparse.BM25Vectorizer(
+...  normalize=True, ngram_range=(3, 7), analyzer="char_wb")
+
+>>> retriever = retrieve.BM25(
+...  key="id", on=["title", "article"], documents=documents, count_vectorizer=count_vectorizer)
+
+>>> retriever("fra", k=3)
+[{'id': 0, 'similarity': 0.15055477454160002},
+ {'id': 2, 'similarity': 0.022883459495904895}]
+```
+
+## Batch retrieval
+
+If we have several queries for which we want to retrieve the top k documents, we can
+pass a list of queries to the retriever. This is much faster for multiple queries. In batch mode,
+the retriever returns a list of lists of documents instead of a single list of documents.
+
+```python
+>>> retriever(["fra", "arts", "capital"], k=3)
+[[{'id': 0, 'similarity': 0.051000705070125066}, # Match query 1
+  {'id': 2, 'similarity': 0.03415513704304113}],
+ [{'id': 1, 'similarity': 0.07021399356970497}], # Match query 2
+ [{'id': 0, 'similarity': 0.25972148184421534}]] # Match query 3
+```
+
+## Map keys to documents
+
+We can map documents to retrieved keys.
+
+```python
+>>> retriever += documents
+>>> retriever("fra")
+[{'id': 0,
+  'article': 'Paris is the capital and most populous city of France',
+  'title': 'Paris',
+  'url': 'https://en.wikipedia.org/wiki/Paris',
+  'similarity': 0.15055477454160002},
+ {'id': 2,
+  'article': 'The City of Paris is the centre and seat of government of the region and province of Île-de-France .',
+  'title': 'Paris',
+  'url': 'https://en.wikipedia.org/wiki/Paris',
+  'similarity': 0.022883459495904895}]
+```
diff --git a/docs/retrieve/tfidf.md b/docs/retrieve/tfidf.md
index 5c339d5..fefc7e1 100644
--- a/docs/retrieve/tfidf.md
+++ b/docs/retrieve/tfidf.md
@@ -1,6 +1,6 @@
 # TfIdf
 
-Our TF-IDF retriever relies on the [TfidfVectorizer](https://scikit-learn.org/stable/modules/generated/sklearn.feature_extraction.text.TfidfVectorizer.html) of Sklearn. It computes the dot product between the query TF-IDF vector and the documents TF-IDF matrix and retrieves the highest match. TfIdf retriever stores a sparse matrix and an index that links the rows of the matrix to document identifiers.
+Our TF-IDF retriever relies on the [sparse.TfidfVectorizer](https://github.com/raphaelsty/LeNLP) of LeNLP. It computes the dot product between the query TF-IDF vector and the documents TF-IDF matrix and retrieves the highest match. TfIdf retriever stores a sparse matrix and an index that links the rows of the matrix to document identifiers.
 
 ```python
 >>> from cherche import retrieve
@@ -34,11 +34,11 @@ Our TF-IDF retriever relies on the [TfidfVectorizer](https://scikit-learn.org/st
  {'id': 1, 'similarity': 0.02505818772920329}]
 ```
 
-We can also initialize the retriever with a custom [TfidfVectorizer](https://scikit-learn.org/stable/modules/generated/sklearn.feature_extraction.text.TfidfVectorizer.html).
+We can also initialize the retriever with a custom [sparse.TfidfVectorizer](https://github.com/raphaelsty/LeNLP).
 
 ```python
 >>> from cherche import retrieve
->>> from sklearn.feature_extraction.text import TfidfVectorizer
+>>> from lenlp import sparse
 
 >>> documents = [
 ...    {
@@ -61,8 +61,8 @@ We can also initialize the retriever with a custom [TfidfVectorizer](https://sci
 ...    }
 ... ]
 
->>> tfidf = TfidfVectorizer(
-...  lowercase=True, ngram_range=(3, 7), analyzer="char_wb")
+>>> tfidf = sparse.TfidfVectorizer(
+...  normalize=True, ngram_range=(3, 7), analyzer="char_wb")
 
 >>> retriever = retrieve.TfIdf(
 ...  key="id", on=["title", "article"], documents=documents, tfidf=tfidf)
diff --git a/setup.py b/setup.py
index 45535ff..080baec 100644
--- a/setup.py
+++ b/setup.py
@@ -13,6 +13,7 @@
     "flashtext >= 2.7",
     "tqdm >= 4.62.3",
     "scipy >= 1.7.3",
+    "lenlp >= 1.0.3",
 ]
 
 cpu = ["sentence-transformers >= 2.2.2", "faiss-cpu >= 1.7.4"]