Skip to content

Commit

Permalink
install nltk
Browse files: browse the repository at this point in the history
  • Loading branch information
anakin87 committed Jan 2, 2025
1 parent be17386 commit 0dc3aef
Showing 1 changed file with 28 additions and 22 deletions.
50 changes: 28 additions & 22 deletions tutorials/42_Sentence_Window_Retriever.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@
"%%bash\n",
"\n",
"pip install --upgrade pip\n",
"pip install haystack-ai"
"pip install haystack-ai nltk"
]
},
{
Expand Down Expand Up @@ -98,17 +98,20 @@
"source": [
"from haystack import Document\n",
"from haystack.components.preprocessors import DocumentSplitter\n",
"\n",
"splitter = DocumentSplitter(split_length=1, split_overlap=0, split_by=\"period\")\n",
"\n",
"text = (\"Paul fell asleep to dream of an Arrakeen cavern, silent people all around him moving in the dim light \"\n",
" \"of glowglobes. It was solemn there and like a cathedral as he listened to a faint sound—the \"\n",
" \"drip-drip-drip of water. Even while he remained in the dream, Paul knew he would remember it upon \"\n",
" \"awakening. He always remembered the dreams that were predictions. The dream faded. Paul awoke to feel \"\n",
" \"himself in the warmth of his bed—thinking thinking. This world of Castle Caladan, without play or \"\n",
" \"companions his own age, perhaps did not deserve sadness in farewell. Dr Yueh, his teacher, had \"\n",
" \"hinted that the faufreluches class system was not rigidly guarded on Arrakis. The planet sheltered \"\n",
" \"people who lived at the desert edge without caid or bashar to command them: will-o’-the-sand people \"\n",
" \"called Fremen, marked down on no census of the Imperial Regate.\")\n",
"text = (\n",
" \"Paul fell asleep to dream of an Arrakeen cavern, silent people all around him moving in the dim light \"\n",
" \"of glowglobes. It was solemn there and like a cathedral as he listened to a faint sound—the \"\n",
" \"drip-drip-drip of water. Even while he remained in the dream, Paul knew he would remember it upon \"\n",
" \"awakening. He always remembered the dreams that were predictions. The dream faded. Paul awoke to feel \"\n",
" \"himself in the warmth of his bed—thinking thinking. This world of Castle Caladan, without play or \"\n",
" \"companions his own age, perhaps did not deserve sadness in farewell. Dr Yueh, his teacher, had \"\n",
" \"hinted that the faufreluches class system was not rigidly guarded on Arrakis. The planet sheltered \"\n",
" \"people who lived at the desert edge without caid or bashar to command them: will-o’-the-sand people \"\n",
" \"called Fremen, marked down on no census of the Imperial Regate.\"\n",
")\n",
"\n",
"doc = Document(content=text)\n",
"docs = splitter.run([doc])"
Expand Down Expand Up @@ -144,7 +147,7 @@
"from haystack.document_stores.types import DuplicatePolicy\n",
"\n",
"doc_store = InMemoryDocumentStore()\n",
"doc_store.write_documents(docs['documents'], policy=DuplicatePolicy.OVERWRITE)"
"doc_store.write_documents(docs[\"documents\"], policy=DuplicatePolicy.OVERWRITE)"
]
},
{
Expand All @@ -167,7 +170,7 @@
"from haystack.components.retrievers import SentenceWindowRetriever\n",
"\n",
"retriever = SentenceWindowRetriever(document_store=doc_store, window_size=2)\n",
"result = retriever.run(retrieved_documents=[docs['documents'][4]])"
"result = retriever.run(retrieved_documents=[docs[\"documents\"][4]])"
]
},
{
Expand Down Expand Up @@ -199,7 +202,7 @@
}
],
"source": [
"result['context_windows']"
"result[\"context_windows\"]"
]
},
{
Expand All @@ -224,7 +227,7 @@
}
],
"source": [
"result['context_documents']"
"result[\"context_documents\"]"
]
},
{
Expand Down Expand Up @@ -259,6 +262,7 @@
"import csv\n",
"from haystack import Document\n",
"\n",
"\n",
"def read_documents(file: str) -> List[Document]:\n",
" with open(file, \"r\") as file:\n",
" reader = csv.reader(file, delimiter=\"\\t\")\n",
Expand All @@ -283,11 +287,11 @@
"from pathlib import Path\n",
"import requests\n",
"\n",
"doc = requests.get('https://raw.githubusercontent.com/amankharwal/Website-data/master/bbc-news-data.csv')\n",
"doc = requests.get(\"https://raw.githubusercontent.com/amankharwal/Website-data/master/bbc-news-data.csv\")\n",
"\n",
"datafolder = Path('data')\n",
"datafolder = Path(\"data\")\n",
"datafolder.mkdir(exist_ok=True)\n",
"with open(datafolder/'bbc-news-data.csv', 'wb') as f:\n",
"with open(datafolder / \"bbc-news-data.csv\", \"wb\") as f:\n",
" for chunk in doc.iter_content(512):\n",
" f.write(chunk)"
]
Expand Down Expand Up @@ -356,7 +360,7 @@
"\n",
"indexing_pipeline.connect(\"splitter\", \"writer\")\n",
"\n",
"indexing_pipeline.run({\"documents\":docs})"
"indexing_pipeline.run({\"documents\": docs})"
]
},
{
Expand Down Expand Up @@ -421,7 +425,9 @@
"metadata": {},
"outputs": [],
"source": [
"result = sentence_window_pipeline.run(data={'bm25_retriever': {'query': \"phishing attacks\", \"top_k\": 1}}, include_outputs_from={'bm25_retriever'})"
"result = sentence_window_pipeline.run(\n",
" data={\"bm25_retriever\": {\"query\": \"phishing attacks\", \"top_k\": 1}}, include_outputs_from={\"bm25_retriever\"}\n",
")"
]
},
{
Expand Down Expand Up @@ -450,7 +456,7 @@
}
],
"source": [
"result['bm25_retriever']['documents']"
"result[\"bm25_retriever\"][\"documents\"]"
]
},
{
Expand Down Expand Up @@ -479,7 +485,7 @@
}
],
"source": [
"result['sentence_window__retriever']['context_windows']"
"result[\"sentence_window__retriever\"][\"context_windows\"]"
]
},
{
Expand Down Expand Up @@ -512,7 +518,7 @@
}
],
"source": [
"result['sentence_window__retriever']['context_documents']"
"result[\"sentence_window__retriever\"][\"context_documents\"]"
]
},
{
Expand Down

0 comments on commit 0dc3aef

Please sign in to comment.