diff --git a/bootcamp/OpenAIAssistants/custom_RAG_workflow.ipynb b/bootcamp/OpenAIAssistants/custom_RAG_workflow.ipynb
index 1ce67ce60..afa4e0256 100755
--- a/bootcamp/OpenAIAssistants/custom_RAG_workflow.ipynb
+++ b/bootcamp/OpenAIAssistants/custom_RAG_workflow.ipynb
@@ -96,7 +96,6 @@
"# Connect to Zilliz cloud using endpoint URI and API key TOKEN.\n",
"# TODO change this.\n",
"CLUSTER_ENDPOINT=\"https://in03-xxxx.api.gcp-us-west1.zillizcloud.com:443\"\n",
- "CLUSTER_ENDPOINT=\"https://in03-48a5b11fae525c9.api.gcp-us-west1.zillizcloud.com:443\"\n",
"connections.connect(\n",
" alias='default',\n",
" # Public endpoint obtained from Zilliz Cloud\n",
@@ -338,14 +337,14 @@
"name": "stderr",
"output_type": "stream",
"text": [
- "100%|██████████| 1/1 [00:04<00:00, 4.95s/it]\n"
+ "100%|██████████| 1/1 [00:03<00:00, 3.95s/it]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
- "Milvus Client insert time for 704 vectors: 4.952448844909668 seconds\n"
+ "Milvus Client insert time for 704 vectors: 3.9572505950927734 seconds\n"
]
}
],
@@ -435,10 +434,19 @@
},
{
"cell_type": "code",
- "execution_count": 8,
+ "execution_count": 7,
"id": "b5b6da85",
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/Users/christybergman/mambaforge/envs/py311new/lib/python3.11/site-packages/langchain_core/_api/deprecation.py:117: LangChainDeprecationWarning: The class `langchain_community.chat_models.openai.ChatOpenAI` was deprecated in langchain-community 0.0.10 and will be removed in 0.2.0. An updated version of the class exists in the langchain-openai package and should be used instead. To use it run `pip install -U langchain-openai` and import as `from langchain_openai import ChatOpenAI`.\n",
+ " warn_deprecated(\n"
+ ]
+ }
+ ],
"source": [
"# Ragas default uses HuggingFace Datasets.\n",
"# https://docs.ragas.io/en/latest/getstarted/evaluation.html\n",
@@ -454,10 +462,10 @@
" context_recall, \n",
" context_precision, \n",
" # Context -> Answer metrics\n",
- " answer_relevancy, \n",
" faithfulness, \n",
" # Question -> Answer metrics\n",
" answer_similarity,\n",
+ " answer_relevancy, \n",
" answer_correctness\n",
" )\n",
"metrics = ['context_recall', 'context_precision', 'answer_relevancy', 'faithfulness', 'answer_similarity', 'answer_correctness']\n",
@@ -474,7 +482,7 @@
},
{
"cell_type": "code",
- "execution_count": 9,
+ "execution_count": 8,
"id": "5e2db9c0",
"metadata": {},
"outputs": [],
@@ -510,37 +518,40 @@
" \n",
" return ragas_ds\n",
"\n",
- "def evaluate_ragas(input_df, answer_col_name=\"OpenAI_RAG_answer\", context_exists=False, row_number=-9999):\n",
+ "def evaluate_ragas(input_df, answer_col_name=\"OpenAI_RAG_answer\", context_exists=False, row_number=-9999, metrics=\"final_only\"):\n",
"\n",
" # Create a ragas dataset.\n",
" ragas_input_ds = assemble_ragas_dataset(input_df, answer_col_name, context_exists, row_number)\n",
"\n",
" # Evaluate the dataset.\n",
- " if context_exists:\n",
+ " if metrics == \"final_only\":\n",
" ragas_result = evaluate(\n",
" ragas_input_ds,\n",
" metrics=[\n",
- " # Question -> Context metrics\n",
- " context_recall, \n",
- " context_precision, \n",
- " # Context -> Answer metrics\n",
- " answer_relevancy, \n",
- " faithfulness, \n",
- " # Question -> Answer metrics\n",
" answer_similarity,\n",
+ " answer_relevancy,\n",
" answer_correctness,])\n",
" else:\n",
+ " # calculate all metrics\n",
" ragas_result = evaluate(\n",
" ragas_input_ds,\n",
" metrics=[\n",
+ " # Question -> Context metrics\n",
+ " context_recall, \n",
+ " context_precision, \n",
+ " # Context -> Answer metrics\n",
+ " faithfulness, \n",
+ " # Question -> Answer metrics\n",
" answer_similarity,\n",
+ " answer_relevancy,\n",
" answer_correctness,])\n",
+ " \n",
" return ragas_result"
]
},
{
"cell_type": "code",
- "execution_count": 10,
+ "execution_count": 9,
"id": "5d9124c2",
"metadata": {},
"outputs": [
@@ -705,666 +716,87 @@
]
},
{
- "cell_type": "code",
- "execution_count": 11,
- "id": "819fcfe4",
+ "cell_type": "markdown",
+ "id": "bb69c50d",
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "evaluating with [context_recall]\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- " 0%| | 0/1 [00:00, ?it/s]huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n",
- "To disable this warning, you can either:\n",
- "\t- Avoid using `tokenizers` before the fork if possible\n",
- "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n",
- "100%|██████████| 1/1 [00:11<00:00, 11.55s/it]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "evaluating with [context_precision]\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "100%|██████████| 1/1 [00:05<00:00, 5.83s/it]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "evaluating with [answer_relevancy]\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "100%|██████████| 1/1 [00:06<00:00, 6.82s/it]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "evaluating with [faithfulness]\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "100%|██████████| 1/1 [00:30<00:00, 30.46s/it]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "evaluating with [answer_similarity]\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "100%|██████████| 1/1 [00:00<00:00, 1.87it/s]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "evaluating with [answer_correctness]\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "100%|██████████| 1/1 [00:16<00:00, 16.32s/it]\n"
- ]
- },
- {
- "data": {
- "text/html": [
- "
\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " question | \n",
- " ground_truths | \n",
- " contexts_Custom_RAG | \n",
- " answer_Custom_RAG | \n",
- " context_recall | \n",
- " context_precision | \n",
- " answer_relevancy | \n",
- " faithfulness | \n",
- " answer_similarity_Custom_RAG | \n",
- " answer_correctness_Custom_RAG | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " What do the parameters for HNSW mean?\\n | \n",
- " [- M: maximum degree of nodes in a layer of th... | \n",
- " [performance, HNSW limits the maximum degree o... | \n",
- " The parameters for HNSW have the following mea... | \n",
- " 1.0 | \n",
- " 1.0 | \n",
- " 0.979217 | \n",
- " 0.8 | \n",
- " 0.844853 | \n",
- " 0.483940 | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " What are HNSW good default parameters when dat... | \n",
- " [M=16, efConstruction=32, ef=32] | \n",
- " [Metrics. Vector Index¶ FLAT IVF_FLAT IVF_SQ8 ... | \n",
- " For a data size of 25K vectors with a dimensio... | \n",
- " 0.0 | \n",
- " 0.0 | \n",
- " 0.977890 | \n",
- " 0.0 | \n",
- " 0.775916 | \n",
- " 0.622550 | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " what is the default distance metric used in AU... | \n",
- " [Trick answer: IP inner product, not yet upda... | \n",
- " [The attributes of collection can be extracted... | \n",
- " The default distance metric used in AUTOINDEX ... | \n",
- " 0.0 | \n",
- " 0.0 | \n",
- " 0.990814 | \n",
- " 0.0 | \n",
- " 0.738229 | \n",
- " 0.484557 | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " How did New York City get its name? | \n",
- " [In the 1600’s, the Dutch planted a trading po... | \n",
- " [Etymology\\nSee also: Nicknames of New York Ci... | \n",
- " New York City was originally named New Amsterd... | \n",
- " 1.0 | \n",
- " 1.0 | \n",
- " 0.894238 | \n",
- " 1.0 | \n",
- " 0.942196 | \n",
- " 0.664120 | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " question \\\n",
- "0 What do the parameters for HNSW mean?\\n \n",
- "1 What are HNSW good default parameters when dat... \n",
- "2 what is the default distance metric used in AU... \n",
- "3 How did New York City get its name? \n",
- "\n",
- " ground_truths \\\n",
- "0 [- M: maximum degree of nodes in a layer of th... \n",
- "1 [M=16, efConstruction=32, ef=32] \n",
- "2 [Trick answer: IP inner product, not yet upda... \n",
- "3 [In the 1600’s, the Dutch planted a trading po... \n",
- "\n",
- " contexts_Custom_RAG \\\n",
- "0 [performance, HNSW limits the maximum degree o... \n",
- "1 [Metrics. Vector Index¶ FLAT IVF_FLAT IVF_SQ8 ... \n",
- "2 [The attributes of collection can be extracted... \n",
- "3 [Etymology\\nSee also: Nicknames of New York Ci... \n",
- "\n",
- " answer_Custom_RAG context_recall \\\n",
- "0 The parameters for HNSW have the following mea... 1.0 \n",
- "1 For a data size of 25K vectors with a dimensio... 0.0 \n",
- "2 The default distance metric used in AUTOINDEX ... 0.0 \n",
- "3 New York City was originally named New Amsterd... 1.0 \n",
- "\n",
- " context_precision answer_relevancy faithfulness \\\n",
- "0 1.0 0.979217 0.8 \n",
- "1 0.0 0.977890 0.0 \n",
- "2 0.0 0.990814 0.0 \n",
- "3 1.0 0.894238 1.0 \n",
- "\n",
- " answer_similarity_Custom_RAG answer_correctness_Custom_RAG \n",
- "0 0.844853 0.483940 \n",
- "1 0.775916 0.622550 \n",
- "2 0.738229 0.484557 \n",
- "3 0.942196 0.664120 "
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
"source": [
- "# Run Ragas Eval for all Questions, all Custom RAG Answers.\n",
- "\n",
- "# def evaluate_ragas(eval_df, answer_col_name=\"OpenAI_RAG_answer\", context_exists=False, row_number=-9999):\n",
- "ragas_result = evaluate_ragas(eval_df, \"Custom_RAG_answer\", True, -9999)\n",
- "ragas_df_Custom_RAG = ragas_result.to_pandas()\n",
- "\n",
- "# Rename the columns.\n",
- "rename_dict = {\n",
- " \"contexts\": \"contexts_Custom_RAG\",\n",
- " \"answer\": \"answer_Custom_RAG\",\n",
- " \"answer_similarity\": \"answer_similarity_Custom_RAG\",\n",
- " \"answer_correctness\": \"answer_correctness_Custom_RAG\"\n",
- "}\n",
- "ragas_df_Custom_RAG.rename(columns=rename_dict, inplace=True)\n",
- "# Reorder the columns.\n",
- "ragas_df_Custom_RAG = ragas_df_Custom_RAG.iloc[:,[0, 3, 1, 2, 4,5,6,7,8,9]]\n",
- "display(ragas_df_Custom_RAG.head())"
+ "## Define a Custom Execution Loop for RAG."
]
},
{
"cell_type": "code",
- "execution_count": 12,
- "id": "47f15260",
+ "execution_count": 10,
+ "id": "9b6aca9b",
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "evaluating with [answer_similarity]\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "100%|██████████| 1/1 [00:00<00:00, 1.70it/s]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "evaluating with [answer_correctness]\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "100%|██████████| 1/1 [00:13<00:00, 13.49s/it]\n"
- ]
- },
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " question | \n",
- " ground_truths | \n",
- " contexts_OpenAI_RAG | \n",
- " answer_OpenAI_RAG | \n",
- " answer_similarity_OpenAI_RAG | \n",
- " answer_correctness_OpenAI_RAG | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " What do the parameters for HNSW mean?\\n | \n",
- " [- M: maximum degree of nodes in a layer of th... | \n",
- " [] | \n",
- " The HNSW parameters include the “nlist” which ... | \n",
- " 0.747777 | \n",
- " 0.186985 | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " What are HNSW good default parameters when dat... | \n",
- " [M=16, efConstruction=32, ef=32] | \n",
- " [] | \n",
- " The default HNSW parameters for data size of 2... | \n",
- " 0.824855 | \n",
- " 0.206232 | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " what is the default distance metric used in AU... | \n",
- " [Trick answer: IP inner product, not yet upda... | \n",
- " [] | \n",
- " The default distance metric used in AUTOINDEX ... | \n",
- " 0.770573 | \n",
- " 0.692648 | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " How did New York City get its name? | \n",
- " [In the 1600’s, the Dutch planted a trading po... | \n",
- " [] | \n",
- " I'm sorry, but I couldn't find any information... | \n",
- " 0.777990 | \n",
- " 0.194492 | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " question \\\n",
- "0 What do the parameters for HNSW mean?\\n \n",
- "1 What are HNSW good default parameters when dat... \n",
- "2 what is the default distance metric used in AU... \n",
- "3 How did New York City get its name? \n",
- "\n",
- " ground_truths contexts_OpenAI_RAG \\\n",
- "0 [- M: maximum degree of nodes in a layer of th... [] \n",
- "1 [M=16, efConstruction=32, ef=32] [] \n",
- "2 [Trick answer: IP inner product, not yet upda... [] \n",
- "3 [In the 1600’s, the Dutch planted a trading po... [] \n",
- "\n",
- " answer_OpenAI_RAG \\\n",
- "0 The HNSW parameters include the “nlist” which ... \n",
- "1 The default HNSW parameters for data size of 2... \n",
- "2 The default distance metric used in AUTOINDEX ... \n",
- "3 I'm sorry, but I couldn't find any information... \n",
- "\n",
- " answer_similarity_OpenAI_RAG answer_correctness_OpenAI_RAG \n",
- "0 0.747777 0.186985 \n",
- "1 0.824855 0.206232 \n",
- "2 0.770573 0.692648 \n",
- "3 0.777990 0.194492 "
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
+ "outputs": [],
"source": [
- "# Run Ragas Eval for all Questions, all OpenAI RAG Answers.\n",
+ "import requests, json, pprint\n",
"\n",
- "ragas_result = evaluate_ragas(eval_df, \"OpenAI_RAG_answer\", False, -9999)\n",
- "ragas_df_OpenAI_RAG = ragas_result.to_pandas()\n",
+ "# Milvus search, define how many retrieval results to return.\n",
+ "# Milvus automatically sorts results descending by distance score.\n",
+ "TOP_K = 3\n",
"\n",
- "# Rename the columns.\n",
- "# Rename the columns.\n",
- "rename_dict = {\n",
- " \"contexts\": \"contexts_OpenAI_RAG\",\n",
- " \"answer\": \"answer_OpenAI_RAG\",\n",
- " \"answer_similarity\": \"answer_similarity_OpenAI_RAG\",\n",
- " \"answer_correctness\": \"answer_correctness_OpenAI_RAG\"\n",
- "}\n",
- "ragas_df_OpenAI_RAG.rename(columns=rename_dict, inplace=True)\n",
- "# Reorder the columns.\n",
- "ragas_df_OpenAI_RAG = ragas_df_OpenAI_RAG.iloc[:,[0, 3, 1, 2, 4,5]]\n",
- "display(ragas_df_OpenAI_RAG)"
+ "# Search a collection containing Milvus Documentation.\n",
+ "def zilliz_pipeline_collection_search(token, question):\n",
+ " # Define the URL, headers, and data\n",
+ " url = \"https://controller.api.gcp-us-west1.zillizcloud.com/v1/pipelines/pipe-3de3fb4a9bc3c2a64a786b/run\"\n",
+ " headers = {\n",
+ " \"Content-Type\": \"application/json\",\n",
+ " \"Authorization\": f\"Bearer {token}\",\n",
+ " }\n",
+ " data = {\n",
+ " \"data\": {\n",
+ " \"query_text\": question\n",
+ " },\n",
+ " \"params\": {\n",
+ " \"limit\": 3,\n",
+ " \"offset\": 0,\n",
+ " \"outputFields\": [\"chunk_text\", \"chunk_id\", \"doc_name\", \"source\"],\n",
+ " \"filter\": \"chunk_id >= 0 && doc_name == 'param.html'\",\n",
+ " }\n",
+ " }\n",
+ "\n",
+ " # Send the POST request\n",
+ " response = requests.post(url, headers=headers, json=data)\n",
+ "\n",
+ " # # Print the response\n",
+ " # pprint.pprint(response.json())\n",
+ " return response.json()\n",
+ "\n",
+ "# Search a collection containing Wikipedia articles about New York City.\n",
+ "def wikipedia_search(mc, collection_name, collection_encoder, question, output_fields=None, top_k=3):\n",
+ " # Embed the query\n",
+ " query_embeddings = _utils.embed_query(collection_encoder, [question])\n",
+ "\n",
+ " # Define search parameters\n",
+ " INDEX_PARAMS = dict({\n",
+ " 'M': M, \n",
+ " \"efConstruction\": efConstruction })\n",
+ " SEARCH_PARAMS = dict({\n",
+ " \"ef\": INDEX_PARAMS['efConstruction']\n",
+ " })\n",
+ "\n",
+ " # Define output fields to return\n",
+ " OUTPUT_FIELDS = [\"h1\", \"source\", \"chunk\"]\n",
+ "\n",
+ " # Perform the search\n",
+ " answers = mc.search(\n",
+ " collection_name,\n",
+ " data=query_embeddings, \n",
+ " search_params=SEARCH_PARAMS,\n",
+ " output_fields=output_fields, \n",
+ " filter=\"(source like 'https://en.wikipedia.org%')\",\n",
+ " limit=top_k,\n",
+ " consistency_level=\"Eventually\"\n",
+ " )\n",
+ "\n",
+ " return answers"
]
},
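+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "a1b2c3d4",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Illustrative usage sketch (hypothetical): exercise both retrievers on one question.\n",
+ "# TOKEN, mc, COLLECTION_NAME, and encoder stand in for objects created in earlier setup cells.\n",
+ "# sample_question = \"What do the parameters for HNSW mean?\"\n",
+ "# docs_hits = zilliz_pipeline_collection_search(TOKEN, sample_question)\n",
+ "# wiki_hits = wikipedia_search(mc, COLLECTION_NAME, encoder, sample_question,\n",
+ "#                              output_fields=[\"h1\", \"source\", \"chunk\"], top_k=TOP_K)\n",
+ "# pprint.pprint(docs_hits)\n",
+ "# pprint.pprint(wiki_hits[0])"
+ ]
+ },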
{
"cell_type": "code",
- "execution_count": 23,
- "id": "b10b757b",
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " question | \n",
- " ground_truths | \n",
- " contexts_Custom_RAG | \n",
- " answer_Custom_RAG | \n",
- " contexts_OpenAI_RAG | \n",
- " answer_OpenAI_RAG | \n",
- " answer_similarity_Custom_RAG | \n",
- " answer_correctness_Custom_RAG | \n",
- " answer_similarity_OpenAI_RAG | \n",
- " answer_correctness_OpenAI_RAG | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " What do the parameters for HNSW mean?\\n | \n",
- " [- M: maximum degree of nodes in a layer of th... | \n",
- " [performance, HNSW limits the maximum degree o... | \n",
- " The parameters for HNSW have the following mea... | \n",
- " [] | \n",
- " The HNSW parameters include the “nlist” which ... | \n",
- " 0.844853 | \n",
- " 0.483940 | \n",
- " 0.747777 | \n",
- " 0.186985 | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " What are HNSW good default parameters when dat... | \n",
- " [M=16, efConstruction=32, ef=32] | \n",
- " [Metrics. Vector Index¶ FLAT IVF_FLAT IVF_SQ8 ... | \n",
- " For a data size of 25K vectors with a dimensio... | \n",
- " [] | \n",
- " The default HNSW parameters for data size of 2... | \n",
- " 0.775916 | \n",
- " 0.622550 | \n",
- " 0.824855 | \n",
- " 0.206232 | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " what is the default distance metric used in AU... | \n",
- " [Trick answer: IP inner product, not yet upda... | \n",
- " [The attributes of collection can be extracted... | \n",
- " The default distance metric used in AUTOINDEX ... | \n",
- " [] | \n",
- " The default distance metric used in AUTOINDEX ... | \n",
- " 0.738229 | \n",
- " 0.484557 | \n",
- " 0.770573 | \n",
- " 0.692648 | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " How did New York City get its name? | \n",
- " [In the 1600’s, the Dutch planted a trading po... | \n",
- " [Etymology\\nSee also: Nicknames of New York Ci... | \n",
- " New York City was originally named New Amsterd... | \n",
- " [] | \n",
- " I'm sorry, but I couldn't find any information... | \n",
- " 0.942196 | \n",
- " 0.664120 | \n",
- " 0.777990 | \n",
- " 0.194492 | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " question \\\n",
- "0 What do the parameters for HNSW mean?\\n \n",
- "1 What are HNSW good default parameters when dat... \n",
- "2 what is the default distance metric used in AU... \n",
- "3 How did New York City get its name? \n",
- "\n",
- " ground_truths \\\n",
- "0 [- M: maximum degree of nodes in a layer of th... \n",
- "1 [M=16, efConstruction=32, ef=32] \n",
- "2 [Trick answer: IP inner product, not yet upda... \n",
- "3 [In the 1600’s, the Dutch planted a trading po... \n",
- "\n",
- " contexts_Custom_RAG \\\n",
- "0 [performance, HNSW limits the maximum degree o... \n",
- "1 [Metrics. Vector Index¶ FLAT IVF_FLAT IVF_SQ8 ... \n",
- "2 [The attributes of collection can be extracted... \n",
- "3 [Etymology\\nSee also: Nicknames of New York Ci... \n",
- "\n",
- " answer_Custom_RAG contexts_OpenAI_RAG \\\n",
- "0 The parameters for HNSW have the following mea... [] \n",
- "1 For a data size of 25K vectors with a dimensio... [] \n",
- "2 The default distance metric used in AUTOINDEX ... [] \n",
- "3 New York City was originally named New Amsterd... [] \n",
- "\n",
- " answer_OpenAI_RAG \\\n",
- "0 The HNSW parameters include the “nlist” which ... \n",
- "1 The default HNSW parameters for data size of 2... \n",
- "2 The default distance metric used in AUTOINDEX ... \n",
- "3 I'm sorry, but I couldn't find any information... \n",
- "\n",
- " answer_similarity_Custom_RAG answer_correctness_Custom_RAG \\\n",
- "0 0.844853 0.483940 \n",
- "1 0.775916 0.622550 \n",
- "2 0.738229 0.484557 \n",
- "3 0.942196 0.664120 \n",
- "\n",
- " answer_similarity_OpenAI_RAG answer_correctness_OpenAI_RAG \n",
- "0 0.747777 0.186985 \n",
- "1 0.824855 0.206232 \n",
- "2 0.770573 0.692648 \n",
- "3 0.777990 0.194492 "
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "\n",
- "####### FINAL SCORES OPENAI RAG vs MILVUS CUSTOM RAG #########\n",
- "LLM as judge model: gpt-3.5-turbo-1106 with temperature: 0.1 scores:\n",
- "# truth vs RAG answers: 4\n",
- "\n",
- "avg_faithfulness_Custom_RAG: 0.83\n",
- "avg_faithfulness_OpenAI_RAG: 0.78\n",
- "\n",
- "avg_relevancy_Custom_RAG: 0.56\n",
- "avg_relevancy_OpenAI_RAG: 0.32\n"
- ]
- }
- ],
- "source": [
- "# Merge the 2 ragas dfs so they are easier to compare.\n",
- "ragas_merged_df = ragas_df_Custom_RAG.iloc[:,[0,1,2,3,8,9]].merge(ragas_df_OpenAI_RAG.iloc[:, 2:], how='inner', left_index=True, right_index=True)\n",
- "# reorder columns\n",
- "ragas_merged_df = ragas_merged_df.iloc[:,[0,1,2,3,6,7,4,5,8,9]]\n",
- "display(ragas_merged_df.head())\n",
- "\n",
- "print()\n",
- "print(f\"####### FINAL SCORES OPENAI RAG vs MILVUS CUSTOM RAG #########\")\n",
- "print(f\"LLM as judge model: {LLM_NAME} with temperature: {TEMPERATURE} scores:\")\n",
- "print(f\"# Truth vs RAG answers: {len(ragas_merged_df)}\")\n",
- "print()\n",
- "print(f\"avg_faithfulness_Custom_RAG: {np.round(ragas_merged_df.answer_similarity_Custom_RAG.mean(), 2)}\")\n",
- "print(f\"avg_faithfulness_OpenAI_RAG: {np.round(ragas_merged_df.answer_similarity_OpenAI_RAG.mean(), 2)}\")\n",
- "print()\n",
- "print(f\"avg_relevancy_Custom_RAG: {np.round(ragas_merged_df.answer_correctness_Custom_RAG.mean(), 2)}\")\n",
- "print(f\"avg_relevancy_OpenAI_RAG: {np.round(ragas_merged_df.answer_correctness_OpenAI_RAG.mean(), 2)}\")"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "bb69c50d",
- "metadata": {},
- "source": [
- "## Define a Custom Execution Loop for RAG."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 14,
- "id": "9b6aca9b",
- "metadata": {},
- "outputs": [],
- "source": [
- "import requests, json, pprint\n",
- "\n",
- "# Milvus search, define how many retrieval results to return.\n",
- "# Milvus automatically sorts results descending by distance score.\n",
- "TOP_K = 3\n",
- "\n",
- "# Search a collection containing Milvus Documentation.\n",
- "def zilliz_pipeline_collection_search(token, question):\n",
- " # Define the URL, headers, and data\n",
- " url = \"https://controller.api.gcp-us-west1.zillizcloud.com/v1/pipelines/pipe-3de3fb4a9bc3c2a64a786b/run\"\n",
- " headers = {\n",
- " \"Content-Type\": \"application/json\",\n",
- " \"Authorization\": f\"Bearer {token}\",\n",
- " }\n",
- " data = {\n",
- " \"data\": {\n",
- " \"query_text\": question\n",
- " },\n",
- " \"params\": {\n",
- " \"limit\": 3,\n",
- " \"offset\": 0,\n",
- " \"outputFields\": [\"chunk_text\", \"chunk_id\", \"doc_name\", \"source\"],\n",
- " \"filter\": \"chunk_id >= 0 && doc_name == 'param.html'\",\n",
- " }\n",
- " }\n",
- "\n",
- " # Send the POST request\n",
- " response = requests.post(url, headers=headers, json=data)\n",
- "\n",
- " # # Print the response\n",
- " # pprint.pprint(response.json())\n",
- " return response.json()\n",
- "\n",
- "# Search a collection containing Wikipedia articles about New York City.\n",
- "def wikipedia_search(mc, collection_name, collection_encoder, question, output_fields=None, top_k=3):\n",
- " # Embed the query\n",
- " query_embeddings = _utils.embed_query(collection_encoder, [question])\n",
- "\n",
- " # Define search parameters\n",
- " INDEX_PARAMS = dict({\n",
- " 'M': M, \n",
- " \"efConstruction\": efConstruction })\n",
- " SEARCH_PARAMS = dict({\n",
- " \"ef\": INDEX_PARAMS['efConstruction']\n",
- " })\n",
- "\n",
- " # Define output fields to return\n",
- " OUTPUT_FIELDS = [\"h1\", \"source\", \"chunk\"]\n",
- "\n",
- " # Perform the search\n",
- " answers = mc.search(\n",
- " collection_name,\n",
- " data=query_embeddings, \n",
- " search_params=SEARCH_PARAMS,\n",
- " output_fields=output_fields, \n",
- " filter=\"(source like 'https://en.wikipedia.org%')\",\n",
- " limit=top_k,\n",
- " consistency_level=\"Eventually\"\n",
- " )\n",
- "\n",
- " return answers"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 15,
- "id": "cfb1f303",
+ "execution_count": 11,
+ "id": "cfb1f303",
"metadata": {},
"outputs": [],
"source": [
@@ -1429,7 +861,7 @@
},
{
"cell_type": "code",
- "execution_count": 16,
+ "execution_count": 12,
"id": "d671601b",
"metadata": {},
"outputs": [],
@@ -1450,17 +882,17 @@
" threshold_retrieval_score = 0.6\n",
" ragas_metrics= ['answer_relevancy', 'faithfulness']\n",
"\n",
- " # Step 1: Check input to see if it flags the Moderation API or is a prompt injection\n",
- " if debug:\n",
- " print()\n",
- " print(\"STEP 1: Check input to see if it flags the Moderation API or is a prompt injection\")\n",
- " response = openai_client.moderations.create(input=user_input)\n",
- " moderation_output = response.results[0]\n",
- " print(moderation_output.flagged) # False\n",
- "\n",
- " if moderation_output.flagged:\n",
- " print(\"Step 1: Input flagged by Moderation API.\")\n",
- " return \"Sorry, we cannot process this request.\", message_history\n",
+ " # # Step 1: Check input to see if it flags the Moderation API or is a prompt injection\n",
+ " # if debug:\n",
+ " # print()\n",
+ " # print(\"STEP 1: Check input to see if it flags the Moderation API or is a prompt injection\")\n",
+ " # response = openai_client.moderations.create(input=user_input)\n",
+ " # moderation_output = response.results[0]\n",
+ " # print(moderation_output.flagged) # False\n",
+ "\n",
+ " # if moderation_output.flagged:\n",
+ " # print(\"Step 1: Input flagged by Moderation API.\")\n",
+ " # return \"Sorry, we cannot process this request.\", message_history\n",
"\n",
" # Step 2: Retrieval from collection #1.\n",
" if debug:\n",
@@ -1545,9 +977,9 @@
" if debug:\n",
" print()\n",
" print(\"STEP 6: Evaluate whether the chatbot response answers the initial user query well.\")\n",
- " ragas_result = evaluate_ragas(eval_df, \"Custom_RAG_answer\", True, question_number)\n",
+ " ragas_result = evaluate_ragas(eval_df, \"Custom_RAG_answer\", True, question_number, \"final_only\")\n",
" ragas_df = ragas_result.to_pandas()\n",
- " print(f\"Ragas evaluation: answer similarity: {ragas_df.answer_similarity[0]}, answer relevancy: {np.round(ragas_df.answer_correctness[0],3)}\")\n",
+ " print(f\"Ragas evaluation: answer similarity: {ragas_df.answer_similarity[0]}, answer relevancy: {np.round(ragas_df.answer_relevancy[0],3)}, answer correctness: {np.round(ragas_df.answer_correctness[0],3)}\")\n",
" # could also check for other metrics here...\n",
" evaluation_response = \"Y\"\n",
"\n",
@@ -1566,156 +998,752 @@
},
{
"cell_type": "code",
- "execution_count": 17,
- "id": "bb1a52ca",
+ "execution_count": 13,
+ "id": "bb1a52ca",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "question = How did New York City get its name?\n",
+ "\n",
+ "STEP 2: Retrieval from collection #1 MilvusDocs.\n",
+ "DISTANCE SCORE: 0.39108937978744507 branching logic...\n",
+ "\n",
+ "STEP 3: Score is too low, GET INTENT from the user's question.\n",
+ "intent = new_york\n",
+ "\n",
+ "STEP 4: Based on question intent, retrieve from collection #2 Wikipedia.\n",
+ "chunk_answer: New York City traces its origins to Fort Amsterdam and a trading post founded on the southern tip of Manhattan Island by Dutch colonists in approximat\n",
+ "DISTANCE SCORE: 0.7961502075195312 branch logic...\n",
+ "\n",
+ "Score from custom RAG Retrieval is above threshold, proceed to answer generation step.\n",
+ "\n",
+ "STEP 5: Generating GPT3.5 answer from the custom execution loop for RAG in the ASSISTANT PROMPT.\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n",
+ "To disable this warning, you can either:\n",
+ "\t- Avoid using `tokenizers` before the fork if possible\n",
+ "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "STEP 6: Evaluate whether the chatbot response answers the initial user query well.\n",
+ "evaluating with [answer_similarity]\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "100%|██████████| 1/1 [00:00<00:00, 1.49it/s]\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "evaluating with [answer_relevancy]\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "100%|██████████| 1/1 [00:01<00:00, 1.73s/it]\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "evaluating with [answer_correctness]\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "100%|██████████| 1/1 [00:05<00:00, 5.98s/it]\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Ragas evaluation: answer similarity: 0.9421961714808575, answer relevancy: 0.894, answer correctness: 0.664\n",
+ "\n",
+ "STEP 7: LLM answer passed Evaluation, return it to the user.\n",
+ "('Answer: New York City was originally named New Amsterdam by Dutch colonists '\n",
+ " 'in 1626. However, it was renamed New York in 1664 after King Charles II '\n",
+ " 'granted the lands to his brother, the Duke of York, when the city came under '\n",
+ " 'British control.')\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Test the custom RAG execution loop using a question.\n",
+ "\n",
+ "QUESTION_NUMBER = 3 #2 or 3\n",
+ "SAMPLE_QUESTION = question_list[QUESTION_NUMBER]\n",
+ "print(f\"question = {SAMPLE_QUESTION}\")\n",
+ "\n",
+ "truth_answer = truth_list[QUESTION_NUMBER]\n",
+ "\n",
+ "# Test the OpenAI answer.\n",
+ "all_messages = []\n",
+ "answer_history = []\n",
+ "openai_answer, messages = process_user_message(SAMPLE_QUESTION, QUESTION_NUMBER, all_messages, debug=True)\n",
+ "all_messages.append(messages)\n",
+ "answer_history.append(openai_answer)\n",
+ "pprint.pprint(f\"Answer: {openai_answer}\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "67fa1791",
+ "metadata": {},
+ "source": [
+ "## Final Eval Comparisons Custom RAG vs OpenAI RAG"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "id": "aa9a35cd",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "evaluating with [context_recall]\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "100%|██████████| 1/1 [00:14<00:00, 14.62s/it]\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "evaluating with [context_precision]\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "100%|██████████| 1/1 [00:07<00:00, 7.86s/it]\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "evaluating with [faithfulness]\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "100%|██████████| 1/1 [00:29<00:00, 29.35s/it]\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "evaluating with [answer_similarity]\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "100%|██████████| 1/1 [00:01<00:00, 1.20s/it]\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "evaluating with [answer_relevancy]\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "100%|██████████| 1/1 [00:07<00:00, 7.96s/it]\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "evaluating with [answer_correctness]\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "100%|██████████| 1/1 [00:20<00:00, 20.12s/it]\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " question | \n",
+ " ground_truths | \n",
+ " contexts_Custom_RAG | \n",
+ " answer_Custom_RAG | \n",
+ " context_recall | \n",
+ " context_precision | \n",
+ " faithfulness | \n",
+ " answer_similarity_Custom_RAG | \n",
+ " answer_relevancy_Custom_RAG | \n",
+ " answer_correctness_Custom_RAG | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " What do the parameters for HNSW mean?\\n | \n",
+ " [- M: maximum degree of nodes in a layer of th... | \n",
+ " [performance, HNSW limits the maximum degree o... | \n",
+ " The parameters for HNSW have the following mea... | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 0.8 | \n",
+ " 0.844867 | \n",
+ " 0.979217 | \n",
+ " 0.620304 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " What are HNSW good default parameters when dat... | \n",
+ " [M=16, efConstruction=32, ef=32] | \n",
+ " [Metrics. Vector Index¶ FLAT IVF_FLAT IVF_SQ8 ... | \n",
+ " For a data size of 25K vectors with a dimensio... | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.776006 | \n",
+ " 0.977902 | \n",
+ " 0.622550 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " what is the default distance metric used in AU... | \n",
+ " [Trick answer: IP inner product, not yet upda... | \n",
+ " [The attributes of collection can be extracted... | \n",
+ " The default distance metric used in AUTOINDEX ... | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.738060 | \n",
+ " 0.990814 | \n",
+ " 0.484557 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " How did New York City get its name? | \n",
+ " [In the 1600’s, the Dutch planted a trading po... | \n",
+ " [Etymology\\nSee also: Nicknames of New York Ci... | \n",
+ " New York City was originally named New Amsterd... | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 0.5 | \n",
+ " 0.942196 | \n",
+ " 0.894259 | \n",
+ " 0.664120 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " question \\\n",
+ "0 What do the parameters for HNSW mean?\\n \n",
+ "1 What are HNSW good default parameters when dat... \n",
+ "2 what is the default distance metric used in AU... \n",
+ "3 How did New York City get its name? \n",
+ "\n",
+ " ground_truths \\\n",
+ "0 [- M: maximum degree of nodes in a layer of th... \n",
+ "1 [M=16, efConstruction=32, ef=32] \n",
+ "2 [Trick answer: IP inner product, not yet upda... \n",
+ "3 [In the 1600’s, the Dutch planted a trading po... \n",
+ "\n",
+ " contexts_Custom_RAG \\\n",
+ "0 [performance, HNSW limits the maximum degree o... \n",
+ "1 [Metrics. Vector Index¶ FLAT IVF_FLAT IVF_SQ8 ... \n",
+ "2 [The attributes of collection can be extracted... \n",
+ "3 [Etymology\\nSee also: Nicknames of New York Ci... \n",
+ "\n",
+ " answer_Custom_RAG context_recall \\\n",
+ "0 The parameters for HNSW have the following mea... 1.0 \n",
+ "1 For a data size of 25K vectors with a dimensio... 0.0 \n",
+ "2 The default distance metric used in AUTOINDEX ... 0.0 \n",
+ "3 New York City was originally named New Amsterd... 1.0 \n",
+ "\n",
+ " context_precision faithfulness answer_similarity_Custom_RAG \\\n",
+ "0 1.0 0.8 0.844867 \n",
+ "1 0.0 0.0 0.776006 \n",
+ "2 0.0 0.0 0.738060 \n",
+ "3 1.0 0.5 0.942196 \n",
+ "\n",
+ " answer_relevancy_Custom_RAG answer_correctness_Custom_RAG \n",
+ "0 0.979217 0.620304 \n",
+ "1 0.977902 0.622550 \n",
+ "2 0.990814 0.484557 \n",
+ "3 0.894259 0.664120 "
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# Run Ragas Eval for all Questions, all Custom RAG Answers.\n",
+ "\n",
+ "# def evaluate_ragas(input_df, answer_col_name=\"OpenAI_RAG_answer\", context_exists=False, row_number=-9999, metrics=\"final_only\"):\n",
+ "ragas_result = evaluate_ragas(eval_df, \"Custom_RAG_answer\", True, -9999, \"all\")\n",
+ "ragas_df_Custom_RAG = ragas_result.to_pandas()\n",
+ "\n",
+ "# Rename the columns.\n",
+ "rename_dict = {\n",
+ " \"contexts\": \"contexts_Custom_RAG\",\n",
+ " \"answer\": \"answer_Custom_RAG\",\n",
+ " \"answer_similarity\": \"answer_similarity_Custom_RAG\",\n",
+ " \"answer_relevancy\": \"answer_relevancy_Custom_RAG\",\n",
+ " \"answer_correctness\": \"answer_correctness_Custom_RAG\"\n",
+ "}\n",
+ "ragas_df_Custom_RAG.rename(columns=rename_dict, inplace=True)\n",
+ "# Reorder the columns.\n",
+ "ragas_df_Custom_RAG = ragas_df_Custom_RAG.iloc[:,[0, 3, 1, 2, 4,5,6,7,8,9]]\n",
+ "display(ragas_df_Custom_RAG.head())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "id": "1f1b1f4e",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "evaluating with [answer_similarity]\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "100%|██████████| 1/1 [00:00<00:00, 2.01it/s]\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "evaluating with [answer_relevancy]\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "100%|██████████| 1/1 [00:07<00:00, 7.85s/it]\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "evaluating with [answer_correctness]\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "100%|██████████| 1/1 [00:14<00:00, 14.49s/it]\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " question | \n",
+ " ground_truths | \n",
+ " contexts_OpenAI_RAG | \n",
+ " answer_OpenAI_RAG | \n",
+ " answer_similarity_OpenAI_RAG | \n",
+ " answer_relevancy_OpenAI_RAG | \n",
+ " answer_correctness_OpenAI_RAG | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " What do the parameters for HNSW mean?\\n | \n",
+ " [- M: maximum degree of nodes in a layer of th... | \n",
+ " [] | \n",
+ " The HNSW parameters include the “nlist” which ... | \n",
+ " 0.747939 | \n",
+ " 0.936005 | \n",
+ " 0.186985 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " What are HNSW good default parameters when dat... | \n",
+ " [M=16, efConstruction=32, ef=32] | \n",
+ " [] | \n",
+ " The default HNSW parameters for data size of 2... | \n",
+ " 0.824929 | \n",
+ " 0.981672 | \n",
+ " 0.206232 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " what is the default distance metric used in AU... | \n",
+ " [Trick answer: IP inner product, not yet upda... | \n",
+ " [] | \n",
+ " The default distance metric used in AUTOINDEX ... | \n",
+ " 0.770590 | \n",
+ " 0.990814 | \n",
+ " 0.692648 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " How did New York City get its name? | \n",
+ " [In the 1600’s, the Dutch planted a trading po... | \n",
+ " [] | \n",
+ " I'm sorry, but I couldn't find any information... | \n",
+ " 0.777967 | \n",
+ " 0.000000 | \n",
+ " 0.194492 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " question \\\n",
+ "0 What do the parameters for HNSW mean?\\n \n",
+ "1 What are HNSW good default parameters when dat... \n",
+ "2 what is the default distance metric used in AU... \n",
+ "3 How did New York City get its name? \n",
+ "\n",
+ " ground_truths contexts_OpenAI_RAG \\\n",
+ "0 [- M: maximum degree of nodes in a layer of th... [] \n",
+ "1 [M=16, efConstruction=32, ef=32] [] \n",
+ "2 [Trick answer: IP inner product, not yet upda... [] \n",
+ "3 [In the 1600’s, the Dutch planted a trading po... [] \n",
+ "\n",
+ " answer_OpenAI_RAG \\\n",
+ "0 The HNSW parameters include the “nlist” which ... \n",
+ "1 The default HNSW parameters for data size of 2... \n",
+ "2 The default distance metric used in AUTOINDEX ... \n",
+ "3 I'm sorry, but I couldn't find any information... \n",
+ "\n",
+ " answer_similarity_OpenAI_RAG answer_relevancy_OpenAI_RAG \\\n",
+ "0 0.747939 0.936005 \n",
+ "1 0.824929 0.981672 \n",
+ "2 0.770590 0.990814 \n",
+ "3 0.777967 0.000000 \n",
+ "\n",
+ " answer_correctness_OpenAI_RAG \n",
+ "0 0.186985 \n",
+ "1 0.206232 \n",
+ "2 0.692648 \n",
+ "3 0.194492 "
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# Run Ragas Eval for all Questions, all OpenAI RAG Answers.\n",
+ "\n",
+ "ragas_result = evaluate_ragas(eval_df, \"OpenAI_RAG_answer\", False, -9999)\n",
+ "ragas_df_OpenAI_RAG = ragas_result.to_pandas()\n",
+ "\n",
+ "# Rename the columns.\n",
+ "# Rename the columns.\n",
+ "rename_dict = {\n",
+ " \"contexts\": \"contexts_OpenAI_RAG\",\n",
+ " \"answer\": \"answer_OpenAI_RAG\",\n",
+ " \"answer_similarity\": \"answer_similarity_OpenAI_RAG\",\n",
+ " \"answer_relevancy\": \"answer_relevancy_OpenAI_RAG\",\n",
+ " \"answer_correctness\": \"answer_correctness_OpenAI_RAG\"\n",
+ "}\n",
+ "ragas_df_OpenAI_RAG.rename(columns=rename_dict, inplace=True)\n",
+ "# Reorder the columns.\n",
+ "ragas_df_OpenAI_RAG = ragas_df_OpenAI_RAG.iloc[:,[0, 3, 1, 2, 4,5,6]]\n",
+ "display(ragas_df_OpenAI_RAG)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "id": "c19bc0a5",
"metadata": {},
"outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " question | \n",
+ " ground_truths | \n",
+ " contexts_Custom_RAG | \n",
+ " answer_Custom_RAG | \n",
+ " contexts_OpenAI_RAG | \n",
+ " answer_OpenAI_RAG | \n",
+ " answer_similarity_Custom_RAG | \n",
+ " answer_relevancy_Custom_RAG | \n",
+ " answer_correctness_Custom_RAG | \n",
+ " answer_similarity_OpenAI_RAG | \n",
+ " answer_relevancy_OpenAI_RAG | \n",
+ " answer_correctness_OpenAI_RAG | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " What do the parameters for HNSW mean?\\n | \n",
+ " [- M: maximum degree of nodes in a layer of th... | \n",
+ " [performance, HNSW limits the maximum degree o... | \n",
+ " The parameters for HNSW have the following mea... | \n",
+ " [] | \n",
+ " The HNSW parameters include the “nlist” which ... | \n",
+ " 0.844867 | \n",
+ " 0.979217 | \n",
+ " 0.620304 | \n",
+ " 0.747939 | \n",
+ " 0.936005 | \n",
+ " 0.186985 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " What are HNSW good default parameters when dat... | \n",
+ " [M=16, efConstruction=32, ef=32] | \n",
+ " [Metrics. Vector Index¶ FLAT IVF_FLAT IVF_SQ8 ... | \n",
+ " For a data size of 25K vectors with a dimensio... | \n",
+ " [] | \n",
+ " The default HNSW parameters for data size of 2... | \n",
+ " 0.776006 | \n",
+ " 0.977902 | \n",
+ " 0.622550 | \n",
+ " 0.824929 | \n",
+ " 0.981672 | \n",
+ " 0.206232 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " what is the default distance metric used in AU... | \n",
+ " [Trick answer: IP inner product, not yet upda... | \n",
+ " [The attributes of collection can be extracted... | \n",
+ " The default distance metric used in AUTOINDEX ... | \n",
+ " [] | \n",
+ " The default distance metric used in AUTOINDEX ... | \n",
+ " 0.738060 | \n",
+ " 0.990814 | \n",
+ " 0.484557 | \n",
+ " 0.770590 | \n",
+ " 0.990814 | \n",
+ " 0.692648 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " How did New York City get its name? | \n",
+ " [In the 1600’s, the Dutch planted a trading po... | \n",
+ " [Etymology\\nSee also: Nicknames of New York Ci... | \n",
+ " New York City was originally named New Amsterd... | \n",
+ " [] | \n",
+ " I'm sorry, but I couldn't find any information... | \n",
+ " 0.942196 | \n",
+ " 0.894259 | \n",
+ " 0.664120 | \n",
+ " 0.777967 | \n",
+ " 0.000000 | \n",
+ " 0.194492 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " question \\\n",
+ "0 What do the parameters for HNSW mean?\\n \n",
+ "1 What are HNSW good default parameters when dat... \n",
+ "2 what is the default distance metric used in AU... \n",
+ "3 How did New York City get its name? \n",
+ "\n",
+ " ground_truths \\\n",
+ "0 [- M: maximum degree of nodes in a layer of th... \n",
+ "1 [M=16, efConstruction=32, ef=32] \n",
+ "2 [Trick answer: IP inner product, not yet upda... \n",
+ "3 [In the 1600’s, the Dutch planted a trading po... \n",
+ "\n",
+ " contexts_Custom_RAG \\\n",
+ "0 [performance, HNSW limits the maximum degree o... \n",
+ "1 [Metrics. Vector Index¶ FLAT IVF_FLAT IVF_SQ8 ... \n",
+ "2 [The attributes of collection can be extracted... \n",
+ "3 [Etymology\\nSee also: Nicknames of New York Ci... \n",
+ "\n",
+ " answer_Custom_RAG contexts_OpenAI_RAG \\\n",
+ "0 The parameters for HNSW have the following mea... [] \n",
+ "1 For a data size of 25K vectors with a dimensio... [] \n",
+ "2 The default distance metric used in AUTOINDEX ... [] \n",
+ "3 New York City was originally named New Amsterd... [] \n",
+ "\n",
+ " answer_OpenAI_RAG \\\n",
+ "0 The HNSW parameters include the “nlist” which ... \n",
+ "1 The default HNSW parameters for data size of 2... \n",
+ "2 The default distance metric used in AUTOINDEX ... \n",
+ "3 I'm sorry, but I couldn't find any information... \n",
+ "\n",
+ " answer_similarity_Custom_RAG answer_relevancy_Custom_RAG \\\n",
+ "0 0.844867 0.979217 \n",
+ "1 0.776006 0.977902 \n",
+ "2 0.738060 0.990814 \n",
+ "3 0.942196 0.894259 \n",
+ "\n",
+ " answer_correctness_Custom_RAG answer_similarity_OpenAI_RAG \\\n",
+ "0 0.620304 0.747939 \n",
+ "1 0.622550 0.824929 \n",
+ "2 0.484557 0.770590 \n",
+ "3 0.664120 0.777967 \n",
+ "\n",
+ " answer_relevancy_OpenAI_RAG answer_correctness_OpenAI_RAG \n",
+ "0 0.936005 0.186985 \n",
+ "1 0.981672 0.206232 \n",
+ "2 0.990814 0.692648 \n",
+ "3 0.000000 0.194492 "
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
{
"name": "stdout",
"output_type": "stream",
"text": [
- "question = How did New York City get its name?\n",
- "\n",
- "STEP 1: Check input to see if it flags the Moderation API or is a prompt injection\n",
- "False\n",
"\n",
- "STEP 2: Retrieval from collection #1 MilvusDocs.\n",
- "DISTANCE SCORE: 0.39108937978744507 branching logic...\n",
- "\n",
- "STEP 3: Score is too low, GET INTENT from the user's question.\n",
- "intent = new_york\n",
- "\n",
- "STEP 4: Based on question intent, retrieve from collection #2 Wikipedia.\n",
- "chunk_answer: New York City traces its origins to Fort Amsterdam and a trading post founded on the southern tip of Manhattan Island by Dutch colonists in approximat\n",
- "DISTANCE SCORE: 0.7961502075195312 branch logic...\n",
- "\n",
- "Score from custom RAG Retrieval is above threshold, proceed to answer generation step.\n",
+ "####### FINAL SCORES OPENAI RAG vs MILVUS CUSTOM RAG #########\n",
+ "LLM as judge model: gpt-3.5-turbo-1106 with temperature: 0.1 scores:\n",
+ "# Truth vs RAG answers: 4\n",
"\n",
- "STEP 5: Generating GPT3.5 answer from the custom execution loop for RAG in the ASSISTANT PROMPT.\n",
+ "avg_similarity_Custom_RAG: 0.83\n",
+ "avg_similarity_OpenAI_RAG: 0.78\n",
"\n",
- "STEP 6: Evaluate whether the chatbot response answers the initial user query well.\n",
- "evaluating with [context_recall]\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "100%|██████████| 1/1 [00:05<00:00, 5.24s/it]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "evaluating with [context_precision]\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "100%|██████████| 1/1 [00:01<00:00, 1.57s/it]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "evaluating with [answer_relevancy]\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "100%|██████████| 1/1 [00:01<00:00, 1.84s/it]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "evaluating with [faithfulness]\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "100%|██████████| 1/1 [00:06<00:00, 6.75s/it]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "evaluating with [answer_similarity]\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "100%|██████████| 1/1 [00:02<00:00, 2.76s/it]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "evaluating with [answer_correctness]\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "100%|██████████| 1/1 [00:04<00:00, 4.89s/it]"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Ragas evaluation: answer similarity: 0.9425150309940216, answer relevancy: 0.664\n",
+ "answer_relevancy_Custom_RAG: 0.96\n",
+ "avg_relevancy_OpenAI_RAG: 0.73\n",
"\n",
- "STEP 7: LLM answer passed Evaluation, return it to the user.\n",
- "('Answer: New York City was originally named New Amsterdam by Dutch colonists '\n",
- " 'in 1626. However, in 1664, the city came under British control and was '\n",
- " 'renamed New York after King Charles II granted the lands to his brother, the '\n",
- " 'Duke of York.')\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n"
+ "avg_correctness_Custom_RAG: 0.6\n",
+ "avg_correctness_OpenAI_RAG: 0.32\n"
]
}
],
"source": [
- "# Test the custom RAG execution loop using a question.\n",
- "\n",
- "QUESTION_NUMBER = 3 #2 or 3\n",
- "SAMPLE_QUESTION = question_list[QUESTION_NUMBER]\n",
- "print(f\"question = {SAMPLE_QUESTION}\")\n",
- "\n",
- "truth_answer = truth_list[QUESTION_NUMBER]\n",
+ "# Merge the 2 ragas dfs so they are easier to compare.\n",
+ "ragas_merged_df = ragas_df_Custom_RAG.iloc[:,[0,1,2,3,7,8,9]].merge(ragas_df_OpenAI_RAG.iloc[:, 2:], how='inner', left_index=True, right_index=True)\n",
+ "# reorder columns\n",
+ "ragas_merged_df = ragas_merged_df.iloc[:,[0,1,2,3,7,8,4,5,6,9,10,11]]\n",
+ "display(ragas_merged_df.head())\n",
"\n",
- "# Test the OpenAI answer.\n",
- "all_messages = []\n",
- "answer_history = []\n",
- "openai_answer, messages = process_user_message(SAMPLE_QUESTION, QUESTION_NUMBER, all_messages, debug=True)\n",
- "all_messages.append(messages)\n",
- "answer_history.append(openai_answer)\n",
- "pprint.pprint(f\"Answer: {openai_answer}\")"
+ "print()\n",
+ "print(f\"####### FINAL SCORES OPENAI RAG vs MILVUS CUSTOM RAG #########\")\n",
+ "print(f\"LLM as judge model: {LLM_NAME} with temperature: {TEMPERATURE} scores:\")\n",
+ "print(f\"# Truth vs RAG answers: {len(ragas_merged_df)}\")\n",
+ "print()\n",
+ "print(f\"avg_similarity_Custom_RAG: {np.round(ragas_merged_df.answer_similarity_Custom_RAG.mean(), 2)}\")\n",
+ "print(f\"avg_similarity_OpenAI_RAG: {np.round(ragas_merged_df.answer_similarity_OpenAI_RAG.mean(), 2)}\")\n",
+ "print()\n",
+ "print(f\"answer_relevancy_Custom_RAG: {np.round(ragas_merged_df.answer_relevancy_Custom_RAG.mean(), 2)}\")\n",
+ "print(f\"avg_relevancy_OpenAI_RAG: {np.round(ragas_merged_df.answer_relevancy_OpenAI_RAG.mean(), 2)}\")\n",
+ "print()\n",
+ "print(f\"avg_correctness_Custom_RAG: {np.round(ragas_merged_df.answer_correctness_Custom_RAG.mean(), 2)}\")\n",
+ "print(f\"avg_correctness_OpenAI_RAG: {np.round(ragas_merged_df.answer_correctness_OpenAI_RAG.mean(), 2)}\")"
]
},
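+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "e5f6a7b8",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Optional, name-based alternative to the positional .iloc reordering above;\n",
+ "# it assumes the column names produced by the rename steps earlier in this notebook.\n",
+ "# ordered_cols = [\n",
+ "#     \"question\", \"ground_truths\",\n",
+ "#     \"contexts_Custom_RAG\", \"answer_Custom_RAG\",\n",
+ "#     \"contexts_OpenAI_RAG\", \"answer_OpenAI_RAG\",\n",
+ "#     \"answer_similarity_Custom_RAG\", \"answer_relevancy_Custom_RAG\", \"answer_correctness_Custom_RAG\",\n",
+ "#     \"answer_similarity_OpenAI_RAG\", \"answer_relevancy_OpenAI_RAG\", \"answer_correctness_OpenAI_RAG\",\n",
+ "# ]\n",
+ "# ragas_merged_df = ragas_merged_df[ordered_cols]"
+ ]
+ },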
{
"cell_type": "code",
- "execution_count": 18,
+ "execution_count": 17,
"id": "d0e81e68",
"metadata": {},
"outputs": [],
@@ -1726,7 +1754,7 @@
},
{
"cell_type": "code",
- "execution_count": 19,
+ "execution_count": 18,
"id": "c777937e",
"metadata": {},
"outputs": [