diff --git a/bootcamp/OpenAIAssistants/custom_RAG_workflow.ipynb b/bootcamp/OpenAIAssistants/custom_RAG_workflow.ipynb index 1ce67ce60..afa4e0256 100755 --- a/bootcamp/OpenAIAssistants/custom_RAG_workflow.ipynb +++ b/bootcamp/OpenAIAssistants/custom_RAG_workflow.ipynb @@ -96,7 +96,6 @@ "# Connect to Zilliz cloud using endpoint URI and API key TOKEN.\n", "# TODO change this.\n", "CLUSTER_ENDPOINT=\"https://in03-xxxx.api.gcp-us-west1.zillizcloud.com:443\"\n", - "CLUSTER_ENDPOINT=\"https://in03-48a5b11fae525c9.api.gcp-us-west1.zillizcloud.com:443\"\n", "connections.connect(\n", " alias='default',\n", " # Public endpoint obtained from Zilliz Cloud\n", @@ -338,14 +337,14 @@ "name": "stderr", "output_type": "stream", "text": [ - "100%|██████████| 1/1 [00:04<00:00, 4.95s/it]\n" + "100%|██████████| 1/1 [00:03<00:00, 3.95s/it]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Milvus Client insert time for 704 vectors: 4.952448844909668 seconds\n" + "Milvus Client insert time for 704 vectors: 3.9572505950927734 seconds\n" ] } ], @@ -435,10 +434,19 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 7, "id": "b5b6da85", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/christybergman/mambaforge/envs/py311new/lib/python3.11/site-packages/langchain_core/_api/deprecation.py:117: LangChainDeprecationWarning: The class `langchain_community.chat_models.openai.ChatOpenAI` was deprecated in langchain-community 0.0.10 and will be removed in 0.2.0. An updated version of the class exists in the langchain-openai package and should be used instead. To use it run `pip install -U langchain-openai` and import as `from langchain_openai import ChatOpenAI`.\n", + " warn_deprecated(\n" + ] + } + ], "source": [ "# Ragas default uses HuggingFace Datasets.\n", "# https://docs.ragas.io/en/latest/getstarted/evaluation.html\n", @@ -454,10 +462,10 @@ " context_recall, \n", " context_precision, \n", " # Context -> Answer metrics\n", - " answer_relevancy, \n", " faithfulness, \n", " # Question -> Answer metrics\n", " answer_similarity,\n", + " answer_relevancy, \n", " answer_correctness\n", " )\n", "metrics = ['context_recall', 'context_precision', 'answer_relevancy', 'faithfulness', 'answer_similarity', 'answer_correctness']\n", @@ -474,7 +482,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 8, "id": "5e2db9c0", "metadata": {}, "outputs": [], @@ -510,37 +518,40 @@ " \n", " return ragas_ds\n", "\n", - "def evaluate_ragas(input_df, answer_col_name=\"OpenAI_RAG_answer\", context_exists=False, row_number=-9999):\n", + "def evaluate_ragas(input_df, answer_col_name=\"OpenAI_RAG_answer\", context_exists=False, row_number=-9999, metrics=\"final_only\"):\n", "\n", " # Create a ragas dataset.\n", " ragas_input_ds = assemble_ragas_dataset(input_df, answer_col_name, context_exists, row_number)\n", "\n", " # Evaluate the dataset.\n", - " if context_exists:\n", + " if metrics == \"final_only\":\n", " ragas_result = evaluate(\n", " ragas_input_ds,\n", " metrics=[\n", - " # Question -> Context metrics\n", - " context_recall, \n", - " context_precision, \n", - " # Context -> Answer metrics\n", - " answer_relevancy, \n", - " faithfulness, \n", - " # Question -> Answer metrics\n", " answer_similarity,\n", + " answer_relevancy,\n", " answer_correctness,])\n", " else:\n", + " # calculate all metrics\n", " ragas_result = evaluate(\n", " ragas_input_ds,\n", " metrics=[\n", + " # Question -> Context metrics\n", + " context_recall, \n", + " context_precision, \n", + " # Context -> Answer metrics\n", + " faithfulness, \n", + " # Question -> Answer metrics\n", " answer_similarity,\n", + " answer_relevancy,\n", " answer_correctness,])\n", + " \n", " return ragas_result" ] }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 9, "id": "5d9124c2", "metadata": {}, "outputs": [ @@ -705,666 +716,87 @@ ] }, { - "cell_type": "code", - "execution_count": 11, - "id": "819fcfe4", + "cell_type": "markdown", + "id": "bb69c50d", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "evaluating with [context_recall]\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 0%| | 0/1 [00:00\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
questionground_truthscontexts_Custom_RAGanswer_Custom_RAGcontext_recallcontext_precisionanswer_relevancyfaithfulnessanswer_similarity_Custom_RAGanswer_correctness_Custom_RAG
0What do the parameters for HNSW mean?\\n[- M: maximum degree of nodes in a layer of th...[performance, HNSW limits the maximum degree o...The parameters for HNSW have the following mea...1.01.00.9792170.80.8448530.483940
1What are HNSW good default parameters when dat...[M=16, efConstruction=32, ef=32][Metrics. Vector Index¶ FLAT IVF_FLAT IVF_SQ8 ...For a data size of 25K vectors with a dimensio...0.00.00.9778900.00.7759160.622550
2what is the default distance metric used in AU...[Trick answer: IP inner product, not yet upda...[The attributes of collection can be extracted...The default distance metric used in AUTOINDEX ...0.00.00.9908140.00.7382290.484557
3How did New York City get its name?[In the 1600’s, the Dutch planted a trading po...[Etymology\\nSee also: Nicknames of New York Ci...New York City was originally named New Amsterd...1.01.00.8942381.00.9421960.664120
\n", - "" - ], - "text/plain": [ - " question \\\n", - "0 What do the parameters for HNSW mean?\\n \n", - "1 What are HNSW good default parameters when dat... \n", - "2 what is the default distance metric used in AU... \n", - "3 How did New York City get its name? \n", - "\n", - " ground_truths \\\n", - "0 [- M: maximum degree of nodes in a layer of th... \n", - "1 [M=16, efConstruction=32, ef=32] \n", - "2 [Trick answer: IP inner product, not yet upda... \n", - "3 [In the 1600’s, the Dutch planted a trading po... \n", - "\n", - " contexts_Custom_RAG \\\n", - "0 [performance, HNSW limits the maximum degree o... \n", - "1 [Metrics. Vector Index¶ FLAT IVF_FLAT IVF_SQ8 ... \n", - "2 [The attributes of collection can be extracted... \n", - "3 [Etymology\\nSee also: Nicknames of New York Ci... \n", - "\n", - " answer_Custom_RAG context_recall \\\n", - "0 The parameters for HNSW have the following mea... 1.0 \n", - "1 For a data size of 25K vectors with a dimensio... 0.0 \n", - "2 The default distance metric used in AUTOINDEX ... 0.0 \n", - "3 New York City was originally named New Amsterd... 1.0 \n", - "\n", - " context_precision answer_relevancy faithfulness \\\n", - "0 1.0 0.979217 0.8 \n", - "1 0.0 0.977890 0.0 \n", - "2 0.0 0.990814 0.0 \n", - "3 1.0 0.894238 1.0 \n", - "\n", - " answer_similarity_Custom_RAG answer_correctness_Custom_RAG \n", - "0 0.844853 0.483940 \n", - "1 0.775916 0.622550 \n", - "2 0.738229 0.484557 \n", - "3 0.942196 0.664120 " - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], "source": [ - "# Run Ragas Eval for all Questions, all Custom RAG Answers.\n", - "\n", - "# def evaluate_ragas(eval_df, answer_col_name=\"OpenAI_RAG_answer\", context_exists=False, row_number=-9999):\n", - "ragas_result = evaluate_ragas(eval_df, \"Custom_RAG_answer\", True, -9999)\n", - "ragas_df_Custom_RAG = ragas_result.to_pandas()\n", - "\n", - "# Rename the columns.\n", - "rename_dict = {\n", - " \"contexts\": \"contexts_Custom_RAG\",\n", - " \"answer\": \"answer_Custom_RAG\",\n", - " \"answer_similarity\": \"answer_similarity_Custom_RAG\",\n", - " \"answer_correctness\": \"answer_correctness_Custom_RAG\"\n", - "}\n", - "ragas_df_Custom_RAG.rename(columns=rename_dict, inplace=True)\n", - "# Reorder the columns.\n", - "ragas_df_Custom_RAG = ragas_df_Custom_RAG.iloc[:,[0, 3, 1, 2, 4,5,6,7,8,9]]\n", - "display(ragas_df_Custom_RAG.head())" + "## Define a Custom Execution Loop for RAG." ] }, { "cell_type": "code", - "execution_count": 12, - "id": "47f15260", + "execution_count": 10, + "id": "9b6aca9b", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "evaluating with [answer_similarity]\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "100%|██████████| 1/1 [00:00<00:00, 1.70it/s]\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "evaluating with [answer_correctness]\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "100%|██████████| 1/1 [00:13<00:00, 13.49s/it]\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
questionground_truthscontexts_OpenAI_RAGanswer_OpenAI_RAGanswer_similarity_OpenAI_RAGanswer_correctness_OpenAI_RAG
0What do the parameters for HNSW mean?\\n[- M: maximum degree of nodes in a layer of th...[]The HNSW parameters include the “nlist” which ...0.7477770.186985
1What are HNSW good default parameters when dat...[M=16, efConstruction=32, ef=32][]The default HNSW parameters for data size of 2...0.8248550.206232
2what is the default distance metric used in AU...[Trick answer: IP inner product, not yet upda...[]The default distance metric used in AUTOINDEX ...0.7705730.692648
3How did New York City get its name?[In the 1600’s, the Dutch planted a trading po...[]I'm sorry, but I couldn't find any information...0.7779900.194492
\n", - "
" - ], - "text/plain": [ - " question \\\n", - "0 What do the parameters for HNSW mean?\\n \n", - "1 What are HNSW good default parameters when dat... \n", - "2 what is the default distance metric used in AU... \n", - "3 How did New York City get its name? \n", - "\n", - " ground_truths contexts_OpenAI_RAG \\\n", - "0 [- M: maximum degree of nodes in a layer of th... [] \n", - "1 [M=16, efConstruction=32, ef=32] [] \n", - "2 [Trick answer: IP inner product, not yet upda... [] \n", - "3 [In the 1600’s, the Dutch planted a trading po... [] \n", - "\n", - " answer_OpenAI_RAG \\\n", - "0 The HNSW parameters include the “nlist” which ... \n", - "1 The default HNSW parameters for data size of 2... \n", - "2 The default distance metric used in AUTOINDEX ... \n", - "3 I'm sorry, but I couldn't find any information... \n", - "\n", - " answer_similarity_OpenAI_RAG answer_correctness_OpenAI_RAG \n", - "0 0.747777 0.186985 \n", - "1 0.824855 0.206232 \n", - "2 0.770573 0.692648 \n", - "3 0.777990 0.194492 " - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ - "# Run Ragas Eval for all Questions, all OpenAI RAG Answers.\n", + "import requests, json, pprint\n", "\n", - "ragas_result = evaluate_ragas(eval_df, \"OpenAI_RAG_answer\", False, -9999)\n", - "ragas_df_OpenAI_RAG = ragas_result.to_pandas()\n", + "# Milvus search, define how many retrieval results to return.\n", + "# Milvus automatically sorts results descending by distance score.\n", + "TOP_K = 3\n", "\n", - "# Rename the columns.\n", - "# Rename the columns.\n", - "rename_dict = {\n", - " \"contexts\": \"contexts_OpenAI_RAG\",\n", - " \"answer\": \"answer_OpenAI_RAG\",\n", - " \"answer_similarity\": \"answer_similarity_OpenAI_RAG\",\n", - " \"answer_correctness\": \"answer_correctness_OpenAI_RAG\"\n", - "}\n", - "ragas_df_OpenAI_RAG.rename(columns=rename_dict, inplace=True)\n", - "# Reorder the columns.\n", - "ragas_df_OpenAI_RAG = ragas_df_OpenAI_RAG.iloc[:,[0, 3, 1, 2, 4,5]]\n", - "display(ragas_df_OpenAI_RAG)" + "# Search a collection containing Milvus Documentation.\n", + "def zilliz_pipeline_collection_search(token, question):\n", + " # Define the URL, headers, and data\n", + " url = \"https://controller.api.gcp-us-west1.zillizcloud.com/v1/pipelines/pipe-3de3fb4a9bc3c2a64a786b/run\"\n", + " headers = {\n", + " \"Content-Type\": \"application/json\",\n", + " \"Authorization\": f\"Bearer {token}\",\n", + " }\n", + " data = {\n", + " \"data\": {\n", + " \"query_text\": question\n", + " },\n", + " \"params\": {\n", + " \"limit\": 3,\n", + " \"offset\": 0,\n", + " \"outputFields\": [\"chunk_text\", \"chunk_id\", \"doc_name\", \"source\"],\n", + " \"filter\": \"chunk_id >= 0 && doc_name == 'param.html'\",\n", + " }\n", + " }\n", + "\n", + " # Send the POST request\n", + " response = requests.post(url, headers=headers, json=data)\n", + "\n", + " # # Print the response\n", + " # pprint.pprint(response.json())\n", + " return response.json()\n", + "\n", + "# Search a collection containing Wikipedia articles about New York City.\n", + "def wikipedia_search(mc, collection_name, collection_encoder, question, output_fields=None, top_k=3):\n", + " # Embed the query\n", + " query_embeddings = _utils.embed_query(collection_encoder, [question])\n", + "\n", + " # Define search parameters\n", + " INDEX_PARAMS = dict({\n", + " 'M': M, \n", + " \"efConstruction\": efConstruction })\n", + " SEARCH_PARAMS = dict({\n", + " \"ef\": INDEX_PARAMS['efConstruction']\n", + " })\n", + "\n", + " # Define output fields to return\n", + " OUTPUT_FIELDS = [\"h1\", \"source\", \"chunk\"]\n", + "\n", + " # Perform the search\n", + " answers = mc.search(\n", + " collection_name,\n", + " data=query_embeddings, \n", + " search_params=SEARCH_PARAMS,\n", + " output_fields=output_fields, \n", + " filter=\"(source like 'https://en.wikipedia.org%')\",\n", + " limit=top_k,\n", + " consistency_level=\"Eventually\"\n", + " )\n", + "\n", + " return answers" ] }, { "cell_type": "code", - "execution_count": 23, - "id": "b10b757b", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
questionground_truthscontexts_Custom_RAGanswer_Custom_RAGcontexts_OpenAI_RAGanswer_OpenAI_RAGanswer_similarity_Custom_RAGanswer_correctness_Custom_RAGanswer_similarity_OpenAI_RAGanswer_correctness_OpenAI_RAG
0What do the parameters for HNSW mean?\\n[- M: maximum degree of nodes in a layer of th...[performance, HNSW limits the maximum degree o...The parameters for HNSW have the following mea...[]The HNSW parameters include the “nlist” which ...0.8448530.4839400.7477770.186985
1What are HNSW good default parameters when dat...[M=16, efConstruction=32, ef=32][Metrics. Vector Index¶ FLAT IVF_FLAT IVF_SQ8 ...For a data size of 25K vectors with a dimensio...[]The default HNSW parameters for data size of 2...0.7759160.6225500.8248550.206232
2what is the default distance metric used in AU...[Trick answer: IP inner product, not yet upda...[The attributes of collection can be extracted...The default distance metric used in AUTOINDEX ...[]The default distance metric used in AUTOINDEX ...0.7382290.4845570.7705730.692648
3How did New York City get its name?[In the 1600’s, the Dutch planted a trading po...[Etymology\\nSee also: Nicknames of New York Ci...New York City was originally named New Amsterd...[]I'm sorry, but I couldn't find any information...0.9421960.6641200.7779900.194492
\n", - "
" - ], - "text/plain": [ - " question \\\n", - "0 What do the parameters for HNSW mean?\\n \n", - "1 What are HNSW good default parameters when dat... \n", - "2 what is the default distance metric used in AU... \n", - "3 How did New York City get its name? \n", - "\n", - " ground_truths \\\n", - "0 [- M: maximum degree of nodes in a layer of th... \n", - "1 [M=16, efConstruction=32, ef=32] \n", - "2 [Trick answer: IP inner product, not yet upda... \n", - "3 [In the 1600’s, the Dutch planted a trading po... \n", - "\n", - " contexts_Custom_RAG \\\n", - "0 [performance, HNSW limits the maximum degree o... \n", - "1 [Metrics. Vector Index¶ FLAT IVF_FLAT IVF_SQ8 ... \n", - "2 [The attributes of collection can be extracted... \n", - "3 [Etymology\\nSee also: Nicknames of New York Ci... \n", - "\n", - " answer_Custom_RAG contexts_OpenAI_RAG \\\n", - "0 The parameters for HNSW have the following mea... [] \n", - "1 For a data size of 25K vectors with a dimensio... [] \n", - "2 The default distance metric used in AUTOINDEX ... [] \n", - "3 New York City was originally named New Amsterd... [] \n", - "\n", - " answer_OpenAI_RAG \\\n", - "0 The HNSW parameters include the “nlist” which ... \n", - "1 The default HNSW parameters for data size of 2... \n", - "2 The default distance metric used in AUTOINDEX ... \n", - "3 I'm sorry, but I couldn't find any information... \n", - "\n", - " answer_similarity_Custom_RAG answer_correctness_Custom_RAG \\\n", - "0 0.844853 0.483940 \n", - "1 0.775916 0.622550 \n", - "2 0.738229 0.484557 \n", - "3 0.942196 0.664120 \n", - "\n", - " answer_similarity_OpenAI_RAG answer_correctness_OpenAI_RAG \n", - "0 0.747777 0.186985 \n", - "1 0.824855 0.206232 \n", - "2 0.770573 0.692648 \n", - "3 0.777990 0.194492 " - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "####### FINAL SCORES OPENAI RAG vs MILVUS CUSTOM RAG #########\n", - "LLM as judge model: gpt-3.5-turbo-1106 with temperature: 0.1 scores:\n", - "# truth vs RAG answers: 4\n", - "\n", - "avg_faithfulness_Custom_RAG: 0.83\n", - "avg_faithfulness_OpenAI_RAG: 0.78\n", - "\n", - "avg_relevancy_Custom_RAG: 0.56\n", - "avg_relevancy_OpenAI_RAG: 0.32\n" - ] - } - ], - "source": [ - "# Merge the 2 ragas dfs so they are easier to compare.\n", - "ragas_merged_df = ragas_df_Custom_RAG.iloc[:,[0,1,2,3,8,9]].merge(ragas_df_OpenAI_RAG.iloc[:, 2:], how='inner', left_index=True, right_index=True)\n", - "# reorder columns\n", - "ragas_merged_df = ragas_merged_df.iloc[:,[0,1,2,3,6,7,4,5,8,9]]\n", - "display(ragas_merged_df.head())\n", - "\n", - "print()\n", - "print(f\"####### FINAL SCORES OPENAI RAG vs MILVUS CUSTOM RAG #########\")\n", - "print(f\"LLM as judge model: {LLM_NAME} with temperature: {TEMPERATURE} scores:\")\n", - "print(f\"# Truth vs RAG answers: {len(ragas_merged_df)}\")\n", - "print()\n", - "print(f\"avg_faithfulness_Custom_RAG: {np.round(ragas_merged_df.answer_similarity_Custom_RAG.mean(), 2)}\")\n", - "print(f\"avg_faithfulness_OpenAI_RAG: {np.round(ragas_merged_df.answer_similarity_OpenAI_RAG.mean(), 2)}\")\n", - "print()\n", - "print(f\"avg_relevancy_Custom_RAG: {np.round(ragas_merged_df.answer_correctness_Custom_RAG.mean(), 2)}\")\n", - "print(f\"avg_relevancy_OpenAI_RAG: {np.round(ragas_merged_df.answer_correctness_OpenAI_RAG.mean(), 2)}\")" - ] - }, - { - "cell_type": "markdown", - "id": "bb69c50d", - "metadata": {}, - "source": [ - "## Define a Custom Execution Loop for RAG." - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "id": "9b6aca9b", - "metadata": {}, - "outputs": [], - "source": [ - "import requests, json, pprint\n", - "\n", - "# Milvus search, define how many retrieval results to return.\n", - "# Milvus automatically sorts results descending by distance score.\n", - "TOP_K = 3\n", - "\n", - "# Search a collection containing Milvus Documentation.\n", - "def zilliz_pipeline_collection_search(token, question):\n", - " # Define the URL, headers, and data\n", - " url = \"https://controller.api.gcp-us-west1.zillizcloud.com/v1/pipelines/pipe-3de3fb4a9bc3c2a64a786b/run\"\n", - " headers = {\n", - " \"Content-Type\": \"application/json\",\n", - " \"Authorization\": f\"Bearer {token}\",\n", - " }\n", - " data = {\n", - " \"data\": {\n", - " \"query_text\": question\n", - " },\n", - " \"params\": {\n", - " \"limit\": 3,\n", - " \"offset\": 0,\n", - " \"outputFields\": [\"chunk_text\", \"chunk_id\", \"doc_name\", \"source\"],\n", - " \"filter\": \"chunk_id >= 0 && doc_name == 'param.html'\",\n", - " }\n", - " }\n", - "\n", - " # Send the POST request\n", - " response = requests.post(url, headers=headers, json=data)\n", - "\n", - " # # Print the response\n", - " # pprint.pprint(response.json())\n", - " return response.json()\n", - "\n", - "# Search a collection containing Wikipedia articles about New York City.\n", - "def wikipedia_search(mc, collection_name, collection_encoder, question, output_fields=None, top_k=3):\n", - " # Embed the query\n", - " query_embeddings = _utils.embed_query(collection_encoder, [question])\n", - "\n", - " # Define search parameters\n", - " INDEX_PARAMS = dict({\n", - " 'M': M, \n", - " \"efConstruction\": efConstruction })\n", - " SEARCH_PARAMS = dict({\n", - " \"ef\": INDEX_PARAMS['efConstruction']\n", - " })\n", - "\n", - " # Define output fields to return\n", - " OUTPUT_FIELDS = [\"h1\", \"source\", \"chunk\"]\n", - "\n", - " # Perform the search\n", - " answers = mc.search(\n", - " collection_name,\n", - " data=query_embeddings, \n", - " search_params=SEARCH_PARAMS,\n", - " output_fields=output_fields, \n", - " filter=\"(source like 'https://en.wikipedia.org%')\",\n", - " limit=top_k,\n", - " consistency_level=\"Eventually\"\n", - " )\n", - "\n", - " return answers" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "id": "cfb1f303", + "execution_count": 11, + "id": "cfb1f303", "metadata": {}, "outputs": [], "source": [ @@ -1429,7 +861,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 12, "id": "d671601b", "metadata": {}, "outputs": [], @@ -1450,17 +882,17 @@ " threshold_retrieval_score = 0.6\n", " ragas_metrics= ['answer_relevancy', 'faithfulness']\n", "\n", - " # Step 1: Check input to see if it flags the Moderation API or is a prompt injection\n", - " if debug:\n", - " print()\n", - " print(\"STEP 1: Check input to see if it flags the Moderation API or is a prompt injection\")\n", - " response = openai_client.moderations.create(input=user_input)\n", - " moderation_output = response.results[0]\n", - " print(moderation_output.flagged) # False\n", - "\n", - " if moderation_output.flagged:\n", - " print(\"Step 1: Input flagged by Moderation API.\")\n", - " return \"Sorry, we cannot process this request.\", message_history\n", + " # # Step 1: Check input to see if it flags the Moderation API or is a prompt injection\n", + " # if debug:\n", + " # print()\n", + " # print(\"STEP 1: Check input to see if it flags the Moderation API or is a prompt injection\")\n", + " # response = openai_client.moderations.create(input=user_input)\n", + " # moderation_output = response.results[0]\n", + " # print(moderation_output.flagged) # False\n", + "\n", + " # if moderation_output.flagged:\n", + " # print(\"Step 1: Input flagged by Moderation API.\")\n", + " # return \"Sorry, we cannot process this request.\", message_history\n", "\n", " # Step 2: Retrieval from collection #1.\n", " if debug:\n", @@ -1545,9 +977,9 @@ " if debug:\n", " print()\n", " print(\"STEP 6: Evaluate whether the chatbot response answers the initial user query well.\")\n", - " ragas_result = evaluate_ragas(eval_df, \"Custom_RAG_answer\", True, question_number)\n", + " ragas_result = evaluate_ragas(eval_df, \"Custom_RAG_answer\", True, question_number, \"final_only\")\n", " ragas_df = ragas_result.to_pandas()\n", - " print(f\"Ragas evaluation: answer similarity: {ragas_df.answer_similarity[0]}, answer relevancy: {np.round(ragas_df.answer_correctness[0],3)}\")\n", + " print(f\"Ragas evaluation: answer similarity: {ragas_df.answer_similarity[0]}, answer relevancy: {np.round(ragas_df.answer_relevancy[0],3)}, answer correctness: {np.round(ragas_df.answer_correctness[0],3)}\")\n", " # could also check for other metrics here...\n", " evaluation_response = \"Y\"\n", "\n", @@ -1566,156 +998,752 @@ }, { "cell_type": "code", - "execution_count": 17, - "id": "bb1a52ca", + "execution_count": 13, + "id": "bb1a52ca", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "question = How did New York City get its name?\n", + "\n", + "STEP 2: Retrieval from collection #1 MilvusDocs.\n", + "DISTANCE SCORE: 0.39108937978744507 branching logic...\n", + "\n", + "STEP 3: Score is too low, GET INTENT from the user's question.\n", + "intent = new_york\n", + "\n", + "STEP 4: Based on question intent, retrieve from collection #2 Wikipedia.\n", + "chunk_answer: New York City traces its origins to Fort Amsterdam and a trading post founded on the southern tip of Manhattan Island by Dutch colonists in approximat\n", + "DISTANCE SCORE: 0.7961502075195312 branch logic...\n", + "\n", + "Score from custom RAG Retrieval is above threshold, proceed to answer generation step.\n", + "\n", + "STEP 5: Generating GPT3.5 answer from the custom execution loop for RAG in the ASSISTANT PROMPT.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n", + "To disable this warning, you can either:\n", + "\t- Avoid using `tokenizers` before the fork if possible\n", + "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "STEP 6: Evaluate whether the chatbot response answers the initial user query well.\n", + "evaluating with [answer_similarity]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 1/1 [00:00<00:00, 1.49it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "evaluating with [answer_relevancy]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 1/1 [00:01<00:00, 1.73s/it]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "evaluating with [answer_correctness]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 1/1 [00:05<00:00, 5.98s/it]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Ragas evaluation: answer similarity: 0.9421961714808575, answer relevancy: 0.894, answer correctness: 0.664\n", + "\n", + "STEP 7: LLM answer passed Evaluation, return it to the user.\n", + "('Answer: New York City was originally named New Amsterdam by Dutch colonists '\n", + " 'in 1626. However, it was renamed New York in 1664 after King Charles II '\n", + " 'granted the lands to his brother, the Duke of York, when the city came under '\n", + " 'British control.')\n" + ] + } + ], + "source": [ + "# Test the custom RAG execution loop using a question.\n", + "\n", + "QUESTION_NUMBER = 3 #2 or 3\n", + "SAMPLE_QUESTION = question_list[QUESTION_NUMBER]\n", + "print(f\"question = {SAMPLE_QUESTION}\")\n", + "\n", + "truth_answer = truth_list[QUESTION_NUMBER]\n", + "\n", + "# Test the OpenAI answer.\n", + "all_messages = []\n", + "answer_history = []\n", + "openai_answer, messages = process_user_message(SAMPLE_QUESTION, QUESTION_NUMBER, all_messages, debug=True)\n", + "all_messages.append(messages)\n", + "answer_history.append(openai_answer)\n", + "pprint.pprint(f\"Answer: {openai_answer}\")" + ] + }, + { + "cell_type": "markdown", + "id": "67fa1791", + "metadata": {}, + "source": [ + "## Final Eval Comparisons Custom RAG vs OpenAI RAG" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "aa9a35cd", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "evaluating with [context_recall]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 1/1 [00:14<00:00, 14.62s/it]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "evaluating with [context_precision]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 1/1 [00:07<00:00, 7.86s/it]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "evaluating with [faithfulness]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 1/1 [00:29<00:00, 29.35s/it]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "evaluating with [answer_similarity]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 1/1 [00:01<00:00, 1.20s/it]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "evaluating with [answer_relevancy]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 1/1 [00:07<00:00, 7.96s/it]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "evaluating with [answer_correctness]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 1/1 [00:20<00:00, 20.12s/it]\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
questionground_truthscontexts_Custom_RAGanswer_Custom_RAGcontext_recallcontext_precisionfaithfulnessanswer_similarity_Custom_RAGanswer_relevancy_Custom_RAGanswer_correctness_Custom_RAG
0What do the parameters for HNSW mean?\\n[- M: maximum degree of nodes in a layer of th...[performance, HNSW limits the maximum degree o...The parameters for HNSW have the following mea...1.01.00.80.8448670.9792170.620304
1What are HNSW good default parameters when dat...[M=16, efConstruction=32, ef=32][Metrics. Vector Index¶ FLAT IVF_FLAT IVF_SQ8 ...For a data size of 25K vectors with a dimensio...0.00.00.00.7760060.9779020.622550
2what is the default distance metric used in AU...[Trick answer: IP inner product, not yet upda...[The attributes of collection can be extracted...The default distance metric used in AUTOINDEX ...0.00.00.00.7380600.9908140.484557
3How did New York City get its name?[In the 1600’s, the Dutch planted a trading po...[Etymology\\nSee also: Nicknames of New York Ci...New York City was originally named New Amsterd...1.01.00.50.9421960.8942590.664120
\n", + "
" + ], + "text/plain": [ + " question \\\n", + "0 What do the parameters for HNSW mean?\\n \n", + "1 What are HNSW good default parameters when dat... \n", + "2 what is the default distance metric used in AU... \n", + "3 How did New York City get its name? \n", + "\n", + " ground_truths \\\n", + "0 [- M: maximum degree of nodes in a layer of th... \n", + "1 [M=16, efConstruction=32, ef=32] \n", + "2 [Trick answer: IP inner product, not yet upda... \n", + "3 [In the 1600’s, the Dutch planted a trading po... \n", + "\n", + " contexts_Custom_RAG \\\n", + "0 [performance, HNSW limits the maximum degree o... \n", + "1 [Metrics. Vector Index¶ FLAT IVF_FLAT IVF_SQ8 ... \n", + "2 [The attributes of collection can be extracted... \n", + "3 [Etymology\\nSee also: Nicknames of New York Ci... \n", + "\n", + " answer_Custom_RAG context_recall \\\n", + "0 The parameters for HNSW have the following mea... 1.0 \n", + "1 For a data size of 25K vectors with a dimensio... 0.0 \n", + "2 The default distance metric used in AUTOINDEX ... 0.0 \n", + "3 New York City was originally named New Amsterd... 1.0 \n", + "\n", + " context_precision faithfulness answer_similarity_Custom_RAG \\\n", + "0 1.0 0.8 0.844867 \n", + "1 0.0 0.0 0.776006 \n", + "2 0.0 0.0 0.738060 \n", + "3 1.0 0.5 0.942196 \n", + "\n", + " answer_relevancy_Custom_RAG answer_correctness_Custom_RAG \n", + "0 0.979217 0.620304 \n", + "1 0.977902 0.622550 \n", + "2 0.990814 0.484557 \n", + "3 0.894259 0.664120 " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Run Ragas Eval for all Questions, all Custom RAG Answers.\n", + "\n", + "# def evaluate_ragas(input_df, answer_col_name=\"OpenAI_RAG_answer\", context_exists=False, row_number=-9999, metrics=\"final_only\"):\n", + "ragas_result = evaluate_ragas(eval_df, \"Custom_RAG_answer\", True, -9999, \"all\")\n", + "ragas_df_Custom_RAG = ragas_result.to_pandas()\n", + "\n", + "# Rename the columns.\n", + "rename_dict = {\n", + " \"contexts\": \"contexts_Custom_RAG\",\n", + " \"answer\": \"answer_Custom_RAG\",\n", + " \"answer_similarity\": \"answer_similarity_Custom_RAG\",\n", + " \"answer_relevancy\": \"answer_relevancy_Custom_RAG\",\n", + " \"answer_correctness\": \"answer_correctness_Custom_RAG\"\n", + "}\n", + "ragas_df_Custom_RAG.rename(columns=rename_dict, inplace=True)\n", + "# Reorder the columns.\n", + "ragas_df_Custom_RAG = ragas_df_Custom_RAG.iloc[:,[0, 3, 1, 2, 4,5,6,7,8,9]]\n", + "display(ragas_df_Custom_RAG.head())" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "1f1b1f4e", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "evaluating with [answer_similarity]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 1/1 [00:00<00:00, 2.01it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "evaluating with [answer_relevancy]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 1/1 [00:07<00:00, 7.85s/it]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "evaluating with [answer_correctness]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 1/1 [00:14<00:00, 14.49s/it]\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
questionground_truthscontexts_OpenAI_RAGanswer_OpenAI_RAGanswer_similarity_OpenAI_RAGanswer_relevancy_OpenAI_RAGanswer_correctness_OpenAI_RAG
0What do the parameters for HNSW mean?\\n[- M: maximum degree of nodes in a layer of th...[]The HNSW parameters include the “nlist” which ...0.7479390.9360050.186985
1What are HNSW good default parameters when dat...[M=16, efConstruction=32, ef=32][]The default HNSW parameters for data size of 2...0.8249290.9816720.206232
2what is the default distance metric used in AU...[Trick answer: IP inner product, not yet upda...[]The default distance metric used in AUTOINDEX ...0.7705900.9908140.692648
3How did New York City get its name?[In the 1600’s, the Dutch planted a trading po...[]I'm sorry, but I couldn't find any information...0.7779670.0000000.194492
\n", + "
" + ], + "text/plain": [ + " question \\\n", + "0 What do the parameters for HNSW mean?\\n \n", + "1 What are HNSW good default parameters when dat... \n", + "2 what is the default distance metric used in AU... \n", + "3 How did New York City get its name? \n", + "\n", + " ground_truths contexts_OpenAI_RAG \\\n", + "0 [- M: maximum degree of nodes in a layer of th... [] \n", + "1 [M=16, efConstruction=32, ef=32] [] \n", + "2 [Trick answer: IP inner product, not yet upda... [] \n", + "3 [In the 1600’s, the Dutch planted a trading po... [] \n", + "\n", + " answer_OpenAI_RAG \\\n", + "0 The HNSW parameters include the “nlist” which ... \n", + "1 The default HNSW parameters for data size of 2... \n", + "2 The default distance metric used in AUTOINDEX ... \n", + "3 I'm sorry, but I couldn't find any information... \n", + "\n", + " answer_similarity_OpenAI_RAG answer_relevancy_OpenAI_RAG \\\n", + "0 0.747939 0.936005 \n", + "1 0.824929 0.981672 \n", + "2 0.770590 0.990814 \n", + "3 0.777967 0.000000 \n", + "\n", + " answer_correctness_OpenAI_RAG \n", + "0 0.186985 \n", + "1 0.206232 \n", + "2 0.692648 \n", + "3 0.194492 " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Run Ragas Eval for all Questions, all OpenAI RAG Answers.\n", + "\n", + "ragas_result = evaluate_ragas(eval_df, \"OpenAI_RAG_answer\", False, -9999)\n", + "ragas_df_OpenAI_RAG = ragas_result.to_pandas()\n", + "\n", + "# Rename the columns.\n", + "# Rename the columns.\n", + "rename_dict = {\n", + " \"contexts\": \"contexts_OpenAI_RAG\",\n", + " \"answer\": \"answer_OpenAI_RAG\",\n", + " \"answer_similarity\": \"answer_similarity_OpenAI_RAG\",\n", + " \"answer_relevancy\": \"answer_relevancy_OpenAI_RAG\",\n", + " \"answer_correctness\": \"answer_correctness_OpenAI_RAG\"\n", + "}\n", + "ragas_df_OpenAI_RAG.rename(columns=rename_dict, inplace=True)\n", + "# Reorder the columns.\n", + "ragas_df_OpenAI_RAG = ragas_df_OpenAI_RAG.iloc[:,[0, 3, 1, 2, 4,5,6]]\n", + "display(ragas_df_OpenAI_RAG)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "c19bc0a5", "metadata": {}, "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
questionground_truthscontexts_Custom_RAGanswer_Custom_RAGcontexts_OpenAI_RAGanswer_OpenAI_RAGanswer_similarity_Custom_RAGanswer_relevancy_Custom_RAGanswer_correctness_Custom_RAGanswer_similarity_OpenAI_RAGanswer_relevancy_OpenAI_RAGanswer_correctness_OpenAI_RAG
0What do the parameters for HNSW mean?\\n[- M: maximum degree of nodes in a layer of th...[performance, HNSW limits the maximum degree o...The parameters for HNSW have the following mea...[]The HNSW parameters include the “nlist” which ...0.8448670.9792170.6203040.7479390.9360050.186985
1What are HNSW good default parameters when dat...[M=16, efConstruction=32, ef=32][Metrics. Vector Index¶ FLAT IVF_FLAT IVF_SQ8 ...For a data size of 25K vectors with a dimensio...[]The default HNSW parameters for data size of 2...0.7760060.9779020.6225500.8249290.9816720.206232
2what is the default distance metric used in AU...[Trick answer: IP inner product, not yet upda...[The attributes of collection can be extracted...The default distance metric used in AUTOINDEX ...[]The default distance metric used in AUTOINDEX ...0.7380600.9908140.4845570.7705900.9908140.692648
3How did New York City get its name?[In the 1600’s, the Dutch planted a trading po...[Etymology\\nSee also: Nicknames of New York Ci...New York City was originally named New Amsterd...[]I'm sorry, but I couldn't find any information...0.9421960.8942590.6641200.7779670.0000000.194492
\n", + "
" + ], + "text/plain": [ + " question \\\n", + "0 What do the parameters for HNSW mean?\\n \n", + "1 What are HNSW good default parameters when dat... \n", + "2 what is the default distance metric used in AU... \n", + "3 How did New York City get its name? \n", + "\n", + " ground_truths \\\n", + "0 [- M: maximum degree of nodes in a layer of th... \n", + "1 [M=16, efConstruction=32, ef=32] \n", + "2 [Trick answer: IP inner product, not yet upda... \n", + "3 [In the 1600’s, the Dutch planted a trading po... \n", + "\n", + " contexts_Custom_RAG \\\n", + "0 [performance, HNSW limits the maximum degree o... \n", + "1 [Metrics. Vector Index¶ FLAT IVF_FLAT IVF_SQ8 ... \n", + "2 [The attributes of collection can be extracted... \n", + "3 [Etymology\\nSee also: Nicknames of New York Ci... \n", + "\n", + " answer_Custom_RAG contexts_OpenAI_RAG \\\n", + "0 The parameters for HNSW have the following mea... [] \n", + "1 For a data size of 25K vectors with a dimensio... [] \n", + "2 The default distance metric used in AUTOINDEX ... [] \n", + "3 New York City was originally named New Amsterd... [] \n", + "\n", + " answer_OpenAI_RAG \\\n", + "0 The HNSW parameters include the “nlist” which ... \n", + "1 The default HNSW parameters for data size of 2... \n", + "2 The default distance metric used in AUTOINDEX ... \n", + "3 I'm sorry, but I couldn't find any information... \n", + "\n", + " answer_similarity_Custom_RAG answer_relevancy_Custom_RAG \\\n", + "0 0.844867 0.979217 \n", + "1 0.776006 0.977902 \n", + "2 0.738060 0.990814 \n", + "3 0.942196 0.894259 \n", + "\n", + " answer_correctness_Custom_RAG answer_similarity_OpenAI_RAG \\\n", + "0 0.620304 0.747939 \n", + "1 0.622550 0.824929 \n", + "2 0.484557 0.770590 \n", + "3 0.664120 0.777967 \n", + "\n", + " answer_relevancy_OpenAI_RAG answer_correctness_OpenAI_RAG \n", + "0 0.936005 0.186985 \n", + "1 0.981672 0.206232 \n", + "2 0.990814 0.692648 \n", + "3 0.000000 0.194492 " + ] + }, + "metadata": {}, + "output_type": "display_data" + }, { "name": "stdout", "output_type": "stream", "text": [ - "question = How did New York City get its name?\n", - "\n", - "STEP 1: Check input to see if it flags the Moderation API or is a prompt injection\n", - "False\n", "\n", - "STEP 2: Retrieval from collection #1 MilvusDocs.\n", - "DISTANCE SCORE: 0.39108937978744507 branching logic...\n", - "\n", - "STEP 3: Score is too low, GET INTENT from the user's question.\n", - "intent = new_york\n", - "\n", - "STEP 4: Based on question intent, retrieve from collection #2 Wikipedia.\n", - "chunk_answer: New York City traces its origins to Fort Amsterdam and a trading post founded on the southern tip of Manhattan Island by Dutch colonists in approximat\n", - "DISTANCE SCORE: 0.7961502075195312 branch logic...\n", - "\n", - "Score from custom RAG Retrieval is above threshold, proceed to answer generation step.\n", + "####### FINAL SCORES OPENAI RAG vs MILVUS CUSTOM RAG #########\n", + "LLM as judge model: gpt-3.5-turbo-1106 with temperature: 0.1 scores:\n", + "# Truth vs RAG answers: 4\n", "\n", - "STEP 5: Generating GPT3.5 answer from the custom execution loop for RAG in the ASSISTANT PROMPT.\n", + "avg_similarity_Custom_RAG: 0.83\n", + "avg_similarity_OpenAI_RAG: 0.78\n", "\n", - "STEP 6: Evaluate whether the chatbot response answers the initial user query well.\n", - "evaluating with [context_recall]\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "100%|██████████| 1/1 [00:05<00:00, 5.24s/it]\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "evaluating with [context_precision]\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "100%|██████████| 1/1 [00:01<00:00, 1.57s/it]\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "evaluating with [answer_relevancy]\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "100%|██████████| 1/1 [00:01<00:00, 1.84s/it]\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "evaluating with [faithfulness]\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "100%|██████████| 1/1 [00:06<00:00, 6.75s/it]\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "evaluating with [answer_similarity]\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "100%|██████████| 1/1 [00:02<00:00, 2.76s/it]\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "evaluating with [answer_correctness]\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "100%|██████████| 1/1 [00:04<00:00, 4.89s/it]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Ragas evaluation: answer similarity: 0.9425150309940216, answer relevancy: 0.664\n", + "answer_relevancy_Custom_RAG: 0.96\n", + "avg_relevancy_OpenAI_RAG: 0.73\n", "\n", - "STEP 7: LLM answer passed Evaluation, return it to the user.\n", - "('Answer: New York City was originally named New Amsterdam by Dutch colonists '\n", - " 'in 1626. However, in 1664, the city came under British control and was '\n", - " 'renamed New York after King Charles II granted the lands to his brother, the '\n", - " 'Duke of York.')\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\n" + "avg_correctness_Custom_RAG: 0.6\n", + "avg_correctness_OpenAI_RAG: 0.32\n" ] } ], "source": [ - "# Test the custom RAG execution loop using a question.\n", - "\n", - "QUESTION_NUMBER = 3 #2 or 3\n", - "SAMPLE_QUESTION = question_list[QUESTION_NUMBER]\n", - "print(f\"question = {SAMPLE_QUESTION}\")\n", - "\n", - "truth_answer = truth_list[QUESTION_NUMBER]\n", + "# Merge the 2 ragas dfs so they are easier to compare.\n", + "ragas_merged_df = ragas_df_Custom_RAG.iloc[:,[0,1,2,3,7,8,9]].merge(ragas_df_OpenAI_RAG.iloc[:, 2:], how='inner', left_index=True, right_index=True)\n", + "# reorder columns\n", + "ragas_merged_df = ragas_merged_df.iloc[:,[0,1,2,3,7,8,4,5,6,9,10,11]]\n", + "display(ragas_merged_df.head())\n", "\n", - "# Test the OpenAI answer.\n", - "all_messages = []\n", - "answer_history = []\n", - "openai_answer, messages = process_user_message(SAMPLE_QUESTION, QUESTION_NUMBER, all_messages, debug=True)\n", - "all_messages.append(messages)\n", - "answer_history.append(openai_answer)\n", - "pprint.pprint(f\"Answer: {openai_answer}\")" + "print()\n", + "print(f\"####### FINAL SCORES OPENAI RAG vs MILVUS CUSTOM RAG #########\")\n", + "print(f\"LLM as judge model: {LLM_NAME} with temperature: {TEMPERATURE} scores:\")\n", + "print(f\"# Truth vs RAG answers: {len(ragas_merged_df)}\")\n", + "print()\n", + "print(f\"avg_similarity_Custom_RAG: {np.round(ragas_merged_df.answer_similarity_Custom_RAG.mean(), 2)}\")\n", + "print(f\"avg_similarity_OpenAI_RAG: {np.round(ragas_merged_df.answer_similarity_OpenAI_RAG.mean(), 2)}\")\n", + "print()\n", + "print(f\"answer_relevancy_Custom_RAG: {np.round(ragas_merged_df.answer_relevancy_Custom_RAG.mean(), 2)}\")\n", + "print(f\"avg_relevancy_OpenAI_RAG: {np.round(ragas_merged_df.answer_relevancy_OpenAI_RAG.mean(), 2)}\")\n", + "print()\n", + "print(f\"avg_correctness_Custom_RAG: {np.round(ragas_merged_df.answer_correctness_Custom_RAG.mean(), 2)}\")\n", + "print(f\"avg_correctness_OpenAI_RAG: {np.round(ragas_merged_df.answer_correctness_OpenAI_RAG.mean(), 2)}\")" ] }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 17, "id": "d0e81e68", "metadata": {}, "outputs": [], @@ -1726,7 +1754,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 18, "id": "c777937e", "metadata": {}, "outputs": [