From 8b0615d2fba5cc16d334733841958a17e2d083eb Mon Sep 17 00:00:00 2001 From: christy Date: Mon, 8 Jan 2024 09:12:20 -0800 Subject: [PATCH] Small tweaks to variable names and text Signed-off-by: christy --- .../RAG/readthedocs_zilliz_langchain.ipynb | 109 +++++++++--------- 1 file changed, 56 insertions(+), 53 deletions(-) diff --git a/bootcamp/RAG/readthedocs_zilliz_langchain.ipynb b/bootcamp/RAG/readthedocs_zilliz_langchain.ipynb index e9b420c9c..63fa6ab99 100755 --- a/bootcamp/RAG/readthedocs_zilliz_langchain.ipynb +++ b/bootcamp/RAG/readthedocs_zilliz_langchain.ipynb @@ -140,7 +140,7 @@ "**Embedding model:** We will use the open-source [sentence transformers](https://www.sbert.net/docs/pretrained_models.html) available on HuggingFace to encode the documentation text. We will download the model from HuggingFace and run it locally. \n", "\n", "Two model parameters of note below:\n", - "1. EMBEDDING_LENGTH refers to the dimensionality or length of the embedding vector. In this case, the embeddings generated for EACH token in the input text will have the SAME length = 1024. This size of embedding is often associated with BERT-based models, where the embeddings are used for downstream tasks such as classification, question answering, or text generation.
\n", + "1. EMBEDDING_DIM refers to the dimensionality or length of the embedding vector. In this case, the embeddings generated for EACH token in the input text will have the SAME length = 1024. This size of embedding is often associated with BERT-based models, where the embeddings are used for downstream tasks such as classification, question answering, or text generation.
\n", "2. MAX_SEQ_LENGTH is the maximum length the encoder model can handle for input sequences. In this case, if sequences longer than 512 tokens are given to the model, everything longer will be (silently!) chopped off. This is the reason why a chunking strategy is needed to segment input texts into chunks with lengths that will fit in the model's input." ] }, @@ -174,7 +174,7 @@ " (1): Pooling({'word_embedding_dimension': 1024, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False})\n", ")\n", "model_name: WhereIsAI/UAE-Large-V1\n", - "EMBEDDING_LENGTH: 1024\n", + "EMBEDDING_DIM: 1024\n", "MAX_SEQ_LENGTH: 512\n" ] } @@ -200,7 +200,7 @@ "print(encoder)\n", "\n", "# Get the model parameters and save for later.\n", - "EMBEDDING_LENGTH = encoder.get_sentence_embedding_dimension()\n", + "EMBEDDING_DIM = encoder.get_sentence_embedding_dimension()\n", "MAX_SEQ_LENGTH_IN_TOKENS = encoder.get_max_seq_length() \n", "# # Assume tokens are 3 characters long.\n", "# MAX_SEQ_LENGTH = MAX_SEQ_LENGTH_IN_TOKENS * 3\n", @@ -211,7 +211,7 @@ "\n", "# Inspect model parameters.\n", "print(f\"model_name: {model_name}\")\n", - "print(f\"EMBEDDING_LENGTH: {EMBEDDING_LENGTH}\")\n", + "print(f\"EMBEDDING_DIM: {EMBEDDING_DIM}\")\n", "print(f\"MAX_SEQ_LENGTH: {MAX_SEQ_LENGTH}\")" ] }, @@ -223,9 +223,9 @@ "\n", "You can think of a collection in Milvus like a \"table\" in SQL databases. The **collection** will contain the \n", "- **Schema** (or [no-schema Milvus client](https://milvus.io/docs/using_milvusclient.md)). \n", - "πŸ’‘ You'll need the vector `EMBEDDING_LENGTH` parameter from your embedding model.\n", + "πŸ’‘ You'll need the vector `EMBEDDING_DIM` parameter from your embedding model.\n", "Typical values are:\n", - " - 768 for sbert embedding models\n", + " - 1024 for sbert embedding models\n", " - 1536 for ada-002 OpenAI embedding models\n", "- **Vector index** for efficient vector search\n", "- **Vector distance metric** for measuring nearest neighbor vectors\n", @@ -263,8 +263,9 @@ "name": "stdout", "output_type": "stream", "text": [ + "Successfully dropped collection: `MilvusDocs`\n", "Successfully created collection: `MilvusDocs`\n", - "{'collection_name': 'MilvusDocs', 'auto_id': True, 'num_shards': 1, 'description': '', 'fields': [{'field_id': 100, 'name': 'id', 'description': '', 'type': 5, 'params': {}, 'element_type': 0, 'auto_id': True, 'is_primary': True}, {'field_id': 101, 'name': 'vector', 'description': '', 'type': 101, 'params': {'dim': 1024}, 'element_type': 0}], 'aliases': [], 'collection_id': 446268198622114319, 'consistency_level': 3, 'properties': {}, 'num_partitions': 1, 'enable_dynamic_field': True}\n" + "{'collection_name': 'MilvusDocs', 'auto_id': True, 'num_shards': 1, 'description': '', 'fields': [{'field_id': 100, 'name': 'id', 'description': '', 'type': 5, 'params': {}, 'element_type': 0, 'auto_id': True, 'is_primary': True}, {'field_id': 101, 'name': 'vector', 'description': '', 'type': 101, 'params': {'dim': 1024}, 'element_type': 0}], 'aliases': [], 'collection_id': 446268198625446578, 'consistency_level': 3, 'properties': {}, 'num_partitions': 1, 'enable_dynamic_field': True}\n" ] } ], @@ -308,7 +309,7 @@ "\n", "# Create the collection.\n", "mc.create_collection(COLLECTION_NAME, \n", - " EMBEDDING_LENGTH,\n", + " EMBEDDING_DIM,\n", " consistency_level=\"Eventually\", \n", " auto_id=True, \n", " overwrite=True,\n", @@ -380,7 +381,7 @@ "output_type": "stream", "text": [ "chunk_size: 511, 
chunk_overlap: 51.0\n", - "chunking time: 0.019194841384887695\n", + "chunking time: 0.013692855834960938\n", "docs: 8, split into: 8\n", "split into chunks: 156, type: list of \n", "\n", @@ -525,14 +526,14 @@ "name": "stderr", "output_type": "stream", "text": [ - "100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 1/1 [00:01<00:00, 1.56s/it]\n" + "100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 1/1 [00:02<00:00, 2.74s/it]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Milvus Client insert time for 156 vectors: 1.5612208843231201 seconds\n" + "Milvus Client insert time for 156 vectors: 2.7401609420776367 seconds\n" ] } ], @@ -545,7 +546,8 @@ "\n", " # Generate embeddings using encoder from HuggingFace.\n", " embeddings = torch.tensor(encoder.encode([chunk.page_content]))\n", - " embeddings = F.normalize(embeddings, p=2, dim=1)\n", + " # embeddings = F.normalize(embeddings, p=2, dim=1) #use torch\n", + " embeddings = np.array(embeddings / np.linalg.norm(embeddings)) #use numpy\n", " converted_values = list(map(np.float32, embeddings))[0]\n", " \n", " # Only use h1, h2. Truncate the metadata in case too long.\n", @@ -614,14 +616,14 @@ "name": "stdout", "output_type": "stream", "text": [ - "query length: 74\n" + "query length: 75\n" ] } ], "source": [ "# Define a sample question about your data.\n", "QUESTION1 = \"What do the parameters for HNSW mean?\"\n", - "QUESTION2 = \"What are good default values for HNSW parameters with 25K vectors dim 768?\"\n", + "QUESTION2 = \"What are good default values for HNSW parameters with 25K vectors dim 1024?\"\n", "QUESTION3 = \"What is the default AUTOINDEX distance metric in Milvus Client?\"\n", "QUERY = [QUESTION1, QUESTION2, QUESTION3]\n", "\n", @@ -666,7 +668,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Milvus Client search time for 156 vectors: 0.3346102237701416 seconds\n", + "Milvus Client search time for 156 vectors: 0.056038856506347656 seconds\n", "type: , count: 3\n" ] } @@ -698,7 +700,7 @@ " search_params=SEARCH_PARAMS,\n", " output_fields=OUTPUT_FIELDS, \n", " # Milvus can utilize metadata in boolean expressions to filter search.\n", - " # expr=\"\",\n", + " # filter=\"\",\n", " limit=TOP_K,\n", " consistency_level=\"Eventually\"\n", " )\n", @@ -827,8 +829,7 @@ "\n", "# Print the generated answer\n", "print(\"Generated Answer:\", answer)\n", - "\n", - "# No answer." + "# Generated Answer: i don ' t know" ] }, { @@ -894,7 +895,7 @@ "\n", "Now let's make a call to the paid OpenAI GPT.\n", "\n", - "πŸ’‘ Note: We’re using a temperature of 0.0 to enable reproducible experiments. For use cases that need to always be factually grounded, use very low temperature values while more creative tasks can benefit from higher temperatures." + "πŸ’‘ Note: For use cases that need to always be factually grounded, use very low temperature values while more creative tasks can benefit from higher temperatures." ] }, { @@ -905,7 +906,6 @@ "source": [ "import openai, pprint\n", "from openai import OpenAI\n", - "# from dotenv import load_dotenv, find_dotenv\n", "\n", "# Define the generation llm model to use.\n", "LLM_NAME = \"gpt-3.5-turbo-1106\"\n", @@ -940,21 +940,25 @@ "output_type": "stream", "text": [ "Question: What do the parameters for HNSW mean?\n", - "('Answer: The parameters for HNSW have the following meanings:\\n'\n", - " '\\n'\n", - " '1. M: Maximum degree of the node. This parameter limits the maximum degree '\n", - " 'of nodes on each layer of the graph. 
It is an integer value between 4 and '\n", - " '64.\\n'\n", + "('Answer: The parameters for HNSW are used to configure the index and search '\n", + " 'settings for the HNSW algorithm. \\n'\n", " '\\n'\n", - " '2. efConstruction: This parameter takes effect during the stage of index '\n", - " 'construction. It specifies the search range and is an integer value between '\n", - " '8 and 512.\\n'\n", + " '- M: Maximum degree of the node, which limits the maximum number of '\n", + " 'connections each node can have in the graph. It is set to a value between 4 '\n", + " 'and 64 in the provided example.\\n'\n", + " '- efConstruction: This parameter takes effect during the index construction '\n", + " 'stage. It specifies the size of the dynamic list for the nearest neighbors '\n", + " 'during the construction process. It is set to a value between 8 and 512 in '\n", + " 'the provided example.\\n'\n", + " '- ef: This parameter takes effect during the search scope. It specifies the '\n", + " 'size of the dynamic list for the nearest neighbors when searching for '\n", + " 'targets.\\n'\n", " '\\n'\n", - " '3. ef: This parameter takes effect during the stage of searching targets. It '\n", - " 'also specifies the search range and is an integer value.\\n'\n", + " 'These parameters allow you to fine-tune the performance of the HNSW '\n", + " 'algorithm based on the specific requirements of your application.\\n'\n", " '\\n'\n", - " 'These parameters are used to configure the behavior of the HNSW index when '\n", - " 'building and searching for targets.')\n", + " 'Source: [Milvus '\n", + " 'documentation](https://pymilvus.readthedocs.io/en/latest/param.html)')\n", "\n", "\n" ] @@ -964,7 +968,7 @@ "# CAREFUL!! THIS COSTS MONEY!!\n", "\n", "PROMPT = f\"\"\"Use the Context below to answer the user's question. Be clear, factual, complete, concise.\n", - "If the answer is not in the Context, say \"I don't know\".\n", + "If the answer is not in the Context, say \"I don't know\". Otherwise answer using this format:\n", "Context: {context[0]}\n", "Answer: The answer to the question.\n", "Grounding sources: {context_metadata[0]}\n", @@ -972,10 +976,6 @@ "\n", "# Generate response using the OpenAI API.\n", "response = openai_client.chat.completions.create(\n", - " # response_format={\n", - " # \"type\": \"json_object\", \n", - " # \"schema\": Result.schema_json()\n", - " # },\n", " messages=[\n", " {\"role\": \"system\", \"content\": PROMPT,},\n", " {\"role\": \"user\", \"content\": f\"question: {SAMPLE_QUESTION}, context: {context[0]}\",}\n", @@ -1000,7 +1000,7 @@ "# efConstruction: number of nearest neighbors to consider when connecting nodes in the graph.\n", "# ef: number of nearest neighbors to consider when searching for similar vectors. 
\n", "\n", - "# Question2: What are good default values for HNSW parameters with 25K vectors dim 768?\n", + "# Question2: What are good default values for HNSW parameters with 25K vectors dim 1024?\n", "# Answer: M=16, efConstruction=500, and ef=64\n", "# Best answer: M=16, efConstruction=32, ef=32\n", "\n", @@ -1032,7 +1032,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 21, "metadata": {}, "outputs": [ { @@ -1094,7 +1094,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 23, "metadata": {}, "outputs": [ { @@ -1102,15 +1102,19 @@ "output_type": "stream", "text": [ "Question: What do the parameters for HNSW mean?\n", - "('Answer: The parameters for HNSW in Milvus are used to control the graph '\n", - " 'structure and search range. The \"M\" parameter limits the maximum degree of '\n", - " 'nodes on each layer of the graph, with a range of 4 to 64. The '\n", - " '\"efConstruction\" parameter is used during index construction to specify the '\n", - " 'search range, with a range of 8 to 512. When searching targets, the \"ef\" '\n", - " 'parameter is used to specify the search range. These parameters help '\n", - " 'optimize the performance of the index for efficient similarity search. For '\n", - " 'more details, you can refer to the Milvus documentation on index parameters: '\n", - " 'https://pymilvus.readthedocs.io/en/latest/param.html')\n", + "('Answer: The parameters for HNSW in the context refer to the following:\\n'\n", + " '- M: Maximum degree of the node, limiting the maximum degree of nodes on '\n", + " 'each layer of the graph to a specific value between 4 and 64.\\n'\n", + " '- efConstruction: It takes effect during the stage of index construction, '\n", + " 'specifying a search range with a value between 8 and 512.\\n'\n", + " '- ef: It takes effect during the stage of searching targets, specifying a '\n", + " 'search range.\\n'\n", + " '\\n'\n", + " 'These parameters are used when creating and searching within an HNSW index '\n", + " 'to control the graph structure and search behavior.\\n'\n", + " '\\n'\n", + " 'Source: Milvus support to create index to accelerate vector search '\n", + " '(https://pymilvus.readthedocs.io/en/latest/param.html)')\n", "\n", "\n" ] @@ -1118,7 +1122,6 @@ ], "source": [ "# CAREFUL!! THIS COSTS MONEY!!\n", - "\n", "response_pipeline = openai_client.chat.completions.create(\n", " messages=[\n", " {\"role\": \"system\", \"content\": PROMPT,},\n", @@ -1143,7 +1146,7 @@ "# efConstruction: number of nearest neighbors to consider when connecting nodes in the graph.\n", "# ef: number of nearest neighbors to consider when searching for similar vectors. \n", "\n", - "# Question2: What are good default values for HNSW parameters with 25K vectors dim 768?\n", + "# Question2: What are good default values for HNSW parameters with 25K vectors dim 1024?\n", "# Answer: `M=16` and `efConstruction=200`\n", "# Best answer: M=16, efConstruction=32, ef=32\n", "\n", @@ -1154,7 +1157,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 24, "id": "d0e81e68", "metadata": {}, "outputs": [], @@ -1165,7 +1168,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 25, "id": "c777937e", "metadata": {}, "outputs": [