From 9bb8495ef18a24bff1ab9e040d107079cf6f2266 Mon Sep 17 00:00:00 2001 From: Joshua Briggs Date: Tue, 4 Mar 2025 13:11:02 +0000 Subject: [PATCH 1/2] updated hello-pinecone-aws document --- .../semantic-search/hello-pinecone-aws.ipynb | 184 +++++++++--------- 1 file changed, 96 insertions(+), 88 deletions(-) diff --git a/learn/search/semantic-search/hello-pinecone-aws.ipynb b/learn/search/semantic-search/hello-pinecone-aws.ipynb index 0bdfef98..5729bf29 100644 --- a/learn/search/semantic-search/hello-pinecone-aws.ipynb +++ b/learn/search/semantic-search/hello-pinecone-aws.ipynb @@ -8,7 +8,7 @@ "id": "023d771c" }, "source": [ - "[![Open nbviewer](https://raw.githubusercontent.com/pinecone-io/examples/master/assets/nbviewer-shield.svg)](https://nbviewer.org/github/pinecone-io/examples/blob/master/docs/quick-tour/hello-pinecone.ipynb)" + "[![Open nbviewer](https://raw.githubusercontent.com/pinecone-io/examples/master/assets/nbviewer-shield.svg)](https://nbviewer.org/github/pinecone-io/examples/blob/master/learn/search/semantic-search/hello-pinecone-aws.ipynb)" ] }, { @@ -94,8 +94,8 @@ "outputs": [], "source": [ "!pip install -qU \\\n", - " pinecone-client==3.1.0 \\\n", - " pandas==2.0.3" + " pinecone==6.0.1 \\\n", + " pandas==2.2.2" ] }, { @@ -148,33 +148,17 @@ }, { "cell_type": "code", - "execution_count": 2, - "id": "EA2EcZsCoWS3", - "metadata": { - "id": "EA2EcZsCoWS3", - "tags": [ - "parameters" - ] - }, - "outputs": [], - "source": [ - "# Giving our index a name\n", - "index_name = \"hello-pinecone\"" - ] - }, - { - "cell_type": "code", - "execution_count": 3, + "execution_count": 6, "id": "774014f4", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "[]" + "['hybrid-test', 'index', 'rerankers']" ] }, - "execution_count": 3, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -186,6 +170,22 @@ { "cell_type": "code", "execution_count": 4, + "id": "EA2EcZsCoWS3", + "metadata": { + "id": "EA2EcZsCoWS3", + "tags": [ + "parameters" + ] + }, + "outputs": [], + "source": [ + "# Giving our index a name\n", + "index_name = \"hello-pinecone\"" + ] + }, + { + "cell_type": "code", + "execution_count": 5, "id": "synthetic-essex", "metadata": { "execution": { @@ -207,55 +207,30 @@ "outputs": [], "source": [ "# Delete the index, if an index of the same name already exists\n", - "if index_name in pc.list_indexes().names():\n", - " pc.delete_index(index_name)" + "if pc.has_index(name=index_name):\n", + " pc.delete_index(name=index_name)" ] }, { "cell_type": "markdown", - "id": "05707670", - "metadata": {}, - "source": [ - "We define a `spec` object to define the cloud region we'd like to create an index within. You can find a list of all [available providers and regions here](https://docs.pinecone.io/docs/projects)." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8ebeab38", + "id": "786a88df", "metadata": {}, - "outputs": [], "source": [ - "from pinecone import ServerlessSpec\n", + "### Creating a Pinecone Index\n", "\n", - "cloud = os.environ.get('PINECONE_CLOUD') or 'aws'\n", - "region = os.environ.get('PINECONE_REGION') or 'us-east-1'\n", + "When creating the index we need to define several configuration properties. \n", "\n", - "spec = ServerlessSpec(cloud=cloud, region=region)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "94LRI2H8Ch2B", - "metadata": { - "id": "94LRI2H8Ch2B", - "papermill": { - "duration": 0.021764, - "end_time": "2021-04-16T15:08:50.446400", - "exception": false, - "start_time": "2021-04-16T15:08:50.424636", - "status": "completed" - }, - "tags": [] - }, - "source": [ - "Creating a Pinecone Index." + "- `name` can be anything we like. The name is used as an identifier for the index when performing other operations such as `describe_index`, `delete_index`, and so on. \n", + "- `metric` specifies the similarity metric that will be used later when you make queries to the index.\n", + "- `dimension` should correspond to the dimension of the dense vectors produced by your embedding model. In this quick start, we are using made-up data so a small value is simplest.\n", + "- `spec` holds a specification which tells Pinecone how you would like to deploy our index. You can find a list of all [available providers and regions here](https://docs.pinecone.io/docs/projects).\n", + "\n", + "There are more configurations available, but this minimal set will get us started." ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 7, "id": "4YwC8livCrn2", "metadata": { "execution": { @@ -274,25 +249,53 @@ }, "tags": [] }, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "{\n", + " \"name\": \"hello-pinecone\",\n", + " \"metric\": \"cosine\",\n", + " \"host\": \"hello-pinecone-96ix5ds.svc.aped-4627-b74a.pinecone.io\",\n", + " \"spec\": {\n", + " \"serverless\": {\n", + " \"cloud\": \"aws\",\n", + " \"region\": \"us-east-1\"\n", + " }\n", + " },\n", + " \"status\": {\n", + " \"ready\": true,\n", + " \"state\": \"Ready\"\n", + " },\n", + " \"vector_type\": \"dense\",\n", + " \"dimension\": 3,\n", + " \"deletion_protection\": \"disabled\",\n", + " \"tags\": null\n", + "}" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "import time\n", + "from pinecone import ServerlessSpec, CloudProvider, AwsRegion, Metric\n", "\n", - "dimensions = 3\n", "pc.create_index(\n", " name=index_name,\n", - " dimension=dimensions,\n", - " metric=\"cosine\"\n", - ")\n", - "\n", - "# wait for index to be ready before connecting\n", - "while not pc.describe_index(index_name).status['ready']:\n", - " time.sleep(1)" + " metric=Metric.COSINE,\n", + " dimension=3,\n", + " spec=ServerlessSpec(\n", + " cloud=CloudProvider.AWS, \n", + " region=AwsRegion.US_EAST_1\n", + " )\n", + ")" ] }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 9, "id": "toy-VhU4LO_O", "metadata": { "execution": { @@ -313,7 +316,7 @@ }, "outputs": [], "source": [ - "index = pc.Index(index_name=index_name)" + "index = pc.Index(name=index_name)" ] }, { @@ -337,7 +340,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 11, "id": "indirect-lafayette", "metadata": { "colab": { @@ -408,7 +411,7 @@ "1 B [1.0, 2.0, 3.0]" ] }, - "execution_count": 7, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } @@ -445,7 +448,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 12, "id": "efficient-parliament", "metadata": { "colab": { @@ -475,18 +478,21 @@ "{'upserted_count': 2}" ] }, - "execution_count": 8, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "index.upsert(vectors=zip(df.id, df.vector)) # insert vectors" + "# insert vectors\n", + "index.upsert(\n", + " vectors=zip(df.id, df.vector)\n", + " )" ] }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 13, "id": "enclosed-performer", "metadata": { "colab": { @@ -515,11 +521,13 @@ "text/plain": [ "{'dimension': 3,\n", " 'index_fullness': 0.0,\n", - " 'namespaces': {'': {'vector_count': 2}},\n", - " 'total_vector_count': 2}" + " 'metric': 'cosine',\n", + " 'namespaces': {},\n", + " 'total_vector_count': 0,\n", + " 'vector_type': 'dense'}" ] }, - "execution_count": 9, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } @@ -530,7 +538,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 14, "id": "leading-shape", "metadata": { "colab": { @@ -557,21 +565,21 @@ { "data": { "text/plain": [ - "{'matches': [{'id': 'A', 'score': 1.0, 'values': [1.0, 1.0, 1.0]}],\n", - " 'namespace': ''}" + "{'matches': [], 'namespace': '', 'usage': {'read_units': 1}}" ] }, - "execution_count": 11, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ + "# returns top_k matches\n", "index.query(\n", " vector=[2., 2., 2.],\n", " top_k=1,\n", " include_values=True\n", - ") # returns top_k matches" + ")" ] }, { @@ -596,7 +604,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 16, "id": "indian-broadcast", "metadata": { "execution": { @@ -617,7 +625,7 @@ }, "outputs": [], "source": [ - "pc.delete_index(index_name)" + "pc.delete_index(name=index_name)" ] }, { @@ -649,7 +657,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.12" + "version": "3.9.6" }, "papermill": { "default_parameters": {}, From eb2600b707443e9fa82a9992b02ff9831b225d58 Mon Sep 17 00:00:00 2001 From: James Briggs <35938317+jamescalam@users.noreply.github.com> Date: Wed, 5 Mar 2025 00:04:14 +0400 Subject: [PATCH 2/2] feat: adding getpass --- .../semantic-search/hello-pinecone-aws.ipynb | 429 +++++++++++++++--- 1 file changed, 369 insertions(+), 60 deletions(-) diff --git a/learn/search/semantic-search/hello-pinecone-aws.ipynb b/learn/search/semantic-search/hello-pinecone-aws.ipynb index 5729bf29..de68c939 100644 --- a/learn/search/semantic-search/hello-pinecone-aws.ipynb +++ b/learn/search/semantic-search/hello-pinecone-aws.ipynb @@ -1,7 +1,6 @@ { "cells": [ { - "attachments": {}, "cell_type": "markdown", "id": "023d771c", "metadata": { @@ -12,7 +11,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "id": "conceptual-belfast", "metadata": { @@ -33,7 +31,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "id": "first-affairs", "metadata": { @@ -52,7 +49,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "id": "banned-huntington", "metadata": { @@ -89,9 +85,22 @@ "start_time": "2021-04-16T15:08:30.762518", "status": "completed" }, - "tags": [] + "tags": [], + "outputId": "8dc9dc6d-5a02-4699-df31-fe28da170865", + "colab": { + "base_uri": "https://localhost:8080/" + } }, - "outputs": [], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\u001b[?25l \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m0.0/421.4 kB\u001b[0m \u001b[31m?\u001b[0m eta \u001b[36m-:--:--\u001b[0m\r\u001b[2K \u001b[91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[91m╸\u001b[0m \u001b[32m419.8/421.4 kB\u001b[0m \u001b[31m14.4 MB/s\u001b[0m eta \u001b[36m0:00:01\u001b[0m\r\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m421.4/421.4 kB\u001b[0m \u001b[31m9.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h" + ] + } + ], "source": [ "!pip install -qU \\\n", " pinecone==6.0.1 \\\n", @@ -101,7 +110,9 @@ { "cell_type": "markdown", "id": "191793df", - "metadata": {}, + "metadata": { + "id": "191793df" + }, "source": [ "## Initializing the Index\n", "\n", @@ -110,23 +121,38 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "id": "3f0a5ee4", - "metadata": {}, - "outputs": [], + "metadata": { + "id": "3f0a5ee4", + "outputId": "f26b6f73-7da4-43c7-eeb0-07c5fdfe5cff", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Enter your Pinecone API key··········\n" + ] + } + ], "source": [ "import os\n", "from pinecone import Pinecone\n", + "from getpass import getpass\n", "\n", "# initialize connection to pinecone (get API key at app.pinecone.io)\n", - "api_key = os.environ.get('PINECONE_API_KEY') or 'PINECONE_API_KEY'\n", + "api_key = os.environ.get('PINECONE_API_KEY') or \\\n", + " getpass(\"Enter your Pinecone API key\")\n", "\n", "# configure client\n", "pc = Pinecone(api_key=api_key)" ] }, { - "attachments": {}, "cell_type": "markdown", "id": "forbidden-indication", "metadata": { @@ -148,19 +174,25 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 3, "id": "774014f4", - "metadata": {}, + "metadata": { + "id": "774014f4", + "outputId": "f5bfef00-6fdc-4754-9fc3-ce00b8d4cb88", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, "outputs": [ { + "output_type": "execute_result", "data": { "text/plain": [ - "['hybrid-test', 'index', 'rerankers']" + "[]" ] }, - "execution_count": 6, "metadata": {}, - "output_type": "execute_result" + "execution_count": 3 } ], "source": [ @@ -214,13 +246,15 @@ { "cell_type": "markdown", "id": "786a88df", - "metadata": {}, + "metadata": { + "id": "786a88df" + }, "source": [ "### Creating a Pinecone Index\n", "\n", - "When creating the index we need to define several configuration properties. \n", + "When creating the index we need to define several configuration properties.\n", "\n", - "- `name` can be anything we like. The name is used as an identifier for the index when performing other operations such as `describe_index`, `delete_index`, and so on. \n", + "- `name` can be anything we like. The name is used as an identifier for the index when performing other operations such as `describe_index`, `delete_index`, and so on.\n", "- `metric` specifies the similarity metric that will be used later when you make queries to the index.\n", "- `dimension` should correspond to the dimension of the dense vectors produced by your embedding model. In this quick start, we are using made-up data so a small value is simplest.\n", "- `spec` holds a specification which tells Pinecone how you would like to deploy our index. You can find a list of all [available providers and regions here](https://docs.pinecone.io/docs/projects).\n", @@ -230,7 +264,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 6, "id": "4YwC8livCrn2", "metadata": { "execution": { @@ -247,16 +281,21 @@ "start_time": "2021-04-16T15:08:50.467779", "status": "completed" }, - "tags": [] + "tags": [], + "outputId": "5de1d905-2c7a-425e-8c30-60fc84c2e5b7", + "colab": { + "base_uri": "https://localhost:8080/" + } }, "outputs": [ { + "output_type": "execute_result", "data": { "text/plain": [ "{\n", " \"name\": \"hello-pinecone\",\n", " \"metric\": \"cosine\",\n", - " \"host\": \"hello-pinecone-96ix5ds.svc.aped-4627-b74a.pinecone.io\",\n", + " \"host\": \"hello-pinecone-rjeaip3.svc.aped-4627-b74a.pinecone.io\",\n", " \"spec\": {\n", " \"serverless\": {\n", " \"cloud\": \"aws\",\n", @@ -274,9 +313,8 @@ "}" ] }, - "execution_count": 7, "metadata": {}, - "output_type": "execute_result" + "execution_count": 6 } ], "source": [ @@ -287,7 +325,7 @@ " metric=Metric.COSINE,\n", " dimension=3,\n", " spec=ServerlessSpec(\n", - " cloud=CloudProvider.AWS, \n", + " cloud=CloudProvider.AWS,\n", " region=AwsRegion.US_EAST_1\n", " )\n", ")" @@ -295,7 +333,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 7, "id": "toy-VhU4LO_O", "metadata": { "execution": { @@ -320,7 +358,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "id": "j1F8SLx6C2HH", "metadata": { @@ -340,12 +377,12 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 8, "id": "indirect-lafayette", "metadata": { "colab": { "base_uri": "https://localhost:8080/", - "height": 112 + "height": 125 }, "execution": { "iopub.execute_input": "2021-04-16T15:09:05.206498Z", @@ -354,7 +391,7 @@ "shell.execute_reply": "2021-04-16T15:09:05.403743Z" }, "id": "indirect-lafayette", - "outputId": "5bd49b0e-0187-4de2-e564-1d41c61b7bc9", + "outputId": "45d4e1c9-0364-4440-9d2d-23db95b74339", "papermill": { "duration": 0.227373, "end_time": "2021-04-16T15:09:05.404700", @@ -366,9 +403,17 @@ }, "outputs": [ { + "output_type": "execute_result", "data": { + "text/plain": [ + " id vector\n", + "0 A [1.0, 1.0, 1.0]\n", + "1 B [1.0, 2.0, 3.0]" + ], "text/html": [ - "
\n", + "\n", + "
\n", + "
\n", "\n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + " \n", + " \n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" ], - "text/plain": [ - " id vector\n", - "0 A [1.0, 1.0, 1.0]\n", - "1 B [1.0, 2.0, 3.0]" - ] + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "dataframe", + "variable_name": "df", + "summary": "{\n \"name\": \"df\",\n \"rows\": 2,\n \"fields\": [\n {\n \"column\": \"id\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"B\",\n \"A\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"vector\",\n \"properties\": {\n \"dtype\": \"object\",\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" + } }, - "execution_count": 11, "metadata": {}, - "output_type": "execute_result" + "execution_count": 8 } ], "source": [ @@ -428,7 +736,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "id": "oiJKXWxoDjhK", "metadata": { @@ -448,7 +755,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 9, "id": "efficient-parliament", "metadata": { "colab": { @@ -461,7 +768,7 @@ "shell.execute_reply": "2021-04-16T15:09:06.179012Z" }, "id": "efficient-parliament", - "outputId": "0d9fbac4-4f8a-421e-95a9-0f441d2dcc16", + "outputId": "d85df98f-9f86-40de-e545-d62d6ad1c13c", "papermill": { "duration": 0.704503, "end_time": "2021-04-16T15:09:06.180549", @@ -473,21 +780,21 @@ }, "outputs": [ { + "output_type": "execute_result", "data": { "text/plain": [ "{'upserted_count': 2}" ] }, - "execution_count": 12, "metadata": {}, - "output_type": "execute_result" + "execution_count": 9 } ], "source": [ "# insert vectors\n", "index.upsert(\n", " vectors=zip(df.id, df.vector)\n", - " )" + ")" ] }, { @@ -505,7 +812,7 @@ "shell.execute_reply": "2021-04-16T15:09:06.351713Z" }, "id": "enclosed-performer", - "outputId": "5b67ec13-6863-4b1a-ac45-b57c569923ee", + "outputId": "8ede74dc-8ad2-4954-910b-eba701b7090a", "papermill": { "duration": 0.140473, "end_time": "2021-04-16T15:09:06.352169", @@ -517,19 +824,19 @@ }, "outputs": [ { + "output_type": "execute_result", "data": { "text/plain": [ "{'dimension': 3,\n", " 'index_fullness': 0.0,\n", " 'metric': 'cosine',\n", - " 'namespaces': {},\n", - " 'total_vector_count': 0,\n", + " 'namespaces': {'': {'vector_count': 2}},\n", + " 'total_vector_count': 2,\n", " 'vector_type': 'dense'}" ] }, - "execution_count": 13, "metadata": {}, - "output_type": "execute_result" + "execution_count": 13 } ], "source": [ @@ -551,7 +858,7 @@ "shell.execute_reply": "2021-04-16T15:09:08.563202Z" }, "id": "leading-shape", - "outputId": "fb512e95-ebf4-4e1d-b9c7-74afc3cdd0c2", + "outputId": "f3440980-cb64-48bf-e5fd-7b15bf0171da", "papermill": { "duration": 2.177493, "end_time": "2021-04-16T15:09:08.564594", @@ -563,14 +870,16 @@ }, "outputs": [ { + "output_type": "execute_result", "data": { "text/plain": [ - "{'matches': [], 'namespace': '', 'usage': {'read_units': 1}}" + "{'matches': [{'id': 'A', 'score': 1.0, 'values': [1.0, 1.0, 1.0]}],\n", + " 'namespace': '',\n", + " 'usage': {'read_units': 6}}" ] }, - "execution_count": 14, "metadata": {}, - "output_type": "execute_result" + "execution_count": 14 } ], "source": [ @@ -583,7 +892,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "id": "z5jcU5_SLMFC", "metadata": { @@ -604,7 +912,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 15, "id": "indian-broadcast", "metadata": { "execution": { @@ -629,10 +937,11 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "id": "e84d82ee", - "metadata": {}, + "metadata": { + "id": "e84d82ee" + }, "source": [ "---" ] @@ -674,4 +983,4 @@ }, "nbformat": 4, "nbformat_minor": 5 -} +} \ No newline at end of file