diff --git a/learn/search/semantic-search/hello-pinecone-aws.ipynb b/learn/search/semantic-search/hello-pinecone-aws.ipynb index 0bdfef98..de68c939 100644 --- a/learn/search/semantic-search/hello-pinecone-aws.ipynb +++ b/learn/search/semantic-search/hello-pinecone-aws.ipynb @@ -1,18 +1,16 @@ { "cells": [ { - "attachments": {}, "cell_type": "markdown", "id": "023d771c", "metadata": { "id": "023d771c" }, "source": [ - "[](https://nbviewer.org/github/pinecone-io/examples/blob/master/docs/quick-tour/hello-pinecone.ipynb)" + "[](https://nbviewer.org/github/pinecone-io/examples/blob/master/learn/search/semantic-search/hello-pinecone-aws.ipynb)" ] }, { - "attachments": {}, "cell_type": "markdown", "id": "conceptual-belfast", "metadata": { @@ -33,7 +31,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "id": "first-affairs", "metadata": { @@ -52,7 +49,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "id": "banned-huntington", "metadata": { @@ -89,19 +85,34 @@ "start_time": "2021-04-16T15:08:30.762518", "status": "completed" }, - "tags": [] + "tags": [], + "outputId": "8dc9dc6d-5a02-4699-df31-fe28da170865", + "colab": { + "base_uri": "https://localhost:8080/" + } }, - "outputs": [], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\u001b[?25l \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m0.0/421.4 kB\u001b[0m \u001b[31m?\u001b[0m eta \u001b[36m-:--:--\u001b[0m\r\u001b[2K \u001b[91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[91m╸\u001b[0m \u001b[32m419.8/421.4 kB\u001b[0m \u001b[31m14.4 MB/s\u001b[0m eta \u001b[36m0:00:01\u001b[0m\r\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m421.4/421.4 kB\u001b[0m \u001b[31m9.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h" + ] + } + ], "source": [ "!pip install -qU \\\n", - " pinecone-client==3.1.0 \\\n", - " pandas==2.0.3" + " pinecone==6.0.1 \\\n", + " pandas==2.2.2" ] }, { "cell_type": "markdown", 
"id": "191793df", - "metadata": {}, + "metadata": { + "id": "191793df" + }, "source": [ "## Initializing the Index\n", "\n", @@ -110,23 +121,38 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "id": "3f0a5ee4", - "metadata": {}, - "outputs": [], + "metadata": { + "id": "3f0a5ee4", + "outputId": "f26b6f73-7da4-43c7-eeb0-07c5fdfe5cff", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Enter your Pinecone API key··········\n" + ] + } + ], "source": [ "import os\n", "from pinecone import Pinecone\n", + "from getpass import getpass\n", "\n", "# initialize connection to pinecone (get API key at app.pinecone.io)\n", - "api_key = os.environ.get('PINECONE_API_KEY') or 'PINECONE_API_KEY'\n", + "api_key = os.environ.get('PINECONE_API_KEY') or \\\n", + " getpass(\"Enter your Pinecone API key\")\n", "\n", "# configure client\n", "pc = Pinecone(api_key=api_key)" ] }, { - "attachments": {}, "cell_type": "markdown", "id": "forbidden-indication", "metadata": { @@ -146,37 +172,27 @@ "With Pinecone you can create a vector index where you can store and search through your vectors." 
] }, - { - "cell_type": "code", - "execution_count": 2, - "id": "EA2EcZsCoWS3", - "metadata": { - "id": "EA2EcZsCoWS3", - "tags": [ - "parameters" - ] - }, - "outputs": [], - "source": [ - "# Giving our index a name\n", - "index_name = \"hello-pinecone\"" - ] - }, { "cell_type": "code", "execution_count": 3, "id": "774014f4", - "metadata": {}, + "metadata": { + "id": "774014f4", + "outputId": "f5bfef00-6fdc-4754-9fc3-ce00b8d4cb88", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, "outputs": [ { + "output_type": "execute_result", "data": { "text/plain": [ "[]" ] }, - "execution_count": 3, "metadata": {}, - "output_type": "execute_result" + "execution_count": 3 } ], "source": [ @@ -186,6 +202,22 @@ { "cell_type": "code", "execution_count": 4, + "id": "EA2EcZsCoWS3", + "metadata": { + "id": "EA2EcZsCoWS3", + "tags": [ + "parameters" + ] + }, + "outputs": [], + "source": [ + "# Giving our index a name\n", + "index_name = \"hello-pinecone\"" + ] + }, + { + "cell_type": "code", + "execution_count": 5, "id": "synthetic-essex", "metadata": { "execution": { @@ -207,55 +239,32 @@ "outputs": [], "source": [ "# Delete the index, if an index of the same name already exists\n", - "if index_name in pc.list_indexes().names():\n", - " pc.delete_index(index_name)" - ] - }, - { - "cell_type": "markdown", - "id": "05707670", - "metadata": {}, - "source": [ - "We define a `spec` object to define the cloud region we'd like to create an index within. You can find a list of all [available providers and regions here](https://docs.pinecone.io/docs/projects)." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8ebeab38", - "metadata": {}, - "outputs": [], - "source": [ - "from pinecone import ServerlessSpec\n", - "\n", - "cloud = os.environ.get('PINECONE_CLOUD') or 'aws'\n", - "region = os.environ.get('PINECONE_REGION') or 'us-east-1'\n", - "\n", - "spec = ServerlessSpec(cloud=cloud, region=region)" + "if pc.has_index(name=index_name):\n", + " pc.delete_index(name=index_name)" ] }, { - "attachments": {}, "cell_type": "markdown", - "id": "94LRI2H8Ch2B", + "id": "786a88df", "metadata": { - "id": "94LRI2H8Ch2B", - "papermill": { - "duration": 0.021764, - "end_time": "2021-04-16T15:08:50.446400", - "exception": false, - "start_time": "2021-04-16T15:08:50.424636", - "status": "completed" - }, - "tags": [] + "id": "786a88df" }, "source": [ - "Creating a Pinecone Index." + "### Creating a Pinecone Index\n", + "\n", + "When creating the index we need to define several configuration properties.\n", + "\n", + "- `name` can be anything we like. The name is used as an identifier for the index when performing other operations such as `describe_index`, `delete_index`, and so on.\n", + "- `metric` specifies the similarity metric that will be used later when you make queries to the index.\n", + "- `dimension` should correspond to the dimension of the dense vectors produced by your embedding model. In this quick start, we are using made-up data so a small value is simplest.\n", + "- `spec` holds a specification which tells Pinecone how we would like to deploy our index. You can find a list of all [available providers and regions here](https://docs.pinecone.io/docs/projects).\n", + "\n", + "There are more configurations available, but this minimal set will get us started." 
] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 6, "id": "4YwC8livCrn2", "metadata": { "execution": { @@ -272,27 +281,59 @@ "start_time": "2021-04-16T15:08:50.467779", "status": "completed" }, - "tags": [] + "tags": [], + "outputId": "5de1d905-2c7a-425e-8c30-60fc84c2e5b7", + "colab": { + "base_uri": "https://localhost:8080/" + } }, - "outputs": [], + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "{\n", + " \"name\": \"hello-pinecone\",\n", + " \"metric\": \"cosine\",\n", + " \"host\": \"hello-pinecone-rjeaip3.svc.aped-4627-b74a.pinecone.io\",\n", + " \"spec\": {\n", + " \"serverless\": {\n", + " \"cloud\": \"aws\",\n", + " \"region\": \"us-east-1\"\n", + " }\n", + " },\n", + " \"status\": {\n", + " \"ready\": true,\n", + " \"state\": \"Ready\"\n", + " },\n", + " \"vector_type\": \"dense\",\n", + " \"dimension\": 3,\n", + " \"deletion_protection\": \"disabled\",\n", + " \"tags\": null\n", + "}" + ] + }, + "metadata": {}, + "execution_count": 6 + } + ], "source": [ - "import time\n", + "from pinecone import ServerlessSpec, CloudProvider, AwsRegion, Metric\n", "\n", - "dimensions = 3\n", "pc.create_index(\n", " name=index_name,\n", - " dimension=dimensions,\n", - " metric=\"cosine\"\n", - ")\n", - "\n", - "# wait for index to be ready before connecting\n", - "while not pc.describe_index(index_name).status['ready']:\n", - " time.sleep(1)" + " metric=Metric.COSINE,\n", + " dimension=3,\n", + " spec=ServerlessSpec(\n", + " cloud=CloudProvider.AWS,\n", + " region=AwsRegion.US_EAST_1\n", + " )\n", + ")" ] }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 7, "id": "toy-VhU4LO_O", "metadata": { "execution": { @@ -313,11 +354,10 @@ }, "outputs": [], "source": [ - "index = pc.Index(index_name=index_name)" + "index = pc.Index(name=index_name)" ] }, { - "attachments": {}, "cell_type": "markdown", "id": "j1F8SLx6C2HH", "metadata": { @@ -337,12 +377,12 @@ }, { "cell_type": "code", - 
"execution_count": 7, + "execution_count": 8, "id": "indirect-lafayette", "metadata": { "colab": { "base_uri": "https://localhost:8080/", - "height": 112 + "height": 125 }, "execution": { "iopub.execute_input": "2021-04-16T15:09:05.206498Z", @@ -351,7 +391,7 @@ "shell.execute_reply": "2021-04-16T15:09:05.403743Z" }, "id": "indirect-lafayette", - "outputId": "5bd49b0e-0187-4de2-e564-1d41c61b7bc9", + "outputId": "45d4e1c9-0364-4440-9d2d-23db95b74339", "papermill": { "duration": 0.227373, "end_time": "2021-04-16T15:09:05.404700", @@ -363,9 +403,17 @@ }, "outputs": [ { + "output_type": "execute_result", "data": { + "text/plain": [ + " id vector\n", + "0 A [1.0, 1.0, 1.0]\n", + "1 B [1.0, 2.0, 3.0]" + ], "text/html": [ - "