Skip to content

Commit 161d59d

Browse files
authoredFeb 20, 2025··
Add a report of Pinecone client library usage (#408)
## Problem We need a way to identify which notebooks are out of date. ## Solution - Add a workflow script that prints a report of pinecone version usage on every push to CI. - It also assesses plugin usage. ## Type of Change - [x] Infrastructure change (CI configs, etc)
1 parent 77f085d commit 161d59d

File tree

4 files changed

+420
-0
lines changed

4 files changed

+420
-0
lines changed
 

‎.github/scripts/version-census.py

+123
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
#! /usr/bin/env python
2+
3+
# This script will inspect every notebook in the repository and gather
4+
# information about the notebooks
5+
6+
import os
7+
import json
8+
import nbformat
9+
import re
10+
11+
def get_plugin_used(source):
    """Return the first ``pinecone-plugin-*`` package named in *source*.

    The name is recognised when it sits at the very start of the text or
    right after whitespace, optionally preceded by a quote character. Only
    the first occurrence is reported.

    Args:
        source: Text of a notebook code cell.

    Returns:
        The plugin package name (for example ``"pinecone-plugin-records"``),
        or ``None`` when no plugin name is found.
    """
    # `(?:^|\s)` also accepts a name at the very start of the text; the
    # hyphen is placed last in the character class so it is unambiguously a
    # literal rather than part of a range.
    pattern = r"(?:^|\s)\"?'?(pinecone-plugin-[a-zA-Z0-9_-]+)"
    match = re.search(pattern, source)
    if match is None:
        return None
    return match.group(1)
17+
18+
def get_version(source, client):
    """Extract the ``X.Y.Z`` version pinned to *client* with ``==``.

    The specifier must sit at the start of the text or right after
    whitespace. Quote characters are tolerated around the package name and
    around the version, e.g. ``"pinecone[grpc]"==5.0.0``.

    Args:
        source: Text of a notebook code cell.
        client: Exact package name to look for, e.g. ``"pinecone[grpc]"``.

    Returns:
        The three-component version string, or ``None`` when *client* is not
        pinned with ``==`` to a version of the form ``X.Y.Z``.
    """
    # Escape the name so the square brackets of extras such as "[grpc]" are
    # matched literally instead of being read as a character class.
    escaped_client = re.escape(client)
    pattern = (
        rf"(?:^|\s)\"?'?{escaped_client}\"?'?==\"?'?"
        r"([0-9]+\.[0-9]+\.[0-9]+)"
    )
    match = re.search(pattern, source)
    if match is None:
        return None
    return match.group(1)
26+
27+
def has_client(source, client):
    """Report whether *source* mentions the package *client* by name.

    The name only counts when it is not embedded in a longer package name:
    the characters immediately before and after it must not be letters,
    digits, underscores or hyphens. This keeps ``pinecone`` from matching
    ``pinecone-datasets`` or ``langchain-pinecone``, while still matching
    ``pinecone==5.0.0``, ``pinecone[grpc]`` and quoted names.

    Args:
        source: Text of a notebook code cell.
        client: Package name to look for, e.g. ``"pinecone-client[grpc]"``.

    Returns:
        ``True`` if the package name occurs as a standalone name.
    """
    # re.escape keeps the brackets of extras such as "[grpc]" literal.
    pattern = rf"(?<![A-Za-z0-9_-]){re.escape(client)}(?![A-Za-z0-9_-])"
    return re.search(pattern, source) is not None
29+
30+
def main():
    """Print a census of Pinecone client and plugin usage across notebooks.

    Walks the current directory for ``.ipynb`` files and inspects every code
    cell whose source contains ``pip``. Three reports are printed: notebooks
    grouped by client package and pinned version, notebooks whose ``==``
    version specifier could not be parsed, and notebooks grouped by plugin.
    """
    # Ordered most-specific first: the first name found in a cell wins, so
    # names with extras must be tried before their bare counterparts.
    clients = [
        "pinecone-client[grpc]",
        "pinecone[grpc]",
        "pinecone-client",
        "langchain-pinecone",
        "pinecone",
    ]
    # Order in which the per-client sections are printed.
    client_types = [
        "pinecone",
        "pinecone[grpc]",
        "pinecone-client",
        "pinecone-client[grpc]",
        "langchain-pinecone",
    ]

    # Track distribution of pinecone versions being used
    pinecone_versions = {}  # "client==version" -> notebook paths
    plugins_used = {}  # plugin name -> notebook paths
    malformed_notebooks = []

    for root, _, files in os.walk("."):
        for filename in files:
            if not filename.endswith(".ipynb"):
                continue

            notebook_path = os.path.join(root, filename)
            with open(notebook_path, "r", encoding="utf-8") as f:
                nb = nbformat.read(f, as_version=4)

            for cell in nb.cells:
                if cell.cell_type != "code" or "pip" not in cell.source:
                    continue

                plugin = get_plugin_used(cell.source)
                if plugin is not None:
                    plugins_used.setdefault(plugin, []).append(notebook_path)
                    # A cell that names a plugin is recorded as plugin usage
                    # only; it is not inspected for a client version.
                    continue

                client = next(
                    (name for name in clients if has_client(cell.source, name)),
                    None,
                )
                if client is None:
                    continue

                # Try to parse first so quoted specifiers such as
                # "pinecone[grpc]"==5.0.0 are recognised as pinned versions.
                version = get_version(cell.source, client)
                if version is None:
                    if f"{client}==" in cell.source:
                        # There is a pin, but not in a form we can parse.
                        print('===============================================')
                        print(f"Could not find {client} version in {notebook_path}")
                        print(cell.source)
                        print('===============================================')
                        malformed_notebooks.append(notebook_path)
                        continue
                    version = "unversioned"

                combined_version = f"{client}=={version}"
                pinecone_versions.setdefault(combined_version, []).append(notebook_path)

    for client_type in client_types:
        print()
        print(f"Notebooks using {client_type}:")
        # Match the exact client name; a substring test would list
        # "pinecone-client==..." and "langchain-pinecone==..." under "pinecone".
        prefix = f"{client_type}=="
        for version, notebooks in sorted(pinecone_versions.items()):
            if version.startswith(prefix):
                print(f" {version}: {len(notebooks)} notebooks")
                for notebook in notebooks:
                    print(" - ", notebook)
        print()

    print()
    print("Notebooks with malformed pinecone version specifiers:")
    for notebook in malformed_notebooks:
        print(" - ", notebook)

    print()
    print("Notebooks using plugins:")
    for plugin, notebooks in sorted(plugins_used.items()):
        print(f" {plugin}: {len(notebooks)} notebooks")
        for notebook in notebooks:
            print(" - ", notebook)
        print()
119+
120+
121+
# Run the census only when this file is executed as a script.
if __name__ == "__main__":
    main()
123+
+29
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
name: "Report: Client Version Usage"
2+
3+
on:
4+
push:
5+
branches:
6+
- main
7+
pull_request:
8+
9+
jobs:
10+
analyze-client-versions:
11+
runs-on: ubuntu-latest
12+
steps:
13+
- uses: actions/checkout@v4
14+
15+
- name: Set up Python
16+
uses: actions/setup-python@v5
17+
with:
18+
python-version: '3.11'
19+
20+
- name: Install uv
21+
uses: astral-sh/setup-uv@v5
22+
23+
- name: Install dependencies
24+
run: |
25+
uv sync
26+
27+
- name: Run version census
28+
run: |
29+
uv run .github/scripts/version-census.py

‎pyproject.toml

+9
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
[project]
2+
name = "examples"
3+
version = "0.1.0"
4+
description = "Add your description here"
5+
readme = "README.md"
6+
requires-python = ">=3.9"
7+
dependencies = [
8+
"nbformat>=5.10.4",
9+
]

‎uv.lock

+259
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)
Please sign in to comment.