|
| 1 | +#! /usr/bin/env python |
| 2 | + |
| 3 | +# This script will inspect every notebook in the repository and gather |
| 4 | +# information about the notebooks |
| 5 | + |
| 6 | +import os |
| 7 | +import json |
| 8 | +import nbformat |
| 9 | +import re |
| 10 | + |
def get_plugin_used(source):
    """Return the first ``pinecone-plugin-*`` package name found in *source*.

    The name must be preceded by a whitespace character and may be wrapped
    in single or double quotes. Only the first occurrence is reported.

    Args:
        source: Text of a notebook code cell.

    Returns:
        The plugin package name (without quotes or version specifier), or
        ``None`` when no plugin is mentioned.
    """
    plugin_pattern = re.compile(rf"\s\"?\'?(pinecone-plugin-[a-zA-Z0-9-_]+)\"?\'?")
    found = plugin_pattern.search(source)
    return found.group(1) if found else None
| 17 | + |
def get_version(source, client):
    """Return the ``X.Y.Z`` version pinned to *client* with ``==`` in *source*.

    The client name must be preceded by a whitespace character; the name and
    the version may each be wrapped in single or double quotes. Only versions
    made of three dot-separated numeric components are recognised.

    Args:
        source: Text of a notebook code cell.
        client: Package name to look for, e.g. ``"pinecone-client[grpc]"``.

    Returns:
        The version string, or ``None`` when no matching pin is found.
    """
    # Escape the name so characters such as the square brackets in
    # "pinecone[grpc]" are matched literally instead of as regex syntax.
    client = re.escape(client)
    pin_pattern = rf"\s\"?\'?{client}\"?\'?==\"?\'?([0-9]+\.[0-9]+\.[0-9]+)\"?\'?"
    found = re.search(pin_pattern, source)
    return found.group(1) if found else None
| 26 | + |
def has_client(source, client):
    """Return True when *source* mentions the package *client* by name.

    A plain substring test would report "pinecone" as present in unrelated
    packages such as "pinecone-datasets" or "langchain-pinecone". The name is
    therefore only accepted when it is not directly preceded or followed by
    a character that can be part of a distribution name (letters, digits,
    ``_``, ``.`` or ``-``). Extras, quotes and version specifiers may follow.

    Args:
        source: Text of a notebook code cell.
        client: Package name to look for, e.g. ``"pinecone[grpc]"``.

    Returns:
        ``True`` if the package name occurs as a whole name, else ``False``.
    """
    # re.escape keeps the square brackets of extras such as "[grpc]" literal.
    name_pattern = rf"(?<![A-Za-z0-9_.-]){re.escape(client)}(?![A-Za-z0-9_.-])"
    return re.search(name_pattern, source) is not None
| 29 | + |
def main():
    """Scan every notebook under the current directory and report pinecone usage.

    Walks the tree rooted at ".", reads each ``.ipynb`` file with nbformat and
    inspects the code cells whose source contains the text "pip". For each
    such cell it records either the first pinecone plugin mentioned or, if no
    plugin is mentioned, the first matching client package together with its
    pinned version ("unversioned" when the cell has no ``==`` pin for it).

    Three reports are printed to stdout: notebooks grouped by client and
    version, notebooks whose version pin could not be parsed, and notebooks
    grouped by plugin.
    """
    # Ordered most specific first: the first name found in a cell wins, and
    # the shorter names are substrings of the longer ones.
    clients = [
        "pinecone-client[grpc]",
        "pinecone[grpc]",
        "pinecone-client",
        "langchain-pinecone",
        "pinecone",
    ]

    # Track distribution of pinecone versions being used
    pinecone_versions = {}  # "<client>==<version>" -> list of notebook paths
    plugins_used = {}  # plugin package name -> list of notebook paths
    malformed_notebooks = []

    for root, _, files in os.walk("."):
        for file in files:
            if not file.endswith(".ipynb"):
                continue

            notebook_path = os.path.join(root, file)
            with open(notebook_path, "r", encoding="utf-8") as f:
                nb = nbformat.read(f, as_version=4)

            for cell in nb.cells:
                # Only code cells that mention pip are of interest.
                if cell.cell_type != "code" or "pip" not in cell.source:
                    continue

                plugin = get_plugin_used(cell.source)
                if plugin is not None:
                    plugins_used.setdefault(plugin, []).append(notebook_path)
                    # A cell that mentions a plugin is not inspected for clients.
                    continue

                client = next(
                    (name for name in clients if has_client(cell.source, name)),
                    None,
                )
                if client is None:
                    continue

                if f"{client}==" in cell.source:
                    version = get_version(cell.source, client)
                    if version is None:
                        # A pin is present but is not in the X.Y.Z form.
                        print('===============================================')
                        print(f"Could not find {client} version in {notebook_path}")
                        print(cell.source)
                        print('===============================================')
                        malformed_notebooks.append(notebook_path)
                        continue
                else:
                    version = "unversioned"

                combined_version = f"{client}=={version}"
                pinecone_versions.setdefault(combined_version, []).append(notebook_path)

    client_types = [
        "pinecone",
        "pinecone[grpc]",
        "pinecone-client",
        "pinecone-client[grpc]",
        "langchain-pinecone",
    ]
    for client_type in client_types:
        print()
        print(f"Notebooks using {client_type}:")
        # Match the exact client name. A substring test would list, for
        # example, every "pinecone-client==..." entry under "pinecone" too.
        prefix = f"{client_type}=="
        for version, notebooks in sorted(pinecone_versions.items()):
            if not version.startswith(prefix):
                continue
            print(f" {version}: {len(notebooks)} notebooks")
            for notebook in notebooks:
                print(" - ", notebook)
            print()

    print()
    print("Notebooks with malformed pinecone version specifiers:")
    for notebook in malformed_notebooks:
        print(" - ", notebook)

    print()
    print("Notebooks using plugins:")
    for plugin, notebooks in sorted(plugins_used.items()):
        print(f" {plugin}: {len(notebooks)} notebooks")
        for notebook in notebooks:
            print(" - ", notebook)
        print()
| 119 | + |
| 120 | + |
# Run the census only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
| 123 | + |
0 commit comments