|
2 | 2 | Script to upload data to nilDB using nilRAG.
|
3 | 3 | """
|
4 | 4 |
|
5 |
| -import os |
6 |
| -import json |
7 |
| -import sys |
8 | 5 | import argparse
|
9 |
| -import time |
10 | 6 | import asyncio
|
| 7 | +import time |
| 8 | + |
11 | 9 | import nilql
|
12 |
| -from nilrag.util import ( |
13 |
| - create_chunks, |
14 |
| - encrypt_float_list, |
15 |
| - generate_embeddings_huggingface, |
16 |
| - load_file, |
17 |
| -) |
18 |
| -from nilrag.nildb_requests import NilDB, Node |
19 | 10 |
|
| 11 | +from nilrag.config import load_nil_db_config |
| 12 | +from nilrag.util import (create_chunks, encrypt_float_list, |
| 13 | + generate_embeddings_huggingface, load_file) |
20 | 14 |
|
21 | 15 | DEFAULT_CONFIG = "examples/nildb_config.json"
|
22 |
| -DEFAULT_FILE_PATH = 'examples/data/20-fake.txt' |
| 16 | +DEFAULT_FILE_PATH = "examples/data/20-fake.txt" |
| 17 | + |
23 | 18 |
|
async def main():
    """
    Upload data to nilDB using nilRAG.

    This script:
    1. Loads the nilDB configuration
    2. Initializes encryption keys for different modes
    3. Processes the input file into chunks and embeddings
    4. Encrypts the data using nilQL
    5. Uploads the encrypted data to nilDB nodes

    Command-line arguments:
        --config: Path to the nilDB config file (default: DEFAULT_CONFIG).
        --file: Path to the data file to upload (default: DEFAULT_FILE_PATH).
    """
    parser = argparse.ArgumentParser(description="Upload data to nilDB using nilRAG")
    parser.add_argument(
        "--config",
        type=str,
        default=DEFAULT_CONFIG,
        help=f"Path to nilDB config file (default: {DEFAULT_CONFIG})",
    )
    parser.add_argument(
        "--file",
        type=str,
        default=DEFAULT_FILE_PATH,
        help=f"Path to data file to upload (default: {DEFAULT_FILE_PATH})",
    )
    args = parser.parse_args()

    # Load NilDB configuration; the second element of the returned pair is
    # not needed by this script and is discarded.
    nil_db, _ = load_nil_db_config(
        args.config,
        require_bearer_token=True,
        require_schema_id=True,
    )
    print(nil_db)
    print()

    # Initialize secret keys for different modes of operation: one key
    # generated with the "sum" operation (used for the embeddings) and one
    # with the "store" operation (used for the text chunks).
    num_nodes = len(nil_db.nodes)
    cluster = {"nodes": [{}] * num_nodes}
    additive_key = nilql.ClusterKey.generate(cluster, {"sum": True})
    xor_key = nilql.ClusterKey.generate(cluster, {"store": True})

    # Load and process input file
    paragraphs = load_file(args.file)
    chunks = create_chunks(paragraphs, chunk_size=50, overlap=10)

    # Generate embeddings and chunks.
    # Durations are measured with time.perf_counter(), a monotonic clock, so
    # the reported figures cannot be distorted by system clock adjustments.
    print("Generating embeddings and chunks...")
    start_time = time.perf_counter()
    embeddings = generate_embeddings_huggingface(chunks)
    end_time = time.perf_counter()
    print(f"Embeddings and chunks generated in {end_time - start_time:.2f} seconds!")

    # Encrypt chunks and embeddings
    print("Encrypting data...")
    start_time = time.perf_counter()
    chunks_shares = [nilql.encrypt(xor_key, chunk) for chunk in chunks]
    embeddings_shares = [
        encrypt_float_list(additive_key, embedding) for embedding in embeddings
    ]
    end_time = time.perf_counter()
    print(f"Data encrypted in {end_time - start_time:.2f} seconds")

    # Upload encrypted data to nilDB
    print("Uploading data...")
    start_time = time.perf_counter()
    await nil_db.upload_data(embeddings_shares, chunks_shares)
    end_time = time.perf_counter()
    print(f"Data uploaded in {end_time - start_time:.2f} seconds")
| 90 | + |
88 | 91 |
|
if __name__ == "__main__":
    # Only start the upload when executed as a script; asyncio.run drives
    # the async entry point to completion.
    asyncio.run(main())
|
0 commit comments