Skip to content

Commit 80630df

Browse files
authored
add search doc (#235)
* add search doc * fmt
1 parent ee57035 commit 80630df

File tree

5 files changed

+177
-3
lines changed

5 files changed

+177
-3
lines changed

core/src/query.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,7 @@ pub fn update_search_tokens_trigger_queries(
121121
);
122122
let apply_trigger = format!(
123123
"
124-
CREATE TRIGGER {job_name}_search_tokens_trigger
124+
CREATE OR REPLACE TRIGGER {job_name}_search_tokens_trigger
125125
AFTER INSERT OR UPDATE OR DELETE ON {src_schema}.{src_table}
126126
FOR EACH ROW
127127
EXECUTE FUNCTION {trigger_fn_name}();"

docker-compose.server.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ services:
3535
environment:
3636
<<: *default-env
3737
RUST_LOG: debug
38-
VECTORIZE_PROXY_ENABLED: ${VECTORIZE_PROXY_ENABLED}
38+
VECTORIZE_PROXY_ENABLED: ${VECTORIZE_PROXY_ENABLED:-1}
3939
command: vectorize-server
4040
vector-serve:
4141
restart: always

server/README.md

Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
1+
# Instant Search on any Postgres
2+
3+
An HTTP server that sits in between your application and Postgres.
4+
5+
## Features
6+
- Quickly sets up semantic and full text search on any Postgres table.
7+
- Generates embeddings from OpenAI, Hugging Face, and many other embedding model providers.
8+
- Updates embeddings and full text search token indices whenever data changes
9+
- Compatible with any Postgres that has [pgvector](https://github.com/pgvector/pgvector) installed (RDS, CloudSQL, etc)
10+
11+
## Getting started
12+
13+
Run Postgres and the HTTP servers in separate containers locally:
14+
15+
```bash
16+
# docker-compose.server.yml is located in the root of this repository
17+
docker compose -f docker-compose.server.yml up -d
18+
```
19+
20+
There are three containers: Postgres, a local embedding server, and the HTTP search service.
21+
22+
```plaintext
23+
docker ps --format "table {{.Image}}\t{{.Names}}"
24+
25+
IMAGE NAMES
26+
pg_vectorize-server pg_vectorize-server-1
27+
pgvector/pgvector:0.8.0-pg17 pg_vectorize-postgres-1
28+
ghcr.io/chuckhend/vector-serve:latest pg_vectorize-vector-serve-1
29+
```
30+
31+
## Create a table and insert some data
32+
33+
`sql/example.sql` contains an example products data set.
34+
35+
```bash
36+
psql postgres://postgres:postgres@localhost:5432/postgres -f sql/example.sql
37+
```
38+
39+
## Generating embeddings
40+
41+
We'll use the API to create a job that will generate embeddings for the `description` column in the `my_products` table. Anytime we insert or update a row in this table, the embeddings will automatically be updated.
42+
43+
```bash
44+
curl -X POST http://localhost:8080/api/v1/table -d '{
45+
"job_name": "my_job",
46+
"src_table": "my_products",
47+
"src_schema": "public",
48+
"src_column": "description",
49+
"primary_key": "product_id",
50+
"update_time_col": "updated_at",
51+
"model": "sentence-transformers/all-MiniLM-L6-v2"
52+
}' \
53+
-H "Content-Type: application/json"
54+
```
55+
56+
## Search with HTTP API
57+
58+
```bash
59+
curl -X GET "http://localhost:8080/api/v1/search?job_name=my_job&query=camping%20gear&limit=2" | jq .
60+
```
61+
62+
```json
63+
[
64+
{
65+
"description": "Sling made of fabric or netting, suspended between two points for relaxation",
66+
"fts_rank": null,
67+
"price": 40.0,
68+
"product_category": "outdoor",
69+
"product_id": 39,
70+
"product_name": "Hammock",
71+
"rrf_score": 0.01639344262295082,
72+
"semantic_rank": 1,
73+
"similarity_score": 0.3192296909597241,
74+
"updated_at": "2025-06-25T19:57:22.410561+00:00"
75+
},
76+
{
77+
"description": "Container for holding plants, often with drainage",
78+
"fts_rank": null,
79+
"price": 12.0,
80+
"product_category": "garden",
81+
"product_id": 8,
82+
"product_name": "Plant Pot",
83+
"rrf_score": 0.016129032258064516,
84+
"semantic_rank": 2,
85+
"similarity_score": 0.3032694847366062,
86+
"updated_at": "2025-06-25T19:57:22.410561+00:00"
87+
}
88+
]
89+
```
90+
91+
## SQL proxy example
92+
93+
We can also use the SQL proxy to perform the same search query, but using SQL instead of the HTTP API. This is useful if you have additional joins or advanced SQL queries that you want to perform.
94+
95+
Note that this query routes through the proxy on port 5433.
96+
97+
```bash
98+
psql postgres://postgres:postgres@localhost:5433/postgres -c \
99+
"SELECT * FROM (
100+
SELECT t0.*, t1.similarity_score
101+
FROM (
102+
SELECT
103+
product_id,
104+
1 - (embeddings <=> vectorize.embed('plants', 'my_job')) as similarity_score
105+
FROM vectorize._embeddings_my_job
106+
) t1
107+
INNER JOIN public.my_products t0 on t0.product_id = t1.product_id
108+
) t
109+
ORDER BY t.similarity_score DESC
110+
LIMIT 2;"
111+
```
112+
113+
```plaintext
114+
product_id | product_name | description | product_category | price | updated_at | similarity_score
115+
------------+------------------+---------------------------------------------------+------------------+-------+-------------------------------+---------------------
116+
8 | Plant Pot | Container for holding plants, often with drainage | garden | 12.00 | 2025-06-25 20:27:07.725765+00 | 0.46105278002586925
117+
35 | Gardening Gloves | Handwear for protection during gardening tasks | garden | 8.00 | 2025-06-25 20:27:07.725765+00 | 0.2909192990160845
118+
(2 rows)
119+
```

server/sql/example.sql

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
CREATE TABLE my_products (
2+
product_id SERIAL PRIMARY KEY,
3+
product_name TEXT NOT NULL,
4+
description TEXT,
5+
product_category TEXT NOT NULL,
6+
price DECIMAL(10, 2) NOT NULL,
7+
updated_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP
8+
);
9+
10+
INSERT INTO my_products(product_name, description, product_category, price, updated_at) VALUES
11+
('Pencil', 'Utensil used for writing and often works best on paper', 'stationery', 1.50, NOW()),
12+
('Laptop Stand', 'Elevated platform for laptops, enhancing ergonomics', 'electronics', 35.99, NOW()),
13+
('Desk Lamp', 'Illumination device for workspaces, often adjustable', 'furniture', 22.50, NOW()),
14+
('Bluetooth Speaker', 'Portable audio device with wireless connectivity', 'electronics', 99.99, NOW()),
15+
('Water Bottle', 'Reusable container for liquids, often insulated', 'kitchenware', 15.00, NOW()),
16+
('Backpack', 'Storage solution for carrying personal items on one’s back', 'accessories', 45.00, NOW()),
17+
('Wireless Mouse', 'Pointing device without the need for a physical connection', 'electronics', 25.00, NOW()),
18+
('Plant Pot', 'Container for holding plants, often with drainage', 'garden', 12.00, NOW()),
19+
('Sunglasses', 'Protective eyewear to shield eyes from UV rays', 'accessories', 50.00, NOW()),
20+
('Notebook', 'Bound sheets of paper for note-taking or sketching', 'stationery', 3.99, NOW()),
21+
('Stylus Pen', 'Tool for touchscreen devices, mimics finger touch', 'electronics', 18.50, NOW()),
22+
('Travel Mug', 'Insulated container for beverages on-the-go', 'kitchenware', 10.99, NOW()),
23+
('Phone Charger', 'Device to replenish the battery of mobile phones', 'electronics', 20.00, NOW()),
24+
('Yoga Mat', 'Cushioned surface for practicing yoga or exercise', 'sports', 30.00, NOW()),
25+
('Wall Clock', 'Time-telling device meant to hang on walls', 'furniture', 15.50, NOW()),
26+
('Keychain', 'Small device for holding keys together', 'accessories', 5.00, NOW()),
27+
('Desk Organizer', 'Tool for sorting and storing desk items', 'furniture', 12.50, NOW()),
28+
('Earbuds', 'Small headphones that fit directly inside the ear', 'electronics', 49.99, NOW()),
29+
('Calendar', 'Physical representation of days and months, often used for scheduling', 'stationery', 10.00, NOW()),
30+
('Umbrella', 'Protective gear against rain or intense sun', 'accessories', 8.99, NOW()),
31+
('Hand Sanitizer', 'Liquid or gel used to decrease infectious agents on hands', 'personal care', 2.50, NOW()),
32+
('Sketchbook', 'Paper-filled book used for drawing or painting', 'stationery', 6.99, NOW()),
33+
('Flash Drive', 'Portable storage device for digital files', 'electronics', 12.00, NOW()),
34+
('Tablet Holder', 'Stand or grip for holding tablets or e-readers', 'electronics', 22.99, NOW()),
35+
('Shampoo', 'Hair care product designed to cleanse the scalp and hair', 'personal care', 7.50, NOW()),
36+
('Wristwatch', 'Time-telling device worn around the wrist', 'accessories', 120.00, NOW()),
37+
('Basketball', 'Spherical sporting equipment used in basketball games', 'sports', 20.00, NOW()),
38+
('Guitar Picks', 'Small flat tool used to strum or pick a guitar', 'music', 5.00, NOW()),
39+
('Thermal Flask', 'Insulated bottle for keeping beverages hot or cold', 'kitchenware', 18.99, NOW()),
40+
('Slippers', 'Soft and light footwear intended for indoor use', 'footwear', 10.00, NOW()),
41+
('Easel', 'Upright support for artists to display or work on canvases', 'art supplies', 45.00, NOW()),
42+
('Bicycle Helmet', 'Protective headgear for cyclists', 'sports', 35.00, NOW()),
43+
('Candle Holder', 'Accessory to safely hold candles when they burn', 'home decor', 15.00, NOW()),
44+
('Cutting Board', 'Durable board on which to place materials for cutting', 'kitchenware', 10.50, NOW()),
45+
('Gardening Gloves', 'Handwear for protection during gardening tasks', 'garden', 8.00, NOW()),
46+
('Alarm Clock', 'Time-telling device with a feature to sound at a specified time', 'electronics', 25.00, NOW()),
47+
('Spatula', 'Flat tool used in cooking for flipping or spreading', 'kitchenware', 3.99, NOW()),
48+
('Jigsaw Puzzle', 'Picture printed on cardboard or wood and cut into pieces to be reassembled', 'toys', 12.99, NOW()),
49+
('Hammock', 'Sling made of fabric or netting, suspended between two points for relaxation', 'outdoor', 40.00, NOW()),
50+
('Luggage Tag', 'Accessory attached to luggage for identification purposes', 'travel', 7.50, NOW())
51+
;

server/src/routes/table.rs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,11 @@ pub async fn table(
4040
&payload.src_table,
4141
&payload.update_time_col,
4242
)
43-
.await?;
43+
.await
44+
.map_err(|e| match e {
45+
vectorize_core::errors::VectorizeError::NotFound(msg) => ServerError::NotFoundError(msg),
46+
_ => ServerError::from(e),
47+
})?;
4448
if datatype != "timestamp with time zone" {
4549
return Err(ServerError::InvalidRequest(format!(
4650
"Column {} in table {}.{} must be of type 'timestamp with time zone'",

0 commit comments

Comments
 (0)