Skip to content

Commit 96c29ea

Browse files
committed
Iterating on upgrade code
1 parent 4dcc2bb commit 96c29ea

File tree

2 files changed

+281
-15
lines changed

2 files changed

+281
-15
lines changed

pgvectorscale/src/access_method/mod.rs

Lines changed: 116 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -88,23 +88,125 @@ fn amhandler(_fcinfo: pg_sys::FunctionCallInfo) -> PgBox<pg_sys::IndexAmRoutine>
8888
amroutine.into_pg_boxed()
8989
}
9090

91+
// Background on system catalog state needed to understand the SQL for idempotent install/upgrade
92+
// ----------------------------------------------------------------------------------------------
93+
//
94+
// When installing from scratch, we execute:
95+
//
96+
// CREATE OPERATOR CLASS vector_cosine_ops
97+
// DEFAULT FOR TYPE vector USING diskann AS
98+
// OPERATOR 1 <=> (vector, vector) FOR ORDER BY float_ops,
99+
// FUNCTION 1 distance_type_cosine();
100+
//
101+
// This creates the following system catalog state:
102+
//
103+
// (1) A row in pg_opclass for vector_cosine_ops and diskann:
104+
//
105+
// oid | opcmethod | opcname | opcnamespace | opcowner | opcfamily | opcintype | opcdefault | opckeytype
106+
// -------+-----------+-------------------+--------------+----------+-----------+-----------+------------+------------
107+
// 17722 | 17718 | vector_cosine_ops | 2200 | 10 | 17721 | 17389 | t | 0
108+
//
109+
// Note: opcmethod is the oid of the access method (diskann) already in pg_am.
110+
// Also: note that opcdefault is t, which means that this is the default operator class for the type.
111+
//
112+
// (2) A row in pg_amop for the <=> operator:
113+
// oid | amopfamily | amoplefttype | amoprighttype | amopstrategy | amoppurpose | amopopr | amopmethod | amopsortfamily
114+
// -------+------------+--------------+---------------+--------------+-------------+---------+------------+----------------
115+
// 17723 | 17721 | 17389 | 17389 | 1 | o | 17438 | 17718 | 1970
116+
//
117+
// (3) A row in pg_amproc for the distance_type_cosine function:
118+
//
119+
// oid | amprocfamily | amproclefttype | amprocrighttype | amprocnum | amproc
120+
// -------+--------------+----------------+-----------------+-----------+----------------------
121+
// 17724 | 17721 | 17389 | 17389 | 1 | distance_type_cosine
122+
//
123+
// Version 0.4.0 contained the same SQL as above, but without the FUNCTION 1 part:
124+
//
125+
// CREATE OPERATOR CLASS vector_cosine_ops
126+
// DEFAULT FOR TYPE vector USING diskann AS
127+
// OPERATOR 1 <=> (vector, vector) FOR ORDER BY float_ops;
128+
//
129+
// Thus, when upgrading from 0.4.0 to 0.5.0, we need to add the appropriate entry in `pg_amproc`.
130+
//
131+
// Similarly, here is the sample system catalog state created by:
132+
//
133+
// CREATE OPERATOR CLASS vector_l2_ops
134+
// FOR TYPE vector USING diskann AS
135+
// OPERATOR 1 <-> (vector, vector) FOR ORDER BY float_ops,
136+
// FUNCTION 1 distance_type_l2();
137+
//
138+
// (1) A row in pg_opclass for vector_l2_ops and diskann:
139+
//
140+
// oid | opcmethod | opcname | opcnamespace | opcowner | opcfamily | opcintype | opcdefault | opckeytype
141+
// -------+-----------+---------------+--------------+----------+-----------+-----------+------------+------------
142+
// 17726 | 17718 | vector_l2_ops | 2200 | 10 | 17725 | 17389 | f | 0
143+
//
144+
// Note: opcmethod is the oid of the access method (diskann) already in pg_am.
145+
// Also: note that opcdefault is f, which means that this is not the default operator class for the type.
146+
//
147+
// (2) A row in pg_amop for the <-> operator:
148+
//
149+
// oid | amopfamily | amoplefttype | amoprighttype | amopstrategy | amoppurpose | amopopr | amopmethod | amopsortfamily
150+
// -------+------------+--------------+---------------+--------------+-------------+---------+------------+----------------
151+
// 17727 | 17725 | 17389 | 17389 | 1 | o | 17436 | 17718 | 1970
152+
//
153+
// (3) A row in pg_amproc for the distance_type_l2 function:
154+
//
155+
// oid | amprocfamily | amproclefttype | amprocrighttype | amprocnum | amproc
156+
// -------+--------------+----------------+-----------------+-----------+------------------
157+
// 17728 | 17725 | 17389 | 17389 | 1 | distance_type_l2
158+
//
159+
// However, the situation is easier for upgrade. Version 0.4.0 did not contain support for the L2 distance, so we can
160+
// just run the CREATE OPERATOR CLASS statement above to add the L2 distance support.
161+
91162
// This SQL is made idempotent so that we can use the same script for the installation and the upgrade.
92163
extension_sql!(
93164
r#"
94-
DROP OPERATOR CLASS IF EXISTS vector_cosine_ops USING diskann;
95-
96-
CREATE OPERATOR CLASS vector_cosine_ops
97-
DEFAULT FOR TYPE vector USING diskann AS
98-
OPERATOR 1 <=> (vector, vector) FOR ORDER BY float_ops,
99-
FUNCTION 1 distance_type_cosine();
100-
101-
102-
DROP OPERATOR CLASS IF EXISTS vector_l2_ops USING diskann;
103-
104-
CREATE OPERATOR CLASS vector_l2_ops
105-
FOR TYPE vector USING diskann AS
106-
OPERATOR 1 <-> (vector, vector) FOR ORDER BY float_ops,
107-
FUNCTION 1 distance_type_l2();
165+
DO $$
166+
DECLARE
167+
c int;
168+
d int;
169+
BEGIN
170+
-- Has cosine operator class been installed previously?
171+
SELECT count(*)
172+
INTO c
173+
FROM pg_catalog.pg_opclass c
174+
WHERE c.opcname = 'vector_cosine_ops'
175+
AND c.opcmethod = (SELECT oid FROM pg_catalog.pg_am am WHERE am.amname = 'diskann');
176+
177+
-- Has L2 operator class been installed previously?
178+
SELECT count(*)
179+
INTO d
180+
FROM pg_catalog.pg_opclass c
181+
WHERE c.opcname = 'vector_l2_ops'
182+
AND c.opcmethod = (SELECT oid FROM pg_catalog.pg_am am WHERE am.amname = 'diskann');
183+
184+
IF c = 0 THEN
185+
-- Fresh install from scratch
186+
CREATE OPERATOR CLASS vector_cosine_ops DEFAULT
187+
FOR TYPE vector USING diskann AS
188+
OPERATOR 1 <=> (vector, vector) FOR ORDER BY float_ops,
189+
FUNCTION 1 distance_type_cosine();
190+
191+
CREATE OPERATOR CLASS vector_l2_ops
192+
FOR TYPE vector USING diskann AS
193+
OPERATOR 1 <-> (vector, vector) FOR ORDER BY float_ops,
194+
FUNCTION 1 distance_type_l2();
195+
ELSIF d = 0 THEN
196+
-- Upgrade from 0.4.0: register distance_type_cosine as FUNCTION 1 of the existing cosine opclass, then add L2 support below.
197+
-- NOTE(review): no value is supplied for pg_amproc.oid here -- confirm this INSERT succeeds once it matches a row.
198+
INSERT INTO pg_amproc (amprocfamily, amproclefttype, amprocrighttype, amprocnum, amproc)
199+
SELECT c.opcfamily, c.opcintype, c.opcintype, 1, 'distance_type_cosine'
200+
FROM pg_opclass c, pg_am a
201+
WHERE a.oid = c.opcmethod AND c.opcname = 'vector_cosine_ops' AND a.amname = 'diskann';
202+
203+
CREATE OPERATOR CLASS vector_l2_ops
204+
FOR TYPE vector USING diskann AS
205+
OPERATOR 1 <-> (vector, vector) FOR ORDER BY float_ops,
206+
FUNCTION 1 distance_type_l2();
207+
END IF;
208+
END;
209+
$$;
108210
"#,
109211
name = "diskann_ops_operator",
110212
requires = [amhandler, distance_type_cosine, distance_type_l2]

pgvectorscale/src/access_method/upgrade_test.rs

Lines changed: 165 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ pub mod tests {
2424
#[test]
2525
#[ignore]
2626
///This function is only a mock to bring up the test framework in test_delete_vacuum
27-
fn test_upgrade() {
27+
fn test_upgrade_from_0_0_2() {
2828
if cfg!(feature = "pg17") {
2929
// PG17 is only supported for one version
3030
return;
@@ -210,4 +210,168 @@ pub mod tests {
210210
))
211211
.unwrap();
212212
}
213+
214+
#[test]
215+
// #[ignore]
216+
fn test_upgrade_from_0_4_0() {
217+
if cfg!(feature = "pg17") {
218+
// PG17 is only supported for one version
219+
return;
220+
}
221+
pgrx_tests::run_test(
222+
"test_delete_mock_fn",
223+
None,
224+
crate::pg_test::postgresql_conf_options(),
225+
)
226+
.unwrap();
227+
228+
let (mut client, _) = pgrx_tests::client().unwrap();
229+
230+
client
231+
.execute(
232+
&"DROP EXTENSION IF EXISTS vectorscale CASCADE;".to_string(),
233+
&[],
234+
)
235+
.unwrap();
236+
237+
let current_file = file!();
238+
239+
// Convert the file path to an absolute path
240+
let current_dir = std::env::current_dir().unwrap();
241+
let mut absolute_path = std::path::Path::new(&current_dir).join(current_file);
242+
absolute_path = absolute_path.ancestors().nth(4).unwrap().to_path_buf();
243+
244+
let temp_dir = tempfile::tempdir().unwrap();
245+
let temp_path = temp_dir.path();
246+
247+
copy_dir_all(absolute_path.clone(), temp_dir.path()).unwrap();
248+
249+
let pgrx = pgrx_pg_config::Pgrx::from_config().unwrap();
250+
let pg_version = pg_sys::get_pg_major_version_num();
251+
let pg_config = pgrx.get(&format!("pg{}", pg_version)).unwrap();
252+
253+
let version = "0.4.0";
254+
let res = std::process::Command::new("git")
255+
.current_dir(temp_path)
256+
.arg("checkout")
257+
.arg("-f")
258+
.arg(version)
259+
.output()
260+
.unwrap();
261+
assert!(
262+
res.status.success(),
263+
"failed: {:?} {:?} {:?}",
264+
res,
265+
absolute_path,
266+
temp_dir.path()
267+
);
268+
269+
let res = std::process::Command::new("cargo")
270+
.current_dir(temp_path.join("pgvectorscale"))
271+
.arg("pgrx")
272+
.arg("install")
273+
.arg("--test")
274+
.arg("--pg-config")
275+
.arg(pg_config.path().unwrap())
276+
.stdout(Stdio::inherit())
277+
.stderr(Stdio::piped())
278+
.output()
279+
.unwrap();
280+
assert!(res.status.success(), "failed: {:?}", res);
281+
282+
client
283+
.execute(
284+
&format!(
285+
"CREATE EXTENSION vectorscale VERSION '{}' CASCADE;",
286+
version
287+
),
288+
&[],
289+
)
290+
.unwrap();
291+
292+
let suffix = (1..=253)
293+
.map(|i| format!("{}", i))
294+
.collect::<Vec<String>>()
295+
.join(", ");
296+
297+
client
298+
.batch_execute(&format!(
299+
"CREATE TABLE test(embedding vector(256));
300+
301+
select setseed(0.5);
302+
-- generate 300 vectors
303+
INSERT INTO test(embedding)
304+
SELECT
305+
*
306+
FROM (
307+
SELECT
308+
('[ 0 , ' || array_to_string(array_agg(random()), ',', '0') || ']')::vector AS embedding
309+
FROM
310+
generate_series(1, 255 * 300) i
311+
GROUP BY
312+
i % 300) g;
313+
314+
INSERT INTO test(embedding) VALUES ('[1,2,3,{suffix}]'), ('[4,5,6,{suffix}]'), ('[7,8,10,{suffix}]');
315+
316+
CREATE INDEX idxtest
317+
ON test
318+
USING diskann(embedding);
319+
"
320+
))
321+
.unwrap();
322+
323+
client.execute("set enable_seqscan = 0;", &[]).unwrap();
324+
let cnt: i64 = client.query_one(&format!("WITH cte as (select * from test order by embedding <=> '[1,1,1,{suffix}]') SELECT count(*) from cte;"), &[]).unwrap().get(0);
325+
assert_eq!(cnt, 303, "count before upgrade");
326+
327+
//reinstall myself
328+
let res = std::process::Command::new("cargo")
329+
.arg("pgrx")
330+
.arg("install")
331+
.arg("--test")
332+
.arg("--pg-config")
333+
.arg(pg_config.path().unwrap())
334+
.stdout(Stdio::inherit())
335+
.stderr(Stdio::piped())
336+
.output()
337+
.unwrap();
338+
assert!(res.status.success(), "failed: {:?}", res);
339+
340+
client
341+
.execute(
342+
&"UPDATE pg_extension SET extname='vectorscale' WHERE extname = 'timescale_vector';".to_string(),
343+
&[],
344+
)
345+
.unwrap();
346+
347+
//need to recreate the client to avoid double load of GUC. Look into this later.
348+
let (mut client, _) = pgrx_tests::client().unwrap();
349+
client
350+
.execute(
351+
&format!(
352+
"ALTER EXTENSION vectorscale UPDATE TO '{}'",
353+
env!("CARGO_PKG_VERSION")
354+
),
355+
&[],
356+
)
357+
.unwrap();
358+
359+
client.execute("set enable_seqscan = 0;", &[]).unwrap();
360+
let cnt: i64 = client.query_one(&format!("WITH cte as (select * from test order by embedding <=> '[1,1,1,{suffix}]') SELECT count(*) from cte;"), &[]).unwrap().get(0);
361+
assert_eq!(cnt, 303, "count after upgrade");
362+
363+
client
364+
.batch_execute(&format!(
365+
"DROP INDEX idxtest;
366+
367+
CREATE INDEX idxtest_cosine
368+
ON test
369+
USING diskann(embedding vector_cosine_ops);
370+
371+
CREATE INDEX idxtest_l2
372+
ON test
373+
USING diskann(embedding vector_l2_ops);"
374+
))
375+
.unwrap();
376+
}
213377
}

0 commit comments

Comments
 (0)