Skip to content

Commit 04b350b

Browse files
authored
feat(stream): support snapshot backfill in backfill order control (#22215)
1 parent e1ae4d4 commit 04b350b

File tree

9 files changed

+279
-161
lines changed

9 files changed

+279
-161
lines changed
Lines changed: 4 additions & 137 deletions
Original file line numberDiff line numberDiff line change
@@ -1,142 +1,9 @@
1 1
statement ok
2-
SET RW_IMPLICIT_FLUSH TO TRUE;
2+
set streaming_use_snapshot_backfill=true;
3 3

4-
statement ok
5-
drop table if exists car_sales cascade;
6-
7-
statement ok
8-
drop table if exists car_info cascade;
9-
10-
statement ok
11-
drop table if exists car_regions cascade;
12-
13-
statement ok
14-
create table car_sales(id int, car_id int, region_id int, price int);
15-
16-
statement ok
17-
create table car_info(id int, name varchar);
18-
19-
statement ok
20-
create table car_regions(id int, region varchar);
21-
22-
statement ok
23-
insert into car_info select t.id as id, 'unmatched' from generate_series(10, 100000) t(id);
24-
25-
statement ok
26-
insert into car_info values (1, 'Toyota'), (2, 'Honda'), (3, 'Ford'), (4, 'Chevy'), (5, 'BMW'), (6, 'Audi'), (7, 'Mercedes');
27-
28-
statement ok
29-
insert into car_regions select t.id as id, 'unmatched' from generate_series(10, 100000) t(id);
30-
31-
statement ok
32-
insert into car_regions values (1, 'North America'), (2, 'Europe'), (3, 'Asia'), (4, 'South America'), (5, 'Africa'), (6, 'Australia'), (7, 'Antarctica');
33-
34-
# Generate 100K sales per car per region
35-
statement ok
36-
INSERT INTO car_sales
37-
SELECT
38-
sales_id_gen.id as id,
39-
car_id_gen.id as car_id,
40-
region_id_gen.id as region_id,
41-
sales_id_gen.id as price
42-
FROM generate_series(1, 100000) as sales_id_gen(id)
43-
CROSS JOIN generate_series(1, 5) as car_id_gen(id)
44-
CROSS JOIN generate_series(1, 5) as region_id_gen(id);
45-
46-
# should fail with cycle
47-
statement error Backfill order strategy has a cycle
48-
create materialized view m1
49-
with (backfill_order = FIXED(car_sales -> car_sales))
50-
as select count(*) from car_sales join car_info
51-
on car_sales.car_id = car_info.id
52-
join car_regions
53-
on car_sales.region_id = car_regions.id;
54-
55-
# should fail with cycle
56-
statement error Backfill order strategy has a cycle
57-
create materialized view m1
58-
with (backfill_order = FIXED(car_sales -> car_regions, car_info -> car_sales, car_regions -> car_sales))
59-
as select count(*) from car_sales join car_info
60-
on car_sales.car_id = car_info.id
61-
join car_regions
62-
on car_sales.region_id = car_regions.id;
63-
64-
# Create an MV that groups sales by a range of prices
65-
statement ok
66-
create materialized view m1
67-
with (backfill_order = FIXED(car_regions -> car_sales, car_info -> car_sales))
68-
as
69-
with price_ranges as (
70-
select
71-
car_info.name as name,
72-
car_sales.price as price,
73-
round(log10(1 + car_sales.price)::numeric, 1) as price_range
74-
from car_sales join car_info
75-
on car_sales.car_id = car_info.id
76-
join car_regions
77-
on car_sales.region_id = car_regions.id
78-
)
79-
select
80-
name,
81-
price_range,
82-
count(*) as sales_count,
83-
sum(price) as sales_volume,
84-
avg(price) as sales_avg,
85-
min(price) as sales_min,
86-
max(price) as sales_max,
87-
approx_percentile(0.5) WITHIN GROUP (ORDER BY price) as sales_est_median,
88-
approx_percentile(0.01) WITHIN GROUP (ORDER BY price) as sales_est_bottom_1_percent,
89-
approx_percentile(0.99) WITHIN GROUP (ORDER BY price) as sales_est_top_1_percent
90-
FROM
91-
price_ranges
92-
GROUP BY name, price_range;
93-
94-
# Create an MV that groups sales by a range of prices, no backfill order
95-
statement ok
96-
create materialized view m1_no_order
97-
as
98-
with price_ranges as (
99-
select
100-
car_info.name as name,
101-
car_sales.price as price,
102-
round(log10(1 + car_sales.price)::numeric, 1) as price_range
103-
from car_sales join car_info
104-
on car_sales.car_id = car_info.id
105-
join car_regions
106-
on car_sales.region_id = car_regions.id
107-
)
108-
select
109-
name,
110-
price_range,
111-
count(*) as sales_count,
112-
sum(price) as sales_volume,
113-
avg(price) as sales_avg,
114-
min(price) as sales_min,
115-
max(price) as sales_max,
116-
approx_percentile(0.5) WITHIN GROUP (ORDER BY price) as sales_est_median,
117-
approx_percentile(0.01) WITHIN GROUP (ORDER BY price) as sales_est_bottom_1_percent,
118-
approx_percentile(0.99) WITHIN GROUP (ORDER BY price) as sales_est_top_1_percent
119-
FROM
120-
price_ranges
121-
GROUP BY name, price_range;
122-
123-
query I
124-
select * from m1
125-
except
126-
select * from m1_no_order
127-
----
128-
129-
query I
130-
select * from m1_no_order
131-
except
132-
select * from m1
133-
----
134-
135-
statement ok
136-
drop table if exists car_sales cascade;
4+
include ./backfill_order_control.slt.part
137 5

138 6
statement ok
139-
drop table if exists car_info cascade;
7+
set streaming_use_snapshot_backfill=false;
140 8

141-
statement ok
142-
drop table if exists car_regions cascade;
9+
include ./backfill_order_control.slt.part
Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,142 @@
1+
statement ok
2+
SET RW_IMPLICIT_FLUSH TO TRUE;
3+
4+
statement ok
5+
drop table if exists car_sales cascade;
6+
7+
statement ok
8+
drop table if exists car_info cascade;
9+
10+
statement ok
11+
drop table if exists car_regions cascade;
12+
13+
statement ok
14+
create table car_sales(id int, car_id int, region_id int, price int);
15+
16+
statement ok
17+
create table car_info(id int, name varchar);
18+
19+
statement ok
20+
create table car_regions(id int, region varchar);
21+
22+
statement ok
23+
insert into car_info select t.id as id, 'unmatched' from generate_series(10, 100000) t(id);
24+
25+
statement ok
26+
insert into car_info values (1, 'Toyota'), (2, 'Honda'), (3, 'Ford'), (4, 'Chevy'), (5, 'BMW'), (6, 'Audi'), (7, 'Mercedes');
27+
28+
statement ok
29+
insert into car_regions select t.id as id, 'unmatched' from generate_series(10, 100000) t(id);
30+
31+
statement ok
32+
insert into car_regions values (1, 'North America'), (2, 'Europe'), (3, 'Asia'), (4, 'South America'), (5, 'Africa'), (6, 'Australia'), (7, 'Antarctica');
33+
34+
# Generate 10K sales per car per region
35+
statement ok
36+
INSERT INTO car_sales
37+
SELECT
38+
sales_id_gen.id as id,
39+
car_id_gen.id as car_id,
40+
region_id_gen.id as region_id,
41+
sales_id_gen.id as price
42+
FROM generate_series(1, 10000) as sales_id_gen(id)
43+
CROSS JOIN generate_series(1, 5) as car_id_gen(id)
44+
CROSS JOIN generate_series(1, 5) as region_id_gen(id);
45+
46+
# should fail with cycle
47+
statement error Backfill order strategy has a cycle
48+
create materialized view m1
49+
with (backfill_order = FIXED(car_sales -> car_sales))
50+
as select count(*) from car_sales join car_info
51+
on car_sales.car_id = car_info.id
52+
join car_regions
53+
on car_sales.region_id = car_regions.id;
54+
55+
# should fail with cycle
56+
statement error Backfill order strategy has a cycle
57+
create materialized view m1
58+
with (backfill_order = FIXED(car_sales -> car_regions, car_info -> car_sales, car_regions -> car_sales))
59+
as select count(*) from car_sales join car_info
60+
on car_sales.car_id = car_info.id
61+
join car_regions
62+
on car_sales.region_id = car_regions.id;
63+
64+
# Create an MV that groups sales by a range of prices
65+
statement ok
66+
create materialized view m1
67+
with (backfill_order = FIXED(car_regions -> car_sales, car_info -> car_sales))
68+
as
69+
with price_ranges as (
70+
select
71+
car_info.name as name,
72+
car_sales.price as price,
73+
round(log10(1 + car_sales.price)::numeric, 1) as price_range
74+
from car_sales join car_info
75+
on car_sales.car_id = car_info.id
76+
join car_regions
77+
on car_sales.region_id = car_regions.id
78+
)
79+
select
80+
name,
81+
price_range,
82+
count(*) as sales_count,
83+
sum(price) as sales_volume,
84+
avg(price) as sales_avg,
85+
min(price) as sales_min,
86+
max(price) as sales_max,
87+
approx_percentile(0.5) WITHIN GROUP (ORDER BY price) as sales_est_median,
88+
approx_percentile(0.01) WITHIN GROUP (ORDER BY price) as sales_est_bottom_1_percent,
89+
approx_percentile(0.99) WITHIN GROUP (ORDER BY price) as sales_est_top_1_percent
90+
FROM
91+
price_ranges
92+
GROUP BY name, price_range;
93+
94+
# Create an MV that groups sales by a range of prices, no backfill order
95+
statement ok
96+
create materialized view m1_no_order
97+
as
98+
with price_ranges as (
99+
select
100+
car_info.name as name,
101+
car_sales.price as price,
102+
round(log10(1 + car_sales.price)::numeric, 1) as price_range
103+
from car_sales join car_info
104+
on car_sales.car_id = car_info.id
105+
join car_regions
106+
on car_sales.region_id = car_regions.id
107+
)
108+
select
109+
name,
110+
price_range,
111+
count(*) as sales_count,
112+
sum(price) as sales_volume,
113+
avg(price) as sales_avg,
114+
min(price) as sales_min,
115+
max(price) as sales_max,
116+
approx_percentile(0.5) WITHIN GROUP (ORDER BY price) as sales_est_median,
117+
approx_percentile(0.01) WITHIN GROUP (ORDER BY price) as sales_est_bottom_1_percent,
118+
approx_percentile(0.99) WITHIN GROUP (ORDER BY price) as sales_est_top_1_percent
119+
FROM
120+
price_ranges
121+
GROUP BY name, price_range;
122+
123+
query I
124+
select * from m1
125+
except
126+
select * from m1_no_order
127+
----
128+
129+
query I
130+
select * from m1_no_order
131+
except
132+
select * from m1
133+
----
134+
135+
statement ok
136+
drop table if exists car_sales cascade;
137+
138+
statement ok
139+
drop table if exists car_info cascade;
140+
141+
statement ok
142+
drop table if exists car_regions cascade;

e2e_test/backfill/backfill_order_control_recovery.slt

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,34 @@ statement ok
45 45
wait;
46 46

47 47
statement ok
48-
drop materialized view m1 cascade;
48+
set background_ddl=false;
49+
50+
statement ok
51+
set backfill_rate_limit=default;
52+
53+
statement ok
54+
create materialized view m2
55+
as select v1 from t1
56+
union
57+
select v1 from t2
58+
union
59+
select v1 from t3;
60+
61+
query I
62+
select * from m1 except select * from m2;
63+
----
64+
65+
66+
query I
67+
select * from m2 except select * from m1;
68+
----
69+
70+
71+
statement ok
72+
drop materialized view m1;
73+
74+
statement ok
75+
drop materialized view m2;
49 76

50 77
statement ok
51 78
drop table if exists t1 cascade;

0 commit comments

Comments
 (0)