Skip to content

Commit 56525ef

Browse files
authored
Add Parquet RowSelection benchmark (#6623)
* add benchmark * add and_then benchmark * fix ci * update bench
1 parent a9294d7 commit 56525ef

File tree

2 files changed

+92
-0
lines changed

2 files changed

+92
-0
lines changed

parquet/Cargo.toml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -222,5 +222,10 @@ harness = false
222222
name = "metadata"
223223
harness = false
224224

225+
[[bench]]
226+
name = "row_selector"
227+
harness = false
228+
required-features = ["arrow"]
229+
225230
[lib]
226231
bench = false

parquet/benches/row_selector.rs

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
use arrow_array::BooleanArray;
19+
use criterion::*;
20+
use parquet::arrow::arrow_reader::RowSelection;
21+
use rand::Rng;
22+
23+
/// Generates a random RowSelection with a specified selection ratio.
24+
///
25+
/// # Arguments
26+
///
27+
/// * `total_rows` - The total number of rows in the selection.
28+
/// * `selection_ratio` - The ratio of rows to select (e.g., 1/3 for ~33% selection).
29+
///
30+
/// # Returns
31+
///
32+
/// * A `BooleanArray` instance with randomly selected rows based on the provided ratio.
33+
fn generate_random_row_selection(total_rows: usize, selection_ratio: f64) -> BooleanArray {
34+
let mut rng = rand::thread_rng();
35+
let bools: Vec<bool> = (0..total_rows)
36+
.map(|_| rng.gen_bool(selection_ratio))
37+
.collect();
38+
BooleanArray::from(bools)
39+
}
40+
41+
fn criterion_benchmark(c: &mut Criterion) {
42+
let total_rows = 300_000;
43+
let selection_ratio = 1.0 / 3.0;
44+
45+
// Generate two random RowSelections with approximately 1/3 of the rows selected.
46+
let row_selection_a =
47+
RowSelection::from_filters(&[generate_random_row_selection(total_rows, selection_ratio)]);
48+
let row_selection_b =
49+
RowSelection::from_filters(&[generate_random_row_selection(total_rows, selection_ratio)]);
50+
51+
// Benchmark the intersection of the two RowSelections.
52+
c.bench_function("intersection", |b| {
53+
b.iter(|| {
54+
let intersection = row_selection_a.intersection(&row_selection_b);
55+
criterion::black_box(intersection);
56+
})
57+
});
58+
59+
c.bench_function("union", |b| {
60+
b.iter(|| {
61+
let union = row_selection_a.union(&row_selection_b);
62+
criterion::black_box(union);
63+
})
64+
});
65+
66+
c.bench_function("from_filters", |b| {
67+
let boolean_array = generate_random_row_selection(total_rows, selection_ratio);
68+
b.iter(|| {
69+
let array = boolean_array.clone();
70+
let selection = RowSelection::from_filters(&[array]);
71+
criterion::black_box(selection);
72+
})
73+
});
74+
75+
c.bench_function("and_then", |b| {
76+
let selected = row_selection_a.row_count();
77+
let sub_selection =
78+
RowSelection::from_filters(&[generate_random_row_selection(selected, selection_ratio)]);
79+
b.iter(|| {
80+
let result = row_selection_a.and_then(&sub_selection);
81+
criterion::black_box(result);
82+
})
83+
});
84+
}
85+
86+
// Collect `criterion_benchmark` into a Criterion benchmark group named `benches`.
criterion_group!(benches, criterion_benchmark);
87+
// Expand to the entry point that runs the `benches` group.
criterion_main!(benches);

0 commit comments

Comments
 (0)