-
Notifications
You must be signed in to change notification settings - Fork 1.9k
Add Parquet read pruning configuration for max elements in inList #19928
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 11 commits
79a7dc4
7b1a1c4
0481ad7
571d282
4be8ca4
989b389
118dc6f
22b98df
1f3f6d5
f9bda34
0995d71
3b83ab3
f93022d
29c84fb
eeaba83
f6624d7
e03847d
0f6cdeb
b5b2e4d
d9d1870
0201029
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -49,7 +49,9 @@ use datafusion_physical_expr_common::physical_expr::{ | |
| use datafusion_physical_plan::metrics::{ | ||
| Count, ExecutionPlanMetricsSet, Gauge, MetricBuilder, PruningMetrics, | ||
| }; | ||
| use datafusion_pruning::{FilePruner, PruningPredicate, build_pruning_predicate}; | ||
| use datafusion_pruning::{ | ||
| FilePruner, PruningPredicate, PruningPredicateConfig, build_pruning_predicate, | ||
| }; | ||
|
|
||
| use crate::sort::reverse_row_selection; | ||
| #[cfg(feature = "parquet_encryption")] | ||
|
|
@@ -104,6 +106,8 @@ pub(super) struct ParquetOpener { | |
| pub enable_bloom_filter: bool, | ||
| /// Should row group pruning be applied | ||
| pub enable_row_group_stats_pruning: bool, | ||
| /// Maximum number of elements (inclusive) in InList exprs to be eligible for pruning | ||
| pub pruning_max_inlist_limit: usize, | ||
|
||
| /// Coerce INT96 timestamps to specific TimeUnit | ||
| pub coerce_int96: Option<TimeUnit>, | ||
| /// Optional parquet FileDecryptionProperties | ||
|
|
@@ -280,6 +284,9 @@ impl FileOpener for ParquetOpener { | |
|
|
||
| let reverse_row_groups = self.reverse_row_groups; | ||
| let preserve_order = self.preserve_order; | ||
| let pruning_predicate_config = PruningPredicateConfig { | ||
| max_in_list: self.pruning_max_inlist_limit, | ||
| }; | ||
|
|
||
| Ok(Box::pin(async move { | ||
| #[cfg(feature = "parquet_encryption")] | ||
|
|
@@ -326,6 +333,7 @@ impl FileOpener for ParquetOpener { | |
| &logical_file_schema, | ||
| &partitioned_file, | ||
| predicate_creation_errors.clone(), | ||
| pruning_predicate_config.clone(), | ||
| ) | ||
| }); | ||
|
|
||
|
|
@@ -426,6 +434,7 @@ impl FileOpener for ParquetOpener { | |
| predicate.as_ref(), | ||
| &physical_file_schema, | ||
| &predicate_creation_errors, | ||
| &pruning_predicate_config, | ||
| ); | ||
|
|
||
| // The page index is not stored inline in the parquet footer so the | ||
|
|
@@ -513,6 +522,7 @@ impl FileOpener for ParquetOpener { | |
| rg_metadata, | ||
| predicate, | ||
| &file_metrics, | ||
| &pruning_predicate_config, | ||
| ); | ||
| } else { | ||
| // Update metrics: statistics unavailable, so all row groups are | ||
|
|
@@ -938,17 +948,20 @@ fn create_initial_plan( | |
| pub(crate) fn build_page_pruning_predicate( | ||
| predicate: &Arc<dyn PhysicalExpr>, | ||
| file_schema: &SchemaRef, | ||
| config: &PruningPredicateConfig, | ||
| ) -> Arc<PagePruningAccessPlanFilter> { | ||
| Arc::new(PagePruningAccessPlanFilter::new( | ||
| predicate, | ||
| Arc::clone(file_schema), | ||
| config, | ||
| )) | ||
| } | ||
|
|
||
| pub(crate) fn build_pruning_predicates( | ||
| predicate: Option<&Arc<dyn PhysicalExpr>>, | ||
| file_schema: &SchemaRef, | ||
| predicate_creation_errors: &Count, | ||
| config: &PruningPredicateConfig, | ||
| ) -> ( | ||
| Option<Arc<PruningPredicate>>, | ||
| Option<Arc<PagePruningAccessPlanFilter>>, | ||
|
|
@@ -960,8 +973,10 @@ pub(crate) fn build_pruning_predicates( | |
| Arc::clone(predicate), | ||
| file_schema, | ||
| predicate_creation_errors, | ||
| config, | ||
| ); | ||
| let page_pruning_predicate = build_page_pruning_predicate(predicate, file_schema); | ||
| let page_pruning_predicate = | ||
| build_page_pruning_predicate(predicate, file_schema, config); | ||
| (pruning_predicate, Some(page_pruning_predicate)) | ||
| } | ||
|
|
||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.