-
Notifications
You must be signed in to change notification settings - Fork 139
feat: Add CDF tracing for Phase 1 of Change Data feed #1654
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 38 commits
936f6f2
51c1b11
b311f15
8a6ca27
05cfdbe
563130a
4842e50
143749c
244d62a
fd341be
162e710
30a7f84
cc2446c
fcf241f
da7e2be
b29b946
ba191f3
5b46ce5
0193410
e28f03f
afa0c2f
87fae6a
5a7a8b3
a938d4d
bf5efe5
7323bf0
c4867c2
3b07816
6251ec3
0b3a3d4
a0ecd22
abd5371
4aa9449
25d8f33
652f717
76536ad
b99c00d
80b924d
e3f1c7e
800365d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -15,14 +15,12 @@ use crate::table_features::{ColumnMappingMode, TableFeature}; | |
| use crate::utils::test_utils::{assert_result_error_with_message, Action, LocalMockTable}; | ||
| use crate::Predicate; | ||
| use crate::{DeltaResult, Engine, Error, Version}; | ||
| use test_utils::LogWriter; | ||
| use test_utils::LoggingTest; | ||
|
|
||
| use itertools::Itertools; | ||
| use std::collections::HashMap; | ||
| use std::path::Path; | ||
| use std::sync::Arc; | ||
| use std::sync::Mutex; | ||
| use tracing_subscriber::layer::SubscriberExt; | ||
|
|
||
| fn get_schema() -> StructType { | ||
| StructType::new_unchecked([ | ||
|
|
@@ -921,16 +919,7 @@ async fn file_meta_timestamp() { | |
|
|
||
| #[tokio::test] | ||
| async fn print_table_configuration() { | ||
| let logs = Arc::new(Mutex::new(Vec::new())); | ||
| let logs_clone = logs.clone(); | ||
|
|
||
| let subscriber = tracing_subscriber::registry().with( | ||
| tracing_subscriber::fmt::layer() | ||
| .with_writer(move || LogWriter(logs_clone.clone())) | ||
| .with_ansi(false), | ||
| ); | ||
|
|
||
| let _guard = tracing::subscriber::set_default(subscriber); | ||
| let tracing_guard = LoggingTest::new(); | ||
|
|
||
| let engine = Arc::new(SyncEngine::new()); | ||
| let mut mock_table = LocalMockTable::new(); | ||
|
|
@@ -978,7 +967,7 @@ async fn print_table_configuration() { | |
| .unwrap() | ||
| .try_collect(); | ||
|
|
||
| let log_output = String::from_utf8(logs.lock().unwrap().clone()).unwrap(); | ||
| let log_output = tracing_guard.logs(); | ||
|
|
||
| assert!(log_output.contains("Table configuration updated during CDF query")); | ||
| assert!(log_output.contains("version=0")); | ||
|
|
@@ -992,3 +981,184 @@ async fn print_table_configuration() { | |
| assert!(log_output.contains("\"delta.columnMapping.mode\": \"none\"")); | ||
| assert!(log_output.contains("\"delta.enableDeletionVectors\": \"true\"")); | ||
| } | ||
|
|
||
| #[tokio::test] | ||
| async fn print_table_info_post_phase1() { | ||
| let tracing_guard = LoggingTest::new(); | ||
|
|
||
| let engine = Arc::new(SyncEngine::new()); | ||
| let mut mock_table = LocalMockTable::new(); | ||
| // This specific commit (with these actions) isn't necessary to test the tracing for this test, we just need to have one commit with any actions | ||
| mock_table | ||
| .commit([ | ||
| Action::Metadata( | ||
| Metadata::try_new( | ||
| None, | ||
| None, | ||
| get_schema(), | ||
| vec![], | ||
| 0, | ||
| HashMap::from([ | ||
| ("delta.enableChangeDataFeed".to_string(), "true".to_string()), | ||
| ( | ||
| "delta.enableDeletionVectors".to_string(), | ||
| "true".to_string(), | ||
| ), | ||
| ("delta.columnMapping.mode".to_string(), "none".to_string()), | ||
| ]), | ||
| ) | ||
| .unwrap(), | ||
| ), | ||
| Action::Protocol( | ||
| Protocol::try_new( | ||
| 3, | ||
| 7, | ||
| Some([TableFeature::DeletionVectors]), | ||
| Some([TableFeature::DeletionVectors, TableFeature::ChangeDataFeed]), | ||
| ) | ||
| .unwrap(), | ||
| ), | ||
| ]) | ||
| .await; | ||
|
|
||
| let commits = get_segment(engine.as_ref(), mock_table.table_root(), 0, None) | ||
| .unwrap() | ||
| .into_iter(); | ||
|
|
||
| let table_root_url = url::Url::from_directory_path(mock_table.table_root()).unwrap(); | ||
| let table_config = get_default_table_config(&table_root_url); | ||
|
|
||
| let _scan_batches: DeltaResult<Vec<_>> = | ||
| table_changes_action_iter(engine, &table_config, commits, get_schema().into(), None) | ||
| .unwrap() | ||
| .try_collect(); | ||
|
|
||
| let log_output = tracing_guard.logs(); | ||
|
Comment on lines
+1024
to
+1036
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I added LoggingTest to reduce some duplication of the logging setup as suggested, but also noticed that the tests are still quite similar in structure. I thought about extracting out basically this logic to another function, but I figure that other tests for tracing might have slightly different structures and this might not be necessary if it'll only be used for these four tests that have been added. Let me know if you think I should add this though!
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yeah it's not the best. A lot of it is from the poor formatting of mock_table's stuff 😔 Since the tests in that module are generally similar, I think this is fine. |
||
|
|
||
| assert!(log_output.contains("Phase 1 of CDF query processing completed")); | ||
| assert!(log_output.contains("id=")); | ||
| assert!(log_output.contains("remove_dvs_size=0")); | ||
lorenarosati marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| assert!(log_output.contains("has_cdc_action=false")); | ||
lorenarosati marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| assert!(log_output.contains("file_path=")); | ||
| assert!(log_output.contains("version=0")); | ||
| assert!(log_output.contains("timestamp=")); | ||
| } | ||
|
|
||
| #[tokio::test] | ||
| async fn print_table_info_post_phase1_has_cdc() { | ||
| let tracing_guard = LoggingTest::new(); | ||
|
|
||
| let engine = Arc::new(SyncEngine::new()); | ||
| let mut mock_table = LocalMockTable::new(); | ||
|
|
||
| mock_table | ||
| .commit([ | ||
| Action::Add(Add { | ||
| path: "fake_path_1".into(), | ||
| data_change: true, | ||
| ..Default::default() | ||
| }), | ||
| Action::Cdc(Cdc { | ||
| path: "fake_path_2".into(), | ||
| ..Default::default() | ||
| }), | ||
| ]) | ||
| .await; | ||
|
|
||
| let commits = get_segment(engine.as_ref(), mock_table.table_root(), 0, None) | ||
| .unwrap() | ||
| .into_iter(); | ||
|
|
||
| let table_root_url = url::Url::from_directory_path(mock_table.table_root()).unwrap(); | ||
| let table_config = get_default_table_config(&table_root_url); | ||
|
|
||
| let _scan_batches: DeltaResult<Vec<_>> = | ||
| table_changes_action_iter(engine, &table_config, commits, get_schema().into(), None) | ||
| .unwrap() | ||
| .try_collect(); | ||
|
|
||
| let log_output = tracing_guard.logs(); | ||
|
|
||
| assert!(log_output.contains("Phase 1 of CDF query processing completed")); | ||
| assert!(log_output.contains("id=")); | ||
| assert!(log_output.contains("remove_dvs_size=0")); | ||
| assert!(log_output.contains("has_cdc_action=true")); | ||
| assert!(log_output.contains("file_path=")); | ||
| assert!(log_output.contains("version=0")); | ||
| assert!(log_output.contains("timestamp=")); | ||
| } | ||
|
|
||
| #[tokio::test] | ||
| async fn print_table_info_post_phase1_has_dv() { | ||
| let tracing_guard = LoggingTest::new(); | ||
|
|
||
| let engine = Arc::new(SyncEngine::new()); | ||
| let mut mock_table = LocalMockTable::new(); | ||
|
|
||
| let deletion_vector1 = DeletionVectorDescriptor { | ||
| storage_type: DeletionVectorStorageType::PersistedRelative, | ||
| path_or_inline_dv: "vBn[lx{q8@P<9BNH/isA".to_string(), | ||
| offset: Some(1), | ||
| size_in_bytes: 36, | ||
| cardinality: 2, | ||
| }; | ||
| let deletion_vector2 = DeletionVectorDescriptor { | ||
| storage_type: DeletionVectorStorageType::PersistedRelative, | ||
| path_or_inline_dv: "U5OWRz5k%CFT.Td}yCPW".to_string(), | ||
| offset: Some(1), | ||
| size_in_bytes: 38, | ||
| cardinality: 3, | ||
| }; | ||
| // - fake_path_1 undergoes a restore. All rows are restored, so the deletion vector is removed. | ||
| // - All remaining rows of fake_path_2 are deleted | ||
| mock_table | ||
| .commit([ | ||
| Action::Remove(Remove { | ||
| path: "fake_path_1".into(), | ||
| data_change: true, | ||
| deletion_vector: Some(deletion_vector1.clone()), | ||
| ..Default::default() | ||
| }), | ||
| Action::Add(Add { | ||
| path: "fake_path_1".into(), | ||
| data_change: true, | ||
| ..Default::default() | ||
| }), | ||
| Action::Remove(Remove { | ||
| path: "fake_path_2".into(), | ||
| data_change: true, | ||
| deletion_vector: Some(deletion_vector2.clone()), | ||
| ..Default::default() | ||
| }), | ||
| ]) | ||
| .await; | ||
|
|
||
| let commits = get_segment(engine.as_ref(), mock_table.table_root(), 0, None) | ||
| .unwrap() | ||
| .into_iter(); | ||
|
|
||
| let table_root_url = url::Url::from_directory_path(mock_table.table_root()).unwrap(); | ||
| let table_config = get_default_table_config(&table_root_url); | ||
| let _scan_batches: DeltaResult<Vec<_>> = | ||
| table_changes_action_iter(engine, &table_config, commits, get_schema().into(), None) | ||
| .unwrap() | ||
| .try_collect(); | ||
|
|
||
| let log_output = tracing_guard.logs(); | ||
|
|
||
| let expected_remove_dvs: Arc<HashMap<String, DvInfo>> = HashMap::from([( | ||
| "fake_path_1".to_string(), | ||
| DvInfo { | ||
| deletion_vector: Some(deletion_vector1.clone()), | ||
| }, | ||
| )]) | ||
| .into(); | ||
|
|
||
| assert!(log_output.contains("Phase 1 of CDF query processing completed")); | ||
| assert!(log_output.contains("id=")); | ||
| assert!(log_output.contains(&format!("remove_dvs_size={}", expected_remove_dvs.len()))); | ||
| assert!(log_output.contains("has_cdc_action=false")); | ||
| assert!(log_output.contains("file_path=")); | ||
| assert!(log_output.contains("version=0")); | ||
| assert!(log_output.contains("timestamp=")); | ||
| } | ||
Uh oh!
There was an error while loading. Please reload this page.