From 751fa77a4c767c19f0323b99c8c2b79419f00599 Mon Sep 17 00:00:00 2001 From: Marc hurabielle Date: Mon, 7 Aug 2023 17:15:50 +0900 Subject: [PATCH] validate tag timestamp (#3703) * feat: Add validation to timestamp and tag fields Timestamp and tag fields shouldnt start or end with dot Todo: Implement tests * ensure we validate escaped string * add test for tag validation * fix linter * Endwith dont need to be checked with string literal `//.` and `.` should end in the same way Also address review comment about `['//.']` -> `"//."` --------- Co-authored-by: sravan Co-authored-by: Paul Masurel --- .../src/default_doc_mapper/default_mapper.rs | 126 ++++++++++++++++++ 1 file changed, 126 insertions(+) diff --git a/quickwit/quickwit-doc-mapper/src/default_doc_mapper/default_mapper.rs b/quickwit/quickwit-doc-mapper/src/default_doc_mapper/default_mapper.rs index 7483383d8cd..6010abc42c6 100644 --- a/quickwit/quickwit-doc-mapper/src/default_doc_mapper/default_mapper.rs +++ b/quickwit/quickwit-doc-mapper/src/default_doc_mapper/default_mapper.rs @@ -116,6 +116,12 @@ fn validate_timestamp_field( timestamp_field_path: &str, mapping_root_node: &MappingNode, ) -> anyhow::Result<()> { + if timestamp_field_path.starts_with('.') || timestamp_field_path.starts_with("\\.") { + bail!("Timestamp field `{timestamp_field_path}` should not start with a `.`."); + } + if timestamp_field_path.ends_with('.') { + bail!("Timestamp field `{timestamp_field_path}` should not end with a `.`."); + } let Some(timestamp_field_type) = mapping_root_node.find_field_mapping_type(timestamp_field_path) else { @@ -262,6 +268,12 @@ impl TryFrom for DefaultDocMapper { /// - if str, the field must use the `raw` tokenizer for indexing. /// - the field must be indexed. fn validate_tag(tag_field_name: &str, schema: &Schema) -> Result<(), anyhow::Error> { + if tag_field_name.starts_with('.') || tag_field_name.starts_with("\\.") { + bail!("Tag field `{tag_field_name}` should not start with a `.`."); + } + if tag_field_name.ends_with('.') { + bail!("Tag field `{tag_field_name}` should not end with a `.`."); + } let field = schema .get_field(tag_field_name) .with_context(|| format!("Unknown tag field: `{tag_field_name}`"))?; @@ -821,6 +833,120 @@ mod tests { .unwrap(); } + #[test] + fn test_timestamp_field_that_start_with_dot_is_invalid() { + assert_eq!( + serde_json::from_str::( + r#"{ + "field_mappings": [ + { + "name": "my.timestamp", + "type": "datetime", + "fast": true + } + ], + "timestamp_field": ".my.timestamp" + }"#, + ) + .unwrap_err() + .to_string(), + "Timestamp field `.my.timestamp` should not start with a `.`.", + ); + + assert_eq!( + serde_json::from_str::( + r#"{ + "field_mappings": [ + { + "name": "my.timestamp", + "type": "datetime", + "fast": true + } + ], + "timestamp_field": "\\.my\\.timestamp" + }"#, + ) + .unwrap_err() + .to_string(), + "Timestamp field `\\.my\\.timestamp` should not start with a `.`.", + ) + } + + #[test] + fn test_timestamp_field_that_ends_with_dot_is_invalid() { + assert_eq!( + serde_json::from_str::( + r#"{ + "timestamp_field": "my.timestamp." + }"#, + ) + .unwrap_err() + .to_string(), + "Timestamp field `my.timestamp.` should not end with a `.`.", + ); + + assert_eq!( + serde_json::from_str::( + r#"{ + "timestamp_field": "my\\.timestamp\\." + }"#, + ) + .unwrap_err() + .to_string(), + "Timestamp field `my\\.timestamp\\.` should not end with a `.`.", + ) + } + + #[test] + fn test_tag_field_name_that_starts_with_dot_is_invalid() { + assert_eq!( + serde_json::from_str::( + r#"{ + "tag_fields": [".my.tag"] + }"#, + ) + .unwrap_err() + .to_string(), + "Tag field `.my.tag` should not start with a `.`.", + ); + + assert_eq!( + serde_json::from_str::( + r#"{ + "tag_fields": ["\\.my\\.tag"] + }"#, + ) + .unwrap_err() + .to_string(), + "Tag field `\\.my\\.tag` should not start with a `.`.", + ) + } + + #[test] + fn test_tag_field_name_that_ends_with_dot_is_invalid() { + assert_eq!( + serde_json::from_str::( + r#"{ + "tag_fields": ["my.tag."] + }"#, + ) + .unwrap_err() + .to_string(), + "Tag field `my.tag.` should not end with a `.`.", + ); + + assert_eq!( + serde_json::from_str::( + r#"{ + "tag_fields": ["my\\.tag\\."] + }"#, + ) + .unwrap_err() + .to_string(), + "Tag field `my\\.tag\\.` should not end with a `.`.", + ) + } + #[test] fn test_fail_to_build_doc_mapper_with_timestamp_field_with_multivalues_cardinality() { let doc_mapper = r#"{