From 76eec8b8d1dd5ef1e7990f795e70582f4108ecd5 Mon Sep 17 00:00:00 2001 From: Mikhail Cheshkov Date: Thu, 12 Dec 2024 02:16:40 +0200 Subject: [PATCH] feat(cubesql): Support literal members in CubeScan under wrapper --- .../cubesql/src/compile/engine/df/wrapper.rs | 101 ++++++++++++++++-- .../cubesql/src/compile/test/test_wrapper.rs | 63 +++++++++-- 2 files changed, 151 insertions(+), 13 deletions(-) diff --git a/rust/cubesql/cubesql/src/compile/engine/df/wrapper.rs b/rust/cubesql/cubesql/src/compile/engine/df/wrapper.rs index ab424432b4f31..b6c9af05d07af 100644 --- a/rust/cubesql/cubesql/src/compile/engine/df/wrapper.rs +++ b/rust/cubesql/cubesql/src/compile/engine/df/wrapper.rs @@ -641,6 +641,7 @@ impl CubeScanWrapperNode { node ))); } + let data_source = &data_sources[0]; let mut meta_with_user = load_request_meta.as_ref().clone(); meta_with_user.set_change_user(node.options.change_user.clone()); @@ -662,16 +663,32 @@ impl CubeScanWrapperNode { .and_then(|f| f.qualifier().cloned()); let mut remapper = Remapper::new(from_alias.clone(), true); let mut member_to_alias = HashMap::new(); + let mut has_literal_members = false; + let mut wrapper_exprs = vec![]; + for (member, field) in node.member_fields.iter().zip(node.schema.fields().iter()) { let alias = remapper.add_column(&field.qualified_column())?; - if let MemberField::Member(f) = member { - member_to_alias.insert(f.to_string(), alias); - } + let expr = match member { + MemberField::Member(f) => { + member_to_alias.insert(f.to_string(), alias.clone()); + // `alias` is column name that would be generated by Cube.js, just reference that + Expr::Column(Column::from_name(alias.clone())) + } + MemberField::Literal(value) => { + has_literal_members = true; + // Don't care for `member_to_alias`, Cube.js does not handle literals + // Generate literal expression, and put alias into remapper to use higher up + Expr::Literal(value.clone()) + } + }; + wrapper_exprs.push((expr, alias)); } - let column_remapping = remapper.into_remapping(); + // This is SQL for CubeScan from Cube.js + // It does have all the members with aliases from `member_to_alias` + // But it does not have any literal members let sql = transport .sql( node.span_id.clone(), @@ -683,11 +700,81 @@ impl CubeScanWrapperNode { ) .await?; - // TODO Add wrapper for reprojection and literal members handling + // TODO is this check necessary? + let sql = if has_literal_members { + // Need to generate wrapper SELECT with literal columns + // Generated columns need to have same aliases as targets in `remapper` + // Because that's what plans higher up would use in generated SQL + let generator = plan + .meta + .data_source_to_sql_generator + .get(data_source) + .ok_or_else(|| { + CubeError::internal(format!( + "Can't generate SQL for CubeScan: no SQL generator for data source {data_source:?}" + )) + })? + .clone(); + + let mut columns = vec![]; + let mut new_sql = sql.sql; + + for (expr, alias) in wrapper_exprs { + // Don't use `generate_column_expr` here + // 1. `generate_column_expr` has different idea of literal members + // When generating column expression that points to literal member it would render literal and generate alias + // Here it should just generate the literal + // 2. It would not allow to provide aliases for expressions, instead it usually generates them + let (expr, sql) = Self::generate_sql_for_expr( + plan.clone(), + new_sql, + generator.clone(), + expr, + None, + Arc::new(HashMap::new()), + ) + .await?; + columns.push(AliasedColumn { expr, alias }); + new_sql = sql; + } + + // Use SQL from Cube.js as FROM, and prepared expressions as projection + let resulting_sql = generator + .get_sql_templates() + .select( + new_sql.sql.to_string(), + columns, + vec![], + vec![], + vec![], + // TODO + from_alias.clone().unwrap_or("".to_string()), + None, + None, + vec![], + None, + None, + false, + ) + .map_err(|e| { + DataFusionError::Internal(format!( + "Can't generate SQL for CubeScan in wrapped select: {}", + e + )) + })?; + new_sql.replace_sql(resulting_sql); + + new_sql + } else { + sql.sql + }; + + let column_remapping = remapper.into_remapping(); + return Ok(SqlGenerationResult { - data_source: Some(data_sources[0].clone()), + data_source: Some(data_source.clone()), from_alias, - sql: sql.sql, + sql, column_remapping, request: node.request.clone(), }); diff --git a/rust/cubesql/cubesql/src/compile/test/test_wrapper.rs b/rust/cubesql/cubesql/src/compile/test/test_wrapper.rs index f36f817f58959..b89241fab2c01 100644 --- a/rust/cubesql/cubesql/src/compile/test/test_wrapper.rs +++ b/rust/cubesql/cubesql/src/compile/test/test_wrapper.rs @@ -1,5 +1,5 @@ -use cubeclient::models::V1LoadRequestQuery; -use datafusion::physical_plan::displayable; +use cubeclient::models::{V1LoadRequestQuery, V1LoadRequestQueryTimeDimension}; +use datafusion::{physical_plan::displayable, scalar::ScalarValue}; use pretty_assertions::assert_eq; use regex::Regex; use serde_json::json; @@ -7,6 +7,7 @@ use std::sync::Arc; use crate::{ compile::{ + engine::df::scan::MemberField, rewrite::rewriter::Rewriter, test::{ convert_select_to_query_plan, convert_select_to_query_plan_customized, @@ -1167,6 +1168,12 @@ cube_scan_subq AS ( SELECT logs_alias.content logs_content, DATE_TRUNC('month', kibana_alias.last_mod) last_mod_month, + kibana_alias.__user AS cube_user, + 1 AS literal, + -- Columns without aliases should also work + DATE_TRUNC('month', kibana_alias.order_date), + kibana_alias.__cubeJoinField, + 2, CASE WHEN sum(kibana_alias."sumPrice") IS NOT NULL THEN sum(kibana_alias."sumPrice") @@ -1175,9 +1182,7 @@ cube_scan_subq AS ( FROM KibanaSampleDataEcommerce kibana_alias JOIN Logs logs_alias ON kibana_alias.__cubeJoinField = logs_alias.__cubeJoinField - GROUP BY - logs_content, - last_mod_month + GROUP BY 1,2,3,4,5,6,7 ), filter_subq AS ( SELECT @@ -1187,7 +1192,12 @@ filter_subq AS ( logs_content_filter ) SELECT - logs_content + -- Should use SELECT * here to reference columns without aliases. + -- But it's broken ATM in DF, initial plan contains `Projection: ... #__subquery-0.logs_content_filter` on top, but it should not be there + -- TODO fix it + logs_content, + cube_user, + literal FROM cube_scan_subq WHERE -- This subquery filter should trigger wrapping of whole query @@ -1216,6 +1226,43 @@ WHERE .unwrap() .sql; + assert_eq!( + logical_plan.find_cube_scan().request, + V1LoadRequestQuery { + measures: Some(vec!["KibanaSampleDataEcommerce.sumPrice".to_string(),]), + dimensions: Some(vec!["Logs.content".to_string(),]), + time_dimensions: Some(vec![ + V1LoadRequestQueryTimeDimension { + dimension: "KibanaSampleDataEcommerce.last_mod".to_string(), + granularity: Some("month".to_string()), + date_range: None, + }, + V1LoadRequestQueryTimeDimension { + dimension: "KibanaSampleDataEcommerce.order_date".to_string(), + granularity: Some("month".to_string()), + date_range: None, + }, + ]), + segments: Some(vec![]), + order: Some(vec![]), + ..Default::default() + } + ); + + assert_eq!( + logical_plan.find_cube_scan().member_fields, + vec![ + MemberField::Member("Logs.content".to_string()), + MemberField::Member("KibanaSampleDataEcommerce.last_mod.month".to_string()), + MemberField::Literal(ScalarValue::Utf8(None)), + MemberField::Literal(ScalarValue::Int64(Some(1))), + MemberField::Member("KibanaSampleDataEcommerce.order_date.month".to_string()), + MemberField::Literal(ScalarValue::Utf8(None)), + MemberField::Literal(ScalarValue::Int64(Some(2))), + MemberField::Member("KibanaSampleDataEcommerce.sumPrice".to_string()), + ], + ); + // Check that all aliases from different tables have same qualifier, and that names are simple and short // logs_content => logs_alias.content // last_mod_month => DATE_TRUNC('month', kibana_alias.last_mod), @@ -1228,6 +1275,10 @@ WHERE let sum_price_re = Regex::new(r#"CASE WHEN "logs_alias"."[a-zA-Z0-9_]{1,16}" IS NOT NULL THEN "logs_alias"."[a-zA-Z0-9_]{1,16}" ELSE 0 END "sum_price""#) .unwrap(); assert!(sum_price_re.is_match(&sql)); + let cube_user_re = Regex::new(r#""logs_alias"."[a-zA-Z0-9_]{1,16}" "cube_user""#).unwrap(); + assert!(cube_user_re.is_match(&sql)); + let literal_re = Regex::new(r#""logs_alias"."[a-zA-Z0-9_]{1,16}" "literal""#).unwrap(); + assert!(literal_re.is_match(&sql)); } /// Test that WrappedSelect(... limit=Some(0) ...) will render it correctly