Skip to content

Commit

Permalink
feat(cubesql): Support literal members in CubeScan under wrapper
Browse files Browse the repository at this point in the history
  • Loading branch information
mcheshkov committed Dec 13, 2024
1 parent ab3ed92 commit 76eec8b
Show file tree
Hide file tree
Showing 2 changed files with 151 additions and 13 deletions.
101 changes: 94 additions & 7 deletions rust/cubesql/cubesql/src/compile/engine/df/wrapper.rs
Original file line number Diff line number Diff line change
Expand Up @@ -641,6 +641,7 @@ impl CubeScanWrapperNode {
node
)));
}
let data_source = &data_sources[0];
let mut meta_with_user = load_request_meta.as_ref().clone();
meta_with_user.set_change_user(node.options.change_user.clone());

Expand All @@ -662,16 +663,32 @@ impl CubeScanWrapperNode {
.and_then(|f| f.qualifier().cloned());
let mut remapper = Remapper::new(from_alias.clone(), true);
let mut member_to_alias = HashMap::new();
let mut has_literal_members = false;
let mut wrapper_exprs = vec![];

for (member, field) in
node.member_fields.iter().zip(node.schema.fields().iter())
{
let alias = remapper.add_column(&field.qualified_column())?;
if let MemberField::Member(f) = member {
member_to_alias.insert(f.to_string(), alias);
}
let expr = match member {
MemberField::Member(f) => {
member_to_alias.insert(f.to_string(), alias.clone());
// `alias` is column name that would be generated by Cube.js, just reference that
Expr::Column(Column::from_name(alias.clone()))
}
MemberField::Literal(value) => {
has_literal_members = true;
// Don't care for `member_to_alias`, Cube.js does not handle literals
// Generate literal expression, and put alias into remapper to use higher up
Expr::Literal(value.clone())
}
};
wrapper_exprs.push((expr, alias));
}
let column_remapping = remapper.into_remapping();

// This is SQL for CubeScan from Cube.js
// It does have all the members with aliases from `member_to_alias`
// But it does not have any literal members
let sql = transport
.sql(
node.span_id.clone(),
Expand All @@ -683,11 +700,81 @@ impl CubeScanWrapperNode {
)
.await?;

// TODO Add wrapper for reprojection and literal members handling
// TODO is this check necessary?
let sql = if has_literal_members {
// Need to generate wrapper SELECT with literal columns
// Generated columns need to have same aliases as targets in `remapper`
// Because that's what plans higher up would use in generated SQL
let generator = plan
.meta
.data_source_to_sql_generator
.get(data_source)
.ok_or_else(|| {
CubeError::internal(format!(
"Can't generate SQL for CubeScan: no SQL generator for data source {data_source:?}"
))

Check warning on line 715 in rust/cubesql/cubesql/src/compile/engine/df/wrapper.rs

View check run for this annotation

Codecov / codecov/patch

rust/cubesql/cubesql/src/compile/engine/df/wrapper.rs#L713-L715

Added lines #L713 - L715 were not covered by tests
})?
.clone();

let mut columns = vec![];
let mut new_sql = sql.sql;

for (expr, alias) in wrapper_exprs {
// Don't use `generate_column_expr` here
// 1. `generate_column_expr` has different idea of literal members
// When generating column expression that points to literal member it would render literal and generate alias
// Here it should just generate the literal
// 2. It would not allow to provide aliases for expressions, instead it usually generates them
let (expr, sql) = Self::generate_sql_for_expr(
plan.clone(),
new_sql,
generator.clone(),
expr,
None,
Arc::new(HashMap::new()),
)
.await?;

Check warning on line 736 in rust/cubesql/cubesql/src/compile/engine/df/wrapper.rs

View check run for this annotation

Codecov / codecov/patch

rust/cubesql/cubesql/src/compile/engine/df/wrapper.rs#L736

Added line #L736 was not covered by tests
columns.push(AliasedColumn { expr, alias });
new_sql = sql;
}

// Use SQL from Cube.js as FROM, and prepared expressions as projection
let resulting_sql = generator
.get_sql_templates()
.select(
new_sql.sql.to_string(),
columns,
vec![],
vec![],
vec![],
// TODO
from_alias.clone().unwrap_or("".to_string()),
None,
None,
vec![],
None,
None,
false,
)
.map_err(|e| {
DataFusionError::Internal(format!(
"Can't generate SQL for CubeScan in wrapped select: {}",
e
))

Check warning on line 763 in rust/cubesql/cubesql/src/compile/engine/df/wrapper.rs

View check run for this annotation

Codecov / codecov/patch

rust/cubesql/cubesql/src/compile/engine/df/wrapper.rs#L760-L763

Added lines #L760 - L763 were not covered by tests
})?;
new_sql.replace_sql(resulting_sql);

new_sql
} else {
sql.sql
};

let column_remapping = remapper.into_remapping();

return Ok(SqlGenerationResult {
data_source: Some(data_sources[0].clone()),
data_source: Some(data_source.clone()),
from_alias,
sql: sql.sql,
sql,
column_remapping,
request: node.request.clone(),
});
Expand Down
63 changes: 57 additions & 6 deletions rust/cubesql/cubesql/src/compile/test/test_wrapper.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
use cubeclient::models::V1LoadRequestQuery;
use datafusion::physical_plan::displayable;
use cubeclient::models::{V1LoadRequestQuery, V1LoadRequestQueryTimeDimension};
use datafusion::{physical_plan::displayable, scalar::ScalarValue};
use pretty_assertions::assert_eq;
use regex::Regex;
use serde_json::json;
use std::sync::Arc;

use crate::{
compile::{
engine::df::scan::MemberField,
rewrite::rewriter::Rewriter,
test::{
convert_select_to_query_plan, convert_select_to_query_plan_customized,
Expand Down Expand Up @@ -1167,6 +1168,12 @@ cube_scan_subq AS (
SELECT
logs_alias.content logs_content,
DATE_TRUNC('month', kibana_alias.last_mod) last_mod_month,
kibana_alias.__user AS cube_user,
1 AS literal,
-- Columns without aliases should also work
DATE_TRUNC('month', kibana_alias.order_date),
kibana_alias.__cubeJoinField,
2,
CASE
WHEN sum(kibana_alias."sumPrice") IS NOT NULL
THEN sum(kibana_alias."sumPrice")
Expand All @@ -1175,9 +1182,7 @@ cube_scan_subq AS (
FROM KibanaSampleDataEcommerce kibana_alias
JOIN Logs logs_alias
ON kibana_alias.__cubeJoinField = logs_alias.__cubeJoinField
GROUP BY
logs_content,
last_mod_month
GROUP BY 1,2,3,4,5,6,7
),
filter_subq AS (
SELECT
Expand All @@ -1187,7 +1192,12 @@ filter_subq AS (
logs_content_filter
)
SELECT
logs_content
-- Should use SELECT * here to reference columns without aliases.
-- But it's broken ATM in DF, initial plan contains `Projection: ... #__subquery-0.logs_content_filter` on top, but it should not be there
-- TODO fix it
logs_content,
cube_user,
literal
FROM cube_scan_subq
WHERE
-- This subquery filter should trigger wrapping of whole query
Expand Down Expand Up @@ -1216,6 +1226,43 @@ WHERE
.unwrap()
.sql;

assert_eq!(
logical_plan.find_cube_scan().request,
V1LoadRequestQuery {
measures: Some(vec!["KibanaSampleDataEcommerce.sumPrice".to_string(),]),
dimensions: Some(vec!["Logs.content".to_string(),]),
time_dimensions: Some(vec![
V1LoadRequestQueryTimeDimension {
dimension: "KibanaSampleDataEcommerce.last_mod".to_string(),
granularity: Some("month".to_string()),
date_range: None,
},
V1LoadRequestQueryTimeDimension {
dimension: "KibanaSampleDataEcommerce.order_date".to_string(),
granularity: Some("month".to_string()),
date_range: None,
},
]),
segments: Some(vec![]),
order: Some(vec![]),
..Default::default()
}
);

assert_eq!(
logical_plan.find_cube_scan().member_fields,
vec![
MemberField::Member("Logs.content".to_string()),
MemberField::Member("KibanaSampleDataEcommerce.last_mod.month".to_string()),
MemberField::Literal(ScalarValue::Utf8(None)),
MemberField::Literal(ScalarValue::Int64(Some(1))),
MemberField::Member("KibanaSampleDataEcommerce.order_date.month".to_string()),
MemberField::Literal(ScalarValue::Utf8(None)),
MemberField::Literal(ScalarValue::Int64(Some(2))),
MemberField::Member("KibanaSampleDataEcommerce.sumPrice".to_string()),
],
);

// Check that all aliases from different tables have same qualifier, and that names are simple and short
// logs_content => logs_alias.content
// last_mod_month => DATE_TRUNC('month', kibana_alias.last_mod),
Expand All @@ -1228,6 +1275,10 @@ WHERE
let sum_price_re = Regex::new(r#"CASE WHEN "logs_alias"."[a-zA-Z0-9_]{1,16}" IS NOT NULL THEN "logs_alias"."[a-zA-Z0-9_]{1,16}" ELSE 0 END "sum_price""#)
.unwrap();
assert!(sum_price_re.is_match(&sql));
let cube_user_re = Regex::new(r#""logs_alias"."[a-zA-Z0-9_]{1,16}" "cube_user""#).unwrap();
assert!(cube_user_re.is_match(&sql));
let literal_re = Regex::new(r#""logs_alias"."[a-zA-Z0-9_]{1,16}" "literal""#).unwrap();
assert!(literal_re.is_match(&sql));
}

/// Test that WrappedSelect(... limit=Some(0) ...) will render it correctly
Expand Down

0 comments on commit 76eec8b

Please sign in to comment.