Skip to content

Commit 58dcb0a

Browse files
committed
feat(cubesql): Support literal members in CubeScan under wrapper
1 parent 1b78841 commit 58dcb0a

File tree

2 files changed

+155
-13
lines changed

2 files changed

+155
-13
lines changed

rust/cubesql/cubesql/src/compile/engine/df/wrapper.rs

+98-7
Original file line numberDiff line numberDiff line change
@@ -645,6 +645,7 @@ impl CubeScanWrapperNode {
645645
node
646646
)));
647647
}
648+
let data_source = &data_sources[0];
648649
let mut meta_with_user = load_request_meta.as_ref().clone();
649650
meta_with_user.set_change_user(node.options.change_user.clone());
650651

@@ -666,16 +667,36 @@ impl CubeScanWrapperNode {
666667
.and_then(|f| f.qualifier().cloned());
667668
let mut remapper = Remapper::new(from_alias.clone(), true);
668669
let mut member_to_alias = HashMap::new();
670+
// Probably it should just use member expression for all MemberField::Literal
671+
// But turning literals into dimensions could interfere with NULL handling in grouping keys and joins on the Cube.js side (like in fullKeyQuery)
672+
// And turning literals into measures would require ugly wrapping with a no-op aggregation function
673+
// TODO investigate Cube.js joins, try to implement dimension member expression
674+
let mut has_literal_members = false;
675+
let mut wrapper_exprs = vec![];
676+
669677
for (member, field) in
670678
node.member_fields.iter().zip(node.schema.fields().iter())
671679
{
672680
let alias = remapper.add_column(&field.qualified_column())?;
673-
if let MemberField::Member(f) = member {
674-
member_to_alias.insert(f.to_string(), alias);
675-
}
681+
let expr = match member {
682+
MemberField::Member(f) => {
683+
member_to_alias.insert(f.to_string(), alias.clone());
684+
// `alias` is column name that would be generated by Cube.js, just reference that
685+
Expr::Column(Column::from_name(alias.clone()))
686+
}
687+
MemberField::Literal(value) => {
688+
has_literal_members = true;
689+
// Don't care for `member_to_alias`, Cube.js does not handle literals
690+
// Generate literal expression, and put alias into remapper to use higher up
691+
Expr::Literal(value.clone())
692+
}
693+
};
694+
wrapper_exprs.push((expr, alias));
676695
}
677-
let column_remapping = remapper.into_remapping();
678696

697+
// This is SQL for CubeScan from Cube.js
698+
// It does have all the members with aliases from `member_to_alias`
699+
// But it does not have any literal members
679700
let sql = transport
680701
.sql(
681702
node.span_id.clone(),
@@ -687,11 +708,81 @@ impl CubeScanWrapperNode {
687708
)
688709
.await?;
689710

690-
// TODO Add wrapper for reprojection and literal members handling
711+
// TODO is this check necessary?
712+
let sql = if has_literal_members {
713+
// Need to generate wrapper SELECT with literal columns
714+
// Generated columns need to have same aliases as targets in `remapper`
715+
// Because that's what plans higher up would use in generated SQL
716+
let generator = plan
717+
.meta
718+
.data_source_to_sql_generator
719+
.get(data_source)
720+
.ok_or_else(|| {
721+
CubeError::internal(format!(
722+
"Can't generate SQL for CubeScan: no SQL generator for data source {data_source:?}"
723+
))
724+
})?
725+
.clone();
726+
727+
let mut columns = vec![];
728+
let mut new_sql = sql.sql;
729+
730+
for (expr, alias) in wrapper_exprs {
731+
// Don't use `generate_column_expr` here
732+
// 1. `generate_column_expr` has different idea of literal members
733+
// When generating column expression that points to literal member it would render literal and generate alias
734+
// Here it should just generate the literal
735+
// 2. It does not allow providing aliases for expressions; instead it usually generates them
736+
let (expr, sql) = Self::generate_sql_for_expr(
737+
plan.clone(),
738+
new_sql,
739+
generator.clone(),
740+
expr,
741+
None,
742+
Arc::new(HashMap::new()),
743+
)
744+
.await?;
745+
columns.push(AliasedColumn { expr, alias });
746+
new_sql = sql;
747+
}
748+
749+
// Use SQL from Cube.js as FROM, and prepared expressions as projection
750+
let resulting_sql = generator
751+
.get_sql_templates()
752+
.select(
753+
new_sql.sql.to_string(),
754+
columns,
755+
vec![],
756+
vec![],
757+
vec![],
758+
// TODO
759+
from_alias.clone().unwrap_or("".to_string()),
760+
None,
761+
None,
762+
vec![],
763+
None,
764+
None,
765+
false,
766+
)
767+
.map_err(|e| {
768+
DataFusionError::Internal(format!(
769+
"Can't generate SQL for CubeScan in wrapped select: {}",
770+
e
771+
))
772+
})?;
773+
new_sql.replace_sql(resulting_sql);
774+
775+
new_sql
776+
} else {
777+
sql.sql
778+
};
779+
780+
let column_remapping = remapper.into_remapping();
781+
691782
return Ok(SqlGenerationResult {
692-
data_source: Some(data_sources[0].clone()),
783+
data_source: Some(data_source.clone()),
693784
from_alias,
694-
sql: sql.sql,
785+
sql,
695786
column_remapping,
696787
request: node.request.clone(),
697788
});

rust/cubesql/cubesql/src/compile/test/test_wrapper.rs

+57-6
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,13 @@
1-
use cubeclient::models::V1LoadRequestQuery;
2-
use datafusion::physical_plan::displayable;
1+
use cubeclient::models::{V1LoadRequestQuery, V1LoadRequestQueryTimeDimension};
2+
use datafusion::{physical_plan::displayable, scalar::ScalarValue};
33
use pretty_assertions::assert_eq;
44
use regex::Regex;
55
use serde_json::json;
66
use std::sync::Arc;
77

88
use crate::{
99
compile::{
10+
engine::df::scan::MemberField,
1011
rewrite::rewriter::Rewriter,
1112
test::{
1213
convert_select_to_query_plan, convert_select_to_query_plan_customized,
@@ -1167,6 +1168,12 @@ cube_scan_subq AS (
11671168
SELECT
11681169
logs_alias.content logs_content,
11691170
DATE_TRUNC('month', kibana_alias.last_mod) last_mod_month,
1171+
kibana_alias.__user AS cube_user,
1172+
1 AS literal,
1173+
-- Columns without aliases should also work
1174+
DATE_TRUNC('month', kibana_alias.order_date),
1175+
kibana_alias.__cubeJoinField,
1176+
2,
11701177
CASE
11711178
WHEN sum(kibana_alias."sumPrice") IS NOT NULL
11721179
THEN sum(kibana_alias."sumPrice")
@@ -1175,9 +1182,7 @@ cube_scan_subq AS (
11751182
FROM KibanaSampleDataEcommerce kibana_alias
11761183
JOIN Logs logs_alias
11771184
ON kibana_alias.__cubeJoinField = logs_alias.__cubeJoinField
1178-
GROUP BY
1179-
logs_content,
1180-
last_mod_month
1185+
GROUP BY 1,2,3,4,5,6,7
11811186
),
11821187
filter_subq AS (
11831188
SELECT
@@ -1187,7 +1192,12 @@ filter_subq AS (
11871192
logs_content_filter
11881193
)
11891194
SELECT
1190-
logs_content
1195+
-- Should use SELECT * here to reference columns without aliases.
1196+
-- But it's broken ATM in DF, initial plan contains `Projection: ... #__subquery-0.logs_content_filter` on top, but it should not be there
1197+
-- TODO fix it
1198+
logs_content,
1199+
cube_user,
1200+
literal
11911201
FROM cube_scan_subq
11921202
WHERE
11931203
-- This subquery filter should trigger wrapping of whole query
@@ -1216,6 +1226,43 @@ WHERE
12161226
.unwrap()
12171227
.sql;
12181228

1229+
assert_eq!(
1230+
logical_plan.find_cube_scan().request,
1231+
V1LoadRequestQuery {
1232+
measures: Some(vec!["KibanaSampleDataEcommerce.sumPrice".to_string(),]),
1233+
dimensions: Some(vec!["Logs.content".to_string(),]),
1234+
time_dimensions: Some(vec![
1235+
V1LoadRequestQueryTimeDimension {
1236+
dimension: "KibanaSampleDataEcommerce.last_mod".to_string(),
1237+
granularity: Some("month".to_string()),
1238+
date_range: None,
1239+
},
1240+
V1LoadRequestQueryTimeDimension {
1241+
dimension: "KibanaSampleDataEcommerce.order_date".to_string(),
1242+
granularity: Some("month".to_string()),
1243+
date_range: None,
1244+
},
1245+
]),
1246+
segments: Some(vec![]),
1247+
order: Some(vec![]),
1248+
..Default::default()
1249+
}
1250+
);
1251+
1252+
assert_eq!(
1253+
logical_plan.find_cube_scan().member_fields,
1254+
vec![
1255+
MemberField::Member("Logs.content".to_string()),
1256+
MemberField::Member("KibanaSampleDataEcommerce.last_mod.month".to_string()),
1257+
MemberField::Literal(ScalarValue::Utf8(None)),
1258+
MemberField::Literal(ScalarValue::Int64(Some(1))),
1259+
MemberField::Member("KibanaSampleDataEcommerce.order_date.month".to_string()),
1260+
MemberField::Literal(ScalarValue::Utf8(None)),
1261+
MemberField::Literal(ScalarValue::Int64(Some(2))),
1262+
MemberField::Member("KibanaSampleDataEcommerce.sumPrice".to_string()),
1263+
],
1264+
);
1265+
12191266
// Check that all aliases from different tables have same qualifier, and that names are simple and short
12201267
// logs_content => logs_alias.content
12211268
// last_mod_month => DATE_TRUNC('month', kibana_alias.last_mod),
@@ -1228,6 +1275,10 @@ WHERE
12281275
let sum_price_re = Regex::new(r#"CASE WHEN "logs_alias"."[a-zA-Z0-9_]{1,16}" IS NOT NULL THEN "logs_alias"."[a-zA-Z0-9_]{1,16}" ELSE 0 END "sum_price""#)
12291276
.unwrap();
12301277
assert!(sum_price_re.is_match(&sql));
1278+
let cube_user_re = Regex::new(r#""logs_alias"."[a-zA-Z0-9_]{1,16}" "cube_user""#).unwrap();
1279+
assert!(cube_user_re.is_match(&sql));
1280+
let literal_re = Regex::new(r#""logs_alias"."[a-zA-Z0-9_]{1,16}" "literal""#).unwrap();
1281+
assert!(literal_re.is_match(&sql));
12311282
}
12321283

12331284
/// Test that WrappedSelect(... limit=Some(0) ...) will render it correctly

0 commit comments

Comments
 (0)