Skip to content

Commit bde6eea

Browse files
mcheshkovpauldheinrichs
andauthoredJan 16, 2025··
feat(cubesql): Implement format and col_description (#9072)
* Adds `format` function, only `%I` placeholder implemented * Adds `col_description` stub implementation, always return empty string *Add stub for regclass casting of schema qualified type names, always returns 0 Picked up from #8947, addresses #8926 --------- Co-authored-by: Paul Heinrichs <paul.heinrichs@vidyard.com>
1 parent 8f0758e commit bde6eea

9 files changed

+655
-11
lines changed
 

‎rust/cubesql/cubesql/src/compile/engine/udf/common.rs

+177-11
Original file line numberDiff line numberDiff line change
@@ -3087,10 +3087,18 @@ pub fn create_cube_regclass_cast_udf() -> ScalarUDF {
30873087
Some(as_str) => {
30883088
match PgType::get_all().iter().find(|e| e.typname == as_str) {
30893089
None => {
3090-
return Err(DataFusionError::Execution(format!(
3091-
"Unable to cast expression to Regclass: Unknown type: {}",
3092-
as_str
3093-
)))
3090+
// If the type name contains a dot, it's a schema-qualified name
3091+
// and we should return the approprate RegClass to be converted to OID
3092+
// For now, we'll return 0 so metabase can sync without failing
3093+
// TODO actually read `pg_type`
3094+
if as_str.contains('.') {
3095+
builder.append_value(0)?;
3096+
} else {
3097+
return Err(DataFusionError::Execution(format!(
3098+
"Unable to cast expression to Regclass: Unknown type: {}",
3099+
as_str
3100+
)));
3101+
}
30943102
}
30953103
Some(ty) => {
30963104
builder.append_value(ty.oid as i64)?;
@@ -3148,6 +3156,171 @@ pub fn create_pg_get_serial_sequence_udf() -> ScalarUDF {
31483156
)
31493157
}
31503158

3159+
// Return a NOOP for this so metabase can sync without failing
3160+
// See https://www.postgresql.org/docs/17/functions-info.html#FUNCTIONS-INFO-COMMENT here
3161+
// TODO: Implement this
3162+
pub fn create_col_description_udf() -> ScalarUDF {
3163+
let fun = make_scalar_function(move |args: &[ArrayRef]| {
3164+
// Ensure the output array has the same length as the input
3165+
let input_length = args[0].len();
3166+
let mut builder = StringBuilder::new(input_length);
3167+
3168+
for _ in 0..input_length {
3169+
builder.append_null()?;
3170+
}
3171+
3172+
Ok(Arc::new(builder.finish()) as ArrayRef)
3173+
});
3174+
3175+
let return_type: ReturnTypeFunction = Arc::new(move |_| Ok(Arc::new(DataType::Utf8)));
3176+
3177+
ScalarUDF::new(
3178+
"col_description",
3179+
// Correct signature for col_description should be `(oid, integer) → text`
3180+
// We model oid as UInt32, so [DataType::UInt32, DataType::Int32] is a proper arguments
3181+
// However, it seems that coercion rules in DF differs from PostgreSQL at the moment
3182+
// And metabase uses col_description(CAST(CAST(... AS regclass) AS oid), cardinal_number)
3183+
// And we model regclass as Int64, and cardinal_number as UInt32
3184+
// Which is why second signature is necessary
3185+
&Signature::one_of(
3186+
vec![
3187+
TypeSignature::Exact(vec![DataType::UInt32, DataType::Int32]),
3188+
// TODO remove this signature in favor of proper model/coercion
3189+
TypeSignature::Exact(vec![DataType::Int64, DataType::UInt32]),
3190+
],
3191+
Volatility::Stable,
3192+
),
3193+
&return_type,
3194+
&fun,
3195+
)
3196+
}
3197+
3198+
// See https://www.postgresql.org/docs/17/functions-string.html#FUNCTIONS-STRING-FORMAT
3199+
pub fn create_format_udf() -> ScalarUDF {
3200+
let fun = make_scalar_function(move |args: &[ArrayRef]| {
3201+
// Ensure at least one argument is provided
3202+
if args.is_empty() {
3203+
return Err(DataFusionError::Execution(
3204+
"format() requires at least one argument".to_string(),
3205+
));
3206+
}
3207+
3208+
// Ensure the first argument is a Utf8 (string)
3209+
if args[0].data_type() != &DataType::Utf8 {
3210+
return Err(DataFusionError::Execution(
3211+
"format() first argument must be a string".to_string(),
3212+
));
3213+
}
3214+
3215+
let format_strings = downcast_string_arg!(&args[0], "format_str", i32);
3216+
let mut builder = StringBuilder::new(format_strings.len());
3217+
3218+
for i in 0..format_strings.len() {
3219+
if format_strings.is_null(i) {
3220+
builder.append_null()?;
3221+
continue;
3222+
}
3223+
3224+
let format_str = format_strings.value(i);
3225+
let mut result = String::new();
3226+
let mut format_chars = format_str.chars().peekable();
3227+
let mut arg_index = 1; // Start from first argument after format string
3228+
3229+
while let Some(c) = format_chars.next() {
3230+
if c != '%' {
3231+
result.push(c);
3232+
continue;
3233+
}
3234+
3235+
match format_chars.next() {
3236+
Some('I') => {
3237+
// Handle %I - SQL identifier
3238+
if arg_index >= args.len() {
3239+
return Err(DataFusionError::Execution(
3240+
"Not enough arguments for format string".to_string(),
3241+
));
3242+
}
3243+
3244+
let arg = &args[arg_index];
3245+
let value = match arg.data_type() {
3246+
DataType::Utf8 => {
3247+
let str_arr = downcast_string_arg!(arg, "arg", i32);
3248+
if str_arr.is_null(i) {
3249+
return Err(DataFusionError::Execution(
3250+
"NULL values cannot be formatted as identifiers"
3251+
.to_string(),
3252+
));
3253+
}
3254+
str_arr.value(i).to_string()
3255+
}
3256+
_ => {
3257+
// For other types, try to convert to string
3258+
let str_arr = cast(&arg, &DataType::Utf8)?;
3259+
let str_arr =
3260+
str_arr.as_any().downcast_ref::<StringArray>().unwrap();
3261+
if str_arr.is_null(i) {
3262+
return Err(DataFusionError::Execution(
3263+
"NULL values cannot be formatted as identifiers"
3264+
.to_string(),
3265+
));
3266+
}
3267+
str_arr.value(i).to_string()
3268+
}
3269+
};
3270+
3271+
// Quote any identifier for now
3272+
// That's a safety-first approach: it would quote too much, but every edge-case would be covered
3273+
// Like `1` or `1a` or `select`
3274+
// TODO Quote identifier only if necessary
3275+
let needs_quoting = true;
3276+
3277+
if needs_quoting {
3278+
result.push('"');
3279+
result.push_str(&value.replace('"', "\"\""));
3280+
result.push('"');
3281+
} else {
3282+
result.push_str(&value);
3283+
}
3284+
arg_index += 1;
3285+
}
3286+
Some('%') => {
3287+
// %% is escaped to single %
3288+
result.push('%');
3289+
}
3290+
Some(c) => {
3291+
return Err(DataFusionError::Execution(format!(
3292+
"Unsupported format specifier %{}",
3293+
c
3294+
)));
3295+
}
3296+
None => {
3297+
return Err(DataFusionError::Execution(
3298+
"Invalid format string - ends with %".to_string(),
3299+
));
3300+
}
3301+
}
3302+
}
3303+
3304+
builder.append_value(result)?;
3305+
}
3306+
3307+
Ok(Arc::new(builder.finish()) as ArrayRef)
3308+
});
3309+
3310+
let return_type: ReturnTypeFunction = Arc::new(move |_| Ok(Arc::new(DataType::Utf8)));
3311+
3312+
ScalarUDF::new(
3313+
"format",
3314+
// Actually, format should be variadic with types (Utf8, any*)
3315+
// But ATM DataFusion does not support those signatures
3316+
// And this would work through implicit casting to Utf8
3317+
// TODO migrate to proper custom signature once it's supported by DF
3318+
&Signature::variadic(vec![DataType::Utf8], Volatility::Immutable),
3319+
&return_type,
3320+
&fun,
3321+
)
3322+
}
3323+
31513324
pub fn create_json_build_object_udf() -> ScalarUDF {
31523325
let fun = make_scalar_function(move |_args: &[ArrayRef]| {
31533326
// TODO: Implement
@@ -3769,13 +3942,6 @@ pub fn register_fun_stubs(mut ctx: SessionContext) -> SessionContext {
37693942
rettyp = TimestampTz,
37703943
vol = Volatile
37713944
);
3772-
register_fun_stub!(
3773-
udf,
3774-
"col_description",
3775-
tsig = [Oid, Int32],
3776-
rettyp = Utf8,
3777-
vol = Stable
3778-
);
37793945
register_fun_stub!(udf, "convert", tsig = [Binary, Utf8, Utf8], rettyp = Binary);
37803946
register_fun_stub!(udf, "convert_from", tsig = [Binary, Utf8], rettyp = Utf8);
37813947
register_fun_stub!(udf, "convert_to", tsig = [Utf8, Utf8], rettyp = Binary);

‎rust/cubesql/cubesql/src/compile/mod.rs

+54
Original file line numberDiff line numberDiff line change
@@ -16211,4 +16211,58 @@ LIMIT {{ limit }}{% endif %}"#.to_string(),
1621116211

1621216212
Ok(())
1621316213
}
16214+
16215+
#[tokio::test]
16216+
async fn test_format_function() -> Result<(), CubeError> {
16217+
// Test: Basic usage with a single identifier
16218+
let result = execute_query(
16219+
"SELECT format('%I', 'column_name') AS formatted_identifier".to_string(),
16220+
DatabaseProtocol::PostgreSQL,
16221+
)
16222+
.await?;
16223+
insta::assert_snapshot!("formatted_identifier", result);
16224+
16225+
// Test: Using multiple identifiers
16226+
let result = execute_query(
16227+
"SELECT format('%I, %I', 'table_name', 'column_name') AS formatted_identifiers"
16228+
.to_string(),
16229+
DatabaseProtocol::PostgreSQL,
16230+
)
16231+
.await?;
16232+
insta::assert_snapshot!("formatted_identifiers", result);
16233+
16234+
// Test: Unsupported format specifier
16235+
let result = execute_query(
16236+
"SELECT format('%X', 'value') AS unsupported_specifier".to_string(),
16237+
DatabaseProtocol::PostgreSQL,
16238+
)
16239+
.await;
16240+
assert!(result.is_err());
16241+
16242+
// Test: Format string ending with %
16243+
let result = execute_query(
16244+
"SELECT format('%', 'value') AS invalid_format".to_string(),
16245+
DatabaseProtocol::PostgreSQL,
16246+
)
16247+
.await;
16248+
assert!(result.is_err());
16249+
16250+
// Test: Quoting necessary for special characters
16251+
let result = execute_query(
16252+
"SELECT format('%I', 'column-name') AS quoted_identifier".to_string(),
16253+
DatabaseProtocol::PostgreSQL,
16254+
)
16255+
.await?;
16256+
insta::assert_snapshot!("quoted_identifier", result);
16257+
16258+
// Test: Quoting necessary for reserved keywords
16259+
let result = execute_query(
16260+
"SELECT format('%I', 'select') AS quoted_keyword".to_string(),
16261+
DatabaseProtocol::PostgreSQL,
16262+
)
16263+
.await?;
16264+
insta::assert_snapshot!("quoted_keyword", result);
16265+
16266+
Ok(())
16267+
}
1621416268
}

‎rust/cubesql/cubesql/src/compile/query_engine.rs

+2
Original file line numberDiff line numberDiff line change
@@ -467,7 +467,9 @@ impl QueryEngine for SqlQueryEngine {
467467
ctx.register_udf(create_current_timestamp_udf("localtimestamp"));
468468
ctx.register_udf(create_current_schema_udf());
469469
ctx.register_udf(create_current_schemas_udf());
470+
ctx.register_udf(create_format_udf());
470471
ctx.register_udf(create_format_type_udf());
472+
ctx.register_udf(create_col_description_udf());
471473
ctx.register_udf(create_pg_datetime_precision_udf());
472474
ctx.register_udf(create_pg_numeric_precision_udf());
473475
ctx.register_udf(create_pg_numeric_scale_udf());
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
---
2+
source: cubesql/src/compile/mod.rs
3+
expression: result
4+
---
5+
+----------------------+
6+
| formatted_identifier |
7+
+----------------------+
8+
| "column_name" |
9+
+----------------------+
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
---
2+
source: cubesql/src/compile/mod.rs
3+
expression: result
4+
---
5+
+-----------------------------+
6+
| formatted_identifiers |
7+
+-----------------------------+
8+
| "table_name", "column_name" |
9+
+-----------------------------+
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
---
2+
source: cubesql/src/compile/mod.rs
3+
expression: result
4+
---
5+
+-------------------+
6+
| quoted_identifier |
7+
+-------------------+
8+
| "column-name" |
9+
+-------------------+
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
---
2+
source: cubesql/src/compile/mod.rs
3+
expression: result
4+
---
5+
+----------------+
6+
| quoted_keyword |
7+
+----------------+
8+
| "select" |
9+
+----------------+

‎rust/cubesql/cubesql/src/compile/test/snapshots/cubesql__compile__test__test_introspection__test_metabase_v0_51_2_introspection_field_indoption.snap

+308
Large diffs are not rendered by default.

‎rust/cubesql/cubesql/src/compile/test/test_introspection.rs

+78
Original file line numberDiff line numberDiff line change
@@ -3140,3 +3140,81 @@ async fn test_metabase_introspection_indoption() -> Result<(), CubeError> {
31403140
);
31413141
Ok(())
31423142
}
3143+
3144+
#[tokio::test]
3145+
async fn test_metabase_v0_51_2_introspection_field_indoption() -> Result<(), CubeError> {
3146+
init_testing_logger();
3147+
3148+
insta::assert_snapshot!(
3149+
"test_metabase_v0_51_2_introspection_field_indoption",
3150+
execute_query(
3151+
// language=PostgreSQL
3152+
r#"
3153+
SELECT
3154+
c.column_name AS name,
3155+
c.udt_name AS database_type,
3156+
c.ordinal_position - 1 AS database_position,
3157+
c.table_schema AS table_schema,
3158+
c.table_name AS table_name,
3159+
pk.column_name IS NOT NULL AS pk,
3160+
COL_DESCRIPTION(
3161+
CAST(
3162+
CAST(
3163+
FORMAT(
3164+
'%I.%I',
3165+
CAST(c.table_schema AS TEXT),
3166+
CAST(c.table_name AS TEXT)
3167+
) AS REGCLASS
3168+
) AS OID
3169+
),
3170+
c.ordinal_position
3171+
) AS field_comment,
3172+
(
3173+
(column_default IS NULL)
3174+
OR (LOWER(column_default) = 'null')
3175+
)
3176+
AND (is_nullable = 'NO')
3177+
AND NOT (
3178+
(
3179+
(column_default IS NOT NULL)
3180+
AND (column_default LIKE '%nextval(%')
3181+
)
3182+
OR (is_identity <> 'NO')
3183+
) AS database_required,
3184+
(
3185+
(column_default IS NOT NULL)
3186+
AND (column_default LIKE '%nextval(%')
3187+
)
3188+
OR (is_identity <> 'NO') AS database_is_auto_increment
3189+
FROM
3190+
information_schema.columns AS c
3191+
LEFT JOIN (
3192+
SELECT
3193+
tc.table_schema,
3194+
tc.table_name,
3195+
kc.column_name
3196+
FROM
3197+
information_schema.table_constraints AS tc
3198+
INNER JOIN information_schema.key_column_usage AS kc ON (tc.constraint_name = kc.constraint_name)
3199+
AND (tc.table_schema = kc.table_schema)
3200+
AND (tc.table_name = kc.table_name)
3201+
WHERE
3202+
tc.constraint_type = 'PRIMARY KEY'
3203+
) AS pk ON (c.table_schema = pk.table_schema)
3204+
AND (c.table_name = pk.table_name)
3205+
AND (c.column_name = pk.column_name)
3206+
WHERE
3207+
c.table_schema !~ '^information_schema|catalog_history|pg_'
3208+
AND (c.table_schema IN ('public'))
3209+
ORDER BY
3210+
table_schema ASC,
3211+
table_name ASC,
3212+
database_position ASC
3213+
"#
3214+
.to_string(),
3215+
DatabaseProtocol::PostgreSQL
3216+
)
3217+
.await?
3218+
);
3219+
Ok(())
3220+
}

0 commit comments

Comments
 (0)
Please sign in to comment.