@@ -3087,10 +3087,18 @@ pub fn create_cube_regclass_cast_udf() -> ScalarUDF {
3087
3087
Some ( as_str) => {
3088
3088
match PgType :: get_all ( ) . iter ( ) . find ( |e| e. typname == as_str) {
3089
3089
None => {
3090
- return Err ( DataFusionError :: Execution ( format ! (
3091
- "Unable to cast expression to Regclass: Unknown type: {}" ,
3092
- as_str
3093
- ) ) )
3090
+ // If the type name contains a dot, it's a schema-qualified name
3091
+ // and we should return the appropriate RegClass to be converted to OID
3092
+ // For now, we'll return 0 so metabase can sync without failing
3093
+ // TODO actually read `pg_type`
3094
+ if as_str. contains ( '.' ) {
3095
+ builder. append_value ( 0 ) ?;
3096
+ } else {
3097
+ return Err ( DataFusionError :: Execution ( format ! (
3098
+ "Unable to cast expression to Regclass: Unknown type: {}" ,
3099
+ as_str
3100
+ ) ) ) ;
3101
+ }
3094
3102
}
3095
3103
Some ( ty) => {
3096
3104
builder. append_value ( ty. oid as i64 ) ?;
@@ -3148,6 +3156,171 @@ pub fn create_pg_get_serial_sequence_udf() -> ScalarUDF {
3148
3156
)
3149
3157
}
3150
3158
3159
+ // Return a NOOP for this so metabase can sync without failing
3160
+ // See https://www.postgresql.org/docs/17/functions-info.html#FUNCTIONS-INFO-COMMENT here
3161
+ // TODO: Implement this
3162
+ pub fn create_col_description_udf ( ) -> ScalarUDF {
3163
+ let fun = make_scalar_function ( move |args : & [ ArrayRef ] | {
3164
+ // Ensure the output array has the same length as the input
3165
+ let input_length = args[ 0 ] . len ( ) ;
3166
+ let mut builder = StringBuilder :: new ( input_length) ;
3167
+
3168
+ for _ in 0 ..input_length {
3169
+ builder. append_null ( ) ?;
3170
+ }
3171
+
3172
+ Ok ( Arc :: new ( builder. finish ( ) ) as ArrayRef )
3173
+ } ) ;
3174
+
3175
+ let return_type: ReturnTypeFunction = Arc :: new ( move |_| Ok ( Arc :: new ( DataType :: Utf8 ) ) ) ;
3176
+
3177
+ ScalarUDF :: new (
3178
+ "col_description" ,
3179
+ // Correct signature for col_description should be `(oid, integer) → text`
3180
+ // We model oid as UInt32, so [DataType::UInt32, DataType::Int32] is a proper arguments
3181
+ // However, it seems that coercion rules in DF differs from PostgreSQL at the moment
3182
+ // And metabase uses col_description(CAST(CAST(... AS regclass) AS oid), cardinal_number)
3183
+ // And we model regclass as Int64, and cardinal_number as UInt32
3184
+ // Which is why second signature is necessary
3185
+ & Signature :: one_of (
3186
+ vec ! [
3187
+ TypeSignature :: Exact ( vec![ DataType :: UInt32 , DataType :: Int32 ] ) ,
3188
+ // TODO remove this signature in favor of proper model/coercion
3189
+ TypeSignature :: Exact ( vec![ DataType :: Int64 , DataType :: UInt32 ] ) ,
3190
+ ] ,
3191
+ Volatility :: Stable ,
3192
+ ) ,
3193
+ & return_type,
3194
+ & fun,
3195
+ )
3196
+ }
3197
+
3198
+ // See https://www.postgresql.org/docs/17/functions-string.html#FUNCTIONS-STRING-FORMAT
3199
+ pub fn create_format_udf ( ) -> ScalarUDF {
3200
+ let fun = make_scalar_function ( move |args : & [ ArrayRef ] | {
3201
+ // Ensure at least one argument is provided
3202
+ if args. is_empty ( ) {
3203
+ return Err ( DataFusionError :: Execution (
3204
+ "format() requires at least one argument" . to_string ( ) ,
3205
+ ) ) ;
3206
+ }
3207
+
3208
+ // Ensure the first argument is a Utf8 (string)
3209
+ if args[ 0 ] . data_type ( ) != & DataType :: Utf8 {
3210
+ return Err ( DataFusionError :: Execution (
3211
+ "format() first argument must be a string" . to_string ( ) ,
3212
+ ) ) ;
3213
+ }
3214
+
3215
+ let format_strings = downcast_string_arg ! ( & args[ 0 ] , "format_str" , i32 ) ;
3216
+ let mut builder = StringBuilder :: new ( format_strings. len ( ) ) ;
3217
+
3218
+ for i in 0 ..format_strings. len ( ) {
3219
+ if format_strings. is_null ( i) {
3220
+ builder. append_null ( ) ?;
3221
+ continue ;
3222
+ }
3223
+
3224
+ let format_str = format_strings. value ( i) ;
3225
+ let mut result = String :: new ( ) ;
3226
+ let mut format_chars = format_str. chars ( ) . peekable ( ) ;
3227
+ let mut arg_index = 1 ; // Start from first argument after format string
3228
+
3229
+ while let Some ( c) = format_chars. next ( ) {
3230
+ if c != '%' {
3231
+ result. push ( c) ;
3232
+ continue ;
3233
+ }
3234
+
3235
+ match format_chars. next ( ) {
3236
+ Some ( 'I' ) => {
3237
+ // Handle %I - SQL identifier
3238
+ if arg_index >= args. len ( ) {
3239
+ return Err ( DataFusionError :: Execution (
3240
+ "Not enough arguments for format string" . to_string ( ) ,
3241
+ ) ) ;
3242
+ }
3243
+
3244
+ let arg = & args[ arg_index] ;
3245
+ let value = match arg. data_type ( ) {
3246
+ DataType :: Utf8 => {
3247
+ let str_arr = downcast_string_arg ! ( arg, "arg" , i32 ) ;
3248
+ if str_arr. is_null ( i) {
3249
+ return Err ( DataFusionError :: Execution (
3250
+ "NULL values cannot be formatted as identifiers"
3251
+ . to_string ( ) ,
3252
+ ) ) ;
3253
+ }
3254
+ str_arr. value ( i) . to_string ( )
3255
+ }
3256
+ _ => {
3257
+ // For other types, try to convert to string
3258
+ let str_arr = cast ( & arg, & DataType :: Utf8 ) ?;
3259
+ let str_arr =
3260
+ str_arr. as_any ( ) . downcast_ref :: < StringArray > ( ) . unwrap ( ) ;
3261
+ if str_arr. is_null ( i) {
3262
+ return Err ( DataFusionError :: Execution (
3263
+ "NULL values cannot be formatted as identifiers"
3264
+ . to_string ( ) ,
3265
+ ) ) ;
3266
+ }
3267
+ str_arr. value ( i) . to_string ( )
3268
+ }
3269
+ } ;
3270
+
3271
+ // Quote any identifier for now
3272
+ // That's a safety-first approach: it would quote too much, but every edge-case would be covered
3273
+ // Like `1` or `1a` or `select`
3274
+ // TODO Quote identifier only if necessary
3275
+ let needs_quoting = true ;
3276
+
3277
+ if needs_quoting {
3278
+ result. push ( '"' ) ;
3279
+ result. push_str ( & value. replace ( '"' , "\" \" " ) ) ;
3280
+ result. push ( '"' ) ;
3281
+ } else {
3282
+ result. push_str ( & value) ;
3283
+ }
3284
+ arg_index += 1 ;
3285
+ }
3286
+ Some ( '%' ) => {
3287
+ // %% is escaped to single %
3288
+ result. push ( '%' ) ;
3289
+ }
3290
+ Some ( c) => {
3291
+ return Err ( DataFusionError :: Execution ( format ! (
3292
+ "Unsupported format specifier %{}" ,
3293
+ c
3294
+ ) ) ) ;
3295
+ }
3296
+ None => {
3297
+ return Err ( DataFusionError :: Execution (
3298
+ "Invalid format string - ends with %" . to_string ( ) ,
3299
+ ) ) ;
3300
+ }
3301
+ }
3302
+ }
3303
+
3304
+ builder. append_value ( result) ?;
3305
+ }
3306
+
3307
+ Ok ( Arc :: new ( builder. finish ( ) ) as ArrayRef )
3308
+ } ) ;
3309
+
3310
+ let return_type: ReturnTypeFunction = Arc :: new ( move |_| Ok ( Arc :: new ( DataType :: Utf8 ) ) ) ;
3311
+
3312
+ ScalarUDF :: new (
3313
+ "format" ,
3314
+ // Actually, format should be variadic with types (Utf8, any*)
3315
+ // But ATM DataFusion does not support those signatures
3316
+ // And this would work through implicit casting to Utf8
3317
+ // TODO migrate to proper custom signature once it's supported by DF
3318
+ & Signature :: variadic ( vec ! [ DataType :: Utf8 ] , Volatility :: Immutable ) ,
3319
+ & return_type,
3320
+ & fun,
3321
+ )
3322
+ }
3323
+
3151
3324
pub fn create_json_build_object_udf ( ) -> ScalarUDF {
3152
3325
let fun = make_scalar_function ( move |_args : & [ ArrayRef ] | {
3153
3326
// TODO: Implement
@@ -3769,13 +3942,6 @@ pub fn register_fun_stubs(mut ctx: SessionContext) -> SessionContext {
3769
3942
rettyp = TimestampTz ,
3770
3943
vol = Volatile
3771
3944
) ;
3772
- register_fun_stub ! (
3773
- udf,
3774
- "col_description" ,
3775
- tsig = [ Oid , Int32 ] ,
3776
- rettyp = Utf8 ,
3777
- vol = Stable
3778
- ) ;
3779
3945
register_fun_stub ! ( udf, "convert" , tsig = [ Binary , Utf8 , Utf8 ] , rettyp = Binary ) ;
3780
3946
register_fun_stub ! ( udf, "convert_from" , tsig = [ Binary , Utf8 ] , rettyp = Utf8 ) ;
3781
3947
register_fun_stub ! ( udf, "convert_to" , tsig = [ Utf8 , Utf8 ] , rettyp = Binary ) ;
0 commit comments