3131import org .apache .calcite .rel .logical .LogicalUnion ;
3232import org .apache .calcite .rel .logical .LogicalValues ;
3333import org .apache .calcite .rel .type .RelDataType ;
34- import org .apache .calcite .rel .type .RelDataTypeField ;
35- import org .apache .calcite .rel .type .RelRecordType ;
3634import org .apache .calcite .rex .RexBuilder ;
3735import org .apache .calcite .rex .RexCall ;
3836import org .apache .calcite .rex .RexLiteral ;
3937import org .apache .calcite .rex .RexNode ;
4038import org .apache .calcite .rex .RexShuttle ;
4139import org .apache .calcite .rex .RexUtil ;
42- import org .apache .calcite .sql .SqlIdentifier ;
4340import org .apache .calcite .sql .SqlKind ;
44- import org .apache .calcite .sql .SqlOperator ;
4541import org .apache .calcite .sql .fun .SqlStdOperatorTable ;
46- import org .apache .calcite .sql .parser .SqlParserPos ;
4742import org .apache .calcite .sql .type .ArraySqlType ;
48- import org .apache .calcite .sql .type .SqlReturnTypeInference ;
4943import org .apache .calcite .sql .type .SqlTypeName ;
50- import org .apache .calcite .sql .validate .SqlUserDefinedFunction ;
5144
5245import com .linkedin .coral .com .google .common .collect .ImmutableList ;
53- import com .linkedin .coral .com .google .common .collect .Lists ;
5446import com .linkedin .coral .common .functions .GenericProjectFunction ;
55- import com .linkedin .coral .hive .hive2rel .functions .CoalesceStructUtility ;
56- import com .linkedin .coral .hive .hive2rel .functions .HiveNamedStructFunction ;
5747import com .linkedin .coral .spark .containers .SparkRelInfo ;
5848import com .linkedin .coral .spark .containers .SparkUDFInfo ;
5949import com .linkedin .coral .spark .utils .RelDataTypeToHiveTypeStringConverter ;
@@ -200,9 +190,8 @@ public RexNode visitCall(RexCall call) {
200190 RexCall updatedCall = (RexCall ) super .visitCall (call );
201191
202192 RexNode convertToNewNode =
203- convertToZeroBasedArrayIndex (updatedCall ).orElseGet (() -> convertToNamedStruct (updatedCall ).orElseGet (
204- () -> convertFuzzyUnionGenericProject (updatedCall ).orElseGet (() -> swapExtractUnionFunction (updatedCall )
205- .orElseGet (() -> removeCastToEnsureCorrectNullability (updatedCall ).orElse (updatedCall )))));
193+ convertToZeroBasedArrayIndex (updatedCall ).orElseGet (() -> convertFuzzyUnionGenericProject (updatedCall )
194+ .orElseGet (() -> removeCastToEnsureCorrectNullability (updatedCall ).orElse (updatedCall )));
206195
207196 return convertToNewNode ;
208197 }
@@ -227,25 +216,6 @@ private Optional<RexNode> convertToZeroBasedArrayIndex(RexCall call) {
227216 return Optional .empty ();
228217 }
229218
230- // Convert CAST(ROW: RECORD_TYPE) to named_struct
231- private Optional <RexNode > convertToNamedStruct (RexCall call ) {
232- if (call .getOperator ().equals (SqlStdOperatorTable .CAST )) {
233- RexNode operand = call .getOperands ().get (0 );
234- if (operand instanceof RexCall && ((RexCall ) operand ).getOperator ().equals (SqlStdOperatorTable .ROW )) {
235- RelRecordType recordType = (RelRecordType ) call .getType ();
236- List <RexNode > rowOperands = ((RexCall ) operand ).getOperands ();
237- List <RexNode > newOperands = new ArrayList <>(recordType .getFieldCount () * 2 );
238- for (int i = 0 ; i < recordType .getFieldCount (); i += 1 ) {
239- RelDataTypeField dataTypeField = recordType .getFieldList ().get (i );
240- newOperands .add (rexBuilder .makeLiteral (dataTypeField .getKey ()));
241- newOperands .add (rexBuilder .makeCast (dataTypeField .getType (), rowOperands .get (i )));
242- }
243- return Optional .of (rexBuilder .makeCall (call .getType (), new HiveNamedStructFunction (), newOperands ));
244- }
245- }
246- return Optional .empty ();
247- }
248-
249219 /**
250220 * Add the schema to GenericProject in Fuzzy Union
251221 * @param call a given RexCall
@@ -270,44 +240,6 @@ private Optional<RexNode> convertFuzzyUnionGenericProject(RexCall call) {
270240 return Optional .empty ();
271241 }
272242
273- /**
274- * Instead of leaving extract_union visible to (Hive)Spark, since we adopted the new exploded struct schema(
275- * a.k.a struct_tr) that is different from extract_union's output (a.k.a struct_ex) to interpret union in Coral IR,
276- * we need to swap the reference of "extract_union" to a new UDF that is coalescing the difference between
277- * struct_tr and struct_ex.
278- *
279- * See com.linkedin.coral.common.functions.FunctionReturnTypes#COALESCE_STRUCT_FUNCTION_RETURN_STRATEGY
280- * and its comments for more details.
281- *
282- * @param call the original extract_union function call.
283- * @return A new {@link RexNode} replacing the original extract_union call.
284- */
285- private Optional <RexNode > swapExtractUnionFunction (RexCall call ) {
286- if (call .getOperator ().getName ().equalsIgnoreCase ("extract_union" )) {
287- // Only when there's a necessity to register coalesce_struct UDF
288- sparkUDFInfos .add (new SparkUDFInfo ("com.linkedin.coalescestruct.GenericUDFCoalesceStruct" , "coalesce_struct" ,
289- ImmutableList .of (URI .create ("ivy://com.linkedin.coalesce-struct:coalesce-struct-impl:+" )),
290- SparkUDFInfo .UDFTYPE .HIVE_CUSTOM_UDF ));
291-
292- // one arg case: extract_union(field_name)
293- if (call .getOperands ().size () == 1 ) {
294- return Optional .of (rexBuilder .makeCall (
295- createUDF ("coalesce_struct" , CoalesceStructUtility .COALESCE_STRUCT_FUNCTION_RETURN_STRATEGY ),
296- call .getOperands ()));
297- }
298- // two arg case: extract_union(field_name, ordinal)
299- else if (call .getOperands ().size () == 2 ) {
300- int ordinal = ((RexLiteral ) call .getOperands ().get (1 )).getValueAs (Integer .class ) + 1 ;
301- List <RexNode > operandsCopy = Lists .newArrayList (call .getOperands ());
302- operandsCopy .set (1 , rexBuilder .makeExactLiteral (new BigDecimal (ordinal )));
303- return Optional .of (rexBuilder .makeCall (
304- createUDF ("coalesce_struct" , CoalesceStructUtility .COALESCE_STRUCT_FUNCTION_RETURN_STRATEGY ),
305- operandsCopy ));
306- }
307- }
308- return Optional .empty ();
309- }
310-
311243 /**
312244 * Calcite entails the nullability of an expression by casting it to the correct nullable type.
313245 * However, for complex types like ARRAY<STRING NOT NULL> (element non-nullable, but top-level nullable),
@@ -336,10 +268,5 @@ private Optional<RexNode> removeCastToEnsureCorrectNullability(RexCall call) {
336268 }
337269 return Optional .empty ();
338270 }
339-
340- private static SqlOperator createUDF (String udfName , SqlReturnTypeInference typeInference ) {
341- return new SqlUserDefinedFunction (new SqlIdentifier (ImmutableList .of (udfName ), SqlParserPos .ZERO ), typeInference ,
342- null , null , null , null );
343- }
344271 }
345272}
0 commit comments