@@ -321,6 +321,95 @@ public void shouldHandleSingleElementUnionsInArraysAndMaps() {
321321 "priority field should be non-nullable int" );
322322 }
323323
324+ @ Test
325+ public void shouldHandleUnionEncodedAsStruct () {
326+ // This test verifies that when Hive has unions encoded as structs with tag/field0/field1/...
327+ // and the Avro partner has the unions in their original form,
328+ // the merged schema correctly preserves nullability from the Avro union branches.
329+ // Tests both 2-way unions ["null", T] and 3-way unions ["null", T1, T2].
330+ // Expected: tag should be non-nullable, array items should be non-nullable,
331+ // and nested field nullability should be preserved from Avro.
332+
333+ String hive = "struct<"
334+ + "twowayunion:struct<tag:int,field0:array<struct<name:string,value:bigint>>>,"
335+ + "threewayunion:struct<tag:int,field0:bigint,field1:array<struct<description:string,metadata:string>>>"
336+ + ">" ;
337+
338+ // Avro partner schema with both 2-way and 3-way unions
339+ String avroSchemaLiteral = "{\" type\" :\" record\" ,\" name\" :\" TestRecord\" ,\" namespace\" :\" test\" ,\" fields\" :["
340+ // 2-way union: null or array
341+ + "{\" name\" :\" twoWayUnion\" ,\" type\" :[\" null\" ,{\" type\" :\" array\" ,\" items\" :{\" type\" :\" record\" ,\" name\" :\" TwoWayItem\" ,\" fields\" :["
342+ + "{\" name\" :\" name\" ,\" type\" :\" string\" },"
343+ + "{\" name\" :\" value\" ,\" type\" :[\" null\" ,\" long\" ]}"
344+ + "]}}]},"
345+ // 3-way union: null, long, or array
346+ + "{\" name\" :\" threeWayUnion\" ,\" type\" :[\" null\" ,\" long\" ,{\" type\" :\" array\" ,\" items\" :{\" type\" :\" record\" ,\" name\" :\" ThreeWayItem\" ,\" fields\" :["
347+ + "{\" name\" :\" description\" ,\" type\" :\" string\" },"
348+ + "{\" name\" :\" metadata\" ,\" type\" :[\" null\" ,\" string\" ]}"
349+ + "]}}]}"
350+ + "]}" ;
351+
352+ Schema avro = new Schema .Parser ().parse (avroSchemaLiteral );
353+ Schema merged = merge (hive , avro );
354+
355+ // ===== Test 2-way union =====
356+ Schema .Field twoWayField = merged .getField ("twoWayUnion" );
357+ assertNotNull (twoWayField , "twoWayUnion field should exist" );
358+ Schema twoWayStruct = twoWayField .schema ();
359+ assertEquals (twoWayStruct .getType (), Schema .Type .RECORD , "twoWayUnion should be a record (union encoded as struct)" );
360+
361+ // Verify tag field is non-nullable int
362+ Schema .Field twoWayTagField = twoWayStruct .getField ("tag" );
363+ assertNotNull (twoWayTagField , "tag field should exist in twoWayUnion" );
364+ assertEquals (twoWayTagField .schema ().getType (), Schema .Type .INT , "tag should be non-nullable int" );
365+
366+ // Extract field0 (the array branch)
367+ Schema .Field twoWayField0 = twoWayStruct .getField ("field0" );
368+ assertNotNull (twoWayField0 , "field0 should exist in twoWayUnion" );
369+ Schema twoWayArraySchema = SchemaUtilities .extractIfOption (twoWayField0 .schema ());
370+ assertEquals (twoWayArraySchema .getType (), Schema .Type .ARRAY , "field0 should be an array" );
371+
372+ // Verify array items are records (not ["null", record])
373+ Schema twoWayItemSchema = twoWayArraySchema .getElementType ();
374+ assertEquals (twoWayItemSchema .getType (), Schema .Type .RECORD ,
375+ "Array items should be records, not unions with null" );
376+
377+ // Verify nested field nullability: name is non-nullable, value is nullable
378+ assertEquals (twoWayItemSchema .getField ("name" ).schema ().getType (), Schema .Type .STRING ,
379+ "name field should be non-nullable string" );
380+ assertTrue (AvroSerdeUtils .isNullableType (twoWayItemSchema .getField ("value" ).schema ()),
381+ "value field should be nullable" );
382+
383+ // ===== Test 3-way union =====
384+ Schema .Field threeWayField = merged .getField ("threeWayUnion" );
385+ assertNotNull (threeWayField , "threeWayUnion field should exist" );
386+ Schema threeWayStruct = threeWayField .schema ();
387+ assertEquals (threeWayStruct .getType (), Schema .Type .RECORD ,
388+ "threeWayUnion should be a record (union encoded as struct)" );
389+
390+ // Verify tag field is non-nullable int
391+ Schema .Field threeWayTagField = threeWayStruct .getField ("tag" );
392+ assertNotNull (threeWayTagField , "tag field should exist in threeWayUnion" );
393+ assertEquals (threeWayTagField .schema ().getType (), Schema .Type .INT , "tag should be non-nullable int" );
394+
395+ // Extract field1 (the array branch - field0 is the long branch)
396+ Schema .Field threeWayField1 = threeWayStruct .getField ("field1" );
397+ assertNotNull (threeWayField1 , "field1 should exist in threeWayUnion" );
398+ Schema threeWayArraySchema = SchemaUtilities .extractIfOption (threeWayField1 .schema ());
399+ assertEquals (threeWayArraySchema .getType (), Schema .Type .ARRAY , "field1 should be an array" );
400+
401+ // Verify array items are records (not ["null", record])
402+ Schema threeWayItemSchema = threeWayArraySchema .getElementType ();
403+ assertEquals (threeWayItemSchema .getType (), Schema .Type .RECORD ,
404+ "Array items should be records, not unions with null" );
405+
406+ // Verify nested field nullability: description is non-nullable, metadata is nullable
407+ assertEquals (threeWayItemSchema .getField ("description" ).schema ().getType (), Schema .Type .STRING ,
408+ "description field should be non-nullable string" );
409+ assertTrue (AvroSerdeUtils .isNullableType (threeWayItemSchema .getField ("metadata" ).schema ()),
410+ "metadata field should be nullable" );
411+ }
412+
// TODO: add tests verifying that schema props are retained
// TODO: add tests for the explicit type-compatibility check between Hive and Avro primitives, once it is implemented
// TODO: add tests for the error case where a default value in Avro does not match the type from Hive
0 commit comments