@@ -240,6 +240,87 @@ public void shouldHandleUnions() {
240240 assertSchema (expected , merge (hive , avro ));
241241 }
242242
243+ @ Test
244+ public void shouldHandleSingleElementUnionsInArraysAndMaps () {
245+ // This test verifies that single-element unions in array items and map values are properly unwrapped
246+ // and the nested field nullability is preserved during schema merging.
247+ // This reproduces the fix for handling avro.schema.literal with single-element unions like:
248+ // - Array items: "items": [{"type":"record",...}]
249+ // - Map values: "values": [{"type":"record",...}]
250+ // These single-element unions appear in real-world Avro schemas stored as avro.schema.literal
251+
252+ String hive = "struct<id:bigint,items:array<struct<fooconfiguration:struct<name:string,urlvalue:string,source:string>,"
253+ + "barconfiguration:struct<name:string,domain:string>>>,"
254+ + "metadata:map<string,struct<category:string,priority:int>>>" ;
255+
256+ // Define an Avro schema literal similar to what would be stored in avro.schema.literal table property
257+ // Note the single-element unions in array items and map values: [{"type":"record",...}]
258+ String avroSchemaLiteral =
259+ "{\" type\" :\" record\" ,\" name\" :\" test_complex_array_table\" ,\" namespace\" :\" com.example.test\" ,\" fields\" :["
260+ + "{\" name\" :\" id\" ,\" type\" :[\" null\" ,\" long\" ],\" default\" :null},"
261+ + "{\" name\" :\" items\" ,\" type\" :[\" null\" ,{\" type\" :\" array\" ,\" items\" :[{\" type\" :\" record\" ,\" name\" :\" ItemConfig\" ,\" namespace\" :\" com.example.data\" ,\" fields\" :["
262+ + "{\" name\" :\" fooConfiguration\" ,\" type\" :[\" null\" ,{\" type\" :\" record\" ,\" name\" :\" FooConfiguration\" ,\" fields\" :["
263+ + "{\" name\" :\" name\" ,\" type\" :\" string\" },"
264+ + "{\" name\" :\" urlValue\" ,\" type\" :\" string\" },"
265+ + "{\" name\" :\" source\" ,\" type\" :\" string\" }"
266+ + "]}],\" default\" :null},"
267+ + "{\" name\" :\" barConfiguration\" ,\" type\" :[\" null\" ,{\" type\" :\" record\" ,\" name\" :\" BarConfiguration\" ,\" fields\" :["
268+ + "{\" name\" :\" name\" ,\" type\" :\" string\" },"
269+ + "{\" name\" :\" domain\" ,\" type\" :\" string\" }"
270+ + "]}],\" default\" :null}"
271+ + "]}]}],\" default\" :null},"
272+ + "{\" name\" :\" metadata\" ,\" type\" :[\" null\" ,{\" type\" :\" map\" ,\" values\" :[{\" type\" :\" record\" ,\" name\" :\" MetadataValue\" ,\" namespace\" :\" com.example.data\" ,\" fields\" :["
273+ + "{\" name\" :\" category\" ,\" type\" :\" string\" },"
274+ + "{\" name\" :\" priority\" ,\" type\" :\" int\" }"
275+ + "]}]}],\" default\" :null}"
276+ + "]}" ;
277+
278+ Schema avro = new Schema .Parser ().parse (avroSchemaLiteral );
279+ Schema merged = merge (hive , avro );
280+
281+ // Verify that single-element unions were properly handled
282+ // Extract items array
283+ Schema mergedItemsArray = SchemaUtilities .extractIfOption (merged .getField ("items" ).schema ());
284+ Schema mergedItemConfig = mergedItemsArray .getElementType ();
285+
286+ // The fix ensures that single-element union [ItemConfig] is unwrapped to ItemConfig
287+ // Without the fix, this would fail because the union wouldn't be unwrapped
288+ assertEquals (mergedItemConfig .getType (), Schema .Type .RECORD , "Array element should be a record, not a union" );
289+
290+ // Extract fooConfiguration and verify nested field nullability is preserved
291+ Schema mergedFooConfig =
292+ SchemaUtilities .extractIfOption (mergedItemConfig .getField ("fooConfiguration" ).schema ());
293+
294+ // Nested fields should be non-nullable (required) as defined in the avro.schema.literal
295+ assertEquals (mergedFooConfig .getField ("name" ).schema ().getType (), Schema .Type .STRING ,
296+ "name field should be non-nullable string" );
297+ assertEquals (mergedFooConfig .getField ("urlValue" ).schema ().getType (), Schema .Type .STRING ,
298+ "urlValue field should be non-nullable string" );
299+ assertEquals (mergedFooConfig .getField ("source" ).schema ().getType (), Schema .Type .STRING ,
300+ "source field should be non-nullable string" );
301+
302+ // Verify barConfiguration nested fields
303+ Schema mergedBarConfig =
304+ SchemaUtilities .extractIfOption (mergedItemConfig .getField ("barConfiguration" ).schema ());
305+ assertEquals (mergedBarConfig .getField ("name" ).schema ().getType (), Schema .Type .STRING ,
306+ "bar name field should be non-nullable string" );
307+ assertEquals (mergedBarConfig .getField ("domain" ).schema ().getType (), Schema .Type .STRING ,
308+ "domain field should be non-nullable string" );
309+
310+ // Extract metadata map value and verify
311+ // Ensures that single-element union [MetadataValue] is unwrapped to MetadataValue
312+ Schema mergedMetadataMap = SchemaUtilities .extractIfOption (merged .getField ("metadata" ).schema ());
313+ Schema mergedMetadataValue = mergedMetadataMap .getValueType ();
314+
315+ assertEquals (mergedMetadataValue .getType (), Schema .Type .RECORD , "Map value should be a record, not a union" );
316+
317+ // Fields in MetadataValue should be non-nullable as defined in avro.schema.literal
318+ assertEquals (mergedMetadataValue .getField ("category" ).schema ().getType (), Schema .Type .STRING ,
319+ "category field should be non-nullable string" );
320+ assertEquals (mergedMetadataValue .getField ("priority" ).schema ().getType (), Schema .Type .INT ,
321+ "priority field should be non-nullable int" );
322+ }
323+
243324 // TODO: tests to retain schema props
244325 // TODO: tests for explicit type compatibility check between hive and avro primitives, once we implement it
245326 // TODO: tests for error case => default value in Avro does not match with type from hive
0 commit comments