Skip to content

Commit d274580

Browse files
committed
test that reproduces the error
1 parent 71e9ac6 commit d274580

File tree

1 file changed

+89
-0
lines changed

1 file changed

+89
-0
lines changed

coral-schema/src/test/java/com/linkedin/coral/schema/avro/MergeHiveSchemaWithAvroTests.java

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -321,6 +321,95 @@ public void shouldHandleSingleElementUnionsInArraysAndMaps() {
321321
"priority field should be non-nullable int");
322322
}
323323

324+
@Test
325+
public void shouldHandleUnionEncodedAsStruct() {
326+
// This test verifies that when Hive has unions encoded as structs with tag/field0/field1/...
327+
// and the Avro partner has the unions in their original form,
328+
// the merged schema correctly preserves nullability from the Avro union branches.
329+
// Tests both 2-way unions ["null", T] and 3-way unions ["null", T1, T2].
330+
// Expected: tag should be non-nullable, array items should be non-nullable,
331+
// and nested field nullability should be preserved from Avro.
332+
333+
String hive = "struct<"
334+
+ "twowayunion:struct<tag:int,field0:array<struct<name:string,value:bigint>>>,"
335+
+ "threewayunion:struct<tag:int,field0:bigint,field1:array<struct<description:string,metadata:string>>>"
336+
+ ">";
337+
338+
// Avro partner schema with both 2-way and 3-way unions
339+
String avroSchemaLiteral = "{\"type\":\"record\",\"name\":\"TestRecord\",\"namespace\":\"test\",\"fields\":["
340+
// 2-way union: null or array
341+
+ "{\"name\":\"twoWayUnion\",\"type\":[\"null\",{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"TwoWayItem\",\"fields\":["
342+
+ "{\"name\":\"name\",\"type\":\"string\"},"
343+
+ "{\"name\":\"value\",\"type\":[\"null\",\"long\"]}"
344+
+ "]}}]},"
345+
// 3-way union: null, long, or array
346+
+ "{\"name\":\"threeWayUnion\",\"type\":[\"null\",\"long\",{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"ThreeWayItem\",\"fields\":["
347+
+ "{\"name\":\"description\",\"type\":\"string\"},"
348+
+ "{\"name\":\"metadata\",\"type\":[\"null\",\"string\"]}"
349+
+ "]}}]}"
350+
+ "]}";
351+
352+
Schema avro = new Schema.Parser().parse(avroSchemaLiteral);
353+
Schema merged = merge(hive, avro);
354+
355+
// ===== Test 2-way union =====
356+
Schema.Field twoWayField = merged.getField("twoWayUnion");
357+
assertNotNull(twoWayField, "twoWayUnion field should exist");
358+
Schema twoWayStruct = twoWayField.schema();
359+
assertEquals(twoWayStruct.getType(), Schema.Type.RECORD, "twoWayUnion should be a record (union encoded as struct)");
360+
361+
// Verify tag field is non-nullable int
362+
Schema.Field twoWayTagField = twoWayStruct.getField("tag");
363+
assertNotNull(twoWayTagField, "tag field should exist in twoWayUnion");
364+
assertEquals(twoWayTagField.schema().getType(), Schema.Type.INT, "tag should be non-nullable int");
365+
366+
// Extract field0 (the array branch)
367+
Schema.Field twoWayField0 = twoWayStruct.getField("field0");
368+
assertNotNull(twoWayField0, "field0 should exist in twoWayUnion");
369+
Schema twoWayArraySchema = SchemaUtilities.extractIfOption(twoWayField0.schema());
370+
assertEquals(twoWayArraySchema.getType(), Schema.Type.ARRAY, "field0 should be an array");
371+
372+
// Verify array items are records (not ["null", record])
373+
Schema twoWayItemSchema = twoWayArraySchema.getElementType();
374+
assertEquals(twoWayItemSchema.getType(), Schema.Type.RECORD,
375+
"Array items should be records, not unions with null");
376+
377+
// Verify nested field nullability: name is non-nullable, value is nullable
378+
assertEquals(twoWayItemSchema.getField("name").schema().getType(), Schema.Type.STRING,
379+
"name field should be non-nullable string");
380+
assertTrue(AvroSerdeUtils.isNullableType(twoWayItemSchema.getField("value").schema()),
381+
"value field should be nullable");
382+
383+
// ===== Test 3-way union =====
384+
Schema.Field threeWayField = merged.getField("threeWayUnion");
385+
assertNotNull(threeWayField, "threeWayUnion field should exist");
386+
Schema threeWayStruct = threeWayField.schema();
387+
assertEquals(threeWayStruct.getType(), Schema.Type.RECORD,
388+
"threeWayUnion should be a record (union encoded as struct)");
389+
390+
// Verify tag field is non-nullable int
391+
Schema.Field threeWayTagField = threeWayStruct.getField("tag");
392+
assertNotNull(threeWayTagField, "tag field should exist in threeWayUnion");
393+
assertEquals(threeWayTagField.schema().getType(), Schema.Type.INT, "tag should be non-nullable int");
394+
395+
// Extract field1 (the array branch - field0 is the long branch)
396+
Schema.Field threeWayField1 = threeWayStruct.getField("field1");
397+
assertNotNull(threeWayField1, "field1 should exist in threeWayUnion");
398+
Schema threeWayArraySchema = SchemaUtilities.extractIfOption(threeWayField1.schema());
399+
assertEquals(threeWayArraySchema.getType(), Schema.Type.ARRAY, "field1 should be an array");
400+
401+
// Verify array items are records (not ["null", record])
402+
Schema threeWayItemSchema = threeWayArraySchema.getElementType();
403+
assertEquals(threeWayItemSchema.getType(), Schema.Type.RECORD,
404+
"Array items should be records, not unions with null");
405+
406+
// Verify nested field nullability: description is non-nullable, metadata is nullable
407+
assertEquals(threeWayItemSchema.getField("description").schema().getType(), Schema.Type.STRING,
408+
"description field should be non-nullable string");
409+
assertTrue(AvroSerdeUtils.isNullableType(threeWayItemSchema.getField("metadata").schema()),
410+
"metadata field should be nullable");
411+
}
412+
324413
// TODO: tests to retain schema props
325414
// TODO: tests for explicit type compatibility check between hive and avro primitives, once we implement it
326415
// TODO: tests for error case => default value in Avro does not match with type from hive

0 commit comments

Comments
 (0)