Skip to content

Commit 2c111d3

Browse files
committed
add unit test
1 parent 2f41449 commit 2c111d3

File tree

1 file changed

+81
-0
lines changed

1 file changed

+81
-0
lines changed

coral-schema/src/test/java/com/linkedin/coral/schema/avro/MergeHiveSchemaWithAvroTests.java

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -240,6 +240,87 @@ public void shouldHandleUnions() {
240240
assertSchema(expected, merge(hive, avro));
241241
}
242242

243+
@Test
244+
public void shouldHandleSingleElementUnionsInArraysAndMaps() {
245+
// This test verifies that single-element unions in array items and map values are properly unwrapped
246+
// and the nested field nullability is preserved during schema merging.
247+
// This reproduces the fix for handling avro.schema.literal with single-element unions like:
248+
// - Array items: "items": [{"type":"record",...}]
249+
// - Map values: "values": [{"type":"record",...}]
250+
// These single-element unions appear in real-world Avro schemas stored as avro.schema.literal
251+
252+
String hive = "struct<id:bigint,items:array<struct<fooconfiguration:struct<name:string,urlvalue:string,source:string>,"
253+
+ "barconfiguration:struct<name:string,domain:string>>>,"
254+
+ "metadata:map<string,struct<category:string,priority:int>>>";
255+
256+
// Define an Avro schema literal similar to what would be stored in avro.schema.literal table property
257+
// Note the single-element unions in array items and map values: [{"type":"record",...}]
258+
String avroSchemaLiteral =
259+
"{\"type\":\"record\",\"name\":\"test_complex_array_table\",\"namespace\":\"com.example.test\",\"fields\":["
260+
+ "{\"name\":\"id\",\"type\":[\"null\",\"long\"],\"default\":null},"
261+
+ "{\"name\":\"items\",\"type\":[\"null\",{\"type\":\"array\",\"items\":[{\"type\":\"record\",\"name\":\"ItemConfig\",\"namespace\":\"com.example.data\",\"fields\":["
262+
+ "{\"name\":\"fooConfiguration\",\"type\":[\"null\",{\"type\":\"record\",\"name\":\"FooConfiguration\",\"fields\":["
263+
+ "{\"name\":\"name\",\"type\":\"string\"},"
264+
+ "{\"name\":\"urlValue\",\"type\":\"string\"},"
265+
+ "{\"name\":\"source\",\"type\":\"string\"}"
266+
+ "]}],\"default\":null},"
267+
+ "{\"name\":\"barConfiguration\",\"type\":[\"null\",{\"type\":\"record\",\"name\":\"BarConfiguration\",\"fields\":["
268+
+ "{\"name\":\"name\",\"type\":\"string\"},"
269+
+ "{\"name\":\"domain\",\"type\":\"string\"}"
270+
+ "]}],\"default\":null}"
271+
+ "]}]}],\"default\":null},"
272+
+ "{\"name\":\"metadata\",\"type\":[\"null\",{\"type\":\"map\",\"values\":[{\"type\":\"record\",\"name\":\"MetadataValue\",\"namespace\":\"com.example.data\",\"fields\":["
273+
+ "{\"name\":\"category\",\"type\":\"string\"},"
274+
+ "{\"name\":\"priority\",\"type\":\"int\"}"
275+
+ "]}]}],\"default\":null}"
276+
+ "]}";
277+
278+
Schema avro = new Schema.Parser().parse(avroSchemaLiteral);
279+
Schema merged = merge(hive, avro);
280+
281+
// Verify that single-element unions were properly handled
282+
// Extract items array
283+
Schema mergedItemsArray = SchemaUtilities.extractIfOption(merged.getField("items").schema());
284+
Schema mergedItemConfig = mergedItemsArray.getElementType();
285+
286+
// The fix ensures that single-element union [ItemConfig] is unwrapped to ItemConfig
287+
// Without the fix, this would fail because the union wouldn't be unwrapped
288+
assertEquals(mergedItemConfig.getType(), Schema.Type.RECORD, "Array element should be a record, not a union");
289+
290+
// Extract fooConfiguration and verify nested field nullability is preserved
291+
Schema mergedFooConfig =
292+
SchemaUtilities.extractIfOption(mergedItemConfig.getField("fooConfiguration").schema());
293+
294+
// Nested fields should be non-nullable (required) as defined in the avro.schema.literal
295+
assertEquals(mergedFooConfig.getField("name").schema().getType(), Schema.Type.STRING,
296+
"name field should be non-nullable string");
297+
assertEquals(mergedFooConfig.getField("urlValue").schema().getType(), Schema.Type.STRING,
298+
"urlValue field should be non-nullable string");
299+
assertEquals(mergedFooConfig.getField("source").schema().getType(), Schema.Type.STRING,
300+
"source field should be non-nullable string");
301+
302+
// Verify barConfiguration nested fields
303+
Schema mergedBarConfig =
304+
SchemaUtilities.extractIfOption(mergedItemConfig.getField("barConfiguration").schema());
305+
assertEquals(mergedBarConfig.getField("name").schema().getType(), Schema.Type.STRING,
306+
"bar name field should be non-nullable string");
307+
assertEquals(mergedBarConfig.getField("domain").schema().getType(), Schema.Type.STRING,
308+
"domain field should be non-nullable string");
309+
310+
// Extract metadata map value and verify
311+
// Ensures that single-element union [MetadataValue] is unwrapped to MetadataValue
312+
Schema mergedMetadataMap = SchemaUtilities.extractIfOption(merged.getField("metadata").schema());
313+
Schema mergedMetadataValue = mergedMetadataMap.getValueType();
314+
315+
assertEquals(mergedMetadataValue.getType(), Schema.Type.RECORD, "Map value should be a record, not a union");
316+
317+
// Fields in MetadataValue should be non-nullable as defined in avro.schema.literal
318+
assertEquals(mergedMetadataValue.getField("category").schema().getType(), Schema.Type.STRING,
319+
"category field should be non-nullable string");
320+
assertEquals(mergedMetadataValue.getField("priority").schema().getType(), Schema.Type.INT,
321+
"priority field should be non-nullable int");
322+
}
323+
243324
// TODO: tests to retain schema props
244325
// TODO: tests for explicit type compatibility check between hive and avro primitives, once we implement it
245326
// TODO: tests for error case => default value in Avro does not match with type from hive

0 commit comments

Comments
 (0)