Skip to content

Commit 97a136e

Browse files
committed
Also detect deeply nested record-name collisions in records
1 parent db993d7 commit 97a136e

File tree

2 files changed

+67
-0
lines changed

2 files changed

+67
-0
lines changed

coral-schema/src/main/java/com/linkedin/coral/schema/avro/SchemaUtilities.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -782,6 +782,10 @@ private static void collectRecordTypes(@Nonnull Schema schema,
782782
case RECORD:
783783
String originalNamespace = schema.getNamespace() != null ? schema.getNamespace() : "";
784784
recordNameToNamespaces.computeIfAbsent(schema.getName(), k -> new ArrayList<>()).add(originalNamespace);
785+
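// Recursively collect records from this record's fields to detect deeply nested collisions.
// NOTE(review): Avro permits self-referential record schemas; confirm that such a schema cannot reach this
// loop, or track visited records, otherwise this recursion may not terminate.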
786+
for (Schema.Field field : schema.getFields()) {
787+
collectRecordTypes(field.schema(), recordNameToNamespaces);
788+
}
785789
break;
786790
case UNION:
787791
for (Schema type : schema.getTypes()) {

coral-schema/src/test/java/com/linkedin/coral/schema/avro/SchemaUtilitiesTests.java

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -194,4 +194,67 @@ public void testSetupNameAndNamespaceDetectsDirectRecordCollisions() {
194194
Assert.assertTrue(namespace2.endsWith("-0") || namespace2.endsWith("-1"),
195195
"Second record namespace should have numeric suffix. Got: " + namespace2);
196196
}
197+
198+
/**
199+
* Verifies that record-name collision detection also works for deeply nested records.
200+
* This reproduces the real-world scenario where a record with the same name appears twice with different namespaces,
201+
* but both are nested inside an intermediate record, which is itself nested in the parent.
202+
*
203+
* Schema structure:
204+
* ParentRecord (top-level)
205+
* └─ intermediateField (IntermediateRecord - contains the colliding records)
206+
* ├─ collidingField1 (CollidingRecord from com.foo.v1 namespace)
207+
* └─ collidingField2 (CollidingRecord from com.bar.v2 namespace)
208+
*/
209+
@Test
210+
public void testSetupNameAndNamespaceDetectsDeeplyNestedCollisions() {
211+
// Create two "CollidingRecord" records with the same name but different namespaces
212+
// These represent the deeply nested records that will collide
213+
Schema collidingRecord1 = SchemaBuilder.record("CollidingRecord").namespace("com.foo.v1").fields().name("field1")
214+
.type().stringType().noDefault().endRecord();
215+
216+
Schema collidingRecord2 = SchemaBuilder.record("CollidingRecord").namespace("com.bar.v2").fields().name("field2")
217+
.type().intType().noDefault().endRecord();
218+
219+
// Create an intermediate record that contains both colliding records
220+
// This represents the middle layer in the nesting hierarchy
221+
Schema intermediateRecord = SchemaBuilder.record("IntermediateRecord").namespace("com.intermediate").fields()
222+
.name("collidingField1").type(collidingRecord1).noDefault().name("collidingField2").type(collidingRecord2)
223+
.noDefault().endRecord();
224+
225+
// Create top-level parent schema that contains the intermediate record
226+
Schema parentSchema = SchemaBuilder.record("ParentRecord").namespace("com.parent").fields().name("intermediateField")
227+
.type(intermediateRecord).noDefault().endRecord();
228+
229+
// Apply setupNameAndNamespace
230+
Schema resultSchema = SchemaUtilities.setupNameAndNamespace(parentSchema, "ParentRecord", "com.result");
231+
232+
// Navigate to the deeply nested colliding records
233+
Schema.Field intermediateField = resultSchema.getField("intermediateField");
234+
Schema intermediateSchema = intermediateField.schema();
235+
236+
Schema.Field collidingField1 = intermediateSchema.getField("collidingField1");
237+
Schema.Field collidingField2 = intermediateSchema.getField("collidingField2");
238+
239+
Schema resultColliding1 = collidingField1.schema();
240+
Schema resultColliding2 = collidingField2.schema();
241+
242+
String namespace1 = resultColliding1.getNamespace();
243+
String namespace2 = resultColliding2.getNamespace();
244+
245+
// Both records have the same name
246+
Assert.assertEquals(resultColliding1.getName(), "CollidingRecord");
247+
Assert.assertEquals(resultColliding2.getName(), "CollidingRecord");
248+
249+
// But they should have different namespaces with numeric suffixes because collision was detected
250+
Assert.assertNotEquals(namespace1, namespace2,
251+
"Namespaces should be different when collision is detected in deeply nested records. Got namespace1: "
252+
+ namespace1 + ", namespace2: " + namespace2);
253+
254+
// Verify that numeric suffixes are appended to distinguish the colliding records
255+
Assert.assertTrue(namespace1.endsWith("-0") || namespace1.endsWith("-1"),
256+
"First colliding record namespace should have numeric suffix. Got: " + namespace1);
257+
Assert.assertTrue(namespace2.endsWith("-0") || namespace2.endsWith("-1"),
258+
"Second colliding record namespace should have numeric suffix. Got: " + namespace2);
259+
}
197260
}

0 commit comments

Comments
 (0)