Skip to content

Commit db993d7

Browse files
committed
fix namespace collisions
1 parent 9f8dfce commit db993d7

File tree

2 files changed

+207
-9
lines changed

2 files changed

+207
-9
lines changed

coral-schema/src/main/java/com/linkedin/coral/schema/avro/SchemaUtilities.java

Lines changed: 103 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -735,18 +735,109 @@ private static void appendFieldWithNewNamespace(@Nonnull Schema.Field field, @No
735735
}
736736

737737
private static Schema setupNestedNamespaceForRecord(@Nonnull Schema schema, @Nonnull String namespace) {
738+
// Detect collisions and build mapping for suffix assignment
739+
Map<String, List<String>> collisionMap = detectNamespaceCollisions(schema);
740+
return setupNestedNamespaceForRecord(schema, namespace, collisionMap);
741+
}
742+
743+
/**
744+
* Detects which record names would collide (same name, different original namespace) at the current level.
745+
* This includes records that appear directly in fields, within unions, arrays, or maps.
746+
*
747+
* @param schema The parent record schema to scan
748+
* @return Map from record name -> ordered list of original namespaces (only for records with collisions)
749+
*/
750+
private static Map<String, List<String>> detectNamespaceCollisions(@Nonnull Schema schema) {
751+
Map<String, List<String>> recordNameToOriginalNamespaces = new LinkedHashMap<>();
752+
753+
// Scan all fields to collect record types and their original namespaces in order
754+
for (Schema.Field field : schema.getFields()) {
755+
collectRecordTypes(field.schema(), recordNameToOriginalNamespaces);
756+
}
757+
758+
// Filter to only records that have collisions (multiple different original namespaces)
759+
Map<String, List<String>> collisions = new LinkedHashMap<>();
760+
for (Map.Entry<String, List<String>> entry : recordNameToOriginalNamespaces.entrySet()) {
761+
// Use a Set to check uniqueness while preserving order in List
762+
Set<String> uniqueNamespaces = new LinkedHashSet<>(entry.getValue());
763+
if (uniqueNamespaces.size() > 1) {
764+
collisions.put(entry.getKey(), entry.getValue());
765+
}
766+
}
767+
768+
return collisions;
769+
}
770+
771+
/**
772+
* Recursively collects all record types and their original namespaces from a schema.
773+
* This traverses through unions, arrays, and maps to find all nested record types.
774+
* The order of collection matches the order records appear in the schema.
775+
*
776+
* @param schema The schema to scan
777+
* @param recordNameToNamespaces Map to populate with record name -> ordered list of original namespaces
778+
*/
779+
private static void collectRecordTypes(@Nonnull Schema schema,
780+
@Nonnull Map<String, List<String>> recordNameToNamespaces) {
781+
switch (schema.getType()) {
782+
case RECORD:
783+
String originalNamespace = schema.getNamespace() != null ? schema.getNamespace() : "";
784+
recordNameToNamespaces.computeIfAbsent(schema.getName(), k -> new ArrayList<>()).add(originalNamespace);
785+
break;
786+
case UNION:
787+
for (Schema type : schema.getTypes()) {
788+
collectRecordTypes(type, recordNameToNamespaces);
789+
}
790+
break;
791+
case ARRAY:
792+
collectRecordTypes(schema.getElementType(), recordNameToNamespaces);
793+
break;
794+
case MAP:
795+
collectRecordTypes(schema.getValueType(), recordNameToNamespaces);
796+
break;
797+
case ENUM:
798+
case BOOLEAN:
799+
case BYTES:
800+
case DOUBLE:
801+
case FLOAT:
802+
case INT:
803+
case LONG:
804+
case STRING:
805+
case FIXED:
806+
case NULL:
807+
// These types don't contain nested records
808+
break;
809+
default:
810+
break;
811+
}
812+
}
813+
814+
private static Schema setupNestedNamespaceForRecord(@Nonnull Schema schema, @Nonnull String namespace,
815+
@Nonnull Map<String, List<String>> collisionMap) {
738816
Preconditions.checkNotNull(schema);
739817
Preconditions.checkNotNull(namespace);
818+
Preconditions.checkNotNull(collisionMap);
740819

741820
if (!schema.getType().equals(Schema.Type.RECORD)) {
742821
throw new IllegalArgumentException(
743822
"Input schemas must be of RECORD type. " + "The actual type is: " + schema.getType());
744823
}
745824

825+
// Add numeric suffix to avoid collisions when multiple fields have nested records with the same name
826+
String recordNamespace = namespace;
827+
if (collisionMap.containsKey(schema.getName())) {
828+
List<String> namespaces = collisionMap.get(schema.getName());
829+
String originalNamespace = schema.getNamespace() != null ? schema.getNamespace() : "";
830+
int index = namespaces.indexOf(originalNamespace);
831+
if (index >= 0) {
832+
// Append numeric suffix based on order encountered: -0, -1, -2, etc.
833+
recordNamespace = namespace + "-" + index;
834+
}
835+
}
836+
746837
SchemaBuilder.FieldAssembler<Schema> fieldAssembler =
747-
SchemaBuilder.record(schema.getName()).namespace(namespace).fields();
838+
SchemaBuilder.record(schema.getName()).namespace(recordNamespace).fields();
748839

749-
String nestedNamespace = namespace + "." + schema.getName();
840+
String nestedNamespace = recordNamespace + "." + schema.getName();
750841

751842
for (Schema.Field field : schema.getFields()) {
752843
switch (field.schema().getType()) {
@@ -765,7 +856,7 @@ private static Schema setupNestedNamespaceForRecord(@Nonnull Schema schema, @Non
765856
case MAP:
766857
case UNION:
767858
case ARRAY:
768-
Schema newFieldSchema = setupNestedNamespace(field.schema(), nestedNamespace);
859+
Schema newFieldSchema = setupNestedNamespace(field.schema(), nestedNamespace, collisionMap);
769860
Schema.Field newField = AvroCompatibilityHelper.createSchemaField(field.name(), newFieldSchema, field.doc(),
770861
defaultValue(field), field.order());
771862
appendField(newField, fieldAssembler);
@@ -774,7 +865,8 @@ private static Schema setupNestedNamespaceForRecord(@Nonnull Schema schema, @Non
774865
appendFieldWithNewNamespace(field, nestedNamespace, fieldAssembler);
775866
break;
776867
case RECORD:
777-
Schema recordSchemaWithNestedNamespace = setupNestedNamespaceForRecord(field.schema(), nestedNamespace);
868+
Schema recordSchemaWithNestedNamespace =
869+
setupNestedNamespaceForRecord(field.schema(), nestedNamespace, collisionMap);
778870
Schema.Field newRecordFiled = AvroCompatibilityHelper.createSchemaField(field.name(),
779871
recordSchemaWithNestedNamespace, field.doc(), defaultValue(field), field.order());
780872
appendField(newRecordFiled, fieldAssembler);
@@ -787,9 +879,11 @@ private static Schema setupNestedNamespaceForRecord(@Nonnull Schema schema, @Non
787879
return fieldAssembler.endRecord();
788880
}
789881

790-
private static Schema setupNestedNamespace(@Nonnull Schema schema, @Nonnull String namespace) {
882+
private static Schema setupNestedNamespace(@Nonnull Schema schema, @Nonnull String namespace,
883+
@Nonnull Map<String, List<String>> collisionMap) {
791884
Preconditions.checkNotNull(schema);
792885
Preconditions.checkNotNull(namespace);
886+
Preconditions.checkNotNull(collisionMap);
793887

794888
switch (schema.getType()) {
795889
case NULL:
@@ -805,21 +899,21 @@ private static Schema setupNestedNamespace(@Nonnull Schema schema, @Nonnull Stri
805899
return schema;
806900
case MAP:
807901
Schema valueSchema = schema.getValueType();
808-
Schema valueSchemaWithNestedNamespace = setupNestedNamespace(valueSchema, namespace);
902+
Schema valueSchemaWithNestedNamespace = setupNestedNamespace(valueSchema, namespace, collisionMap);
809903
return Schema.createMap(valueSchemaWithNestedNamespace);
810904
case ARRAY:
811905
Schema elementSchema = schema.getElementType();
812-
Schema elementSchemaWithNestedNamespace = setupNestedNamespace(elementSchema, namespace);
906+
Schema elementSchemaWithNestedNamespace = setupNestedNamespace(elementSchema, namespace, collisionMap);
813907
return Schema.createArray(elementSchemaWithNestedNamespace);
814908
case ENUM:
815909
return Schema.createEnum(schema.getName(), schema.getDoc(), namespace, schema.getEnumSymbols());
816910
case RECORD:
817-
return setupNestedNamespaceForRecord(schema, namespace);
911+
return setupNestedNamespaceForRecord(schema, namespace, collisionMap);
818912
case UNION:
819913
List<Schema> types = new ArrayList<>();
820914

821915
for (Schema type : schema.getTypes()) {
822-
Schema typeWithNestNamespace = setupNestedNamespace(type, namespace);
916+
Schema typeWithNestNamespace = setupNestedNamespace(type, namespace, collisionMap);
823917
types.add(typeWithNestNamespace);
824918
}
825919
return Schema.createUnion(types);

coral-schema/src/test/java/com/linkedin/coral/schema/avro/SchemaUtilitiesTests.java

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,4 +90,108 @@ public void testToNullableSchema() {
9090

9191
Assert.assertEquals(outputSchema.toString(true), TestUtils.loadSchema("testToNullableSchema-expected.avsc"));
9292
}
93+
94+
/**
95+
* Test to verify that setupNameAndNamespace preserves original namespaces for nested records with the same name.
96+
* This prevents namespace collisions when two fields have nested records with the same name but different original namespaces.
97+
*/
98+
@Test
99+
public void testSetupNameAndNamespacePreservesOriginalNamespace() {
100+
// Create first nested record with namespace "com.foo.bar"
101+
Schema nestedRecord1 = SchemaBuilder.record("FooRecord").namespace("com.foo.bar").fields().name("field1")
102+
.type().intType().noDefault().endRecord();
103+
104+
// Create second nested record with the same name but different namespace "com.baz.qux"
105+
Schema nestedRecord2 = SchemaBuilder.record("FooRecord").namespace("com.baz.qux").fields().name("field2")
106+
.type().stringType().noDefault().endRecord();
107+
108+
// Create nullable unions for both nested records
109+
Schema nullableRecord1 = Schema.createUnion(Schema.create(Schema.Type.NULL), nestedRecord1);
110+
Schema nullableRecord2 = Schema.createUnion(Schema.create(Schema.Type.NULL), nestedRecord2);
111+
112+
// Create parent schema with two fields containing the nested records
113+
Schema parentSchema = SchemaBuilder.record("ParentRecord").namespace("com.parent").fields().name("contextV1")
114+
.type(nullableRecord1).noDefault().name("contextV2").type(nullableRecord2).noDefault().endRecord();
115+
116+
// Apply setupNameAndNamespace
117+
Schema resultSchema = SchemaUtilities.setupNameAndNamespace(parentSchema, "ParentRecord", "com.parent.test");
118+
119+
// Extract the nested record schemas from the result
120+
Schema.Field contextV1Field = resultSchema.getField("contextV1");
121+
Schema.Field contextV2Field = resultSchema.getField("contextV2");
122+
123+
// Get the non-null type from the union
124+
Schema resultRecord1 = contextV1Field.schema().getTypes().get(1);
125+
Schema resultRecord2 = contextV2Field.schema().getTypes().get(1);
126+
127+
// Verify that both records still have different namespaces (preserving original namespace info)
128+
// The new namespace should incorporate the original namespace to avoid conflicts
129+
String namespace1 = resultRecord1.getNamespace();
130+
String namespace2 = resultRecord2.getNamespace();
131+
132+
// Both records have the same name
133+
Assert.assertEquals(resultRecord1.getName(), "FooRecord");
134+
Assert.assertEquals(resultRecord2.getName(), "FooRecord");
135+
136+
// But they should have different namespaces with numeric suffixes to avoid conflicts
137+
Assert.assertNotEquals(namespace1, namespace2,
138+
"Namespaces should be different to avoid conflicts. Got namespace1: " + namespace1 + ", namespace2: "
139+
+ namespace2);
140+
141+
// Verify that numeric suffixes are appended to distinguish the colliding records
142+
Assert.assertTrue(namespace1.endsWith("-0") || namespace1.endsWith("-1"),
143+
"First record namespace should have numeric suffix. Got: " + namespace1);
144+
Assert.assertTrue(namespace2.endsWith("-0") || namespace2.endsWith("-1"),
145+
"Second record namespace should have numeric suffix. Got: " + namespace2);
146+
}
147+
148+
/**
149+
* Test to verify that collision detection works for direct nested records (not in unions).
150+
* When two fields have direct nested records with the same name but different original namespaces,
151+
* the system should detect the collision and preserve the original namespaces.
152+
*/
153+
@Test
154+
public void testSetupNameAndNamespaceDetectsDirectRecordCollisions() {
155+
// Create first nested record with namespace ending in "admin"
156+
Schema nestedRecord1 = SchemaBuilder.record("ConfigRecord").namespace("com.foo.admin").fields().name("setting1")
157+
.type().intType().noDefault().endRecord();
158+
159+
// Create second nested record with the same name but namespace ending in "client"
160+
Schema nestedRecord2 = SchemaBuilder.record("ConfigRecord").namespace("com.bar.client").fields().name("setting2")
161+
.type().stringType().noDefault().endRecord();
162+
163+
// Create parent schema with two fields containing the nested records directly (NOT in unions)
164+
Schema parentSchema = SchemaBuilder.record("ApplicationConfig").namespace("com.app").fields().name("serviceConfig1")
165+
.type(nestedRecord1).noDefault().name("serviceConfig2").type(nestedRecord2).noDefault().endRecord();
166+
167+
// Apply setupNameAndNamespace
168+
Schema resultSchema =
169+
SchemaUtilities.setupNameAndNamespace(parentSchema, "ApplicationConfig", "com.app.config");
170+
171+
// Extract the nested record schemas from the result
172+
Schema.Field config1Field = resultSchema.getField("serviceConfig1");
173+
Schema.Field config2Field = resultSchema.getField("serviceConfig2");
174+
175+
Schema resultRecord1 = config1Field.schema();
176+
Schema resultRecord2 = config2Field.schema();
177+
178+
// Verify that both records still have different namespaces (collision was detected and handled)
179+
String namespace1 = resultRecord1.getNamespace();
180+
String namespace2 = resultRecord2.getNamespace();
181+
182+
// Both records have the same name
183+
Assert.assertEquals(resultRecord1.getName(), "ConfigRecord");
184+
Assert.assertEquals(resultRecord2.getName(), "ConfigRecord");
185+
186+
// But they should have different namespaces with numeric suffixes because collision was detected
187+
Assert.assertNotEquals(namespace1, namespace2,
188+
"Namespaces should be different when collision is detected. Got namespace1: " + namespace1 + ", namespace2: "
189+
+ namespace2);
190+
191+
// Verify that numeric suffixes are appended to distinguish the colliding records
192+
Assert.assertTrue(namespace1.endsWith("-0") || namespace1.endsWith("-1"),
193+
"First record namespace should have numeric suffix. Got: " + namespace1);
194+
Assert.assertTrue(namespace2.endsWith("-0") || namespace2.endsWith("-1"),
195+
"Second record namespace should have numeric suffix. Got: " + namespace2);
196+
}
93197
}

0 commit comments

Comments
 (0)