Skip to content

Commit 22e1658

Browse files
authored
Add Support for Ignoring JSON Properties in Schema Comparison in own + vanilla codegen (#589)
* Add jsonignore props to SchemaBuilder, and both vanilla + own codegen * Use comparison util
1 parent 1bc561a commit 22e1658

File tree

10 files changed

+553
-65
lines changed

10 files changed

+553
-65
lines changed

avro-builder/builder/src/main/java/com/linkedin/avroutil1/builder/SchemaBuilder.java

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,12 @@ public static void main(String[] args) throws Exception {
122122
.defaultsTo("true")
123123
.describedAs("true/false");
124124

125+
OptionSpec<String> jsonPropsToIgnoreOpt = parser.accepts(
126+
"jsonPropsToIgnore",
127+
"Comma-separated JSON property names to ignore during schema equality comparison (e.g., custom props)")
128+
.withOptionalArg()
129+
.withValuesSeparatedBy(',');
130+
125131
//allow plugins to add CLI options
126132
for (BuilderPlugin plugin : plugins) {
127133
plugin.customizeCLI(parser);
@@ -261,6 +267,19 @@ public static void main(String[] args) throws Exception {
261267
}
262268
}
263269

270+
java.util.Set<String> jsonPropsToIgnore = new java.util.HashSet<>();
271+
if (options.has(jsonPropsToIgnoreOpt)) {
272+
List<String> vals = options.valuesOf(jsonPropsToIgnoreOpt);
273+
for (String v : vals) {
274+
if (v != null) {
275+
String trimmed = v.trim();
276+
if (!trimmed.isEmpty()) {
277+
jsonPropsToIgnore.add(trimmed);
278+
}
279+
}
280+
}
281+
}
282+
264283
//allow plugins to parse and validate their own added options
265284
for (BuilderPlugin plugin : plugins) {
266285
plugin.parseAndValidateOptions(options);
@@ -282,7 +301,8 @@ public static void main(String[] args) throws Exception {
282301
handleAvro702,
283302
handleUtf8EncodingInPutByIndex,
284303
skipCodegenIfSchemaOnClasspath,
285-
handleUtf8Encoding
304+
handleUtf8Encoding,
305+
jsonPropsToIgnore
286306
);
287307

288308
opConfig.validateParameters();

avro-builder/builder/src/main/java/com/linkedin/avroutil1/builder/operations/codegen/CodeGenOpConfig.java

Lines changed: 47 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,8 @@ public class CodeGenOpConfig {
5050
boolean utf8EncodingPutByIndex;
5151
boolean skipCodegenIfSchemaOnClasspath;
5252
boolean enableUtf8Encoding;
53+
// names of json properties to ignore during schema equality comparisons (e.g., custom props)
54+
java.util.Set<String> jsonPropsToIgnore;
5355

5456
@Deprecated
5557
public CodeGenOpConfig(
@@ -223,7 +225,8 @@ public CodeGenOpConfig(List<File> inputRoots,
223225
boolean avro702Handling,
224226
boolean handleUtf8EncodingInPutByIndex,
225227
boolean skipCodegenIfSchemaOnClasspath,
226-
boolean handleUtf8Encoding) {
228+
boolean handleUtf8Encoding,
229+
java.util.Set<String> jsonPropsToIgnore) {
227230
this.inputRoots = inputRoots;
228231
this.nonImportableSourceRoots = nonImportableSourceRoots;
229232
this.includeClasspath = includeClasspath;
@@ -240,6 +243,45 @@ public CodeGenOpConfig(List<File> inputRoots,
240243
this.utf8EncodingPutByIndex = handleUtf8EncodingInPutByIndex;
241244
this.skipCodegenIfSchemaOnClasspath = skipCodegenIfSchemaOnClasspath;
242245
this.enableUtf8Encoding = handleUtf8Encoding;
246+
this.jsonPropsToIgnore = jsonPropsToIgnore;
247+
}
248+
249+
/**
 * Overload that keeps the earlier 16-argument constructor signature available to existing callers.
 * Every argument is forwarded unchanged to the constructor that additionally accepts
 * {@code jsonPropsToIgnore}, with an empty set supplied for that last argument.
 */
public CodeGenOpConfig(
    List<File> inputRoots,
    List<File> nonImportableSourceRoots,
    boolean includeClasspath,
    File outputSpecificRecordClassesRoot,
    File outputExpandedSchemasRoot,
    List<File> resolverPath,
    CodeGenerator generatorType,
    DuplicateSchemaBehaviour dupBehaviour,
    List<String> duplicateSchemasToIgnore,
    StringRepresentation stringRepresentation,
    StringRepresentation methodStringRepresentation,
    AvroVersion minAvroVersion,
    boolean avro702Handling,
    boolean handleUtf8EncodingInPutByIndex,
    boolean skipCodegenIfSchemaOnClasspath,
    boolean handleUtf8Encoding
) {
  // delegate to the full constructor; no JSON properties are ignored on this path
  this(inputRoots, nonImportableSourceRoots, includeClasspath,
      outputSpecificRecordClassesRoot, outputExpandedSchemasRoot, resolverPath,
      generatorType, dupBehaviour, duplicateSchemasToIgnore,
      stringRepresentation, methodStringRepresentation, minAvroVersion,
      avro702Handling, handleUtf8EncodingInPutByIndex, skipCodegenIfSchemaOnClasspath,
      handleUtf8Encoding, java.util.Collections.emptySet());
}
244286

245287
/**
@@ -357,6 +399,10 @@ public boolean isUtf8EncodingEnabled() {
357399
return enableUtf8Encoding;
358400
}
359401

402+
public java.util.Set<String> getJsonPropsToIgnore() {
403+
return jsonPropsToIgnore;
404+
}
405+
360406
private void validateInput(Collection<File> files, String desc) {
361407
for (File f : files) {
362408
if (!f.exists()) {

avro-builder/builder/src/main/java/com/linkedin/avroutil1/builder/operations/codegen/own/AvroUtilOperationContextBuilder.java

Lines changed: 20 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -7,24 +7,26 @@
77
package com.linkedin.avroutil1.builder.operations.codegen.own;
88

99
import com.linkedin.avroutil1.builder.operations.OperationContext;
10+
import com.linkedin.avroutil1.builder.operations.SchemaSet;
1011
import com.linkedin.avroutil1.builder.operations.codegen.CodeGenOpConfig;
1112
import com.linkedin.avroutil1.builder.operations.codegen.OperationContextBuilder;
1213
import com.linkedin.avroutil1.builder.operations.codegen.util.AvscFileFinderUtil;
1314
import com.linkedin.avroutil1.builder.operations.codegen.vanilla.ClasspathSchemaSet;
1415
import com.linkedin.avroutil1.builder.operations.codegen.vanilla.ResolverPathSchemaSet;
15-
import com.linkedin.avroutil1.builder.operations.SchemaSet;
16+
import com.linkedin.avroutil1.builder.operations.codegen.util.SchemaComparisonUtil;
1617
import com.linkedin.avroutil1.compatibility.AvroCompatibilityHelper;
1718
import com.linkedin.avroutil1.compatibility.AvscGenerationConfig;
18-
import com.linkedin.avroutil1.compatibility.SchemaComparisonConfiguration;
1919
import com.linkedin.avroutil1.model.AvroNamedSchema;
2020
import com.linkedin.avroutil1.model.AvroSchema;
2121
import com.linkedin.avroutil1.model.SchemaOrRef;
2222
import com.linkedin.avroutil1.parser.avsc.AvroParseContext;
2323
import com.linkedin.avroutil1.parser.avsc.AvscParseResult;
2424
import com.linkedin.avroutil1.parser.avsc.AvscParser;
25-
import com.linkedin.avroutil1.util.ConfigurableAvroSchemaComparator;
26-
import com.linkedin.avroutil1.writer.avsc.AvscSchemaWriter;
27-
import com.linkedin.avroutil1.writer.avsc.AvscWriterConfig;
25+
import org.apache.avro.Schema;
26+
import org.slf4j.Logger;
27+
import org.slf4j.LoggerFactory;
28+
29+
2830
import java.io.File;
2931
import java.util.Collections;
3032
import java.util.HashSet;
@@ -34,9 +36,6 @@
3436
import java.util.StringJoiner;
3537
import java.util.stream.Collectors;
3638
import java.util.stream.Stream;
37-
import org.apache.avro.Schema;
38-
import org.slf4j.Logger;
39-
import org.slf4j.LoggerFactory;
4039

4140

4241
public class AvroUtilOperationContextBuilder implements OperationContextBuilder {
@@ -122,8 +121,7 @@ public OperationContext buildOperationContext(CodeGenOpConfig config) throws Exc
122121
if (cpSchema != null) {
123122
// check if the schema on classpath is the same as the one we are trying to generate
124123
AvroSchema avroSchemaFromClasspath = (new AvscParser()).parse(cpSchema.toString()).getTopLevelSchema();
125-
boolean areEqual = ConfigurableAvroSchemaComparator.equals(avroSchemaFromClasspath, schema,
126-
SchemaComparisonConfiguration.STRICT);
124+
boolean areEqual = SchemaComparisonUtil.equalsAvroSchema(avroSchemaFromClasspath, schema, config.getJsonPropsToIgnore());
127125
if (!areEqual) {
128126
throw new IllegalStateException("Schema with name " + fullName
129127
+ " is defined in the filesystem and on the classpath, but the two schemas are not equal.");
@@ -155,28 +153,26 @@ public OperationContext buildOperationContext(CodeGenOpConfig config) throws Exc
155153
System.err.println("WARNING: schema " + fqcn + " found in 2+ places: " + allFilesString);
156154
break;
157155
case FAIL_IF_DIFFERENT:
158-
String baseSchema = null;
156+
AvroNamedSchema baseNamed = null;
159157
AvscParseResult baseSchemaResult = null;
158+
// Use SchemaComparisonUtil to build the comparison configuration
160159
for (AvscParseResult duplicateParseResult : duplicateEntry.getValue()) {
161-
if (baseSchema == null) {
162-
baseSchema = new AvscSchemaWriter().generateAvsc(duplicateParseResult.getDefinedSchema(fqcn),
163-
AvscWriterConfig.CORRECT_MITIGATED);
160+
AvroNamedSchema currentNamed = duplicateParseResult.getDefinedSchema(fqcn);
161+
if (baseNamed == null) {
162+
baseNamed = currentNamed;
164163
baseSchemaResult = duplicateParseResult;
165164
continue;
166165
}
167-
String currSchema = new AvscSchemaWriter().generateAvsc(duplicateParseResult.getDefinedSchema(fqcn),
168-
AvscWriterConfig.CORRECT_MITIGATED);
169-
170-
// TODO: compare canonical forms when canonicalization work is complete
166+
// Compare using SchemaComparisonUtil for consistent schema comparison
167+
boolean equal = SchemaComparisonUtil.equalsAvroSchema(baseNamed, currentNamed, config.getJsonPropsToIgnore());
171168
long baseLineNumber = baseSchemaResult.getDefinedSchema(fqcn).getCodeLocation().getEnd().getLineNumber();
172-
long duplicateLineNumber =
173-
duplicateParseResult.getDefinedSchema(fqcn).getCodeLocation().getEnd().getLineNumber();
174-
String errorMsg = "schema " + fqcn + " found DIFFERENT in 2+ places: " + baseSchemaResult.getURI() + "#L"
169+
long duplicateLineNumber = currentNamed.getCodeLocation().getEnd().getLineNumber();
170+
String msg = "schema " + fqcn + " found DIFFERENT in 2+ places: " + baseSchemaResult.getURI() + "#L"
175171
+ baseLineNumber + " and " + duplicateParseResult.getURI() + "#L" + duplicateLineNumber;
176-
if (!baseSchema.equals(currSchema)) {
177-
throw new RuntimeException("ERROR: " + errorMsg);
172+
if (!equal) {
173+
throw new RuntimeException("ERROR: " + msg);
178174
} else {
179-
System.err.println("WARNING: " + errorMsg);
175+
System.err.println("WARNING: " + msg);
180176
}
181177
}
182178
break;
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
/*
2+
* Copyright 2025 LinkedIn Corp.
3+
* Licensed under the BSD 2-Clause License (the "License").
4+
* See License in the project root for license information.
5+
*/
6+
7+
package com.linkedin.avroutil1.builder.operations.codegen.util;
8+
9+
import com.linkedin.avroutil1.compatibility.ConfigurableSchemaComparator;
10+
import com.linkedin.avroutil1.compatibility.SchemaComparisonConfiguration;
11+
import com.linkedin.avroutil1.model.AvroSchema;
12+
import java.util.Set;
13+
import org.apache.avro.Schema;
14+
15+
/**
16+
* Centralized helpers for schema equality checks used across builder flows.
17+
*/
18+
public final class SchemaComparisonUtil {
19+
20+
private SchemaComparisonUtil() { }
21+
22+
public static SchemaComparisonConfiguration buildConfig(Set<String> jsonPropsToIgnore) {
23+
SchemaComparisonConfiguration config = SchemaComparisonConfiguration.STRICT;
24+
if (jsonPropsToIgnore != null && !jsonPropsToIgnore.isEmpty()) {
25+
config = config.jsonPropNamesToIgnore(jsonPropsToIgnore);
26+
}
27+
return config;
28+
}
29+
30+
public static boolean equalsApacheSchema(Schema a, Schema b, Set<String> jsonPropsToIgnore) {
31+
if (a == null || b == null) {
32+
return a == b;
33+
}
34+
return ConfigurableSchemaComparator.equals(a, b, buildConfig(jsonPropsToIgnore));
35+
}
36+
37+
public static boolean equalsAvroSchema(AvroSchema a, AvroSchema b, Set<String> jsonPropsToIgnore) {
38+
if (a == null || b == null) {
39+
return a == b;
40+
}
41+
return com.linkedin.avroutil1.util.ConfigurableAvroSchemaComparator.equals(a, b, buildConfig(jsonPropsToIgnore));
42+
}
43+
}

0 commit comments

Comments
 (0)