|
5 | 5 | */ |
6 | 6 | package com.linkedin.coral.schema.avro; |
7 | 7 |
|
| 8 | +import java.util.LinkedHashMap; |
8 | 9 | import java.util.List; |
| 10 | +import java.util.Map; |
| 11 | +import java.util.function.Function; |
| 12 | +import java.util.stream.Collectors; |
9 | 13 |
|
10 | 14 | import com.google.common.collect.Lists; |
11 | 15 | import com.linkedin.avroutil1.compatibility.AvroCompatibilityHelper; |
12 | 16 |
|
13 | 17 | import org.apache.avro.Schema; |
| 18 | +import org.apache.avro.generic.GenericData; |
14 | 19 |
|
15 | 20 |
|
16 | 21 | /** |
@@ -62,12 +67,163 @@ public Schema primitive(Schema primitive) { |
62 | 67 | return primitive; |
63 | 68 | } |
64 | 69 |
|
| 70 | + /** |
| 71 | + * Lowercases a field, including lowercasing any field names within its default value. |
| 72 | + * @param field The original field |
| 73 | + * @param schema The lowercased schema for this field |
| 74 | + * @return A new field with lowercased name and lowercased default value |
| 75 | + */ |
65 | 76 | private Schema.Field lowercaseField(Schema.Field field, Schema schema) { |
| 77 | + Object originalDefaultValue = SchemaUtilities.defaultValue(field); |
| 78 | + Object lowercasedDefaultValue = lowercaseDefaultValue(originalDefaultValue, schema); |
| 79 | + |
66 | 80 | Schema.Field lowercasedField = AvroCompatibilityHelper.createSchemaField(field.name().toLowerCase(), schema, |
67 | | - field.doc(), SchemaUtilities.defaultValue(field), field.order()); |
| 81 | + field.doc(), lowercasedDefaultValue, field.order()); |
68 | 82 |
|
69 | 83 | SchemaUtilities.replicateFieldProps(field, lowercasedField); |
70 | 84 |
|
71 | 85 | return lowercasedField; |
72 | 86 | } |
| 87 | + |
| 88 | + /** |
| 89 | + * Recursively lowercases field names within default values based on the schema structure. |
| 90 | + * This handles complex types like records, maps, and arrays where field names appear in default values. |
| 91 | + * |
| 92 | + * @param defaultValue The original default value (can be null, primitive, Map, List, etc.) |
| 93 | + * @param schema The schema that describes the structure of this default value |
| 94 | + * @return The default value with all field names lowercased |
| 95 | + */ |
| 96 | + @SuppressWarnings("unchecked") |
| 97 | + private Object lowercaseDefaultValue(Object defaultValue, Schema schema) { |
| 98 | + if (defaultValue == null) { |
| 99 | + return null; |
| 100 | + } |
| 101 | + |
| 102 | + Schema actualSchema = schema; |
| 103 | + |
| 104 | + // Handle union types - get the actual schema based on the default value type |
| 105 | + if (schema.getType() == Schema.Type.UNION) { |
| 106 | + // For unions, the default value corresponds to the first type in the union |
| 107 | + actualSchema = schema.getTypes().get(0); |
| 108 | + } |
| 109 | + |
| 110 | + switch (actualSchema.getType()) { |
| 111 | + case RECORD: |
| 112 | + // For records, the default value can be either a Map or GenericData.Record |
| 113 | + if (defaultValue instanceof GenericData.Record) { |
| 114 | + GenericData.Record record = (GenericData.Record) defaultValue; |
| 115 | + return lowercaseRecordDefaultValue(actualSchema, lowercasedFieldName -> { |
| 116 | + // Find the matching field in the original record's schema (case-insensitive) |
| 117 | + Schema.Field originalField = record.getSchema().getField(lowercasedFieldName); |
| 118 | + if (originalField == null) { |
| 119 | + for (Schema.Field f : record.getSchema().getFields()) { |
| 120 | + if (f.name().equalsIgnoreCase(lowercasedFieldName)) { |
| 121 | + originalField = f; |
| 122 | + break; |
| 123 | + } |
| 124 | + } |
| 125 | + } |
| 126 | + return originalField != null ? record.get(originalField.pos()) : null; |
| 127 | + }); |
| 128 | + } else if (defaultValue instanceof Map) { |
| 129 | + Map<?, ?> recordMap = (Map<?, ?>) defaultValue; |
| 130 | + return lowercaseRecordDefaultValue(actualSchema, lowercasedFieldName -> { |
| 131 | + // Find the matching key in the original map (case-insensitive) |
| 132 | + String matchingKey = findMatchingKeyForLowercased(recordMap, lowercasedFieldName); |
| 133 | + return matchingKey != null ? recordMap.get(matchingKey) : null; |
| 134 | + }); |
| 135 | + } |
| 136 | + // If neither Map nor GenericData.Record, return as-is |
| 137 | + return defaultValue; |
| 138 | + |
| 139 | + case MAP: |
| 140 | + // For maps, lowercase the keys and recursively process values |
| 141 | + if (defaultValue instanceof Map) { |
| 142 | + Map<?, ?> mapValue = (Map<?, ?>) defaultValue; // Use wildcards to handle Utf8 keys |
| 143 | + Map<String, Object> lowercasedMap = new LinkedHashMap<>(); |
| 144 | + Schema valueSchema = actualSchema.getValueType(); |
| 145 | + |
| 146 | + for (Map.Entry<?, ?> entry : mapValue.entrySet()) { |
| 147 | + String originalKey = entry.getKey().toString(); // Handle both String and Utf8 |
| 148 | + String lowercasedKey = originalKey.toLowerCase(); |
| 149 | + Object lowercasedValue = lowercaseDefaultValue(entry.getValue(), valueSchema); |
| 150 | + lowercasedMap.put(lowercasedKey, lowercasedValue); |
| 151 | + } |
| 152 | + return lowercasedMap; |
| 153 | + } |
| 154 | + return defaultValue; |
| 155 | + |
| 156 | + case ARRAY: |
| 157 | + // For arrays, recursively process each element |
| 158 | + if (defaultValue instanceof List) { |
| 159 | + List<Object> arrayValue = (List<Object>) defaultValue; |
| 160 | + Schema elementSchema = actualSchema.getElementType(); |
| 161 | + |
| 162 | + return arrayValue.stream() |
| 163 | + .map(element -> lowercaseDefaultValue(element, elementSchema)) |
| 164 | + .collect(Collectors.toList()); |
| 165 | + } |
| 166 | + return defaultValue; |
| 167 | + |
| 168 | + case NULL: |
| 169 | + case BOOLEAN: |
| 170 | + case INT: |
| 171 | + case LONG: |
| 172 | + case FLOAT: |
| 173 | + case DOUBLE: |
| 174 | + case BYTES: |
| 175 | + case STRING: |
| 176 | + case ENUM: |
| 177 | + case FIXED: |
| 178 | + default: |
| 179 | + // Primitive types and others: return as-is |
| 180 | + return defaultValue; |
| 181 | + } |
| 182 | + } |
| 183 | + |
| 184 | + /** |
| 185 | + * Helper method that extracts the common logic for lowercasing record default values. |
| 186 | + * This handles both GenericData.Record and Map-based default values. |
| 187 | + * |
| 188 | + * @param actualSchema The lowercased schema for the record |
| 189 | + * @param valueExtractor Function that retrieves the original field value given a lowercased field name |
| 190 | + * @return A Map with lowercased field names and recursively lowercased values |
| 191 | + */ |
| 192 | + private Map<String, Object> lowercaseRecordDefaultValue(Schema actualSchema, |
| 193 | + Function<String, Object> valueExtractor) { |
| 194 | + Map<String, Object> lowercasedRecordMap = new LinkedHashMap<>(); |
| 195 | + |
| 196 | + // Iterate through the lowercased schema fields |
| 197 | + for (Schema.Field field : actualSchema.getFields()) { |
| 198 | + String lowercasedFieldName = field.name(); |
| 199 | + Object fieldValue = valueExtractor.apply(lowercasedFieldName); |
| 200 | + |
| 201 | + if (fieldValue != null) { |
| 202 | + Object lowercasedFieldValue = lowercaseDefaultValue(fieldValue, field.schema()); |
| 203 | + lowercasedRecordMap.put(lowercasedFieldName, lowercasedFieldValue); |
| 204 | + } |
| 205 | + } |
| 206 | + |
| 207 | + return lowercasedRecordMap; |
| 208 | + } |
| 209 | + |
| 210 | + /** |
| 211 | + * Finds a key in the original default value map that matches the lowercased field name. |
| 212 | + * This is needed because the original default value may have field names in mixed case. |
| 213 | + * |
| 214 | + * @param map The map containing the original default value |
| 215 | + * @param lowercasedFieldName The lowercased field name from the transformed schema |
| 216 | + * @return The matching key from the original map, or null if not found |
| 217 | + */ |
| 218 | + private String findMatchingKeyForLowercased(Map<?, ?> map, String lowercasedFieldName) { |
| 219 | + // Try case-insensitive match to find the original key |
| 220 | + for (Object keyObj : map.keySet()) { |
| 221 | + String key = keyObj.toString(); // Handle both String and Utf8 |
| 222 | + if (key.equalsIgnoreCase(lowercasedFieldName)) { |
| 223 | + return key; |
| 224 | + } |
| 225 | + } |
| 226 | + |
| 227 | + return null; |
| 228 | + } |
73 | 229 | } |
0 commit comments