diff --git a/core/utils/type-utils/src/main/java/datawave/data/type/BaseType.java b/core/utils/type-utils/src/main/java/datawave/data/type/BaseType.java index 377f6f23314..9f0c566e9d4 100644 --- a/core/utils/type-utils/src/main/java/datawave/data/type/BaseType.java +++ b/core/utils/type-utils/src/main/java/datawave/data/type/BaseType.java @@ -13,7 +13,7 @@ public class BaseType & Serializable> implements Serializable, Type, ObjectSizeOf { - private static final long serialVersionUID = 5354270429891763693L; + private static final long serialVersionUID = -3747720721391071135L; private static final long STATIC_SIZE = PrecomputedSizes.STRING_STATIC_REF + Sizer.REFERENCE + Sizer.REFERENCE; protected T delegate; @@ -34,7 +34,8 @@ public T getDelegate() { } public void setDelegateFromString(String in) { - setDelegate(normalizer.denormalize(in)); + T denormalized = normalizer.denormalize(in); + setDelegate(denormalized); } public void setDelegate(T delegate) { @@ -91,7 +92,8 @@ public boolean normalizedRegexIsLossy(String in) { @Override public void normalizeAndSetNormalizedValue(T valueToNormalize) { - setNormalizedValue(normalizer.normalizeDelegateType(valueToNormalize)); + String normalized = normalizer.normalizeDelegateType(valueToNormalize); + setNormalizedValue(normalized); } public void validate() { @@ -179,19 +181,45 @@ public long sizeInBytes() { @Override public void write(Kryo kryo, Output output) { - output.writeString(getDelegateAsString()); + boolean equivalent = normalizedValue.equals(delegate); + output.writeBoolean(equivalent); + output.writeString(normalizedValue); + if (!equivalent) { + // write the delegate if not equivalent to the normalized value + String delegateString = getDelegateAsString(); + output.writeString(delegateString); + } } @Override public void read(Kryo kryo, Input input) { - String delegateString = input.readString(); + boolean equivalent = input.readBoolean(); + String normalizedValue = input.readString(); + if (equivalent) { + setNormalizedValue(normalizedValue); + setDelegateWithoutNormalization(normalizedValue); + } else { + String delegateString = input.readString(); + setNormalizedValue(normalizedValue); + setDelegateWithoutNormalization(delegateString); + } + } + + /** + * Set the delegate from a string without using the normalizer + * + * @param delegateString + * the delegate's normalized string value + */ + @SuppressWarnings("unchecked") + protected void setDelegateWithoutNormalization(String delegateString) { try { - setDelegateFromString(delegateString); + this.delegate = (T) delegateString; } catch (Exception e) { // if there was some problem with setting the delegate for the specific Type, then // set the normalized value to the input string. This effectively mimics falling back // to a NoOpType - setNormalizedValue(delegateString); + this.normalizedValue = delegateString; } } } diff --git a/warehouse/query-core/src/test/java/datawave/query/attributes/DocumentTest.java b/warehouse/query-core/src/test/java/datawave/query/attributes/DocumentTest.java index fd43fdc8fe9..f8efaa5f667 100644 --- a/warehouse/query-core/src/test/java/datawave/query/attributes/DocumentTest.java +++ b/warehouse/query-core/src/test/java/datawave/query/attributes/DocumentTest.java @@ -146,7 +146,7 @@ public void testLargeDocument() { Attribute attr = createAttribute("LC", "value-" + i); d.put("LC", attr); } - roundTrip(MAX_ITERATIONS, 188006); + roundTrip(MAX_ITERATIONS, 198006); } @Test diff --git a/warehouse/query-core/src/test/java/datawave/query/attributes/TypeAttributeTest.java b/warehouse/query-core/src/test/java/datawave/query/attributes/TypeAttributeTest.java index 069cc0900d0..5957847b2d6 100644 --- a/warehouse/query-core/src/test/java/datawave/query/attributes/TypeAttributeTest.java +++ b/warehouse/query-core/src/test/java/datawave/query/attributes/TypeAttributeTest.java @@ -37,6 +37,8 @@ public void testExceptionLeadsToNoOpType() { output.writeInt(0, true); output.writeString("MrMcDoesn'tExist"); output.writeBoolean(false); // write metadata when not set + output.writeBoolean(false); // normalized value and delegate not equivalent + output.writeString("normalized value"); output.writeString("delegate value as string"); output.writeBoolean(false); // to keep false output.writeInt(12, true); // hash code @@ -48,6 +50,7 @@ public void testExceptionLeadsToNoOpType() { assertInstanceOf(TypeAttribute.class, type); assertInstanceOf(NoOpType.class, type.getType()); + assertEquals("normalized value", type.getType().getNormalizedValue()); assertEquals("delegate value as string", type.getData().toString()); } } diff --git a/warehouse/query-core/src/test/java/datawave/query/attributes/it/IpAddressTypeAttributeIT.java b/warehouse/query-core/src/test/java/datawave/query/attributes/it/IpAddressTypeAttributeIT.java index ee025584d9a..ce902e8d6e8 100644 --- a/warehouse/query-core/src/test/java/datawave/query/attributes/it/IpAddressTypeAttributeIT.java +++ b/warehouse/query-core/src/test/java/datawave/query/attributes/it/IpAddressTypeAttributeIT.java @@ -55,8 +55,9 @@ public void testKryoValuePreservation() { // serializing full type name: 54 // serializing type name index: 23 // serialize hash code: 28 - verifyKryoPreservesValue(createNormalizedAttribute(), 28); - verifyKryoPreservesValue(createNonNormalizedAttribute(), 28); + // serialize normalized and non-normalized value to avoid expensive calls to the normalizer: 44, 44 + verifyKryoPreservesValue(createNormalizedAttribute(), 44); + verifyKryoPreservesValue(createNonNormalizedAttribute(), 44); } @Test diff --git a/warehouse/query-core/src/test/java/datawave/query/attributes/it/LcNoDiacriticsTypeAttributeIT.java b/warehouse/query-core/src/test/java/datawave/query/attributes/it/LcNoDiacriticsTypeAttributeIT.java index 2fba6462c34..da2f317f2cb 100644 --- a/warehouse/query-core/src/test/java/datawave/query/attributes/it/LcNoDiacriticsTypeAttributeIT.java +++ b/warehouse/query-core/src/test/java/datawave/query/attributes/it/LcNoDiacriticsTypeAttributeIT.java @@ -44,8 +44,9 @@ public void testKryoValuePreservation() { // serializing full type name: 52, 54 // serializing type name index: 16, 18 // serialize hash code: 21, 23 - verifyKryoPreservesValue(createNormalizedAttribute(), 21); - verifyKryoPreservesValue(createNonNormalizedAttribute(), 23); + // serialize normalized and non-normalized value to avoid expensive calls to the normalizer: 22, 28 + verifyKryoPreservesValue(createNormalizedAttribute(), 22); + verifyKryoPreservesValue(createNonNormalizedAttribute(), 28); } @Test diff --git a/warehouse/query-core/src/test/java/datawave/query/attributes/it/LcTypeAttributeIT.java b/warehouse/query-core/src/test/java/datawave/query/attributes/it/LcTypeAttributeIT.java index 9004306122a..51a8a596b25 100644 --- a/warehouse/query-core/src/test/java/datawave/query/attributes/it/LcTypeAttributeIT.java +++ b/warehouse/query-core/src/test/java/datawave/query/attributes/it/LcTypeAttributeIT.java @@ -44,8 +44,9 @@ public void testKryoValuePreservation() { // serializing full type name: 41 // serializing type name index: 17 // serialize hash code: 22 - verifyKryoPreservesValue(createNormalizedAttribute(), 22); - verifyKryoPreservesValue(createNonNormalizedAttribute(), 22); + // serialize normalized and non-normalized value to avoid expensive calls to the normalizer: 23, 28 + verifyKryoPreservesValue(createNormalizedAttribute(), 23); + verifyKryoPreservesValue(createNonNormalizedAttribute(), 28); } @Test diff --git a/warehouse/query-core/src/test/java/datawave/query/attributes/it/NoOpTypeAttributeIT.java b/warehouse/query-core/src/test/java/datawave/query/attributes/it/NoOpTypeAttributeIT.java index 4024c2b6b02..7d19ae6dbc2 100644 --- a/warehouse/query-core/src/test/java/datawave/query/attributes/it/NoOpTypeAttributeIT.java +++ b/warehouse/query-core/src/test/java/datawave/query/attributes/it/NoOpTypeAttributeIT.java @@ -70,8 +70,9 @@ public void testKryoValuePreservation() { // serializing full type name: 43 // serializing type name index: 17 // serialize hash code: 22 - verifyKryoPreservesValue(createNormalizedAttribute(), 22); - verifyKryoPreservesValue(createNonNormalizedAttribute(), 22); + // serialize normalized and non-normalized value to avoid expensive calls to the normalizer: 23, 23 + verifyKryoPreservesValue(createNormalizedAttribute(), 23); + verifyKryoPreservesValue(createNonNormalizedAttribute(), 23); } @Test diff --git a/warehouse/query-core/src/test/java/datawave/query/function/serializer/KryoDocumentSerDeTest.java b/warehouse/query-core/src/test/java/datawave/query/function/serializer/KryoDocumentSerDeTest.java index 89e6e6d5d7d..19a0079ccd1 100644 --- a/warehouse/query-core/src/test/java/datawave/query/function/serializer/KryoDocumentSerDeTest.java +++ b/warehouse/query-core/src/test/java/datawave/query/function/serializer/KryoDocumentSerDeTest.java @@ -89,7 +89,7 @@ public void testBulkSerialization() { int max = 1_000_000; for (int i = 1; i <= max; i++) { byte[] bytes = serializer.serialize(d); - assertTrue(450 < bytes.length && bytes.length <= 460); + assertTrue(460 < bytes.length && bytes.length <= 465, "actual size: " + bytes.length); } } }