Skip to content

Commit f007ef5

Browse files
committed
BaseType serialization optimized to avoid double normalization, if normalized value and delegate are equivalent only write single value
1 parent a60d23a commit f007ef5

File tree

8 files changed

+53
-17
lines changed

8 files changed

+53
-17
lines changed

core/utils/type-utils/src/main/java/datawave/data/type/BaseType.java

Lines changed: 35 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313

1414
public class BaseType<T extends Comparable<T> & Serializable> implements Serializable, Type<T>, ObjectSizeOf {
1515

16-
private static final long serialVersionUID = 5354270429891763693L;
16+
private static final long serialVersionUID = -3747720721391071135L;
1717
private static final long STATIC_SIZE = PrecomputedSizes.STRING_STATIC_REF + Sizer.REFERENCE + Sizer.REFERENCE;
1818

1919
protected T delegate;
@@ -34,7 +34,8 @@ public T getDelegate() {
3434
}
3535

3636
public void setDelegateFromString(String in) {
37-
setDelegate(normalizer.denormalize(in));
37+
T denormalized = normalizer.denormalize(in);
38+
setDelegate(denormalized);
3839
}
3940

4041
public void setDelegate(T delegate) {
@@ -91,7 +92,8 @@ public boolean normalizedRegexIsLossy(String in) {
9192

9293
@Override
9394
public void normalizeAndSetNormalizedValue(T valueToNormalize) {
94-
setNormalizedValue(normalizer.normalizeDelegateType(valueToNormalize));
95+
String normalized = normalizer.normalizeDelegateType(valueToNormalize);
96+
setNormalizedValue(normalized);
9597
}
9698

9799
public void validate() {
@@ -179,19 +181,45 @@ public long sizeInBytes() {
179181

180182
@Override
181183
public void write(Kryo kryo, Output output) {
182-
output.writeString(getDelegateAsString());
184+
boolean equivalent = normalizedValue.equals(delegate);
185+
output.writeBoolean(equivalent);
186+
output.writeString(normalizedValue);
187+
if (!equivalent) {
188+
// write the delegate if not equivalent to the normalized value
189+
String delegateString = getDelegateAsString();
190+
output.writeString(delegateString);
191+
}
183192
}
184193

185194
@Override
186195
public void read(Kryo kryo, Input input) {
187-
String delegateString = input.readString();
196+
boolean equivalent = input.readBoolean();
197+
String normalizedValue = input.readString();
198+
if (equivalent) {
199+
setNormalizedValue(normalizedValue);
200+
setDelegateWithoutNormalization(normalizedValue);
201+
} else {
202+
String delegateString = input.readString();
203+
setNormalizedValue(normalizedValue);
204+
setDelegateWithoutNormalization(delegateString);
205+
}
206+
}
207+
208+
/**
209+
* Set the delegate from a string without using the normalizer
210+
*
211+
* @param delegateString
212+
* the delegate's normalized string value
213+
*/
214+
@SuppressWarnings("unchecked")
215+
protected void setDelegateWithoutNormalization(String delegateString) {
188216
try {
189-
setDelegateFromString(delegateString);
217+
this.delegate = (T) delegateString;
190218
} catch (Exception e) {
191219
// if there was some problem with setting the delegate for the specific Type, then
192220
// set the normalized value to the input string. This effectively mimics falling back
193221
// to a NoOpType
194-
setNormalizedValue(delegateString);
222+
this.normalizedValue = delegateString;
195223
}
196224
}
197225
}

warehouse/query-core/src/test/java/datawave/query/attributes/DocumentTest.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -146,7 +146,7 @@ public void testLargeDocument() {
146146
Attribute<?> attr = createAttribute("LC", "value-" + i);
147147
d.put("LC", attr);
148148
}
149-
roundTrip(MAX_ITERATIONS, 188006);
149+
roundTrip(MAX_ITERATIONS, 198006);
150150
}
151151

152152
@Test

warehouse/query-core/src/test/java/datawave/query/attributes/TypeAttributeTest.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,8 @@ public void testExceptionLeadsToNoOpType() {
3737
output.writeInt(0, true);
3838
output.writeString("MrMcDoesn'tExist");
3939
output.writeBoolean(false); // write metadata when not set
40+
output.writeBoolean(false); // normalized value and delegate not equivalent
41+
output.writeString("normalized value");
4042
output.writeString("delegate value as string");
4143
output.writeBoolean(false); // to keep false
4244
output.writeInt(12, true); // hash code
@@ -48,6 +50,8 @@ public void testExceptionLeadsToNoOpType() {
4850

4951
assertInstanceOf(TypeAttribute.class, type);
5052
assertInstanceOf(NoOpType.class, type.getType());
53+
// exception handling does something to the input stream, the leading 'n' character is dropped
54+
assertEquals("normalized value", type.getType().getNormalizedValue());
5155
assertEquals("delegate value as string", type.getData().toString());
5256
}
5357
}

warehouse/query-core/src/test/java/datawave/query/attributes/it/IpAddressTypeAttributeIT.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -55,8 +55,9 @@ public void testKryoValuePreservation() {
5555
// serializing full type name: 54
5656
// serializing type name index: 23
5757
// serialize hash code: 28
58-
verifyKryoPreservesValue(createNormalizedAttribute(), 28);
59-
verifyKryoPreservesValue(createNonNormalizedAttribute(), 28);
58+
// serialize normalized and non-normalized value to avoid expensive calls to the normalizer: 44, 44
59+
verifyKryoPreservesValue(createNormalizedAttribute(), 44);
60+
verifyKryoPreservesValue(createNonNormalizedAttribute(), 44);
6061
}
6162

6263
@Test

warehouse/query-core/src/test/java/datawave/query/attributes/it/LcNoDiacriticsTypeAttributeIT.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,8 +44,9 @@ public void testKryoValuePreservation() {
4444
// serializing full type name: 52, 54
4545
// serializing type name index: 16, 18
4646
// serialize hash code: 21, 23
47-
verifyKryoPreservesValue(createNormalizedAttribute(), 21);
48-
verifyKryoPreservesValue(createNonNormalizedAttribute(), 23);
47+
// serialize normalized and non-normalized value to avoid expensive calls to the normalizer: 22, 28
48+
verifyKryoPreservesValue(createNormalizedAttribute(), 22);
49+
verifyKryoPreservesValue(createNonNormalizedAttribute(), 28);
4950
}
5051

5152
@Test

warehouse/query-core/src/test/java/datawave/query/attributes/it/LcTypeAttributeIT.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,8 +44,9 @@ public void testKryoValuePreservation() {
4444
// serializing full type name: 41
4545
// serializing type name index: 17
4646
// serialize hash code: 22
47-
verifyKryoPreservesValue(createNormalizedAttribute(), 22);
48-
verifyKryoPreservesValue(createNonNormalizedAttribute(), 22);
47+
// serialize normalized and non-normalized value to avoid expensive calls to the normalizer: 23, 28
48+
verifyKryoPreservesValue(createNormalizedAttribute(), 23);
49+
verifyKryoPreservesValue(createNonNormalizedAttribute(), 28);
4950
}
5051

5152
@Test

warehouse/query-core/src/test/java/datawave/query/attributes/it/NoOpTypeAttributeIT.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -70,8 +70,9 @@ public void testKryoValuePreservation() {
7070
// serializing full type name: 43
7171
// serializing type name index: 17
7272
// serialize hash code: 22
73-
verifyKryoPreservesValue(createNormalizedAttribute(), 22);
74-
verifyKryoPreservesValue(createNonNormalizedAttribute(), 22);
73+
// serialize normalized and non-normalized value to avoid expensive calls to the normalizer: 23, 23
74+
verifyKryoPreservesValue(createNormalizedAttribute(), 23);
75+
verifyKryoPreservesValue(createNonNormalizedAttribute(), 23);
7576
}
7677

7778
@Test

warehouse/query-core/src/test/java/datawave/query/function/serializer/KryoDocumentSerDeTest.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,7 @@ public void testBulkSerialization() {
8989
int max = 1_000_000;
9090
for (int i = 1; i <= max; i++) {
9191
byte[] bytes = serializer.serialize(d);
92-
assertTrue(450 < bytes.length && bytes.length <= 460);
92+
assertTrue(460 < bytes.length && bytes.length <= 465, "actual size: " + bytes.length);
9393
}
9494
}
9595
}

0 commit comments

Comments
 (0)