diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/bloom/FuzzySet.java b/lucene/codecs/src/java/org/apache/lucene/codecs/bloom/FuzzySet.java index 7d6fd1b64b52..1aeeea49c4ae 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/bloom/FuzzySet.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/bloom/FuzzySet.java @@ -196,9 +196,7 @@ public static FuzzySet deserialize(DataInput in) throws IOException { int bloomSize = in.readInt(); int numLongs = in.readInt(); long[] longs = new long[numLongs]; - for (int i = 0; i < numLongs; i++) { - longs[i] = in.readLong(); - } + in.readLongs(longs, 0, numLongs); /* one bulk read of numLongs values; replaces the removed per-element readLong() loop */ FixedBitSet bits = new FixedBitSet(longs, bloomSize + 1); return new FuzzySet(bits, bloomSize, hashCount); } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90LiveDocsFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90LiveDocsFormat.java index b547a9adcf2f..9ad7090d23ce 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90LiveDocsFormat.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90LiveDocsFormat.java @@ -101,9 +101,7 @@ public Bits readLiveDocs(Directory dir, SegmentCommitInfo info, IOContext contex private FixedBitSet readFixedBitSet(IndexInput input, int length) throws IOException { long[] data = new long[FixedBitSet.bits2words(length)]; - for (int i = 0; i < data.length; i++) { - data[i] = input.readLong(); - } + input.readLongs(data, 0, data.length); /* fills the whole data array in one call; replaces the removed per-element readLong() loop */ return new FixedBitSet(data, length); } diff --git a/lucene/core/src/java/org/apache/lucene/store/BufferedChecksum.java b/lucene/core/src/java/org/apache/lucene/store/BufferedChecksum.java index 308825687e17..c61984bcbe16 100644 --- a/lucene/core/src/java/org/apache/lucene/store/BufferedChecksum.java +++ b/lucene/core/src/java/org/apache/lucene/store/BufferedChecksum.java @@ -16,7 +16,11 @@ */ package org.apache.lucene.store; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import 
java.nio.LongBuffer; import java.util.zip.Checksum; +import org.apache.lucene.util.BitUtil; /** Wraps another {@link Checksum} with an internal buffer to speed up checksum calculations. */ public class BufferedChecksum implements Checksum { @@ -60,6 +64,45 @@ public void update(byte[] b, int off, int len) { } } + void updateShort(short val) { /* Stores val at buffer[upto] through BitUtil.VH_LE_SHORT and advances upto by Short.BYTES; flush() is called first when the value would not fit in the remaining buffer space. */ + if (upto + Short.BYTES > buffer.length) flush(); + BitUtil.VH_LE_SHORT.set(buffer, upto, val); + upto += Short.BYTES; + } + + void updateInt(int val) { /* Stores val at buffer[upto] through BitUtil.VH_LE_INT and advances upto by Integer.BYTES; flush() is called first when the value would not fit. */ + if (upto + Integer.BYTES > buffer.length) flush(); + BitUtil.VH_LE_INT.set(buffer, upto, val); + upto += Integer.BYTES; + } + + void updateLong(long val) { /* Stores val at buffer[upto] through BitUtil.VH_LE_LONG and advances upto by Long.BYTES; flush() is called first when the value would not fit. */ + if (upto + Long.BYTES > buffer.length) flush(); + BitUtil.VH_LE_LONG.set(buffer, upto, val); + upto += Long.BYTES; + } + + void updateLongs(long[] vals, int offset, int len) { /* Adds len longs from vals, starting at index offset, to the buffered checksum input. */ + if (upto > 0) { /* buffer is partly filled: top it up one long at a time, for as many whole longs as still fit */ + int remainingCapacityInLong = Math.min((buffer.length - upto) / Long.BYTES, len); + for (int i = 0; i < remainingCapacityInLong; i++, offset++, len--) { + updateLong(vals[offset]); + } + if (0 == len) return; /* every value fitted into the current buffer */ + } + + LongBuffer b = ByteBuffer.wrap(buffer).order(ByteOrder.LITTLE_ENDIAN).asLongBuffer(); /* little-endian long view backed by the buffer array */ + final int capacityInLong = buffer.length / Long.BYTES; + while (len > 0) { /* NOTE(review): l is 0 whenever buffer.length is smaller than Long.BYTES, so this loop would never advance; confirm the buffer is always at least Long.BYTES long */ + flush(); /* presumably hands the buffered bytes to the wrapped checksum and resets upto to 0, since the copy below writes at index 0 - confirm against flush() */ + int l = Math.min(capacityInLong, len); + b.put(0, vals, offset, l); /* absolute bulk put: copies l longs to the start of the view */ + upto += l * Long.BYTES; + offset += l; + len -= l; + } + } + @Override public long getValue() { flush(); diff --git a/lucene/core/src/java/org/apache/lucene/store/BufferedChecksumIndexInput.java b/lucene/core/src/java/org/apache/lucene/store/BufferedChecksumIndexInput.java index 19a1d7295161..f11ceebcd46d 100644 --- a/lucene/core/src/java/org/apache/lucene/store/BufferedChecksumIndexInput.java +++ b/lucene/core/src/java/org/apache/lucene/store/BufferedChecksumIndexInput.java @@ -18,14 +18,13 @@ import java.io.IOException; import java.util.zip.CRC32; -import java.util.zip.Checksum; /** * Simple implementation of {@link ChecksumIndexInput} that wraps another input and 
delegates calls. */ public class BufferedChecksumIndexInput extends ChecksumIndexInput { final IndexInput main; - final Checksum digest; + final BufferedChecksum digest; /* declared as BufferedChecksum rather than Checksum so updateShort/updateInt/updateLong/updateLongs can be called on it */ /** Creates a new BufferedChecksumIndexInput */ public BufferedChecksumIndexInput(IndexInput main) { @@ -47,6 +46,33 @@ public void readBytes(byte[] b, int offset, int len) throws IOException { digest.update(b, offset, len); } + @Override + public short readShort() throws IOException { /* reads a short from main and feeds the value read to the digest */ + short v = main.readShort(); + digest.updateShort(v); + return v; + } + + @Override + public int readInt() throws IOException { /* reads an int from main and feeds the value read to the digest */ + int v = main.readInt(); + digest.updateInt(v); + return v; + } + + @Override + public long readLong() throws IOException { /* reads a long from main and feeds the value read to the digest */ + long v = main.readLong(); + digest.updateLong(v); + return v; + } + + @Override + public void readLongs(long[] dst, int offset, int length) throws IOException { /* bulk-reads into dst through main, then passes the same dst range to the digest */ + main.readLongs(dst, offset, length); + digest.updateLongs(dst, offset, length); + } + @Override public long getChecksum() { return digest.getValue(); diff --git a/lucene/core/src/test/org/apache/lucene/store/TestBufferedChecksum.java b/lucene/core/src/test/org/apache/lucene/store/TestBufferedChecksum.java index 47d8be79728c..20604d7e12cd 100644 --- a/lucene/core/src/test/org/apache/lucene/store/TestBufferedChecksum.java +++ b/lucene/core/src/test/org/apache/lucene/store/TestBufferedChecksum.java @@ -16,9 +16,13 @@ */ package org.apache.lucene.store; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.nio.LongBuffer; import java.util.zip.CRC32; import java.util.zip.Checksum; import org.apache.lucene.tests.util.LuceneTestCase; +import org.apache.lucene.util.BitUtil; public class TestBufferedChecksum extends LuceneTestCase { @@ -63,4 +67,104 @@ public void testRandom() { } assertEquals(c1.getValue(), c2.getValue()); } + + public void testDifferentInputTypes() { /* Checks that BufferedChecksum yields the plain CRC32 value of the same bytes whichever update method supplies them. */ + Checksum crc = new CRC32(); + BufferedChecksum buffered = new BufferedChecksum(new CRC32()); + int iterations = atLeast(1000); + 
for (int i = 0; i < iterations; i++) { + byte[] input = new byte[4096]; + random().nextBytes(input); + crc.update(input); + final long checksum = crc.getValue(); /* reference value computed by the unbuffered CRC32 */ + crc.reset(); + updateByShorts(checksum, buffered, input); + updateByInts(checksum, buffered, input); + updateByChunkOfBytes(checksum, buffered, input); + updateByChunkOfLongs(checksum, buffered, input); + } + } + + private void updateByChunkOfBytes(long expected, BufferedChecksum checksum, byte[] input) { /* Feeds input byte by byte, as one array, and as random splits mixing both; every variant must produce expected. */ + for (int i = 0; i < input.length; i++) { + checksum.update(input[i]); + } + checkChecksumValueAndReset(expected, checksum); + + checksum.update(input); + checkChecksumValueAndReset(expected, checksum); + + int iterations = atLeast(10); + for (int ite = 0; ite < iterations; ite++) { + int len0 = random().nextInt(input.length / 2); + checksum.update(input, 0, len0); + checksum.update(input, len0, input.length - len0); + checkChecksumValueAndReset(expected, checksum); + + checksum.update(input, 0, len0); + int len1 = random().nextInt(input.length / 4); + for (int i = 0; i < len1; i++) { + checksum.update(input[len0 + i]); + } + checksum.update(input, len0 + len1, input.length - len1 - len0); + checkChecksumValueAndReset(expected, checksum); + } + } + + private void updateByShorts(long expected, BufferedChecksum checksum, byte[] input) { /* Feeds input as consecutive shorts read through BitUtil.VH_LE_SHORT. */ + for (int i = 0; i < input.length / Short.BYTES; i++) { + checksum.updateShort((short) BitUtil.VH_LE_SHORT.get(input, i * Short.BYTES)); + } + checkChecksumValueAndReset(expected, checksum); + } + + private void updateByInts(long expected, BufferedChecksum checksum, byte[] input) { /* Feeds input as consecutive ints read through BitUtil.VH_LE_INT. */ + for (int i = 0; i < input.length / Integer.BYTES; i++) { + checksum.updateInt((int) BitUtil.VH_LE_INT.get(input, i * Integer.BYTES)); + } + checkChecksumValueAndReset(expected, checksum); + } + + private void updateByChunkOfLongs(long expected, BufferedChecksum checksum, byte[] input) { /* Feeds input as longs decoded little-endian: one at a time, in bulk, and mixed with byte updates. */ + LongBuffer b = ByteBuffer.wrap(input).order(ByteOrder.LITTLE_ENDIAN).asLongBuffer(); + long[] longInput 
= new long[input.length / Long.BYTES]; + b.get(longInput); + for (int i = 0; i < longInput.length; i++) { + checksum.updateLong(longInput[i]); + } + checkChecksumValueAndReset(expected, checksum); + + checksum.updateLongs(longInput, 0, longInput.length); + checkChecksumValueAndReset(expected, checksum); + + int iterations = atLeast(10); + for (int ite = 0; ite < iterations; ite++) { + int len0 = random().nextInt(longInput.length / 2); /* len0 and len1 below count longs, not bytes */ + checksum.updateLongs(longInput, 0, len0); + checksum.updateLongs(longInput, len0, longInput.length - len0); + checkChecksumValueAndReset(expected, checksum); + + checksum.updateLongs(longInput, 0, len0); + int len1 = random().nextInt(longInput.length / 4); + for (int i = 0; i < len1; i++) { + checksum.updateLong(longInput[len0 + i]); + } + checksum.updateLongs(longInput, len0 + len1, longInput.length - len1 - len0); + checkChecksumValueAndReset(expected, checksum); + + checksum.updateLongs(longInput, 0, len0); + checksum.update(input, len0 * Long.BYTES, input.length - len0 * Long.BYTES); /* the remainder is supplied as raw bytes */ + checkChecksumValueAndReset(expected, checksum); + + len0 &= ~(Long.BYTES - 1); // rounds len0 (a count of longs) down to a multiple of Long.BYTES + checksum.update(input, 0, len0 * Long.BYTES); + checksum.updateLongs(longInput, len0, longInput.length - len0); + checkChecksumValueAndReset(expected, checksum); + } + } + + private void checkChecksumValueAndReset(long expected, Checksum checksum) { + assertEquals(expected, checksum.getValue()); + checksum.reset(); + } }