Merge branch 'main' into knn-dictionary
ChrisHegarty committed Oct 29, 2024
2 parents ed233ba + 937432a commit 5c2cb2d
Showing 57 changed files with 724 additions and 737 deletions.
26 changes: 25 additions & 1 deletion lucene/CHANGES.txt
@@ -7,7 +7,7 @@ http://s.apache.org/luceneversions

API Changes
---------------------
(No changes)
* GITHUB#11023: Removing deprecated parameters from CheckIndex. (Jakub Slowinski)

New Features
---------------------
@@ -36,6 +36,10 @@ API Changes

* GITHUB#13859: Allow open-ended ranges in Intervals range queries. (Mayya Sharipova)

* GITHUB#13950: Make BooleanQuery#getClauses public and add #add(Collection<BooleanClause>) to BQ builder. (Shubham Chaudhary)

* GITHUB#13957: Removed LeafSimScorer class, to save its overhead. Scorers now
compute scores directly from a SimScorer, postings and norms. (Adrien Grand)

* GITHUB#13831: Complete refactoring of random-access vector API, eliminating copy() method. Now random-access vectors
are accessed by calling Byte/FloatVectorValues.vectors().get(int).
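The GITHUB#13950 entry above names a new bulk-add overload on the query builder. The sketch below is a hedged illustration of how it might be used; the class name is invented for illustration and the exact signatures, taken from the entry itself, should be checked against the released javadoc.

```java
// Hedged sketch of the Builder#add(Collection<BooleanClause>) overload named in GITHUB#13950.
import java.util.List;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.TermQuery;

public class BulkClauseExample {
  public static void main(String[] args) {
    // Clauses assembled elsewhere as a collection...
    List<BooleanClause> clauses = List.of(
        new BooleanClause(new TermQuery(new Term("body", "lucene")), BooleanClause.Occur.SHOULD),
        new BooleanClause(new TermQuery(new Term("body", "search")), BooleanClause.Occur.SHOULD));
    // ...can be added in one call instead of one add() per clause.
    BooleanQuery query = new BooleanQuery.Builder()
        .add(clauses)                      // bulk overload added by GITHUB#13950 (assumed signature)
        .setMinimumNumberShouldMatch(1)
        .build();
    System.out.println(query);
  }
}
```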
@@ -56,12 +60,32 @@ Optimizations
* GITHUB#13800: MaxScoreBulkScorer now recomputes scorer partitions when the
minimum competitive allows for a more favorable partitioning. (Adrien Grand)

* GITHUB#13930: Use growNoCopy when copying bytes in BytesRefBuilder. (Ignacio Vera)

* GITHUB#13931: Refactored `BooleanScorer` to evaluate matches of sub clauses
using the `Scorer` abstraction rather than the `BulkScorer` abstraction. This
speeds up exhaustive evaluation of disjunctions of term queries.
(Adrien Grand)

* GITHUB#13941: Optimized computation of top-hits on disjunctive queries with
many clauses. (Adrien Grand)

* GITHUB#13954: Disabled exchanging scores across slices for exhaustive
top-hits evaluation. (Adrien Grand)

* GITHUB#13899: Check ahead if we can get the count. (Lu Xugang)

* GITHUB#13943: Removed shared `HitsThresholdChecker`, which reduces overhead
but may delay a bit when dynamic pruning kicks in. (Adrien Grand)

Bug Fixes
---------------------
* GITHUB#13832: Fixed an issue where the DefaultPassageFormatter.format method did not format passages as intended
when they were not sorted by startOffset. (Seunghan Jung)
* GITHUB#13884: Remove broken .toArray from Long/CharObjectHashMap entirely. (Pan Guixin)
* GITHUB#12686: Added support for highlighting IndexOrDocValuesQuery. (Prudhvi Godithi)
* GITHUB#13927: Fix StoredFieldsConsumer finish. (linfn)
* GITHUB#13944: Ensure deterministic order of clauses for `DisjunctionMaxQuery#toString`. (Laurent Jakubina)

Build
---------------------
7 changes: 7 additions & 0 deletions lucene/MIGRATE.md
@@ -19,6 +19,13 @@

## Migration from Lucene 9.x to Lucene 10.0

### DataInput#readVLong() may now read negative vlongs

LUCENE-10376 started allowing `DataInput#readVLong()` to read negative vlongs.
In particular, this feature is used by the `DataInput#readZLong()` method. A
practical implication is that `DataInput#readVLong()` may now read up to 10
bytes, while it would never read more than 9 bytes in Lucene 9.x.
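As a quick illustration of why the byte count changes, here is a self-contained sketch (not the Lucene implementation): a vLong stores 7 payload bits per byte plus a continuation bit, so a negative value, whose sign bit makes all 64 bits significant, needs ceil(64 / 7) = 10 bytes, one more than any non-negative value.

```java
// Hedged sketch, illustration only: counts the 7-bit groups a vLong-style encoding would need.
public class VLongWidth {
  static int vLongByteCount(long value) {
    int bytes = 1;
    while ((value >>> 7) != 0) {  // unsigned shift: count remaining 7-bit groups
      value >>>= 7;
      bytes++;
    }
    return bytes;
  }

  public static void main(String[] args) {
    System.out.println(vLongByteCount(127L));            // 1
    System.out.println(vLongByteCount(Long.MAX_VALUE));  // 9 -- the former maximum
    System.out.println(vLongByteCount(-1L));             // 10 -- possible since LUCENE-10376
  }
}
```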

### Changes to DataInput.readGroupVInt and readGroupVInts methods

As part of GITHUB#13820, GITHUB#13825, GITHUB#13830, this issue corrects DataInput.readGroupVInts
4 changes: 2 additions & 2 deletions lucene/core/src/generated/checksums/generateForDeltaUtil.json
@@ -1,4 +1,4 @@
{
"lucene/core/src/java/org/apache/lucene/codecs/lucene912/ForDeltaUtil.java": "f561578ccb6a95364bb62c5ed86b38ff0b4a009d",
"lucene/core/src/java/org/apache/lucene/codecs/lucene912/gen_ForDeltaUtil.py": "eea1a71be9da8a13fdd979354dc4a8c6edf21be1"
"lucene/core/src/java/org/apache/lucene/codecs/lucene912/ForDeltaUtil.java": "b662da5848b0decc8bceb4225f433875ae9e3c11",
"lucene/core/src/java/org/apache/lucene/codecs/lucene912/gen_ForDeltaUtil.py": "01787b97bbe79edb7703498cef8ddb85901a6b1e"
}
4 changes: 2 additions & 2 deletions lucene/core/src/generated/checksums/generateForUtil.json
@@ -1,4 +1,4 @@
{
"lucene/core/src/java/org/apache/lucene/codecs/lucene912/ForUtil.java": "159e82388346fde147924d5e15ca65df4dd63b9a",
"lucene/core/src/java/org/apache/lucene/codecs/lucene912/gen_ForUtil.py": "66dc8813160feae2a37d8b50474f5f9830b6cb22"
"lucene/core/src/java/org/apache/lucene/codecs/lucene912/ForUtil.java": "02e0c8c290e65d0314664fde24c9331bdec44925",
"lucene/core/src/java/org/apache/lucene/codecs/lucene912/gen_ForUtil.py": "d7850f37e52a16c6592322950d0f6219cad23a33"
}
@@ -286,19 +286,19 @@ void decodeAndPrefixSum(int bitsPerValue, PostingDecodingUtil pdu, long base, lo
throws IOException {
switch (bitsPerValue) {
case 1:
decode1(pdu, tmp, longs);
decode1(pdu, longs);
prefixSum8(longs, base);
break;
case 2:
decode2(pdu, tmp, longs);
decode2(pdu, longs);
prefixSum8(longs, base);
break;
case 3:
decode3(pdu, tmp, longs);
prefixSum8(longs, base);
break;
case 4:
decode4(pdu, tmp, longs);
decode4(pdu, longs);
prefixSum8(longs, base);
break;
case 5:
@@ -314,7 +314,7 @@ void decodeAndPrefixSum(int bitsPerValue, PostingDecodingUtil pdu, long base, lo
prefixSum16(longs, base);
break;
case 8:
decode8To16(pdu, tmp, longs);
decode8To16(pdu, longs);
prefixSum16(longs, base);
break;
case 9:
@@ -346,7 +346,7 @@ void decodeAndPrefixSum(int bitsPerValue, PostingDecodingUtil pdu, long base, lo
prefixSum32(longs, base);
break;
case 16:
decode16To32(pdu, tmp, longs);
decode16To32(pdu, longs);
prefixSum32(longs, base);
break;
case 17:
@@ -431,8 +431,7 @@ private static void decode7To16(PostingDecodingUtil pdu, long[] tmp, long[] long
}
}

private static void decode8To16(PostingDecodingUtil pdu, long[] tmp, long[] longs)
throws IOException {
private static void decode8To16(PostingDecodingUtil pdu, long[] longs) throws IOException {
pdu.splitLongs(16, longs, 8, 8, MASK16_8, longs, 16, MASK16_8);
}

@@ -522,8 +521,7 @@ private static void decode15To32(PostingDecodingUtil pdu, long[] tmp, long[] lon
}
}

private static void decode16To32(PostingDecodingUtil pdu, long[] tmp, long[] longs)
throws IOException {
private static void decode16To32(PostingDecodingUtil pdu, long[] longs) throws IOException {
pdu.splitLongs(32, longs, 16, 16, MASK32_16, longs, 32, MASK32_16);
}
}
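For readers unfamiliar with the decode-then-prefix-sum pattern in `decodeAndPrefixSum` above: the block stores deltas, and the prefix sum rebuilds absolute values starting from `base`. The sketch below is only an illustration of that arithmetic, not the vectorized Lucene code; names are invented for illustration.

```java
// Hedged illustration of what prefixSum*(longs, base) computes conceptually.
public class PrefixSumSketch {
  static void prefixSum(long[] deltas, long base) {
    deltas[0] += base;  // first value is base + its delta
    for (int i = 1; i < deltas.length; i++) {
      deltas[i] += deltas[i - 1];  // each value becomes the previous absolute value plus its delta
    }
  }

  public static void main(String[] args) {
    long[] block = {3, 2, 5};
    prefixSum(block, 100);
    System.out.println(java.util.Arrays.toString(block));  // [103, 105, 110]
  }
}
```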
@@ -291,19 +291,19 @@ static void decodeSlow(int bitsPerValue, PostingDecodingUtil pdu, long[] tmp, lo
void decode(int bitsPerValue, PostingDecodingUtil pdu, long[] longs) throws IOException {
switch (bitsPerValue) {
case 1:
decode1(pdu, tmp, longs);
decode1(pdu, longs);
expand8(longs);
break;
case 2:
decode2(pdu, tmp, longs);
decode2(pdu, longs);
expand8(longs);
break;
case 3:
decode3(pdu, tmp, longs);
expand8(longs);
break;
case 4:
decode4(pdu, tmp, longs);
decode4(pdu, longs);
expand8(longs);
break;
case 5:
@@ -319,7 +319,7 @@ void decode(int bitsPerValue, PostingDecodingUtil pdu, long[] longs) throws IOEx
expand8(longs);
break;
case 8:
decode8(pdu, tmp, longs);
decode8(pdu, longs);
expand8(longs);
break;
case 9:
@@ -351,7 +351,7 @@ void decode(int bitsPerValue, PostingDecodingUtil pdu, long[] longs) throws IOEx
expand16(longs);
break;
case 16:
decode16(pdu, tmp, longs);
decode16(pdu, longs);
expand16(longs);
break;
case 17:
@@ -393,11 +393,11 @@ void decode(int bitsPerValue, PostingDecodingUtil pdu, long[] longs) throws IOEx
}
}

static void decode1(PostingDecodingUtil pdu, long[] tmp, long[] longs) throws IOException {
static void decode1(PostingDecodingUtil pdu, long[] longs) throws IOException {
pdu.splitLongs(2, longs, 7, 1, MASK8_1, longs, 14, MASK8_1);
}

static void decode2(PostingDecodingUtil pdu, long[] tmp, long[] longs) throws IOException {
static void decode2(PostingDecodingUtil pdu, long[] longs) throws IOException {
pdu.splitLongs(4, longs, 6, 2, MASK8_2, longs, 12, MASK8_2);
}

@@ -413,7 +413,7 @@ static void decode3(PostingDecodingUtil pdu, long[] tmp, long[] longs) throws IO
}
}

static void decode4(PostingDecodingUtil pdu, long[] tmp, long[] longs) throws IOException {
static void decode4(PostingDecodingUtil pdu, long[] longs) throws IOException {
pdu.splitLongs(8, longs, 4, 4, MASK8_4, longs, 8, MASK8_4);
}

@@ -457,7 +457,7 @@ static void decode7(PostingDecodingUtil pdu, long[] tmp, long[] longs) throws IO
}
}

static void decode8(PostingDecodingUtil pdu, long[] tmp, long[] longs) throws IOException {
static void decode8(PostingDecodingUtil pdu, long[] longs) throws IOException {
pdu.in.readLongs(longs, 0, 16);
}

@@ -601,7 +601,7 @@ static void decode15(PostingDecodingUtil pdu, long[] tmp, long[] longs) throws I
}
}

static void decode16(PostingDecodingUtil pdu, long[] tmp, long[] longs) throws IOException {
static void decode16(PostingDecodingUtil pdu, long[] longs) throws IOException {
pdu.in.readLongs(longs, 0, 32);
}

@@ -427,7 +427,7 @@ public boolean canReuse(IndexInput docIn, FieldInfo fieldInfo) {
public PostingsEnum reset(IntBlockTermState termState, int flags) throws IOException {
resetIndexInput(termState);
if (pforUtil == null && docFreq >= BLOCK_SIZE) {
pforUtil = new PForUtil(new ForUtil());
pforUtil = new PForUtil();
forDeltaUtil = new ForDeltaUtil();
}
totalTermFreq = indexHasFreq ? termState.totalTermFreq : docFreq;
@@ -727,7 +727,7 @@ public PostingsEnum reset(IntBlockTermState termState, int flags) throws IOExcep
}
totalTermFreq = termState.totalTermFreq;
if (pforUtil == null && totalTermFreq >= BLOCK_SIZE) {
pforUtil = new PForUtil(new ForUtil());
pforUtil = new PForUtil();
}
// Where this term's postings start in the .pos file:
final long posTermStartFP = termState.posStartFP;
@@ -1142,7 +1142,7 @@ public long cost() {
private abstract class BlockImpactsEnum extends ImpactsEnum {

protected final ForDeltaUtil forDeltaUtil = new ForDeltaUtil();
protected final PForUtil pforUtil = new PForUtil(new ForUtil());
protected final PForUtil pforUtil = new PForUtil();

protected final long[] docBuffer = new long[BLOCK_SIZE + 1];
protected final long[] freqBuffer = new long[BLOCK_SIZE];
@@ -142,9 +142,8 @@ public Lucene912PostingsWriter(SegmentWriteState state) throws IOException {
metaOut, META_CODEC, VERSION_CURRENT, state.segmentInfo.getId(), state.segmentSuffix);
CodecUtil.writeIndexHeader(
docOut, DOC_CODEC, VERSION_CURRENT, state.segmentInfo.getId(), state.segmentSuffix);
final ForUtil forUtil = new ForUtil();
forDeltaUtil = new ForDeltaUtil();
pforUtil = new PForUtil(forUtil);
pforUtil = new PForUtil();
if (state.fieldInfos.hasProx()) {
posDeltaBuffer = new long[BLOCK_SIZE];
String posFileName =
@@ -38,11 +38,10 @@ static boolean allEqual(long[] l) {
return true;
}

private final ForUtil forUtil;
private final ForUtil forUtil = new ForUtil();

PForUtil(ForUtil forUtil) {
static {
assert ForUtil.BLOCK_SIZE <= 256 : "blocksize must fit in one byte. got " + ForUtil.BLOCK_SIZE;
this.forUtil = forUtil;
}

/** Encode 128 integers from {@code longs} into {@code out}. */
@@ -106,17 +105,18 @@ void encode(long[] longs, DataOutput out) throws IOException {

/** Decode 128 integers into {@code ints}. */
void decode(PostingDecodingUtil pdu, long[] longs) throws IOException {
final int token = Byte.toUnsignedInt(pdu.in.readByte());
var in = pdu.in;
final int token = Byte.toUnsignedInt(in.readByte());
final int bitsPerValue = token & 0x1f;
final int numExceptions = token >>> 5;
if (bitsPerValue == 0) {
Arrays.fill(longs, 0, ForUtil.BLOCK_SIZE, pdu.in.readVLong());
Arrays.fill(longs, 0, ForUtil.BLOCK_SIZE, in.readVLong());
} else {
forUtil.decode(bitsPerValue, pdu, longs);
}
final int numExceptions = token >>> 5;
for (int i = 0; i < numExceptions; ++i) {
longs[Byte.toUnsignedInt(pdu.in.readByte())] |=
Byte.toUnsignedLong(pdu.in.readByte()) << bitsPerValue;
longs[Byte.toUnsignedInt(in.readByte())] |=
Byte.toUnsignedLong(in.readByte()) << bitsPerValue;
}
}

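As a standalone restatement of the token byte that `decode` reads above (this only spells out the two bit operations already shown, it is not additional API): the low 5 bits carry `bitsPerValue` for the packed block, and the high 3 bits carry the number of patched exceptions that follow it.

```java
// Hedged illustration of the PForUtil token layout; class name invented for illustration.
public class PForTokenExample {
  public static void main(String[] args) {
    int token = 0x45;                 // example value: 0b010_00101
    int bitsPerValue = token & 0x1f;  // low 5 bits  -> 5
    int numExceptions = token >>> 5;  // high 3 bits -> 2
    System.out.println(bitsPerValue + " bits/value, " + numExceptions + " exceptions");
  }
}
```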
@@ -361,7 +361,10 @@ def writeRemainder(bpv, next_primitive, remaining_bits_per_long, o, num_values,

def writeDecode(bpv, f):
next_primitive = primitive_size_for_bpv(bpv)
f.write(' private static void decode%dTo%d(PostingDecodingUtil pdu, long[] tmp, long[] longs) throws IOException {\n' %(bpv, next_primitive))
if next_primitive % bpv == 0:
f.write(' private static void decode%dTo%d(PostingDecodingUtil pdu, long[] longs) throws IOException {\n' %(bpv, next_primitive))
else:
f.write(' private static void decode%dTo%d(PostingDecodingUtil pdu, long[] tmp, long[] longs) throws IOException {\n' %(bpv, next_primitive))
if bpv == next_primitive:
f.write(' pdu.in.readLongs(longs, 0, %d);\n' %(bpv*2))
else:
@@ -390,9 +393,15 @@ def writeDecode(bpv, f):
primitive_size = primitive_size_for_bpv(bpv)
f.write(' case %d:\n' %bpv)
if next_primitive(bpv) == primitive_size:
f.write(' decode%d(pdu, tmp, longs);\n' %bpv)
if primitive_size % bpv == 0:
f.write(' decode%d(pdu, longs);\n' %bpv)
else:
f.write(' decode%d(pdu, tmp, longs);\n' %bpv)
else:
f.write(' decode%dTo%d(pdu, tmp, longs);\n' %(bpv, primitive_size))
if primitive_size % bpv == 0:
f.write(' decode%dTo%d(pdu, longs);\n' %(bpv, primitive_size))
else:
f.write(' decode%dTo%d(pdu, tmp, longs);\n' %(bpv, primitive_size))
f.write(' prefixSum%d(longs, base);\n' %primitive_size)
f.write(' break;\n')
f.write(' default:\n')
@@ -287,17 +287,19 @@ def writeDecode(bpv, f):
next_primitive = 8
elif bpv <= 16:
next_primitive = 16
f.write(' static void decode%d(PostingDecodingUtil pdu, long[] tmp, long[] longs) throws IOException {\n' %bpv)
if bpv == next_primitive:
f.write(' static void decode%d(PostingDecodingUtil pdu, long[] longs) throws IOException {\n' %bpv)
f.write(' pdu.in.readLongs(longs, 0, %d);\n' %(bpv*2))
else:
num_values_per_long = 64 / next_primitive
remaining_bits = next_primitive % bpv
num_iters = (next_primitive - 1) // bpv
o = 2 * bpv * num_iters
if remaining_bits == 0:
f.write(' static void decode%d(PostingDecodingUtil pdu, long[] longs) throws IOException {\n' %bpv)
f.write(' pdu.splitLongs(%d, longs, %d, %d, MASK%d_%d, longs, %d, MASK%d_%d);\n' %(bpv*2, next_primitive - bpv, bpv, next_primitive, bpv, o, next_primitive, next_primitive - num_iters * bpv))
else:
f.write(' static void decode%d(PostingDecodingUtil pdu, long[] tmp, long[] longs) throws IOException {\n' %bpv)
f.write(' pdu.splitLongs(%d, longs, %d, %d, MASK%d_%d, tmp, 0, MASK%d_%d);\n' %(bpv*2, next_primitive - bpv, bpv, next_primitive, bpv, next_primitive, next_primitive - num_iters * bpv))
writeRemainder(bpv, next_primitive, remaining_bits, o, 128/num_values_per_long - o, f)
f.write(' }\n')
@@ -334,7 +336,10 @@ def writeDecode(bpv, f):
elif bpv <= 16:
next_primitive = 16
f.write(' case %d:\n' %bpv)
f.write(' decode%d(pdu, tmp, longs);\n' %bpv)
if next_primitive % bpv == 0:
f.write(' decode%d(pdu, longs);\n' %bpv)
else:
f.write(' decode%d(pdu, tmp, longs);\n' %bpv)
f.write(' expand%d(longs);\n' %next_primitive)
f.write(' break;\n')
f.write(' default:\n')
@@ -27,7 +27,6 @@
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.LeafSimScorer;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryVisitor;
import org.apache.lucene.search.ScoreMode;
@@ -120,7 +119,6 @@ public Explanation explain(LeafReaderContext context, int doc) throws IOExceptio

@Override
public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException {
final Weight thisWeight = this;
Terms terms = Terms.getTerms(context.reader(), fieldName);
TermsEnum termsEnum = terms.iterator();
if (termsEnum.seekExact(new BytesRef(featureName)) == false) {
@@ -135,10 +133,8 @@
@Override
public Scorer get(long leadCost) throws IOException {
final SimScorer scorer = function.scorer(boost);
final LeafSimScorer simScorer =
new LeafSimScorer(scorer, context.reader(), fieldName, false);
final ImpactsEnum impacts = termsEnum.impacts(PostingsEnum.FREQS);
return new TermScorer(thisWeight, impacts, simScorer, topLevelScoringClause);
return new TermScorer(impacts, scorer, null, topLevelScoringClause);
}

@Override