Skip to content

Commit 25d7f7e

Browse files
CNDB-15919: Optimize SAI NOT queries, push logic into posting lists
1 parent 0c97397 commit 25d7f7e

File tree

8 files changed

+119
-34
lines changed

8 files changed

+119
-34
lines changed

src/java/org/apache/cassandra/index/sai/SSTableIndex.java

Lines changed: 3 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,7 @@ private static SearchableIndex createSearchableIndex(SSTableContext sstableConte
9595
if (CassandraRelevantProperties.SAI_INDEX_READS_DISABLED.getBoolean())
9696
{
9797
logger.info("Creating dummy (empty) index searcher for sstable {} as SAI index reads are disabled", sstableContext.sstable.descriptor);
98-
return new EmptyIndex();
98+
return new EmptyIndex(sstableContext);
9999
}
100100

101101
return perIndexComponents.onDiskFormat().newSearchableIndex(sstableContext, perIndexComponents);
@@ -252,17 +252,10 @@ private KeyRangeIterator getNonEqIterator(Expression expression,
252252
QueryContext context,
253253
boolean defer) throws IOException
254254
{
255-
KeyRangeIterator allKeys = allSSTableKeys(keyRange);
256255
if (TypeUtil.supportsRounding(expression.validator))
257-
{
258-
return allKeys;
259-
}
256+
return allSSTableKeys(keyRange);
260257
else
261-
{
262-
Expression negExpression = expression.negated();
263-
KeyRangeIterator matchedKeys = searchableIndex.search(negExpression, keyRange, context, defer);
264-
return KeyRangeAntiJoinIterator.create(allKeys, matchedKeys);
265-
}
258+
return searchableIndex.search(expression, keyRange, context, defer);
266259
}
267260

268261
public KeyRangeIterator search(Expression expression,

src/java/org/apache/cassandra/index/sai/disk/EmptyIndex.java

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
import org.apache.cassandra.db.virtual.SimpleDataSet;
2828
import org.apache.cassandra.dht.AbstractBounds;
2929
import org.apache.cassandra.index.sai.QueryContext;
30+
import org.apache.cassandra.index.sai.SSTableContext;
3031
import org.apache.cassandra.index.sai.disk.v1.Segment;
3132
import org.apache.cassandra.index.sai.iterators.KeyRangeIterator;
3233
import org.apache.cassandra.index.sai.plan.Expression;
@@ -38,6 +39,13 @@
3839

3940
public class EmptyIndex implements SearchableIndex
4041
{
42+
private final SSTableContext sstableContext;
43+
44+
public EmptyIndex(SSTableContext sstableContext)
45+
{
46+
this.sstableContext = sstableContext;
47+
}
48+
4149
@Override
4250
public long indexFileCacheSize()
4351
{
@@ -98,7 +106,9 @@ public KeyRangeIterator search(Expression expression,
98106
QueryContext context,
99107
boolean defer) throws IOException
100108
{
101-
return KeyRangeIterator.empty();
109+
return expression.getOp().isNonEquality()
110+
? PrimaryKeyMapIterator.create(sstableContext, keyRange)
111+
: KeyRangeIterator.empty();
102112
}
103113

104114
@Override

src/java/org/apache/cassandra/index/sai/disk/v1/IndexSearcher.java

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
import org.apache.cassandra.index.sai.disk.PostingList;
3838
import org.apache.cassandra.index.sai.disk.PostingListKeyRangeIterator;
3939
import org.apache.cassandra.index.sai.disk.PrimaryKeyMap;
40+
import org.apache.cassandra.index.sai.disk.v1.postings.ComplementPostingList;
4041
import org.apache.cassandra.index.sai.iterators.KeyRangeIterator;
4142
import org.apache.cassandra.index.sai.iterators.RowIdToPrimaryKeyWithSortKeyIterator;
4243
import org.apache.cassandra.index.sai.plan.Expression;
@@ -96,7 +97,28 @@ protected IndexSearcher(PrimaryKeyMap.Factory primaryKeyMapFactory,
9697
* @param defer create the iterator in a deferred state
9798
* @return {@link KeyRangeIterator} that matches given expression
9899
*/
99-
public abstract KeyRangeIterator search(Expression expression, AbstractBounds<PartitionPosition> keyRange, QueryContext queryContext, boolean defer) throws IOException;
100+
public KeyRangeIterator search(Expression expression, AbstractBounds<PartitionPosition> keyRange, QueryContext queryContext, boolean defer) throws IOException
101+
{
102+
if (expression.getOp().isNonEquality())
103+
{
104+
var negated = expression.negated();
105+
var postingList = searchInternal(negated, keyRange, queryContext, defer);
106+
// TODO handle the complexity of which row ids to use here.
107+
int minSegmentRowId = metadata.toSegmentRowId(0);
108+
int maxSegmentRowId = metadata.toSegmentRowId(primaryKeyMapFactory.count() - 1);
109+
110+
var complement = new ComplementPostingList(minSegmentRowId, maxSegmentRowId, postingList);
111+
// TODO this needs to use min/max keys for the whole table
112+
return toPrimaryKeyIterator(complement, queryContext);
113+
}
114+
else
115+
{
116+
var postingList = searchInternal(expression, keyRange, queryContext, defer);
117+
return toPrimaryKeyIterator(postingList, queryContext);
118+
}
119+
}
120+
121+
protected abstract PostingList searchInternal(Expression expression, AbstractBounds<PartitionPosition> keyRange, QueryContext queryContext, boolean defer) throws IOException;
100122

101123
/**
102124
* Order the rows by the given Orderer. Used for ORDER BY clause when

src/java/org/apache/cassandra/index/sai/disk/v1/InvertedIndexSearcher.java

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -134,13 +134,7 @@ public long indexFileCacheSize()
134134
}
135135

136136
@SuppressWarnings("resource")
137-
public KeyRangeIterator search(Expression exp, AbstractBounds<PartitionPosition> keyRange, QueryContext context, boolean defer) throws IOException
138-
{
139-
PostingList postingList = searchPosting(exp, context);
140-
return toPrimaryKeyIterator(postingList, context);
141-
}
142-
143-
private PostingList searchPosting(Expression exp, QueryContext context)
137+
protected PostingList searchInternal(Expression exp, AbstractBounds<PartitionPosition> keyRange, QueryContext context, boolean defer) throws IOException
144138
{
145139
if (logger.isTraceEnabled())
146140
logger.trace(indexContext.logMessage("Searching on expression '{}'..."), exp);

src/java/org/apache/cassandra/index/sai/disk/v1/KDTreeIndexSearcher.java

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -84,13 +84,7 @@ public long indexFileCacheSize()
8484
}
8585

8686
@Override
87-
public KeyRangeIterator search(Expression exp, AbstractBounds<PartitionPosition> keyRange, QueryContext context, boolean defer) throws IOException
88-
{
89-
PostingList postingList = searchPosting(exp, context);
90-
return toPrimaryKeyIterator(postingList, context);
91-
}
92-
93-
private PostingList searchPosting(Expression exp, QueryContext context)
87+
protected PostingList searchInternal(Expression exp, AbstractBounds<PartitionPosition> keyRange, QueryContext context, boolean defer) throws IOException
9488
{
9589
if (logger.isTraceEnabled())
9690
logger.trace(indexContext.logMessage("Searching on expression '{}'..."), exp);

src/java/org/apache/cassandra/index/sai/disk/v1/V1OnDiskFormat.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -163,7 +163,7 @@ public PrimaryKeyMap.Factory newPrimaryKeyMapFactory(IndexComponents.ForRead per
163163
public SearchableIndex newSearchableIndex(SSTableContext sstableContext, IndexComponents.ForRead perIndexComponents)
164164
{
165165
return perIndexComponents.isEmpty()
166-
? new EmptyIndex()
166+
? new EmptyIndex(sstableContext)
167167
: new V1SearchableIndex(sstableContext, perIndexComponents);
168168
}
169169

src/java/org/apache/cassandra/index/sai/disk/v1/postings/ComplementPostingList.java

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
19+
package org.apache.cassandra.index.sai.disk.v1.postings;
20+
21+
import java.io.IOException;
22+
23+
import org.apache.cassandra.index.sai.disk.PostingList;
24+
25+
public class ComplementPostingList implements PostingList
26+
{
27+
private final PostingList source;
28+
private final int lastSourceRowId;
29+
private int nextRowId;
30+
private int nextSourceRowId = -1;
31+
32+
/**
33+
* A posting list that complements the provided posting list within the specified range.
34+
*
35+
* @param minSegmentRowId inclusive minimum row id
36+
* @param maxSegmentRowId exclusive maximum row id
37+
* @param source posting list to complement
38+
*/
39+
public ComplementPostingList(int minSegmentRowId, int maxSegmentRowId, PostingList source)
40+
{
41+
this.nextRowId = minSegmentRowId;
42+
this.lastSourceRowId = maxSegmentRowId;
43+
this.source = source == null ? PostingList.EMPTY : source;
44+
}
45+
46+
@Override
47+
public int nextPosting() throws IOException
48+
{
49+
if (nextSourceRowId == -1)
50+
nextSourceRowId = source.nextPosting();
51+
52+
// Move both pointers forward
53+
while (nextSourceRowId == nextRowId)
54+
{
55+
nextRowId++;
56+
nextSourceRowId = source.nextPosting();
57+
}
58+
59+
if (nextRowId > lastSourceRowId)
60+
return END_OF_STREAM;
61+
62+
return nextRowId++;
63+
}
64+
65+
@Override
66+
public int size()
67+
{
68+
return (lastSourceRowId - nextRowId) - source.size() + 1;
69+
}
70+
71+
@Override
72+
public int advance(int targetRowID) throws IOException
73+
{
74+
nextRowId = targetRowID;
75+
nextSourceRowId = source.advance(targetRowID);
76+
return nextPosting();
77+
}
78+
}

src/java/org/apache/cassandra/index/sai/disk/v2/V2VectorIndexSearcher.java

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -135,13 +135,7 @@ public ProductQuantization getPQ()
135135
}
136136

137137
@Override
138-
public KeyRangeIterator search(Expression exp, AbstractBounds<PartitionPosition> keyRange, QueryContext context, boolean defer) throws IOException
139-
{
140-
PostingList results = searchPosting(context, exp, keyRange);
141-
return toPrimaryKeyIterator(results, context);
142-
}
143-
144-
private PostingList searchPosting(QueryContext context, Expression exp, AbstractBounds<PartitionPosition> keyRange) throws IOException
138+
protected PostingList searchInternal(Expression exp, AbstractBounds<PartitionPosition> keyRange, QueryContext context, boolean defer) throws IOException
145139
{
146140
if (logger.isTraceEnabled())
147141
logger.trace(indexContext.logMessage("Searching on expression '{}'..."), exp);

0 commit comments

Comments
 (0)