Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CASSANDRA-20460: Expand TableWalk tests to include collections and add support for += and -= for these types #3995

Closed
wants to merge 39 commits into from
Closed
Changes from 34 commits
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
21c4f30
when UDT sees empty bytes this becomes null
dcapwell Mar 18, 2025
d1be462
checkpoint of current test finding missing insert
dcapwell Mar 18, 2025
7982756
fixed another model bug with UDTs and empty bytes
dcapwell Mar 18, 2025
e99535a
need to set null rather than unset so the cell value is updated
dcapwell Mar 18, 2025
76dfa34
first draft of assignment operator support in the model
dcapwell Mar 18, 2025
f5a59c2
fixed NPE when c -= returns null
dcapwell Mar 18, 2025
75143d2
fixed bug where update static didnt handle assignment operator
dcapwell Mar 18, 2025
9f7d6a3
added set update support
dcapwell Mar 18, 2025
3eb4735
added support for set - set
dcapwell Mar 18, 2025
289334d
added map support
dcapwell Mar 18, 2025
ae18a84
added -= and += list support
dcapwell Mar 18, 2025
f358d43
limit to only the desired types
dcapwell Mar 18, 2025
1a32d93
checkpoint seeds
dcapwell Mar 18, 2025
8000bf3
checkpiont seed
dcapwell Mar 18, 2025
f679856
fixed issue where empty collection was {} rather than null
dcapwell Mar 19, 2025
e2fb393
each column gets its own boolean distro for allowing assignment or not
dcapwell Mar 19, 2025
aaa083d
made type kind and what primitives to use based off a distribution ch…
dcapwell Mar 19, 2025
2c994c9
fixed a bug where frozen collections would allow range
dcapwell Mar 19, 2025
961f618
fixed bugs related to trying to do search collections rather than the…
dcapwell Mar 19, 2025
7915895
clenaup
dcapwell Mar 19, 2025
b93943d
fixed bug where we tried to do filtering on UDTs
dcapwell Mar 19, 2025
42588bd
fixed bug where we would try to do < queries on tuples without allow …
dcapwell Mar 19, 2025
202ba7a
when only a subset of columns differ than missing and unexpected have…
dcapwell Mar 19, 2025
74e084a
when only a subset of columns differ than missing and unexpected have…
dcapwell Mar 19, 2025
b76f2ba
fixed a bug where frozen udts would store null rather than <empty>
dcapwell Mar 19, 2025
639427b
checkpoint failing seed
dcapwell Mar 19, 2025
2ba8dd0
revert
dcapwell Mar 19, 2025
e475980
cleanup for pr
dcapwell Mar 19, 2025
ccc8a34
made partitions non-uniform. fixed bug where serachable was defined …
dcapwell Mar 19, 2025
ee6c718
cleanup and npe
dcapwell Mar 19, 2025
171c41a
cleanup
dcapwell Mar 19, 2025
9eb86e7
cleanup
dcapwell Mar 19, 2025
5de3604
testing out removing vectors from SAI search
dcapwell Mar 20, 2025
fbbdfdd
revert while true loop
dcapwell Mar 20, 2025
4dda3e6
added unset to cql
dcapwell Mar 20, 2025
44a3f1c
Revert "added unset to cql"
dcapwell Mar 20, 2025
3e1d786
added unit test for += and -=
dcapwell Mar 21, 2025
c03ccde
feedback
dcapwell Mar 21, 2025
34af875
feedback
dcapwell Mar 21, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -66,10 +66,11 @@
import org.apache.cassandra.utils.ASTGenerators;
import org.apache.cassandra.utils.AbstractTypeGenerators;
import org.apache.cassandra.utils.AbstractTypeGenerators.TypeGenBuilder;
import org.apache.cassandra.utils.AbstractTypeGenerators.TypeKind;
import org.apache.cassandra.utils.ByteBufferUtil;
import org.apache.cassandra.utils.CassandraGenerators.TableMetadataBuilder;
import org.apache.cassandra.utils.Generators;
import org.apache.cassandra.utils.ImmutableUniqueList;
import org.quicktheories.generators.SourceDSL;

import static accord.utils.Property.commands;
import static accord.utils.Property.stateful;
@@ -78,6 +79,17 @@

public class SingleNodeTableWalkTest extends StatefulASTBase
{
private static final Gen<Gen<Boolean>> BOOLEAN_DISTRIBUTION = Gens.bools().mixedDistribution();
//TODO (coverage): COMPOSITE, DYNAMIC_COMPOSITE
private static Gen<Gen<TypeKind>> TYPE_KIND_DISTRIBUTION = Gens.mixedDistribution(TypeKind.PRIMITIVE,
TypeKind.SET, TypeKind.LIST, TypeKind.MAP,
TypeKind.TUPLE, TypeKind.UDT,
TypeKind.VECTOR
);
private static Gen<Gen<AbstractType<?>>> PRIMITIVE_DISTRIBUTION = Gens.mixedDistribution(AbstractTypeGenerators.knownPrimitiveTypes()
.stream()
.filter(t -> !AbstractTypeGenerators.isUnsafeEquality(t))
.collect(Collectors.toList()));
private static final Logger logger = LoggerFactory.getLogger(SingleNodeTableWalkTest.class);

protected void preCheck(Cluster cluster, Property.StatefulBuilder builder)
@@ -88,10 +100,20 @@ protected void preCheck(Cluster cluster, Property.StatefulBuilder builder)
// CQL_DEBUG_APPLY_OPERATOR = true;
}

protected TypeGenBuilder supportedTypes()
protected TypeGenBuilder supportedTypes(RandomSource rs)
{
return AbstractTypeGenerators.withoutUnsafeEquality(AbstractTypeGenerators.builder()
.withTypeKinds(AbstractTypeGenerators.TypeKind.PRIMITIVE));
return AbstractTypeGenerators.builder()
.withTypeKinds(Generators.fromGen(TYPE_KIND_DISTRIBUTION.next(rs)))
.withPrimitives(Generators.fromGen(PRIMITIVE_DISTRIBUTION.next(rs)))
Comment on lines +106 to +107
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

before we used uniform random so each test had the same weights. Now we select a new distribution for each test run (some tests will only be primitives, others will be mostly maps, etc.)

.withUserTypeFields(AbstractTypeGenerators.UserTypeFieldsGen.simpleNames())
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

not testing UDT serialization so simple names makes it so much easier to read...

.withMaxDepth(1);
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ATM we are not testing nested access (will need to eventually) so lowered as the cql literal output was so large it got hard to read (UDT of map of UDT of map of UDT....)

}

protected TypeGenBuilder supportedPrimaryColumnTypes(RandomSource rs)
{
return AbstractTypeGenerators.builder()
.withTypeKinds(TypeKind.PRIMITIVE)
.withPrimitives(Generators.fromGen(PRIMITIVE_DISTRIBUTION.next(rs)));
}

protected List<CreateIndexDDL.Indexer> supportedIndexers()
@@ -206,7 +228,7 @@ protected List<CreateIndexDDL.Indexer> supportedIndexers()
builder.value(pk, key.bufferAt(pks.indexOf(pk)));


List<Symbol> searchableColumns = state.nonPartitionColumns;
List<Symbol> searchableColumns = state.searchableNonPartitionColumns;
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

multi cell collections/udt can not be searched, so just using the non-partition columns wasn't enough

Symbol symbol = rs.pick(searchableColumns);

TreeMap<ByteBuffer, List<BytesPartitionState.PrimaryKey>> universe = state.model.index(ref, symbol);
@@ -363,7 +385,8 @@ protected TableMetadata defineTable(RandomSource rs, String ks)
.withCompression())
.withKeyspaceName(ks).withTableName("tbl")
.withSimpleColumnNames()
.withDefaultTypeGen(supportedTypes())
.withDefaultTypeGen(supportedTypes(rs))
.withPrimaryColumnTypeGen(supportedPrimaryColumnTypes(rs))
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

when primary keys can be collections there can be issues with having too many bytes and having our mutations be rejected. Keeping it primitive also improves readability for debug output...

Would be good to have a test that overrides this though, just to improve coverage...

.withPartitioner(Murmur3Partitioner.instance)
.build())
.next(rs);
@@ -393,7 +416,7 @@ public class State extends CommonState
{
protected final LinkedHashMap<Symbol, IndexedColumn> indexes;
private final Gen<Mutation> mutationGen;
private final List<Symbol> nonPartitionColumns;
private final List<Symbol> searchableNonPartitionColumns;
private final List<Symbol> searchableColumns;
private final List<Symbol> nonPkIndexedColumns;

@@ -424,7 +447,8 @@ public State(RandomSource rs, Cluster cluster)
.withoutTransaction()
.withoutTtl()
.withoutTimestamp()
.withPartitions(SourceDSL.arbitrary().pick(uniquePartitions));
.withPartitions(Generators.fromGen(Gens.mixedDistribution(uniquePartitions).next(rs)))
.withColumnExpressions(e -> e.withOperators(Generators.fromGen(BOOLEAN_DISTRIBUTION.next(rs))));
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

each column will have a different distribution for when operators are allowed

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Required for CASSANDRA-20449 repro at least 😄

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

wasn't required; we hit the issue before this change. This change is to better cover different edge cases

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We need different assignment operators to repro (+= v vs. = {v}) but not necessarily different distributions of operators, which is what's introduced here - is that right?

if (IGNORED_ISSUES.contains(KnownIssue.SAI_EMPTY_TYPE))
{
model.factory.regularAndStaticColumns.stream()
@@ -438,16 +462,30 @@ public State(RandomSource rs, Cluster cluster)
}
this.mutationGen = toGen(mutationGenBuilder.build());

nonPartitionColumns = ImmutableList.<Symbol>builder()
.addAll(model.factory.clusteringColumns)
.addAll(model.factory.staticColumns)
.addAll(model.factory.regularColumns)
.build();
var nonPartitionColumns = ImmutableList.<Symbol>builder()
.addAll(model.factory.clusteringColumns)
.addAll(model.factory.staticColumns)
.addAll(model.factory.regularColumns)
.build();
searchableNonPartitionColumns = nonPartitionColumns.stream()
.filter(this::isSearchable)
.collect(Collectors.toList());
nonPkIndexedColumns = nonPartitionColumns.stream()
.filter(indexes::containsKey)
.collect(Collectors.toList());

searchableColumns = metadata.partitionKeyColumns().size() > 1 ? model.factory.selectionOrder : nonPartitionColumns;
searchableColumns = (metadata.partitionKeyColumns().size() > 1 ? model.factory.selectionOrder : nonPartitionColumns)
.stream()
.filter(this::isSearchable)
.collect(Collectors.toList());
}

private boolean isSearchable(Symbol symbol)
{
// See org.apache.cassandra.cql3.Operator.validateFor
// multi cell collections can only be searched if you search their elements, not the collection as a whole
//TODO (coverage): can you query for UDT fields? its a single cell so you "should"?
return !(symbol.type().isMultiCell() && (symbol.type().isCollection() || symbol.type().isUDT()));
}

@Override
@@ -523,6 +561,8 @@ private List<Symbol> multiColumnQueryColumns()
List<Symbol> allowedColumns = searchableColumns;
if (hasMultiNodeMultiColumnAllowFilteringWithLocalWritesIssue())
allowedColumns = nonPkIndexedColumns;
if (IGNORED_ISSUES.contains(KnownIssue.SAI_AND_VECTOR_COLUMNS) && !indexes.isEmpty())
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

added to get clean CI. Something is up with multi column queries that touch both SAI and vector type. It doesn't look like the vector must be indexed to get incorrect results, so doesn't feel like we are selecting a bad index here

allowedColumns = allowedColumns.stream().filter(s -> !s.type().isVector()).collect(Collectors.toList());
return allowedColumns;
}

@@ -533,7 +573,7 @@ private boolean hasMultiNodeMultiColumnAllowFilteringWithLocalWritesIssue()

public boolean allowPartitionQuery()
{
return !(model.isEmpty() || nonPartitionColumns.isEmpty());
return !(model.isEmpty() || searchableNonPartitionColumns.isEmpty());
}

@Override
Original file line number Diff line number Diff line change
@@ -21,6 +21,7 @@
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.BitSet;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
@@ -43,13 +44,16 @@
import com.google.common.collect.Sets;

import accord.utils.Invariants;
import org.apache.cassandra.cql3.ast.AssignmentOperator;
import org.apache.cassandra.cql3.ast.Conditional;
import org.apache.cassandra.cql3.ast.Conditional.Where.Inequality;
import org.apache.cassandra.cql3.ast.Element;
import org.apache.cassandra.cql3.ast.Expression;
import org.apache.cassandra.cql3.ast.ExpressionEvaluator;
import org.apache.cassandra.cql3.ast.FunctionCall;
import org.apache.cassandra.cql3.ast.Literal;
import org.apache.cassandra.cql3.ast.Mutation;
import org.apache.cassandra.cql3.ast.Operator;
import org.apache.cassandra.cql3.ast.Select;
import org.apache.cassandra.cql3.ast.StandardVisitors;
import org.apache.cassandra.cql3.ast.Symbol;
@@ -244,7 +248,10 @@ public void update(Mutation.Update update)
// static columns to add in. If we are doing something like += to a row that doesn't exist, we still update statics...
Map<Symbol, ByteBuffer> write = new HashMap<>();
for (Symbol col : Sets.intersection(factory.staticColumns.asSet(), set.keySet()))
write.put(col, eval(set.get(col)));
{
ByteBuffer current = partition.staticRow().get(col);
write.put(col, eval(col, current, set.get(col)));
}
partition.setStaticColumns(write);
}
// table has clustering but non are in the write, so only pk/static can be updated
@@ -254,7 +261,10 @@ public void update(Mutation.Update update)
{
Map<Symbol, ByteBuffer> write = new HashMap<>();
for (Symbol col : Sets.intersection(factory.regularColumns.asSet(), set.keySet()))
write.put(col, eval(set.get(col)));
{
ByteBuffer current = partition.get(cd, col);
write.put(col, eval(col, current, set.get(col)));
}

partition.setColumns(cd, write, false);
}
@@ -493,6 +503,45 @@ private static void validateAnyOrder(ImmutableUniqueList<Symbol> columns, Set<Ro
if (actual.isEmpty()) sb.append("No rows returned");
else sb.append("Missing rows:\n").append(table(columns, missing));
}
if (!unexpected.isEmpty() && unexpected.size() == missing.size())
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is extra human readable validation logic. null vs <empty> has been super hard to see from the above output, and the use case i was finding is that we had the correct rows returned, but 1 or more columns had a difference... so if this is the case we try to show which columns are off... this was super useful for detecting consistency issues with null and <empty>

{
// good chance a column differs
StringBuilder finalSb = sb;
Runnable runOnce = new Runnable()
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i think Alex created a reusable function for this in accord... would need to find it

{
boolean ran = false;
@Override
public void run()
{
if (ran) return;
finalSb.append("\nPossible column conflicts:");
ran = true;
}
};
for (var e : missing)
{
Row smallest = null;
BitSet smallestDiff = null;
for (var a : unexpected)
{
BitSet diff = e.diff(a);
if (smallestDiff == null || diff.cardinality() < smallestDiff.cardinality())
{
smallest = a;
smallestDiff = diff;
}
}
// if every column differs then ignore
if (smallestDiff.cardinality() == e.values.length)
continue;
runOnce.run();
sb.append("\n\tExpected: ").append(e);
sb.append("\n\tDiff (expected over actual):\n");
Row eSmall = e.select(smallestDiff);
Row aSmall = smallest.select(smallestDiff);
sb.append(table(eSmall.columns, Arrays.asList(eSmall, aSmall)));
}
}
if (sb != null)
{
sb.append("\nExpected:\n").append(table(columns, expected));
@@ -731,7 +780,7 @@ private static boolean matches(ByteBuffer value, List<? extends Expression> cond
for (Expression e : conditions)
{
ByteBuffer expected = eval(e);
if (expected.equals(value))
if (expected != null && expected.equals(value))
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

eval is now Nullable but this code path shouldn't really ever see a null... maybe if you did something like [1] - [1] then we could get a null here... mostly added to be correct and defensive (and intellij yelled at me)

return true;
}
return false;
@@ -893,28 +942,60 @@ private List<Clustering<ByteBuffer>> keys(Map<Symbol, List<? extends Expression>
return current.stream().map(BufferClustering::new).collect(Collectors.toList());
}

private static ByteBuffer eval(Symbol col, @Nullable ByteBuffer current, Expression e)
{
if (!(e instanceof AssignmentOperator)) return eval(e);
// multi cell collections have the property that they do update even if the current value is null
boolean isFancy = col.type().isCollection() && col.type().isMultiCell();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Curious about the isFancy name choosing :-)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i felt it was fancy!

if (current == null && !isFancy) return null; // null + ? == null
var assignment = (AssignmentOperator) e;
if (isFancy && current == null)
{
return assignment.kind == AssignmentOperator.Kind.SUBTRACT
// if it doesn't exist, then there is nothing to subtract
? null
: eval(assignment.right);
}
switch (assignment.kind)
{
case ADD:
return eval(new Operator(Operator.Kind.ADD, new Literal(current, e.type()), assignment.right));
case SUBTRACT:
return eval(new Operator(Operator.Kind.SUBTRACT, new Literal(current, e.type()), assignment.right));
default:
throw new UnsupportedOperationException(assignment.kind + ": " + assignment.toCQL());
}
}

@Nullable
private static ByteBuffer eval(Expression e)
{
return ExpressionEvaluator.tryEvalEncoded(e).get();
return ExpressionEvaluator.evalEncoded(e);
}

private static class Row
{
private static final Row EMPTY = new Row(ImmutableUniqueList.empty(), ByteBufferUtil.EMPTY_ARRAY);

private final ImmutableUniqueList<Symbol> columns;
private final ByteBuffer[] values;

private Row(ImmutableUniqueList<Symbol> columns, ByteBuffer[] values)
{
this.columns = columns;
this.values = values;
if (columns.size() != values.length)
throw new IllegalArgumentException("Columns " + columns + " should have the same size as values, but had " + values.length);
}

public String asCQL(Symbol symbol)
{
int offset = columns.indexOf(symbol);
assert offset >= 0;
ByteBuffer b = values[offset];
return (b == null || ByteBufferUtil.EMPTY_BYTE_BUFFER.equals(b)) ? "null" : symbol.type().asCQL3Type().toCQLLiteral(b);
if (b == null) return "null";
if (ByteBufferUtil.EMPTY_BYTE_BUFFER.equals(b)) return "<empty>";
return symbol.type().asCQL3Type().toCQLLiteral(b);
}

public List<String> asCQL()
@@ -925,6 +1006,40 @@ public List<String> asCQL()
return human;
}

public BitSet diff(Row other)
{
if (!columns.equals(other.columns))
throw new UnsupportedOperationException("Columns do not match: expected " + columns + " but given " + other.columns);
int maxLength = Math.max(values.length, other.values.length);
int minLength = Math.min(values.length, other.values.length);
BitSet set = new BitSet(maxLength);
for (int i = 0; i < minLength; i++)
{
ByteBuffer a = values[i];
ByteBuffer b = other.values[i];
if (!Objects.equals(a, b))
set.set(i);
}
for (int i = minLength; i < maxLength; i++)
set.set(i);
return set;
}

public Row select(BitSet selection)
{
if (selection.isEmpty()) return EMPTY;
var names = ImmutableUniqueList.<Symbol>builder(selection.cardinality());
ByteBuffer[] copy = new ByteBuffer[selection.cardinality()];
int offset = 0;
for (int i = 0; i < this.values.length; i++)
{
if (!selection.get(i)) continue;
names.add(this.columns.get(i));
copy[offset++] = this.values[i];
}
return new Row(names.build(), copy);
}

@Override
public boolean equals(Object o)
{
Loading