diff --git a/src/java/org/apache/cassandra/config/CassandraRelevantProperties.java b/src/java/org/apache/cassandra/config/CassandraRelevantProperties.java index c56626c9863c..f2778efca416 100644 --- a/src/java/org/apache/cassandra/config/CassandraRelevantProperties.java +++ b/src/java/org/apache/cassandra/config/CassandraRelevantProperties.java @@ -638,6 +638,13 @@ public enum CassandraRelevantProperties */ SAI_QUERY_KIND_PER_QUERY_METRICS_ENABLED("cassandra.sai.metrics.query_kind.per_query.enabled", "false"), + /** + * Whether to enable SAI query plan metrics such as the estimated cost, estimated number of rows, + * number of indexes used in the original and optimized query plan, etc. + * These metrics are counters and histograms. + */ + SAI_QUERY_PLAN_METRICS_ENABLED("cassandra.sai.metrics.query_plan.enabled", "true"), + /** * Whether to enable SAI index metrics such as memtable flush metrics, compaction metrics, and disk usage metrics. * These metrics include timers, histograms, counters, and gauges for index operations. diff --git a/src/java/org/apache/cassandra/index/sai/QueryContext.java b/src/java/org/apache/cassandra/index/sai/QueryContext.java index f5cf235b9dea..e8066f2b0017 100644 --- a/src/java/org/apache/cassandra/index/sai/QueryContext.java +++ b/src/java/org/apache/cassandra/index/sai/QueryContext.java @@ -19,11 +19,15 @@ package org.apache.cassandra.index.sai; import java.util.concurrent.TimeUnit; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; import javax.annotation.concurrent.NotThreadSafe; import com.google.common.annotations.VisibleForTesting; +import org.apache.cassandra.config.CassandraRelevantProperties; import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.index.sai.plan.Plan; import org.apache.cassandra.index.sai.utils.AbortedOperationException; import org.apache.cassandra.utils.MonotonicClock; @@ -88,9 +92,7 @@ public class QueryContext private float annRerankFloor = 0.0f; // only called from single-threaded setup code - // Determines the order of using indexes for filtering and sorting. - // Null means the query execution order hasn't been decided yet. - private FilterSortOrder filterSortOrder = null; + private PlanInfo queryPlanInfo; @VisibleForTesting public QueryContext() @@ -219,12 +221,6 @@ public void addAnnGraphSearchLatency(long val) annGraphSearchLatency += val; } - public void setFilterSortOrder(FilterSortOrder filterSortOrder) - { - checkThreadOwnership(); - this.filterSortOrder = filterSortOrder; - } - public void checkpoint() { checkThreadOwnership(); @@ -250,17 +246,10 @@ public void updateAnnRerankFloor(float observedFloor) annRerankFloor = max(annRerankFloor, observedFloor); } - /** - * Determines the order of filtering and sorting operations. - * Currently used only by vector search. - */ - public enum FilterSortOrder + public void recordQueryPlan(Plan.RowsIteration originalPlan, Plan.RowsIteration optimizedPlan) { - /** First get the matching keys from the non-vector indexes, then use vector index to return the top K by similarity order */ - SEARCH_THEN_ORDER, - - /** First get the candidates in ANN order from the vector index, then fetch the rows and filter them until we find K matching the predicates */ - SCAN_THEN_FILTER + if (CassandraRelevantProperties.SAI_QUERY_PLAN_METRICS_ENABLED.getBoolean()) + this.queryPlanInfo = new PlanInfo(originalPlan, optimizedPlan); } public Snapshot snapshot() @@ -311,7 +300,9 @@ public static class Snapshot public final long triePostingsDecodes; public final long queryTimeouts; public final long annGraphSearchLatency; - public final FilterSortOrder filterSortOrder; + + @Nullable + public final PlanInfo queryPlanInfo; /** * Creates a snapshot of all the metrics in the given {@link QueryContext}. @@ -339,7 +330,38 @@ private Snapshot(QueryContext context) triePostingsDecodes = context.triePostingsDecodes; queryTimeouts = context.queryTimeouts; annGraphSearchLatency = context.annGraphSearchLatency; - filterSortOrder = context.filterSortOrder; + queryPlanInfo = context.queryPlanInfo; + } + } + + /** + * Captures relevant information about a query plan, both original and optimized. + */ + public static class PlanInfo + { + public final boolean searchExecutedBeforeOrder; + public final boolean filterExecutedAfterOrderedScan; + + public final long costEstimated; + public final long rowsToReturnEstimated; + public final long rowsToFetchEstimated; + public final long keysToIterateEstimated; + public final int logSelectivityEstimated; + + public final int indexReferencesInQuery; + public final int indexReferencesInPlan; + + public PlanInfo(@Nonnull Plan.RowsIteration originalPlan, @Nonnull Plan.RowsIteration optimizedPlan) + { + this.costEstimated = Math.round(optimizedPlan.fullCost()); + this.rowsToReturnEstimated = Math.round(optimizedPlan.expectedRows()); + this.rowsToFetchEstimated = Math.round(optimizedPlan.estimatedRowsToFetch()); + this.keysToIterateEstimated = Math.round(optimizedPlan.estimatedKeysToIterate()); + this.logSelectivityEstimated = Math.min(20, (int) Math.floor(-Math.log10(optimizedPlan.selectivity()))); + this.indexReferencesInQuery = originalPlan.referencedIndexCount(); + this.indexReferencesInPlan = optimizedPlan.referencedIndexCount(); + this.searchExecutedBeforeOrder = optimizedPlan.isSearchThenOrderHybrid(); + this.filterExecutedAfterOrderedScan = optimizedPlan.isOrderedScanThenFilterHybrid(); } } } diff --git a/src/java/org/apache/cassandra/index/sai/memory/TrieMemtableIndex.java b/src/java/org/apache/cassandra/index/sai/memory/TrieMemtableIndex.java index 88232353eabc..af70ac7decc4 100644 --- a/src/java/org/apache/cassandra/index/sai/memory/TrieMemtableIndex.java +++ b/src/java/org/apache/cassandra/index/sai/memory/TrieMemtableIndex.java @@ -34,6 +34,9 @@ import com.google.common.collect.Streams; import com.google.common.util.concurrent.Runnables; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + import org.apache.cassandra.cql3.Operator; import org.apache.cassandra.db.Clustering; import org.apache.cassandra.db.DecoratedKey; @@ -55,6 +58,7 @@ import org.apache.cassandra.index.sai.memory.MemoryIndex.PkWithFrequency; import org.apache.cassandra.index.sai.plan.Expression; import org.apache.cassandra.index.sai.plan.Orderer; +import org.apache.cassandra.index.sai.plan.QueryController; import org.apache.cassandra.index.sai.utils.BM25Utils; import org.apache.cassandra.index.sai.utils.PrimaryKey; import org.apache.cassandra.index.sai.utils.PrimaryKeyWithByteComparable; @@ -74,6 +78,8 @@ public class TrieMemtableIndex extends AbstractMemtableIndex { + private static final Logger logger = LoggerFactory.getLogger(TrieMemtableIndex.class); + private final ShardBoundaries boundaries; private final MemoryIndex[] rangeIndexes; private final IndexContext indexContext; @@ -104,6 +110,8 @@ public TrieMemtableIndex(IndexContext indexContext, Memtable memtable, int shard } this.sensorContext = Context.from(indexContext); this.requestTracker = RequestTracker.instance; + + logger.debug("Creating TrieMemtableIndex for index {} with shard count: {}", indexContext.getIndexName(), boundaries.shardCount()); } @Override diff --git a/src/java/org/apache/cassandra/index/sai/metrics/TableQueryMetrics.java b/src/java/org/apache/cassandra/index/sai/metrics/TableQueryMetrics.java index e94c00d6e975..187af931fdbd 100644 --- a/src/java/org/apache/cassandra/index/sai/metrics/TableQueryMetrics.java +++ b/src/java/org/apache/cassandra/index/sai/metrics/TableQueryMetrics.java @@ -22,6 +22,8 @@ import java.util.function.Predicate; import java.util.regex.Pattern; +import javax.annotation.Nullable; + import com.codahale.metrics.Counter; import com.codahale.metrics.Histogram; import com.codahale.metrics.Timer; @@ -115,7 +117,7 @@ public void record(QueryContext context, ReadCommand command) { final long queryLatencyMicros = TimeUnit.NANOSECONDS.toMicros(snapshot.totalQueryTimeNs); - if (snapshot.filterSortOrder == QueryContext.FilterSortOrder.SEARCH_THEN_ORDER) + if (snapshot.queryPlanInfo != null && snapshot.queryPlanInfo.searchExecutedBeforeOrder) { Tracing.trace("Index query accessed memtable indexes, {}, and {}, selected {} before ranking, " + "post-filtered {} in {}, and took {} microseconds.", @@ -199,8 +201,8 @@ public static class PerTable extends AbstractQueryMetrics public final Counter totalRowTombstonesFetched; public final Counter totalQueriesCompleted; - public final Counter sortThenFilterQueriesCompleted; - public final Counter filterThenSortQueriesCompleted; + @Nullable + public final QueryPlanMetrics queryPlanMetrics; /** * @param table the table to measure metrics for @@ -220,9 +222,9 @@ public PerTable(TableMetadata table, QueryKind queryKind, Predicate totalRowTombstonesFetched = Metrics.counter(createMetricName("TotalRowTombstonesFetched")); totalQueriesCompleted = Metrics.counter(createMetricName("TotalQueriesCompleted")); totalQueryTimeouts = Metrics.counter(createMetricName("TotalQueryTimeouts")); - - sortThenFilterQueriesCompleted = Metrics.counter(createMetricName("SortThenFilterQueriesCompleted")); - filterThenSortQueriesCompleted = Metrics.counter(createMetricName("FilterThenSortQueriesCompleted")); + queryPlanMetrics = (CassandraRelevantProperties.SAI_QUERY_PLAN_METRICS_ENABLED.getBoolean()) + ? new QueryPlanMetrics() + : null; } @Override @@ -243,10 +245,42 @@ public void record(QueryContext.Snapshot snapshot) totalRowsReturned.inc(snapshot.rowsReturned); totalRowTombstonesFetched.inc(snapshot.rowTombstonesFetched); - if (snapshot.filterSortOrder == QueryContext.FilterSortOrder.SCAN_THEN_FILTER) - sortThenFilterQueriesCompleted.inc(); - else if (snapshot.filterSortOrder == QueryContext.FilterSortOrder.SEARCH_THEN_ORDER) - filterThenSortQueriesCompleted.inc(); + QueryContext.PlanInfo queryPlanInfo = snapshot.queryPlanInfo; + if (queryPlanInfo != null && queryPlanMetrics != null) + { + queryPlanMetrics.totalCostEstimated.inc(queryPlanInfo.costEstimated); + queryPlanMetrics.totalRowsToReturnEstimated.inc(queryPlanInfo.rowsToReturnEstimated); + queryPlanMetrics.totalRowsToFetchEstimated.inc(queryPlanInfo.rowsToFetchEstimated); + queryPlanMetrics.totalKeysToIterateEstimated.inc(queryPlanInfo.keysToIterateEstimated); + + if (queryPlanInfo.filterExecutedAfterOrderedScan) + queryPlanMetrics.sortThenFilterQueriesCompleted.inc(); + if (queryPlanInfo.searchExecutedBeforeOrder) + queryPlanMetrics.filterThenSortQueriesCompleted.inc(); + } + } + + public class QueryPlanMetrics + { + public final Counter totalRowsToReturnEstimated; + public final Counter totalRowsToFetchEstimated; + public final Counter totalKeysToIterateEstimated; + public final Counter totalCostEstimated; + + public final Counter sortThenFilterQueriesCompleted; + public final Counter filterThenSortQueriesCompleted; + + + public QueryPlanMetrics() + { + totalRowsToReturnEstimated = Metrics.counter(createMetricName("TotalRowsToReturnEstimated")); + totalRowsToFetchEstimated = Metrics.counter(createMetricName("TotalRowsToFetchEstimated")); + totalKeysToIterateEstimated = Metrics.counter(createMetricName("TotalKeysToIterateEstimated")); + totalCostEstimated = Metrics.counter(createMetricName("TotalCostEstimated")); + + sortThenFilterQueriesCompleted = Metrics.counter(createMetricName("SortThenFilterQueriesCompleted")); + filterThenSortQueriesCompleted = Metrics.counter(createMetricName("FilterThenSortQueriesCompleted")); + } } } @@ -293,6 +327,9 @@ public static class PerQuery extends AbstractQueryMetrics */ public final Timer annGraphSearchLatency; + @Nullable + public final QueryPlanMetrics queryPlanMetrics; + /** * @param table the table to measure metrics for * @param queryKind an identifier for the kind of query which metrics are being recorded for @@ -323,6 +360,10 @@ public PerQuery(TableMetadata table, QueryKind queryKind, Predicate // Key vector metrics that translate to performance annGraphSearchLatency = Metrics.timer(createMetricName("ANNGraphSearchLatency")); + + queryPlanMetrics = CassandraRelevantProperties.SAI_QUERY_PLAN_METRICS_ENABLED.getBoolean() + ? new QueryPlanMetrics() + : null; } @Override @@ -362,6 +403,75 @@ public void record(QueryContext.Snapshot snapshot) { annGraphSearchLatency.update(snapshot.annGraphSearchLatency, TimeUnit.NANOSECONDS); } + + QueryContext.PlanInfo queryPlanInfo = snapshot.queryPlanInfo; + if (queryPlanInfo != null && queryPlanMetrics != null) + { + queryPlanMetrics.costEstimated.update(queryPlanInfo.costEstimated); + queryPlanMetrics.rowsToReturnEstimated.update(queryPlanInfo.rowsToReturnEstimated); + queryPlanMetrics.rowsToFetchEstimated.update(queryPlanInfo.rowsToFetchEstimated); + queryPlanMetrics.keysToIterateEstimated.update(queryPlanInfo.keysToIterateEstimated); + queryPlanMetrics.logSelectivityEstimated.update(queryPlanInfo.logSelectivityEstimated); + queryPlanMetrics.indexReferencesInQuery.update(queryPlanInfo.indexReferencesInQuery); + queryPlanMetrics.indexReferencesInPlan.update(queryPlanInfo.indexReferencesInPlan); + } } + + /// Metrics related to query planning. + /// Moved to separate class so they can be enabled/disabled as a group. + public class QueryPlanMetrics + { + /** + * Query execution cost as estimated by the planner + */ + public final Histogram costEstimated; + + /** + * Number of rows to be returned from the query as estimated by the planner + */ + public final Histogram rowsToReturnEstimated; + + /** + * Number of rows to be fetched by the query as estimated by the planner + */ + public final Histogram rowsToFetchEstimated; + + /** + * Number of keys to be iterated by the query as estimated by the planner + */ + public final Histogram keysToIterateEstimated; + + /** + * Negative decimal logarithm of selectivity of the query, before applying the LIMIT clause. + * We use logarithm because selectivity values can be very small (e.g. 10^-9). + */ + public final Histogram logSelectivityEstimated; + + /** + * Number of indexes referenced by the optimized query plan. + * The same index referenced from unrelated query clauses, + * leading to separate index searches, are counted separately. + */ + public final Histogram indexReferencesInPlan; + + /** + * Number of indexes referenced by the original query plan before optimization (as stated in the query text) + */ + public final Histogram indexReferencesInQuery; + + QueryPlanMetrics() + { + costEstimated = Metrics.histogram(createMetricName("CostEstimated"), false); + rowsToReturnEstimated = Metrics.histogram(createMetricName("RowsToReturnEstimated"), true); + rowsToFetchEstimated = Metrics.histogram(createMetricName("RowsToFetchEstimated"), true); + keysToIterateEstimated = Metrics.histogram(createMetricName("KeysToIterateEstimated"), true); + logSelectivityEstimated = Metrics.histogram(createMetricName("LogSelectivityEstimated"), true); + indexReferencesInPlan = Metrics.histogram(createMetricName("IndexReferencesInPlan"), true); + indexReferencesInQuery = Metrics.histogram(createMetricName("IndexReferencesInQuery"), false); + } + } + } + + } diff --git a/src/java/org/apache/cassandra/index/sai/plan/Plan.java b/src/java/org/apache/cassandra/index/sai/plan/Plan.java index fd1925fb03f4..587ddf9403d4 100644 --- a/src/java/org/apache/cassandra/index/sai/plan/Plan.java +++ b/src/java/org/apache/cassandra/index/sai/plan/Plan.java @@ -19,6 +19,7 @@ package org.apache.cassandra.index.sai.plan; import java.util.*; +import java.util.function.Consumer; import java.util.function.DoubleSupplier; import java.util.function.Function; import javax.annotation.Nonnull; @@ -27,6 +28,8 @@ import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; +import org.apache.commons.lang3.mutable.MutableDouble; +import org.apache.commons.lang3.mutable.MutableInt; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -215,7 +218,7 @@ final List nodesOfType(Class nodeType) * If node of given type is not found, returns null. */ @SuppressWarnings("unchecked") - final @Nullable T firstNodeOfType(Class nodeType) + public final @Nullable T firstNodeOfType(Class nodeType) { Plan[] result = new Plan[] { null }; forEach(node -> { @@ -366,14 +369,23 @@ protected String description() } /** - * Returns the index context if the plan node uses one. - * Need to be overridden by nodes that use an index. - * Non-recursive. + * Traverses the tree recursively and calls the consumer for each index used in the plan. */ - protected @Nullable IndexContext getIndexContext() + public final void visitIndexesRecursive(Consumer consumer) + { + forEach(node -> { + node.visitIndexes(consumer); + return Plan.ControlFlow.Continue; + }); + } + + /** + * Traverses the tree and calls the consumer for each index used in the plan. + * Non recursive. + */ + protected void visitIndexes(Consumer consumer) { // By default, a node does not contain an index. - return null; } /** @@ -383,7 +395,7 @@ protected String description() * and recomputes the nodes above it. Then it returns the best plan from candidates obtained that way. * The expected running time is proportional to the height of the plan tree multiplied by the number of the leaves. */ - public final Plan optimize() + protected Plan optimize() { if (logger.isTraceEnabled()) logger.trace("Optimizing plan:\n{}", toRedactedStringRecursive()); @@ -416,7 +428,7 @@ public final Plan optimize() * Modifies all intersections to not intersect more clauses than the given limit. * Retains the most selective clauses. */ - public final Plan limitIntersectedClauses(int clauseLimit) + protected Plan limitIntersectedClauses(int clauseLimit) { Plan result = this; if (result instanceof Intersection) @@ -428,12 +440,35 @@ public final Plan limitIntersectedClauses(int clauseLimit) } /** Returns true if the plan contains a node matching the condition */ - final boolean contains(Function condition) + public final boolean contains(Function condition) { ControlFlow res = forEach(node -> (condition.apply(node)) ? ControlFlow.Break : ControlFlow.Continue); return res == ControlFlow.Break; } + /** + * Returns true if the plan represents a hybrid query that first selects the matching + * rows by index-based search and then orders the matching rows in memory. This order of execution + * is best when the query contains a predicate that matches only a very small number of rows. + * Returns false in other cases, including when the query is not a hybrid query. + */ + public final boolean isSearchThenOrderHybrid() + { + return contains(node -> node instanceof KeysSort); + } + + /** + * Returns true if the plan represents a hybrid query that first scans the index in + * the index term-order (sorted by the index terms) and then filters the matching rows in memory. + * This order of execution is best when the query contains a predicate with a poor selectivity. + * Returns false in other cases, including when the query is not a hybrid query. + */ + public final boolean isOrderedScanThenFilterHybrid() + { + return (contains(node -> node instanceof Filter) + && contains(node -> node instanceof IndexScan && ((IndexScan) node).ordering != null)); + } + /** * Returns a new plan with the given node filtering restriction removed. * Searches for the subplan to remove recursively down the tree. @@ -494,6 +529,44 @@ public final double selectivity() return selectivity; } + /** + * Returns the number of indexes referenced by this plan. + * The same index referenced from unrelated query clauses, + * leading to separate index searches, are counted separately. + */ + public final int referencedIndexCount() + { + MutableInt count = new MutableInt(0); + visitIndexesRecursive(index -> count.increment()); + return count.intValue(); + } + + /** + * Returns the estimated number of rows to be fetched from storage. + */ + public final double estimatedRowsToFetch() + { + Fetch fetch = firstNodeOfType(Plan.Fetch.class); + return fetch != null ? fetch.expectedRows() : 0.0; + } + + /** + * Returns the estimated number of primary keys to be iterated by all index iterators. + * This may be larger than the number of rows to fetch because of intersections. + */ + public final double estimatedKeysToIterate() + { + MutableDouble total = new MutableDouble(0.0); + forEach(node -> { + if (node instanceof Leaf) + { + total.add(((Leaf) node).expectedKeys()); + } + return ControlFlow.Continue; + }); + return total.doubleValue(); + } + protected interface Cost { /** @@ -671,6 +744,18 @@ final double costPerKey() return cost().costPerKey(); } + @Override + public final KeysIteration optimize() + { + return (KeysIteration) super.optimize(); + } + + @Override + public final KeysIteration limitIntersectedClauses(int clauseLimit) + { + return (KeysIteration) super.limitIntersectedClauses(clauseLimit); + } + protected abstract boolean usesIncludedIndex(); } @@ -961,10 +1046,10 @@ public String getIndexName() } @Override - final protected IndexContext getIndexContext() + final protected void visitIndexes(Consumer consumer) { assert predicate != null || ordering != null; - return predicate != null ? predicate.context : ordering.context; + consumer.accept(predicate != null ? predicate.context : ordering.context); } } /** @@ -1509,11 +1594,10 @@ protected KeysIteration withAccess(Access access) : new AnnIndexScan(factory, id, access, ordering); } - @Nullable @Override - protected IndexContext getIndexContext() + protected void visitIndexes(Consumer consumer) { - return ordering.context; + consumer.accept(ordering.context); } } @@ -1551,9 +1635,9 @@ protected KeysIteration withAccess(Access access) } @Override - protected IndexContext getIndexContext() + protected void visitIndexes(Consumer consumer) { - return ordering.context; + consumer.accept(ordering.context); } } @@ -1583,10 +1667,25 @@ final double costPerRow() return cost().costPerRow(); } - final double expectedRows() + /** + * Returns the number of rows produced by this plan. + */ + public final double expectedRows() { return cost().expectedRows; } + + @Override + public final RowsIteration optimize() + { + return (RowsIteration) super.optimize(); + } + + @Override + public final RowsIteration limitIntersectedClauses(int clauseLimit) + { + return (RowsIteration) super.limitIntersectedClauses(clauseLimit); + } } /** @@ -2053,6 +2152,17 @@ public TableMetrics(long rows, double avgCellsPerRow, double avgBytesPerRow, int this.avgBytesPerRow = avgBytesPerRow; this.sstables = sstables; } + + @Override + public String toString() + { + return "Plan.TableMetrics {" + + "rows: " + rows + + ", avgCellsPerRow: " + avgCellsPerRow + + ", avgBytesPerRow: " + avgBytesPerRow + + ", sstables: " + sstables + + '}'; + } } /** diff --git a/src/java/org/apache/cassandra/index/sai/plan/QueryController.java b/src/java/org/apache/cassandra/index/sai/plan/QueryController.java index df7f8c3dcffd..29a9a3abdb4d 100644 --- a/src/java/org/apache/cassandra/index/sai/plan/QueryController.java +++ b/src/java/org/apache/cassandra/index/sai/plan/QueryController.java @@ -189,6 +189,7 @@ public QueryController(ColumnFamilyStore cfs, avgCellsPerRow(), avgRowSizeInBytes(), cfs.getLiveSSTables().size()); + logger.debug("Creating Plan.Factory for table " + cfs.getTableName() + ": " + tableMetrics); this.planFactory = new Plan.Factory(cfs.metadata.keyspace, tableMetrics, this, command.rowFilter().indexHints); } @@ -372,12 +373,7 @@ public SinglePartitionReadCommand getPartitionReadCommand(PrimaryKey key, ReadEx private void updateIndexMetricsQueriesCount(Plan plan) { HashSet queriedIndexesContexts = new HashSet<>(); - plan.forEach(node -> { - IndexContext indexContext = node.getIndexContext(); - if (indexContext != null) - queriedIndexesContexts.add(indexContext); - return Plan.ControlFlow.Continue; - }); + plan.visitIndexesRecursive(queriedIndexesContexts::add); queriedIndexesContexts.forEach(indexContext -> indexContext.getIndexMetrics() .ifPresent(m -> m.queriesCount.inc())); } @@ -399,23 +395,18 @@ Plan buildPlan() // in which predicates it leaves in the plan and the probability of accidentally removing a good branch // here is even lower. int intersectionClauseLimit = CassandraRelevantProperties.SAI_INTERSECTION_CLAUSE_LIMIT.getInt(); - Plan plan = rowsIteration.limitIntersectedClauses(intersectionClauseLimit * 3); + Plan.RowsIteration origPlan = rowsIteration.limitIntersectedClauses(intersectionClauseLimit * 3); + Plan.RowsIteration plan = origPlan; if (QUERY_OPT_LEVEL > 0) - plan = plan.optimize(); + plan = origPlan.optimize(); plan = plan.limitIntersectedClauses(intersectionClauseLimit); - - if (plan.contains(node -> node instanceof Plan.Filter) - && plan.contains(node -> node instanceof Plan.IndexScan && ((Plan.IndexScan) node).ordering != null)) - queryContext.setFilterSortOrder(QueryContext.FilterSortOrder.SCAN_THEN_FILTER); - if (plan.contains(node -> node instanceof Plan.KeysSort)) - queryContext.setFilterSortOrder(QueryContext.FilterSortOrder.SEARCH_THEN_ORDER); - + queryContext.recordQueryPlan(origPlan, plan); updateIndexMetricsQueriesCount(plan); - if (logger.isTraceEnabled()) - logger.trace("Query execution plan:\n" + plan.toRedactedStringRecursive()); + //if (logger.isTraceEnabled()) + logger.debug("Query execution plan:\n" + plan.toRedactedStringRecursive()); if (Tracing.isTracing()) { @@ -485,8 +476,10 @@ private KeyRangeIterator buildIterator(Expression predicate) */ QueryView getQueryView(IndexContext context) throws QueryView.Builder.MissingIndexException { - return queryViews.computeIfAbsent(context, + var qv = queryViews.computeIfAbsent(context, c -> new QueryView.Builder(c, mergeRange).build()); + logger.debug("Query view for index {}: {}", context.getIndexName(), qv); + return qv; } private float avgCellsPerRow() diff --git a/src/java/org/apache/cassandra/index/sai/plan/QueryView.java b/src/java/org/apache/cassandra/index/sai/plan/QueryView.java index 46b4d2205280..d186b3358cda 100644 --- a/src/java/org/apache/cassandra/index/sai/plan/QueryView.java +++ b/src/java/org/apache/cassandra/index/sai/plan/QueryView.java @@ -77,6 +77,27 @@ public long getTotalSStableRows() return viewFragment.sstables.stream().mapToLong(SSTableReader::getTotalRows).sum(); } + /** + * Returns the total count of operations in all memtables in this view + */ + private long getTotalMemtableOperations() + { + long operations = 0; + for (Memtable memtable : viewFragment.memtables) + { + operations += memtable.getOperations(); + } + return operations; + } + + /** + * Returns the total count of indexed rows in all memtable indexes in this view + */ + private long getTotalMemtableIndexesRows() + { + return memtableIndexes.stream().mapToLong(MemtableIndex::getRowCount).sum(); + } + /** * Build a query specific view of the memtables, sstables, and indexes for a query. * For use with SAI ordered queries to ensure that the view is consistent over the lifetime of the query, @@ -199,4 +220,11 @@ private boolean indexInRange(SSTableIndex index) return range.left.compareTo(sstable.last) <= 0 && (range.right.isMinimum() || sstable.first.compareTo(range.right) <= 0); } } + + @Override + public String toString() + { + return String.format("QueryView {sstables: %d, memtables: %d, total sstable rows: %d, total memtable ops: %d, total memtable index rows: %d}", + sstableIndexes.size(), memtableIndexes.size(), getTotalSStableRows(), getTotalMemtableOperations(), getTotalMemtableIndexesRows()); + } } diff --git a/test/unit/org/apache/cassandra/index/sai/SAITester.java b/test/unit/org/apache/cassandra/index/sai/SAITester.java index 873abfd225ab..bb52017c0f8c 100644 --- a/test/unit/org/apache/cassandra/index/sai/SAITester.java +++ b/test/unit/org/apache/cassandra/index/sai/SAITester.java @@ -516,6 +516,18 @@ protected Object getMetricValue(ObjectName metricObjectName) return metricValue; } + protected double getHistogramMean(ObjectName metricObjectName) + { + try + { + return ((Number) getMBeanAttribute(metricObjectName, "Mean")).doubleValue(); + } + catch (Exception e) + { + throw new RuntimeException(e); + } + } + protected void assertMetricExists(ObjectName name) { Assertions.assertThatNoException().isThrownBy(() -> getMetricValue(name)); diff --git a/test/unit/org/apache/cassandra/index/sai/metrics/AbstractMetricsTest.java b/test/unit/org/apache/cassandra/index/sai/metrics/AbstractMetricsTest.java index d982e233d145..0dfb38d81c13 100644 --- a/test/unit/org/apache/cassandra/index/sai/metrics/AbstractMetricsTest.java +++ b/test/unit/org/apache/cassandra/index/sai/metrics/AbstractMetricsTest.java @@ -20,9 +20,11 @@ import java.util.concurrent.TimeUnit; import javax.management.ObjectName; +import org.junit.After; import org.junit.Before; import org.junit.Ignore; +import org.apache.cassandra.config.CassandraRelevantProperties; import org.apache.cassandra.index.sai.SAITester; import org.apache.cassandra.utils.Throwables; @@ -32,6 +34,11 @@ @Ignore public abstract class AbstractMetricsTest extends SAITester { + boolean indexMetricsEnabled; + boolean perQueryKindPerTableMetricsEnabled; + boolean perQueryKindPerQueryMetricsEnabled; + boolean queryPlanMetricsEnabled; + @Before public void initializeTest() throws Throwable { @@ -40,6 +47,20 @@ public void initializeTest() throws Throwable startJMXServer(); createMBeanServerConnection(); + + indexMetricsEnabled = CassandraRelevantProperties.SAI_INDEX_METRICS_ENABLED.getBoolean(); + perQueryKindPerQueryMetricsEnabled = CassandraRelevantProperties.SAI_QUERY_KIND_PER_QUERY_METRICS_ENABLED.getBoolean(); + perQueryKindPerTableMetricsEnabled = CassandraRelevantProperties.SAI_QUERY_KIND_PER_TABLE_METRICS_ENABLED.getBoolean(); + queryPlanMetricsEnabled = CassandraRelevantProperties.SAI_QUERY_PLAN_METRICS_ENABLED.getBoolean(); + } + + @After + public void cleanupTest() throws Throwable + { + CassandraRelevantProperties.SAI_INDEX_METRICS_ENABLED.setBoolean(indexMetricsEnabled); + CassandraRelevantProperties.SAI_QUERY_KIND_PER_TABLE_METRICS_ENABLED.setBoolean(perQueryKindPerTableMetricsEnabled); + CassandraRelevantProperties.SAI_QUERY_KIND_PER_QUERY_METRICS_ENABLED.setBoolean(perQueryKindPerQueryMetricsEnabled); + CassandraRelevantProperties.SAI_QUERY_PLAN_METRICS_ENABLED.setBoolean(indexMetricsEnabled); } protected void waitForIndexCompaction(String keyspace, String table, String index) @@ -85,7 +106,6 @@ protected void waitForHistogramMeanBetween(ObjectName name, double min, double m }, 10, TimeUnit.SECONDS); } - protected void waitForGreaterThanZero(ObjectName name) { waitForAssert(() -> { @@ -99,4 +119,19 @@ protected void waitForGreaterThanZero(ObjectName name) } }, 160, TimeUnit.SECONDS); } + + protected void waitForMetricValueBetween(ObjectName name, long min, long max) + { + waitForAssert(() -> { + try + { + double value = ((Number) getMetricValue(name)).longValue(); + assertTrue("Metric value " + value + " is between " + min + " and " + max, value >= min && value <= max); + } + catch (Throwable ex) + { + throw Throwables.unchecked(ex); + } + }, 60, TimeUnit.SECONDS); + } } diff --git a/test/unit/org/apache/cassandra/index/sai/metrics/QueryMetricsTest.java b/test/unit/org/apache/cassandra/index/sai/metrics/QueryMetricsTest.java index b40c539e0303..3f91611ffdb0 100644 --- a/test/unit/org/apache/cassandra/index/sai/metrics/QueryMetricsTest.java +++ b/test/unit/org/apache/cassandra/index/sai/metrics/QueryMetricsTest.java @@ -652,6 +652,138 @@ private void verifyHistogramCount(String name, boolean hasPerQueryKindMetrics) waitForHistogramCountEqualsIfExists(hasPerQueryKindMetrics, objectName(name, PER_MP_HYBRID_QUERY_METRIC_TYPE), 1); } + @Test + public void testQueryPlannerMetrics() + { + CassandraRelevantProperties.SAI_QUERY_PLAN_METRICS_ENABLED.setBoolean(true); + + String table = createTable("CREATE TABLE %s (k int PRIMARY KEY, lc int, hc int)"); + createIndex("CREATE CUSTOM INDEX ON %s(lc) USING 'StorageAttachedIndex'"); + createIndex("CREATE CUSTOM INDEX ON %s(hc) USING 'StorageAttachedIndex'"); + + int numRows = 10000; + for (int i = 0; i < numRows; i++) + { + execute("INSERT INTO %s (k, lc, hc) VALUES (?, ?, ?)", i, i % 2, i); + } + + flush(); + + UntypedResultSet rows = execute("SELECT k FROM %s WHERE lc = 0"); + assertEquals(numRows / 2, rows.size()); + + final double ESTIMATION_TOLERANCE = 0.25; + final double LOWER_BOUND_MULTIPLIER = 1.0 - ESTIMATION_TOLERANCE; + final double UPPER_BOUND_MULTIPLIER = 1.0 + ESTIMATION_TOLERANCE; + + var rowsToReturnEstimatedMetric = objectNameNoIndex("RowsToReturnEstimated", KEYSPACE, table, PER_QUERY_METRIC_TYPE); + waitForHistogramCountEquals(rowsToReturnEstimatedMetric, 1); + waitForHistogramMeanBetween(rowsToReturnEstimatedMetric, numRows / 2.0 * LOWER_BOUND_MULTIPLIER, numRows / 2.0 * UPPER_BOUND_MULTIPLIER); + + var rowsToFetchEstimatedMetric = objectNameNoIndex("RowsToFetchEstimated", KEYSPACE, table, PER_QUERY_METRIC_TYPE); + waitForHistogramCountEquals(rowsToFetchEstimatedMetric, 1); + waitForHistogramMeanBetween(rowsToFetchEstimatedMetric, numRows / 2.0 * LOWER_BOUND_MULTIPLIER, numRows / 2.0 * UPPER_BOUND_MULTIPLIER); + + var keysToIterateEstimatedMetric = objectNameNoIndex("KeysToIterateEstimated", KEYSPACE, table, PER_QUERY_METRIC_TYPE); + waitForHistogramCountEquals(keysToIterateEstimatedMetric, 1); + waitForHistogramMeanBetween(keysToIterateEstimatedMetric, numRows / 2.0 * LOWER_BOUND_MULTIPLIER, numRows / 2.0 * UPPER_BOUND_MULTIPLIER); + + var objectName = objectNameNoIndex("CostEstimated", KEYSPACE, table, PER_QUERY_METRIC_TYPE); + waitForHistogramCountEquals(objectName, 1); + waitForHistogramMeanBetween(objectName, 1.0, Double.POSITIVE_INFINITY); + + var logSelectivityEstimatedMetric = objectNameNoIndex("LogSelectivityEstimated", KEYSPACE, table, PER_QUERY_METRIC_TYPE); + waitForHistogramCountEquals(logSelectivityEstimatedMetric, 1); + waitForHistogramMeanBetween(logSelectivityEstimatedMetric, 0, 0); + + var indexReferencesInQueryMetric = objectNameNoIndex("IndexReferencesInQuery", KEYSPACE, table, PER_QUERY_METRIC_TYPE); + waitForHistogramCountEquals(indexReferencesInQueryMetric, 1); + waitForHistogramMeanBetween(indexReferencesInQueryMetric, 1.0, 1.0); + + var indexReferencesInPlan = objectNameNoIndex("IndexReferencesInPlan", KEYSPACE, table, PER_QUERY_METRIC_TYPE); + waitForHistogramCountEquals(indexReferencesInPlan, 1); + waitForHistogramMeanBetween(indexReferencesInPlan, 1.0, 1.0); + + objectName = objectNameNoIndex("TotalRowsToReturnEstimated", KEYSPACE, table, TABLE_QUERY_METRIC_TYPE); + waitForMetricValueBetween(objectName, (long)(numRows / 2.0 * LOWER_BOUND_MULTIPLIER), (long)(numRows / 2.0 * UPPER_BOUND_MULTIPLIER)); + + objectName = objectNameNoIndex("TotalRowsToFetchEstimated", KEYSPACE, table, TABLE_QUERY_METRIC_TYPE); + waitForMetricValueBetween(objectName, (long)(numRows / 2.0 * LOWER_BOUND_MULTIPLIER), (long)(numRows / 2.0 * UPPER_BOUND_MULTIPLIER)); + + objectName = objectNameNoIndex("TotalKeysToIterateEstimated", KEYSPACE, table, TABLE_QUERY_METRIC_TYPE); + waitForMetricValueBetween(objectName, (long)(numRows / 2.0 * LOWER_BOUND_MULTIPLIER), (long)(numRows / 2.0 * UPPER_BOUND_MULTIPLIER)); + + objectName = objectNameNoIndex("TotalCostEstimated", KEYSPACE, table, TABLE_QUERY_METRIC_TYPE); + waitForMetricValueBetween(objectName, 1, Long.MAX_VALUE); + + rows = execute("SELECT k FROM %s WHERE lc = 0 AND hc < 10"); + assertEquals(5, rows.size()); + + waitForHistogramCountEquals(logSelectivityEstimatedMetric, 2); + waitForHistogramMeanBetween(logSelectivityEstimatedMetric, 1, 2); + + waitForHistogramCountEquals(indexReferencesInQueryMetric, 2); + waitForHistogramMeanBetween(indexReferencesInQueryMetric, 1.4999, 1.5001); // average of 2 indexes and 1 index + + waitForHistogramCountEquals(indexReferencesInPlan, 2); + waitForHistogramMeanBetween(indexReferencesInPlan, 1.0, 1.0); // low selectivity index eliminated by optimisation + + // Check estimates are updated also for queries returning 0 rows + // 0 is special, log selectivity would be -infinity, so we need to check if there is no overflow + var oldRowsEstimated = getHistogramMean(rowsToFetchEstimatedMetric); + var oldLogSelectivityEstimated = getHistogramMean(logSelectivityEstimatedMetric); + rows = execute("SELECT k FROM %s WHERE lc = -1"); + assertEquals(0, rows.size()); + var newRowsEstimated = getHistogramMean(rowsToFetchEstimatedMetric); + assertTrue(newRowsEstimated < oldRowsEstimated); + var newLogSelectivityEstimated = getHistogramMean(logSelectivityEstimatedMetric); + assertTrue(Double.isFinite(newLogSelectivityEstimated)); + assertTrue(newLogSelectivityEstimated > oldLogSelectivityEstimated); + } + + @Test + public void testDisableQueryPlanMetrics() + { + CassandraRelevantProperties.SAI_QUERY_PLAN_METRICS_ENABLED.setBoolean(false); + + String table = createTable("CREATE TABLE %s (k int PRIMARY KEY, lc int, hc int)"); + createIndex("CREATE CUSTOM INDEX ON %s(lc) USING 'StorageAttachedIndex'"); + createIndex("CREATE CUSTOM INDEX ON %s(hc) USING 'StorageAttachedIndex'"); + + int numRows = 10000; + for (int i = 0; i < numRows; i++) + { + execute("INSERT INTO %s (k, lc, hc) VALUES (?, ?, ?)", i, i % 2, i); + } + + flush(); + + // Check if SAI queries still work correctly when plan metrics are disabled + UntypedResultSet rows; + rows = execute("SELECT k FROM %s WHERE lc = 0"); + assertEquals(numRows / 2, rows.size()); + rows = execute("SELECT k FROM %s WHERE lc = 0 AND hc < 10"); + assertEquals(5, rows.size()); + rows = execute("SELECT k FROM %s WHERE lc = 0 ORDER BY hc LIMIT 100"); + assertEquals(100, rows.size()); + rows = execute("SELECT k FROM %s WHERE k = 1 AND hc = 1"); + assertEquals(1, rows.size()); + + assertMetricDoesNotExist(objectNameNoIndex("RowsToReturnEstimated", KEYSPACE, table, PER_QUERY_METRIC_TYPE)); + assertMetricDoesNotExist(objectNameNoIndex("RowsToFetchEstimated", KEYSPACE, table, PER_QUERY_METRIC_TYPE)); + assertMetricDoesNotExist(objectNameNoIndex("KeysToIterateEstimated", KEYSPACE, table, PER_QUERY_METRIC_TYPE)); + assertMetricDoesNotExist(objectNameNoIndex("CostEstimated", KEYSPACE, table, PER_QUERY_METRIC_TYPE)); + assertMetricDoesNotExist(objectNameNoIndex("LogSelectivityEstimated", KEYSPACE, table, PER_QUERY_METRIC_TYPE)); + assertMetricDoesNotExist(objectNameNoIndex("IndexReferencesInQuery", KEYSPACE, table, PER_QUERY_METRIC_TYPE)); + assertMetricDoesNotExist(objectNameNoIndex("IndexReferencesInPlan", KEYSPACE, table, PER_QUERY_METRIC_TYPE)); + assertMetricDoesNotExist(objectNameNoIndex("TotalRowsToReturnEstimated", KEYSPACE, table, TABLE_QUERY_METRIC_TYPE)); + assertMetricDoesNotExist(objectNameNoIndex("TotalRowsToFetchEstimated", KEYSPACE, table, TABLE_QUERY_METRIC_TYPE)); + assertMetricDoesNotExist(objectNameNoIndex("TotalKeysToIterateEstimated", KEYSPACE, table, TABLE_QUERY_METRIC_TYPE)); + assertMetricDoesNotExist(objectNameNoIndex("TotalCostEstimated", KEYSPACE, table, TABLE_QUERY_METRIC_TYPE)); + assertMetricDoesNotExist(objectNameNoIndex("SortThenFilterQueriesCompleted", KEYSPACE, table, TABLE_QUERY_METRIC_TYPE)); + assertMetricDoesNotExist(objectNameNoIndex("FilterThenSortQueriesCompleted", KEYSPACE, table, TABLE_QUERY_METRIC_TYPE)); + } + private ObjectName objectName(String name, String type) { return objectNameNoIndex(name, KEYSPACE, currentTable(), type); diff --git a/test/unit/org/apache/cassandra/index/sai/plan/PlanTest.java b/test/unit/org/apache/cassandra/index/sai/plan/PlanTest.java index a64686c90549..943e08ecc7c4 100644 --- a/test/unit/org/apache/cassandra/index/sai/plan/PlanTest.java +++ b/test/unit/org/apache/cassandra/index/sai/plan/PlanTest.java @@ -120,6 +120,7 @@ public void empty() Plan.KeysIteration plan = factory.indexScan(saiPred1, 0); assertTrue(plan instanceof Plan.NumericIndexScan); assertEquals(0.0, plan.expectedKeys(), 0.01); + assertEquals(0.0, plan.estimatedKeysToIterate(), 0.01); assertEquals(0.0, plan.selectivity(), 0.01); assertEquals(0.0, plan.costPerKey(), 0.01); } @@ -298,6 +299,7 @@ public void fetch() Plan.KeysIteration i = factory.indexScan(saiPred1, (long) (0.5 * factory.tableMetrics.rows)); Plan.RowsIteration s = factory.fetch(i); assertEquals(0.5 * factory.tableMetrics.rows, s.expectedRows(), 0.01); + assertEquals(0.5 * factory.tableMetrics.rows, s.estimatedRowsToFetch(), 0.01); assertTrue(s.fullCost() > 0.5 * factory.tableMetrics.rows * (SAI_KEY_COST + ROW_COST)); } @@ -340,6 +342,7 @@ public void annSort() Plan.KeysIteration s = factory.sort(i, ordering); assertEquals(0.5 * factory.tableMetrics.rows, s.expectedKeys(), 0.01); + assertEquals(0.5 * factory.tableMetrics.rows, s.estimatedKeysToIterate(), 0.01); assertTrue(s.initCost() >= i.fullCost()); } @@ -367,6 +370,7 @@ public void annScan() { Plan.KeysIteration i = factory.sort(factory.everything, ordering); assertEquals(factory.tableMetrics.rows, i.expectedKeys(), 0.01); + assertEquals(factory.tableMetrics.rows, i.estimatedKeysToIterate(), 0.01); assertEquals(i.initCost() + factory.costEstimator.estimateAnnSearchCost(ordering, (int) ceil(i.expectedKeys()), factory.tableMetrics.rows), i.fullCost(), 0.01); } @@ -1051,6 +1055,28 @@ public void testLazyAccessPropagation() Mockito.verify(indexScan1, Mockito.times(1)).estimateCost(); } + @Test + public void testReferencedIndexes() + { + Plan.KeysIteration indexScan1 = factory.indexScan(saiPred1, (long) (0.001 * factory.tableMetrics.rows)); // numeric + Plan.KeysIteration indexScan2 = factory.indexScan(saiPred2, (long) (0.001 * factory.tableMetrics.rows)); // numeric + Plan.KeysIteration indexScan3 = factory.indexScan(saiPred4, (long) (0.5 * factory.tableMetrics.rows)); // literal + Plan.KeysIteration sort = factory.sort(indexScan1, ordering); // will generate ordered scan + Plan.KeysIteration intersection = factory.intersection(Lists.newArrayList(sort, indexScan2, indexScan3)); + Plan.RowsIteration fetch = factory.fetch(intersection); + Plan.RowsIteration postFilter = factory.recheckFilter(rowFilter123, fetch); + Plan.RowsIteration plan = factory.limit(postFilter, 3); + + assertEquals(0, factory.everything.referencedIndexCount()); + assertEquals(0, factory.nothing.referencedIndexCount()); + assertEquals(1, indexScan1.referencedIndexCount()); + assertEquals(1, indexScan2.referencedIndexCount()); + assertEquals(1, indexScan3.referencedIndexCount()); + assertEquals(1, sort.referencedIndexCount()); + assertEquals(3, intersection.referencedIndexCount()); + assertEquals(3, plan.referencedIndexCount()); + } + private List ids(List subplans) { return subplans.stream().map(p -> p.id).collect(Collectors.toList());