datastax · michaeljmarshall · Oct 2, 2025 · Oct 3, 2025 · Oct 6, 2025 · Oct 8, 2025
diff --git a/build.xml b/build.xml
@@ -743,7 +743,7 @@
           <dependency groupId="org.apache.lucene" artifactId="lucene-core" version="9.8.0" />
           <dependency groupId="org.apache.lucene" artifactId="lucene-analysis-common" version="9.8.0" />
           <dependency groupId="org.apache.lucene" artifactId="lucene-backward-codecs" version="9.8.0" />
-          <dependency groupId="io.github.jbellis" artifactId="jvector" version="4.0.0-rc.5" />
+          <dependency groupId="io.github.jbellis" artifactId="jvector" version="4.0.0-rc.7-352eda26" />
           <dependency groupId="com.bpodgursky" artifactId="jbool_expressions" version="1.14" scope="test"/>
 
           <dependency groupId="com.carrotsearch.randomizedtesting" artifactId="randomizedtesting-runner" version="2.1.2" scope="test">

diff --git a/src/java/org/apache/cassandra/cache/ChunkCache.java b/src/java/org/apache/cassandra/cache/ChunkCache.java
@@ -297,7 +297,7 @@ public void invalidateFileNow(File file)
         synchronousCache.invalidateAll(Iterables.filter(cache.asMap().keySet(), x -> (x.readerId & mask) == fileId));
     }
 
-    static class Key
+    static class Key implements Comparable<Key>
     {
         final long readerId;
         final long position;
@@ -312,11 +312,15 @@ private Key(long readerId, long position)
         @Override
         public int hashCode()
         {
-            final int prime = 31;
-            int result = 1;
-            result = prime * result + Long.hashCode(readerId);
-            result = prime * result + Long.hashCode(position);
-            return result;
+            // Scramble readerId with an odd 64-bit golden-ratio constant (odd, not prime) before adding position, so
+            // distinct keys with the same readerId + position sum do not systematically share a hash, then mix again.
+            long mixed = (readerId * 0x9E3779B97F4A7C15L + position) * 0x9E3779B97F4A7C15L;
+
+            // Spread the bits (XOR-shift) to ensure high bits affect low bits
+            mixed ^= (mixed >>> 32);
+            mixed ^= (mixed >>> 16);
+
+            return (int) mixed;
         }
 
         @Override
@@ -331,6 +335,17 @@ public boolean equals(Object obj)
             return (position == other.position)
                    && readerId == other.readerId;
         }
+
+        @Override
+        public int compareTo(Key other)
+        {
+            // Primary sort key: readerId
+            int byReader = Long.compare(readerId, other.readerId);
+            if (byReader != 0)
+                return byReader;
+            // Equal readerId: order by position
+            return Long.compare(position, other.position);
+        }
     }
 
     /**

diff --git a/src/java/org/apache/cassandra/config/CassandraRelevantProperties.java b/src/java/org/apache/cassandra/config/CassandraRelevantProperties.java
@@ -436,12 +436,16 @@ public enum CassandraRelevantProperties
     SAI_VECTOR_FLUSH_THRESHOLD_MAX_ROWS("cassandra.sai.vector_flush_threshold_max_rows", "-1"),
     // Use non-positive value to disable it. Period in millis to trigger a flush for SAI vector memtable index.
     SAI_VECTOR_FLUSH_PERIOD_IN_MILLIS("cassandra.sai.vector_flush_period_in_millis", "-1"),
+    // Whether compaction should build vector indexes using fused ADC
+    SAI_VECTOR_ENABLE_FUSED("cassandra.sai.vector.enable_fused", "true"),
     // Use nvq when building graphs in compaction. Disabled by default for now. Enabling will reduce recall slightly
     // while also reducing the storage footprint.
     SAI_VECTOR_ENABLE_NVQ("cassandra.sai.vector.enable_nvq", "false"),
     // NVQ number of subvectors. This isn't really expected to change much so we're only exposing
     // it as a global variable in case it's needed.
     SAI_VECTOR_NVQ_NUM_SUB_VECTORS("cassandra.sai.vector.nvq_num_sub_vectors", "2"),
+    // When building a compaction graph, encode layer 0 nodes in parallel and subsequently use async io for writes.
+    SAI_ENCODE_AND_WRITE_VECTOR_GRAPH_IN_PARALLEL("cassandra.sai.vector.encode_write_graph_parallel", "true"),
     /**
      * Whether to disable auto-compaction
      */

diff --git a/src/java/org/apache/cassandra/index/sai/disk/format/Version.java b/src/java/org/apache/cassandra/index/sai/disk/format/Version.java
@@ -39,6 +39,7 @@
 import org.apache.cassandra.index.sai.disk.v5.V5OnDiskFormat;
 import org.apache.cassandra.index.sai.disk.v6.V6OnDiskFormat;
 import org.apache.cassandra.index.sai.disk.v7.V7OnDiskFormat;
+import org.apache.cassandra.index.sai.disk.v8.V8OnDiskFormat;
 import org.apache.cassandra.index.sai.utils.TypeUtil;
 import org.apache.cassandra.io.sstable.format.SSTableFormat;
 import org.apache.cassandra.schema.SchemaConstants;
@@ -75,10 +76,12 @@ public class Version implements Comparable<Version>
     public static final Version EC = new Version("ec", V7OnDiskFormat.instance, (c, i, g) -> stargazerFileNameFormat(c, i, g, "ec"));
     // total terms count serialization in index metadata, enables ANN_USE_SYNTHETIC_SCORE by default
     public static final Version ED = new Version("ed", V7OnDiskFormat.instance, (c, i, g) -> stargazerFileNameFormat(c, i, g, "ed"));
+    // jvector file format version 6 (skipped 5)
+    public static final Version FA = new Version("fa", V8OnDiskFormat.instance, (c, i, g) -> stargazerFileNameFormat(c, i, g, "fa"));
 
     // These are in reverse-chronological order so that the latest version is first. Version matching tests
     // are more likely to match the latest version, so we want to test that one first.
-    public static final List<Version> ALL = Lists.newArrayList(ED, EC, EB, DC, DB, CA, BA, AA);
+    public static final List<Version> ALL = Lists.newArrayList(FA, ED, EC, EB, DC, DB, CA, BA, AA);
 
     public static final Version EARLIEST = AA;
     public static final Version VECTOR_EARLIEST = BA;

diff --git a/src/java/org/apache/cassandra/index/sai/disk/v8/V8OnDiskFormat.java b/src/java/org/apache/cassandra/index/sai/disk/v8/V8OnDiskFormat.java
@@ -0,0 +1,32 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements.  See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership.  The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+package org.apache.cassandra.index.sai.disk.v8;
+
+import org.apache.cassandra.index.sai.disk.v7.V7OnDiskFormat;
+
+public class V8OnDiskFormat extends V7OnDiskFormat
+{
+    public static final V8OnDiskFormat instance = new V8OnDiskFormat();
+
+    @Override
+    public int jvectorFileFormatVersion()
+    {
+        return 6; // jvector file format version 6 (version 5 is skipped)
+    }
+}
diff --git a/src/java/org/apache/cassandra/index/sai/disk/vector/CassandraDiskAnn.java b/src/java/org/apache/cassandra/index/sai/disk/vector/CassandraDiskAnn.java
@@ -95,7 +95,7 @@ public CassandraDiskAnn(SSTableContext sstableContext, SegmentMetadata segmentMe
 
         SegmentMetadata.ComponentMetadata termsMetadata = this.componentMetadatas.get(IndexComponentType.TERMS_DATA);
         graphHandle = indexFiles.termsData();
-        var rawGraph = OnDiskGraphIndex.load(graphHandle::createReader, termsMetadata.offset);
+        var rawGraph = OnDiskGraphIndex.load(graphHandle::createReader, termsMetadata.offset, false);
         features = rawGraph.getFeatureSet();
         graph = rawGraph;
         usesNVQ = features.contains(FeatureId.NVQ_VECTORS);
@@ -123,7 +123,7 @@ public CassandraDiskAnn(SSTableContext sstableContext, SegmentMetadata segmentMe
             }
 
             VectorCompression.CompressionType compressionType = VectorCompression.CompressionType.values()[reader.readByte()];
-            if (features.contains(FeatureId.FUSED_ADC))
+            if (features.contains(FeatureId.FUSED_PQ))
             {
                 assert compressionType == VectorCompression.CompressionType.PRODUCT_QUANTIZATION;
                 compressedVectors = null;
@@ -239,9 +239,7 @@ public CloseableIterator<RowIdWithScore> search(VectorFloat<?> queryVector,
         {
             var view = (ImmutableGraphIndex.ScoringView) searcher.getView();
             SearchScoreProvider ssp;
-            // FusedADC can no longer be written due to jvector upgrade. However, it's possible these index files
-            // still exist, so we have to support them.
-            if (features.contains(FeatureId.FUSED_ADC))
+            if (features.contains(FeatureId.FUSED_PQ))
             {
                 var asf = view.approximateScoreFunctionFor(queryVector, similarityFunction);
                 var rr = isRerankless ? null : view.rerankerFor(queryVector, similarityFunction);