diff --git a/src/java/org/apache/cassandra/db/marshal/AbstractType.java b/src/java/org/apache/cassandra/db/marshal/AbstractType.java
index bdbfbea409fb..41ca6b3c013a 100644
--- a/src/java/org/apache/cassandra/db/marshal/AbstractType.java
+++ b/src/java/org/apache/cassandra/db/marshal/AbstractType.java
@@ -210,7 +210,7 @@ public ByteBuffer decompose(T value)
public String toCQLString(ByteBuffer bytes, boolean redact)
{
if (redact)
- return "?";
+ return RedactionUtil.redact(bytes, isValueLengthFixed());
if (bytes == null)
return "null";
diff --git a/src/java/org/apache/cassandra/db/marshal/RedactionUtil.java b/src/java/org/apache/cassandra/db/marshal/RedactionUtil.java
new file mode 100644
index 000000000000..0352a2d9b659
--- /dev/null
+++ b/src/java/org/apache/cassandra/db/marshal/RedactionUtil.java
@@ -0,0 +1,139 @@
+/*
+ * Copyright DataStax, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.cassandra.db.marshal;
+
+import java.nio.ByteBuffer;
+import javax.annotation.Nullable;
+
+/**
+ * Utility class for redacting sensitive data values while preserving approximate size information.
+ *
+ * This class provides methods to replace actual data values with a redacted placeholder ("?"), including a size hint
+ * where useful to help with debugging and troubleshooting without exposing the actual data content. No size hint is
+ * included for values of 100 bytes or less, or for values of fixed-size data types (e.g., int, UUID, timestamp).
+ *
+ * Size hints are provided in logarithmic buckets (e.g., ">100B", ">1KiB", ">10KiB", ">100KiB") to give a rough
+ * indication of data size while maintaining privacy:
+ *
+ * <ul>
+ * <li>Up to 100B: no size hint (just "?")</li>
+ * <li>(100 B, 1 KiB]: "?[>100B]"</li>
+ * <li>(1 KiB, 10 KiB]: "?[>1KiB]"</li>
+ * <li>(10 KiB, 100 KiB]: "?[>10KiB]"</li>
+ * <li>(100 KiB, 1 MiB]: "?[>100KiB]"</li>
+ * <li>(1 MiB, 10 MiB]: "?[>1MiB]"</li>
+ * <li>(10 MiB, 100 MiB]: "?[>10MiB]"</li>
+ * <li>(100 MiB, 1 GiB]: "?[>100MiB]"</li>
+ * <li>Over 1 GiB: "?[>1GiB]"</li>
+ * </ul>
+ */
+public final class RedactionUtil
+{
+    // Placeholder returned whenever no size hint is attached
+    private static final String REDACTED = "?";
+    // Placeholder for sizes above the largest bounded bucket
+    private static final String REDACTED_OVER_MAX = "?[>1GiB]";
+
+    private static final int KIB = 1 << 10;
+    private static final int MIB = 1 << 20;
+    private static final int GIB = 1 << 30;
+
+    // Inclusive upper bound of each bucket, in ascending order.
+    // BUCKET_LABELS[i] is the placeholder for sizes that fall at or below BUCKET_LIMITS[i]
+    // and above BUCKET_LIMITS[i - 1].
+    private static final int[] BUCKET_LIMITS = {
+        100,
+        KIB,
+        10 * KIB,
+        100 * KIB,
+        MIB,
+        10 * MIB,
+        100 * MIB,
+        GIB
+    };
+    private static final String[] BUCKET_LABELS = {
+        REDACTED,
+        "?[>100B]",
+        "?[>1KiB]",
+        "?[>10KiB]",
+        "?[>100KiB]",
+        "?[>1MiB]",
+        "?[>10MiB]",
+        "?[>100MiB]"
+    };
+
+    private RedactionUtil()
+    {
+        // static helpers only, never instantiated
+    }
+
+    /**
+     * Produces the redacted form of a serialized value, attaching a size hint when one is meaningful.
+     *
+     * A plain "?" is returned when the buffer is null or when the type has a fixed length, since the size carries no
+     * information in those cases. Otherwise the number of remaining bytes in the buffer is mapped to a placeholder by
+     * {@link #redact(int)}, which also yields a plain "?" for anything of 100 bytes or less.
+     *
+     * @param bytes the value to redact, possibly null
+     * @param isValueLengthFixed whether the value has a fixed length (e.g., int, UUID, timestamp)
+     * @return a redacted string representation, either "?" or "?[size_hint]"
+     */
+    public static String redact(@Nullable ByteBuffer bytes, boolean isValueLengthFixed)
+    {
+        boolean sizeIsInformative = bytes != null && !isValueLengthFixed;
+        return sizeIsInformative ? redact(bytes.remaining()) : REDACTED;
+    }
+
+    /**
+     * Maps a size in bytes to a redacted placeholder carrying a coarse, logarithmic size hint.
+     *
+     * <ul>
+     * <li>Up to 100B: no size hint (just "?")</li>
+     * <li>(100 B, 1 KiB]: "?[>100B]"</li>
+     * <li>(1 KiB, 10 KiB]: "?[>1KiB]"</li>
+     * <li>(10 KiB, 100 KiB]: "?[>10KiB]"</li>
+     * <li>(100 KiB, 1 MiB]: "?[>100KiB]"</li>
+     * <li>(1 MiB, 10 MiB]: "?[>1MiB]"</li>
+     * <li>(10 MiB, 100 MiB]: "?[>10MiB]"</li>
+     * <li>(100 MiB, 1 GiB]: "?[>100MiB]"</li>
+     * <li>Over 1 GiB: "?[>1GiB]"</li>
+     * </ul>
+     *
+     * @param size the size in bytes; expected to be non-negative (checked by an assertion only)
+     * @return a redacted string with an appropriate size hint
+     */
+    public static String redact(int size)
+    {
+        assert size >= 0 : "Size must be non-negative";
+
+        // The first bucket whose inclusive upper bound covers the size wins
+        for (int bucket = 0; bucket < BUCKET_LIMITS.length; bucket++)
+        {
+            if (size <= BUCKET_LIMITS[bucket])
+                return BUCKET_LABELS[bucket];
+        }
+
+        // Larger than every bounded bucket, i.e. above 1 GiB
+        return REDACTED_OVER_MAX;
+    }
+}
diff --git a/test/distributed/org/apache/cassandra/distributed/test/SlowQueryLoggerTest.java b/test/distributed/org/apache/cassandra/distributed/test/SlowQueryLoggerTest.java
index d67738d802b8..8513e05bf277 100644
--- a/test/distributed/org/apache/cassandra/distributed/test/SlowQueryLoggerTest.java
+++ b/test/distributed/org/apache/cassandra/distributed/test/SlowQueryLoggerTest.java
@@ -16,6 +16,7 @@
package org.apache.cassandra.distributed.test;
+import java.nio.ByteBuffer;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.TimeUnit;
@@ -66,15 +67,27 @@ public void testDoesNotLogSensitiveData() throws Throwable
ICoordinator coordinator = cluster.coordinator(1);
IInvokableInstance node = cluster.get(2);
- cluster.schemaChange(format("CREATE TABLE %s.%s (k text, c text, v text, PRIMARY KEY (k, c))"));
+ cluster.schemaChange(format("CREATE TABLE %s.%s (k text, c text, v text, b blob, PRIMARY KEY (k, c))"));
coordinator.execute(format("INSERT INTO %s.%s (k, c, v) VALUES ('secret_k', 'secret_c', 'secret_v')"), ALL);
+ // verify that slow queries are logged with redacted values
long mark = node.logs().mark();
coordinator.execute(format("SELECT * FROM %s.%s WHERE k = 'secret_k' AND c = 'secret_c' AND v = 'secret_v' ALLOW FILTERING"), ALL);
node.runOnInstance(() -> MonitoringTask.instance.logOperations(approxTime.now()));
-
assertLogsContain(mark, node, "Some operations were slow", format("