diff --git a/src/java/org/apache/cassandra/db/marshal/AbstractType.java b/src/java/org/apache/cassandra/db/marshal/AbstractType.java index bdbfbea409fb..41ca6b3c013a 100644 --- a/src/java/org/apache/cassandra/db/marshal/AbstractType.java +++ b/src/java/org/apache/cassandra/db/marshal/AbstractType.java @@ -210,7 +210,7 @@ public ByteBuffer decompose(T value) public String toCQLString(ByteBuffer bytes, boolean redact) { if (redact) - return "?"; + return RedactionUtil.redact(bytes, isValueLengthFixed()); if (bytes == null) return "null"; diff --git a/src/java/org/apache/cassandra/db/marshal/RedactionUtil.java b/src/java/org/apache/cassandra/db/marshal/RedactionUtil.java new file mode 100644 index 000000000000..0352a2d9b659 --- /dev/null +++ b/src/java/org/apache/cassandra/db/marshal/RedactionUtil.java @@ -0,0 +1,139 @@ +/* + * Copyright DataStax, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.db.marshal; + +import java.nio.ByteBuffer; +import javax.annotation.Nullable; + +/** + * Utility class for redacting sensitive data values while preserving some orientative size information. + *

+ * This class provides methods to replace actual data values with a redacted placeholder ("?"), occasionally including
+ * size hints to help with debugging and troubleshooting without exposing the actual data content. No size hints will be
+ * included for values of 100 bytes or less, or for values of fixed-size data types (e.g., int, UUID, timestamp).
+ * <p>
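+ * Size hints are provided in logarithmic buckets (e.g., ">100B", ">1KiB", ">10KiB", ">100KiB") to give a rough
+ * indication of data size while maintaining privacy:
+ * <ul>
+ *     <li>up to 100 bytes: {@code ?} (no size hint)</li>
+ *     <li>more than 100 bytes, up to 1 KiB: {@code ?[>100B]}</li>
+ *     <li>more than 1 KiB, up to 10 KiB: {@code ?[>1KiB]}</li>
+ *     <li>more than 10 KiB, up to 100 KiB: {@code ?[>10KiB]}</li>
+ *     <li>more than 100 KiB, up to 1 MiB: {@code ?[>100KiB]}</li>
+ *     <li>more than 1 MiB, up to 10 MiB: {@code ?[>1MiB]}</li>
+ *     <li>more than 10 MiB, up to 100 MiB: {@code ?[>10MiB]}</li>
+ *     <li>more than 100 MiB, up to 1 GiB: {@code ?[>100MiB]}</li>
+ *     <li>more than 1 GiB: {@code ?[>1GiB]}</li>
+ * </ul>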
+ */
+public final class RedactionUtil
+{
+    /** Placeholder for values that are redacted without any size hint. */
+    private static final String NO_HINT = "?";
+
+    /** Largest size, in bytes, that is still redacted without a size hint. */
+    private static final int NO_HINT_LIMIT = 100;
+
+    private static final int KIB = 1024;
+    private static final int MIB = 1024 * KIB;
+    private static final int GIB = 1024 * MIB;
+
+    /**
+     * Inclusive upper bound, in bytes, of every size bucket but the last one, in ascending order. The bucket at index
+     * {@code i} is rendered as {@code PLACEHOLDERS[i]}.
+     */
+    private static final int[] LIMITS = { NO_HINT_LIMIT, KIB, 10 * KIB, 100 * KIB, MIB, 10 * MIB, 100 * MIB, GIB };
+
+    /**
+     * Redacted representation of every size bucket. There is one more entry than in {@code LIMITS}; the last one
+     * stands for any size above the largest limit.
+     */
+    private static final String[] PLACEHOLDERS = { NO_HINT, "?[>100B]", "?[>1KiB]", "?[>10KiB]", "?[>100KiB]",
+                                                   "?[>1MiB]", "?[>10MiB]", "?[>100MiB]", "?[>1GiB]" };
+
+    private RedactionUtil()
+    {
+    }
+
+    /**
+     * Redacts the value held by a byte buffer, adding a size hint when that is informative.
+     * <p>
+     * The plain "?" placeholder is returned when the buffer is null, when the value has a fixed length (its size
+     * would reveal nothing useful), or when the buffer has no more than 100 bytes remaining. In any other case the
+     * placeholder carries the size hint chosen by {@link #redact(int)} for the number of remaining bytes.
+     *
+     * @param bytes the value to redact, possibly null
+     * @param isValueLengthFixed whether the value has a fixed length (e.g., int, UUID, timestamp)
+     * @return a redacted string representation, either "?" or "?[size_hint]"
+     */
+    public static String redact(@Nullable ByteBuffer bytes, boolean isValueLengthFixed)
+    {
+        if (isValueLengthFixed || bytes == null)
+            return NO_HINT;
+
+        int length = bytes.remaining();
+        // small values are answered right here, without going through the bucket lookup
+        return length > NO_HINT_LIMIT ? redact(length) : NO_HINT;
+    }
+
+    /**
+     * Returns the redacted placeholder for a value of the given size, with a size hint for anything above 100 bytes.
+     * <p>
+     * Sizes are grouped into logarithmic buckets, each with an inclusive upper bound:
+     * <ul>
+     *     <li>up to 100 bytes: {@code ?} (no size hint)</li>
+     *     <li>more than 100 bytes, up to 1 KiB: {@code ?[>100B]}</li>
+     *     <li>more than 1 KiB, up to 10 KiB: {@code ?[>1KiB]}</li>
+     *     <li>more than 10 KiB, up to 100 KiB: {@code ?[>10KiB]}</li>
+     *     <li>more than 100 KiB, up to 1 MiB: {@code ?[>100KiB]}</li>
+     *     <li>more than 1 MiB, up to 10 MiB: {@code ?[>1MiB]}</li>
+     *     <li>more than 10 MiB, up to 100 MiB: {@code ?[>10MiB]}</li>
+     *     <li>more than 100 MiB, up to 1 GiB: {@code ?[>100MiB]}</li>
+     *     <li>more than 1 GiB: {@code ?[>1GiB]}</li>
+     * </ul>
+     *
+     * @param size the size in bytes; non-negativity is only checked by an {@code assert}
+     * @return the placeholder of the bucket that contains {@code size}
+     */
+    public static String redact(int size)
+    {
+        assert size >= 0 : "Size must be non-negative";
+
+        // walk the ascending limits until one of them is large enough to hold the size
+        int bucket = 0;
+        while (bucket < LIMITS.length && size > LIMITS[bucket])
+            bucket++;
+
+        // when every limit was exceeded, bucket points at the trailing "above 1 GiB" entry
+        return PLACEHOLDERS[bucket];
+    }
+}
diff --git a/test/distributed/org/apache/cassandra/distributed/test/SlowQueryLoggerTest.java b/test/distributed/org/apache/cassandra/distributed/test/SlowQueryLoggerTest.java index d67738d802b8..8513e05bf277 100644 --- a/test/distributed/org/apache/cassandra/distributed/test/SlowQueryLoggerTest.java +++ b/test/distributed/org/apache/cassandra/distributed/test/SlowQueryLoggerTest.java @@ -16,6 +16,7 @@ package org.apache.cassandra.distributed.test; +import java.nio.ByteBuffer; import java.util.List; import java.util.concurrent.Callable; import java.util.concurrent.TimeUnit; @@ -66,15 +67,27 @@ public void testDoesNotLogSensitiveData() throws Throwable ICoordinator coordinator = cluster.coordinator(1); IInvokableInstance node = cluster.get(2); - cluster.schemaChange(format("CREATE TABLE %s.%s (k text, c text, v text, PRIMARY KEY (k, c))")); + cluster.schemaChange(format("CREATE TABLE %s.%s (k text, c text, v text, b blob, PRIMARY KEY (k, c))")); coordinator.execute(format("INSERT INTO %s.%s (k, c, v) VALUES ('secret_k', 'secret_c', 'secret_v')"), ALL); + // verify that slow queries are logged with redacted values long mark = node.logs().mark(); coordinator.execute(format("SELECT * FROM %s.%s WHERE k = 'secret_k' AND c = 'secret_c' AND v = 'secret_v' 
ALLOW FILTERING"), ALL); node.runOnInstance(() -> MonitoringTask.instance.logOperations(approxTime.now())); - assertLogsContain(mark, node, "Some operations were slow", format("100B\\] ALLOW FILTERING>"), + format("10KiB\\] ALLOW FILTERING>")); } } diff --git a/test/unit/org/apache/cassandra/db/marshal/RedactionUtilTest.java b/test/unit/org/apache/cassandra/db/marshal/RedactionUtilTest.java new file mode 100644 index 000000000000..3c5b46c4a030 --- /dev/null +++ b/test/unit/org/apache/cassandra/db/marshal/RedactionUtilTest.java @@ -0,0 +1,89 @@ +/* + * Copyright DataStax, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package org.apache.cassandra.db.marshal;
+
+import java.nio.ByteBuffer;
+
+import org.junit.Test;
+
+import org.assertj.core.api.Assertions;
+
+public class RedactionUtilTest
+{
+    private static final int KIB = 1024;
+    private static final int MIB = 1024 * KIB;
+    private static final int GIB = 1024 * MIB;
+
+    @Test
+    public void testRedactByteBuffer()
+    {
+        verifyBuffer(null, "?");
+        verifyBuffer(ByteBuffer.allocate(0), "?");
+        verifyBuffer(ByteBuffer.allocate(1), "?");
+        verifyBuffer(ByteBuffer.allocate(100), "?");
+        verifyBuffer(ByteBuffer.allocate(101), "?[>100B]");
+        verifyBuffer(ByteBuffer.allocate(KIB), "?[>100B]");
+        verifyBuffer(ByteBuffer.allocate(KIB + 1), "?[>1KiB]");
+        verifyBuffer(ByteBuffer.allocate(10 * KIB), "?[>1KiB]");
+        verifyBuffer(ByteBuffer.allocate(10 * KIB + 1), "?[>10KiB]");
+        // the larger buckets are left to testRedactSize, which does not have to allocate giant buffers
+    }
+
+    @Test
+    public void testRedactSize()
+    {
+        Assertions.assertThatThrownBy(() -> RedactionUtil.redact(-1)).isInstanceOf(AssertionError.class);
+
+        // byte range: no size hint up to 100 bytes
+        verifySize(0, "?");
+        verifySize(1, "?");
+        verifySize(100, "?");
+        verifySize(101, "?[>100B]");
+
+        // KiB boundaries
+        verifySize(KIB, "?[>100B]");
+        verifySize(KIB + 1, "?[>1KiB]");
+        verifySize(10 * KIB, "?[>1KiB]");
+        verifySize(10 * KIB + 1, "?[>10KiB]");
+        verifySize(100 * KIB, "?[>10KiB]");
+        verifySize(100 * KIB + 1, "?[>100KiB]");
+
+        // MiB boundaries
+        verifySize(MIB, "?[>100KiB]");
+        verifySize(MIB + 1, "?[>1MiB]");
+        verifySize(10 * MIB, "?[>1MiB]");
+        verifySize(10 * MIB + 1, "?[>10MiB]");
+        verifySize(100 * MIB, "?[>10MiB]");
+        verifySize(100 * MIB + 1, "?[>100MiB]");
+
+        // GiB boundary and beyond
+        verifySize(GIB, "?[>100MiB]");
+        verifySize(GIB + 1, "?[>1GiB]");
+        verifySize(Integer.MAX_VALUE, "?[>1GiB]");
+    }
+
+    private static void verifyBuffer(ByteBuffer bytes, String expected)
+    {
+        Assertions.assertThat(RedactionUtil.redact(bytes, true)).isEqualTo("?");
+        Assertions.assertThat(RedactionUtil.redact(bytes, false)).isEqualTo(expected);
+    }
+
+    private static void verifySize(int size, String expected)
+    {
+        Assertions.assertThat(RedactionUtil.redact(size)).isEqualTo(expected);
+    }
+}