Skip to content

Commit 4b4fd54

Browse files
adelapena authored and michaelsembwever committed
CNDB-16025: CNDB-15807: Add hints about the size of redacted query values (#2114)
Add hints about value size when redacting column values in CQL queries printed to logs. No size hints will be included for values smaller than 100 bytes, or for values of fixed-size data types (e.g., int, UUID, timestamp). Size hints are provided in logarithmic buckets (e.g., ">100B", ">1KiB", ">10KiB", ">100KiB") to give a rough indication of data size while maintaining privacy.
1 parent cfd94b0 commit 4b4fd54

File tree

4 files changed

+244
-3
lines changed

4 files changed

+244
-3
lines changed

src/java/org/apache/cassandra/db/marshal/AbstractType.java

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -210,7 +210,7 @@ public ByteBuffer decompose(T value)
210210
public String toCQLString(ByteBuffer bytes, boolean redact)
211211
{
212212
if (redact)
213-
return "?";
213+
return RedactionUtil.redact(bytes, isValueLengthFixed());
214214

215215
if (bytes == null)
216216
return "null";
Lines changed: 139 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,139 @@
1+
/*
2+
* Copyright DataStax, Inc.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package org.apache.cassandra.db.marshal;
18+
19+
import java.nio.ByteBuffer;
20+
import javax.annotation.Nullable;
21+
22+
/**
23+
* Utility class for redacting sensitive data values while preserving some orientative size information.
24+
* <p>
25+
* This class provides methods to replace actual data values with a redacted placeholder ("?"), occasionally including
26+
* size hints to help with debugging and troubleshooting without exposing the actual data content. No size hints will be
27+
* included for values smaller than 100 bytes, or for values of fixed-size data types (e.g., int, UUID, timestamp).
28+
* <p>
29+
* Size hints are provided in logarithmic buckets (e.g., ">100B", ">1KiB", ">10KiB", ">100KiB") to give a rough
30+
* indication of data size while maintaining privacy:
31+
* <ul>
32+
* <li>Up to 100B: no size hint (just "?")</li>
33+
* <li>(100 B, 1 KiB]: "?[>100B]"</li>
34+
* <li>(1 KiB, 10 KiB]: "?[>1KiB]"</li>
35+
* <li>(10 KiB, 100 KiB]: "?[>10KiB]"</li>
36+
* <li>(100 KiB, 1 MiB]: "?[>100KiB]"</li>
37+
* <li>(1 MiB, 10 MiB]: "?[>1MiB]"</li>
38+
* <li>(10 MiB, 100 MiB]: "?[>10MiB]"</li>
39+
* <li>(100 MiB, 1 GiB]: "?[>100MiB]"</li>
40+
* <li>Over 1 GiB: "?[>1GiB]"</li>
41+
* </ul>
42+
*/
43+
public final class RedactionUtil
44+
{
45+
// Pre-computed redacted values for each size bucket
46+
private static final String REDACTED = "?";
47+
private static final String REDACTED_100B = "?[>100B]";
48+
private static final String REDACTED_1KIB = "?[>1KiB]";
49+
private static final String REDACTED_10KIB = "?[>10KiB]";
50+
private static final String REDACTED_100KIB = "?[>100KiB]";
51+
private static final String REDACTED_1MIB = "?[>1MiB]";
52+
private static final String REDACTED_10MIB = "?[>10MiB]";
53+
private static final String REDACTED_100MIB = "?[>100MiB]";
54+
private static final String REDACTED_1GIB = "?[>1GiB]";
55+
56+
// Pre-computed size thresholds for each size bucket
57+
private static final int B_100 = 100;
58+
private static final int KIB = 1024;
59+
private static final int KIB_10 = 10 * KIB;
60+
private static final int KIB_100 = 100 * KIB;
61+
private static final int MIB = 1024 * KIB;
62+
private static final int MIB_10 = 10 * MIB;
63+
private static final int MIB_100 = 100 * MIB;
64+
private static final int GIB = 1024 * MIB;
65+
66+
private RedactionUtil()
67+
{
68+
}
69+
70+
/**
71+
* Redacts a byte buffer value, optionally including size information.
72+
* <p>
73+
* If the value is null, it's not greater than 100B, or has a fixed length (where size information would not be
74+
* useful), returns a simple "?" placeholder. Otherwise, returns a placeholder with a size hint indicating the
75+
* approximate size of the data, according to {@link #redact(int)}.
76+
*
77+
* @param bytes the value to redact
78+
* @param isValueLengthFixed whether the value has a fixed length (e.g., int, UUID, timestamp)
79+
* @return a redacted string representation, either "?" or "?[size_hint]"
80+
*/
81+
public static String redact(@Nullable ByteBuffer bytes, boolean isValueLengthFixed)
82+
{
83+
if (bytes == null || isValueLengthFixed)
84+
return REDACTED;
85+
86+
int remaining = bytes.remaining();
87+
// Early return for small values to avoid method call overhead
88+
if (remaining <= B_100)
89+
return REDACTED;
90+
91+
return redact(remaining);
92+
}
93+
94+
/**
95+
* Generates a redacted string with a size hint based on the provided size.
96+
* <p>
97+
* The size hint uses logarithmic buckets to provide a rough indication of size:
98+
* <ul>
99+
* <li>Up to 100B: no size hint (just "?")</li>
100+
* <li>(100 B, 1 KiB]: "?[>100B]"</li>
101+
* <li>(1 KiB, 10 KiB]: "?[>1KiB]"</li>
102+
* <li>(10 KiB, 100 KiB]: "?[>10KiB]"</li>
103+
* <li>(100 KiB, 1 MiB]: "?[>100KiB]"</li>
104+
* <li>And so on, up to "?[>1GiB]" for very large values</li>
105+
* </ul>
106+
*
107+
* @param size the size in bytes
108+
* @return a redacted string with an appropriate size hint
109+
*/
110+
public static String redact(int size)
111+
{
112+
assert size >= 0 : "Size must be non-negative";
113+
114+
// Byte range, don't include size information for the values in the smallest bucket
115+
if (size <= B_100)
116+
return REDACTED;
117+
if (size <= KIB)
118+
return REDACTED_100B;
119+
120+
// KiB range
121+
if (size <= KIB_10)
122+
return REDACTED_1KIB;
123+
if (size <= KIB_100)
124+
return REDACTED_10KIB;
125+
if (size <= MIB)
126+
return REDACTED_100KIB;
127+
128+
// MiB range
129+
if (size <= MIB_10)
130+
return REDACTED_1MIB;
131+
if (size <= MIB_100)
132+
return REDACTED_10MIB;
133+
if (size <= GIB)
134+
return REDACTED_100MIB;
135+
136+
// above 1 GiB
137+
return REDACTED_1GIB;
138+
}
139+
}

test/distributed/org/apache/cassandra/distributed/test/SlowQueryLoggerTest.java

Lines changed: 15 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -16,6 +16,7 @@
1616

1717
package org.apache.cassandra.distributed.test;
1818

19+
import java.nio.ByteBuffer;
1920
import java.util.List;
2021
import java.util.concurrent.Callable;
2122
import java.util.concurrent.TimeUnit;
@@ -66,15 +67,27 @@ public void testDoesNotLogSensitiveData() throws Throwable
6667
ICoordinator coordinator = cluster.coordinator(1);
6768
IInvokableInstance node = cluster.get(2);
6869

69-
cluster.schemaChange(format("CREATE TABLE %s.%s (k text, c text, v text, PRIMARY KEY (k, c))"));
70+
cluster.schemaChange(format("CREATE TABLE %s.%s (k text, c text, v text, b blob, PRIMARY KEY (k, c))"));
7071
coordinator.execute(format("INSERT INTO %s.%s (k, c, v) VALUES ('secret_k', 'secret_c', 'secret_v')"), ALL);
7172

73+
// verify that slow queries are logged with redacted values
7274
long mark = node.logs().mark();
7375
coordinator.execute(format("SELECT * FROM %s.%s WHERE k = 'secret_k' AND c = 'secret_c' AND v = 'secret_v' ALLOW FILTERING"), ALL);
7476
node.runOnInstance(() -> MonitoringTask.instance.logOperations(approxTime.now()));
75-
7677
assertLogsContain(mark, node, "Some operations were slow", format("<SELECT \\* FROM %s\\.%s WHERE k = \\? AND c = \\? AND v = \\? ALLOW FILTERING>"));
7778
assertLogsNotContain(mark, node, "secret_k", "secret_c", "secret_v");
79+
80+
// verify that large values include size hints
81+
mark = node.logs().mark();
82+
String query = format("SELECT * FROM %s.%s WHERE b = ? ALLOW FILTERING");
83+
coordinator.execute(query, ALL, ByteBuffer.allocate(100 + 1));
84+
coordinator.execute(query, ALL, ByteBuffer.allocate(1024 + 1));
85+
coordinator.execute(query, ALL, ByteBuffer.allocate(10 * 1024 + 1));
86+
node.runOnInstance(() -> MonitoringTask.instance.logOperations(approxTime.now()));
87+
assertLogsContain(mark, node,
88+
format("<SELECT \\* FROM %s\\.%s WHERE b = \\?\\[>100B\\] ALLOW FILTERING>"),
89+
format("<SELECT \\* FROM %s\\.%s WHERE b = \\?\\[>1KiB\\] ALLOW FILTERING>"),
90+
format("<SELECT \\* FROM %s\\.%s WHERE b = \\?\\[>10KiB\\] ALLOW FILTERING>"));
7891
}
7992
}
8093

Lines changed: 89 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,89 @@
1+
/*
2+
* Copyright DataStax, Inc.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package org.apache.cassandra.db.marshal;
18+
19+
import java.nio.ByteBuffer;
20+
21+
import org.junit.Test;
22+
23+
import org.assertj.core.api.Assertions;
24+
25+
public class RedactionUtilTest
26+
{
27+
@Test
28+
public void testRedactByteBuffer()
29+
{
30+
assertRedactByteBuffer(null, "?");
31+
assertRedactByteBuffer(ByteBuffer.allocate(0), "?");
32+
assertRedactByteBuffer(ByteBuffer.allocate(1), "?");
33+
assertRedactByteBuffer(ByteBuffer.allocate(100), "?");
34+
assertRedactByteBuffer(ByteBuffer.allocate(101), "?[>100B]");
35+
assertRedactByteBuffer(ByteBuffer.allocate(1024), "?[>100B]");
36+
assertRedactByteBuffer(ByteBuffer.allocate(1025), "?[>1KiB]");
37+
assertRedactByteBuffer(ByteBuffer.allocate(10 * 1024), "?[>1KiB]");
38+
assertRedactByteBuffer(ByteBuffer.allocate(10 * 1024 + 1), "?[>10KiB]");
39+
// we don't want to keep testing allocating giant buffers, the test for the size alone should get us covered
40+
}
41+
42+
private static void assertRedactByteBuffer(ByteBuffer bytes, String expectedForVariableLength)
43+
{
44+
Assertions.assertThat(RedactionUtil.redact(bytes, true)).isEqualTo("?");
45+
Assertions.assertThat(RedactionUtil.redact(bytes, false)).isEqualTo(expectedForVariableLength);
46+
}
47+
48+
@Test
49+
public void testRedactSize()
50+
{
51+
// invalid size
52+
Assertions.assertThatThrownBy(() -> RedactionUtil.redact(-1)).isInstanceOf(AssertionError.class);
53+
54+
// byte range
55+
assertRedactSize(0, "?");
56+
assertRedactSize(1, "?");
57+
assertRedactSize(100, "?");
58+
assertRedactSize(101, "?[>100B]");
59+
60+
// KiB range
61+
int unit = 1024;
62+
assertRedactSize(unit, "?[>100B]");
63+
assertRedactSize(unit + 1, "?[>1KiB]");
64+
assertRedactSize(10 * unit, "?[>1KiB]");
65+
assertRedactSize(10 * unit + 1, "?[>10KiB]");
66+
assertRedactSize(100 * unit, "?[>10KiB]");
67+
assertRedactSize(100 * unit + 1, "?[>100KiB]");
68+
69+
// MiB range
70+
unit *= 1024;
71+
assertRedactSize(unit, "?[>100KiB]");
72+
assertRedactSize(unit + 1, "?[>1MiB]");
73+
assertRedactSize(10 * unit, "?[>1MiB]");
74+
assertRedactSize(10 * unit + 1, "?[>10MiB]");
75+
assertRedactSize(100 * unit, "?[>10MiB]");
76+
assertRedactSize(100 * unit + 1, "?[>100MiB]");
77+
78+
// GiB range
79+
unit *= 1024;
80+
assertRedactSize(unit, "?[>100MiB]");
81+
assertRedactSize(unit + 1, "?[>1GiB]");
82+
assertRedactSize(Integer.MAX_VALUE, "?[>1GiB]");
83+
}
84+
85+
private static void assertRedactSize(int size, String expected)
86+
{
87+
Assertions.assertThat(RedactionUtil.redact(size)).isEqualTo(expected);
88+
}
89+
}

0 commit comments

Comments
 (0)