Skip to content

Commit 63e08c1

Browse files
committed
HCD-237: UCS settings json file cleanup fails
UCS settings files are not dropped after the table gets dropped. Instead they are supposed to be cleared after the node restart. The cleanup is faulty though and it prevents the node from startup. Root Cause: The cleanupControllerConfig() method in CompactionManager attempts to verify if a table exists by calling getColumnFamilyStore(). When the table is dropped, this method throws IllegalArgumentException, which was not being caught. The existing catch block only handled NullPointerException (for missing keyspace). Extended the exception handler to catch both NullPointerException and IllegalArgumentException, allowing orphaned controller-config.JSON files to be properly identified and deleted during node restart.
1 parent 9745f0a commit 63e08c1

File tree

3 files changed

+119
-4
lines changed

3 files changed

+119
-4
lines changed

src/java/org/apache/cassandra/db/compaction/CompactionManager.java

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -267,12 +267,17 @@ public static void cleanupControllerConfig()
267267
//table exists so keep the file
268268
Schema.instance.getKeyspaceInstance(names[0]).getColumnFamilyStore(names[1]);
269269
}
270-
catch (NullPointerException e)
270+
catch (NullPointerException | IllegalArgumentException e)
271271
{
272272
//table does not exist so delete the file
273-
logger.debug("Removing " + file + " because it does not correspond to an existing table");
273+
logger.debug("Removing {} because it does not correspond to an existing table", file);
274274
file.delete();
275275
}
276+
catch (Throwable e)
277+
{
278+
logger.error("Encountered an exception while cleaning up the orphaned compaction settings file ({}) for {}.{}", file, names[0], names[1]);
279+
throw e;
280+
}
276281
}
277282
else if (names.length == 3) // if keyspace/table names are long, we include table id as a 3rd component while the keyspace and table names are abbreviated
278283
{
@@ -281,7 +286,7 @@ else if (names.length == 3) // if keyspace/table names are long, we include tabl
281286
if (Schema.instance.getTableMetadata(tableId) == null)
282287
{
283288
//table does not exist so delete the file
284-
logger.debug("Removing " + file + " because it does not correspond to an existing table");
289+
logger.debug("Removing {} because it does not correspond to an existing table", file);
285290
file.delete();
286291
}
287292
}

test/distributed/org/apache/cassandra/distributed/test/CompactionControllerConfigTest.java

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
package org.apache.cassandra.distributed.test;
2020

2121
import java.util.Arrays;
22+
import java.util.function.Consumer;
2223

2324
import org.junit.Test;
2425

@@ -31,6 +32,7 @@
3132
import org.apache.cassandra.db.compaction.unified.Controller;
3233
import org.apache.cassandra.db.compaction.unified.StaticController;
3334
import org.apache.cassandra.distributed.Cluster;
35+
import org.apache.cassandra.distributed.api.ConsistencyLevel;
3436
import org.apache.cassandra.schema.TableMetadata;
3537

3638

@@ -40,6 +42,7 @@
4042
import static org.apache.cassandra.SchemaLoader.standardCFMD;
4143
import static org.assertj.core.api.Assertions.assertThat;
4244
import static org.junit.Assert.assertEquals;
45+
import static org.junit.Assert.assertFalse;
4346
import static org.junit.Assert.assertTrue;
4447

4548
public class CompactionControllerConfigTest extends TestBaseImpl
@@ -228,6 +231,84 @@ public void testVectorControllerConfig() throws Throwable
228231
vectorControllerConfig(false);
229232
}
230233

234+
/**
235+
* Test to reproduce the bug where orphaned controller-config.JSON files
236+
* cause IllegalArgumentException during node restart.
237+
*
238+
* The bug occurs when:
239+
* 1. A table with UCS compaction strategy is created
240+
* 2. Data is written and flushed
241+
* 3. The UCS config file is saved
242+
* 4. Table is dropped (file is NOT deleted)
243+
* 5. Node is restarted (cleanupControllerConfig() throws IllegalArgumentException)
244+
*/
245+
@Test
246+
public void testDropTableOrphanedControllerConfigFileCleanup() throws Throwable
247+
{
248+
testOrphanedControllerConfigFileCleanup(cluster -> cluster.schemaChange(withKeyspace("DROP TABLE test_ks.test_table;")));
249+
}
250+
251+
/**
252+
* Same as testDropTableOrphanedControllerConfigFileCleanup but for dropping keyspace
253+
*/
254+
@Test
255+
public void testDropKeyspaceOrphanedControllerConfigFileCleanup() throws Throwable
256+
{
257+
testOrphanedControllerConfigFileCleanup(cluster -> cluster.schemaChange(withKeyspace("DROP KEYSPACE test_ks;")));
258+
}
259+
260+
private void testOrphanedControllerConfigFileCleanup(Consumer<Cluster> schemaRemover) throws Throwable
261+
{
262+
try (Cluster cluster = init(Cluster.build(1).start()))
263+
{
264+
// create keyspace and table with UCS compaction strategy
265+
cluster.schemaChange(withKeyspace("CREATE KEYSPACE test_ks WITH replication = {'class': 'SimpleStrategy', 'replication_factor': 1};"));
266+
cluster.schemaChange(withKeyspace("CREATE TABLE test_ks.test_table (pk int PRIMARY KEY, v int) WITH compaction = " +
267+
"{'class': 'UnifiedCompactionStrategy', 'adaptive': 'true'};"));
268+
269+
// insert data and flush to trigger controller-config.JSON creation
270+
for (int i = 0; i < 100; i++)
271+
{
272+
cluster.coordinator(1).execute(withKeyspace("INSERT INTO test_ks.test_table (pk, v) VALUES (?, ?)"),
273+
ConsistencyLevel.ONE, i, i);
274+
}
275+
276+
cluster.get(1).flush(KEYSPACE);
277+
278+
// store the controller-config.JSON file and verify it exists
279+
cluster.get(1).runOnInstance(() -> {
280+
CompactionManager.storeControllerConfig();
281+
TableMetadata metadata = TableMetadata.minimal("test_ks", "test_table");
282+
assertTrue("Controller config file should exist after flush",
283+
Controller.getControllerConfigPath(metadata).exists());
284+
});
285+
286+
// drop the schema - this should delete the controller-config.JSON file but currently doesn't
287+
schemaRemover.accept(cluster);
288+
289+
// verify the orphaned file still exists
290+
cluster.get(1).runOnInstance(() -> {
291+
// This assertion will pass, showing the file is orphaned
292+
TableMetadata metadata = TableMetadata.minimal("test_ks", "test_table");
293+
assertTrue("Controller config file is orphaned after table drop",
294+
Controller.getControllerConfigPath(metadata).exists());
295+
});
296+
297+
// when
298+
// stopping the node
299+
waitOn(cluster.get(1).shutdown());
300+
301+
// then
302+
// starting the node again should succeed
303+
cluster.get(1).startup();
304+
// after restart, the orphaned file should be cleaned up
305+
cluster.get(1).runOnInstance(() -> {
306+
TableMetadata metadata = TableMetadata.minimal("test_ks", "test_table");
307+
assertFalse("Controller config file should be deleted after restart cleanup",
308+
Controller.getControllerConfigPath(metadata).exists());
309+
});
310+
}
311+
}
231312

232313
public void vectorControllerConfig(boolean vectorOverride) throws Throwable
233314
{

test/unit/org/apache/cassandra/db/compaction/CompactionControllerTest.java

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@
6262

6363
import static org.apache.cassandra.config.CassandraRelevantProperties.ALLOW_UNSAFE_AGGRESSIVE_SSTABLE_EXPIRATION;
6464
import static org.apache.cassandra.db.ColumnFamilyStore.FlushReason.UNIT_TESTS;
65+
import static org.assertj.core.api.Assertions.assertThatExceptionOfType;
6566
import static org.junit.Assert.assertEquals;
6667
import static org.junit.Assert.assertFalse;
6768
import static org.junit.Assert.assertNotEquals;
@@ -74,12 +75,12 @@ public class CompactionControllerTest extends SchemaLoader
7475
private static final String KEYSPACE = "CompactionControllerTest";
7576
private static final String CF1 = "Standard1";
7677
private static final String CF2 = "Standard2";
77-
private static final int TTL_SECONDS = 10;
7878
private static CountDownLatch compaction2FinishLatch = new CountDownLatch(1);
7979
private static CountDownLatch createCompactionControllerLatch = new CountDownLatch(1);
8080
private static CountDownLatch compaction1RefreshLatch = new CountDownLatch(1);
8181
private static CountDownLatch refreshCheckLatch = new CountDownLatch(1);
8282
private static int overlapRefreshCounter = 0;
83+
private static boolean shouldThrowOnConfigCleanup = false;
8384

8485
@BeforeClass
8586
public static void defineSchema() throws ConfigurationException
@@ -269,6 +270,34 @@ public void testInFlightBloomFilterMemory()
269270
}
270271
}
271272

273+
@Test
274+
@BMRule(name = "Simulate an non standard exception while opening a keyspace",
275+
targetClass = "org.apache.cassandra.schema.Schema",
276+
targetMethod = "getKeyspaceInstance",
277+
targetLocation = "AT ENTRY",
278+
condition = "org.apache.cassandra.db.compaction.CompactionControllerTest.shouldThrowOnConfigCleanup",
279+
action = "throw new java.lang.RuntimeException(\"Expected exception\");")
280+
public void testRethrowingExceptionOnConfigCleanup()
281+
{
282+
try
283+
{
284+
// given the config was stored
285+
CompactionManager.storeControllerConfig();
286+
// below flag is read by byteman, it is needed to throw the exception on the second call of cleanupControllerConfig
287+
shouldThrowOnConfigCleanup = true;
288+
// when
289+
// unhandled exception is throw while cleaning up the controller config
290+
// then the excepion is not swallowed
291+
assertThatExceptionOfType(RuntimeException.class)
292+
.isThrownBy(CompactionManager::cleanupControllerConfig)
293+
.withMessage("Expected exception");
294+
}
295+
finally
296+
{
297+
shouldThrowOnConfigCleanup = false;
298+
}
299+
}
300+
272301
@Test
273302
@BMRules(rules = {
274303
@BMRule(name = "Pause compaction",

0 commit comments

Comments
 (0)