Skip to content

Commit cdffc7f

Browse files
committed
feat: remove orphan objects if copy fails at any point
Given that there are at least 3 objects uploaded per segment, and it could fail at any stage, the plugin should guard that the chances for orphan objects is minimal.
1 parent cd4434c commit cdffc7f

File tree

2 files changed

+105
-5
lines changed

2 files changed

+105
-5
lines changed

core/src/main/java/io/aiven/kafka/tieredstorage/RemoteStorageManager.java

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -257,6 +257,12 @@ public Optional<CustomMetadata> copyLogSegmentData(final RemoteLogSegmentMetadat
257257
maybeEncryptionKey,
258258
customMetadataBuilder);
259259
} catch (final Exception e) {
260+
try {
261+
// best effort on removing orphan files
262+
tryDeleteSegmentObjects(remoteLogSegmentMetadata);
263+
} catch (final Exception ignored) {
264+
// ignore all exceptions
265+
}
260266
throw new RemoteStorageException(e);
261267
}
262268

@@ -342,7 +348,7 @@ private TransformChunkEnumeration transformation(
342348
return transformEnum;
343349
}
344350

345-
private SegmentIndexesV1 uploadIndexes(
351+
SegmentIndexesV1 uploadIndexes(
346352
final RemoteLogSegmentMetadata remoteLogSegmentMetadata,
347353
final LogSegmentData segmentData,
348354
final DataKeyAndAAD maybeEncryptionKey,
@@ -661,10 +667,7 @@ public void deleteLogSegmentData(final RemoteLogSegmentMetadata remoteLogSegment
661667
final long startedMs = time.milliseconds();
662668

663669
try {
664-
final Set<ObjectKey> keys = Arrays.stream(ObjectKeyFactory.Suffix.values())
665-
.map(s -> objectKeyFactory.key(remoteLogSegmentMetadata, s))
666-
.collect(Collectors.toSet());
667-
deleter.delete(keys);
670+
tryDeleteSegmentObjects(remoteLogSegmentMetadata);
668671
} catch (final Exception e) {
669672
metrics.recordSegmentDeleteError(remoteLogSegmentMetadata.remoteLogSegmentId()
670673
.topicIdPartition().topicPartition());
@@ -678,6 +681,15 @@ public void deleteLogSegmentData(final RemoteLogSegmentMetadata remoteLogSegment
678681
log.info("Deleting log segment data for completed successfully {}", remoteLogSegmentMetadata);
679682
}
680683

684+
private void tryDeleteSegmentObjects(
685+
final RemoteLogSegmentMetadata remoteLogSegmentMetadata
686+
) throws StorageBackendException {
687+
final Set<ObjectKey> keys = Arrays.stream(ObjectKeyFactory.Suffix.values())
688+
.map(s -> objectKeyFactory.key(remoteLogSegmentMetadata, s))
689+
.collect(Collectors.toSet());
690+
deleter.delete(keys);
691+
}
692+
681693
@Override
682694
public void close() {
683695
metrics.close();

core/src/test/java/io/aiven/kafka/tieredstorage/RemoteStorageManagerTest.java

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,15 +18,19 @@
1818

1919
import java.io.IOException;
2020
import java.io.InputStream;
21+
import java.nio.ByteBuffer;
2122
import java.nio.channels.ClosedByInterruptException;
23+
import java.nio.charset.StandardCharsets;
2224
import java.nio.file.Files;
2325
import java.nio.file.Path;
2426
import java.util.Map;
27+
import java.util.Optional;
2528
import java.util.stream.Stream;
2629

2730
import org.apache.kafka.common.TopicIdPartition;
2831
import org.apache.kafka.common.TopicPartition;
2932
import org.apache.kafka.common.Uuid;
33+
import org.apache.kafka.server.log.remote.storage.LogSegmentData;
3034
import org.apache.kafka.server.log.remote.storage.RemoteLogSegmentId;
3135
import org.apache.kafka.server.log.remote.storage.RemoteLogSegmentMetadata;
3236
import org.apache.kafka.server.log.remote.storage.RemoteStorageException;
@@ -36,6 +40,7 @@
3640
import io.aiven.kafka.tieredstorage.storage.StorageBackendException;
3741

3842
import org.junit.jupiter.api.BeforeEach;
43+
import org.junit.jupiter.api.Test;
3944
import org.junit.jupiter.api.io.TempDir;
4045
import org.junit.jupiter.params.ParameterizedTest;
4146
import org.junit.jupiter.params.provider.Arguments;
@@ -45,8 +50,12 @@
4550
import static org.assertj.core.api.Assertions.assertThatThrownBy;
4651
import static org.junit.jupiter.params.provider.Arguments.arguments;
4752
import static org.mockito.ArgumentMatchers.any;
53+
import static org.mockito.ArgumentMatchers.anyBoolean;
4854
import static org.mockito.ArgumentMatchers.anyInt;
55+
import static org.mockito.Mockito.doCallRealMethod;
56+
import static org.mockito.Mockito.doThrow;
4957
import static org.mockito.Mockito.mock;
58+
import static org.mockito.Mockito.spy;
5059
import static org.mockito.Mockito.when;
5160

5261
class RemoteStorageManagerTest {
@@ -221,6 +230,85 @@ void fetchSegmentNonInterruptionExceptionWhenGettingSegment(
221230
.hasRootCauseInstanceOf(exceptionClass);
222231
}
223232

233+
@Test
234+
void deleteObjectsWhenUploadFails(
235+
@TempDir final Path partitionDir
236+
) throws IOException, StorageBackendException, RemoteStorageException {
237+
// given a sample local segment to be uploaded
238+
final var segmentPath = Files.createFile(partitionDir.resolve("0000.log"));
239+
final var segmentContent = "test";
240+
Files.writeString(segmentPath, segmentContent);
241+
final var indexPath = Files.createFile(partitionDir.resolve("0000.index"));
242+
final var timeIndexPath = Files.createFile(partitionDir.resolve("0000.timeindex"));
243+
final var producerSnapshotPath = Files.createFile(partitionDir.resolve("0000.snapshot"));
244+
final var logSegmentData = new LogSegmentData(
245+
segmentPath,
246+
indexPath,
247+
timeIndexPath,
248+
Optional.empty(),
249+
producerSnapshotPath,
250+
ByteBuffer.wrap("test".getBytes(StandardCharsets.UTF_8))
251+
);
252+
253+
final var remoteLogSegmentMetadata = new RemoteLogSegmentMetadata(
254+
REMOTE_SEGMENT_ID, 0, 1L,
255+
0, 0, 0, segmentContent.length(), Map.of(0, 0L));
256+
257+
final var remotePartitionPath = targetDir.resolve(TOPIC_ID_PARTITION.topic() + "-" + TOPIC_ID)
258+
.resolve(String.valueOf(TOPIC_ID_PARTITION.partition()));
259+
260+
final var config = Map.of(
261+
"chunk.size", "1",
262+
"storage.backend.class", "io.aiven.kafka.tieredstorage.storage.filesystem.FileSystemStorage",
263+
"storage.root", targetDir.toString()
264+
);
265+
rsm.configure(config);
266+
rsm = spy(rsm);
267+
268+
// when first upload fails
269+
doThrow(IOException.class).when(rsm).uploadSegmentLog(any(), any(), anyBoolean(), any(), any());
270+
271+
assertThatThrownBy(() -> rsm.copyLogSegmentData(remoteLogSegmentMetadata, logSegmentData))
272+
.isInstanceOf(RemoteStorageException.class)
273+
.hasRootCauseInstanceOf(IOException.class);
274+
275+
// then no files stored in remote
276+
assertThat(remotePartitionPath).doesNotExist();
277+
278+
// fallback to real method
279+
doCallRealMethod().when(rsm).uploadSegmentLog(any(), any(), anyBoolean(), any(), any());
280+
281+
// when second upload fails
282+
doThrow(IOException.class).when(rsm).uploadIndexes(any(), any(), any(), any());
283+
284+
assertThatThrownBy(() -> rsm.copyLogSegmentData(remoteLogSegmentMetadata, logSegmentData))
285+
.isInstanceOf(RemoteStorageException.class)
286+
.hasRootCauseInstanceOf(IOException.class);
287+
288+
// then no files stored in remote
289+
assertThat(remotePartitionPath).doesNotExist();
290+
291+
// fallback to real method
292+
doCallRealMethod().when(rsm).uploadIndexes(any(), any(), any(), any());
293+
294+
// when third upload fails
295+
doThrow(IOException.class).when(rsm).uploadManifest(any(), any(), any(), anyBoolean(), any(), any());
296+
297+
assertThatThrownBy(() -> rsm.copyLogSegmentData(remoteLogSegmentMetadata, logSegmentData))
298+
.isInstanceOf(RemoteStorageException.class)
299+
.hasRootCauseInstanceOf(IOException.class);
300+
301+
// then no files stored in remote
302+
assertThat(remotePartitionPath).doesNotExist();
303+
304+
// fallback to real method
305+
doCallRealMethod().when(rsm).uploadManifest(any(), any(), any(), anyBoolean(), any(), any());
306+
307+
// when all good
308+
rsm.copyLogSegmentData(remoteLogSegmentMetadata, logSegmentData);
309+
assertThat(Files.list(remotePartitionPath)).hasSize(3);
310+
}
311+
224312
static Stream<Arguments> provideNonInterruptionExceptions() {
225313
return Stream.of(
226314
arguments(null, Exception.class),

0 commit comments

Comments
 (0)