Skip to content

Commit ff3f5c1

Browse files
committed
Add command for checksum validation
1 parent 5c61d9f commit ff3f5c1

File tree

6 files changed

+159
-2
lines changed

6 files changed

+159
-2
lines changed

.dockerignore

+1-1
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ build/
1010
**/.DS_Store
1111

1212
# Ignore logs and temporary files
13-
*.log
13+
**/*.log
1414
*.tmp
1515
*.swp
1616

Dockerfile

+2
Original file line numberDiff line numberDiff line change
@@ -16,4 +16,6 @@ HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 CMD nc -z
1616

1717
COPY --from=build /home/gradle/src/kiwi-server/build/install/kiwi-server/ /app/
1818

19+
# Generate the /app/checksum wrapper script that launches kiwi.core.checksum.Run.
# printf is used instead of `echo -e`: echo flags are undefined in POSIX sh, and shells such as dash
# would write a literal "-e" in front of the shebang, producing a broken script.
RUN printf '#!/bin/sh\njava -cp "/app/lib/*" kiwi.core.checksum.Run "$@"\n' > /app/checksum && chmod +x /app/checksum
20+
1921
CMD ["/app/bin/kiwi-server"]

README.md

+25
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,31 @@ Default values and environment variables:
109109
redis-cli -h localhost -p 6379
110110
```
111111

112+
## Checksums
113+
114+
KiWi uses CRC32 checksums to ensure data integrity. The checksum is stored alongside the data.
115+
There is a special command that can be used to verify the data integrity.
116+
117+
The checksum command uses all available CPU cores to parallelize the checksum calculation. If a checksum
118+
fails,
119+
the following error message will be displayed:
120+
121+
```text
122+
Checksum failed: segment=00000000000000000000 position=444636640 checksum=2005447726 timestamp=1733002903067 ttl=0 keySize=16 valueSize=0
123+
```
124+
125+
### Docker
126+
127+
```bash
128+
docker run -it --rm -p 6379:6379 nemanjam/kiwi:latest sh checksum --dir [log dir] --threads [threads]
129+
```
130+
131+
### Java
132+
133+
```bash
134+
java -cp "kiwi-server/build/install/kiwi-server/lib/*" kiwi.core.checksum.Run --dir [log dir] --threads [threads]
135+
```
136+
112137
## Benchmarks
113138

114139
KiWi can be evaluated with [redis-benchmark](https://redis.io/topics/benchmarks) utility command.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,124 @@
1+
package kiwi.core.checksum;
2+
3+
import kiwi.core.storage.bitcask.log.LogSegment;
4+
import kiwi.core.storage.bitcask.log.Record;
5+
6+
import java.io.IOException;
7+
import java.nio.file.Files;
8+
import java.nio.file.InvalidPathException;
9+
import java.nio.file.Path;
10+
import java.nio.file.Paths;
11+
import java.util.ArrayList;
12+
import java.util.List;
13+
import java.util.concurrent.ExecutorService;
14+
import java.util.concurrent.Executors;
15+
import java.util.concurrent.Future;
16+
import java.util.stream.Stream;
17+
18+
public class Run {
19+
20+
public static void main(String[] args) throws IOException {
21+
Path logDir = null;
22+
int numThreads = Runtime.getRuntime().availableProcessors();
23+
24+
if (args.length == 0) {
25+
System.err.println("Usage: COMMAND --dir <log-dir> [--threads <num-threads>]");
26+
System.exit(1);
27+
}
28+
29+
for (int i = 0; i < args.length; i++) {
30+
switch (args[i]) {
31+
case "--dir":
32+
case "-d":
33+
if (i + 1 < args.length) {
34+
try {
35+
logDir = Paths.get(args[++i]);
36+
37+
if (!logDir.toFile().isDirectory()) {
38+
System.err.println("Path for --dir or -d is not a directory");
39+
System.exit(1);
40+
}
41+
} catch (InvalidPathException e) {
42+
System.err.println("Invalid path for --dir or -d");
43+
System.exit(1);
44+
}
45+
} else {
46+
System.err.println("Missing value for --dir or -d");
47+
System.exit(1);
48+
}
49+
break;
50+
case "--threads":
51+
case "-t":
52+
if (i + 1 < args.length) {
53+
try {
54+
numThreads = Integer.parseInt(args[++i]);
55+
} catch (NumberFormatException e) {
56+
System.err.println("Invalid number format for --threads or -t");
57+
System.exit(1);
58+
}
59+
} else {
60+
System.err.println("Missing value for --threads or -t");
61+
System.exit(1);
62+
}
63+
break;
64+
default:
65+
System.err.println("Unknown argument: " + args[i]);
66+
System.exit(1);
67+
}
68+
}
69+
70+
if (logDir == null) {
71+
System.err.println("Argument '--dir' is required");
72+
System.exit(1);
73+
}
74+
75+
checksum(logDir, numThreads);
76+
77+
System.exit(0);
78+
}
79+
80+
static void checksum(Path logDir, int numThreads) throws IOException {
81+
ExecutorService executor = Executors.newFixedThreadPool(numThreads);
82+
List<Future<?>> futures = new ArrayList<>();
83+
84+
try (Stream<Path> paths = Files.walk(logDir)) {
85+
List<LogSegment> segments = paths.filter(Files::isRegularFile)
86+
.filter(path -> path.getFileName().toString().endsWith(".log"))
87+
.map((path) -> LogSegment.open(path, true))
88+
.toList();
89+
90+
for (LogSegment segment : segments) {
91+
futures.add(executor.submit(() -> checkLogSegment(segment)));
92+
}
93+
}
94+
95+
futures.forEach(future -> {
96+
try {
97+
future.get();
98+
} catch (Exception e) {
99+
System.err.println("Error processing log: " + e.getMessage());
100+
}
101+
});
102+
103+
executor.shutdown();
104+
}
105+
106+
static void checkLogSegment(LogSegment segment) {
107+
long position = 0;
108+
for (Record record : segment.getRecords()) {
109+
if (!record.isValidChecksum()) {
110+
String message = String.format(
111+
"Checksum failed: segment=%s position=%s checksum=%d timestamp=%d ttl=%d keySize=%d valueSize=%d",
112+
segment.name(),
113+
position,
114+
record.header().checksum(),
115+
record.header().timestamp(),
116+
record.header().ttl(),
117+
record.header().keySize(),
118+
record.header().valueSize());
119+
System.out.println(message);
120+
}
121+
position += record.size();
122+
}
123+
}
124+
}

kiwi-core/src/main/java/kiwi/core/storage/Utils.java

+3-1
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,11 @@ public class Utils {
1616
private static final Logger logger = LoggerFactory.getLogger(Utils.class);
1717

1818
public static long checksum(long timestamp, long ttl, Bytes key, Bytes value) {
19-
ByteBuffer buffer = ByteBuffer.allocate(2 * Long.BYTES + key.size() + value.size());
19+
ByteBuffer buffer = ByteBuffer.allocate(2 * (Long.BYTES + Integer.BYTES) + key.size() + value.size());
2020
buffer.putLong(timestamp);
2121
buffer.putLong(ttl);
22+
buffer.putInt(key.size());
23+
buffer.putInt(value.size());
2224
buffer.put(key.get());
2325
buffer.put(value.get());
2426
CRC32 crc = new CRC32();

kiwi-core/src/main/java/kiwi/core/storage/bitcask/log/LogSegment.java

+4
Original file line numberDiff line numberDiff line change
@@ -222,6 +222,10 @@ public double dirtyRatio(Map<Bytes, Long> keyTimestampMap) {
222222
return (double) dirtyCount / total;
223223
}
224224

225+
/**
 * Returns an iterable whose iterators are not filtered.
 *
 * <p>Each call to {@code iterator()} creates a new {@code RecordIterator} over {@code channel}
 * with a predicate that accepts every key header.
 *
 * @return an iterable of records backed by {@code channel}
 */
public Iterable<Record> getRecords() {
226+
return () -> new RecordIterator(channel, keyHeader -> true);
227+
}
228+
225229
/**
 * Returns an iterable restricted to the key headers accepted by
 * {@code isActiveRecord(keyHeader, keyTimestampMap)}.
 *
 * <p>Each call to {@code iterator()} creates a new {@code RecordIterator} over {@code channel}.
 *
 * @param keyTimestampMap map passed through to {@code isActiveRecord} for every key header
 * @return an iterable of the records whose key header passes the filter
 */
public Iterable<Record> getActiveRecords(Map<Bytes, Long> keyTimestampMap) {
226230
return () -> new RecordIterator(channel, keyHeader -> isActiveRecord(keyHeader, keyTimestampMap));
227231
}

0 commit comments

Comments
 (0)