Skip to content

Commit 758566a

Browse files
committed
casync-tool: gc verb
Fixes #43.
1 parent 7aa62e3 commit 758566a

File tree

8 files changed

+439
-10
lines changed

8 files changed

+439
-10
lines changed

README.md

+3-1
Original file line numberDiff line numberDiff line change
@@ -158,7 +158,9 @@ but there are other systems that use similar algorithms, in particular:
158158
## Maintenance
159159

160160
```
161-
# casync gc /var/lib/backup.castr /home/lennart.caidx /home/foobar.caidx ... (NOT IMPLEMENTED YET)
161+
# casync gc /home/lennart-20170101.caidx /home/lennart-20170102.caidx /home/lennart-20170103.caidx
162+
# casync gc --store=/var/lib/backup/backup.castr /home/lennart-*.caidx
163+
162164
# casync make /home/lennart.catab /home/lennart (NOT IMPLEMENTED)
163165
```
164166

doc/casync.rst

+20-8
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ Synopsis
1616
| **casync** [*OPTIONS*...] digest [*ARCHIVE* | *BLOB* | *ARCHIVE_INDEX* | *BLOB_INDEX* | *DIRECTORY*]
1717
| **casync** [*OPTIONS*...] mount [*ARCHIVE* | *ARCHIVE_INDEX*] *PATH*
1818
| **casync** [*OPTIONS*...] mkdev [*BLOB* | *BLOB_INDEX*] [*NODE*]
19+
| **casync** [*OPTIONS*...] gc *BLOB_INDEX* | *ARCHIVE_INDEX* ...
1920
2021
Description
2122
-----------
@@ -42,8 +43,8 @@ The metadata included in the archive is controlled by the ``--with-*`` and
4243
``--without-*`` options.
4344

4445
|
45-
| **casync** extract [*ARCHIVE* | *ARCHIVE_INDEX*] [*DIRECTORY*]
46-
| **casync** extract *BLOB_INDEX* *FILE* | *DEVICE*
46+
| **casync** **extract** [*ARCHIVE* | *ARCHIVE_INDEX*] [*DIRECTORY*]
47+
| **casync** **extract** *BLOB_INDEX* *FILE* | *DEVICE*
4748
4849
This will extract the contents of a .catar archive or .caidx index
4950
into the specified *DIRECTORY*, or the contents specified by *BLOB_INDEX*
@@ -54,7 +55,7 @@ The metadata replayed from the archive is controlled by the ``--with-*`` and
5455
``--without-*`` options.
5556

5657
|
57-
| **casync** list [*ARCHIVE* | *ARCHIVE_INDEX* | *DIRECTORY*]
58+
| **casync** **list** [*ARCHIVE* | *ARCHIVE_INDEX* | *DIRECTORY*]
5859
5960
This will list all the files and directories in the specified .catar
6061
archive or .caidx index, or the directory. The argument is optional,
@@ -68,7 +69,7 @@ The output includes the permission mask and file names::
6869
-rw-r--r-- TODO
6970

7071
|
71-
| **casync** mtree [*ARCHIVE* | *ARCHIVE_INDEX* | *DIRECTORY*]
72+
| **casync** **mtree** [*ARCHIVE* | *ARCHIVE_INDEX* | *DIRECTORY*]
7273
7374
This is similar to **list**, but includes information about each entry in the
7475
key=value format defined by BSD mtree(5)::
@@ -79,7 +80,7 @@ key=value format defined by BSD mtree(5)::
7980
TODO type=file mode=0644 size=2395 uid=0 gid=0 time=1498175562.000000000 sha256digest=316f11a03c08ec39f0328ab1f7446bd048507d3fbeafffe7c32fad4942244b7d
8081

8182
|
82-
| **casync** stat [*ARCHIVE* | *ARCHIVE_INDEX* | *DIRECTORY*] [*PATH*]
83+
| **casync** **stat** [*ARCHIVE* | *ARCHIVE_INDEX* | *DIRECTORY*] [*PATH*]
8384
8485
This will show detailed information about a file or directory *PATH*, as found
8586
in either *ARCHIVE* or *ARCHIVE_INDEX* or underneath *DIRECTORY*. Both arguments
@@ -99,7 +100,7 @@ Example output::
99100
Group: zbyszek (1000)
100101

101102
|
102-
| **casync** digest [*ARCHIVE* | *BLOB* | *ARCHIVE_INDEX* | *BLOB_INDEX* | *DIRECTORY*]
103+
| **casync** **digest** [*ARCHIVE* | *BLOB* | *ARCHIVE_INDEX* | *BLOB_INDEX* | *DIRECTORY*]
103104
104105
This will compute and print the checksum of the argument.
105106
The argument is optional and defaults to the current directory::
@@ -111,13 +112,13 @@ The argument is optional and defaults to the current directory::
111112
d1698b0c4c27163284abea5d1e369b92e89dd07cb74378638849800e0406baf7
112113

113114
|
114-
| **casync** mount [*ARCHIVE* | *ARCHIVE_INDEX*] *PATH*
115+
| **casync** **mount** [*ARCHIVE* | *ARCHIVE_INDEX*] *PATH*
115116
116117
This will mount the specified .catar archive or .caidx index at the
117118
specified *PATH*, using the FUSE protocol.
118119

119120
|
120-
| **casync** mkdev [*BLOB* | *BLOB_INDEX*] [*NODE*]
121+
| **casync** **mkdev** [*BLOB* | *BLOB_INDEX*] [*NODE*]
121122
122123
This will create a block device *NODE* with the contents specified
123124
by the .caibx *BLOB_INDEX* or just the file or block device *BLOB*,
@@ -134,13 +135,24 @@ Example::
134135

135136
When ``casync mkdev`` is killed, the device is destroyed.
136137

138+
|
139+
| **casync** **gc** *ARCHIVE_INDEX* | *BLOB_INDEX* ...
140+
141+
This will remove all chunks that are not used by any of the specified indices
142+
(one or more blob and archive indices can be given). If ``--store`` is not
143+
given, the default store for the first index will be used.
144+
145+
This command can be used to prune unused chunks from a shared chunk
146+
store.
147+
137148
Options
138149
-------
139150

140151
General options:
141152

142153
--help, -h Show terse help output
143154
--verbose, -v Show terse status information during runtime
155+
--dry-run, -n Only print what would be removed with **gc**
144156
--store=PATH The primary chunk store to use
145157
--extra-store=<PATH> Additional chunk store to look for chunks in
146158
--chunk-size=<[MIN:]AVG[:MAX]> The minimal/average/maximum number of bytes in a chunk

src/castore.c

+95
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,14 @@
11
/* SPDX-License-Identifier: LGPL-2.1+ */
22

3+
#include <dirent.h>
34
#include <fcntl.h>
45
#include <lzma.h>
56
#include <sys/stat.h>
67
#include <unistd.h>
78

89
#include "castore.h"
910
#include "def.h"
11+
#include "dirent-util.h"
1012
#include "realloc-buffer.h"
1113
#include "rm-rf.h"
1214
#include "util.h"
@@ -34,6 +36,14 @@ struct CaStore {
3436
uint64_t n_request_bytes;
3537
};
3638

39+
struct CaStoreIterator {
40+
CaStore *store;
41+
42+
DIR *rootdir;
43+
struct dirent *subdir_de;
44+
DIR *subdir;
45+
};
46+
3747
CaStore* ca_store_new(void) {
3848
CaStore *store;
3949

@@ -312,3 +322,88 @@ int ca_store_set_digest_type(CaStore *s, CaDigestType type) {
312322

313323
return 0;
314324
}
325+
326+
/* Allocates a zero-initialized iterator bound to 'store'.
 *
 * The store pointer is only recorded, no reference is taken here. No directory is opened yet either; that
 * happens in ca_store_iterator_next().
 *
 * Returns the new iterator, or NULL if the allocation failed. */
CaStoreIterator* ca_store_iterator_new(CaStore *store) {
        CaStoreIterator *iter = new0(CaStoreIterator, 1);

        if (iter)
                iter->store = store;

        return iter;
}
337+
338+
/* Releases an iterator: closes whichever directory streams it still holds open and frees the object.
 *
 * Accepts NULL, in which case nothing is done. Returns NULL when 'iter' is NULL, and otherwise whatever
 * mfree() returns, so that callers can write "iter = ca_store_iterator_unref(iter);". */
CaStoreIterator* ca_store_iterator_unref(CaStoreIterator *iter) {
        DIR *root, *sub;

        if (!iter)
                return NULL;

        root = iter->rootdir;
        sub = iter->subdir;

        if (root)
                closedir(root);
        if (sub)
                closedir(sub);

        return mfree(iter);
}
348+
349+
int ca_store_iterator_next(
350+
CaStoreIterator *iter,
351+
int *rootdir_fd,
352+
const char **subdir,
353+
int *subdir_fd,
354+
const char **chunk) {
355+
356+
struct dirent *de;
357+
358+
if (!iter->rootdir) {
359+
iter->rootdir = opendir(iter->store->root);
360+
if (!iter->rootdir)
361+
return -errno;
362+
}
363+
364+
while (true) {
365+
if (!iter->subdir) {
366+
int fd;
367+
368+
errno = 0;
369+
iter->subdir_de = readdir(iter->rootdir);
370+
if (!iter->subdir_de) {
371+
if (errno > 0)
372+
return -errno;
373+
return 0; /* done */
374+
}
375+
376+
fd = openat(dirfd(iter->rootdir), iter->subdir_de->d_name,
377+
O_RDONLY|O_CLOEXEC|O_DIRECTORY);
378+
if (fd < 0) {
379+
if (errno == EISDIR)
380+
continue;
381+
return -errno;
382+
}
383+
384+
iter->subdir = fdopendir(fd);
385+
if (!iter->subdir) {
386+
safe_close(fd);
387+
return -errno;
388+
}
389+
}
390+
391+
FOREACH_DIRENT_ALL(de, iter->subdir, return -errno) {
392+
if (!dirent_is_file_with_suffix(de, ".cacnk"))
393+
continue;
394+
395+
if (rootdir_fd)
396+
*rootdir_fd = dirfd(iter->rootdir);
397+
if (subdir)
398+
*subdir = iter->subdir_de->d_name;
399+
if (subdir_fd)
400+
*subdir_fd = dirfd(iter->subdir);
401+
if (chunk)
402+
*chunk = de->d_name;
403+
return 1; /* success */
404+
}
405+
406+
assert_se(closedir(iter->subdir) == 0);
407+
iter->subdir = NULL;
408+
}
409+
}

src/castore.h

+16
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,14 @@
88
#include "cautil.h"
99

1010
typedef struct CaStore CaStore;
11+
typedef struct CaStoreIterator CaStoreIterator;
1112

1213
CaStore* ca_store_new(void);
1314
CaStore *ca_store_new_cache(void);
1415
CaStore* ca_store_unref(CaStore *store);
16+
/* Variant of ca_store_unref() that takes the address of the pointer variable, which is the signature
 * a _cleanup_() handler needs. */
static inline void ca_store_unrefp(CaStore **store) {
        CaStore *s = *store;

        ca_store_unref(s);
}
1519

1620
int ca_store_set_path(CaStore *store, const char *path);
1721
int ca_store_set_compression(CaStore *store, CaChunkCompression c);
@@ -26,4 +30,16 @@ int ca_store_get_request_bytes(CaStore *s, uint64_t *ret);
2630

2731
int ca_store_set_digest_type(CaStore *s, CaDigestType type);
2832

33+
CaStoreIterator* ca_store_iterator_new(CaStore *store);
34+
CaStoreIterator* ca_store_iterator_unref(CaStoreIterator *iter);
35+
/* Calls ca_store_iterator_unref() on the iterator that *iter points to. Takes the address of the pointer
 * variable, presumably so that it can serve as a _cleanup_() handler (confirm against the callers). */
static inline void ca_store_iterator_unrefp(CaStoreIterator **iter) {
        CaStoreIterator *it = *iter;

        ca_store_iterator_unref(it);
}
38+
int ca_store_iterator_next(
39+
CaStoreIterator *iter,
40+
int *rootdir_fd,
41+
const char **subdir,
42+
int *subdir_fd,
43+
const char **chunk);
44+
2945
#endif

src/casync-tool.c

+73-1
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
#include "caremote.h"
2222
#include "castore.h"
2323
#include "casync.h"
24+
#include "gc.h"
2425
#include "notify.h"
2526
#include "parse-util.h"
2627
#include "signal-handler.h"
@@ -35,6 +36,7 @@ static enum arg_what {
3536
_WHAT_INVALID = -1,
3637
} arg_what = _WHAT_INVALID;
3738
static bool arg_verbose = false;
39+
static bool arg_dry_run = false;
3840
static bool arg_exclude_nodump = true;
3941
static bool arg_exclude_submounts = false;
4042
static bool arg_reflink = true;
@@ -74,6 +76,8 @@ static void help(void) {
7476
" -h --help Show this help\n"
7577
" --version Show brief version information\n"
7678
" -v --verbose Show terse status information during runtime\n"
79+
" -n --dry-run When garbage collecting, only print what would\n"
80+
" be done\n"
7781
" --store=PATH The primary chunk store to use\n"
7882
" --extra-store=PATH Additional chunk store to look for chunks in\n"
7983
" --chunk-size=[MIN:]AVG[:MAX]\n"
@@ -312,6 +316,7 @@ static int parse_argv(int argc, char *argv[]) {
312316
{ "help", no_argument, NULL, 'h' },
313317
{ "version", no_argument, NULL, ARG_VERSION },
314318
{ "verbose", no_argument, NULL, 'v' },
319+
{ "dry-run", no_argument, NULL, 'n' },
315320
{ "store", required_argument, NULL, ARG_STORE },
316321
{ "extra-store", required_argument, NULL, ARG_EXTRA_STORE },
317322
{ "chunk-size", required_argument, NULL, ARG_CHUNK_SIZE },
@@ -345,7 +350,7 @@ static int parse_argv(int argc, char *argv[]) {
345350
if (getenv_bool("CASYNC_VERBOSE") > 0)
346351
arg_verbose = true;
347352

348-
while ((c = getopt_long(argc, argv, "hv", options, NULL)) >= 0) {
353+
while ((c = getopt_long(argc, argv, "hvn", options, NULL)) >= 0) {
349354

350355
switch (c) {
351356

@@ -361,6 +366,10 @@ static int parse_argv(int argc, char *argv[]) {
361366
arg_verbose = true;
362367
break;
363368

369+
case 'n':
370+
arg_dry_run = true;
371+
break;
372+
364373
case ARG_STORE: {
365374
char *p;
366375

@@ -3807,6 +3816,67 @@ static int verb_udev(int argc, char *argv[]) {
38073816
return 0;
38083817
}
38093818

3819+
static int verb_gc(int argc, char *argv[]) {
3820+
int i, r;
3821+
_cleanup_(ca_chunk_collection_unrefp) CaChunkCollection *coll = NULL;
3822+
_cleanup_(ca_store_unrefp) CaStore *store = NULL;
3823+
3824+
if (argc < 2) {
3825+
fprintf(stderr, "Expected at least one argument.\n");
3826+
return -EINVAL;
3827+
}
3828+
3829+
coll = ca_chunk_collection_new();
3830+
if (!coll)
3831+
return log_oom();
3832+
3833+
/* This sets the same store for all indices, based on the first index. */
3834+
r = set_default_store(argv[1]);
3835+
if (r < 0)
3836+
return r;
3837+
3838+
if (!arg_store) {
3839+
fprintf(stderr, "Failed to determine store, use --store= to set store.\n");
3840+
return -EINVAL;
3841+
}
3842+
3843+
store = ca_store_new();
3844+
if (!store)
3845+
return log_oom();
3846+
3847+
r = ca_store_set_path(store, arg_store);
3848+
if (r < 0) {
3849+
fprintf(stderr, "Set to set store to \"%s\": %s", arg_store, strerror(-r));
3850+
return r;
3851+
}
3852+
3853+
for (i = 1; i < argc; i++) {
3854+
const char *path = argv[i];
3855+
3856+
r = ca_chunk_collection_add_index(coll, path);
3857+
if (r < 0)
3858+
return r;
3859+
}
3860+
3861+
{
3862+
size_t usage, size;
3863+
3864+
assert_se(ca_chunk_collection_usage(coll, &usage) == 0);
3865+
assert_se(ca_chunk_collection_size(coll, &size) == 0);
3866+
if (arg_verbose)
3867+
printf("Chunk store usage: %zu references, %zu chunks\n", usage, size);
3868+
}
3869+
3870+
r = ca_gc_cleanup_unused(store, coll,
3871+
arg_verbose * CA_GC_VERBOSE |
3872+
arg_dry_run * CA_GC_DRY_RUN);
3873+
if (r < 0)
3874+
fprintf(stderr, "Chunk cleanup failed: %s\n", strerror(-r));
3875+
3876+
return r;
3877+
}
3878+
3879+
38103880
static int dispatch_verb(int argc, char *argv[]) {
38113881
int r;
38123882

@@ -3836,6 +3906,8 @@ static int dispatch_verb(int argc, char *argv[]) {
38363906
r = verb_push(argc, argv);
38373907
else if (streq(argv[0], "udev")) /* "Secret" verb, only to be called by the udev nbd rules */
38383908
r = verb_udev(argc, argv);
3909+
else if (streq(argv[0], "gc"))
3910+
r = verb_gc(argc, argv);
38393911
else {
38403912
fprintf(stderr, "Unknown verb '%s'. (Invoke '%s --help' for a list of available verbs.)\n", argv[0], program_invocation_short_name);
38413913
r = -EINVAL;

0 commit comments

Comments
 (0)