Skip to content

Commit c44b426

Browse files
committed
per-directory external database list
indexer (probably admin) no longer has to keep track of external database basenames that users want to track; directory owner can create a file called external.gufi listing all external db files to track; changed -q to trigger validating external db files before tracking them (defaults to not set)
1 parent 31034d3 commit c44b426

File tree

13 files changed

+255
-106
lines changed

13 files changed

+255
-106
lines changed

docs/latex/sections/external_databases.tex

Lines changed: 16 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -63,37 +63,29 @@
6363
\subsection{External Databases}
6464
In addition to extended attributes, users may place SQLite3 database
6565
files into the source file system to be indexed and attached into GUFI
66-
for querying\footnote{An alternative method might be to allow for
67-
users to place simple text files of known names into the source
68-
filesystem for automatic indexing. This would remove the requirement
69-
that the administrator for the index knows what external database file
70-
names exist.}. This effectively allows for users to associate and
66+
for querying. This effectively allows for users to associate and
7167
query arbitrary data with filesystem metadata.
7268

7369
The extended attribute code uses the more generic external database
7470
infrastructure to track the extended attributes that were not rolled
7571
in.
7672

7773
\subsubsection{Usage}
78-
In order to index a set of external database files that serve the same
79-
purpose, they should have the same basename that will be discovered
80-
during indexing by passing \texttt{-q} to \gufidirindex or
81-
\gufidirtrace.
74+
All external database files for a set of data should have the same
75+
basename. The files should be listed in a simple text file called
76+
``external.gufi'' that is in the directory where a subset of the data
77+
will be joined with GUFI data. If a listed path is relative, it will be
78+
concatenated with the parent directory's path to produce an absolute
79+
path.
8280

83-
For example, if SQLite3 files with the name ``external.db" are placed
84-
into various directories in the source filesystem, the call to index
85-
the source tree should look like:
81+
\texttt{-q} may be passed in during indexing in order to verify that
82+
an entry listed in ``external.gufi'' is indeed a database file.
8683

87-
\hspace*{\fill} \gufidirindex \texttt{-q "external.db" "ext" ...} \hspace*{\fill}
84+
One or more additional database files, referred to as the
85+
``schema template'', or ``template'', containing a copy of the same
86+
schema as each external database table should be maintained somewhere
87+
for usage when querying. The template file should generally have empty
88+
tables, but nothing prevents non-empty tables from being provided.
8889

89-
which will result in each ``external.db" file being attached with the
90-
name ``ext".
91-
92-
An additional database file, referred to as the ``schema template", or
93-
``template", containing a copy of the ``external.db" schema should be
94-
maintained somewhere for usage when querying. This template file
95-
should generally have empty tables, but nothing prevents non-empty
96-
files from being provided.
97-
98-
Note that the external databases will also be included as an entry in
99-
the index (this may change in the future).
90+
Note that ``external.gufi'' will also be included as an entry in the
91+
index (this may change in the future).

include/bf.h

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -238,9 +238,8 @@ struct input {
238238
/* compress work items (if compression library was found) */
239239
int compress;
240240

241-
/* used when indexing (-q) */
242-
trie_t *map_external; /* full path -> attach name */
243-
size_t map_external_count; /* only keeps track of unique full paths */
241+
/* check if a listed external db is valid when indexing (-q) */
242+
int check_extdb_valid;
244243

245244
/* used when querying (-Q) */
246245
sll_t external_attach; /* list of eus_t */

include/external.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,10 @@ OF SUCH DAMAGE.
7676
extern "C" {
7777
#endif
7878

79+
/* name of file that user can create to list external databases to track */
80+
#define EXTERNAL_DB_USER_FILE "external.gufi"
81+
#define EXTERNAL_DB_USER_FILE_LEN (sizeof(EXTERNAL_DB_USER_FILE) - 1)
82+
7983
/*
8084
*
8185
* these tables/views only exist in db.db and list
@@ -85,21 +89,26 @@ extern "C" {
8589
* matter the usage of the external databases
8690
*/
8791

92+
/* table */
8893
#define EXTERNAL_DBS_PWD "external_dbs_pwd" /* *.db files found during indexing */
8994
extern const char EXTERNAL_DBS_PWD_CREATE[];
9095
extern const char EXTERNAL_DBS_PWD_INSERT[];
9196

97+
/* table */
9298
#define EXTERNAL_DBS_ROLLUP "external_dbs_rollup" /* *.db files brought up during rollup */
9399
extern const char EXTERNAL_DBS_ROLLUP_CREATE[];
94100
extern const char EXTERNAL_DBS_ROLLUP_INSERT[];
95101

102+
/* view */
96103
#define EXTERNAL_DBS "external_dbs"
97104
#define EXTERNAL_DBS_LEN (sizeof(EXTERNAL_DBS) - 1)
98105

106+
/* column value */
99107
#define EXTERNAL_TYPE_XATTR_NAME "xattrs"
100108
#define EXTERNAL_TYPE_XATTR_LEN (sizeof(EXTERNAL_TYPE_XATTR_NAME) - 1)
101109
extern const refstr_t EXTERNAL_TYPE_XATTR; /* convenience struct */
102110

111+
/* column value */
103112
#define EXTERNAL_TYPE_USER_DB_NAME "user_db"
104113
#define EXTERNAL_TYPE_USER_DB_LEN (sizeof(EXTERNAL_TYPE_USER_DB_NAME) - 1)
105114
extern const refstr_t EXTERNAL_TYPE_USER_DB; /* convenience struct */

include/utils.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -106,9 +106,9 @@ int shortpath(const char *name, char *nameout, char *endname);
106106
/* descend */
107107
typedef int (*process_nondir_f)(struct work *nondir, struct entry_data *ed, void *nondir_args);
108108

109-
/* returns 1 for success, 0 for failure */
110-
typedef int (*process_external_db_f)(struct input *in,
111-
struct work *child, void *args);
109+
/* returns number of external databases tracked */
110+
typedef size_t (*process_external_db_f)(struct input *in,
111+
struct work *child, void *args);
112112

113113
struct descend_counters {
114114
size_t dirs;

src/bf.c

Lines changed: 4 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -108,8 +108,6 @@ struct input *input_init(struct input *in) {
108108
trie_insert(in->skip, ".", 1, NULL, NULL);
109109
trie_insert(in->skip, "..", 2, NULL, NULL);
110110

111-
in->map_external = trie_alloc();
112-
113111
sll_init(&in->external_attach);
114112
}
115113

@@ -119,7 +117,6 @@ struct input *input_init(struct input *in) {
119117
void input_fini(struct input *in) {
120118
if (in) {
121119
sll_destroy(&in->external_attach, free);
122-
trie_free(in->map_external);
123120
trie_free(in->skip);
124121
}
125122
}
@@ -181,7 +178,7 @@ void print_help(const char* prog_name,
181178
case 'M': printf(" -M <bytes> target memory footprint"); break;
182179
case 'C': printf(" -C <count> Number of subdirectories allowed to be enqueued for parallel processing. Any remainders will be processed in-situ"); break;
183180
case 'e': printf(" -e compress work items"); break;
184-
case 'q': printf(" -q <basename> Basename of file to keep track of during indexing"); break;
181+
case 'q': printf(" -q check that external databases are valid before tracking during indexing"); break;
185182
case 'Q': printf(" -Q <basename>\n"
186183
" <table>\n"
187184
" <template>.<table>\n"
@@ -240,7 +237,7 @@ void show_input(struct input* in, int retval) {
240237
printf("in.target_memory_footprint = %" PRIu64 "\n", in->target_memory_footprint);
241238
printf("in.subdir_limit = %zu\n", in->subdir_limit);
242239
printf("in.compress = %d\n", in->compress);
243-
printf("in.external_db_count = %zu\n", in->map_external_count);
240+
printf("in.check_extdb_valid = %d\n", in->check_extdb_valid);
244241
size_t i = 0;
245242
sll_loop(&in->external_attach, node) {
246243
eus_t *eus = (eus_t *) sll_node_data(node);
@@ -483,14 +480,8 @@ int parse_cmd_line(int argc,
483480
in->compress = 1;
484481
break;
485482

486-
case 'q': // file basename -> attach name
487-
{
488-
refstr_t basename;
489-
INSTALL_STR(&basename, optarg);
490-
491-
in->map_external_count += !trie_search(in->map_external, basename.data, basename.len, NULL);
492-
trie_insert(in->map_external, basename.data, basename.len, NULL, NULL);
493-
}
483+
case 'q':
484+
in->check_extdb_valid = 1;
494485
break;
495486

496487
case 'Q':

src/gufi_dir2index.c

Lines changed: 66 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@ OF SUCH DAMAGE.
6363

6464

6565
#include <errno.h>
66+
#include <fcntl.h>
6667
#include <dirent.h>
6768
#include <inttypes.h>
6869
#include <stdlib.h>
@@ -153,35 +154,78 @@ static int process_nondir(struct work *entry, struct entry_data *ed, void *args)
153154
return 0;
154155
}
155156

156-
static int track_external(struct input *in,
157-
struct work *child,
158-
void *args) {
157+
static size_t track_external(struct input *in,
158+
struct work *child,
159+
void *args) {
159160
sqlite3 *db = (sqlite3 *) args;
160161

161-
refstr_t *ext = NULL;
162-
if (!trie_search(in->map_external, child->name + child->name_len - child->basename_len,
163-
child->basename_len, (void **) &ext)) {
162+
int extdb_list = open(child->name, O_RDONLY);
163+
if (extdb_list < 0) {
164+
const int err = errno;
165+
fprintf(stderr, "Error: Could not open user external database list in %s: %s (%d)\n",
166+
child->name, strerror(err), err);
164167
return 0;
165168
}
166169

167-
int rc = 1;
170+
size_t rc = 0;
171+
172+
char *line = NULL;
173+
size_t len = 0;
174+
off_t offset = 0;
175+
while (getline_fd(&line, &len, extdb_list, &offset, 512) > 0) {
176+
char extdb_path_stack[MAXPATH];
177+
char *extdb_path = line;
178+
179+
/* resolve relative paths */
180+
if (line[0] != '/') {
181+
char path[MAXPATH];
182+
SNFORMAT_S(path, sizeof(path), 2,
183+
child->name, child->name_len - child->basename_len,
184+
/* basename does not include slash, so don't need to add another one */
185+
line, len);
186+
187+
if (!realpath(path, extdb_path_stack)) {
188+
const int err = errno;
189+
fprintf(stderr, "Error: Could not resolve external database path %s: %s (%d)\n",
190+
path, strerror(err), err);
191+
free(line);
192+
line = NULL;
193+
continue;
194+
}
195+
196+
extdb_path = extdb_path_stack;
197+
}
168198

169-
/* open the path to make sure it eventually resolves to a file */
170-
sqlite3 *extdb = opendb(child->name, SQLITE_OPEN_READONLY, 0, 0, NULL, NULL);
171-
char *err = NULL;
199+
if (in->check_extdb_valid) {
200+
/* open the path to make sure it eventually resolves to a file */
201+
sqlite3 *extdb = opendb(extdb_path, SQLITE_OPEN_READONLY, 0, 0, NULL, NULL);
202+
char *err = NULL;
203+
204+
/* make sure this file is a sqlite3 db */
205+
/* can probably skip this check */
206+
if (sqlite3_exec(extdb, "SELECT '' FROM sqlite_master;", NULL, NULL, &err) == SQLITE_OK) {
207+
rc += !external_insert(db, EXTERNAL_TYPE_USER_DB.data, child->pinode, extdb_path);
208+
}
209+
else {
210+
fprintf(stderr, "Warning: Not tracking requested external db: %s: %s\n",
211+
extdb_path, err);
212+
sqlite3_free(err);
213+
}
214+
215+
closedb(extdb);
216+
}
217+
else {
218+
rc += !external_insert(db, EXTERNAL_TYPE_USER_DB.data, child->pinode, extdb_path);
219+
}
172220

173-
/* make sure this file is a sqlite3 db */
174-
/* can probably skip this check */
175-
if (sqlite3_exec(extdb, "SELECT '' FROM sqlite_master;", NULL, NULL, &err) == SQLITE_OK) {
176-
rc = !external_insert(db, EXTERNAL_TYPE_USER_DB.data, child->pinode, child->name);
221+
free(line);
222+
line = NULL;
177223
}
178-
else {
179-
fprintf(stderr, "Warning: Not tracking requested external db: %s: %s\n",
180-
child->name, err);
181-
sqlite3_free(err);
182-
rc = 0;
183-
}
184-
closedb(extdb);
224+
225+
free(line);
226+
line = NULL;
227+
228+
close(extdb_list);
185229

186230
return rc;
187231
}
@@ -399,7 +443,7 @@ static void sub_help(void) {
399443

400444
int main(int argc, char *argv[]) {
401445
struct PoolArgs pa;
402-
int idx = parse_cmd_line(argc, argv, "hHn:xz:k:M:C:" COMPRESS_OPT "q:", 2, "input_dir... output_dir", &pa.in);
446+
int idx = parse_cmd_line(argc, argv, "hHn:xz:k:M:C:" COMPRESS_OPT "q", 2, "input_dir... output_dir", &pa.in);
403447
if (pa.in.helped)
404448
sub_help();
405449
if (idx < 0) {

0 commit comments

Comments
 (0)