Skip to content

Commit b7bffd8

Browse files
committed
Merge pull request #295 from bcgov/sync
Sync Buckets recursively
2 parents fa2a6e2 + 152d84e commit b7bffd8

File tree

12 files changed

+316
-106
lines changed

12 files changed

+316
-106
lines changed

app/src/components/utils.js

+29
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,17 @@ const utils = {
6161
else return AuthMode.FULLAUTH; // basicAuth && oidcAuth
6262
},
6363

64+
/**
65+
* @function formatS3KeyForCompare
66+
* Format S3 key-prefixes for comparison with bucket.key in COMS db
67+
* @param {string} k S3 key prefix. example: photos/docs/
68+
* @returns {string} provided key prefix with the trailing slash (and any file name after it) removed; '/' when nothing remains
69+
*/
70+
formatS3KeyForCompare(k) {
71+
let key = k.substr(0, k.lastIndexOf('/')); // remove trailing slash and file name
72+
return key || '/'; // set empty key to '/' to match convention in COMS db
73+
},
74+
6475
/**
6576
* @function getBucket
6677
* Acquire core S3 bucket credential information from database or configuration
@@ -328,6 +339,24 @@ const utils = {
328339
&& pathParts.filter(part => !prefixParts.includes(part)).length === 1;
329340
},
330341

342+
/**
343+
* @function isPrefixOfPath
344+
* Predicate function determining if the `path` is a member of or equal to the `prefix` path
345+
* @param {string} prefix The base "folder"
346+
* @param {string} path The "file" to check
347+
* @returns {boolean} True if path is member of prefix. False in all other cases.
348+
*/
349+
isPrefixOfPath(prefix, path) {
350+
if (typeof prefix !== 'string' || typeof path !== 'string') return false;
351+
// path `/photos/holiday/` (represents a folder) and should be an objects in bucket with key `/photos/holiday`
352+
if (prefix === path || prefix + DELIMITER === path) return true;
353+
354+
const pathParts = path.split(DELIMITER).filter(part => part);
355+
const prefixParts = prefix.split(DELIMITER).filter(part => part);
356+
return prefixParts.every((part, i) => pathParts[i] === part)
357+
&& pathParts.filter(part => !prefixParts.includes(part)).length === 1;
358+
},
359+
331360
/**
332361
* @function isTruthy
333362
* Returns true if the element name in the object contains a truthy value

app/src/controllers/sync.js

+210-24
Original file line numberDiff line numberDiff line change
@@ -1,57 +1,243 @@
11
const { NIL: SYSTEM_USER } = require('uuid');
22

33
const errorToProblem = require('../components/errorToProblem');
4-
const { addDashesToUuid, getCurrentIdentity } = require('../components/utils');
4+
const { addDashesToUuid, getCurrentIdentity, formatS3KeyForCompare, isPrefixOfPath } = require('../components/utils');
55
const utils = require('../db/models/utils');
6-
const { bucketService, objectService, storageService, objectQueueService, userService } = require('../services');
6+
const log = require('../components/log')(module.filename);
7+
8+
const {
9+
bucketPermissionService,
10+
bucketService,
11+
objectService,
12+
storageService,
13+
objectQueueService,
14+
userService
15+
} = require('../services');
716

817
const SERVICE = 'ObjectQueueService';
918

1019
/**
1120
* The Sync Controller
1221
*/
1322
const controller = {
23+
24+
/**
25+
* @function syncBucketRecursive
26+
* Synchronizes all objects and subfolders found at the Key and below for the given parent folder (bucket)
27+
* NOTE: OIDC users require MANAGE permission to do a recursive sync on a folder
28+
* All their permissions will be copied to any NEW sub-folders created
29+
* @param {object} req Express request object
30+
* @param {object} res Express response object
31+
* @param {function} next The next callback function
32+
* @returns {function} Express middleware function
33+
*/
34+
async syncBucketRecursive(req, res, next) {
35+
try {
36+
// Wrap all sql operations in a single transaction
37+
const response = await utils.trxWrapper(async (trx) => {
38+
39+
// curren userId
40+
const userId = await userService.getCurrentUserId(
41+
getCurrentIdentity(req.currentUser, SYSTEM_USER),
42+
SYSTEM_USER
43+
);
44+
// parent bucket
45+
const bucketId = addDashesToUuid(req.params.bucketId);
46+
const parentBucket = await bucketService.read(bucketId);
47+
48+
// current user's permissions on parent bucket (folder)
49+
const currentUserParentBucketPerms = userId !== SYSTEM_USER ? (await bucketPermissionService.searchPermissions({
50+
bucketId: parentBucket.bucketId,
51+
userId: userId
52+
})).map(p => p.permCode) : [];
53+
54+
/**
55+
* sync (ie create or delete) bucket records in COMS db to match 'folders' (S3 key prefixes) that exist in S3
56+
*/
57+
// parent + child bucket records already in COMS db
58+
const dbChildBuckets = await bucketService.searchChildBuckets(parentBucket);
59+
let dbBuckets = [parentBucket].concat(dbChildBuckets);
60+
// 'folders' that exist below (and including) the parent 'folder' in S3
61+
const s3Response = await storageService.listAllObjectVersions({ bucketId: bucketId, precisePath: false });
62+
const s3Keys = [...new Set([
63+
...s3Response.DeleteMarkers.map(object => formatS3KeyForCompare(object.Key)),
64+
...s3Response.Versions.map(object => formatS3KeyForCompare(object.Key)),
65+
])];
66+
67+
const syncedBuckets = await this.syncBucketRecords(
68+
dbBuckets,
69+
s3Keys,
70+
parentBucket,
71+
// assign current user's permissions on parent bucket to new sub-folders (buckets)
72+
currentUserParentBucketPerms,
73+
trx
74+
);
75+
76+
/**
77+
* Queue objects in all the folders for syncing
78+
*/
79+
return await this.queueObjectRecords(syncedBuckets, s3Response, userId, trx);
80+
});
81+
82+
// return number of jobs inserted
83+
res.status(202).json(response);
84+
} catch (e) {
85+
next(errorToProblem(SERVICE, e));
86+
}
87+
},
88+
1489
/**
15-
* @function syncBucket
16-
* Synchronizes a bucket
90+
* @function syncBucketSingle
91+
* Synchronizes objects found at the Key of the given bucket, ignoring subfolders and files after the next delimiter
1792
* @param {object} req Express request object
1893
* @param {object} res Express response object
1994
* @param {function} next The next callback function
2095
* @returns {function} Express middleware function
2196
*/
22-
async syncBucket(req, res, next) {
97+
async syncBucketSingle(req, res, next) {
2398
try {
24-
// TODO: Consider adding an "all" mode for checking through all known objects and buckets for job enumeration
25-
// const allMode = isTruthy(req.query.all);
2699
const bucketId = addDashesToUuid(req.params.bucketId);
100+
const bucket = await bucketService.read(bucketId);
27101
const userId = await userService.getCurrentUserId(getCurrentIdentity(req.currentUser, SYSTEM_USER), SYSTEM_USER);
28102

29-
const [dbResponse, s3Response] = await Promise.all([
30-
objectService.searchObjects({ bucketId: bucketId }),
31-
storageService.listAllObjectVersions({ bucketId: bucketId, filterLatest: true })
32-
]);
33-
34-
// Aggregate and dedupe all file paths to consider
35-
const jobs = [...new Set([
36-
...dbResponse.data.map(object => object.path),
37-
...s3Response.DeleteMarkers.map(object => object.Key),
38-
...s3Response.Versions.map(object => object.Key)
39-
])].map(path => ({ path: path, bucketId: bucketId }));
103+
const s3Objects = await storageService.listAllObjectVersions({ bucketId: bucketId, filterLatest: true });
40104

41105
const response = await utils.trxWrapper(async (trx) => {
42-
await bucketService.update({
43-
bucketId: bucketId,
44-
userId: userId,
45-
lastSyncRequestedDate: new Date().toISOString()
46-
}, trx);
47-
return await objectQueueService.enqueue({ jobs: jobs }, trx);
106+
return this.queueObjectRecords([bucket], s3Objects, userId, trx);
48107
});
108+
49109
res.status(202).json(response);
50110
} catch (e) {
51111
next(errorToProblem(SERVICE, e));
52112
}
53113
},
54114

115+
/**
116+
* @function syncBucketRecords
117+
* Synchronizes (creates / prunes) COMS db bucket records for each 'directory' found in S3
118+
* @param {object[]} dbBuckets Array of Bucket models - bucket records already in COMS db before syncing
119+
* @param {string[]} s3Keys Array of key prefixes from S3 representing 'directories'
120+
* @param {object} parentBucket Bucket model for the COMS db bucket record of parent bucket
121+
* @param {string[]} currentUserParentBucketPerms Array of PermCodes to add to NEW buckets
122+
* @param {object} [trx] An Objection Transaction object
123+
* @returns {Promise<object[]>} An array of Bucket models for the bucket records in COMS db after syncing
124+
*/
125+
async syncBucketRecords(dbBuckets, s3Keys, parentBucket, currentUserParentBucketPerms, trx) {
126+
try {
127+
// delete buckets not found in S3 from COMS db
128+
const oldDbBuckets = dbBuckets.filter(b => !s3Keys.includes(b.key));
129+
await Promise.all(
130+
oldDbBuckets.map(dbBucket =>
131+
bucketService.delete(dbBucket.bucketId, trx)
132+
.then(() => {
133+
dbBuckets = dbBuckets.filter(b => b.bucketId !== dbBucket.bucketId);
134+
})
135+
)
136+
);
137+
138+
// Create buckets only found in S3 in COMS db
139+
const newS3Keys = s3Keys.filter(k => !dbBuckets.map(b => b.key).includes(k));
140+
await Promise.all(
141+
newS3Keys.map(s3Key => {
142+
const data = {
143+
bucketName: s3Key.substring(s3Key.lastIndexOf('/') + 1),
144+
accessKeyId: parentBucket.accessKeyId,
145+
bucket: parentBucket.bucket,
146+
endpoint: parentBucket.endpoint,
147+
key: s3Key,
148+
secretAccessKey: parentBucket.secretAccessKey,
149+
region: parentBucket.region ?? undefined,
150+
active: parentBucket.active,
151+
userId: parentBucket.createdBy ?? SYSTEM_USER,
152+
// current user has MANAGE perm on parent folder (see route.hasPermission)
153+
// ..so copy all their perms to NEW subfolders
154+
permCodes: currentUserParentBucketPerms
155+
};
156+
return bucketService.create(data, trx)
157+
.then((dbResponse) => {
158+
dbBuckets.push(dbResponse);
159+
});
160+
})
161+
);
162+
163+
return dbBuckets;
164+
}
165+
catch (err) {
166+
log.error(err.message, { function: 'syncBucketRecords' });
167+
throw err;
168+
}
169+
},
170+
171+
/**
172+
* @function queueObjectRecords
173+
* Queues sync jobs for every object found in the COMS db or in S3 for the given buckets
174+
* @param {object[]} dbBuckets Array of Bucket models in COMS db
175+
* @param {object} s3Objects The response from storage.listAllObjectVersions - an
176+
* object containing an array of DeleteMarkers and Versions
177+
* @param {string} userId the guid of current user
178+
* @param {object} [trx] An Objection Transaction object
179+
* @returns {Promise<*>} The result of objectQueueService.enqueue (presumably the number of jobs inserted - confirm against the service)
180+
*/
181+
async queueObjectRecords(dbBuckets, s3Objects, userId, trx) {
182+
try {
183+
// get all objects in existing buckets in all 'buckets' in COMS db
184+
const dbObjects = await objectService.searchObjects({
185+
bucketId: dbBuckets.map(b => b.bucketId)
186+
}, trx);
187+
188+
/**
189+
* merge arrays of objects from COMS db and S3 to form an array of jobs with format:
190+
* [ { path: '/images/img3.jpg', bucketId: '123' }, { path: '/images/album1/img1.jpg', bucketId: '456' } ]
191+
*/
192+
const objects = [...new Set([
193+
// objects already in database
194+
...dbObjects.data.map(object => {
195+
return {
196+
path: object.path,
197+
bucketId: object.bucketId
198+
};
199+
}),
200+
// DeleteMarkers found in S3
201+
...s3Objects.DeleteMarkers.map(object => {
202+
return {
203+
path: object.Key,
204+
bucketId: dbBuckets.find(b => isPrefixOfPath(b.key, object.Key))?.bucketId
205+
};
206+
}),
207+
// Versions found in S3
208+
...s3Objects.Versions
209+
.filter(v => v.Size > 0) // is an file (not a 'directory')
210+
.map(object => {
211+
return {
212+
path: object.Key,
213+
bucketId: dbBuckets.find(b => isPrefixOfPath(b.key, object.Key))?.bucketId
214+
// NOTE: adding current userId will give ALL perms on new objects
215+
// and set createdBy on all downstream resources (versions, tags, meta)
216+
// userId: userId
217+
};
218+
}),
219+
])];
220+
221+
// merge and remove duplicates
222+
const jobs = [...new Map(objects.map(o => [o.path, o])).values()];
223+
224+
// create jobs in COMS db object_queue for each object
225+
// update 'lastSyncRequestedDate' value in COMS db for each bucket
226+
for (const bucket of dbBuckets) {
227+
await bucketService.update({
228+
bucketId: bucket.bucketId,
229+
userId: userId,
230+
lastSyncRequestedDate: new Date().toISOString()
231+
}, trx);
232+
}
233+
return await objectQueueService.enqueue({ jobs: jobs }, trx);
234+
}
235+
catch (err) {
236+
log.error(err.message, { function: 'queueObjectRecords' });
237+
throw err;
238+
}
239+
},
240+
55241
/**
56242
* @function syncObject
57243
* Synchronizes an object

app/src/db/models/tables/bucket.js

+10
Original file line numberDiff line numberDiff line change
@@ -48,9 +48,19 @@ class Bucket extends mixin(Model, [
4848
filterBucketName(query, value) {
4949
filterILike(query, value, 'bucket.bucketName');
5050
},
51+
filterEndpoint(query, value) {
52+
filterILike(query, value, 'bucket.endpoint');
53+
},
5154
filterKey(query, value) {
5255
filterILike(query, value, 'bucket.key');
5356
},
57+
filterKeyIsChild(query, value) {
58+
if (value && value !== '/') {
59+
query.where('bucket.key', 'like', `${value}%`);
60+
}
61+
query
62+
.where('bucket.key', '!=', value);
63+
},
5464
filterActive(query, value) {
5565
if (value !== undefined) query.where('bucket.active', value);
5666
},

0 commit comments

Comments
 (0)