Skip to content

Commit cea2818

Browse files
committed
Script to anonymize drill data
1 parent 013072d commit cea2818

File tree

1 file changed

+133
-0
lines changed

1 file changed

+133
-0
lines changed
Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,133 @@
1+
/**
2+
* Description: This script is used to anonymize drill collection
3+
* Server: countly
4+
* Path: $(countly dir)/bin/scripts/export-data
5+
* Command: node drill_collections_anonymized.js
6+
*/
7+
8+
var crypto = require('crypto');
9+
const fs = require('fs');
10+
const { ObjectId } = require('mongodb');
11+
12+
const pluginManager = require('../../../plugins/pluginManager.js');
13+
const common = require('../../../api/utils/common.js');
14+
const drillCommon = require("../../../plugins/drill/api/common.js");
15+
16+
// IDs of the apps to export. Each entry is converted with ObjectId(...) and used in an
// {_id: {$in: [...]}} filter on the "apps" collection; leave the array empty to process all apps.
const APPS = [];
17+
// Output prefix for the anonymized data. The app _id is appended by plain string concatenation
// (PATH + appId), so the value must end with a path separator.
const PATH = './';
18+
// Anonymization spec, walked by anonymizeRecursive():
//   - a value of 1 replaces the field via sha1Hash() (arrays/objects are hashed element by element);
//   - a nested object descends into the sub-document with that nested spec.
// Fields that are not listed are written to the output unchanged.
const FIELDS_TO_ANONYMIZE = {"did": 1, "up": {"name": 1, "username": 1, "email": 1, "organization": 1, "phone": 1, "picture": 1}, "custom": 1};
19+
20+
Promise.all([pluginManager.dbConnection("countly"), pluginManager.dbConnection("countly_drill")]).then(async function([countlyDb, drillDb]) {
21+
console.log("Connected to databases...");

//SET COMMON DBs
common.db = countlyDb;
common.drillDb = drillDb;

// Filter for the "apps" collection: an empty filter selects every app, otherwise only
// the apps listed in APPS are exported.
let query = {};
if (APPS.length > 0) {
    // Build a new array instead of overwriting the APPS config in place, and construct the
    // ids with `new`: calling ObjectId() as a plain function throws on driver versions
    // where ObjectId is an ES class.
    const appIds = APPS.map(function(id) {
        return new ObjectId(id);
    });
    query = {_id: {$in: appIds}};
}
34+
try {
    //FETCH APPS
    const apps = await countlyDb.collection('apps').find(query, {_id: 1, name: 1}).toArray();
    //PROCESS COLLECTIONS FOR EACH APP
    for (let i = 0; i < apps.length; i++) {
        console.log("Processing app: " + apps[i].name);
        //CREATE DIR FOR APP
        // recursive: true also creates missing parent directories of PATH and is a no-op
        // when the directory already exists, so no separate existence check is needed.
        const appDir = PATH + apps[i]._id;
        fs.mkdirSync(appDir, {recursive: true});
        //FETCH DRILL COLLECTIONS
        const drillCollections = await getDrillCollections(apps[i]._id);
        //PROCESS EACH DRILL COLLECTION
        for (let j = 0; j < drillCollections.length; j++) {
            const collectionName = drillCollections[j].collectionName;
            console.log("Processing collection: " + collectionName);
            //CREATE WRITE STREAM FOR DRILL COLLECTION
            const collectionWriteStream = fs.createWriteStream(appDir + '/' + collectionName + '.jsonl');
            try {
                //CREATE COLLECTION CURSOR
                const cursor = drillDb.collection(collectionName).find({});
                //FOR EACH DOCUMENT
                while (await cursor.hasNext()) {
                    const doc = await cursor.next();
                    //ANONYMIZE USER DATA
                    anonymizeRecursive(doc, FIELDS_TO_ANONYMIZE);
                    //WRITE USER DATA TO FILE
                    // write() returns false once the stream's internal buffer is full. Wait for
                    // 'drain' before reading more documents, otherwise a large collection is
                    // buffered in memory faster than the disk can absorb it.
                    if (!collectionWriteStream.write(JSON.stringify(doc) + '\n')) {
                        await new Promise(function(resolve) {
                            collectionWriteStream.once('drain', resolve);
                        });
                    }
                }
            }
            finally {
                // Flush and close the file before moving on (also when the loop above failed),
                // so the file is complete before "Done." is logged and no descriptor stays open.
                await new Promise(function(resolve) {
                    collectionWriteStream.end(resolve);
                });
            }
        }
    }
}
catch (err) {
    console.log(err);
}
finally {
    countlyDb.close();
    drillDb.close();
    console.log("Done.");
}
73+
/**
 * Resolves the drill collections that belong to one app.
 *
 * Starts from a fixed list of internal "[CLY]_*" event keys, appends every key from the
 * app's document in the "events" collection (its `list` field) that is not already present,
 * and maps each key to a collection name through drillCommon.getCollectionName().
 *
 * Failures are logged and not rethrown; whatever was collected up to that point is returned.
 *
 * @param {*} appId - app identifier, passed to common.db.ObjectID() and getCollectionName()
 * @returns {Promise<Array<{collectionName: *}>>} one entry per event key
 */
async function getDrillCollections(appId) {
    const collections = [];
    try {
        const eventsCollection = countlyDb.collection("events");
        const eventsDoc = await eventsCollection.findOne({_id: common.db.ObjectID(appId)});
        const eventKeys = [
            "[CLY]_session",
            "[CLY]_crash",
            "[CLY]_view",
            "[CLY]_action",
            "[CLY]_push_action",
            "[CLY]_star_rating",
            "[CLY]_nps",
            "[CLY]_survey",
            "[CLY]_apm_network",
            "[CLY]_apm_device"
        ];

        // Append the app's own event keys, skipping anything that is already listed.
        const customKeys = eventsDoc && eventsDoc.list;
        if (customKeys) {
            for (let idx = 0; idx < customKeys.length; idx += 1) {
                const candidate = customKeys[idx];
                if (eventKeys.indexOf(candidate) < 0) {
                    eventKeys.push(candidate);
                }
            }
        }

        // Pushed one by one so that entries resolved before a failure are still returned.
        eventKeys.forEach(function(eventKey) {
            collections.push({collectionName: drillCommon.getCollectionName(eventKey, appId)});
        });
    }
    catch (err) {
        console.log("Error getting drill collections for app ", appId, "error: ", err);
    }
    return collections;
}
96+
97+
//RECURSIVE FUNCTION TO ANONYMIZE EMBEDDED FIELDS
98+
/**
 * Anonymizes, in place, the fields of `obj` selected by `fieldsToAnonymize`.
 *
 * For every key of the spec whose value in `obj` is truthy:
 *   - a spec value of exactly 1 replaces the field with sha1Hash(field);
 *   - a spec value of type "object" recurses into the field with that nested spec;
 *   - any other spec value leaves the field untouched.
 * Missing or falsy fields (undefined, null, 0, "", false) are skipped.
 *
 * @param {object} obj - document (or sub-document) to modify
 * @param {object} fieldsToAnonymize - anonymization spec for this level
 * @returns {undefined} nothing; `obj` is mutated
 */
function anonymizeRecursive(obj, fieldsToAnonymize) {
    for (const fieldName in fieldsToAnonymize) {
        const currentValue = obj[fieldName];
        if (!currentValue) {
            continue;
        }

        const rule = fieldsToAnonymize[fieldName];
        if (rule === 1) {
            obj[fieldName] = sha1Hash(currentValue);
            continue;
        }
        if (typeof rule === 'object') {
            anonymizeRecursive(currentValue, rule);
        }
    }
}
110+
111+
//SHA1 HASH FUNCTION
112+
/**
 * Anonymizes a value.
 *
 *   - Arrays and plain objects are walked recursively and modified in place; the same
 *     reference is returned.
 *   - Strings are replaced by the hex HMAC-SHA1 of the string, keyed with a fresh random
 *     salt and truncated to the original string length (at most 40 characters, the length
 *     of a SHA1 hex digest). Because the salt is random per call, the same input yields a
 *     different output every time.
 *   - Every other value (numbers, booleans, null, undefined) is returned unchanged.
 *
 * The type is checked explicitly instead of using the global isNaN(), which coerces its
 * argument: numeric-looking strings such as phone numbers or numeric device ids would
 * otherwise be returned un-anonymized.
 *
 * @param {*} field - value to anonymize
 * @returns {*} anonymized value (same reference for arrays and objects)
 */
function sha1Hash(field) {
    if (Array.isArray(field)) {
        field.forEach(function(element, index) {
            field[index] = sha1Hash(element);
        });
        return field;
    }
    // typeof null is 'object', so null is excluded here and returned as-is below.
    if (field !== null && typeof field === 'object') {
        for (const key in field) {
            field[key] = sha1Hash(field[key]);
        }
        return field;
    }
    if (typeof field !== 'string') {
        return field;
    }
    const salt = crypto.randomBytes(16).toString('hex');
    const hashedField = crypto.createHmac('sha1', salt).update(field).digest('hex');
    return hashedField.substring(0, field.length);
}
133+
});

0 commit comments

Comments
 (0)