Skip to content

Commit

Permalink
Updated to write documents in batches
Browse files: browse the repository at this point in the history
  • Loading branch information
simonedeponti committed Nov 18, 2023
1 parent 6a2fa3e commit 7e4d313
Showing 1 changed file with 28 additions and 3 deletions.
31 changes: 28 additions & 3 deletions src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,11 @@ class MongodbAnonymizer extends Command {
description:
"documents from these collections will be ignored (comma separated)",
}),
batchSize: flags.integer({
char: "b",
description: "size of batch to write (batched writes are way faster)",
default: 1000
})
};
async run() {
const { flags } = this.parse(MongodbAnonymizer);
Expand Down Expand Up @@ -77,7 +82,8 @@ class MongodbAnonymizer extends Command {
sourceCollection,
targetCollection,
collectionName,
list
list,
flags.batchSize
);
}
else {
Expand All @@ -93,7 +99,8 @@ class MongodbAnonymizer extends Command {
sourceCollection: Collection<any>,
targetCollection: Collection<any>,
collectionName: string,
list: string[]
list: string[],
batchSize: number
) {
const keysToAnonymize = list
.filter(
Expand All @@ -109,10 +116,28 @@ class MongodbAnonymizer extends Command {
}));
const fieldsToAnonymize = keysToAnonymize.map((item) => item.field);
this.log(`Fields to anonymize: ${fieldsToAnonymize}`);
let batch = [];
let batchCount = 0;
let readItems = 0;
let writtenItems = 0;
for await (const document of sourceCollection.find()) {
if(!document) continue;
readItems++;
const documentAnonymized = this.anonymizeMap(document, "", fieldsToAnonymize, keysToAnonymize);
await targetCollection.insertOne(documentAnonymized);
batch.push(documentAnonymized);
batchCount++;
if(batchCount >= batchSize) {
await targetCollection.insertMany(batch);
writtenItems = writtenItems + batchCount;
this.log(`Inserted ${batchCount} elements (${readItems} read, ${writtenItems} written)`);
batch = [];
batchCount = 0;
}
}
if(batchCount > 0) {
await targetCollection.insertMany(batch);
writtenItems = writtenItems + batchCount;
this.log(`Inserted ${batchCount} elements (${readItems} read, ${writtenItems} written)`);
}
}

Expand Down

0 comments on commit 7e4d313

Please sign in to comment.