From 7e4d31355b3512eb2e73a41ec6fa57b6dbdd5850 Mon Sep 17 00:00:00 2001 From: Simone Deponti Date: Sat, 18 Nov 2023 12:20:12 +0100 Subject: [PATCH] Updated to work with batches in write --- src/index.ts | 31 ++++++++++++++++++++++++++++--- 1 file changed, 28 insertions(+), 3 deletions(-) diff --git a/src/index.ts b/src/index.ts index d2df1c1..fbfe1ac 100644 --- a/src/index.ts +++ b/src/index.ts @@ -26,6 +26,11 @@ class MongodbAnonymizer extends Command { description: "documents from these collections will be ignored (comma separated)", }), + batchSize: flags.integer({ + char: "b", + description: "size of batch to write (batched writes are way faster)", + default: 1000 + }) }; async run() { const { flags } = this.parse(MongodbAnonymizer); @@ -77,7 +82,8 @@ class MongodbAnonymizer extends Command { sourceCollection, targetCollection, collectionName, - list + list, + flags.batchSize ); } else { @@ -93,7 +99,8 @@ class MongodbAnonymizer extends Command { sourceCollection: Collection, targetCollection: Collection, collectionName: string, - list: string[] + list: string[], + batchSize: number ) { const keysToAnonymize = list .filter( @@ -109,10 +116,28 @@ class MongodbAnonymizer extends Command { })); const fieldsToAnonymize = keysToAnonymize.map((item) => item.field); this.log(`Fields to anonymize: ${fieldsToAnonymize}`); + let batch = []; + let batchCount = 0; + let readItems = 0; + let writtenItems = 0; for await (const document of sourceCollection.find()) { if(!document) continue; + readItems++; const documentAnonymized = this.anonymizeMap(document, "", fieldsToAnonymize, keysToAnonymize); - await targetCollection.insertOne(documentAnonymized); + batch.push(documentAnonymized); + batchCount++; + if(batchCount >= batchSize) { + await targetCollection.insertMany(batch); + writtenItems = writtenItems + batchCount; + this.log(`Inserted ${batchCount} elements (${readItems} read, ${writtenItems} written)`); + batch = []; + batchCount = 0; + } + } + if(batchCount > 0) { + await targetCollection.insertMany(batch); + writtenItems = writtenItems + batchCount; + this.log(`Inserted ${batchCount} elements (${readItems} read, ${writtenItems} written)`); } }