
Updating records in MongoDB from a pandas DataFrame

Updating 500,000 records is taking more than a day.

from datetime import datetime

import pandas as pd
from pymongo import MongoClient, UpdateOne

client = MongoClient()
mbpfmdb = client.mbpfm

# `output` is the pandas DataFrame (built earlier) holding the updated transaction fields.
start_time = datetime.now()

for i in output.index:
    operations = [UpdateOne(
        {"TransactionId": output['TransactionId'][i]},
        {"$set": {"CategoryId": output['CategoryId'][i],
                  "MerchantId": output['MerchantId'][i],
                  "CompletedFlag": output['CompletedFlag'][i]}},
        upsert=True)]
    mbpfmdb.account_transaction.bulk_write(operations, ordered=False)
    
end_time = datetime.now()
print('MongoDB data update took: {}'.format(end_time - start_time))
eshwar raj
  • My first instinct would be to use `mongoimport` on the commandline with a JSON or CSV file. Secondly, you aren't really doing a `bulk_write`. You are using `bulk_write` with a single `UpdateOne` in the `for` loop. If you want to use `bulk_write` efficiently, append all the `UpdateOne` operations into a single list and then call `bulk_write` once. – rickhg12hs Sep 05 '22 at 13:10
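
A minimal sketch of the batched approach from that comment, assuming the DataFrame is still called `output` and has the same four columns; the `itertuples` iteration and the 1,000-operation chunk size are my own choices, not part of the original code:

from pymongo import MongoClient, UpdateOne

client = MongoClient()
mbpfmdb = client.mbpfm

# Build every UpdateOne up front instead of calling bulk_write once per row.
operations = [
    UpdateOne(
        {"TransactionId": row.TransactionId},
        {"$set": {"CategoryId": row.CategoryId,
                  "MerchantId": row.MerchantId,
                  "CompletedFlag": row.CompletedFlag}},
        upsert=True,
    )
    for row in output.itertuples(index=False)
]

# Send the operations in chunks so no single request grows too large;
# 1000 is an arbitrary batch size, tune it to your document sizes.
for start in range(0, len(operations), 1000):
    mbpfmdb.account_transaction.bulk_write(
        operations[start:start + 1000], ordered=False)

One `bulk_write` per thousand rows keeps the number of round trips to MongoDB in the hundreds instead of 500,000, which is usually where the time goes in per-row loops like the one in the question.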

0 Answers