I have a CSV file named data.csv. I want to convert this flat CSV file in to nested JSON and insert it in to mongodb using python.Below is the code which i have used to convert CSV to nested JSON, but i am not able to insert it in to mongodb. it shows error.Can somebody help?
import json
import pandas as pd
from pymongo import MongoClient
try:
conn = MongoClient()
print("Connected successfully!!!")
except:
print("Could not connect to MongoDB")
# database
db = conn.database
collection = db.collection2
df = pd.read_csv(r'C:\Users\swetha1\Desktop\data.csv')
def get_nested_rec(key, grp):
rec = {}
rec['PrimaryId'] = key[0]
rec['FirstName'] = key[1]
rec['LastName'] = key[2]
rec['City'] = key[3]
for field in ['CarName','DogName']:
rec[field] = list(grp[field].unique())
return rec
records = []
for key, grp in df.groupby(['PrimaryId','FirstName','LastName','City']):
rec = get_nested_rec(key, grp)
records.append(rec)
records = dict(data = records)
r=json.dumps(records,default=int,indent=4)
print(r)
collection.insert(r)
print("inserted")
output converting CSV to nested JSON is as follows:
Connected successfully!!!
{
"data": [
{
"PrimaryId": 100,
"FirstName": "John",
"LastName": "Smith",
"City": "NewYork",
"CarName": [
"Toyota",
"BMW"
],
"DogName": [
"Spike",
"Rusty"
]
},
{
"PrimaryId": 101,
"FirstName": "Ben",
"LastName": "Swan",
"City": "Sydney",
"CarName": [
"Volkswagen",
"Ford",
"Audi"
],
"DogName": [
"Buddy",
"Max"
]
},
{
"PrimaryId": 102,
"FirstName": "Julia",
"LastName": "Brown",
"City": "London",
"CarName": [
"Mini"
],
"DogName": [
"Lucy"
]
}
]
}
but shows following error while inserting in to JSON:
Traceback (most recent call last):
File "data.py", line 38, in <module>
collection.insert(r)
File "C:\Users\swetha1\anaconda3\lib\site-packages\pymongo\collection.py",
line 3161, in insert
check_keys, manipulate, write_concern)
File "C:\Users\swetha1\anaconda3\lib\site-packages\pymongo\collection.py",
line 639, in _insert
blk.ops = [(message._INSERT, doc) for doc in gen()]
File "C:\Users\swetha1\anaconda3\lib\site-packages\pymongo\collection.py",
line 639, in <listcomp>
blk.ops = [(message._INSERT, doc) for doc in gen()]
File "C:\Users\swetha1\anaconda3\lib\site-packages\pymongo\collection.py",
line 623, in gen
doc['_id'] = ObjectId()
TypeError: 'str' object does not support item assignment
referred few links too but cudnt understand how to insert.
data.CSV
PrimaryId,FirstName,LastName,City,CarName,DogName
100,John,Smith,NewYork,Toyota,Spike
100,John,Smith,NewYork,BMW,Spike
100,John,Smith,NewYork,Toyota,Rusty
100,John,Smith,NewYork,BMW,Rusty
101,Ben,Swan,Sydney,Volkswagen,Buddy
101,Ben,Swan,Sydney,Ford,Buddy
101,Ben,Swan,Sydney,Audi,Buddy
101,Ben,Swan,Sydney,Volkswagen,Max
101,Ben,Swan,Sydney,Ford,Max
101,Ben,Swan,Sydney,Audi,Max
102,Julia,Brown,London,Mini,Lucy