Other aproach is stored procedure as mentioned by other people . Stored procedure requires partitioning key. Also stored procedure should end within 4sec as per documentation otherwise all records will rollback. See code below using python azure documentdb sdk and javascript based stored procedure. I have modified the script and resolved lot of error below code is working fine:-
function bulkimport2(docObject) {
var collection = getContext().getCollection();
var collectionLink = collection.getSelfLink();
// The count of imported docs, also used as current doc index.
var count = 0;
getContext().getResponse().setBody(docObject.items);
//return
// Validate input.
//if (!docObject.items || !docObject.items.length) getContext().getResponse().setBody(docObject);
docObject.items=JSON.stringify(docObject.items)
docObject.items = docObject.items.replace("\\\\r", "");
docObject.items = docObject.items.replace("\\\\n", "");
var docs = JSON.parse(docObject.items);
var docsLength = docObject.items.length;
if (docsLength == 0) {
getContext().getResponse().setBody(0);
return;
}
// Call the CRUD API to create a document.
tryCreate(docs[count], callback, collectionLink,count);
// Note that there are 2 exit conditions:
// 1) The createDocument request was not accepted.
// In this case the callback will not be called, we just call setBody and we are done.
// 2) The callback was called docs.length times.
// In this case all documents were created and we don't need to call tryCreate anymore. Just call setBody and we are done.
function tryCreate(doc, callback, collectionLink,count ) {
doc=JSON.stringify(doc);
if (typeof doc == "undefined") {
getContext().getResponse().setBody(count);
return ;
} else {
doc = doc.replace("\\r", "");
doc = doc.replace("\\n", "");
doc=JSON.parse(doc);
}
getContext().getResponse().setBody(doc);
var isAccepted = collection.upsertDocument(collectionLink, doc, callback);
// If the request was accepted, callback will be called.
// Otherwise report current count back to the client,
// which will call the script again with remaining set of docs.
// This condition will happen when this stored procedure has been running too long
// and is about to get cancelled by the server. This will allow the calling client
// to resume this batch from the point we got to before isAccepted was set to false
if (!isAccepted) {
getContext().getResponse().setBody(count);
}
}
// This is called when collection.createDocument is done and the document has been persisted.
function callback(err, doc, options) {
if (err) throw getContext().getResponse().setBody(err + doc);
// One more document has been inserted, increment the count.
count++;
if (count >= docsLength) {
// If we have created all documents, we are done. Just set the response.
getContext().getResponse().setBody(count);
return ;
} else {
// Create next document.
tryCreate(docs[count], callback, collectionLink,count);
}
}
}
EDIT:- getContext().getResponse().setBody(count);
return ; //when all records are processed completely.
python script to load stored procedure and do batch import
# Initialize the Python DocumentDB client
client = document_client.DocumentClient(config['ENDPOINT'], {'masterKey': config['MASTERKEY'] ,'DisableSSLVerification' : 'true' })
# Create a database
#db = client.CreateDatabase({ 'id': config['DOCUMENTDB_DATABASE'] })
db=client.ReadDatabases({ 'id': 'db2' })
print(db)
# Create collection options
options = {
'offerEnableRUPerMinuteThroughput': True,
'offerVersion': "V2",
'offerThroughput': 400
}
# Create a collection
#collection = client.CreateCollection('dbs/db2' , { 'id': 'coll2'}, options)
#collection = client.CreateCollection({ 'id':'db2'},{ 'id': 'coll2'}, options)
database_link = 'dbs/db2'
collection_link = database_link + '/colls/coll2'
"""
#List collections
collection = client.ReadCollection(collection_link)
print(collection)
print('Databases:')
databases = list(client.ReadDatabases())
if not databases:
print('No Databases:')
for database in databases:
print(database['id'])
"""
# Create some documents
"""
document1 = client.CreateDocument(collection['_self'],
{
'Web Site': 0,
'Cloud Service': 0,
'Virtual Machine': 0,
'name': 'some'
})
document2 = client.CreateDocument(collection['_self'],
{
'Web Site': 1,
'Cloud Service': 0,
'Virtual Machine': 0,
'name': 'some'
})
"""
# Query them in SQL
"""
query = { 'query': 'SELECT * FROM server s' }
options = {}
options['enableCrossPartitionQuery'] = True
options['maxItemCount'] = 20
#result_iterable = client.QueryDocuments(collection['_self'], query, options)
result_iterable = client.QueryDocuments(collection_link, query, options)
results = list(result_iterable);
print(results)
"""
##How to store procedure and use it
"""
sproc3 = {
'id': 'storedProcedure2',
'body': (
'function (input) {' +
' getContext().getResponse().setBody(' +
' \'a\' + input.temp);' +
'}')
}
retrieved_sproc3 = client.CreateStoredProcedure(collection_link,sproc3)
result = client.ExecuteStoredProcedure('dbs/db2/colls/coll2/sprocs/storedProcedure3',{'temp': 'so'})
"""
## delete all records in collection
"""
result = client.ExecuteStoredProcedure('dbs/db2/colls/coll2/sprocs/bulkDeleteSproc',"SELECT * FROM c ORDER BY c._ts DESC ")
print(result)
"""
multiplerecords="""[{
"Virtual Machine": 0,
"name": "some",
"Web Site": 0,
"Cloud Service": 0
},
{
"Virtual Machine": 0,
"name": "some",
"Web Site": 1,
"Cloud Service": 0
}]"""
multiplerecords=json.loads(multiplerecords)
print(multiplerecords)
print(str(json.dumps(json.dumps(multiplerecords).encode('utf8'))))
#bulkloadresult = client.ExecuteStoredProcedure('dbs/db2/colls/coll2/sprocs/bulkImport',json.dumps(multiplerecords).encode('utf8'))
#bulkloadresult = client.ExecuteStoredProcedure('dbs/db2/colls/coll2/sprocs/bulkImport',json.dumps(json.loads(r'{"items": [{"name":"John","age":30,"city":"New York"},{"name":"John","age":30,"city":"New York"}]}')).encode('utf8'))
str1='{name":"John","age":30,"city":"New York","PartitionKey" : "Morisplane"}'
str2='{name":"John","age":30,"city":"New York","partitionKey" : "Morisplane"}'
key1=base64.b64encode(str1.encode("utf-8"))
key2=base64.b64encode(str2.encode("utf-8"))
data= {"items":[{"id": key1 ,"name":"John","age":30,"city":"Morisplane","PartitionKey" : "Morisplane" },{"id": key2,"name":"John","age":30,"city":"Morisplane","partitionKey" : "Morisplane"}] , "city": "Morisplane", "partitionKey" : "Morisplane"}
print(repr(data))
#retrieved_sproc3 =client.DeleteStoredProcedure('dbs/db2/colls/coll2/sprocs/bulkimport2')
sproc3 = {
'id': 'bulkimport2',
'body': (
"""function bulkimport2(docObject) {
var collection = getContext().getCollection();
var collectionLink = collection.getSelfLink();
// The count of imported docs, also used as current doc index.
var count = 0;
getContext().getResponse().setBody(docObject.items);
//return
// Validate input.
//if (!docObject.items || !docObject.items.length) getContext().getResponse().setBody(docObject);
docObject.items=JSON.stringify(docObject.items)
docObject.items = docObject.items.replace("\\\\r", "");
docObject.items = docObject.items.replace("\\\\n", "");
var docs = JSON.parse(docObject.items);
var docsLength = docObject.items.length;
if (docsLength == 0) {
getContext().getResponse().setBody(0);
return;
}
// Call the CRUD API to create a document.
tryCreate(docs[count], callback, collectionLink,count);
// Note that there are 2 exit conditions:
// 1) The createDocument request was not accepted.
// In this case the callback will not be called, we just call setBody and we are done.
// 2) The callback was called docs.length times.
// In this case all documents were created and we don't need to call tryCreate anymore. Just call setBody and we are done.
function tryCreate(doc, callback, collectionLink,count ) {
doc=JSON.stringify(doc);
if (typeof doc == "undefined") {
getContext().getResponse().setBody(count);
return ;
} else {
doc = doc.replace("\\r", "");
doc = doc.replace("\\n", "");
doc=JSON.parse(doc);
}
getContext().getResponse().setBody(doc);
return
var isAccepted = collection.upsertDocument(collectionLink, doc, callback);
// If the request was accepted, callback will be called.
// Otherwise report current count back to the client,
// which will call the script again with remaining set of docs.
// This condition will happen when this stored procedure has been running too long
// and is about to get cancelled by the server. This will allow the calling client
// to resume this batch from the point we got to before isAccepted was set to false
if (!isAccepted) {
getContext().getResponse().setBody(count);
}
}
// This is called when collection.createDocument is done and the document has been persisted.
function callback(err, doc, options) {
if (err) throw getContext().getResponse().setBody(err + doc);
// One more document has been inserted, increment the count.
count++;
if (count >= docsLength) {
// If we have created all documents, we are done. Just set the response.
getContext().getResponse().setBody(count);
return ;
} else {
// Create next document.
tryCreate(docs[count], callback, collectionLink,count);
}
}
}"""
)
}
#retrieved_sproc3 = client.CreateStoredProcedure(collection_link,sproc3)
bulkloadresult = client.ExecuteStoredProcedure('dbs/db2/colls/coll2/sprocs/bulkimport2', data , {"partitionKey" : "Morisplane"} )
print(repr(bulkloadresult))