Providing a modified and working solution
I have avoided using "Offset" parameter as it is suggested not to use it for bulk data pull.
"""defining a function for getting an api key using credentials for Pardot user"""
def api_key_gen():
import requests
import json
url = "https://pi.pardot.com/api/login/version/3%20HTTP/1.1"
querystring = {"email":"","password":"","user_key":"","format":"json"}
headers = {
'Connection': "keep-alive",
'cache-control': "no-cache"
}
response = requests.request("POST", url, headers=headers, params=querystring)
# print(json.loads(response.text)['api_key'])
return (json.loads(response.text)['api_key'])
Using these two functions to fetch data.
- First function fetches data using between two dates.
- Second function is used if there are large number of records stored in a single second.
def fetchFromDate(api_key, max_date, target_date):
url = "https://pi.pardot.com/api/prospect/version/3/do/query? user_key=&api_key="+str(api_key)+"&output=bulk&created_after="+str(max_date)+"&created_before="+str(target_date)+"&format=json"
result = json.loads((requests.request("GET", url)).text)['result']['prospect']
data = pd.DataFrame(result)
return data
def fetchFromId(api_key, max_id):
url = "https://pi.pardot.com/api/prospect/version/3/do/query? user_key=&api_key="+str(api_key)+"&output=bulk&id_greater_than="+str(max_id)+"&format=json"
result = json.loads((requests.request("GET", url)).text)['result']['prospect']
data = pd.DataFrame(result)
return data
Using below code to fetch data from pardot api for one month to keep size of data small.Also whenever api key gets expired,a new key api key is fetched and used in the URL.Dates are used to compare with each other in order to fetch data for desired period only.I have tried to keep the whole process Dynamic except for the dates parameters.
import pandas as pd
import requests
import json
from datetime import datetime,timedelta
"""using a start date and target date to fetch data for a particular time span"""
max_date = '2014-02-03 08:02:57'
target_date = datetime.strptime('2014-06-30 23:59:59','%Y-%m-%d %H:%M:%S')
final_data = pd.DataFrame() #Initialize an empty dataframe
api_key = api_key_gen()
last_maxDate = max_date
last_maxId = '' #get the id of first record for desired year and fill here
url = "https://pi.pardot.com/api/prospect/version/3/do/query? user_key=&api_key="+str(api_key)+"&output=bulk&created_after="+str(max_date)+"&created_before="+str(target_date)+"&format=json"
print("Start Time : ",datetime.now())
i =1
while json.loads((requests.request("GET", url)).text)['result'] is not None:
# max_date = datetime.strptime(str(max_date),'%Y-%m-%d %H:%M:%S')-timedelta(seconds=1)
last_maxDate = datetime.strptime(str(last_maxDate),'%Y-%m-%d %H:%M:%S')
api_key = api_key_gen()
data = fetchFromDate(api_key, max_date, target_date)
if len(data) < 200:
final_data=data.append(final_data,ignore_index=True)
break
else:
max_id = max(data['id'])
max_date = max(data['created_at'])
max_date = datetime.strptime(str(max_date),'%Y-%m-%d %H:%M:%S')-timedelta(seconds=1)
# print(type(max_date),type(last_maxDate))
if bool(max_date == last_maxDate) & bool(int(max_id) == int(last_maxId)):
print("Running through Id's")
api_key = api_key_gen()
data = fetchFromId(api_key, max_id)
# final_data=data.append(final_data,ignore_index=True)
max_id = max(data['id'])
max_date = max(data['created_at'])
final_data=data.append(final_data,ignore_index=True)
last_maxDate = max_date
last_maxId = max_id
print("Running Loop :",i,max_date,max_id)
i += 1
print(max(data['created_at']))
print(max(data['id']))
final_data.to_csv('file.csv',index=False)
print("End Time : ",datetime.now())