I am trying to read a JSON file from the web and create a SQL database with the data. I am using ijson to read the data as a stream. But when the code fails, I need to start over to retrieve the data. Is there any way to continue reading the JSON file from the point where the program failed?
I could read the whole document with json.loads, but I am assuming the data is too big to read all at once.
You can see my code below.
import sqlite3
import ssl
import urllib.request
import json
import ijson
import time
# Build an SSL context with certificate verification disabled so the
# download succeeds even when the local certificate store is incomplete.
# NOTE(review): skipping hostname/cert checks is insecure for untrusted
# hosts — confirm this is acceptable for this data source.
ctx = ssl.create_default_context()
ctx.check_hostname = False
ctx.verify_mode = ssl.CERT_NONE
# Open (or create) the local SQLite database and rebuild the schema from
# scratch: any tables left over from a previous run are dropped first.
conn = sqlite3.connect('rawdata.sqlite')
cur = conn.cursor()

# All DDL statements, executed in order (drops before creates).
_SCHEMA_STATEMENTS = (
    '''DROP TABLE IF EXISTS DailyData ''',
    '''DROP TABLE IF EXISTS Countries ''',
    '''DROP TABLE IF EXISTS Continents ''',
    '''CREATE TABLE IF NOT EXISTS DailyData
(id INTEGER, Day TEXT, Month TEXT, Year TEXT, country_id INTEGER, continent_id INTEGER, Cases TEXT, Deaths TEXT)''',
    '''CREATE TABLE IF NOT EXISTS Countries
(id INTEGER, CountryCode TEXT UNIQUE, Country TEXT UNIQUE, Population TEXT, continent_id INTEGER)''',
    '''CREATE TABLE IF NOT EXISTS Continents
(id INTEGER, Continent TEXT UNIQUE)''',
)

for _stmt in _SCHEMA_STATEMENTS:
    cur.execute(_stmt)
# Open the ECDC COVID-19 case-distribution feed and wrap it in a
# streaming parser: ijson.items yields one dict per element of the
# top-level "records" array without loading the whole document into
# memory.  NOTE(review): plain urlopen cannot resume a broken download;
# restarting mid-stream would need server Range support — confirm.
url = "https://opendata.ecdc.europa.eu/covid19/casedistribution/json/"
f = urllib.request.urlopen(url, context=ctx)
reports = ijson.items(f, 'records.item')
# Stream each record into the three tables.
# Fixes over the original:
#  - the `len(x) < 0` guards could never fire (len() is never negative)
#    and would crash with TypeError on a missing/None field; missing
#    fields now default to 0 as the guards intended,
#  - records with no report date are skipped instead of crashing when
#    the date is split into Day/Month/Year,
#  - `sum` (unused) and `id` no longer shadow builtins,
#  - commented-out dead code removed.
count = 0
for item in reports:
    iDataRep = item.get('dateRep') or ''
    iCases = item.get('cases') or 0
    iDeaths = item.get('deaths') or 0
    iCountryCode = item.get('countryterritoryCode') or 0
    iCountry = item.get('countriesAndTerritories') or 0
    iPopulation = item.get('popData2018') or 0
    iContinent = item.get('continentExp') or 0

    if not iDataRep:
        continue  # no date -> Day/Month/Year cannot be derived

    # dateRep is expected as "DD/MM/YYYY" — TODO confirm against feed.
    Spl = iDataRep.split('/')
    iDay = Spl[0]
    iMonth = Spl[1]
    iYear = Spl[2]

    row_id = count + 1  # renamed from `id`, which shadowed the builtin
    cur.execute('''INSERT OR IGNORE INTO Continents (id, Continent)
    VALUES ( ?, ? )''', (row_id, iContinent))
    cur.execute('''SELECT id FROM Continents WHERE Continent = ? ''', (iContinent, ))
    continent_id = cur.fetchone()[0]
    cur.execute('''INSERT OR IGNORE INTO Countries (id, CountryCode, Country, Population, continent_id)
    VALUES ( ?, ?, ?, ?, ? )''', (row_id, iCountryCode, iCountry, iPopulation, continent_id) )
    cur.execute('''SELECT id FROM Countries WHERE Country = ? ''', (iCountry, ))
    country_id = cur.fetchone()[0]
    cur.execute('''INSERT OR IGNORE INTO DailyData (id, Day, Month, Year, country_id, continent_id, Cases, Deaths)
    VALUES ( ?, ?, ?, ?, ?, ?, ? ,?)''', (row_id, iDay, iMonth, iYear, country_id, continent_id, iCases, iDeaths) )
    conn.commit()  # commit per record so a crash loses at most one row

    count = count + 1
    print(count, 'data retrieved...')
    if count % 95 == 0:
        time.sleep(1)  # original pacing: brief pause every 95 records
        print('Program slept a second.')
# Report totals and clean up.
# Fixes over the original:
#  - cur.execute() returns the Cursor itself, so the original printed
#    Cursor objects instead of numbers; fetch the scalar results,
#  - ids in Countries/Continents come from the running row counter, so
#    max(id) is not a count of rows; COUNT(*) gives the intended totals,
#  - close the connection as well as the cursor.
cur.execute('SELECT COUNT(*) FROM Countries')
numCountry = cur.fetchone()[0]
cur.execute('SELECT COUNT(*) FROM Continents')
numContinent = cur.fetchone()[0]
print('From', numCountry, 'different countries and', numContinent, 'continents', count, 'data retrieved.')
cur.close()
conn.close()