1

I am trying to read some JSON file from web and create a SQL database with the data. I am using ijson to read data as stream. But when the code fails I need to start over to retrieve data. Are there any way to continue reading JSON file from where the program is failed?

I can read the whole document with json.loads but I am assuming the data is too big to read at a time.

You can see my code below.

import sqlite3
import ssl
import urllib.request
import json
import ijson
import time

ctx = ssl.create_default_context()
ctx.check_hostname = False
ctx.verify_mode = ssl.CERT_NONE

conn = sqlite3.connect('rawdata.sqlite')
cur = conn.cursor()

cur.execute('''DROP TABLE IF EXISTS DailyData ''')
cur.execute('''DROP TABLE IF EXISTS Countries ''')
cur.execute('''DROP TABLE IF EXISTS Continents ''')


cur.execute('''CREATE TABLE IF NOT EXISTS DailyData
    (id INTEGER, Day TEXT, Month TEXT, Year TEXT, country_id INTEGER, continent_id INTEGER, Cases TEXT, Deaths TEXT)''')

cur.execute('''CREATE TABLE IF NOT EXISTS Countries
    (id INTEGER,     CountryCode TEXT UNIQUE, Country TEXT UNIQUE, Population TEXT, continent_id INTEGER)''')

cur.execute('''CREATE TABLE IF NOT EXISTS Continents
    (id INTEGER, Continent TEXT UNIQUE)''')

url = "https://opendata.ecdc.europa.eu/covid19/casedistribution/json/"
f = urllib.request.urlopen(url, context=ctx)
reports = ijson.items(f, 'records.item')

sum = 0
count = 0
# error = 0
for item in reports :

    iDataRep = item.get('dateRep')
    iCases = item.get('cases')
    iDeaths = item.get('deaths')
    iCountryCode = item.get('countryterritoryCode')
    iCountry = item.get('countriesAndTerritories')
    iPopulation = item.get('popData2018')
    iContinent = item.get('continentExp')

    if len(iDataRep) < 0: iDataRep = 0
    if len(iCases) < 0: iCases = 0
    if len(iDeaths) < 0: iDeaths = 0
    if len(iCountryCode) < 0: iCountryCode = 0
    if len(iCountry) < 0: iCountry = 0
    if len(iPopulation) < 0: iPopulation = 0
    if len(iContinent) < 0: iContinent = 0

    Spl = iDataRep.split('/')
    iDay = Spl[0]
    iMonth = Spl[1]
    iYear = Spl[2]
    id = count + 1

    cur.execute('''INSERT OR IGNORE INTO Continents (id, Continent)
        VALUES ( ?, ? )''', (id, iContinent))

    cur.execute('''SELECT id FROM Continents WHERE Continent = ? ''', (iContinent, ))
    continent_id = cur.fetchone()[0]

    cur.execute('''INSERT OR IGNORE INTO Countries (id, CountryCode, Country, Population, continent_id)
        VALUES ( ?, ?, ?, ?, ? )''', (id, iCountryCode, iCountry, iPopulation, continent_id) )

    cur.execute('''SELECT id FROM Countries WHERE Country = ? ''', (iCountry, ))
    country_id = cur.fetchone()[0]

    cur.execute('''INSERT OR IGNORE INTO DailyData (id, Day, Month, Year, country_id, continent_id, Cases, Deaths)
        VALUES ( ?, ?, ?, ?, ?, ?, ? ,?)''', (id, iDay, iMonth, iYear, country_id, continent_id, iCases, iDeaths) )

    conn.commit()

    # except:
    #     error = error + 1
    #     print(error)
    #     continue

    count = count + 1

    print(count, 'data retrieved...')

    if count % 95 == 0:
        time.sleep(1)
        print('Program slept a second.')

numCountry = cur.execute('SELECT max(id) FROM Countries' )
numContinent = cur.execute('SELECT max(id) FROM Continents' )

print('From', numCountry, 'different countries and', numContinent, 'continents', count, 'data retrieved.')

cur.close()
PsyyduckK
  • 11
  • 2
  • can you get request the whole json document and create it with the response dictionary? – bherbruck May 29 '20 at 01:18
  • 1
    You'll need to share your code or something similar with the same problem, to show how it fails and to get suggestions on how to resume reading and processing. It can probably be done, but it's hard to say without knowing more about your solution. – Grismar May 29 '20 at 01:22
  • Sounds like an [X Y problem](https://meta.stackexchange.com/questions/66377/what-is-the-xy-problem). Please share the code so we can give useful advises! – Klaus D. May 29 '20 at 01:55
  • Btw, you can prefer using `cur.executemany` for big data in order to get a better performance. – Barbaros Özhan May 29 '20 at 06:06
  • Here is my code. @KlausD. – PsyyduckK May 29 '20 at 15:53
  • Here is my code. @Grismar – PsyyduckK May 29 '20 at 15:54
  • You've shared the code, but you're still not saying how it fails. I'm assuming it's an exception of sorts - have you tried wrapping the for loop body in a try..except block for the appropriate exception and recovering from there? – Grismar May 31 '20 at 02:58

0 Answers0