I am learning pandas and matplotlib on my own by using some public dataset via this api link
I'm using colab and below are my codes:
import datetime
import io
import json
import pandas as pd
import requests
import matplotlib.pyplot as plt
confirm_resp = requests.get('https://api.data.gov.hk/v2/filterq=%7B%22resource%22%3A%22http%3A%2F%2Fwww.chp.gov.hk%2Ffiles%2Fmisc%2Fenhanced_sur_covid_19_eng.csv%22%2 C%22section%22%3A1%2C%22format%22%3A%22json%22%7D').content
confirm_df = pd.read_json(io.StringIO(confirm_resp.decode('utf-8')))
confirm_df.columns = confirm_df.columns.str.replace(" ", "_")
pd.to_datetime(confirm_df['Report_date'])
confirm_df.columns = ['Case_no', 'Report_date', 'Onset_date', 'Gender', 'Age',
'Name_of_hospital_admitted', 'Status', 'Resident', 'Case_classification', 'Confirmed_probable']
confirm_df = confirm_df.drop('Name_of_hospital_admitted', axis = 1)
confirm_df.head()
and this is what the dataframe looks like:
Case_no | Report_date | Onset_date | Gender | Age | Status | Resident | Case_classification | Confirmed_probable |
---|---|---|---|---|---|---|---|---|
1 | 23/01/2020 | 21/01/2020 | M | 39 | Discharged | Non-HK resident | Imported case | Confirmed |
2 | 23/01/2020 | 18/01/2020 | M | 56 | Discharged | HK resident | Imported case | Confirmed |
3 | 24/01/2020 | 20/01/2020 | F | 62 | Discharged | Non-HK resident | Imported case | Confirmed |
4 | 24/01/2020 | 23/01/2020 | F | 62 | Discharged | Non-HK resident | Imported case | Confirmed |
5 | 24/01/2020 | 23/01/2020 | M | 63 | Discharged | Non-HK resident | Imported case | Confirmed |
When I try to make a simple plot with the below code:
x = confirm_df['Report_date']
y = confirm_df['Case_classification']
confirm_df.plot(x, y)
It gives me the below error:
KeyError Traceback (most recent call last)
<ipython-input-17-e4139a9b5ef1> in <module>()
4 y = confirm_df['Case_classification']
5
----> 6 confirm_df.plot(x, y)
3 frames
/usr/local/lib/python3.6/dist-packages/pandas/plotting/_core.py in __call__(self, *args, **kwargs)
912 if is_integer(x) and not data.columns.holds_integer():
913 x = data_cols[x]
--> 914 elif not isinstance(data[x], ABCSeries):
915 raise ValueError("x must be a label or position")
916 data = data.set_index(x)
/usr/local/lib/python3.6/dist-packages/pandas/core/frame.py in __getitem__(self, key)
2910 if is_iterator(key):
2911 key = list(key)
-> 2912 indexer = self.loc._get_listlike_indexer(key, axis=1, raise_missing=True)[1]
2913
2914 # take() does not accept boolean indexers
/usr/local/lib/python3.6/dist-packages/pandas/core/indexing.py in _get_listlike_indexer(self, key, axis, raise_missing)
1252 keyarr, indexer, new_indexer = ax._reindex_non_unique(keyarr)
1253
-> 1254 self._validate_read_indexer(keyarr, indexer, axis, raise_missing=raise_missing)
1255 return keyarr, indexer
1256
/usr/local/lib/python3.6/dist-packages/pandas/core/indexing.py in _validate_read_indexer(self, key, indexer, axis, raise_missing)
1296 if missing == len(indexer):
1297 axis_name = self.obj._get_axis_name(axis)
-> 1298 raise KeyError(f"None of [{key}] are in the [{axis_name}]")
1299
1300 # We (temporarily) allow for some missing keys with .loc, except in
KeyError: "*None of [Index(['23/01/2020', '23/01/2020', '24/01/2020', '24/01/2020', '24/01/2020',\n '26/01/2020', '26/01/2020', '26/01/2020', '29/01/2020', '29/01/2020',\n ...\n '05/01/2021', '05/01/2021', '05/01/2021', '05/01/2021', '05/01/2021',\n '05/01/2021', '05/01/2021', '05/01/2021', '05/01/2021', '05/01/2021'],\n dtype='object', length=9050)] are in the [column*s]"
I have tried to make the plot with and without converting Report date
to datetime object, I tried substitute x
value with all the columns in the data frame, but all give me the same error code.
Appreciate if anyone can help me to understand how to handle these issues here and going forward. I've spent hours to resolve it but cannot find the answers.
I did not encounter this issue before when I downloaded some notebooks and datasets from Kaggle to follow along.
Thank you and happy new year.