I have some code which looks for the full name of a browser in a column and, where found, overwrites it with a short name. This is to allow simplified filtering without version numbers in visualisations.
This code has worked fine for most of 2020. I got a new work laptop, reinstalled and updated Anaconda, and now it is giving a KeyError.
The code:
#clean up browser column data to remove version numbers
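#for column 'Browser' in dataframe, if the object (string) starts with Ch -> Chrome, else Sa -> Safari, else Ed -> Edge, else MS -> MSIE, else Fi -> Firefox, else Mo -> Mozilla, else Other (note: the code below never assigns 'Other'; unmatched values are left unchanged)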
def browser_clean(webchat):
    """Replace versioned browser strings in the 'Browser' column with short names.

    A value is rewritten when its first two characters equal one of the known
    prefixes ('Ch', 'Sa', 'Ed', 'MS', 'Fi', 'Mo'); for example
    'Chrome 85.0.4183.83' becomes 'Chrome'. Values that do not start with one
    of these prefixes are left unchanged.

    Args:
        webchat: DataFrame with a string-valued 'Browser' column.

    Returns:
        The same ``webchat`` object; the column is modified in place.

    Raises:
        KeyError: if ``webchat`` has no 'Browser' column (including when it is
            not a DataFrame at all, e.g. a Series returned by an earlier step).
    """
    short_names = {
        'Ch': 'Chrome',
        'Sa': 'Safari',
        'Ed': 'Edge',
        'MS': 'MSIE',
        'Fi': 'Firefox',
        'Mo': 'Mozilla',
    }

    # Fail with an explanatory message instead of a bare KeyError: 'Browser'.
    # The exception type is unchanged, so existing handlers still work.
    columns = getattr(webchat, 'columns', None)
    if columns is None or 'Browser' not in columns:
        raise KeyError(
            "browser_clean needs a DataFrame with a 'Browser' column; "
            "got {} with columns {}".format(
                type(webchat).__name__,
                None if columns is None else list(columns),
            )
        )

    # Slice once up front. Every short name starts with its own prefix, so
    # rows rewritten by one rule can never start matching another rule.
    prefixes = webchat['Browser'].str[:2]
    for prefix, short_name in short_names.items():
        webchat.loc[prefixes == prefix, 'Browser'] = short_name
    return webchat
I've checked the key is there using:
print (webchat['Browser'])
with the output:
0 Safari 13.1.1
1 Chrome 85.0.4183.83
2 Chrome 84.0.4147.136
3 Chrome 79.0.3945.136
4 Chrome 85.0.4183.81
...
17813 Chrome 85.0.4183.81
17814 Safari 13.1.2
17815 Chrome 85.0.4183.83
17816 Chrome 75.0.3770.143
17817 Safari 13.1.2
Name: Browser, Length: 17818, dtype: object
So it looks to me like the key is there?
The error message I see when I now run the code is as follows:
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
<ipython-input-11-bcf8735bc33d> in <module>()
5 webchat_drop_uw = drop_unwanted(webchat_dropped)
6 webchat_new_cols = month_year(webchat_drop_uw)
----> 7 webchat_cleaned = browser_clean(webchat_new_cols)
8 webchat_newOScol = short_OS(webchat_cleaned)
9 webchat_final = strip_data(webchat_newOScol)
<ipython-input-8-b9c27fc07f46> in browser_clean(webchat)
4 def browser_clean(webchat):
5
----> 6 webchat.loc[webchat['Browser'].str[:2] == 'Ch', 'Browser'] = 'Chrome'
7 webchat.loc[webchat['Browser'].str[:2] == 'Sa', 'Browser'] = 'Safari'
8 webchat.loc[webchat['Browser'].str[:2] == 'Ed', 'Browser'] = 'Edge'
C:\Users\8485823\Anaconda\lib\site-packages\pandas\core\series.py in __getitem__(self, key)
880
881 elif key_is_scalar:
--> 882 return self._get_value(key)
883
884 if is_hashable(key):
C:\Users\8485823\Anaconda\lib\site-packages\pandas\core\series.py in _get_value(self, label, takeable)
988
989 # Similar to Index.get_value, but we do not fall back to positional
--> 990 loc = self.index.get_loc(label)
991 return self.index._get_values_for_loc(self, loc, label)
992
C:\Users\8485823\Anaconda\lib\site-packages\pandas\core\indexes\range.py in get_loc(self, key, method, tolerance)
356 except ValueError as err:
357 raise KeyError(key) from err
--> 358 raise KeyError(key)
359 return super().get_loc(key, method=method, tolerance=tolerance)
360
KeyError: 'Browser'
Update: the KeyError is coming from the column being dropped by this prior function, which it didn't use to do:
# remove unwanted columns (contain data)
def drop_unwanted(webchat):
    """Drop a fixed list of unwanted columns from ``webchat``.

    The columns are removed in place and the same object is returned. Labels
    in the list that are not present in the frame are skipped rather than
    raising, because the drop is called with ``errors='ignore'``.

    Args:
        webchat: DataFrame to remove the columns from.

    Returns:
        The same ``webchat`` object, without the listed columns.
    """
    unwanted_columns = [
        "Visitor Name",
        "MCS",
        "Agent Name",
        "Agent Login Name",
        "Agent Full Name",
        "Agent Group",
        "Chat Start Reason",
        "Interactive",
        "Engagement ID",
        "Goal",
        "Campaign",
        "Target Audience",
        "Country",
        "State",
        "City",
        "Organization",
        "IP Address",
        "Personal Info Exists",
        "Customer Info Exists",
        "Marketing Source Exists",
        "Lead Exists",
        "Visitor Error Exists",
        "Service Activity Exists",
        "Viewed Product Exists",
        "Transaction Exists",
        "Cart Update Exists",
        "Pre-Chat Survey Exists",
        "Search Content Exists",
        "CoBrowse - Num Sessions",
        "CoBrowse - Num Interactive Sessions",
        "Alerted MCS",
        "Chat MCS",
    ]
    # axis=1 targets column labels; only exact label matches are dropped.
    webchat.drop(
        labels=unwanted_columns,
        axis=1,
        errors='ignore',
        inplace=True,
    )
    return webchat
Any ideas why it would drop a column that's not listed in my drop list?