I run Windows 7 64-bit
Python 2.7.9 |Anaconda 2.2.0 (64-bit)| [MSC v.1500 64 bit (AMD64)]
Pandas version 0.15.2
Jupyter notebook, IPython version 3.0.0.
Here is the problem I am facing:
When I run
import pandas as pd
df = pd.DataFrame({'col': [u'α']})
df
where α is the Greek letter alpha, I get the following error:
UnicodeDecodeError Traceback (most recent call last) in () 2 test_df = pd.DataFrame({'col': [u'α']}) 3 # test_df = pd.DataFrame([u'Hello \u2013 World']) ----> 4 test_df
C:\Anaconda\lib\site-packages\IPython\core\displayhook.pyc in __call__(self, result) 236 self.write_format_data(format_dict, md_dict) 237 self.log_output(format_dict) --> 238 self.finish_displayhook() 239 240 def cull_cache(self):
C:\Anaconda\lib\site-packages\IPython\kernel\zmq\displayhook.pyc in finish_displayhook(self) 70 sys.stderr.flush() 71 if self.msg['content']['data']: ---> 72 self.session.send(self.pub_socket, self.msg, ident=self.topic) 73 self.msg = None 74
C:\Anaconda\lib\site-packages\IPython\kernel\zmq\session.pyc in send(self, stream, msg_or_type, content, parent, ident, buffers, track, header, metadata) 647 if self.adapt_version: 648 msg = adapt(msg, self.adapt_version) --> 649 to_send = self.serialize(msg, ident) 650 to_send.extend(buffers) 651 longest = max([ len(s) for s in to_send ])
C:\Anaconda\lib\site-packages\IPython\kernel\zmq\session.pyc in serialize(self, msg, ident) 551 content = self.none 552 elif isinstance(content, dict): --> 553 content = self.pack(content) 554 elif isinstance(content, bytes): 555 # content is already packed, as in a relayed message
C:\Anaconda\lib\site-packages\IPython\kernel\zmq\session.pyc in (obj) 83 # disallow nan, because it's not actually valid JSON 84 json_packer = lambda obj: jsonapi.dumps(obj, default=date_default, ---> 85 ensure_ascii=False, allow_nan=False, 86 ) 87 json_unpacker = lambda s: jsonapi.loads(s)
C:\Anaconda\lib\site-packages\zmq\utils\jsonapi.pyc in dumps(o, **kwargs) 38 kwargs['separators'] = (',', ':') 39 ---> 40 s = jsonmod.dumps(o, **kwargs) 41 42 if isinstance(s, unicode):
C:\Anaconda\lib\json\__init__.pyc in dumps(obj, skipkeys, ensure_ascii, check_circular, allow_nan, cls, indent, separators, encoding, default, sort_keys, **kw) 248 check_circular=check_circular, allow_nan=allow_nan, indent=indent, 249 separators=separators, encoding=encoding, default=default, --> 250 sort_keys=sort_keys, **kw).encode(obj) 251 252
C:\Anaconda\lib\json\encoder.pyc in encode(self, o) 208 if not isinstance(chunks, (list, tuple)): 209 chunks = list(chunks) --> 210 return ''.join(chunks) 211 212 def iterencode(self, o, _one_shot=False):
UnicodeDecodeError: 'ascii' codec can't decode byte 0xce in position 12: ordinal not in range(128)
However, I do not get an error when I only create the data frame without displaying it, i.e. when I run just df = pd.DataFrame({'col': [u'α']})
After searching the web for some hours, I found that some people suggest
reload(sys)
sys.setdefaultencoding("utf-8")
as a solution, while others say that it should never be used. This code does indeed display the data frame, but it breaks print.
Other suggestions were to upgrade to Python3.
What I would like to ask is whether there is a way to display the data frame other than upgrading to Python 3 or using code that breaks print.