My code, in its simplest form, is shown below. I'm trying to connect to Hive from a Jupyter notebook. The code works fine when I query a small number of rows, e.g. 'select * from table limit 200', but it throws the error below when I run something like 'select * from table'. The table is around 180 MB and can easily be loaded into memory, so I'm not sure why this is happening. Any help is much appreciated! I looked at the other similar questions, but they were not helpful.
Package versions: Python 3.7, pandas 0.24.2, PyHive 0.6.1
import pandas
import os
from pyhive import hive
def hiveconnection(query):
    """Run ``query`` against Hive and return the complete result set.

    A new LDAP-authenticated connection is opened on every call using the
    module-level ``HOST``, ``USER`` and ``PASSWORD`` values, and it is
    closed before the function exits, whether the query succeeds or fails.

    Args:
        query: The statement passed verbatim to ``cursor.execute``.

    Returns:
        Whatever ``cursor.fetchall()`` returns for the executed query; the
        whole result set is materialised in memory at once.

    Raises:
        Any exception raised while connecting, executing or fetching is
        propagated to the caller unchanged.
    """
    conn = hive.Connection(host=HOST, port=10000, username=USER,
                           password=PASSWORD, auth='LDAP')
    try:
        cur = conn.cursor()
        cur.execute(query)
        return cur.fetchall()
    finally:
        # Always release the connection: previously a failure in execute()
        # or fetchall() skipped close() and leaked the open session.
        conn.close()
# Unbounded query: every row of the table is requested in a single call.
query = "select * from table"
# Holds whatever hiveconnection() returns for the query above.
df_new = hiveconnection(query)
The query runs fine and I get results for a smaller result set, e.g. select * from table limit 200
But I get the following error when I query a larger number of rows.
--------------------------------------------------------------------------- TTransportException Traceback (most recent call last) in 1 query = """select * from sample_table""" ----> 2 df_new = hiveconnection(query)
in hiveconnection(query) 7 cur = conn.cursor() 8 cur.execute(query) ----> 9 result = cur.fetchall() 10 conn.close() 11
/usr/local/anaconda/lib/python3.7/site-packages/pyhive/common.py in fetchall(self) 134 :py:meth:
execute
did not produce any result set or no call was issued yet. 135 """ --> 136 return list(iter(self.fetchone, None)) 137 138 @property/usr/local/anaconda/lib/python3.7/site-packages/pyhive/common.py in fetchone(self) 103 104 # Sleep until we're done or we have some data to return --> 105 self._fetch_while(lambda: not self._data and self._state != self._STATE_FINISHED) 106 107 if not self._data:
/usr/local/anaconda/lib/python3.7/site-packages/pyhive/common.py in _fetch_while(self, fn) 43 def _fetch_while(self, fn): 44 while fn(): ---> 45 self._fetch_more() 46 if fn(): 47 time.sleep(self._poll_interval)
/usr/local/anaconda/lib/python3.7/site-packages/pyhive/hive.py in _fetch_more(self) 384 maxRows=self.arraysize, 385 ) --> 386 response = self._connection.client.FetchResults(req) 387 _check_status(response) 388 schema = self.description
/usr/local/anaconda/lib/python3.7/site-packages/TCLIService/TCLIService.py in FetchResults(self, req) 712 """ 713 self.send_FetchResults(req) --> 714 return self.recv_FetchResults() 715 716 def send_FetchResults(self, req):
/usr/local/anaconda/lib/python3.7/site-packages/TCLIService/TCLIService.py in recv_FetchResults(self) 731 raise x 732 result = FetchResults_result() --> 733 result.read(iprot) 734 iprot.readMessageEnd() 735 if result.success is not None:
/usr/local/anaconda/lib/python3.7/site-packages/TCLIService/TCLIService.py in read(self, iprot) 3468 if ftype == TType.STRUCT: 3469 self.success = TFetchResultsResp() -> 3470 self.success.read(iprot) 3471 else: 3472 iprot.skip(ftype)
/usr/local/anaconda/lib/python3.7/site-packages/TCLIService/ttypes.py in read(self, iprot) 6581 if ftype == TType.STRUCT: 6582 self.results = TRowSet() -> 6583 self.results.read(iprot) 6584 else: 6585 iprot.skip(ftype)
/usr/local/anaconda/lib/python3.7/site-packages/TCLIService/ttypes.py in read(self, iprot) 2865 for _i114 in range(_size110): 2866 _elem115 = TColumn() -> 2867 _elem115.read(iprot) 2868 self.columns.append(_elem115) 2869
iprot.readListEnd()/usr/local/anaconda/lib/python3.7/site-packages/TCLIService/ttypes.py in read(self, iprot) 2727 if ftype == TType.STRUCT: 2728 self.i64Val = TI64Column() -> 2729 self.i64Val.read(iprot) 2730 else: 2731 iprot.skip(ftype)
/usr/local/anaconda/lib/python3.7/site-packages/TCLIService/ttypes.py in read(self, iprot) 2351 (_etype79, _size76) = iprot.readListBegin() 2352 for _i80 in range(_size76): -> 2353 _elem81 = iprot.readI64() 2354 self.values.append(_elem81) 2355
iprot.readListEnd()/usr/local/anaconda/lib/python3.7/site-packages/thrift/protocol/TBinaryProtocol.py in readI64(self) 220 221 def readI64(self): --> 222 buff = self.trans.readAll(8) 223 val, = unpack('!q', buff) 224 return val
/usr/local/anaconda/lib/python3.7/site-packages/thrift/transport/TTransport.py in readAll(self, sz) 60 have = 0 61 while (have < sz): ---> 62 chunk = self.read(sz - have) 63 chunkLen = len(chunk) 64 have += chunkLen
/usr/local/anaconda/lib/python3.7/site-packages/thrift_sasl/init.py in read(self, sz) 171 return ret 172 --> 173 self._read_frame() 174 return ret + self.__rbuf.read(sz - len(ret)) 175
/usr/local/anaconda/lib/python3.7/site-packages/thrift_sasl/init.py in _read_frame(self) 188 else: 189 # If the frames are not encoded, just pass it through --> 190 decoded = self._trans.read(length) 191 self.__rbuf = BufferIO(decoded) 192
/usr/local/anaconda/lib/python3.7/site-packages/thrift/transport/TSocket.py in read(self, sz) 141 if len(buff) == 0: 142 raise TTransportException(type=TTransportException.END_OF_FILE, --> 143 message='TSocket read 0 bytes') 144 return buff 145
TTransportException: TSocket read 0 bytes