I have implemented a server in Python 3.8.6 using the socket library (running on a Raspberry Pi 4 w/ 8G RAM & Raspberry Pi OS: Buster) which, by design, only handles a single connection at a time. When a connection is gained, the server spawns two threads, one for sending data, and one for receiving it. When the connection is lost (in theory, it will never be intentionally closed), these threads are terminated and a new thread is spawned to accept a new connection. The problem is, the receiving thread never actually terminates, instead getting stuck infinitely on socket.recv()
. This persists even after calling socket.shutdown(SHUT_RDWR)
and socket.close()
. The code is essentially as follows (I've paired it down to only the pertinent bits and left the heartbeat management code, as I've found this is the only way I can consistently tell that the connection has dropped, as the errors that are supposed to be thrown don't always):
import dataclasses
import logging
import pickle
import queue
import socket
import threading
import time
import traceback
log = logging.getLogger()
class Server(threading.Thread):
@dataclasses.dataclass
class Heartbeat:
failures: int = 0
reciprocate: bool = True
time: float = 0
__CLIENT_IP = '10.1.1.58'
__EOF = "\r\nEOF\r\n".encode()
__PORT = 33368
def __init__(self, kill_flag:threading.Event) -> None:
self.__kill_flag: threading.Event = kill_flag
self.__server = socket.create_server(("", self.__PORT), backlog=0, reuse_port=True)
self.__server.settimeout(5)
self.__client = None
self.__queue_out = queue.Queue(-1)
self.__queue_in = queue.Queue(-1)
self._connected = False
self.__connecting_lock = threading.Lock()
self.__receive_thread = None
self.__transmit_thread = None
threading.Thread(target=self.__connect).start()
def __connect(self) -> None:
with self.__connecting_lock:
if self._connected:
return
addr = ("", 0)
if self.__client:
try:
self.__client.shutdown(socket.SHUT_RDWR)
except OSError:
pass
finally:
self.__client.close()
self.__client = None
while (self.__receive_thread and self.__receive_thread.is_alive()) or (self.__transmit_thread and self.__transmit_thread.is_alive()):
log.debug("connected: %s, recv alive: %s, trans alive: %s", self._connected, self.__receive_thread.is_alive(), self.__transmit_thread.is_alive())
time.sleep(1)
log.info("Waiting for connection...")
while addr[0] != self.__CLIENT_IP and not self.__kill_flag.is_set():
try:
self.__client, addr = self.__server.accept()
if addr[0] != self.__CLIENT_IP:
self.__client.close()
log.warning("Connection from %s:%s - Rejected", addr[0], addr[1])
time.sleep(1)
except socket.timeout:
continue
if self.__kill_flag.is_set():
self.__del__()
else:
log.info("Connection from %s:%s - Accepted", addr[0], addr[1])
self.heartbeat = self.Heartbeat()
self._connected = True
self.__receive_thread = threading.Thread(target=self.__receive)
self.__transmit_thread = threading.Thread(target=self.__transmit)
self.__queue_in.queue.clear()
self.__receive_thread.start()
self.__transmit_thread.start()
def get_package(self) -> tuple:
return self.__queue_out.get(True)
def queue_message(self, content:tuple = ()) -> None:
self.__queue_in.put_nowait(content)
def __receive(self) -> None:
buffer = bytearray()
while self._connected and not self.__kill_flag.is_set():
try:
if self.__EOF in buffer:
payload = pickle.loads(buffer.split(self.__EOF, maxsplit=1)[0])
self.__queue_out.put_nowait(payload)
buffer = bytearray(self.__EOF.join(buffer.split(self.__EOF)[1:]))
else:
try:
log.debug("looping into recv")
data = self.__client.recv(1024)
log.debug("looping past recv")
if len(data) == 0:
raise ConnectionResetError
else:
buffer += data
except OSError as error:
log.error(f"Receive error: {error}")
if self._connected:
self._connected = False
threading.Thread(target=self.__connect).start()
except AttributeError:
break
except socket.timeout:
continue
except Exception as error:
log.error(f"Receive error: {error}")
traceback.print_tb(error.__traceback__)
if self._connected:
self._connected = False
threading.Thread(target=self.__connect).start()
def __transmit(self) -> None:
while self._connected and not self.__kill_flag.is_set():
try:
payload = self.__queue_in.get(True, 5)
except queue.Empty:
if self.heartbeat.reciprocate:
payload = (time.time(),)
self.heartbeat.reciprocate = False
elif time.time() - 7 >= self.heartbeat.time:
self.heartbeat.failures += 1
log.debug("Heartbeat failure")
if self.heartbeat.failures >= 3:
log.warning("Lost connection to client: No heartbeat detected")
if self._connected:
self._connected = False
threading.Thread(target=self.__connect).start()
self.heartbeat.reciprocate = True
continue
else:
continue
self.heartbeat.time = time.time()
try:
self.__client.sendall(pickle.dumps(payload))
log.debug("Package sent: %s", payload)
except BrokenPipeError:
log.warning("Lost connection to client: Broken pipe")
self.__queue_in.put_nowait(payload)
if self._connected:
self._connected = False
threading.Thread(target=self.__connect).start()
except AttributeError:
break
def __del__(self) -> None:
if self.__client:
self.__client.close()
self.__server.close()
After the connection drops, the __receive
method/thread hangs specifically on this line: data = self.__client.recv(1024)
.
Perhaps the strangest part is that the client, whose __transmit
and __receive
methods are virtually identical, does not have this problem.
Any insight anyone can offer into as why this is happening, and how to remedy it, would be greatly appreciated.
Many thanks!