From 364d0541b70198acab9ee6a1b2b035e386d570e5 Mon Sep 17 00:00:00 2001 From: aggieNick02 Date: Thu, 10 Oct 2024 12:13:10 -0500 Subject: [PATCH] Fix a bad race condition that causes intermittent socket receive failures. The issue is caused by the code checking socket status *after* checking if bytes are available. Bytes may have become available between the last byte available check and the socket status check. This causes us to incorrectly ignore the newly available bytes and return 0 to the caller, which in any blocking/timeout scenario tells the caller no more bytes will ever be available. This was introduced in commit 89b9f10dbd9425a650afa1331214605bee8ce736 See also https://github.com/adafruit/Adafruit_CircuitPython_Wiznet5k/pull/151 as a fix for https://github.com/adafruit/Adafruit_CircuitPython_Wiznet5k/issues/135 --- adafruit_wiznet5k/adafruit_wiznet5k_socketpool.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/adafruit_wiznet5k/adafruit_wiznet5k_socketpool.py b/adafruit_wiznet5k/adafruit_wiznet5k_socketpool.py index 6694bef..bbfbdac 100644 --- a/adafruit_wiznet5k/adafruit_wiznet5k_socketpool.py +++ b/adafruit_wiznet5k/adafruit_wiznet5k_socketpool.py @@ -601,7 +601,14 @@ def recv_into( # pylint: disable=unused-argument self._buffer = self._buffer[bytes_to_read:] # explicitly recheck num_to_read to avoid extra checks continue - + # We need to read the socket status here before seeing if any bytes are available. + # Why? Because otherwise we have a bad race condition in the if/elif/... logic below + # that can cause recv_into to fail when the other side closes the connection after + # sending bytes. The problem is that initially we can have num_avail=0 while the + # socket state is not in CLOSED or CLOSED_WAIT. Then after the if but before the elif + # that checks the socket state, bytes arrive and the other end closes the connection. + # So now bytes are available but we see the CLOSED/CLOSED_WAIT state and ignore them. + status_before_getting_available = self._status num_avail = self._available() if num_avail > 0: last_read_time = ticks_ms() @@ -620,7 +627,9 @@ def recv_into( # pylint: disable=unused-argument elif num_read > 0: # We got a message, but there are no more bytes to read, so we can stop. break - elif self._status in ( + # See note where we set status_before_getting_available for why we can't just check + # _status here + elif status_before_getting_available in ( wiznet5k.adafruit_wiznet5k.SNSR_SOCK_CLOSED, wiznet5k.adafruit_wiznet5k.SNSR_SOCK_CLOSE_WAIT, ):