From 20bb804aae3401d22478e88c099be6c72537bd9a Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Sat, 16 Apr 2022 22:32:17 -1000 Subject: [PATCH 1/2] Avoid retrying open_connection on unrecoverable errors - We can retry so hard that we block the event loop Fixes ``` 2022-04-16 22:18:51 WARNING (MainThread) [asyncio] Executing exception=ConnectionRefusedError(61, "Connect call failed (192.168.107.200, 9999)") created at /opt/homebrew/Cellar/python@3.9/3.9.12/Frameworks/Python.framework/Versions/3.9/lib/python3.9/asyncio/tasks.py:460> took 1.001 seconds ``` --- kasa/protocol.py | 14 ++++++++++++++ kasa/tests/test_protocol.py | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+) diff --git a/kasa/protocol.py b/kasa/protocol.py index 24c2cd056..641a783b4 100755 --- a/kasa/protocol.py +++ b/kasa/protocol.py @@ -11,6 +11,7 @@ """ import asyncio import contextlib +import errno import json import logging import struct @@ -20,6 +21,7 @@ from .exceptions import SmartDeviceException _LOGGER = logging.getLogger(__name__) +_NO_RETRY_ERRORS = {errno.EHOSTDOWN, errno.EHOSTUNREACH, errno.ECONNREFUSED} class TPLinkSmartHomeProtocol: @@ -118,6 +120,18 @@ async def _query(self, request: str, retry_count: int, timeout: int) -> Dict: for retry in range(retry_count + 1): try: await self._connect(timeout) + except ConnectionRefusedError as ex: + await self.close() + raise SmartDeviceException( + f"Unable to connect to the device: {self.host}: {ex}" + ) + except OSError as ex: + await self.close() + if ex.errno in _NO_RETRY_ERRORS or retry >= retry_count: + raise SmartDeviceException( + f"Unable to connect to the device: {self.host}: {ex}" + ) + continue except Exception as ex: await self.close() if retry >= retry_count: diff --git a/kasa/tests/test_protocol.py b/kasa/tests/test_protocol.py index 5fe4763d5..f8931c11e 100644 --- a/kasa/tests/test_protocol.py +++ b/kasa/tests/test_protocol.py @@ -1,3 +1,4 @@ +import errno import json import logging import struct @@ -29,6 +30,39 @@ def aio_mock_writer(_, __): assert conn.call_count == retry_count + 1 +async def test_protocol_no_retry_on_unreachable(mocker): + conn = mocker.patch( + "asyncio.open_connection", + side_effect=OSError(errno.EHOSTUNREACH, "No route to host"), + ) + with pytest.raises(SmartDeviceException): + await TPLinkSmartHomeProtocol("127.0.0.1").query({}, retry_count=5) + + assert conn.call_count == 1 + + +async def test_protocol_no_retry_connection_refused(mocker): + conn = mocker.patch( + "asyncio.open_connection", + side_effect=ConnectionRefusedError, + ) + with pytest.raises(SmartDeviceException): + await TPLinkSmartHomeProtocol("127.0.0.1").query({}, retry_count=5) + + assert conn.call_count == 1 + + +async def test_protocol_retry_recoverable_error(mocker): + conn = mocker.patch( + "asyncio.open_connection", + side_effect=OSError(errno.ECONNRESET, "Connection reset by peer"), + ) + with pytest.raises(SmartDeviceException): + await TPLinkSmartHomeProtocol("127.0.0.1").query({}, retry_count=5) + + assert conn.call_count == 6 + + @pytest.mark.skipif(sys.version_info < (3, 8), reason="3.8 is first one with asyncmock") @pytest.mark.parametrize("retry_count", [1, 3, 5]) async def test_protocol_reconnect(mocker, retry_count): From 351fb49c3d26122c564535476db75a32f0bba026 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Sat, 23 Apr 2022 15:08:35 -1000 Subject: [PATCH 2/2] comment --- kasa/protocol.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/kasa/protocol.py b/kasa/protocol.py index 641a783b4..b6d44be90 100755 --- a/kasa/protocol.py +++ b/kasa/protocol.py @@ -117,6 +117,15 @@ def _reset(self) -> None: async def _query(self, request: str, retry_count: int, timeout: int) -> Dict: """Try to query a device.""" + # + # Most of the time we will already be connected if the device is online + # and the connect call will do nothing and return right away + # + # However, if we get an unrecoverable error (_NO_RETRY_ERRORS and ConnectionRefusedError) + # we do not want to keep trying since many connection open/close operations + # in the same time frame can block the event loop. This is especially + # import when there are multiple tplink devices being polled. + # for retry in range(retry_count + 1): try: await self._connect(timeout)