From a4caed57607ff62998533c9155ac1e37eedc77d9 Mon Sep 17 00:00:00 2001 From: TheJulianJES Date: Sun, 31 May 2026 01:19:52 +0200 Subject: [PATCH 1/4] Retry after route rediscovery on NWK_NO_ROUTE in send_packet When a request fails with NWK_NO_ROUTE, send_packet rediscovered the route but then re-raised, relying on zigpy's application-level retry loop to actually re-send. zigpy 1.5.0 (#1824) moved retries out of ControllerApplication.request, so a directly-issued request was no longer retried and route rediscovery never took effect. Retry the send once within send_packet's existing recovery loop after rediscovering the route, matching the MAC_TRANSACTION_EXPIRED recovery path which already does this. This makes send_packet self-recovering regardless of caller. --- zigpy_znp/zigbee/application.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/zigpy_znp/zigbee/application.py b/zigpy_znp/zigbee/application.py index 5f2ada0..527ff11 100644 --- a/zigpy_znp/zigbee/application.py +++ b/zigpy_znp/zigbee/application.py @@ -891,8 +891,9 @@ async def send_packet(self, packet: zigpy.types.ZigbeePacket) -> None: try: # We retry sending twice but the only devices that will use the second retry # attempt are sleeping end devices that silently switched parents from the - # coordinator, all others will fail immediately - for _retry_attempt in range(2): + # coordinator, as well as devices for which we just rediscovered a route. + # All others will fail immediately. + for retry_attempt in range(2): async with self._limit_concurrency(priority=packet.priority): try: # ZDO requests do not generate `AF.DataConfirm` messages @@ -973,9 +974,17 @@ async def send_packet(self, packet: zigpy.types.ZigbeePacket) -> None: else: continue - # Perform route discovery explicitly if the stack fails - if e.response.Status == t.Status.NWK_NO_ROUTE: + # Perform route discovery explicitly if the stack fails and + # then retry the request. zigpy no longer retries failed + # requests at the application level (zigpy #1824), so the retry + # must happen here for route rediscovery to take effect. + if ( + e.response.Status == t.Status.NWK_NO_ROUTE + and device is not None + and retry_attempt < 1 + ): await self._discover_route(device.nwk) + continue raise except InvalidCommandResponse as e: From 7e2a318b534ee18462ed1b423efdb4af0afd2dc0 Mon Sep 17 00:00:00 2001 From: TheJulianJES Date: Sun, 31 May 2026 01:32:58 +0200 Subject: [PATCH 2/4] Don't report success when send_packet recovery exhausts all attempts The new NWK_NO_ROUTE retry path could combine with the existing MAC_TRANSACTION_EXPIRED recovery to consume both send attempts via `continue` without ever raising: a first-attempt NWK_NO_ROUTE followed by a first-time MAC_TRANSACTION_EXPIRED on the final attempt left the loop with succeeded=False and no exception, so an undelivered packet was reported as a successful send. Add a for/else safety net that re-raises the last error whenever the loop completes without a successful break. Add a regression test for the NWK_NO_ROUTE -> MAC_TRANSACTION_EXPIRED fall-through. --- tests/application/test_requests.py | 87 ++++++++++++++++++++++++++++++ zigpy_znp/zigbee/application.py | 10 ++++ 2 files changed, 97 insertions(+) diff --git a/tests/application/test_requests.py b/tests/application/test_requests.py index a1ee64d..11e9b75 100644 --- a/tests/application/test_requests.py +++ b/tests/application/test_requests.py @@ -592,6 +592,93 @@ def set_route_discovered(req): await app.shutdown() +@pytest.mark.parametrize("device", [FormedLaunchpadCC26X2R1]) +async def test_request_recovery_route_rediscovery_then_assoc_failure( + device, make_application, mocker +): + # A NWK_NO_ROUTE failure that consumes the first attempt, followed by a + # first-time MAC_TRANSACTION_EXPIRED recovery on the final attempt, must surface + # as a DeliveryError rather than silently reporting a successful send. + app, znp_server = make_application(server_cls=device) + + await app.startup(auto_form=False) + + mocker.patch("zigpy_znp.zigbee.application.DATA_CONFIRM_TIMEOUT", new=0.1) + app._znp._config[conf.CONF_ZNP_CONFIG][conf.CONF_ARSP_TIMEOUT] = 1 + + device = app.add_initialized_device(ieee=t.EUI64(range(8)), nwk=0xABCD) + + assoc_device, _ = c.util.Device.deserialize(b"\xFF" * 100) + assoc_device.shortAddr = device.nwk + assoc_device.nodeRelation = c.util.NodeRelation.CHILD_FFD_RX_IDLE + + # First attempt fails with NWK_NO_ROUTE, the retried attempt fails with a + # first-time MAC_TRANSACTION_EXPIRED association failure + data_confirm_statuses = iter( + [t.Status.NWK_NO_ROUTE, t.Status.MAC_TRANSACTION_EXPIRED] + ) + + def data_confirm_replier(req): + return c.AF.DataConfirm.Callback( + Status=next(data_confirm_statuses), + Endpoint=1, + TSN=1, + ) + + znp_server.reply_to( + c.AF.DataRequestExt.Req(partial=True), + responses=[ + c.AF.DataRequestExt.Rsp(Status=t.Status.SUCCESS), + data_confirm_replier, + ], + ) + + znp_server.reply_to( + c.UTIL.AssocGetWithAddress.Req(IEEE=device.ieee, partial=True), + responses=[c.UTIL.AssocGetWithAddress.Rsp(Device=assoc_device)], + ) + + did_assoc_remove = znp_server.reply_to( + c.UTIL.AssocRemove.Req(IEEE=device.ieee), + responses=[c.UTIL.AssocRemove.Rsp(Status=t.Status.SUCCESS)], + ) + + did_assoc_add = znp_server.reply_to( + c.UTIL.AssocAdd.Req( + NWK=device.nwk, + IEEE=device.ieee, + NodeRelation=c.util.NodeRelation.CHILD_FFD_RX_IDLE, + ), + responses=[c.UTIL.AssocAdd.Rsp(Status=t.Status.SUCCESS)], + ) + + was_route_discovered = znp_server.reply_to( + c.ZDO.ExtRouteDisc.Req( + Dst=device.nwk, Options=c.zdo.RouteDiscoveryOptions.UNICAST, partial=True + ), + responses=[c.ZDO.ExtRouteDisc.Rsp(Status=t.Status.SUCCESS)], + ) + + with pytest.raises(DeliveryError): + await app.request( + device=device, + profile=260, + cluster=1, + src_ep=1, + dst_ep=1, + sequence=1, + data=b"\x00", + ) + + # The route was rediscovered after the first failure + assert was_route_discovered.call_count >= 1 + # The association removed on the final attempt must be re-added on failure + assert len(did_assoc_remove.mock_calls) >= 1 + assert len(did_assoc_add.mock_calls) >= 1 + + await app.shutdown() + + @pytest.mark.parametrize("device_cls", FORMED_DEVICES) @pytest.mark.parametrize("fw_assoc_remove", [True, False]) @pytest.mark.parametrize("final_status", [t.Status.SUCCESS, t.Status.APS_NO_ACK]) diff --git a/zigpy_znp/zigbee/application.py b/zigpy_znp/zigbee/application.py index 527ff11..7ac6447 100644 --- a/zigpy_znp/zigbee/application.py +++ b/zigpy_znp/zigbee/application.py @@ -887,6 +887,7 @@ async def send_packet(self, packet: zigpy.types.ZigbeePacket) -> None: succeeded = False child_association = None tried_assoc_remove = False + last_error = None try: # We retry sending twice but the only devices that will use the second retry @@ -934,6 +935,8 @@ async def send_packet(self, packet: zigpy.types.ZigbeePacket) -> None: succeeded = True break except InvalidCommandResponse as e: + last_error = e + # Child aging is disabled so if a child switches parents from # the coordinator to another router, we will not be able to # re-discover a route to it. We have to manually drop the child @@ -987,6 +990,13 @@ async def send_packet(self, packet: zigpy.types.ZigbeePacket) -> None: continue raise + else: + # Every attempt was consumed by a recovery `continue` without a + # successful send (e.g. NWK_NO_ROUTE followed by a first-time + # MAC_TRANSACTION_EXPIRED on the final attempt). Surface the last + # error instead of silently reporting the send as successful. + if last_error is not None: + raise last_error except InvalidCommandResponse as e: status = e.response.Status raise DeliveryError(f"Failed to send request: {status!r}", status=status) From fa8390764362a84e9aa6ff58ae42c3c4d53bd3f2 Mon Sep 17 00:00:00 2001 From: TheJulianJES Date: Sun, 31 May 2026 01:45:24 +0200 Subject: [PATCH 3/4] Assert exact recovery counts in route-rediscovery regression test The scenario is a single deterministic config, so exact counts are stronger than `>= 1`: they pin route discovery to twice (NWK path plus MAC recovery) and verify the association is removed exactly once and re-added exactly once, catching an unbalanced remove/add that `>= 1` would miss. --- tests/application/test_requests.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/tests/application/test_requests.py b/tests/application/test_requests.py index 11e9b75..649f939 100644 --- a/tests/application/test_requests.py +++ b/tests/application/test_requests.py @@ -670,11 +670,13 @@ def data_confirm_replier(req): data=b"\x00", ) - # The route was rediscovered after the first failure - assert was_route_discovered.call_count >= 1 - # The association removed on the final attempt must be re-added on failure - assert len(did_assoc_remove.mock_calls) >= 1 - assert len(did_assoc_add.mock_calls) >= 1 + # Route discovery runs once for the first-attempt NWK_NO_ROUTE and once during + # the final-attempt MAC_TRANSACTION_EXPIRED recovery + assert was_route_discovered.call_count == 2 + # The association is removed exactly once during recovery and, since the send + # ultimately fails, must be re-added exactly once + assert len(did_assoc_remove.mock_calls) == 1 + assert len(did_assoc_add.mock_calls) == 1 await app.shutdown() From 395c042d813a0c2640ac372329b6203dc7ea4879 Mon Sep 17 00:00:00 2001 From: TheJulianJES Date: Sun, 31 May 2026 01:46:34 +0200 Subject: [PATCH 4/4] Drop past-behavior note from route rediscovery comment Keep the comment focused on what the code does, not on zigpy's historical retry behavior. --- zigpy_znp/zigbee/application.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/zigpy_znp/zigbee/application.py b/zigpy_znp/zigbee/application.py index 7ac6447..8bf8b3c 100644 --- a/zigpy_znp/zigbee/application.py +++ b/zigpy_znp/zigbee/application.py @@ -978,9 +978,7 @@ async def send_packet(self, packet: zigpy.types.ZigbeePacket) -> None: continue # Perform route discovery explicitly if the stack fails and - # then retry the request. zigpy no longer retries failed - # requests at the application level (zigpy #1824), so the retry - # must happen here for route rediscovery to take effect. + # then retry the request if ( e.response.Status == t.Status.NWK_NO_ROUTE and device is not None