From f604f188ca2dcea66fcb10870a5bb19bae83a28c Mon Sep 17 00:00:00 2001 From: yegorske50 Date: Thu, 23 Apr 2026 02:16:42 +0530 Subject: [PATCH 1/2] Fix swallowed CancelledError in start_child_workflow and Nexus operations (Issue #1445) --- temporalio/worker/_workflow_instance.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/temporalio/worker/_workflow_instance.py b/temporalio/worker/_workflow_instance.py index fea97564b..0ca0515e8 100644 --- a/temporalio/worker/_workflow_instance.py +++ b/temporalio/worker/_workflow_instance.py @@ -2025,6 +2025,7 @@ async def run_child() -> Any: return handle except asyncio.CancelledError: apply_child_cancel_error() + raise async def _outbound_start_nexus_operation( self, input: StartNexusOperationInput[Any, OutputT] @@ -2051,6 +2052,7 @@ async def operation_handle_fn() -> OutputT: except asyncio.CancelledError: cancel_command = self._add_command() handle._apply_cancel_command(cancel_command) + raise handle = _NexusOperationHandle( self, self._next_seq("nexus_operation"), input, operation_handle_fn() @@ -2065,6 +2067,7 @@ async def operation_handle_fn() -> OutputT: except asyncio.CancelledError: cancel_command = self._add_command() handle._apply_cancel_command(cancel_command) + raise #### Miscellaneous helpers #### # These are in alphabetical order. From 218ef5ee8f055f98c686bd1014ade30493977f9a Mon Sep 17 00:00:00 2001 From: yegorske50 Date: Sat, 25 Apr 2026 15:15:37 +0530 Subject: [PATCH 2/2] fix: guard CancelledError re-raise with _cancel_requested check (fixes #1445) --- temporalio/worker/_workflow_instance.py | 8 ++-- tests/worker/test_workflow.py | 55 +++++++++++++++++++++++-- 2 files changed, 57 insertions(+), 6 deletions(-) diff --git a/temporalio/worker/_workflow_instance.py b/temporalio/worker/_workflow_instance.py index 0ca0515e8..65ca93d8d 100644 --- a/temporalio/worker/_workflow_instance.py +++ b/temporalio/worker/_workflow_instance.py @@ -2025,7 +2025,8 @@ async def run_child() -> Any: return handle except asyncio.CancelledError: apply_child_cancel_error() - raise + if self._cancel_requested: + raise async def _outbound_start_nexus_operation( self, input: StartNexusOperationInput[Any, OutputT] @@ -2052,7 +2053,6 @@ async def operation_handle_fn() -> OutputT: except asyncio.CancelledError: cancel_command = self._add_command() handle._apply_cancel_command(cancel_command) - raise handle = _NexusOperationHandle( self, self._next_seq("nexus_operation"), input, operation_handle_fn() @@ -2067,7 +2067,9 @@ async def operation_handle_fn() -> OutputT: except asyncio.CancelledError: cancel_command = self._add_command() handle._apply_cancel_command(cancel_command) - raise + if self._cancel_requested: + raise + #### Miscellaneous helpers #### # These are in alphabetical order. diff --git a/tests/worker/test_workflow.py b/tests/worker/test_workflow.py index cf84db758..59ed40ecb 100644 --- a/tests/worker/test_workflow.py +++ b/tests/worker/test_workflow.py @@ -1209,9 +1209,58 @@ async def test_workflow_cancel_child_started(client: Client, use_execute: bool): assert isinstance(err.value.cause.cause, CancelledError) -@pytest.mark.skip(reason="unable to easily prevent child start currently") -async def test_workflow_cancel_child_unstarted(_client: Client): - raise NotImplementedError +@workflow.defn +class CancelDuringChildStartWorkflow: + def __init__(self) -> None: + self._proceed = False + + @workflow.signal + def proceed(self) -> None: + self._proceed = True + + @workflow.run + async def run(self) -> None: + await workflow.wait_condition(lambda: self._proceed) + # Start a child on a task queue with no worker. The child's first WFT + # never starts, so _start_fut remains unresolved and the start loop + # blocks forever. + await workflow.start_child_workflow( + LongSleepWorkflow.run, + id=f"{workflow.info().workflow_id}_child", + task_queue="nonexistent-task-queue-no-worker-abc123", + ) + await workflow.sleep(1000) + + +async def test_workflow_cancel_child_unstarted(client: Client): + # Regression test for https://github.com/temporalio/sdk-python/issues/1445 + # + # When cancellation arrived while the parent was waiting for a child + # workflow to start, the CancelledError was caught in the start loop + # to send a cancel command to the child — but was not re-raised. + # Because _start_fut never resolves (child on a queue with no worker), + # the loop would keep waiting forever, hanging the parent workflow. + # + # The fix: re-raise only when self._cancel_requested is True, which + # distinguishes Temporal workflow cancellation from other CancelledError + # sources such as asyncio.wait_for timeouts. + async with new_worker( + client, + CancelDuringChildStartWorkflow, + # Deliberately not registering LongSleepWorkflow and not starting + # a worker on the child's task queue. + ) as worker: + handle = await client.start_workflow( + CancelDuringChildStartWorkflow.run, + id=f"workflow-{uuid.uuid4()}", + task_queue=worker.task_queue, + execution_timeout=timedelta(seconds=30), + ) + await handle.signal(CancelDuringChildStartWorkflow.proceed) + await handle.cancel() + with pytest.raises(WorkflowFailureError) as err: + await handle.result() + assert isinstance(err.value.cause, CancelledError) @workflow.defn