Skip to content

Commit 02e30d7

Browse files
Fix issue with actor restarting instead of stopping
When an actor threw an exception during cancellation, it restarted rather than stopping as expected. Now it stops and raises an unhandled exception. Signed-off-by: Elzbieta Kotulska <[email protected]>
1 parent 0609b7a commit 02e30d7

File tree

3 files changed

+43
-1
lines changed

3 files changed

+43
-1
lines changed

RELEASE_NOTES.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,4 +14,4 @@
1414

1515
## Bug Fixes
1616

17-
<!-- Here goes notable bug fixes that are worth a special mention or explanation -->
17+
- Fixed issue where actors would restart instead of stopping when exceptions occurred during cancellation. Actors now properly stop and surface the unhandled exception.

src/frequenz/sdk/actor/_actor.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,13 +44,24 @@ class Actor(BackgroundService, abc.ABC):
4444
This is mostly used for testing purposes and shouldn't be set in production.
4545
"""
4646

47+
def __init__(self, *, name: str | None = None) -> None:
48+
"""Create actor instance.
49+
50+
Args:
51+
name: The name of this background service.
52+
"""
53+
super().__init__(name=name)
54+
self._is_cancelled = False
55+
4756
def start(self) -> None:
4857
"""Start this actor.
4958
5059
If this actor is already running, this method does nothing.
5160
"""
5261
if self.is_running:
5362
return
63+
64+
self._is_cancelled = False
5465
self._tasks.clear()
5566
self._tasks.add(asyncio.create_task(self._run_loop()))
5667

@@ -94,6 +105,17 @@ async def _run_loop(self) -> None:
94105
_logger.info("Actor %s: Cancelled.", self)
95106
raise
96107
except Exception: # pylint: disable=broad-except
108+
if self._is_cancelled:
109+
# If actor was cancelled, but any tasks have failed with an exception
110+
# other than asyncio.CancelledError, those exceptions are combined
111+
# in an ExceptionGroup or BaseExceptionGroup.
112+
# We have to handle that case separately to stop actor instead
113+
# of restarting it.
114+
_logger.exception(
115+
"Actor %s: Raised an unhandled exception during stop.", self
116+
)
117+
break
118+
97119
_logger.exception("Actor %s: Raised an unhandled exception.", self)
98120
limit_str = "∞" if self._restart_limit is None else self._restart_limit
99121
limit_str = f"({n_restarts}/{limit_str})"
@@ -113,3 +135,14 @@ async def _run_loop(self) -> None:
113135
break
114136

115137
_logger.info("Actor %s: Stopped.", self)
138+
139+
def cancel(self, msg: str | None = None) -> None:
140+
"""Cancel actor.
141+
142+
Cancelled actor can't be started again.
143+
144+
Args:
145+
msg: The message to be passed to the tasks being cancelled.
146+
"""
147+
self._is_cancelled = True
148+
return super().cancel(msg)

tests/actor/test_actor.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -391,3 +391,12 @@ async def test_actor_stop_if_error_was_raised_during_cancel(
391391

392392
await actor.stop()
393393
assert actor.restart_count == 0
394+
395+
assert caplog.record_tuples == [
396+
(*ACTOR_INFO, "Actor RaiseExceptionOnCancelActor[test]: Started."),
397+
(
398+
*ACTOR_ERROR,
399+
"Actor RaiseExceptionOnCancelActor[test]: Raised an unhandled exception during stop.",
400+
),
401+
(*ACTOR_INFO, "Actor RaiseExceptionOnCancelActor[test]: Stopped."),
402+
]

0 commit comments

Comments
 (0)