From c19f69daf5e44554f8db19d2e30bed1aa5581b0b Mon Sep 17 00:00:00 2001 From: aestene Date: Wed, 24 Apr 2024 11:54:23 +0200 Subject: [PATCH] Add retry functionality to monitor status request --- src/isar/config/settings.py | 3 ++ src/isar/state_machine/states/monitor.py | 37 ++++++++++++++++++- .../models/exceptions/robot_exceptions.py | 13 +++++++ 3 files changed, 52 insertions(+), 1 deletion(-) diff --git a/src/isar/config/settings.py b/src/isar/config/settings.py index 4d625a39..d4d3ac38 100644 --- a/src/isar/config/settings.py +++ b/src/isar/config/settings.py @@ -56,6 +56,9 @@ def __init__(self) -> None: # Number of attempts to initiate a step or mission before cancelling INITIATE_FAILURE_COUNTER_LIMIT: int = Field(default=10) + # Number of attempts to request a step status in monitor before cancelling + REQUEST_STATUS_FAILURE_COUNTER_LIMIT: int = Field(default=3) + # Number of attempts to stop the robot before giving up STOP_ROBOT_ATTEMPTS_LIMIT: int = Field(default=10) diff --git a/src/isar/state_machine/states/monitor.py b/src/isar/state_machine/states/monitor.py index f4a03f07..3e245781 100644 --- a/src/isar/state_machine/states/monitor.py +++ b/src/isar/state_machine/states/monitor.py @@ -7,6 +7,7 @@ from transitions import State from isar.mission_planner.task_selector_interface import TaskSelectorStop +from isar.config.settings import settings from isar.services.utilities.threaded_request import ( ThreadedRequest, ThreadedRequestNotFinishedError, @@ -17,6 +18,7 @@ RobotMissionStatusException, RobotRetrieveInspectionException, RobotStepStatusException, + RobotCommunicationTimeoutException, ) from robot_interface.models.inspection.inspection import Inspection from robot_interface.models.mission.mission import Mission @@ -32,6 +34,10 @@ class Monitor(State): def __init__(self, state_machine: "StateMachine") -> None: super().__init__(name="monitor", on_enter=self.start, on_exit=self.stop) self.state_machine: "StateMachine" = state_machine + self.request_status_failure_counter: int = 0 + self.request_status_failure_counter_limit: int = ( + settings.REQUEST_STATUS_FAILURE_COUNTER_LIMIT + ) self.logger = logging.getLogger("state_machine") self.step_status_thread: Optional[ThreadedRequest] = None @@ -61,7 +67,6 @@ def _run(self) -> None: status_function=self.state_machine.robot.step_status, thread_name="State Machine Monitor Get Step Status", ) - try: status: Union[StepStatus, MissionStatus] = ( self.step_status_thread.get_output() @@ -70,6 +75,34 @@ def _run(self) -> None: time.sleep(self.state_machine.sleep_time) continue + except RobotCommunicationTimeoutException as e: + self.state_machine.current_mission.error_message = ErrorMessage( + error_reason=e.error_reason, error_description=e.error_description + ) + self.step_status_thread = None + self.request_status_failure_counter += 1 + self.logger.warning( + f"Monitoring step {self.state_machine.current_step.id} failed #: " + f"{self.request_status_failure_counter} failed because: {e.error_description}" + ) + + if ( + self.request_status_failure_counter + >= self.request_status_failure_counter_limit + ): + self.state_machine.current_step.error_message = ErrorMessage( + error_reason=e.error_reason, + error_description=e.error_description, + ) + self.logger.error( + f"Step will be cancelled after failing to get step status " + f"{self.request_status_failure_counter} times because: " + f"{e.error_description}" + ) + status = StepStatus.Failed + else: + continue + except RobotStepStatusException as e: self.state_machine.current_step.error_message = ErrorMessage( error_reason=e.error_reason, error_description=e.error_description @@ -98,6 +131,8 @@ def _run(self) -> None: f"Retrieving the status failed because: {e.error_description}" ) + self.request_status_failure_counter = 0 + if isinstance(status, StepStatus): self.state_machine.current_step.status = status elif isinstance(status, MissionStatus): diff --git a/src/robot_interface/models/exceptions/robot_exceptions.py b/src/robot_interface/models/exceptions/robot_exceptions.py index ab088a82..a3b49c5a 100644 --- a/src/robot_interface/models/exceptions/robot_exceptions.py +++ b/src/robot_interface/models/exceptions/robot_exceptions.py @@ -5,6 +5,7 @@ class ErrorReason(str, Enum): RobotCommunicationException: str = "robot_communication_exception" + RobotCommunicationTimeoutException: str = "robot_communication_timeout_exception" RobotInfeasibleStepException: str = "robot_infeasible_step_exception" RobotInfeasibleMissionException: str = "robot_infeasible_mission_exception" RobotMissionStatusException: str = "robot_mission_status_exception" @@ -49,6 +50,18 @@ def __init__(self, error_description: str) -> None: pass +# An exception which should be thrown by the robot package if the communication has timed +# out and ISAR should retry the request. +class RobotCommunicationTimeoutException(RobotException): + def __init__(self, error_description: str) -> None: + super().__init__( + error_reason=ErrorReason.RobotCommunicationTimeoutException, + error_description=error_description, + ) + + pass + + # An exception which should be thrown by the robot package if it is unable to start the # current step. class RobotInfeasibleStepException(RobotException):