OCHA-DAP · mcarans · Jan 9, 2025 · Jan 9, 2025 · Jan 9, 2025 · Jan 10, 2025
diff --git a/documentation/main.md b/documentation/main.md
@@ -35,7 +35,8 @@ The library has detailed API documentation which can be found in the menu at the
 From 3.8.0, multiple_replace, match_template_variables, earliest_index,
 get_matching_text_in_strs, get_matching_text,
 get_matching_then_nonmatching_text moved from hdx.utilities.text to
-hdx.utilities.matching.
+hdx.utilities.matching. ErrorsOnExit renamed to ErrorHandler with changed
+functionality.
 
 From 3.5.5, Python 3.7 no longer supported
 
@@ -654,17 +655,23 @@ Then use the logger like this:
 There is a class that allows collecting of errors to be logged later, typically on exit.
 It is called ErrorsOnExit and can be used as follows:
 
-    with ErrorsOnExit() as errors:
+    with ErrorHandler() as errors:
         ...
-        errors.add("MY ERROR MESSAGE")
+        errors.add("MY ERROR MESSAGE")
+        errors.add("MY WARNING MESSAGE", "category 1", "warning")
         ...
-        errors.add("ANOTHER ERROR MESSAGE")
-
-The above code will collect the errors, in this case "MY ERROR MESSAGE" and "ANOTHER
-ERROR MESSAGE". On leaving the `with` block, the errors will be logged and the code will
-exit with the error code 1 (ie. `sys.exit(1)` will be called). If there are no errors,
-the code will not exit and execution will continue after the `with` block (ie.
-`sys.exit(1)` will not be called).
+        errors.add("ERROR MESSAGE", "category 1", "error")
+        errors.add("ANOTHER ERROR MESSAGE", "category 1", "error")
+
+The above code will collect the errors and warnings, in this case
+"MY ERROR MESSAGE", "category 1 - ERROR MESSAGE",
+"category 1 - ANOTHER ERROR MESSAGE" and "category 1 - MY WARNING MESSAGE".
+On leaving the `with` block, the errors and warnings will be logged, grouped by
+category and sorted within each category. The code will exit with the error
+code 1 (i.e. `sys.exit(1)` will be called) if there are errors and
+`should_exit_on_error` is True (the default for this parameter in the
+constructor). If there are no errors, the code will not exit and execution
+will continue after the `with` block (i.e. `sys.exit(1)` will not be called).
 
 ## State utility
 

diff --git a/requirements.txt b/requirements.txt
@@ -86,7 +86,7 @@ pluggy==1.5.0
     # via pytest
 pre-commit==4.0.1
     # via hdx-python-utilities (pyproject.toml)
-pydantic==2.10.4
+pydantic==2.10.5
     # via frictionless
 pydantic-core==2.27.2
     # via pydantic

diff --git a/src/hdx/utilities/error_handler.py b/src/hdx/utilities/error_handler.py
@@ -0,0 +1,185 @@
+"""Collect errors and warnings by category and log them."""
+
+import logging
+import sys
+from typing import Any, Optional
+
+from hdx.utilities.dictandlist import dict_of_sets_add
+from hdx.utilities.typehint import ListTuple
+
+logger = logging.getLogger(__name__)
+
+
+class ErrorHandler:
+    """Class that enables recording of errors and warnings.
+
+    Errors and warnings can be logged by calling the `output` method or
+    automatically logged on exit. Messages are output grouped by category and
+    sorted.
+
+    Args:
+        should_exit_on_error (bool): Whether to exit with a 1 code if there are errors. Default is True.
+
+    """
+
+    def __init__(
+        self,
+        should_exit_on_error: bool = True,
+    ):
+        self.should_exit_on_error = should_exit_on_error
+        self.shared_errors = {
+            "error": {},
+            "warning": {},
+        }
+
+    def add(
+        self, message: str, category: str = "", message_type: str = "error"
+    ) -> None:
+        """Add error to be logged. Prepend category if supplied. Output format:
+        error category - {text}
+
+        Args:
+            message (str): Error message
+            category (str): Error category. Defaults to "".
+            message_type (str): The type of message (error or warning). Default is "error"
+
+        Returns:
+            None
+        """
+        message = message.strip()
+        if category:
+            output = f"{category} - {message}"
+        else:
+            output = message
+        dict_of_sets_add(self.shared_errors[message_type], category, output)
+
+    @staticmethod
+    def missing_value_message(value_type: str, value: Any) -> str:
+        """
+        Generate a formatted message for a missing value of a specific type in
+        a fixed format:
+            error category - type n not found
+
+        Args:
+            value_type (str): The type of value that is missing
+            value (Any): The specific missing value
+
+        Returns:
+            str: A formatted message stating the missing value and its type
+        """
+        return f"{value_type} {str(value)} not found"
+
+    def add_missing_value(
+        self,
+        value_type: str,
+        value: Any,
+        category: str = "",
+        message_type: str = "error",
+    ) -> None:
+        """
+        Add a new message (typically a warning or error) concerning a missing value
+        to a dictionary of messages in a fixed format:
+            error category - type n not found
+
+        Args:
+            value_type (str): Type of value e.g. "sector"
+            value (Any): Missing value
+            category (str): Error category. Defaults to "".
+            message_type (str): The type of message (error or warning). Default is "error"
+        Returns:
+            None
+        """
+        self.add(
+            self.missing_value_message(value_type, value),
+            category,
+            message_type,
+        )
+
+    def multi_valued_message(
+        self, text: str, values: ListTuple
+    ) -> Optional[str]:
+        """
+        Generate a formatted message for a list of values in a fixed format:
+            n {text}. First 10 values: n1, n2, n3...
+        If there are 10 or fewer values, ". First 10 values" is omitted. Values
+        are cast to string.
+
+        Args:
+            text (str): Descriptive text for the issue (e.g., "invalid values")
+            values (ListTuple): The list of related values of concern
+
+        Returns:
+            Optional[str]: A formatted string in the format defined above
+        """
+        if not values:
+            return None
+        no_values = len(values)
+        if no_values > 10:
+            values = values[:10]
+            message_suffix = ". First 10 values"
+        else:
+            message_suffix = ""
+        return f"{no_values} {text}{message_suffix}: {', '.join(map(str, values))}"
+
+    def add_multi_valued(
+        self,
+        text: str,
+        values: ListTuple,
+        category: str = "",
+        message_type: str = "error",
+    ) -> bool:
+        """
+        Add a new message (typically a warning or error) concerning a list of
+        values to a set of messages in a fixed format:
+            error category - n {text}. First 10 values: n1, n2, n3...
+        If there are 10 or fewer values, ". First 10 values" is omitted. Values
+        are cast to string.
+
+        Args:
+            text (str): Text to use e.g. "negative values removed"
+            values (ListTuple): List of values of concern
+            category (str): Error category. Defaults to "".
+            message_type (str): The type of message (error or warning). Default is "error"
+        Returns:
+            bool: True if a message was added, False if not
+        """
+        message = self.multi_valued_message(text, values)
+        if message is None:
+            return False
+        self.add(message, category, message_type)
+        return True
+
+    def log(self) -> None:
+        """
+        Log errors then warnings, grouped by category and sorted within each
+
+        Returns:
+            None
+        """
+
+        for _, errors in self.shared_errors["error"].items():
+            errors = sorted(errors)
+            for error in errors:
+                logger.error(error)
+        for _, warnings in self.shared_errors["warning"].items():
+            warnings = sorted(warnings)
+            for warning in warnings:
+                logger.warning(warning)
+
+    def exit_on_error(self) -> None:
+        """Exit with a 1 code if there are errors and should_exit_on_error
+        is True
+
+        Returns:
+            None
+        """
+        if self.should_exit_on_error and self.shared_errors["error"]:
+            sys.exit(1)
+
+    def __enter__(self) -> "ErrorHandler":
+        return self
+
+    def __exit__(self, exc_type: Any, exc_value: Any, traceback: Any) -> None:
+        self.log()
+        if exc_type is None:
+            self.exit_on_error()
diff --git a/src/hdx/utilities/errors_onexit.py b/src/hdx/utilities/errors_onexit.py
@@ -1,53 +1,14 @@
 """Collect and log errors on exit."""
 
-import logging
-import sys
-from typing import Any
+import warnings
 
-logger = logging.getLogger(__name__)
+from hdx.utilities.error_handler import ErrorHandler
 
 
-class ErrorsOnExit:
-    """Class that enables recording of errors with logging of those errors on
-    exit."""
-
+class ErrorsOnExit(ErrorHandler):  # pragma: no cover
     def __init__(self) -> None:
-        self.errors = []
-
-    def add(self, message: str) -> None:
-        """Add error to be logged later.
-
-        Args:
-            message (str): Error message
-
-        Returns:
-            None
-        """
-        self.errors.append(message.strip())
-
-    def log(self) -> None:
-        """Log errors.
-
-        Returns:
-            None
-        """
-        for error in self.errors:
-            logger.error(error)
-
-    def exit_on_error(self) -> None:
-        """Exit with a 1 code if there are errors.
-
-        Returns:
-            None
-        """
-        if self.errors:
-            sys.exit(1)
-
-    def __enter__(self) -> "ErrorsOnExit":
-        logging.errors_on_exit = self
-        return self
-
-    def __exit__(self, exc_type: Any, exc_value: Any, traceback: Any) -> None:
-        self.log()
-        if exc_type is None:
-            self.exit_on_error()
+        warnings.warn(
+            "The ErrorsOnExit class was renamed ErrorHandler and will be removed in future!",
+            DeprecationWarning,
+        )
+        super().__init__()
diff --git a/tests/hdx/utilities/test_error_handler.py b/tests/hdx/utilities/test_error_handler.py
@@ -0,0 +1,56 @@
+"""Errors on exit Tests"""
+
+import logging
+
+import pytest
+
+from hdx.utilities.easy_logging import setup_logging
+from hdx.utilities.error_handler import ErrorHandler
+
+setup_logging()
+
+
+class TestErrorHandler:
+    def test_error_handler(self, caplog):
+        with ErrorHandler() as errors:
+            assert len(errors.shared_errors["warning"]) == 0
+            assert len(errors.shared_errors["error"]) == 0
+        with pytest.raises(SystemExit):
+            with caplog.at_level(logging.ERROR):
+                with ErrorHandler() as errors:
+                    errors.add("this is a error!")
+                    errors.add("this is a warning!", "warning 1", "warning")
+                    errors.add_missing_value(
+                        "this is a missing value error!",
+                        "problem value",
+                        "error 1",
+                        "error",
+                    )
+                    errors.add_multi_valued(
+                        "this is a multi valued warning!",
+                        (1, 2, 3, 4),
+                        "warning 1",
+                        "warning",
+                    )
+                    errors.add_multi_valued(
+                        "this is a multi valued error!",
+                        (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14),
+                        "error 1",
+                        "error",
+                    )
+                    errors.add_multi_valued(
+                        "this is another multi valued warning!",
+                        (),
+                        "warning 1",
+                        "warning",
+                    )
+                    assert len(errors.shared_errors["warning"]) == 1
+                    assert (
+                        len(errors.shared_errors["warning"]["warning 1"]) == 2
+                    )
+                    assert len(errors.shared_errors["error"]) == 2
+                    assert len(errors.shared_errors["error"][""]) == 1
+                    assert len(errors.shared_errors["error"]["error 1"]) == 2
+                assert "missing value" in caplog.text
+                assert "warning" not in caplog.text
+                assert "multi" not in caplog.text
diff --git a/tests/hdx/utilities/test_errors_onexit.py b/tests/hdx/utilities/test_errors_onexit.py