From cb47678e788287f852971850df6a3468c940e26b Mon Sep 17 00:00:00 2001 From: Josh VanDeraa Date: Thu, 12 Feb 2026 09:58:10 -0600 Subject: [PATCH 1/3] Refactor provider to use OpenAI parser as a fallback and enhance tests for processor behavior --- circuit_maintenance_parser/provider.py | 24 +++++-- tests/unit/test_providers.py | 91 ++++++++++++++++++++++++-- 2 files changed, 103 insertions(+), 12 deletions(-) diff --git a/circuit_maintenance_parser/provider.py b/circuit_maintenance_parser/provider.py index 8be9519b..40371e5c 100644 --- a/circuit_maintenance_parser/provider.py +++ b/circuit_maintenance_parser/provider.py @@ -130,12 +130,6 @@ def get_maintenances(self, data: NotificationData) -> Iterable[Maintenance]: logger.debug("Skipping notification %s due filtering policy for %s.", data, self.__class__.__name__) return [] - if os.getenv("PARSER_OPENAI_API_KEY"): - self._processors.append(CombinedProcessor(data_parsers=[EmailDateParser, OpenAIParser])) - - # Add subject to all html or text/* data_parts if not already present. - self.add_subject_to_text(data) - for processor in self._processors: try: return processor.process(data, self.get_extended_data()) @@ -150,6 +144,22 @@ def get_maintenances(self, data: NotificationData) -> Iterable[Maintenance]: related_exceptions.append(exc) continue + # Use OpenAI parser as a last resort if all other processors failed. + if os.getenv("PARSER_OPENAI_API_KEY"): + self.add_subject_to_text(data) + openai_processor = CombinedProcessor(data_parsers=[EmailDateParser, OpenAIParser]) + try: + return openai_processor.process(data, self.get_extended_data()) + except ProcessorError as exc: + process_error_message = ( + f"- Processor {openai_processor.__class__.__name__} from {provider_name} failed due to: %s\n" + ) + logger.debug(process_error_message, traceback.format_exc()) + + related_exc = rgetattr(exc, "__cause__") + error_message += process_error_message % related_exc + related_exceptions.append(exc) + raise ProviderError( (f"Failed creating Maintenance notification for {provider_name}.\nDetails:\n{error_message}"), related_exceptions=related_exceptions, @@ -165,7 +175,7 @@ def add_subject_to_text(self, data: NotificationData): if subject: new_data_parts = [] for part in data.data_parts: - if part.type.startswith("text/") or part.type.startswith("html"): + if (part.type.startswith("text/") or part.type.startswith("html")) and part.type != "text/calendar": content_str = part.content.decode(errors="ignore") if subject not in content_str: # Append subject and update content diff --git a/tests/unit/test_providers.py b/tests/unit/test_providers.py index 48879bcd..19ed8244 100644 --- a/tests/unit/test_providers.py +++ b/tests/unit/test_providers.py @@ -116,21 +116,78 @@ class ProviderWithIncludeFilter(GenericProvider): "provider_class", [GenericProvider, AquaComms], ) -def test_provider_gets_mlparser(provider_class): - """Test to check the any provider gets a default ML parser when ENV is activated.""" +def test_provider_falls_back_to_openai(provider_class): + """Test that OpenAI parser is used as last resort when all other processors fail.""" os.environ["PARSER_OPENAI_API_KEY"] = "some_api_key" data = NotificationData.init_from_raw("text/plain", b"fake data") data.add_data_part("text/html", b"other data") provider = provider_class() + original_processor_count = len(provider._processors) # pylint: disable=protected-access + + with patch("circuit_maintenance_parser.processor.GenericProcessor.process") as mock_processor: + # All native processors fail, then OpenAI processor succeeds + mock_processor.side_effect = [ProcessorError] * original_processor_count + [[{"a": "b"}]] + provider.get_maintenances(data) + # Native processors + 1 OpenAI call + assert mock_processor.call_count == original_processor_count + 1 + + # OpenAI processor should NOT be appended to the provider's processor list + assert len(provider._processors) == original_processor_count # pylint: disable=protected-access + os.environ.pop("PARSER_OPENAI_API_KEY", None) + + +def test_provider_does_not_use_openai_when_native_succeeds(): + """Test that OpenAI parser is not invoked when a native processor succeeds.""" + os.environ["PARSER_OPENAI_API_KEY"] = "some_api_key" + data = NotificationData.init_from_raw("text/plain", b"fake data") + + provider = GenericProvider() with patch("circuit_maintenance_parser.processor.GenericProcessor.process") as mock_processor: mock_processor.return_value = [{"a": "b"}] provider.get_maintenances(data) + # Only the native processor should be called + assert mock_processor.call_count == 1 - assert provider._processors[-1] == CombinedProcessor( # pylint: disable=protected-access - data_parsers=[EmailDateParser, OpenAIParser] - ) + os.environ.pop("PARSER_OPENAI_API_KEY", None) + + +def test_provider_data_not_mutated_when_native_succeeds(): + """Test that add_subject_to_text is not called when native processors succeed.""" + os.environ["PARSER_OPENAI_API_KEY"] = "some_api_key" + data = NotificationData.init_from_raw("text/plain", b"fake data") + data.add_data_part("email-header-subject", b"Test Subject") + + original_content = data.data_parts[0].content + provider = GenericProvider() + + with patch("circuit_maintenance_parser.processor.GenericProcessor.process") as mock_processor: + mock_processor.return_value = [{"a": "b"}] + provider.get_maintenances(data) + + # Data should not have been mutated since native processor succeeded + assert data.data_parts[0].content == original_content + os.environ.pop("PARSER_OPENAI_API_KEY", None) + + +def test_provider_no_repeated_append_on_multiple_calls(): + """Test that calling get_maintenances multiple times doesn't grow the processor list.""" + os.environ["PARSER_OPENAI_API_KEY"] = "some_api_key" + data = NotificationData.init_from_raw("text/plain", b"fake data") + + provider = GenericProvider() + original_processor_count = len(provider._processors) # pylint: disable=protected-access + + with patch("circuit_maintenance_parser.processor.GenericProcessor.process") as mock_processor: + mock_processor.return_value = [{"a": "b"}] + provider.get_maintenances(data) + provider.get_maintenances(data) + provider.get_maintenances(data) + + # Processor list should never grow + assert len(provider._processors) == original_processor_count # pylint: disable=protected-access + os.environ.pop("PARSER_OPENAI_API_KEY", None) def test_add_subject_to_text_appends_subject_to_text_parts(): @@ -230,3 +287,27 @@ def test_add_subject_to_text_handles_decode_errors(): # This should not raise an exception due to errors="ignore" in decode() provider.add_subject_to_text(data) + + +def test_add_subject_to_text_skips_text_calendar(): + """Test that add_subject_to_text does not modify text/calendar parts.""" + provider = GenericProvider() + + data = NotificationData() + data.add_data_part("email-header-subject", b"Planned Work Notification") + data.add_data_part( + "text/calendar", + b"BEGIN:VCALENDAR\r\nVERSION:2.0\r\nBEGIN:VEVENT\r\nEND:VEVENT\r\nEND:VCALENDAR", + ) + data.add_data_part("text/plain", b"Some plain text content") + + original_calendar_content = data.data_parts[1].content + + provider.add_subject_to_text(data) + + # text/calendar part should remain unchanged + assert data.data_parts[1].content == original_calendar_content + assert b"Planned Work Notification" not in data.data_parts[1].content + + # text/plain part should have subject appended + assert b"Planned Work Notification" in data.data_parts[2].content From e09b1c016495fe6a2611fff8cf1056a898f37cdc Mon Sep 17 00:00:00 2001 From: Josh VanDeraa Date: Thu, 12 Feb 2026 13:56:20 -0600 Subject: [PATCH 2/3] Refactor OpenAI parser fallback and clean up test imports --- changes/372.fixed | 1 + tests/unit/test_providers.py | 5 ++--- 2 files changed, 3 insertions(+), 3 deletions(-) create mode 100644 changes/372.fixed diff --git a/changes/372.fixed b/changes/372.fixed new file mode 100644 index 00000000..23a9f6a5 --- /dev/null +++ b/changes/372.fixed @@ -0,0 +1 @@ +Refactored OpenAI parser to be used as a fallback and fixed subject injection to skip text/calendar parts. diff --git a/tests/unit/test_providers.py b/tests/unit/test_providers.py index 19ed8244..6672d616 100644 --- a/tests/unit/test_providers.py +++ b/tests/unit/test_providers.py @@ -7,9 +7,8 @@ from circuit_maintenance_parser.data import NotificationData from circuit_maintenance_parser.errors import ProcessorError, ProviderError -from circuit_maintenance_parser.parser import EmailDateParser, Parser -from circuit_maintenance_parser.parsers.openai import OpenAIParser -from circuit_maintenance_parser.processor import CombinedProcessor, SimpleProcessor +from circuit_maintenance_parser.parser import Parser +from circuit_maintenance_parser.processor import SimpleProcessor from circuit_maintenance_parser.provider import AquaComms, GenericProvider # pylint: disable=use-implicit-booleaness-not-comparison From 0ccada9c560f7b9eb51fc08e38d91034c180d2fb Mon Sep 17 00:00:00 2001 From: Josh VanDeraa Date: Thu, 12 Feb 2026 13:59:21 -0600 Subject: [PATCH 3/3] Update tests/unit/test_providers.py Co-authored-by: Glenn Matthews --- tests/unit/test_providers.py | 33 ++++++++++++++++----------------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/tests/unit/test_providers.py b/tests/unit/test_providers.py index 19ed8244..2c4c6c5c 100644 --- a/tests/unit/test_providers.py +++ b/tests/unit/test_providers.py @@ -118,23 +118,22 @@ class ProviderWithIncludeFilter(GenericProvider): ) def test_provider_falls_back_to_openai(provider_class): """Test that OpenAI parser is used as last resort when all other processors fail.""" - os.environ["PARSER_OPENAI_API_KEY"] = "some_api_key" - data = NotificationData.init_from_raw("text/plain", b"fake data") - data.add_data_part("text/html", b"other data") - - provider = provider_class() - original_processor_count = len(provider._processors) # pylint: disable=protected-access - - with patch("circuit_maintenance_parser.processor.GenericProcessor.process") as mock_processor: - # All native processors fail, then OpenAI processor succeeds - mock_processor.side_effect = [ProcessorError] * original_processor_count + [[{"a": "b"}]] - provider.get_maintenances(data) - # Native processors + 1 OpenAI call - assert mock_processor.call_count == original_processor_count + 1 - - # OpenAI processor should NOT be appended to the provider's processor list - assert len(provider._processors) == original_processor_count # pylint: disable=protected-access - os.environ.pop("PARSER_OPENAI_API_KEY", None) + with patch.dict(os.environ, {"PARSER_OPENAI_API_KEY": "some_api_key"}): + data = NotificationData.init_from_raw("text/plain", b"fake data") + data.add_data_part("text/html", b"other data") + + provider = provider_class() + original_processor_count = len(provider._processors) # pylint: disable=protected-access + + with patch("circuit_maintenance_parser.processor.GenericProcessor.process") as mock_processor: + # All native processors fail, then OpenAI processor succeeds + mock_processor.side_effect = [ProcessorError] * original_processor_count + [[{"a": "b"}]] + provider.get_maintenances(data) + # Native processors + 1 OpenAI call + assert mock_processor.call_count == original_processor_count + 1 + + # OpenAI processor should NOT be appended to the provider's processor list + assert len(provider._processors) == original_processor_count # pylint: disable=protected-access def test_provider_does_not_use_openai_when_native_succeeds():