From 17a4874db7de7caefd1e9d095cacf0734fa82480 Mon Sep 17 00:00:00 2001 From: Akol125 Date: Tue, 11 Nov 2025 21:04:44 +0000 Subject: [PATCH 01/18] start uplift schema ticket2 --- .../common/validator/expression_checker.py | 43 +++++++++++++++++++ .../shared/src/common/validator/validator.py | 12 ++++-- .../validator/test_schemas/test_schema.json | 2 +- 3 files changed, 52 insertions(+), 5 deletions(-) diff --git a/lambdas/shared/src/common/validator/expression_checker.py b/lambdas/shared/src/common/validator/expression_checker.py index dcd36ffde..369b2f535 100644 --- a/lambdas/shared/src/common/validator/expression_checker.py +++ b/lambdas/shared/src/common/validator/expression_checker.py @@ -26,6 +26,8 @@ def validate_expression( match expression_type: case "DATETIME": return self._validate_datetime(expression_rule, field_name, field_value, row) + case "STRING": + return self._validate_for_string_values(expression_rule, field_name, field_value, row) case "DATE": return self._validate_datetime(expression_rule, field_name, field_value, row) case "UUID": @@ -426,6 +428,47 @@ def _validate_empty(self, _expression_rule: str, field_name: str, field_value: s message = MESSAGES[ExceptionLevels.UNEXPECTED_EXCEPTION] % (e.__class__.__name__, e) return ErrorReport(ExceptionLevels.UNEXPECTED_EXCEPTION, message, row, field_name, "", self.summarise) + # String Pre-Validation + def _validate_for_string_values( + self, _expression_rule: str, field_name: str, field_value: str, row: dict + ) -> ErrorReport: + """ + Apply validation to a string field to ensure it is a non-empty string which meets + the length requirements and predefined values requirements + """ + defined_length: int = (10,) + max_length: int = (None,) + predefined_values: list = (None,) + spaces_allowed: bool = False + try: + if not isinstance(field_value, str): + raise TypeError(f"{field_name} must be a string") + + if field_value.isspace(): + raise ValueError(f"{field_name} must be a non-empty string") + + if defined_length: + if len(field_value) != defined_length: + raise ValueError(f"{field_name} must be {defined_length} characters") + else: + if len(field_value) == 0: + raise ValueError(f"{field_name} must be a non-empty string") + + if max_length: + if len(field_value) > max_length: + raise ValueError(f"{field_name} must be {max_length} or fewer characters") + if predefined_values: + if field_value not in predefined_values: + raise ValueError(f"{field_name} must be one of the following: " + str(", ".join(predefined_values))) + + if not spaces_allowed: + if " " in field_value: + raise ValueError(f"{field_name} must not contain spaces") + except Exception as e: + if self.report_unexpected_exception: + message = MESSAGES[ExceptionLevels.UNEXPECTED_EXCEPTION] % (e.__class__.__name__, e) + return ErrorReport(ExceptionLevels.UNEXPECTED_EXCEPTION, message, row, field_name, "", self.summarise) + # Not Empty Validate def _validate_not_empty(self, _expression_rule: str, field_name: str, field_value: str, row: dict) -> ErrorReport: try: diff --git a/lambdas/shared/src/common/validator/validator.py b/lambdas/shared/src/common/validator/validator.py index ad5bcd54e..701e0d115 100644 --- a/lambdas/shared/src/common/validator/validator.py +++ b/lambdas/shared/src/common/validator/validator.py @@ -72,10 +72,14 @@ def _validate_expression( add_error_record( error_records, error_record, expression_error_group, expression_name, expression_id, error_level ) - except Exception: - print(f"Exception validating expression {expression_id} on row {row}: {error_record}") - row += 1 - return row + except Exception as e: + message = f"Expression Validation Unexpected exception [{e.__class__.__name__}]: {e}" + error_record = ErrorReport(code=ExceptionLevels.UNEXPECTED_EXCEPTION, message=message) + error_record = ErrorReport(code=ExceptionLevels.PARSING_ERROR, message=message) + add_error_record( + error_records, error_record, expression_error_group, expression_name, expression_id, error_level + ) + return def validate_fhir( self, diff --git a/lambdas/shared/tests/test_common/validator/test_schemas/test_schema.json b/lambdas/shared/tests/test_common/validator/test_schemas/test_schema.json index dca4a35e1..1408ce08b 100644 --- a/lambdas/shared/tests/test_common/validator/test_schemas/test_schema.json +++ b/lambdas/shared/tests/test_common/validator/test_schemas/test_schema.json @@ -12,7 +12,7 @@ "errorLevel": 0, "expression": { "expressionName": "NHS Number Not Empty Check", - "expressionType": "NOTEMPTY", + "expressionType": "STRING", "expressionRule": "" }, "errorGroup": "validity" From 24932453552b7cef7e1f946af730a34c8df4c7fc Mon Sep 17 00:00:00 2001 From: Akol125 Date: Wed, 12 Nov 2025 14:13:59 +0000 Subject: [PATCH 02/18] remove unused files --- .../src/common/validator/expression_checker.py | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/lambdas/shared/src/common/validator/expression_checker.py b/lambdas/shared/src/common/validator/expression_checker.py index 369b2f535..30c9d2b37 100644 --- a/lambdas/shared/src/common/validator/expression_checker.py +++ b/lambdas/shared/src/common/validator/expression_checker.py @@ -140,21 +140,6 @@ def _validate_integer(self, expression_rule: str, field_name: str, field_value: message = MESSAGES[ExceptionLevels.UNEXPECTED_EXCEPTION] % (e.__class__.__name__, e) return ErrorReport(ExceptionLevels.UNEXPECTED_EXCEPTION, message, row, field_name, "", self.summarise) - # Float Validate - def _validate_float(self, _expression_rule: str, field_name: str, field_value: str, row: dict) -> ErrorReport: - try: - float(field_value) - except RecordError as e: - code = e.code if e.code is not None else ExceptionLevels.RECORD_CHECK_FAILED - message = e.message if e.message is not None else MESSAGES[ExceptionLevels.RECORD_CHECK_FAILED] - if e.details is not None: - details = e.details - return ErrorReport(code, message, row, field_name, details, self.summarise) - except Exception as e: - if self.report_unexpected_exception: - message = MESSAGES[ExceptionLevels.UNEXPECTED_EXCEPTION] % (e.__class__.__name__, e) - return ErrorReport(ExceptionLevels.UNEXPECTED_EXCEPTION, message, row, field_name, "", self.summarise) - # Length Validate def _validate_length(self, expression_rule: str, field_name: str, field_value: str, row: dict) -> ErrorReport: try: From 9b50fcd4eee31ff40f53a6b9789bdf1b406f8cb7 Mon Sep 17 00:00:00 2001 From: Akol125 Date: Wed, 12 Nov 2025 17:29:40 +0000 Subject: [PATCH 03/18] uplift validation rules and create utility used in expression checker --- .../common/validator/expression_checker.py | 59 ++++++++++++++++++- .../src/common/validator/validation_utils.py | 58 ++++++++++++++++++ .../validator/test_schemas/test_schema.json | 8 +-- 3 files changed, 120 insertions(+), 5 deletions(-) create mode 100644 lambdas/shared/src/common/validator/validation_utils.py diff --git a/lambdas/shared/src/common/validator/expression_checker.py b/lambdas/shared/src/common/validator/expression_checker.py index 30c9d2b37..df81082d2 100644 --- a/lambdas/shared/src/common/validator/expression_checker.py +++ b/lambdas/shared/src/common/validator/expression_checker.py @@ -1,11 +1,13 @@ import datetime import re import uuid +from typing import Optional from common.validator.constants.enums import MESSAGES, ExceptionLevels, MessageLabel from common.validator.error_report.record_error import ErrorReport, RecordError from common.validator.lookup_expressions.key_data import KeyData from common.validator.lookup_expressions.lookup_data import LookUpData +from common.validator.validation_utils import check_if_future_date class ExpressionChecker: @@ -28,8 +30,10 @@ def validate_expression( return self._validate_datetime(expression_rule, field_name, field_value, row) case "STRING": return self._validate_for_string_values(expression_rule, field_name, field_value, row) + case "LIST": + return self._validate_for_list_values(expression_rule, field_name, field_value, row) case "DATE": - return self._validate_datetime(expression_rule, field_name, field_value, row) + return self.validate_for_date(expression_rule, field_name, field_value, row) case "UUID": return self._validate_uuid(expression_rule, field_name, field_value, row) case "INT": @@ -97,6 +101,23 @@ def _validate_datetime(self, _expression_rule, field_name, field_value, row) -> message = MESSAGES[ExceptionLevels.UNEXPECTED_EXCEPTION] % (e.__class__.__name__, e) return ErrorReport(ExceptionLevels.UNEXPECTED_EXCEPTION, message, row, field_name, "", self.summarise) + def validate_for_date(self, _expression_rule, field_name, field_value, row, future_date_allowed: bool = False): + """ + Apply pre-validation to a date field to ensure that it is a string (JSON dates must be + written as strings) containing a valid date in the format "YYYY-MM-DD" + """ + if not isinstance(field_value, str): + raise TypeError(f"{field_name} must be a string") + + try: + parsed_date = datetime.strptime(field_value, "%Y-%m-%d").date() + except ValueError as value_error: + raise ValueError(f'{field_name} must be a valid date string in the format "YYYY-MM-DD"') from value_error + + # Enforce future date rule using central checker after successful parse + if not future_date_allowed and check_if_future_date(parsed_date): + raise ValueError(f"{field_name} must not be in the future") + # UUID validate def _validate_uuid(self, _expression_rule: str, field_name: str, field_value: str, row: dict) -> ErrorReport: try: @@ -206,6 +227,42 @@ def _validate_equal(self, expression_rule: str, field_name: str, field_value: st message = MESSAGES[ExceptionLevels.UNEXPECTED_EXCEPTION] % (e.__class__.__name__, e) return ErrorReport(ExceptionLevels.UNEXPECTED_EXCEPTION, message, row, field_name, "", self.summarise) + def for_list(self, expression_rule: str, field_name: str, field_value: list, row: dict): + """ + Apply validation to a list field to ensure it is a non-empty list which meets the length requirements and + requirements, if applicable, for each list element to be a non-empty string or non-empty dictionary + """ + defined_length: Optional[int] = (None,) + max_length: Optional[int] = (None,) + elements_are_strings: bool = (False,) + string_element_max_length: Optional[int] = (None,) + elements_are_dicts: bool = (False,) + if not isinstance(field_value, list): + raise TypeError(f"{field_name} must be an array") + + if defined_length: + if len(field_value) != defined_length: + raise ValueError(f"{field_name} must be an array of length {defined_length}") + else: + if len(field_value) == 0: + raise ValueError(f"{field_name} must be a non-empty array") + + if max_length is not None and len(field_value) > max_length: + raise ValueError(f"{field_name} must be an array of maximum length {max_length}") + + if elements_are_strings: + for idx, element in enumerate(field_value): + self._validate_for_string_values.for_string( + element, f"{field_name}[{idx}]", max_length=string_element_max_length + ) + + if elements_are_dicts: + for element in field_value: + if not isinstance(element, dict): + raise TypeError(f"{field_name} must be an array of objects") + if len(element) == 0: + raise ValueError(f"{field_name} must be an array of non-empty objects") + # Not Equal Validate def _validate_not_equal(self, expression_rule: str, field_name: str, field_value: str, row: dict) -> ErrorReport: try: diff --git a/lambdas/shared/src/common/validator/validation_utils.py b/lambdas/shared/src/common/validator/validation_utils.py new file mode 100644 index 000000000..e216ca55a --- /dev/null +++ b/lambdas/shared/src/common/validator/validation_utils.py @@ -0,0 +1,58 @@ +from datetime import date, datetime + +from stdnum.verhoeff import validate + + +def check_if_future_date(parsed_value: date | datetime): + """ + Ensure a parsed date or datetime object is not in the future. + """ + if isinstance(parsed_value, datetime): + now = datetime.now(parsed_value.tzinfo) if parsed_value.tzinfo else datetime.now() + elif isinstance(parsed_value, date): + now = datetime.now().date() + if parsed_value > now: + return True + return False + + +def is_valid_simple_snomed(simple_snomed: str) -> bool: + "check the snomed code valid or not." + min_snomed_length = 6 + max_snomed_length = 18 + return ( + simple_snomed is not None + and simple_snomed.isdigit() + and simple_snomed[0] != "0" + and min_snomed_length <= len(simple_snomed) <= max_snomed_length + and validate(simple_snomed) + and (simple_snomed[-3:-1] in ("00", "10")) + ) + + +def nhs_number_mod11_check(nhs_number: str) -> bool: + """ + Parameters:- + nhs_number: str + The NHS number to be checked. + Returns:- + True if the nhs number passes the mod 11 check, False otherwise. + + Definition of NHS number can be found at: + https://www.datadictionary.nhs.uk/attributes/nhs_number.html + """ + is_mod11 = False + if nhs_number.isdigit() and len(nhs_number) == 10: + # Create a reversed list of weighting factors + weighting_factors = list(range(2, 11))[::-1] + # Multiply each of the first nine digits by the weighting factor and add the results of each multiplication + # together + total = sum(int(digit) * weight for digit, weight in zip(nhs_number[:-1], weighting_factors)) + # Divide the total by 11 and establish the remainder and subtract the remainder from 11 to give the check digit. + # If the result is 11 then a check digit of 0 is used. If the result is 10 then the NHS NUMBER is invalid and + # not used. + check_digit = 0 if (total % 11 == 0) else (11 - (total % 11)) + # Check the remainder matches the check digit. If it does not, the NHS NUMBER is invalid. + is_mod11 = check_digit == int(nhs_number[-1]) + + return is_mod11 diff --git a/lambdas/shared/tests/test_common/validator/test_schemas/test_schema.json b/lambdas/shared/tests/test_common/validator/test_schemas/test_schema.json index 1408ce08b..b819e5eff 100644 --- a/lambdas/shared/tests/test_common/validator/test_schemas/test_schema.json +++ b/lambdas/shared/tests/test_common/validator/test_schemas/test_schema.json @@ -25,7 +25,7 @@ "errorLevel": 0, "expression": { "expressionName": "Person Forname Not Empty Check", - "expressionType": "NOTEMPTY", + "expressionType": "LIST", "expressionRule": "" }, "errorGroup": "completeness" @@ -39,7 +39,7 @@ "parentExpression": "01K5EGR0C7Y1WJ0BC803SQDWK4", "expression": { "expressionName": "Person Surname Not Empty Check", - "expressionType": "NOTEMPTY", + "expressionType": "STRING", "expressionRule": "" }, "errorGroup": "completeness" @@ -52,7 +52,7 @@ "errorLevel": 1, "expression": { "expressionName": "Date of Birth Not Empty Check", - "expressionType": "NOTEMPTY", + "expressionType": "DATE", "expressionRule": "" }, "errorGroup": "consistency" @@ -65,7 +65,7 @@ "errorLevel": 1, "expression": { "expressionName": "Gender Valid Check", - "expressionType": "NOTEMPTY", + "expressionType": "STRING", "expressionRule": "" }, "errorGroup": "consistency" From 69d8e240fc1d6755a292847f29fecfe2a1438240 Mon Sep 17 00:00:00 2001 From: Akol125 Date: Wed, 12 Nov 2025 22:09:04 +0000 Subject: [PATCH 04/18] add expressionType for all 34 fields --- .../common/validator/expression_checker.py | 213 ++++++++++-------- .../shared/src/common/validator/validator.py | 1 + .../validator/test_schemas/test_schema.json | 62 ++--- 3 files changed, 152 insertions(+), 124 deletions(-) diff --git a/lambdas/shared/src/common/validator/expression_checker.py b/lambdas/shared/src/common/validator/expression_checker.py index df81082d2..efd943cd2 100644 --- a/lambdas/shared/src/common/validator/expression_checker.py +++ b/lambdas/shared/src/common/validator/expression_checker.py @@ -1,7 +1,7 @@ import datetime import re import uuid -from typing import Optional +from typing import Decimal, Optional, Union from common.validator.constants.enums import MESSAGES, ExceptionLevels, MessageLabel from common.validator.error_report.record_error import ErrorReport, RecordError @@ -26,14 +26,22 @@ def validate_expression( self, expression_type: str, expression_rule: str, field_name: str, field_value: str, row: dict ) -> ErrorReport: match expression_type: - case "DATETIME": - return self._validate_datetime(expression_rule, field_name, field_value, row) case "STRING": - return self._validate_for_string_values(expression_rule, field_name, field_value, row) + return self.validation_for_string_values(expression_rule, field_name, field_value, row) case "LIST": - return self._validate_for_list_values(expression_rule, field_name, field_value, row) + return self.validation_for_list(expression_rule, field_name, field_value, row) case "DATE": - return self.validate_for_date(expression_rule, field_name, field_value, row) + return self.validation_for_date(expression_rule, field_name, field_value, row) + case "DATETIME": + return self.validation_for_date_time(expression_rule, field_name, field_value, row) + case "POSITIVEINTEGER": + return self.validation_for_positive_integer(expression_rule, field_name, field_value, row) + case "UNIQUELIST": + return self.validation_for_unique_list(expression_rule, field_name, field_value, row) + case "BOOLEAN": + return self._validate_boolean(expression_rule, field_name, field_value, row) + case "INTDECIMAL": + return self.validation_for_integer_or_decimal(expression_rule, field_name, field_value, row) case "UUID": return self._validate_uuid(expression_rule, field_name, field_value, row) case "INT": @@ -86,22 +94,7 @@ def validate_expression( return "Schema expression not found! Check your expression type : " + expression_type # ISO 8601 date/datetime validate (currently date-only) - def _validate_datetime(self, _expression_rule, field_name, field_value, row) -> ErrorReport: - try: - # Current behavior expects date-only; datetime raises and is handled below - datetime.date.fromisoformat(field_value) - except RecordError as e: - code = e.code if e.code is not None else ExceptionLevels.RECORD_CHECK_FAILED - message = e.message if e.message is not None else MESSAGES[ExceptionLevels.RECORD_CHECK_FAILED] - if e.details is not None: - details = e.details - return ErrorReport(code, message, row, field_name, details, self.summarise) - except Exception as e: - if self.report_unexpected_exception: - message = MESSAGES[ExceptionLevels.UNEXPECTED_EXCEPTION] % (e.__class__.__name__, e) - return ErrorReport(ExceptionLevels.UNEXPECTED_EXCEPTION, message, row, field_name, "", self.summarise) - - def validate_for_date(self, _expression_rule, field_name, field_value, row, future_date_allowed: bool = False): + def validation_for_date(self, _expression_rule, field_name, field_value, row, future_date_allowed: bool = False): """ Apply pre-validation to a date field to ensure that it is a string (JSON dates must be written as strings) containing a valid date in the format "YYYY-MM-DD" @@ -133,53 +126,51 @@ def _validate_uuid(self, _expression_rule: str, field_name: str, field_value: st message = MESSAGES[ExceptionLevels.UNEXPECTED_EXCEPTION] % (e.__class__.__name__, e) return ErrorReport(ExceptionLevels.UNEXPECTED_EXCEPTION, message, row, field_name, "", self.summarise) - # Integer Validate - def _validate_integer(self, expression_rule: str, field_name: str, field_value: str, row: dict) -> ErrorReport: - try: - int(field_value) - if expression_rule: - check_value = int(expression_rule) - if int(field_value) != check_value: - raise RecordError( - ExceptionLevels.RECORD_CHECK_FAILED, - "Value integer check failed", - MessageLabel.VALUE_MISMATCH_MSG - + MessageLabel.EXPECTED_LABEL - + expression_rule - + " " - + MessageLabel.FOUND_LABEL - + field_value, - ) - except RecordError as e: - code = e.code if e.code is not None else ExceptionLevels.RECORD_CHECK_FAILED - message = e.message if e.message is not None else MESSAGES[ExceptionLevels.RECORD_CHECK_FAILED] - if e.details is not None: - details = e.details - return ErrorReport(code, message, row, field_name, details, self.summarise) - except Exception as e: - if self.report_unexpected_exception: - message = MESSAGES[ExceptionLevels.UNEXPECTED_EXCEPTION] % (e.__class__.__name__, e) - return ErrorReport(ExceptionLevels.UNEXPECTED_EXCEPTION, message, row, field_name, "", self.summarise) + def validation_for_positive_integer(field_value: int, field_name: str, max_value: int = None): + """ + Apply pre-validation to an integer field to ensure that it is a positive integer, + which does not exceed the maximum allowed value (if applicable) + """ + # This check uses type() instead of isinstance() because bool is a subclass of int. + if type(field_value) is not int: # pylint: disable=unidiomatic-typecheck + raise TypeError(f"{field_name} must be a positive integer") - # Length Validate - def _validate_length(self, expression_rule: str, field_name: str, field_value: str, row: dict) -> ErrorReport: - try: - str_len = len(field_value) - check_length = int(expression_rule) - if str_len > check_length: - raise RecordError( - ExceptionLevels.RECORD_CHECK_FAILED, "Value length check failed", "Value is longer than expected" + if field_value <= 0: + raise ValueError(f"{field_name} must be a positive integer") + + if max_value: + if field_value > max_value: + raise ValueError(f"{field_name} must be an integer in the range 1 to {max_value}") + + def validation_for_integer_or_decimal(field_value: Union[int, Decimal], field_location: str): + """ + Apply pre-validation to a decimal field to ensure that it is an integer or decimal, + which does not exceed the maximum allowed number of decimal places (if applicable) + """ + if not ( + # This check uses type() instead of isinstance() because bool is a subclass of int. + type(field_value) is int # pylint: disable=unidiomatic-typecheck + or type(field_value) is Decimal # pylint: disable=unidiomatic-typecheck + ): + raise TypeError(f"{field_location} must be a number") + + def validation_for_unique_list( + list_to_check: list, + unique_value_in_list: str, + field_location: str, + ): + """ + Apply pre-validation to a list of dictionaries to ensure that a specified value in each + dictionary is unique across the list + """ + found = [] + for item in list_to_check: + if item[unique_value_in_list] in found: + raise ValueError( + f"{field_location.replace('FIELD_TO_REPLACE', item[unique_value_in_list])}" + " must be unique" ) - except RecordError as e: - code = e.code if e.code is not None else ExceptionLevels.RECORD_CHECK_FAILED - message = e.message if e.message is not None else MESSAGES[ExceptionLevels.RECORD_CHECK_FAILED] - if e.details is not None: - details = e.details - return ErrorReport(code, message, row, field_name, details, self.summarise) - except Exception as e: - if self.report_unexpected_exception: - message = MESSAGES[ExceptionLevels.UNEXPECTED_EXCEPTION] % (e.__class__.__name__, e) - return ErrorReport(ExceptionLevels.UNEXPECTED_EXCEPTION, message, row, field_name, "", self.summarise) + + found.append(item[unique_value_in_list]) # Regex Validate def _validate_regex(self, expression_rule: str, field_name: str, field_value: str, row: dict) -> ErrorReport: @@ -202,32 +193,12 @@ def _validate_regex(self, expression_rule: str, field_name: str, field_value: st message = MESSAGES[ExceptionLevels.UNEXPECTED_EXCEPTION] % (e.__class__.__name__, e) return ErrorReport(ExceptionLevels.UNEXPECTED_EXCEPTION, message, row, field_name, "", self.summarise) - # Equal Validate - def _validate_equal(self, expression_rule: str, field_name: str, field_value: str, row: dict) -> ErrorReport: - try: - if field_value != expression_rule: - raise RecordError( - ExceptionLevels.RECORD_CHECK_FAILED, - "Value equals check failed", - MessageLabel.VALUE_MISMATCH_MSG - + MessageLabel.EXPECTED_LABEL - + expression_rule - + " " - + MessageLabel.FOUND_LABEL - + field_value, - ) - except RecordError as e: - code = e.code if e.code is not None else ExceptionLevels.RECORD_CHECK_FAILED - message = e.message if e.message is not None else MESSAGES[ExceptionLevels.RECORD_CHECK_FAILED] - if e.details is not None: - details = e.details - return ErrorReport(code, message, row, field_name, details, self.summarise) - except Exception as e: - if self.report_unexpected_exception: - message = MESSAGES[ExceptionLevels.UNEXPECTED_EXCEPTION] % (e.__class__.__name__, e) - return ErrorReport(ExceptionLevels.UNEXPECTED_EXCEPTION, message, row, field_name, "", self.summarise) + def validation_for_boolean(field_value: str, field_name: str): + """Apply pre-validation to a boolean field to ensure that it is a boolean""" + if not isinstance(field_value, bool): + raise TypeError(f"{field_name} must be a boolean") - def for_list(self, expression_rule: str, field_name: str, field_value: list, row: dict): + def validation_for_list(self, expression_rule: str, field_name: str, field_value: list, row: dict): """ Apply validation to a list field to ensure it is a non-empty list which meets the length requirements and requirements, if applicable, for each list element to be a non-empty string or non-empty dictionary @@ -263,6 +234,62 @@ def for_list(self, expression_rule: str, field_name: str, field_value: list, row if len(element) == 0: raise ValueError(f"{field_name} must be an array of non-empty objects") + def validation_for_date_time(field_value: str, field_location: str, strict_timezone: bool = True): + """ + Apply pre-validation to a datetime field to ensure that it is a string (JSON dates must be written as strings) + containing a valid datetime. Note that partial dates are valid for FHIR, but are not allowed for this API. + Valid formats are any of the following: + * 'YYYY-MM-DD' - Full date only + * 'YYYY-MM-DDThh:mm:ss%z' - Full date, time without milliseconds, timezone + * 'YYYY-MM-DDThh:mm:ss.f%z' - Full date, time with milliseconds (any level of precision), timezone + """ + + if not isinstance(field_value, str): + raise TypeError(f"{field_location} must be a string") + + error_message = ( + f"{field_location} must be a valid datetime in one of the following formats:" + "- 'YYYY-MM-DD' — Full date only" + "- 'YYYY-MM-DDThh:mm:ss%z' — Full date and time with timezone (e.g. +00:00 or +01:00)" + "- 'YYYY-MM-DDThh:mm:ss.f%z' — Full date and time with milliseconds and timezone" + "- Date must not be in the future." + ) + if strict_timezone: + error_message += ( + "Only '+00:00' and '+01:00' are accepted as valid timezone offsets.\n" + f"Note that partial dates are not allowed for {field_location} in this service.\n" + ) + + allowed_suffixes = { + "+00:00", + "+01:00", + "+0000", + "+0100", + } + + # List of accepted strict formats + formats = [ + "%Y-%m-%d", + "%Y-%m-%dT%H:%M:%S%z", + "%Y-%m-%dT%H:%M:%S.%f%z", + ] + + for fmt in formats: + try: + fhir_date = datetime.strptime(field_value, fmt) + # Enforce future-date rule using central checker after successful parse + if check_if_future_date(fhir_date): + raise ValueError(f"{field_location} must not be in the future") + # After successful parse, enforce timezone and future-date rules + if strict_timezone and fhir_date.tzinfo is not None: + if not any(field_value.endswith(suffix) for suffix in allowed_suffixes): + raise ValueError(error_message) + return fhir_date.isoformat() + except ValueError: + continue + + raise ValueError(error_message) + # Not Equal Validate def _validate_not_equal(self, expression_rule: str, field_name: str, field_value: str, row: dict) -> ErrorReport: try: @@ -471,7 +498,7 @@ def _validate_empty(self, _expression_rule: str, field_name: str, field_value: s return ErrorReport(ExceptionLevels.UNEXPECTED_EXCEPTION, message, row, field_name, "", self.summarise) # String Pre-Validation - def _validate_for_string_values( + def validation_for_string_values( self, _expression_rule: str, field_name: str, field_value: str, row: dict ) -> ErrorReport: """ diff --git a/lambdas/shared/src/common/validator/validator.py b/lambdas/shared/src/common/validator/validator.py index 701e0d115..21c846098 100644 --- a/lambdas/shared/src/common/validator/validator.py +++ b/lambdas/shared/src/common/validator/validator.py @@ -56,6 +56,7 @@ def _validate_expression( try: expression_values = data_parser.extract_field_values(expression_fieldname) + print(f"Validating Expression ID {expression_fieldname} with values: {expression_values}") except Exception as e: message = f"Data get values Unexpected exception [{e.__class__.__name__}]: {e}" error_record = ErrorReport(code=ExceptionLevels.PARSING_ERROR, message=message) diff --git a/lambdas/shared/tests/test_common/validator/test_schemas/test_schema.json b/lambdas/shared/tests/test_common/validator/test_schemas/test_schema.json index b819e5eff..ea4ffd251 100644 --- a/lambdas/shared/tests/test_common/validator/test_schemas/test_schema.json +++ b/lambdas/shared/tests/test_common/validator/test_schemas/test_schema.json @@ -78,7 +78,7 @@ "errorLevel": 2, "expression": { "expressionName": "Defaults to", - "expressionType": "NOTEMPTY", + "expressionType": "STRING", "expressionRule": "" }, "errorGroup": "completeness" @@ -91,7 +91,7 @@ "errorLevel": 0, "expression": { "expressionName": "Date Convert", - "expressionType": "NOTEMPTY", + "expressionType": "DATETIME", "expressionRule": "" }, "errorGroup": "consistency" @@ -104,7 +104,7 @@ "errorLevel": 0, "expression": { "expressionName": "Organisation Look Up Check", - "expressionType": "KEYCHECK", + "expressionType": "STRING", "expressionRule": "Organisation" }, "errorGroup": "consistency" @@ -117,7 +117,7 @@ "errorLevel": 1, "expression": { "expressionName": "Defaults to", - "expressionType": "NOTEMPTY", + "expressionType": "STRING", "expressionRule": "" }, "errorGroup": "consistency" @@ -130,7 +130,7 @@ "errorLevel": 0, "expression": { "expressionName": "Unique ID Not Empty Check", - "expressionType": "NOTEMPTY", + "expressionType": "STRING", "expressionRule": "" }, "errorGroup": "validity" @@ -143,14 +143,14 @@ "errorLevel": 0, "expression": { "expressionName": "Unique ID URI Not Empty Check", - "expressionType": "NOTEMPTY", + "expressionType": "STRING", "expressionRule": "" }, "errorGroup": "validity" }, { "expressionId": "01K8S0Y0TN3NZA6VBB3HM1PDR1", - "fieldNameFHIR": "id", + "fieldNameFHIR": "", "fieldNameFlat": "ACTION_FLAG", "fieldNumber": 12, "errorLevel": 1, @@ -169,7 +169,7 @@ "errorLevel": 1, "expression": { "expressionName": "Practitioner Forename Not Empty Check", - "expressionType": "NOTEMPTY", + "expressionType": "LIST", "expressionRule": "" }, "errorGroup": "completeness" @@ -182,7 +182,7 @@ "errorLevel": 1, "expression": { "expressionName": "Practitioner Surname Not Empty Check", - "expressionType": "NOTEMPTY", + "expressionType": "STRING", "expressionRule": "" }, "errorGroup": "completeness" @@ -195,8 +195,8 @@ "errorLevel": 1, "expression": { "expressionName": "Recorded Date Convert", - "expressionType": "NOTEMPTY", - "expressionRule": "" + "expressionType": "DATETIME", + "expressionRule": "false-strict-timezone" }, "errorGroup": "consistency" }, @@ -208,7 +208,7 @@ "errorLevel": 0, "expression": { "expressionName": "Primary Source Not Empty Check", - "expressionType": "NOTEMPTY", + "expressionType": "BOOLEAN", "expressionRule": "" }, "errorGroup": "completeness" @@ -221,7 +221,7 @@ "errorLevel": 0, "expression": { "expressionName": "Procedure Code Not Empty Check", - "expressionType": "NOTEMPTY", + "expressionType": "STRING", "expressionRule": "" }, "errorGroup": "completeness" @@ -234,7 +234,7 @@ "errorLevel": 1, "expression": { "expressionName": "Procedure Term Not Empty Check", - "expressionType": "NOTEMPTY", + "expressionType": "STRING", "expressionRule": "" }, "errorGroup": "completeness" @@ -247,7 +247,7 @@ "errorLevel": 1, "expression": { "expressionName": "Dose Sequence Not Empty Check", - "expressionType": "NOTEMPTY", + "expressionType": "POSITIVEINTEGER", "expressionRule": "" }, "errorGroup": "completeness" @@ -260,7 +260,7 @@ "errorLevel": 0, "expression": { "expressionName": "Produce Code Not Empty Check", - "expressionType": "NOTEMPTY", + "expressionType": "STRING", "expressionRule": "" }, "errorGroup": "completeness" @@ -273,7 +273,7 @@ "errorLevel": 1, "expression": { "expressionName": "Produce Term Not Empty Check", - "expressionType": "NOTEMPTY", + "expressionType": "STRING", "expressionRule": "" }, "errorGroup": "completeness" @@ -286,7 +286,7 @@ "errorLevel": 0, "expression": { "expressionName": "Manufacturer Display Not Empty Check", - "expressionType": "NOTEMPTY", + "expressionType": "STRING", "expressionRule": "" }, "errorGroup": "completeness" @@ -299,7 +299,7 @@ "errorLevel": 0, "expression": { "expressionName": "Batch Number Not Empty Check", - "expressionType": "NOTEMPTY", + "expressionType": "STRING", "expressionRule": "" }, "errorGroup": "completeness" @@ -312,7 +312,7 @@ "errorLevel": 1, "expression": { "expressionName": "Date Convert", - "expressionType": "NOTEMPTY", + "expressionType": "DATE", "expressionRule": "" }, "errorGroup": "consistency" @@ -325,7 +325,7 @@ "errorLevel": 0, "expression": { "expressionName": "Site of Vaccination Code Not Empty Check", - "expressionType": "NOTEMPTY", + "expressionType": "STRING", "expressionRule": "" }, "errorGroup": "completeness" @@ -338,7 +338,7 @@ "errorLevel": 1, "expression": { "expressionName": "Site of Vaccination Term Lookup Check", - "expressionType": "NOTEMPTY", + "expressionType": "STRING", "expressionRule": "" }, "errorGroup": "consistency" @@ -351,8 +351,8 @@ "errorLevel": 0, "expression": { "expressionName": "Route of Vaccination Code Not Empty Check", - "expressionType": "NOTEMPTY", - "expressionRule": "" + "expressionType": "STRING", + "expressionRule": "UniqueList" }, "errorGroup": "completeness" }, @@ -364,7 +364,7 @@ "errorLevel": 1, "expression": { "expressionName": "Route of Vaccination Term Lookup Check", - "expressionType": "NOTEMPTY", + "expressionType": "STRING", "expressionRule": "" }, "errorGroup": "consistency" @@ -377,7 +377,7 @@ "errorLevel": 1, "expression": { "expressionName": "Dose Amount Default Check", - "expressionType": "NOTEMPTY", + "expressionType": "INTDECIMAL", "expressionRule": "" }, "errorGroup": "completeness" @@ -390,7 +390,7 @@ "errorLevel": 1, "expression": { "expressionName": "Dose Unit Only If System Check", - "expressionType": "NOTEMPTY", + "expressionType": "STRING", "expressionRule": "" }, "errorGroup": "consistency" @@ -403,7 +403,7 @@ "errorLevel": 0, "expression": { "expressionName": "Dose Unit Term Not Empty Check", - "expressionType": "NOTEMPTY", + "expressionType": "STRING", "expressionRule": "" }, "errorGroup": "completeness" @@ -416,7 +416,7 @@ "errorLevel": 0, "expression": { "expressionName": "Indication Code Not Empty Check", - "expressionType": "NOTEMPTY", + "expressionType": "STRING", "expressionRule": "" }, "errorGroup": "completeness" @@ -429,7 +429,7 @@ "errorLevel": 1, "expression": { "expressionName": "Location Code Default Check", - "expressionType": "NOTEMPTY", + "expressionType": "STRING", "expressionRule": "" }, "errorGroup": "consistency" @@ -442,7 +442,7 @@ "errorLevel": 1, "expression": { "expressionName": "Location Code Type URI Default Check", - "expressionType": "NOTEMPTY", + "expressionType": "STRING", "expressionRule": "" }, "errorGroup": "consistency" From 15bdfa6c3391e0aa000e8e643713c5bb39cded48 Mon Sep 17 00:00:00 2001 From: Akol125 Date: Thu, 13 Nov 2025 02:21:36 +0000 Subject: [PATCH 05/18] expression rule per field --- .../fhir_immunization_pre_validators.py | 49 +- .../common/validator/constants/constants.py | 4 + .../common/validator/expression_checker.py | 310 +++--------- .../src/common/validator/expression_rule.py | 19 + .../validator/test_expression_checker.py | 462 +++--------------- .../validator/test_schemas/test_schema.json | 12 +- .../validator/test_validation_csv_row.py | 6 +- .../validator/testing_utils/constants.py | 2 +- 8 files changed, 179 insertions(+), 685 deletions(-) create mode 100644 lambdas/shared/src/common/validator/constants/constants.py create mode 100644 lambdas/shared/src/common/validator/expression_rule.py diff --git a/backend/src/models/fhir_immunization_pre_validators.py b/backend/src/models/fhir_immunization_pre_validators.py index 8f45585c9..23f10eab9 100644 --- a/backend/src/models/fhir_immunization_pre_validators.py +++ b/backend/src/models/fhir_immunization_pre_validators.py @@ -96,9 +96,7 @@ def validate(self): self.pre_validate_value_codeable_concept, self.pre_validate_extension_length, self.pre_validate_vaccination_procedure_code, - self.pre_validate_vaccination_procedure_display, self.pre_validate_vaccine_code, - self.pre_validate_vaccine_display, ] for method in validation_methods: @@ -592,7 +590,7 @@ def pre_validate_vaccination_procedure_code(self, values: dict) -> dict: (legacy CSV field name: VACCINATION_PROCEDURE_CODE) exists, then it is a non-empty string """ url = "https://fhir.hl7.org.uk/StructureDefinition/Extension-UKCore-" + "VaccinationProcedure" - system = Urls.snomed + system = "http://snomed.info/sct" field_type = "code" field_location = generate_field_location_for_extension(url, system, field_type) try: @@ -602,22 +600,6 @@ def pre_validate_vaccination_procedure_code(self, values: dict) -> dict: except (KeyError, IndexError): pass - def pre_validate_vaccination_procedure_display(self, values: dict) -> dict: - """ - Pre-validate that, if extension[?(@.url=='https://fhir.hl7.org.uk/StructureDefinition/Extension-UKCore- - VaccinationProcedure')].valueCodeableConcept.coding[?(@.system=='http://snomed.info/sct')].display - (legacy CSV field name: VACCINATION_PROCEDURE_TERM) exists, then it is a non-empty string - """ - url = "https://fhir.hl7.org.uk/StructureDefinition/Extension-UKCore-" + "VaccinationProcedure" - system = Urls.snomed - field_type = "display" - field_location = generate_field_location_for_extension(url, system, field_type) - try: - field_value = get_generic_extension_value(values, url, system, field_type) - PreValidation.for_string(field_value, field_location) - except (KeyError, IndexError): - pass - def pre_validate_vaccination_situation_code(self, values: dict) -> dict: """ Pre-validate that, if extension[?(@.url=='https://fhir.hl7.org.uk/StructureDefinition/Extension-UKCore- @@ -625,7 +607,7 @@ def pre_validate_vaccination_situation_code(self, values: dict) -> dict: (legacy CSV field name: VACCINATION_SITUATION_CODE) exists, then it is a non-empty string """ url = "https://fhir.hl7.org.uk/StructureDefinition/Extension-UKCore-VaccinationSituation" - system = Urls.snomed + system = "http://snomed.info/sct" field_type = "code" field_location = generate_field_location_for_extension(url, system, field_type) try: @@ -641,7 +623,7 @@ def pre_validate_vaccination_situation_display(self, values: dict) -> dict: (legacy CSV field name: VACCINATION_SITUATION_TERM) exists, then it is a non-empty string """ url = "https://fhir.hl7.org.uk/StructureDefinition/Extension-UKCore-VaccinationSituation" - system = Urls.snomed + system = "http://snomed.info/sct" field_type = "display" field_location = generate_field_location_for_extension(url, system, field_type) try: @@ -720,7 +702,7 @@ def pre_validate_disease_type_coding_codes(self, values: dict) -> dict: Pre-validate that, if protocolApplied[0].targetDisease[{i}].coding[?(@.system=='http://snomed.info/sct')].code exists, then it is a non-empty string """ - url = Urls.snomed + url = "http://snomed.info/sct" try: for i in range(len(values["protocolApplied"][0]["targetDisease"])): field_location = f"protocolApplied[0].targetDisease[{i}].coding[?(@.system=='{url}')].code" @@ -779,7 +761,7 @@ def pre_validate_site_coding_code(self, values: dict) -> dict: Pre-validate that, if site.coding[?(@.system=='http://snomed.info/sct')].code (legacy CSV field name: SITE_OF_VACCINATION_CODE) exists, then it is a non-empty string """ - url = Urls.snomed + url = "http://snomed.info/sct" field_location = f"site.coding[?(@.system=='{url}')].code" try: site_coding_code = [x for x in values["site"]["coding"] if x.get("system") == url][0]["code"] @@ -792,7 +774,7 @@ def pre_validate_site_coding_display(self, values: dict) -> dict: Pre-validate that, if site.coding[?(@.system=='http://snomed.info/sct')].display (legacy CSV field name: SITE_OF_VACCINATION_TERM) exists, then it is a non-empty string """ - url = Urls.snomed + url = "http://snomed.info/sct" field_location = f"site.coding[?(@.system=='{url}')].display" try: field_value = [x for x in values["site"]["coding"] if x.get("system") == url][0]["display"] @@ -813,7 +795,7 @@ def pre_validate_route_coding_code(self, values: dict) -> dict: Pre-validate that, if route.coding[?(@.system=='http://snomed.info/sct')].code (legacy CSV field name: ROUTE_OF_VACCINATION_CODE) exists, then it is a non-empty string """ - url = Urls.snomed + url = "http://snomed.info/sct" field_location = f"route.coding[?(@.system=='{url}')].code" try: field_value = [x for x in values["route"]["coding"] if x.get("system") == url][0]["code"] @@ -826,7 +808,7 @@ def pre_validate_route_coding_display(self, values: dict) -> dict: Pre-validate that, if route.coding[?(@.system=='http://snomed.info/sct')].display (legacy CSV field name: ROUTE_OF_VACCINATION_TERM) exists, then it is a non-empty string """ - url = Urls.snomed + url = "http://snomed.info/sct" field_location = f"route.coding[?(@.system=='{url}')].display" try: field_value = [x for x in values["route"]["coding"] if x.get("system") == url][0]["display"] @@ -969,7 +951,7 @@ def pre_validate_vaccine_code(self, values: dict) -> dict: NOTE: vaccineCode is a mandatory FHIR field. A value of None will be rejected by the FHIR model before pre-validators are run. """ - url = Urls.snomed + url = "http://snomed.info/sct" field_location = f"vaccineCode.coding[?(@.system=='{url}')].code" try: field_value = [x for x in values["vaccineCode"]["coding"] if x.get("system") == url][0]["code"] @@ -977,16 +959,3 @@ def pre_validate_vaccine_code(self, values: dict) -> dict: PreValidation.for_snomed_code(field_value, field_location) except (KeyError, IndexError): pass - - def pre_validate_vaccine_display(self, values: dict) -> dict: - """ - Pre-validate that, if vaccineCode.coding[?(@.system=='http://snomed.info/sct')].display - (legacy CSV field : VACCINE_PRODUCT_TERM) exists, then it is a non-empty string - """ - url = Urls.snomed - field_location = f"vaccineCode.coding[?(@.system=='{url}')].display" - try: - field_value = [x for x in values["vaccineCode"]["coding"] if x.get("system") == url][0]["display"] - PreValidation.for_string(field_value, field_location) - except (KeyError, IndexError): - pass diff --git a/lambdas/shared/src/common/validator/constants/constants.py b/lambdas/shared/src/common/validator/constants/constants.py new file mode 100644 index 000000000..a11be49c4 --- /dev/null +++ b/lambdas/shared/src/common/validator/constants/constants.py @@ -0,0 +1,4 @@ +class Constants: + NHS_NUMBER_LENGTH = 10 + PERSON_NAME_ELEMENT_MAX_LENGTH = 35 + GENDERS = ["male", "female", "other", "unknown"] diff --git a/lambdas/shared/src/common/validator/expression_checker.py b/lambdas/shared/src/common/validator/expression_checker.py index efd943cd2..1883597f7 100644 --- a/lambdas/shared/src/common/validator/expression_checker.py +++ b/lambdas/shared/src/common/validator/expression_checker.py @@ -1,10 +1,11 @@ -import datetime import re -import uuid -from typing import Decimal, Optional, Union +from datetime import datetime +from decimal import Decimal +from typing import Optional, Union from common.validator.constants.enums import MESSAGES, ExceptionLevels, MessageLabel from common.validator.error_report.record_error import ErrorReport, RecordError +from common.validator.expression_rule import expression_rule_per_field from common.validator.lookup_expressions.key_data import KeyData from common.validator.lookup_expressions.lookup_data import LookUpData from common.validator.validation_utils import check_if_future_date @@ -39,66 +40,23 @@ def validate_expression( case "UNIQUELIST": return self.validation_for_unique_list(expression_rule, field_name, field_value, row) case "BOOLEAN": - return self._validate_boolean(expression_rule, field_name, field_value, row) + return self.validation_for_boolean(expression_rule, field_name, field_value, row) case "INTDECIMAL": return self.validation_for_integer_or_decimal(expression_rule, field_name, field_value, row) - case "UUID": - return self._validate_uuid(expression_rule, field_name, field_value, row) - case "INT": - return self._validate_integer(expression_rule, field_name, field_value, row) - case "FLOAT": - return self._validate_float(expression_rule, field_name, field_value, row) - case "REGEX": - return self._validate_regex(expression_rule, field_name, field_value, row) - case "EQUAL": - return self._validate_equal(expression_rule, field_name, field_value, row) - case "NOTEQUAL": - return self._validate_not_equal(expression_rule, field_name, field_value, row) - case "IN": - return self._validate_in(expression_rule, field_name, field_value, row) - case "NRANGE": - return self._validate_n_range(expression_rule, field_name, field_value, row) - case "INARRAY": - return self._validate_in_array(expression_rule, field_name, field_value, row) - case "UPPER": - return self._validate_upper(expression_rule, field_name, field_value, row) - case "LOWER": - return self._validate_lower(expression_rule, field_name, field_value, row) - case "LENGTH": - return self._validate_length(expression_rule, field_name, field_value, row) - case "STARTSWITH": - return self._validate_starts_with(expression_rule, field_name, field_value, row) - case "ENDSWITH": - return self._validate_ends_with(expression_rule, field_name, field_value, row) - case "EMPTY": - return self._validate_empty(expression_rule, field_name, field_value, row) - case "NOTEMPTY": - return self._validate_not_empty(expression_rule, field_name, field_value, row) - case "POSITIVE": - return self._validate_positive(expression_rule, field_name, field_value, row) case "POSTCODE": return self._validate_post_code(expression_rule, field_name, field_value, row) case "GENDER": return self._validate_gender(expression_rule, field_name, field_value, row) - case "NHSNUMBER": - return self._validate_nhs_number(expression_rule, field_name, field_value, row) - case "MAXOBJECTS": - return self._validate_max_objects(expression_rule, field_name, field_value, row) - case "ONLYIF": - return self._validate_only_if(expression_rule, field_name, field_value, row) - case "LOOKUP": - return self._validate_against_lookup(expression_rule, field_name, field_value, row) - case "KEYCHECK": - return self._validate_against_key(expression_rule, field_name, field_value, row) case _: return "Schema expression not found! Check your expression type : " + expression_type # ISO 8601 date/datetime validate (currently date-only) - def validation_for_date(self, _expression_rule, field_name, field_value, row, future_date_allowed: bool = False): + def validation_for_date(self, _expression_rule, field_name, field_value, row): """ Apply pre-validation to a date field to ensure that it is a string (JSON dates must be written as strings) containing a valid date in the format "YYYY-MM-DD" """ + future_date_allowed: bool = False if not isinstance(field_value, str): raise TypeError(f"{field_name} must be a string") @@ -111,26 +69,12 @@ def validation_for_date(self, _expression_rule, field_name, field_value, row, fu if not future_date_allowed and check_if_future_date(parsed_date): raise ValueError(f"{field_name} must not be in the future") - # UUID validate - def _validate_uuid(self, _expression_rule: str, field_name: str, field_value: str, row: dict) -> ErrorReport: - try: - uuid.UUID(str(field_value)) - except RecordError as e: - code = e.code if e.code is not None else ExceptionLevels.RECORD_CHECK_FAILED - message = e.message if e.message is not None else MESSAGES[ExceptionLevels.RECORD_CHECK_FAILED] - if e.details is not None: - details = e.details - return ErrorReport(code, message, row, field_name, details, self.summarise) - except Exception as e: - if self.report_unexpected_exception: - message = MESSAGES[ExceptionLevels.UNEXPECTED_EXCEPTION] % (e.__class__.__name__, e) - return ErrorReport(ExceptionLevels.UNEXPECTED_EXCEPTION, message, row, field_name, "", self.summarise) - - def validation_for_positive_integer(field_value: int, field_name: str, max_value: int = None): + def validation_for_positive_integer(self, _expression_rule, field_name, field_value, row): """ Apply pre-validation to an integer field to ensure that it is a positive integer, which does not exceed the maximum allowed value (if applicable) """ + max_value: int = None # This check uses type() instead of isinstance() because bool is a subclass of int. if type(field_value) is not int: # pylint: disable=unidiomatic-typecheck raise TypeError(f"{field_name} must be a positive integer") @@ -142,7 +86,9 @@ def validation_for_positive_integer(field_value: int, field_name: str, max_value if field_value > max_value: raise ValueError(f"{field_name} must be an integer in the range 1 to {max_value}") - def validation_for_integer_or_decimal(field_value: Union[int, Decimal], field_location: str): + def validation_for_integer_or_decimal( + self, _expression_rule, field_value: Union[int, Decimal], field_name: str, row: dict + ): """ Apply pre-validation to a decimal field to ensure that it is an integer or decimal, which does not exceed the maximum allowed number of decimal places (if applicable) @@ -152,7 +98,7 @@ def validation_for_integer_or_decimal(field_value: Union[int, Decimal], field_lo type(field_value) is int # pylint: disable=unidiomatic-typecheck or type(field_value) is Decimal # pylint: disable=unidiomatic-typecheck ): - raise TypeError(f"{field_location} must be a number") + raise TypeError(f"{field_name} must be a number") def validation_for_unique_list( list_to_check: list, @@ -193,7 +139,7 @@ def _validate_regex(self, expression_rule: str, field_name: str, field_value: st message = MESSAGES[ExceptionLevels.UNEXPECTED_EXCEPTION] % (e.__class__.__name__, e) return ErrorReport(ExceptionLevels.UNEXPECTED_EXCEPTION, message, row, field_name, "", self.summarise) - def validation_for_boolean(field_value: str, field_name: str): + def validation_for_boolean(self, expression_rule: str, field_name: str, field_value: str, row: dict): """Apply pre-validation to a boolean field to ensure that it is a boolean""" if not isinstance(field_value, bool): raise TypeError(f"{field_name} must be a boolean") @@ -203,38 +149,47 @@ def validation_for_list(self, expression_rule: str, field_name: str, field_value Apply validation to a list field to ensure it is a non-empty list which meets the length requirements and requirements, if applicable, for each list element to be a non-empty string or non-empty dictionary """ - defined_length: Optional[int] = (None,) - max_length: Optional[int] = (None,) - elements_are_strings: bool = (False,) - string_element_max_length: Optional[int] = (None,) - elements_are_dicts: bool = (False,) - if not isinstance(field_value, list): - raise TypeError(f"{field_name} must be an array") - - if defined_length: - if len(field_value) != defined_length: - raise ValueError(f"{field_name} must be an array of length {defined_length}") - else: - if len(field_value) == 0: - raise ValueError(f"{field_name} must be a non-empty array") - - if max_length is not None and len(field_value) > max_length: - raise ValueError(f"{field_name} must be an array of maximum length {max_length}") - - if elements_are_strings: - for idx, element in enumerate(field_value): - self._validate_for_string_values.for_string( - element, f"{field_name}[{idx}]", max_length=string_element_max_length - ) + rules = expression_rule_per_field(expression_rule) if expression_rule else {} + defined_length: Optional[int] = rules.get("defined_length", None) + max_length: Optional[int] = rules.get("max_length", None) + elements_are_strings: bool = rules.get("elements_are_strings", False) + string_element_max_length: Optional[int] = rules.get("string_element_max_length", None) + elements_are_dicts: bool = rules.get("elements_are_dicts", False) - if elements_are_dicts: - for element in field_value: - if not isinstance(element, dict): - raise TypeError(f"{field_name} must be an array of objects") - if len(element) == 0: - raise ValueError(f"{field_name} must be an array of non-empty objects") + try: + if not isinstance(field_value, list): + raise TypeError(f"{field_name} must be an array") - def validation_for_date_time(field_value: str, field_location: str, strict_timezone: bool = True): + if defined_length: + if len(field_value) != defined_length: + raise ValueError(f"{field_name} must be an array of length {defined_length}") + else: + if len(field_value) == 0: + raise ValueError(f"{field_name} must be a non-empty array") + + if max_length is not None and len(field_value) > max_length: + raise ValueError(f"{field_name} must be an array of maximum length {max_length}") + + if elements_are_strings: + for idx, element in enumerate(field_value): + self._validate_for_string_values.for_string( + element, f"{field_name}[{idx}]", max_length=string_element_max_length + ) + + if elements_are_dicts: + for element in field_value: + if not isinstance(element, dict): + raise TypeError(f"{field_name} must be an array of objects") + if len(element) == 0: + raise ValueError(f"{field_name} must be an array of non-empty objects") + except Exception as e: + if self.report_unexpected_exception: + message = MESSAGES[ExceptionLevels.UNEXPECTED_EXCEPTION] % (e.__class__.__name__, e) + return ErrorReport(ExceptionLevels.UNEXPECTED_EXCEPTION, message, row, field_name, "", self.summarise) + + def validation_for_date_time( + self, expression_rule: str, field_name: str, field_value: str, row: dict, strict_timezone: bool = True + ): """ Apply pre-validation to a datetime field to ensure that it is a string (JSON dates must be written as strings) containing a valid datetime. Note that partial dates are valid for FHIR, but are not allowed for this API. @@ -245,10 +200,10 @@ def validation_for_date_time(field_value: str, field_location: str, strict_timez """ if not isinstance(field_value, str): - raise TypeError(f"{field_location} must be a string") + raise TypeError(f"{field_name} must be a string") error_message = ( - f"{field_location} must be a valid datetime in one of the following formats:" + f"{field_name} must be a valid datetime in one of the following formats:" "- 'YYYY-MM-DD' — Full date only" "- 'YYYY-MM-DDThh:mm:ss%z' — Full date and time with timezone (e.g. +00:00 or +01:00)" "- 'YYYY-MM-DDThh:mm:ss.f%z' — Full date and time with milliseconds and timezone" @@ -257,7 +212,7 @@ def validation_for_date_time(field_value: str, field_location: str, strict_timez if strict_timezone: error_message += ( "Only '+00:00' and '+01:00' are accepted as valid timezone offsets.\n" - f"Note that partial dates are not allowed for {field_location} in this service.\n" + f"Note that partial dates are not allowed for {field_name} in this service.\n" ) allowed_suffixes = { @@ -279,7 +234,7 @@ def validation_for_date_time(field_value: str, field_location: str, strict_timez fhir_date = datetime.strptime(field_value, fmt) # Enforce future-date rule using central checker after successful parse if check_if_future_date(fhir_date): - raise ValueError(f"{field_location} must not be in the future") + raise ValueError(f"{field_name} must not be in the future") # After successful parse, enforce timezone and future-date rules if strict_timezone and fhir_date.tzinfo is not None: if not any(field_value.endswith(suffix) for suffix in allowed_suffixes): @@ -313,118 +268,6 @@ def _validate_not_equal(self, expression_rule: str, field_name: str, field_value message = MESSAGES[ExceptionLevels.UNEXPECTED_EXCEPTION] % (e.__class__.__name__, e) return ErrorReport(ExceptionLevels.UNEXPECTED_EXCEPTION, message, row, field_name, "", self.summarise) - # In Validate - def _validate_in(self, expression_rule: str, field_name: str, field_value: str, row: dict) -> ErrorReport: - try: - if expression_rule.lower() not in field_value.lower(): - raise RecordError( - ExceptionLevels.RECORD_CHECK_FAILED, - "Data not in Value failed", - "Check Data not found in Value, List- " + expression_rule + " Data- " + field_value, - ) - except RecordError as e: - code = e.code if e.code is not None else ExceptionLevels.RECORD_CHECK_FAILED - message = e.message if e.message is not None else MESSAGES[ExceptionLevels.RECORD_CHECK_FAILED] - if e.details is not None: - details = e.details - return ErrorReport(code, message, row, field_name, details, self.summarise) - except Exception as e: - if self.report_unexpected_exception: - message = MESSAGES[ExceptionLevels.UNEXPECTED_EXCEPTION] % (e.__class__.__name__, e) - return ErrorReport(ExceptionLevels.UNEXPECTED_EXCEPTION, message, row, field_name, "", self.summarise) - - # NRange Validate - def _validate_n_range(self, expression_rule: str, field_name: str, field_value: str, row: dict) -> ErrorReport: - try: - value = float(field_value) - rule = expression_rule.split(",") - range1 = float(rule[0]) - range2 = float(rule[1]) - - if not (range1 <= value <= range2): - raise RecordError( - ExceptionLevels.RECORD_CHECK_FAILED, - "Value range check failed", - "Value is not within the number range, data- " + field_value, - ) - return None - except RecordError as e: - code = e.code if e.code is not None else ExceptionLevels.RECORD_CHECK_FAILED - message = e.message if e.message is not None else MESSAGES[ExceptionLevels.RECORD_CHECK_FAILED] - if e.details is not None: - details = e.details - return ErrorReport(code, message, row, field_name, details, self.summarise) - except Exception as e: - if self.report_unexpected_exception: - message = MESSAGES[ExceptionLevels.UNEXPECTED_EXCEPTION] % (e.__class__.__name__, e) - return ErrorReport(ExceptionLevels.UNEXPECTED_EXCEPTION, message, row, field_name, "", self.summarise) - - # InArray Validate - def _validate_in_array(self, expression_rule: str, field_name: str, field_value: str, row: dict) -> ErrorReport: - try: - rule_list = expression_rule.split(",") - - if field_value not in rule_list: - raise RecordError( - ExceptionLevels.RECORD_CHECK_FAILED, - "Value not in array check failed", - "Check Value not found in data array", - ) - except RecordError as e: - code = e.code if e.code is not None else ExceptionLevels.RECORD_CHECK_FAILED - message = e.message if e.message is not None else MESSAGES[ExceptionLevels.RECORD_CHECK_FAILED] - if e.details is not None: - details = e.details - return ErrorReport(code, message, row, field_name, details, self.summarise) - except Exception as e: - if self.report_unexpected_exception: - message = MESSAGES[ExceptionLevels.UNEXPECTED_EXCEPTION] % (e.__class__.__name__, e) - return ErrorReport(ExceptionLevels.UNEXPECTED_EXCEPTION, message, row, field_name, "", self.summarise) - - # Upper Validate - def _validate_upper(self, _expression_rule: str, field_name: str, field_value: str, row: dict) -> ErrorReport: - try: - result = field_value.isupper() - - if not result: - raise RecordError( - ExceptionLevels.RECORD_CHECK_FAILED, - "Value not uppercase", - "Check Value not found to be uppercase, value- " + field_value, - ) - except RecordError as e: - code = e.code if e.code is not None else ExceptionLevels.RECORD_CHECK_FAILED - message = e.message if e.message is not None else MESSAGES[ExceptionLevels.RECORD_CHECK_FAILED] - if e.details is not None: - details = e.details - return ErrorReport(code, message, row, field_name, details, self.summarise) - except Exception as e: - if self.report_unexpected_exception: - message = MESSAGES[ExceptionLevels.UNEXPECTED_EXCEPTION] % (e.__class__.__name__, e) - return ErrorReport(ExceptionLevels.UNEXPECTED_EXCEPTION, message, row, field_name, "", self.summarise) - - # Lower Validate - def _validate_lower(self, _expression_rule: str, field_name: str, field_value: str, row: dict) -> ErrorReport: - try: - result = field_value.islower() - - if not result: - raise RecordError( - ExceptionLevels.RECORD_CHECK_FAILED, - "Value not lowercase", - "Check Value not found to be lowercase, data- " + field_value, - ) - except RecordError as e: - code = e.code if e.code is not None else ExceptionLevels.RECORD_CHECK_FAILED - message = e.message if e.message is not None else MESSAGES[ExceptionLevels.RECORD_CHECK_FAILED] - if e.details is not None: - details = e.details - return ErrorReport(code, message, row, field_name, details, self.summarise) - except Exception as e: - if self.report_unexpected_exception: - message = MESSAGES[ExceptionLevels.UNEXPECTED_EXCEPTION] % (e.__class__.__name__, e) - return ErrorReport(ExceptionLevels.UNEXPECTED_EXCEPTION, message, row, field_name, "", self.summarise) - # Starts With Validate def _validate_starts_with(self, expression_rule: str, field_name: str, field_value: str, row: dict) -> ErrorReport: try: @@ -499,16 +342,19 @@ def _validate_empty(self, _expression_rule: str, field_name: str, field_value: s # String Pre-Validation def validation_for_string_values( - self, _expression_rule: str, field_name: str, field_value: str, row: dict + self, expression_rule: str, field_name: str, field_value: str, row: dict ) -> ErrorReport: """ Apply validation to a string field to ensure it is a non-empty string which meets the length requirements and predefined values requirements """ - defined_length: int = (10,) - max_length: int = (None,) - predefined_values: list = (None,) - spaces_allowed: bool = False + if expression_rule is not None: + rules = expression_rule_per_field(expression_rule) + defined_length = rules.get("defined_length", None) + max_length = rules.get("max_length", None) + predefined_values = rules.get("predefined_values", None) + spaces_allowed = rules.get("spaces_allowed", None) + try: if not isinstance(field_value, str): raise TypeError(f"{field_name} must be a string") @@ -715,29 +561,3 @@ def _validate_against_lookup( if self.report_unexpected_exception: message = MESSAGES[ExceptionLevels.UNEXPECTED_EXCEPTION] % (e.__class__.__name__, e) return ErrorReport(ExceptionLevels.UNEXPECTED_EXCEPTION, message, row, field_name, "", self.summarise) - - # Check with Key Lookup - def _validate_against_key(self, expression_rule: str, field_name: str, field_value: str, row: dict) -> ErrorReport: - try: - result = self.key_data.find_key(expression_rule, field_value) - if not result: - raise RecordError( - ExceptionLevels.KEY_CHECK_FAILED, - "Key lookup failure", - "Value was not found in Key List, " - + MessageLabel.EXPECTED_LABEL - + field_value - + " " - + MessageLabel.FOUND_LABEL - + "nothing", - ) - except RecordError as e: - code = e.code if e.code is not None else ExceptionLevels.KEY_CHECK_FAILED - message = e.message if e.message is not None else MESSAGES[ExceptionLevels.KEY_CHECK_FAILED] - if e.details is not None: - details = e.details - return ErrorReport(code, message, row, field_name, details, self.summarise) - except Exception as e: - if self.report_unexpected_exception: - message = MESSAGES[ExceptionLevels.UNEXPECTED_EXCEPTION] % (e.__class__.__name__, e) - return ErrorReport(ExceptionLevels.UNEXPECTED_EXCEPTION, message, row, field_name, "", self.summarise) diff --git a/lambdas/shared/src/common/validator/expression_rule.py b/lambdas/shared/src/common/validator/expression_rule.py new file mode 100644 index 000000000..79065117e --- /dev/null +++ b/lambdas/shared/src/common/validator/expression_rule.py @@ -0,0 +1,19 @@ +from common.validator.constants.constants import Constants + + +def expression_rule_per_field(expression_type: str) -> dict: + match expression_type: + case "NHS_NUMBER": + return {"defined_length": Constants.NHS_NUMBER_LENGTH, "spaces_allowed": False} + case "PERSON_NAME": + return {"max_length": Constants.PERSON_NAME_ELEMENT_MAX_LENGTH} + case "PERSON_SURNAME": + return { + "elements_are_strings": True, + "max_length": 5, + "string_element_max_length": Constants.PERSON_NAME_ELEMENT_MAX_LENGTH, + } + case "GENDER": + return {"predefined_values": Constants.GENDERS} + case _: + raise ValueError(f"Expression rule not found for type: {expression_type}") diff --git a/lambdas/shared/tests/test_common/validator/test_expression_checker.py b/lambdas/shared/tests/test_common/validator/test_expression_checker.py index 7739b87b5..5333aec7c 100644 --- a/lambdas/shared/tests/test_common/validator/test_expression_checker.py +++ b/lambdas/shared/tests/test_common/validator/test_expression_checker.py @@ -1,417 +1,103 @@ import unittest -from unittest.mock import MagicMock, patch -from common.validator.constants.enums import ExceptionLevels from common.validator.error_report.record_error import ErrorReport from common.validator.expression_checker import ExpressionChecker -class MockParser(unittest.TestCase): - """ - Mock parser used to simulate field value lookups - for ExpressionChecker during testing. - """ +class MockParser: + """Minimal parser providing get_key_value for ONLYIF tests.""" def __init__(self, data=None): self._data = data or {} def get_key_value(self, field_name): - """Return a list to mimic parser contract.""" return [self._data.get(field_name, "")] class TestExpressionChecker(unittest.TestCase): - """ - Unit tests for ExpressionChecker validation logic. - Each test validates a specific expression rule type. - """ + """Unit tests limited to expression types used in the provided schema.""" def make_checker(self, mock_data=None, summarise=False, report=True): - """Helper to create an ExpressionChecker with mock parser data.""" return ExpressionChecker(MockParser(mock_data), summarise, report) - # Date Time Check - def test_datetime_valid(self): - """Valid ISO date should pass without error.""" - checker = self.make_checker({"date_field": "2025-01-01"}) - error = checker.validate_expression("DATETIME", None, "date_field", "2025-01-01", 1) - self.assertIsNone(error) - - def test_datetime_unexpected_exception(self): - """Passing incompatible type should raise an error report.""" - checker = self.make_checker() - error = checker.validate_expression("DATETIME", None, "date_field", object(), 1) - self.assertIsInstance(error, ErrorReport) - - def test_uuid_valid_and_invalid(self): - """UUID validation should pass for valid UUIDs and fail for invalid ones.""" - checker = self.make_checker() - valid_uuid = "12345678-1234-5678-1234-567812345678" - self.assertIsNone(checker.validate_expression("UUID", None, "uuid_field", valid_uuid, 1)) - self.assertIsInstance(checker.validate_expression("UUID", None, "uuid_field", "not-a-uuid", 1), ErrorReport) - - # Numeric Length and Regex - def test_integer_length_and_regex_rules(self): - """Test integer, length, and regex-based validations.""" + # STRING + def test_string_valid_and_invalid(self): checker = self.make_checker() - - # INT should pass with numeric value - self.assertIsNone(checker.validate_expression("INT", None, "int_field", "42", 1)) - - # LENGTH too long -> Error - self.assertIsInstance(checker.validate_expression("LENGTH", "3", "str_field", "abcd", 1), ErrorReport) - - # REGEX mismatch -> Error - self.assertIsInstance(checker.validate_expression("REGEX", r"^abc$", "regex_field", "abcd", 1), ErrorReport) - - # Case & String Position Rules - def test_upper_lower_startswith_endswith_rules(self): - """Validate case and string boundary conditions.""" - checker = self.make_checker() - - # UPPER - self.assertIsNone(checker.validate_expression("UPPER", None, "upper_field", "ABC", 1)) - self.assertIsInstance(checker.validate_expression("UPPER", None, "upper_field", "AbC", 1), ErrorReport) - - # LOWER - self.assertIsNone(checker.validate_expression("LOWER", None, "lower_field", "abc", 1)) - self.assertIsInstance(checker.validate_expression("LOWER", None, "lower_field", "abC", 1), ErrorReport) - - # STARTSWITH - self.assertIsNone(checker.validate_expression("STARTSWITH", "ab", "start_field", "abc", 1)) - self.assertIsInstance(checker.validate_expression("STARTSWITH", "zz", "start_field", "abc", 1), ErrorReport) - - # ENDSWITH - self.assertIsNone(checker.validate_expression("ENDSWITH", "bc", "end_field", "abc", 1)) - self.assertIsInstance(checker.validate_expression("ENDSWITH", "zz", "end_field", "abc", 1), ErrorReport) - - # --- EMPTY & NOTEMPTY ------------------------------------------------ - - def test_empty_and_notempty_rules(self): - """Validate checks for empty and non-empty fields.""" - checker = self.make_checker() - - # EMPTY - self.assertIsNone(checker.validate_expression("EMPTY", None, "empty_field", "", 1)) - self.assertIsInstance(checker.validate_expression("EMPTY", None, "empty_field", "value", 1), ErrorReport) - - # NOTEMPTY - self.assertIsNone(checker.validate_expression("NOTEMPTY", None, "notempty_field", "value", 1)) - self.assertIsInstance(checker.validate_expression("NOTEMPTY", None, "notempty_field", "", 1), ErrorReport) - - # --- NUMERIC RANGES -------------------------------------------------- - - def test_positive_and_nrange_rules(self): - """Check positive and numeric range validations.""" - checker = self.make_checker() - - # POSITIVE - self.assertIsNone(checker.validate_expression("POSITIVE", None, "positive_field", "1.2", 1)) - self.assertIsInstance(checker.validate_expression("POSITIVE", None, "positive_field", "-3", 1), ErrorReport) - - # NRANGE - self.assertIsNone(checker.validate_expression("NRANGE", "1,10", "range_field", "5", 1)) - self.assertIsInstance(checker.validate_expression("NRANGE", "a,b", "range_field", "5", 1), ErrorReport) - - # --- COMPARISONS & LIST MEMBERSHIP ----------------------------------- - - def test_inarray_equal_notequal_rules(self): - """Test INARRAY, EQUAL, and NOTEQUAL expressions.""" - checker = self.make_checker() - - # INARRAY - self.assertIsNone(checker.validate_expression("INARRAY", "a,b", "array_field", "a", 1)) - self.assertIsInstance(checker.validate_expression("INARRAY", "a,b", "array_field", "z", 1), ErrorReport) - - # EQUAL - self.assertIsNone(checker.validate_expression("EQUAL", "x", "equal_field", "x", 1)) - self.assertIsInstance(checker.validate_expression("EQUAL", "x", "equal_field", "y", 1), ErrorReport) - - # NOTEQUAL - self.assertIsNone(checker.validate_expression("NOTEQUAL", "x", "notequal_field", "y", 1)) - self.assertIsInstance(checker.validate_expression("NOTEQUAL", "x", "notequal_field", "x", 1), ErrorReport) - - # --- DOMAIN-SPECIFIC RULES ------------------------------------------- - - def test_postcode_gender_nhsnumber_rules(self): - """Check NHS number, gender, and postcode validations.""" - checker = self.make_checker() - - # NHSNUMBER invalid - self.assertIsInstance(checker.validate_expression("NHSNUMBER", None, "nhs_field", "123", 1), ErrorReport) - - # GENDER - self.assertIsNone(checker.validate_expression("GENDER", None, "gender_field", "0", 1)) - self.assertIsInstance(checker.validate_expression("GENDER", None, "gender_field", "x", 1), ErrorReport) - - # POSTCODE - self.assertIsInstance(checker.validate_expression("POSTCODE", None, "postcode_field", "XYZ", 1), ErrorReport) - - # --- COLLECTION SIZE RULES ------------------------------------------- - - def test_maxobjects_rule(self): - """MAXOBJECTS validates maximum allowed length of list-like fields.""" - checker = self.make_checker() - self.assertIsNone(checker.validate_expression("MAXOBJECTS", "1", "list_field", [], 1)) - self.assertIsInstance(checker.validate_expression("MAXOBJECTS", "1", "list_field", [1, 2], 1), ErrorReport) - - # --- LOOKUP & CONDITIONAL RULES -------------------------------------- - - def test_lookup_and_keycheck_rules(self): - """Force unexpected or missing lookup paths.""" - checker = self.make_checker(report=True) - self.assertIsInstance(checker.validate_expression("LOOKUP", None, "lookup_field", "unknown", 1), ErrorReport) - - def test_onlyif_uses_parser_values(self): - """ONLYIF uses parser to conditionally validate based on another field.""" - mock_data = {"location_field": "VAL"} - checker = self.make_checker(mock_data) - - # expressionRule format: field|expected_value - result_match = checker.validate_expression("ONLYIF", "location_field|VAL", "test_field", "any", 1) - result_mismatch = checker.validate_expression("ONLYIF", "location_field|NOPE", "test_field", "any", 1) - - self.assertIsNone(result_match) - self.assertIsInstance(result_mismatch, ErrorReport) - - -class TestExpressionLookUp(unittest.TestCase): - def setUp(self): - self.MockLookUpData = patch("common.validator.expression_checker.LookUpData").start() - self.MockKeyData = patch("common.validator.expression_checker.KeyData").start() - - self.mock_summarise = MagicMock() - self.mock_report_exception = MagicMock() - self.mock_data_parser = MagicMock() - - self.expression_checker = ExpressionChecker( - self.mock_data_parser, self.mock_summarise, self.mock_report_exception - ) - - def tearDown(self): - patch.stopall() - - def test_validate_datetime_valid(self): - result = self.expression_checker.validate_expression( - "DATETIME", expression_rule="", field_name="timestamp", field_value="2022-01-01T12:00:00", row={} - ) - self.assertEqual( - result.message, - "Unexpected exception [ValueError]: Invalid isoformat string: '2022-01-01T12:00:00'", + # Valid NHS number length + self.assertIsNone( + checker.validate_expression( + "STRING", + "NHS_NUMBER", + "contained|#:Patient|identifier|#:https://fhir.nhs.uk/Id/nhs-number|value", + "9876543210", + 1, + ) ) - self.assertEqual(result.code, ExceptionLevels.UNEXPECTED_EXCEPTION) - self.assertEqual(result.field, "timestamp") - - def test_validate_uuid_valid(self): - result = self.expression_checker.validate_expression( - "UUID", expression_rule="", field_name="id", field_value="550e8400-e29b-41d4-a716-446655440000", row={} + # Empty should fail NHS number string rule + self.assertIsInstance( + checker.validate_expression( + "STRING", "NHS_NUMBER", "contained|#:Patient|identifier|#:https://fhir.nhs.uk/Id/nhs-number|value", "", 1 + ), + ErrorReport, ) - self.assertTrue(result is None) - def test_validate_integer_invalid(self): - result = self.expression_checker.validate_expression( - "INT", expression_rule="", field_name="age", field_value="hello world", row={} - ) - self.assertEqual(result.code, ExceptionLevels.UNEXPECTED_EXCEPTION) - self.assertEqual(result.field, "age") - self.assertIn("invalid literal for int()", result.message) - - def test_validate_in_array(self): - # Mock data_parser.get_key_values - self.mock_data_parser.get_key_values.return_value = ["val1", "val2"] - - result = self.expression_checker.validate_expression( - "INARRAY", expression_rule="", field_name="some_field", field_value="val2", row={} - ) - self.assertEqual(result.message, "Value not in array check failed") - self.assertEqual(result.field, "some_field") - - def test_validate_expression_type_not_found(self): - result = self.expression_checker.validate_expression( - "UNKNOWN", expression_rule="", field_name="field", field_value="value", row={} - ) - self.assertIn("Schema expression not found", result) - - -class DummyParserEx: - """A dummy parser that optionally raises exceptions when fetching values.""" - - def __init__(self, data=None, raise_on_get=False): - self._data = data or {} - self._raise_on_get = raise_on_get - - def get_key_value(self, field_name): - """Simulate field lookup, optionally raising an error.""" - if self._raise_on_get: - raise RuntimeError("boom") - return [self._data.get(field_name, "")] - - -class StubLookup: - """Stub object to simulate lookup behavior with optional exception raising.""" - - def __init__(self, raise_on_call=False): - self._raise_on_call = raise_on_call - - def find_lookup(self, value): - if self._raise_on_call: - raise RuntimeError("boom") - return "" # always empty to force error path - - -class StubKeyData: - """Stub for key data operations with optional exception raising.""" - - def __init__(self, raise_on_call=False): - self._raise_on_call = raise_on_call - - def findKey(self, key_source, field_value): - if self._raise_on_call: - raise RuntimeError("boom") - return False # always fail to trigger error branch - - -class TestExpressionCheckerExceptions(unittest.TestCase): - """ - Tests edge cases and exception-handling paths in ExpressionChecker. - Focuses on behavior when report=True vs report=False. - """ - - def make_checker(self, data=None, report=True, raise_on_get=False): - """Helper to construct ExpressionChecker with dummy parser.""" - return ExpressionChecker(DummyParserEx(data, raise_on_get), False, report) - - # --- REGEX, IN, LENGTH, FLOAT, UUID ---------------------------------- - - def test_regex_unexpected_true_false(self): - """REGEX should return ErrorReport when report=True, None when report=False.""" - checker = self.make_checker(report=True) - self.assertIsInstance(checker.validate_expression("REGEX", None, "regex_field", "abc", 1), ErrorReport) - - checker_no_report = self.make_checker(report=False) - self.assertIsNone(checker_no_report.validate_expression("REGEX", None, "regex_field", "abc", 1)) - - def test_in_unexpected_true_false(self): - """IN should trigger error when report=True and pass silently when report=False.""" - checker = self.make_checker(report=True) - self.assertIsInstance(checker.validate_expression("IN", "ab", "in_field", None, 1), ErrorReport) - - checker_no_report = self.make_checker(report=False) - self.assertIsNone(checker_no_report.validate_expression("IN", "ab", "in_field", None, 1)) - - def test_length_unexpected_true_false(self): - """LENGTH rule with invalid argument should trigger exception path.""" - checker = self.make_checker(report=True) - self.assertIsInstance(checker.validate_expression("LENGTH", "x", "length_field", "abcd", 1), ErrorReport) - - checker_no_report = self.make_checker(report=False) - self.assertIsNone(checker_no_report.validate_expression("LENGTH", "x", "length_field", "abcd", 1)) - - def test_float_unexpected_true_false(self): - """FLOAT rule should fail when value cannot be parsed as float.""" - checker = self.make_checker(report=True) - self.assertIsInstance(checker.validate_expression("FLOAT", None, "float_field", "abc", 1), ErrorReport) - - checker_no_report = self.make_checker(report=False) - self.assertIsNone(checker_no_report.validate_expression("FLOAT", None, "float_field", "abc", 1)) - - def test_uuid_unexpected_true_false(self): - """UUID rule should handle malformed UUIDs properly.""" - checker = self.make_checker(report=True) - self.assertIsInstance(checker.validate_expression("UUID", None, "uuid_field", "not-a-uuid", 1), ErrorReport) - - checker_no_report = self.make_checker(report=False) - self.assertIsNone(checker_no_report.validate_expression("UUID", None, "uuid_field", "not-a-uuid", 1)) - - # --- MAXOBJECTS ------------------------------------------------------ - - def test_maxobjects_unexpected_true_false(self): - """MAXOBJECTS should handle non-iterable input gracefully.""" - - class NoLen: - """Dummy object without __len__.""" - - pass - - checker = self.make_checker(report=True) - self.assertIsInstance(checker.validate_expression("MAXOBJECTS", "1", "max_field", NoLen(), 1), ErrorReport) - - checker_no_report = self.make_checker(report=False) - self.assertIsNone(checker_no_report.validate_expression("MAXOBJECTS", "1", "max_field", NoLen(), 1)) - - # --- ONLYIF ---------------------------------------------------------- - - def test_onlyif_unexpected_true_false(self): - """ONLYIF rule should handle parser errors based on report flag.""" - checker = self.make_checker(report=True, raise_on_get=True) - self.assertIsInstance(checker.validate_expression("ONLYIF", "loc|VAL", "field", "x", 1), ErrorReport) - - checker_no_report = self.make_checker(report=False, raise_on_get=True) - self.assertIsNone(checker_no_report.validate_expression("ONLYIF", "loc|VAL", "field", "x", 1)) - - # --- LOOKUP & KEYCHECK ----------------------------------------------- - - def test_lookup_unexpected_true_false(self): - """LOOKUP rule should handle raised exceptions gracefully.""" - checker = self.make_checker(report=True) - checker.data_look_up = StubLookup(raise_on_call=True) - self.assertIsInstance(checker.validate_expression("LOOKUP", None, "lookup_field", "x", 1), ErrorReport) - - checker_no_report = self.make_checker(report=False) - checker_no_report.data_look_up = StubLookup(raise_on_call=True) - self.assertIsNone(checker_no_report.validate_expression("LOOKUP", None, "lookup_field", "x", 1)) - - def test_keycheck_unexpected_true_false(self): - """KEYCHECK rule should handle raised exceptions gracefully.""" - checker = self.make_checker(report=True) - checker.key_data = StubKeyData(raise_on_call=True) - self.assertIsInstance(checker.validate_expression("KEYCHECK", "Site", "key_field", "val", 1), ErrorReport) - - checker_no_report = self.make_checker(report=False) - checker_no_report.key_data = StubKeyData(raise_on_call=True) - self.assertIsNone(checker_no_report.validate_expression("KEYCHECK", "Site", "key_field", "val", 1)) - - # --- DATE & STRING EDGE CASES --------------------------------------- - - def test_date_alias_and_upper_lower_edges(self): - """DATE alias should behave like DATETIME; string rules should handle None gracefully.""" - checker = self.make_checker({"date_field": "2025-01-01"}) - self.assertIsNone(checker.validate_expression("DATE", None, "date_field", "2025-01-01", 1)) - - checker_none = self.make_checker() - self.assertIsInstance(checker_none.validate_expression("UPPER", None, "upper_field", None, 1), ErrorReport) - self.assertIsInstance(checker_none.validate_expression("LOWER", None, "lower_field", None, 1), ErrorReport) - self.assertIsInstance(checker_none.validate_expression("STARTSWITH", "a", "start_field", None, 1), ErrorReport) - self.assertIsInstance(checker_none.validate_expression("ENDSWITH", "a", "end_field", None, 1), ErrorReport) - - # --- NUMERIC RANGE --------------------------------------------------- - - def test_nrange_out_of_range(self): - """NRANGE rule should return error when value exceeds upper bound.""" - checker = self.make_checker() - self.assertIsInstance(checker.validate_expression("NRANGE", "1,10", "range_field", "11", 1), ErrorReport) - - # --- DOMAIN-SPECIFIC ------------------------------------------------- - - def test_postcode_valid_and_unexpected(self): - """POSTCODE should pass for valid UK postcode and fail otherwise.""" + # LIST + def test_list_valid_and_invalid(self): checker = self.make_checker() - self.assertIsNone(checker.validate_expression("POSTCODE", None, "postcode_field", "EC1A 1BB", 1)) - self.assertIsInstance(checker.validate_expression("POSTCODE", None, "postcode_field", None, 1), ErrorReport) - - def test_nhsnumber_valid_and_unexpected(self): - """NHSNUMBER should pass for valid NHS number format and fail otherwise.""" - checker = self.make_checker() - self.assertIsNone(checker.validate_expression("NHSNUMBER", None, "nhs_field", "61234567890", 1)) - self.assertIsInstance(checker.validate_expression("NHSNUMBER", None, "nhs_field", None, 1), ErrorReport) - - # --- IN RULE NORMAL PATHS ------------------------------------------- + self.assertIsNone(checker.validate_expression("LIST", "PERSON_NAME", "PERSON_NAME", ["Alice"], 1)) + self.assertIsInstance(checker.validate_expression("LIST", "PERSON_NAME", "PERSON_NAME", [], 1), ErrorReport) + self.assertIsInstance(checker.validate_expression("LIST", "", "PERSON_NAME", "Alice", 1), ErrorReport) - def test_in_pass_fail(self): - """IN rule should detect substring presence correctly.""" + # DATE + def test_date_valid_and_invalid(self): checker = self.make_checker() - self.assertIsNone(checker.validate_expression("IN", "ab", "in_field", "zzabzz", 1)) - self.assertIsInstance(checker.validate_expression("IN", "ab", "in_field", "zz", 1), ErrorReport) + self.assertIsNone(checker.validate_expression("DATE", "", "date_field", "2025-01-01", 1)) + with self.assertRaises(Exception): + # invalid month should raise ValueError internally + checker.validate_expression("DATE", "", "date_field", "2025-13-01", 1) + + +# # DATETIME +# def test_datetime_valid_and_invalid(self): +# checker = self.make_checker() +# # Full date only allowed +# self.assertIsNone(checker.validate_expression("DATETIME", "", "dt_field", "2025-01-01", 1)) +# # Bad format should raise +# with self.assertRaises(Exception): +# checker.validate_expression("DATETIME", "", "dt_field", "2025-01-01T10:00:00Z", 1) + + +# # BOOLEAN +# def test_boolean_valid_and_invalid(self): +# checker = self.make_checker() +# self.assertIsNone(checker.validate_expression("BOOLEAN", "", "bool_field", True, 1)) +# self.assertIsInstance(checker.validate_expression("BOOLEAN", "", "bool_field", "true", 1), ErrorReport) + +# # POSITIVEINTEGER +# def test_positive_integer_valid_and_invalid(self): +# checker = self.make_checker() +# self.assertIsNone(checker.validate_expression("POSITIVEINTEGER", "", "pos_field", "2", 1)) +# self.assertIsInstance(checker.validate_expression("POSITIVEINTEGER", "", "pos_field", "0", 1), ErrorReport) +# self.assertIsInstance(checker.validate_expression("POSITIVEINTEGER", "", "pos_field", "-5", 1), ErrorReport) +# self.assertIsInstance(checker.validate_expression("POSITIVEINTEGER", "", "pos_field", "abc", 1), ErrorReport) + +# # INTDECIMAL +# def test_intdecimal_valid_and_invalid(self): +# checker = self.make_checker() +# self.assertIsNone(checker.validate_expression("INTDECIMAL", "", "num_field", "1.23", 1)) +# self.assertIsNone(checker.validate_expression("INTDECIMAL", "", "num_field", 3, 1)) +# self.assertIsInstance(checker.validate_expression("INTDECIMAL", "", "num_field", "abc", 1), ErrorReport) + + +# class DummyParserEx: +# def __init__(self, data=None, raise_on_get=False): +# self._data = data or {} +# self._raise_on_get = raise_on_get + +# def get_key_value(self, field_name): +# if self._raise_on_get: +# raise RuntimeError("boom") +# return [self._data.get(field_name, "")] if __name__ == "__main__": diff --git a/lambdas/shared/tests/test_common/validator/test_schemas/test_schema.json b/lambdas/shared/tests/test_common/validator/test_schemas/test_schema.json index ea4ffd251..439af51be 100644 --- a/lambdas/shared/tests/test_common/validator/test_schemas/test_schema.json +++ b/lambdas/shared/tests/test_common/validator/test_schemas/test_schema.json @@ -11,9 +11,9 @@ "fieldNumber": 1, "errorLevel": 0, "expression": { - "expressionName": "NHS Number Not Empty Check", + "expressionName": "NHS Number String Check", "expressionType": "STRING", - "expressionRule": "" + "expressionRule": "NHS_NUMBER" }, "errorGroup": "validity" }, @@ -24,9 +24,9 @@ "fieldNumber": 2, "errorLevel": 0, "expression": { - "expressionName": "Person Forname Not Empty Check", + "expressionName": "Person Forname List Check", "expressionType": "LIST", - "expressionRule": "" + "expressionRule": "PERSON_NAME" }, "errorGroup": "completeness" }, @@ -40,7 +40,7 @@ "expression": { "expressionName": "Person Surname Not Empty Check", "expressionType": "STRING", - "expressionRule": "" + "expressionRule": "PERSON_SURNAME" }, "errorGroup": "completeness" }, @@ -66,7 +66,7 @@ "expression": { "expressionName": "Gender Valid Check", "expressionType": "STRING", - "expressionRule": "" + "expressionRule": "GENDER" }, "errorGroup": "consistency" }, diff --git a/lambdas/shared/tests/test_common/validator/test_validation_csv_row.py b/lambdas/shared/tests/test_common/validator/test_validation_csv_row.py index 6387ace62..e89c43642 100644 --- a/lambdas/shared/tests/test_common/validator/test_validation_csv_row.py +++ b/lambdas/shared/tests/test_common/validator/test_validation_csv_row.py @@ -32,11 +32,7 @@ def test_run_validation_on_invalid_csv_row(self): self.assertTrue(len(error_list) > 0) messages = [(e.name, e.message, e.details) for e in error_list] - expected_error = ( - "NHS Number Not Empty Check", - "Value not empty failure", - "Value is empty, not as expected", - ) + expected_error = "NHS Number String Check" self.assertIn(expected_error, messages) csv_parser = Mock() diff --git a/lambdas/shared/tests/test_common/validator/testing_utils/constants.py b/lambdas/shared/tests/test_common/validator/testing_utils/constants.py index 33855e31d..6fc454cbf 100644 --- a/lambdas/shared/tests/test_common/validator/testing_utils/constants.py +++ b/lambdas/shared/tests/test_common/validator/testing_utils/constants.py @@ -12,7 +12,7 @@ # Sample CSV values for testing CSV_VALUES = { "NHS_NUMBER": "9000000009", - "PERSON_FORENAME": "JOHN", + "PERSON_FORENAME": ["JOHN"], "PERSON_SURNAME": "DOE", "PERSON_DOB": "19800101", "PERSON_GENDER_CODE": "M", From f44002afd51d891b7a33d59a2bc16c558c374b32 Mon Sep 17 00:00:00 2001 From: Akol125 Date: Thu, 13 Nov 2025 12:10:13 +0000 Subject: [PATCH 06/18] test for STRING and LIST --- .../common/validator/constants/config.json | 451 ++++++++++++++++++ .../common/validator/expression_checker.py | 175 +++---- .../validator/test_expression_checker.py | 15 +- .../validator/test_schemas/test_schema.json | 417 ---------------- .../validator/test_validation_csv_row.py | 5 +- 5 files changed, 517 insertions(+), 546 deletions(-) create mode 100644 lambdas/shared/src/common/validator/constants/config.json diff --git a/lambdas/shared/src/common/validator/constants/config.json b/lambdas/shared/src/common/validator/constants/config.json new file mode 100644 index 000000000..439af51be --- /dev/null +++ b/lambdas/shared/src/common/validator/constants/config.json @@ -0,0 +1,451 @@ +{ + "id": "01K5EGR0C85TPNZT71MJ10VKYY", + "schemaName": "Base Vaccination Validation", + "version": 1.0, + "releaseDate": "2024-07-17T00:00:00.000Z", + "expressions": [ + { + "expressionId": "01K5EGR0C7Y1WJ0BC803SQDWK4", + "fieldNameFHIR": "contained|#:Patient|identifier|#:https://fhir.nhs.uk/Id/nhs-number|value", + "fieldNameFlat": "NHS_NUMBER", + "fieldNumber": 1, + "errorLevel": 0, + "expression": { + "expressionName": "NHS Number String Check", + "expressionType": "STRING", + "expressionRule": "NHS_NUMBER" + }, + "errorGroup": "validity" + }, + { + "expressionId": "01K5EGR0C7QCEJMWH1R4MBPGQA", + "fieldNameFHIR": "contained|#:Patient|name|#:official|given|0", + "fieldNameFlat": "PERSON_FORENAME", + "fieldNumber": 2, + "errorLevel": 0, + "expression": { + "expressionName": "Person Forname List Check", + "expressionType": "LIST", + "expressionRule": "PERSON_NAME" + }, + "errorGroup": "completeness" + }, + { + "expressionId": "01K5EGR0C7RRG9F6FVHJ8HE4QX", + "fieldNameFHIR": "contained|#:Patient|name|#:official|family", + "fieldNameFlat": "PERSON_SURNAME", + "fieldNumber": 3, + "errorLevel": 0, + "parentExpression": "01K5EGR0C7Y1WJ0BC803SQDWK4", + "expression": { + "expressionName": "Person Surname Not Empty Check", + "expressionType": "STRING", + "expressionRule": "PERSON_SURNAME" + }, + "errorGroup": "completeness" + }, + { + "expressionId": "01K8RW1DF635S4FRZ1WF9GDS1T", + "fieldNameFHIR": "contained|#:Patient|birthDate", + "fieldNameFlat": "PERSON_DOB", + "fieldNumber": 4, + "errorLevel": 1, + "expression": { + "expressionName": "Date of Birth Not Empty Check", + "expressionType": "DATE", + "expressionRule": "" + }, + "errorGroup": "consistency" + }, + { + "expressionId": "01K8RW2ATXRM572BFG19S8TFJ2", + "fieldNameFHIR": "contained|#:Patient|gender", + "fieldNameFlat": "PERSON_GENDER_CODE", + "fieldNumber": 5, + "errorLevel": 1, + "expression": { + "expressionName": "Gender Valid Check", + "expressionType": "STRING", + "expressionRule": "GENDER" + }, + "errorGroup": "consistency" + }, + { + "expressionId": "01K8RW2MYFNE6YZJ99RC8B3XES", + "fieldNameFHIR": "contained|#:Patient|address|#:postalCode|postalCode", + "fieldNameFlat": "PERSON_POSTCODE", + "fieldNumber": 6, + "errorLevel": 2, + "expression": { + "expressionName": "Defaults to", + "expressionType": "STRING", + "expressionRule": "" + }, + "errorGroup": "completeness" + }, + { + "expressionId": "01K8S0WRNSPQ42RBD8420Q9G7Y", + "fieldNameFHIR": "occurrenceDateTime", + "fieldNameFlat": "DATE_AND_TIME", + "fieldNumber": 7, + "errorLevel": 0, + "expression": { + "expressionName": "Date Convert", + "expressionType": "DATETIME", + "expressionRule": "" + }, + "errorGroup": "consistency" + }, + { + "expressionId": "01K5EGR0C8M1MVNKTQCE6MSG68", + "fieldNameFHIR": "performer|#:Organization|actor|identifier|value", + "fieldNameFlat": "SITE_CODE", + "fieldNumber": 8, + "errorLevel": 0, + "expression": { + "expressionName": "Organisation Look Up Check", + "expressionType": "STRING", + "expressionRule": "Organisation" + }, + "errorGroup": "consistency" + }, + { + "expressionId": "01K8S0X5AJ9048PAFEN3XVZ7YC", + "fieldNameFHIR": "performer|#:Organization|actor|identifier|system", + "fieldNameFlat": "SITE_CODE_TYPE_URI", + "fieldNumber": 9, + "errorLevel": 1, + "expression": { + "expressionName": "Defaults to", + "expressionType": "STRING", + "expressionRule": "" + }, + "errorGroup": "consistency" + }, + { + "expressionId": "01K8S0XF2Y2WP22017N9KE6VJA", + "fieldNameFHIR": "identifier|0|value", + "fieldNameFlat": "UNIQUE_ID", + "fieldNumber": 10, + "errorLevel": 0, + "expression": { + "expressionName": "Unique ID Not Empty Check", + "expressionType": "STRING", + "expressionRule": "" + }, + "errorGroup": "validity" + }, + { + "expressionId": "01K8S0XQYHDKMCA1P1GK4W5JHP", + "fieldNameFHIR": "identifier|0|system", + "fieldNameFlat": "UNIQUE_ID_URI", + "fieldNumber": 11, + "errorLevel": 0, + "expression": { + "expressionName": "Unique ID URI Not Empty Check", + "expressionType": "STRING", + "expressionRule": "" + }, + "errorGroup": "validity" + }, + { + "expressionId": "01K8S0Y0TN3NZA6VBB3HM1PDR1", + "fieldNameFHIR": "", + "fieldNameFlat": "ACTION_FLAG", + "fieldNumber": 12, + "errorLevel": 1, + "expression": { + "expressionName": "Action Flag Not Empty Check", + "expressionType": "NOTEMPTY", + "expressionRule": "" + }, + "errorGroup": "validity" + }, + { + "expressionId": "01K5EGR0C8SDQBTNCEP8TJNCCW", + "fieldNameFHIR": "contained|#:Practitioner|name|0|given|0", + "fieldNameFlat": "PERFORMING_PROFESSIONAL_FORENAME", + "fieldNumber": 13, + "errorLevel": 1, + "expression": { + "expressionName": "Practitioner Forename Not Empty Check", + "expressionType": "LIST", + "expressionRule": "" + }, + "errorGroup": "completeness" + }, + { + "expressionId": "01K5EGR0C8T3Z6X6h3W7D1F4VY", + "fieldNameFHIR": "contained|#:Practitioner|name|0|family", + "fieldNameFlat": "PERFORMING_PROFESSIONAL_SURNAME", + "fieldNumber": 14, + "errorLevel": 1, + "expression": { + "expressionName": "Practitioner Surname Not Empty Check", + "expressionType": "STRING", + "expressionRule": "" + }, + "errorGroup": "completeness" + }, + { + "expressionId": "01K8S0Y8TX8HTX6YGW61RDCATK", + "fieldNameFHIR": "recorded", + "fieldNameFlat": "RECORDED_DATE", + "fieldNumber": 15, + "errorLevel": 1, + "expression": { + "expressionName": "Recorded Date Convert", + "expressionType": "DATETIME", + "expressionRule": "false-strict-timezone" + }, + "errorGroup": "consistency" + }, + { + "expressionId": "01K5EGR0C84CCDRR0VFSWQNFZP", + "fieldNameFHIR": "primarySource", + "fieldNameFlat": "PRIMARY_SOURCE", + "fieldNumber": 16, + "errorLevel": 0, + "expression": { + "expressionName": "Primary Source Not Empty Check", + "expressionType": "BOOLEAN", + "expressionRule": "" + }, + "errorGroup": "completeness" + }, + { + "expressionId": "01K8S0YYGDFWJXN2W3THYG24EZ", + "fieldNameFHIR": "extension|0|valueCodeableConcept|coding|0|code", + "fieldNameFlat": "VACCINATION_PROCEDURE_CODE", + "fieldNumber": 17, + "errorLevel": 0, + "expression": { + "expressionName": "Procedure Code Not Empty Check", + "expressionType": "STRING", + "expressionRule": "" + }, + "errorGroup": "completeness" + }, + { + "expressionId": "01K5EGR0C85HY6MDNN6TTR1K48", + "fieldNameFHIR": "extension|0|valueCodeableConcept|coding|0|display", + "fieldNameFlat": "VACCINATION_PROCEDURE_TERM", + "fieldNumber": 18, + "errorLevel": 1, + "expression": { + "expressionName": "Procedure Term Not Empty Check", + "expressionType": "STRING", + "expressionRule": "" + }, + "errorGroup": "completeness" + }, + { + "expressionId": "01K5EGR0C84DDGW567G14AYBC6", + "fieldNameFHIR": "protocolApplied|0|doseNumberPositiveInt", + "fieldNameFlat": "DOSE_SEQUENCE", + "fieldNumber": 19, + "errorLevel": 1, + "expression": { + "expressionName": "Dose Sequence Not Empty Check", + "expressionType": "POSITIVEINTEGER", + "expressionRule": "" + }, + "errorGroup": "completeness" + }, + { + "expressionId": "01K5EGR0C8W3HXFYR80ENW73SS", + "fieldNameFHIR": "vaccineCode|coding|#:http://snomed.info/sct|code", + "fieldNameFlat": "VACCINE_PRODUCT_CODE", + "fieldNumber": 20, + "errorLevel": 0, + "expression": { + "expressionName": "Produce Code Not Empty Check", + "expressionType": "STRING", + "expressionRule": "" + }, + "errorGroup": "completeness" + }, + { + "expressionId": "01K5EGR0C885N7MMW2J5JKHTT2", + "fieldNameFHIR": "vaccineCode|coding|#:http://snomed.info/sct|display", + "fieldNameFlat": "VACCINE_PRODUCT_TERM", + "fieldNumber": 21, + "errorLevel": 1, + "expression": { + "expressionName": "Produce Term Not Empty Check", + "expressionType": "STRING", + "expressionRule": "" + }, + "errorGroup": "completeness" + }, + { + "expressionId": "01K5EGR0C86XN0AF0M9DJYFGCD", + "fieldNameFHIR": "manufacturer|display", + "fieldNameFlat": "VACCINE_MANUFACTURER", + "fieldNumber": 22, + "errorLevel": 0, + "expression": { + "expressionName": "Manufacturer Display Not Empty Check", + "expressionType": "STRING", + "expressionRule": "" + }, + "errorGroup": "completeness" + }, + { + "expressionId": "01K5EGR0C89M4CV68B7XAKDCHG", + "fieldNameFHIR": "lotNumber", + "fieldNameFlat": "BATCH_NUMBER", + "fieldNumber": 23, + "errorLevel": 0, + "expression": { + "expressionName": "Batch Number Not Empty Check", + "expressionType": "STRING", + "expressionRule": "" + }, + "errorGroup": "completeness" + }, + { + "expressionId": "01K8S2AG0CR7S28QB29XY14J71", + "fieldNameFHIR": "expirationDate", + "fieldNameFlat": "EXPIRY_DATE", + "fieldNumber": 24, + "errorLevel": 1, + "expression": { + "expressionName": "Date Convert", + "expressionType": "DATE", + "expressionRule": "" + }, + "errorGroup": "consistency" + }, + { + "expressionId": "01K8S2AS6TD0146EZN8ZDM9AGD", + "fieldNameFHIR": "site|coding|#:http://snomed.info/sct|code", + "fieldNameFlat": "SITE_OF_VACCINATION_CODE", + "fieldNumber": 25, + "errorLevel": 0, + "expression": { + "expressionName": "Site of Vaccination Code Not Empty Check", + "expressionType": "STRING", + "expressionRule": "" + }, + "errorGroup": "completeness" + }, + { + "expressionId": "01K8S2AZQ7XXTTF2AF7ZMM609C", + "fieldNameFHIR": "site|coding|#:http://snomed.info/sct|display", + "fieldNameFlat": "SITE_OF_VACCINATION_TERM", + "fieldNumber": 26, + "errorLevel": 1, + "expression": { + "expressionName": "Site of Vaccination Term Lookup Check", + "expressionType": "STRING", + "expressionRule": "" + }, + "errorGroup": "consistency" + }, + { + "expressionId": "01K8S2B78X58EB9XAZYX3M4VPP", + "fieldNameFHIR": "route|coding|#:http://snomed.info/sct|code", + "fieldNameFlat": "ROUTE_OF_VACCINATION_CODE", + "fieldNumber": 27, + "errorLevel": 0, + "expression": { + "expressionName": "Route of Vaccination Code Not Empty Check", + "expressionType": "STRING", + "expressionRule": "UniqueList" + }, + "errorGroup": "completeness" + }, + { + "expressionId": "01K8S2BEP9C9KHNJTKPP6SH1G0", + "fieldNameFHIR": "route|coding|#:http://snomed.info/sct|display", + "fieldNameFlat": "ROUTE_OF_VACCINATION_TERM", + "fieldNumber": 28, + "errorLevel": 1, + "expression": { + "expressionName": "Route of Vaccination Term Lookup Check", + "expressionType": "STRING", + "expressionRule": "" + }, + "errorGroup": "consistency" + }, + { + "expressionId": "01K8S2BNT5MET8E29GBT83AP0P", + "fieldNameFHIR": "doseQuantity|value", + "fieldNameFlat": "DOSE_AMOUNT", + "fieldNumber": 29, + "errorLevel": 1, + "expression": { + "expressionName": "Dose Amount Default Check", + "expressionType": "INTDECIMAL", + "expressionRule": "" + }, + "errorGroup": "completeness" + }, + { + "expressionId": "01K8S2BY1S0TXETJY78H418XQG", + "fieldNameFHIR": "doseQuantity|code", + "fieldNameFlat": "DOSE_UNIT_CODE", + "fieldNumber": 30, + "errorLevel": 1, + "expression": { + "expressionName": "Dose Unit Only If System Check", + "expressionType": "STRING", + "expressionRule": "" + }, + "errorGroup": "consistency" + }, + { + "expressionId": "01K8S2C3XTDW9RK9Y2FQ9YM5WJ", + "fieldNameFHIR": "doseQuantity|unit", + "fieldNameFlat": "DOSE_UNIT_TERM", + "fieldNumber": 31, + "errorLevel": 0, + "expression": { + "expressionName": "Dose Unit Term Not Empty Check", + "expressionType": "STRING", + "expressionRule": "" + }, + "errorGroup": "completeness" + }, + { + "expressionId": "01K8S2CANK2PFNDANX3D04W2NR", + "fieldNameFHIR": "reasonCode|#:http://snomed.info/sct|coding|#:http://snomed.info/sct|code", + "fieldNameFlat": "INDICATION_CODE", + "fieldNumber": 32, + "errorLevel": 0, + "expression": { + "expressionName": "Indication Code Not Empty Check", + "expressionType": "STRING", + "expressionRule": "" + }, + "errorGroup": "completeness" + }, + { + "expressionId": "01K8S2CKK8FCVJ6049EW5G563P", + "fieldNameFHIR": "location|identifier|value", + "fieldNameFlat": "LOCATION_CODE", + "fieldNumber": 33, + "errorLevel": 1, + "expression": { + "expressionName": "Location Code Default Check", + "expressionType": "STRING", + "expressionRule": "" + }, + "errorGroup": "consistency" + }, + { + "expressionId": "01K8S2CSWYXJ5WDS7K59A045JR", + "fieldNameFHIR": "location|identifier|system", + "fieldNameFlat": "LOCATION_CODE_TYPE_URI", + "fieldNumber": 34, + "errorLevel": 1, + "expression": { + "expressionName": "Location Code Type URI Default Check", + "expressionType": "STRING", + "expressionRule": "" + }, + "errorGroup": "consistency" + } + ] +} diff --git a/lambdas/shared/src/common/validator/expression_checker.py b/lambdas/shared/src/common/validator/expression_checker.py index 1883597f7..6f9c3b721 100644 --- a/lambdas/shared/src/common/validator/expression_checker.py +++ b/lambdas/shared/src/common/validator/expression_checker.py @@ -24,50 +24,60 @@ def __init__(self, data_parser, summarise: bool, report_unexpected_exception: bo self.report_unexpected_exception = report_unexpected_exception def validate_expression( - self, expression_type: str, expression_rule: str, field_name: str, field_value: str, row: dict + self, expression_type: str, expression_rule: str, field_name: str, field_value: str, row: dict = None ) -> ErrorReport: match expression_type: case "STRING": - return self.validation_for_string_values(expression_rule, field_name, field_value, row) + return self.validation_for_string_values(expression_rule, field_name, field_value) case "LIST": - return self.validation_for_list(expression_rule, field_name, field_value, row) + return self.validation_for_list(expression_rule, field_name, field_value) case "DATE": - return self.validation_for_date(expression_rule, field_name, field_value, row) + return self.validation_for_date(expression_rule, field_name, field_value) case "DATETIME": - return self.validation_for_date_time(expression_rule, field_name, field_value, row) + return self.validation_for_date_time(expression_rule, field_name, field_value) case "POSITIVEINTEGER": - return self.validation_for_positive_integer(expression_rule, field_name, field_value, row) + return self.validation_for_positive_integer(expression_rule, field_name, field_value) case "UNIQUELIST": - return self.validation_for_unique_list(expression_rule, field_name, field_value, row) + return self.validation_for_unique_list(expression_rule, field_name, field_value) case "BOOLEAN": - return self.validation_for_boolean(expression_rule, field_name, field_value, row) + return self.validation_for_boolean(expression_rule, field_name, field_value) case "INTDECIMAL": - return self.validation_for_integer_or_decimal(expression_rule, field_name, field_value, row) + return self.validation_for_integer_or_decimal(expression_rule, field_name, field_value) case "POSTCODE": - return self._validate_post_code(expression_rule, field_name, field_value, row) + return self._validate_post_code(expression_rule, field_name, field_value) case "GENDER": - return self._validate_gender(expression_rule, field_name, field_value, row) + return self._validate_gender(expression_rule, field_name, field_value) case _: return "Schema expression not found! Check your expression type : " + expression_type # ISO 8601 date/datetime validate (currently date-only) - def validation_for_date(self, _expression_rule, field_name, field_value, row): + def validation_for_date(self, _expression_rule, field_name, field_value): """ Apply pre-validation to a date field to ensure that it is a string (JSON dates must be written as strings) containing a valid date in the format "YYYY-MM-DD" """ - future_date_allowed: bool = False - if not isinstance(field_value, str): - raise TypeError(f"{field_name} must be a string") - try: - parsed_date = datetime.strptime(field_value, "%Y-%m-%d").date() - except ValueError as value_error: - raise ValueError(f'{field_name} must be a valid date string in the format "YYYY-MM-DD"') from value_error + future_date_allowed: bool = False + if not isinstance(field_value, str): + raise TypeError(f"{field_name} must be a string") - # Enforce future date rule using central checker after successful parse - if not future_date_allowed and check_if_future_date(parsed_date): - raise ValueError(f"{field_name} must not be in the future") + try: + parsed_date = datetime.strptime(field_value, "%Y-%m-%d").date() + except ValueError as value_error: + raise ValueError(f'{field_name} must be a valid date string in the format "YYYY-MM-DD"') from value_error + + # Enforce future date rule using central checker after successful parse + if not future_date_allowed and check_if_future_date(parsed_date): + raise ValueError(f"{field_name} must not be in the future") + except (TypeError, ValueError) as e: + code = ExceptionLevels.RECORD_CHECK_FAILED + message = MESSAGES[ExceptionLevels.RECORD_CHECK_FAILED] + details = str(e) + return ErrorReport(code, message, None, field_name, details) + except Exception as e: + if self.report_unexpected_exception: + message = MESSAGES[ExceptionLevels.UNEXPECTED_EXCEPTION] % (e.__class__.__name__, e) + return ErrorReport(ExceptionLevels.UNEXPECTED_EXCEPTION, message, None, field_name) def validation_for_positive_integer(self, _expression_rule, field_name, field_value, row): """ @@ -144,7 +154,7 @@ def validation_for_boolean(self, expression_rule: str, field_name: str, field_va if not isinstance(field_value, bool): raise TypeError(f"{field_name} must be a boolean") - def validation_for_list(self, expression_rule: str, field_name: str, field_value: list, row: dict): + def validation_for_list(self, expression_rule: str, field_name: str, field_value: list): """ Apply validation to a list field to ensure it is a non-empty list which meets the length requirements and requirements, if applicable, for each list element to be a non-empty string or non-empty dictionary @@ -185,7 +195,7 @@ def validation_for_list(self, expression_rule: str, field_name: str, field_value except Exception as e: if self.report_unexpected_exception: message = MESSAGES[ExceptionLevels.UNEXPECTED_EXCEPTION] % (e.__class__.__name__, e) - return ErrorReport(ExceptionLevels.UNEXPECTED_EXCEPTION, message, row, field_name, "", self.summarise) + return ErrorReport(ExceptionLevels.UNEXPECTED_EXCEPTION, message, field_name) def validation_for_date_time( self, expression_rule: str, field_name: str, field_value: str, row: dict, strict_timezone: bool = True @@ -341,19 +351,17 @@ def _validate_empty(self, _expression_rule: str, field_name: str, field_value: s return ErrorReport(ExceptionLevels.UNEXPECTED_EXCEPTION, message, row, field_name, "", self.summarise) # String Pre-Validation - def validation_for_string_values( - self, expression_rule: str, field_name: str, field_value: str, row: dict - ) -> ErrorReport: + def validation_for_string_values(self, expression_rule: str, field_name: str, field_value: str) -> ErrorReport: """ Apply validation to a string field to ensure it is a non-empty string which meets the length requirements and predefined values requirements """ - if expression_rule is not None: - rules = expression_rule_per_field(expression_rule) - defined_length = rules.get("defined_length", None) - max_length = rules.get("max_length", None) - predefined_values = rules.get("predefined_values", None) - spaces_allowed = rules.get("spaces_allowed", None) + + rules = expression_rule_per_field(expression_rule) if expression_rule else {} + defined_length = rules.get("defined_length", None) + max_length = rules.get("max_length", None) + predefined_values = rules.get("predefined_values", None) + spaces_allowed = rules.get("spaces_allowed", None) try: if not isinstance(field_value, str): @@ -379,13 +387,18 @@ def validation_for_string_values( if not spaces_allowed: if " " in field_value: raise ValueError(f"{field_name} must not contain spaces") + except (ValueError, TypeError) as e: + code = ExceptionLevels.RECORD_CHECK_FAILED + message = MESSAGES[ExceptionLevels.RECORD_CHECK_FAILED] + details = str(e) + return ErrorReport(code, message, None, field_name, details) except Exception as e: if self.report_unexpected_exception: message = MESSAGES[ExceptionLevels.UNEXPECTED_EXCEPTION] % (e.__class__.__name__, e) - return ErrorReport(ExceptionLevels.UNEXPECTED_EXCEPTION, message, row, field_name, "", self.summarise) + return ErrorReport(ExceptionLevels.UNEXPECTED_EXCEPTION, message, None, field_name) # Not Empty Validate - def _validate_not_empty(self, _expression_rule: str, field_name: str, field_value: str, row: dict) -> ErrorReport: + def _validate_not_empty(self, _expression_rule: str, field_name: str, field_value: str) -> ErrorReport: try: if not field_value: raise RecordError( @@ -396,11 +409,11 @@ def _validate_not_empty(self, _expression_rule: str, field_name: str, field_valu message = e.message if e.message is not None else MESSAGES[ExceptionLevels.RECORD_CHECK_FAILED] if e.details is not None: details = e.details - return ErrorReport(code, message, row, field_name, details, self.summarise) + return ErrorReport(code, message, None, field_name, details, self.summarise) except Exception as e: if self.report_unexpected_exception: message = MESSAGES[ExceptionLevels.UNEXPECTED_EXCEPTION] % (e.__class__.__name__, e) - return ErrorReport(ExceptionLevels.UNEXPECTED_EXCEPTION, message, row, field_name, "", self.summarise) + return ErrorReport(ExceptionLevels.UNEXPECTED_EXCEPTION, message, None, field_name) # Positive Validate def _validate_positive(self, _expression_rule: str, field_name: str, field_value: str, row: dict) -> ErrorReport: @@ -417,7 +430,7 @@ def _validate_positive(self, _expression_rule: str, field_name: str, field_value message = e.message if e.message is not None else MESSAGES[ExceptionLevels.RECORD_CHECK_FAILED] if e.details is not None: details = e.details - return ErrorReport(code, message, row, field_name, details, self.summarise) + return ErrorReport(code, message, None, field_name, details, self.summarise) except Exception as e: if self.report_unexpected_exception: message = MESSAGES[ExceptionLevels.UNEXPECTED_EXCEPTION] % (e.__class__.__name__, e) @@ -446,7 +459,7 @@ def _validate_nhs_number(self, _expression_rule: str, field_name: str, field_val return ErrorReport(ExceptionLevels.UNEXPECTED_EXCEPTION, message, row, field_name, "", self.summarise) # Gender Validate - def _validate_gender(self, _expression_rule: str, field_name: str, field_value: str, row: dict) -> ErrorReport: + def _validate_gender(self, _expression_rule: str, field_name: str, field_value: str) -> ErrorReport: try: rule_list = ["0", "1", "2", "9"] @@ -461,14 +474,14 @@ def _validate_gender(self, _expression_rule: str, field_name: str, field_value: message = e.message if e.message is not None else MESSAGES[ExceptionLevels.RECORD_CHECK_FAILED] if e.details is not None: details = e.details - return ErrorReport(code, message, row, field_name, details, self.summarise) + return ErrorReport(code, message, None, field_name, details, self.summarise) except Exception as e: if self.report_unexpected_exception: message = MESSAGES[ExceptionLevels.UNEXPECTED_EXCEPTION] % (e.__class__.__name__, e) - return ErrorReport(ExceptionLevels.UNEXPECTED_EXCEPTION, message, row, field_name, "", self.summarise) + return ErrorReport(ExceptionLevels.UNEXPECTED_EXCEPTION, message, None, field_name) # PostCode Validate - def _validate_post_code(self, _expression_rule: str, field_name: str, field_value: str, row: dict) -> ErrorReport: + def _validate_post_code(self, _expression_rule: str, field_name: str, field_value: str) -> ErrorReport: try: # UK postcode regex (allows optional space) regex_rule = r"^[A-Z]{1,2}\d[A-Z\d]?\s?\d[A-Z]{2}$" @@ -482,82 +495,8 @@ def _validate_post_code(self, _expression_rule: str, field_name: str, field_valu message = e.message if e.message is not None else MESSAGES[ExceptionLevels.RECORD_CHECK_FAILED] if e.details is not None: details = e.details - return ErrorReport(code, message, row, field_name, details, self.summarise) - except Exception as e: - if self.report_unexpected_exception: - message = MESSAGES[ExceptionLevels.UNEXPECTED_EXCEPTION] % (e.__class__.__name__, e) - return ErrorReport(ExceptionLevels.UNEXPECTED_EXCEPTION, message, row, field_name, "", self.summarise) - - # Max Objects Validate - def _validate_max_objects(self, expression_rule: str, field_name: str, field_value: str, row: dict) -> ErrorReport: - try: - value = len(field_value) - if value > int(expression_rule): - raise RecordError( - ExceptionLevels.RECORD_CHECK_FAILED, - "Max Objects failure", - "Number of objects is greater than expected", - ) - except RecordError as e: - code = e.code if e.code is not None else ExceptionLevels.RECORD_CHECK_FAILED - message = e.message if e.message is not None else MESSAGES[ExceptionLevels.RECORD_CHECK_FAILED] - if e.details is not None: - details = e.details - return ErrorReport(code, message, row, field_name, details, self.summarise) - except Exception as e: - if self.report_unexpected_exception: - message = MESSAGES[ExceptionLevels.UNEXPECTED_EXCEPTION] % (e.__class__.__name__, e) - return ErrorReport(ExceptionLevels.UNEXPECTED_EXCEPTION, message, row, field_name, "", self.summarise) - - # Default to Validate - def _validate_only_if(self, expression_rule: str, field_name: str, _field_value: str, row: dict) -> ErrorReport: - try: - conversion_list = expression_rule.split("|") - location = conversion_list[0] - value_check = conversion_list[1] - data_value = self.data_parser.get_key_value(location) - - if data_value[0] != value_check: - raise RecordError( - ExceptionLevels.RECORD_CHECK_FAILED, - "Validate Only If failure", - "Value was not found at that position", - ) - except RecordError as e: - code = e.code if e.code is not None else ExceptionLevels.RECORD_CHECK_FAILED - message = e.message if e.message is not None else MESSAGES[ExceptionLevels.RECORD_CHECK_FAILED] - if e.details is not None: - details = e.details - return ErrorReport(code, message, row, field_name, details, self.summarise) - except Exception as e: - if self.report_unexpected_exception: - message = MESSAGES[ExceptionLevels.UNEXPECTED_EXCEPTION] % (e.__class__.__name__, e) - return ErrorReport(ExceptionLevels.UNEXPECTED_EXCEPTION, message, row, field_name, "", self.summarise) - - # Check with Lookup - def _validate_against_lookup( - self, _expression_rule: str, field_name: str, field_value: str, row: dict - ) -> ErrorReport: - try: - result = self.data_look_up.find_lookup(field_value) - if not result: - raise RecordError( - ExceptionLevels.RECORD_CHECK_FAILED, - "Value lookup failure", - "Value was not found in Lookup List, " - + MessageLabel.EXPECTED_LABEL - + field_value - + " " - + MessageLabel.FOUND_LABEL - + "nothing", - ) - except RecordError as e: - code = e.code if e.code is not None else ExceptionLevels.RECORD_CHECK_FAILED - message = e.message if e.message is not None else MESSAGES[ExceptionLevels.RECORD_CHECK_FAILED] - if e.details is not None: - details = e.details - return ErrorReport(code, message, row, field_name, details, self.summarise) + return ErrorReport(code, message, None, field_name, details, self.summarise) except Exception as e: if self.report_unexpected_exception: message = MESSAGES[ExceptionLevels.UNEXPECTED_EXCEPTION] % (e.__class__.__name__, e) - return ErrorReport(ExceptionLevels.UNEXPECTED_EXCEPTION, message, row, field_name, "", self.summarise) + return ErrorReport(ExceptionLevels.UNEXPECTED_EXCEPTION, message, None, field_name, "", self.summarise) diff --git a/lambdas/shared/tests/test_common/validator/test_expression_checker.py b/lambdas/shared/tests/test_common/validator/test_expression_checker.py index 5333aec7c..e7b3ee1c4 100644 --- a/lambdas/shared/tests/test_common/validator/test_expression_checker.py +++ b/lambdas/shared/tests/test_common/validator/test_expression_checker.py @@ -30,13 +30,12 @@ def test_string_valid_and_invalid(self): "NHS_NUMBER", "contained|#:Patient|identifier|#:https://fhir.nhs.uk/Id/nhs-number|value", "9876543210", - 1, ) ) # Empty should fail NHS number string rule self.assertIsInstance( checker.validate_expression( - "STRING", "NHS_NUMBER", "contained|#:Patient|identifier|#:https://fhir.nhs.uk/Id/nhs-number|value", "", 1 + "STRING", "NHS_NUMBER", "contained|#:Patient|identifier|#:https://fhir.nhs.uk/Id/nhs-number|value", "" ), ErrorReport, ) @@ -44,17 +43,15 @@ def test_string_valid_and_invalid(self): # LIST def test_list_valid_and_invalid(self): checker = self.make_checker() - self.assertIsNone(checker.validate_expression("LIST", "PERSON_NAME", "PERSON_NAME", ["Alice"], 1)) - self.assertIsInstance(checker.validate_expression("LIST", "PERSON_NAME", "PERSON_NAME", [], 1), ErrorReport) - self.assertIsInstance(checker.validate_expression("LIST", "", "PERSON_NAME", "Alice", 1), ErrorReport) + self.assertIsNone(checker.validate_expression("LIST", "PERSON_NAME", "PERSON_NAME", ["Alice"])) + self.assertIsInstance(checker.validate_expression("LIST", "PERSON_NAME", "PERSON_NAME", []), ErrorReport) + self.assertIsInstance(checker.validate_expression("LIST", "", "PERSON_NAME", "Alice"), ErrorReport) # DATE def test_date_valid_and_invalid(self): checker = self.make_checker() - self.assertIsNone(checker.validate_expression("DATE", "", "date_field", "2025-01-01", 1)) - with self.assertRaises(Exception): - # invalid month should raise ValueError internally - checker.validate_expression("DATE", "", "date_field", "2025-13-01", 1) + self.assertIsNone(checker.validate_expression("DATE", "", "date_field", "2025-01-01")) + self.assertIsInstance(checker.validate_expression("DATE", "", "date_field", "2025-13-01"), ErrorReport) # # DATETIME diff --git a/lambdas/shared/tests/test_common/validator/test_schemas/test_schema.json b/lambdas/shared/tests/test_common/validator/test_schemas/test_schema.json index 439af51be..76d47c651 100644 --- a/lambdas/shared/tests/test_common/validator/test_schemas/test_schema.json +++ b/lambdas/shared/tests/test_common/validator/test_schemas/test_schema.json @@ -29,423 +29,6 @@ "expressionRule": "PERSON_NAME" }, "errorGroup": "completeness" - }, - { - "expressionId": "01K5EGR0C7RRG9F6FVHJ8HE4QX", - "fieldNameFHIR": "contained|#:Patient|name|#:official|family", - "fieldNameFlat": "PERSON_SURNAME", - "fieldNumber": 3, - "errorLevel": 0, - "parentExpression": "01K5EGR0C7Y1WJ0BC803SQDWK4", - "expression": { - "expressionName": "Person Surname Not Empty Check", - "expressionType": "STRING", - "expressionRule": "PERSON_SURNAME" - }, - "errorGroup": "completeness" - }, - { - "expressionId": "01K8RW1DF635S4FRZ1WF9GDS1T", - "fieldNameFHIR": "contained|#:Patient|birthDate", - "fieldNameFlat": "PERSON_DOB", - "fieldNumber": 4, - "errorLevel": 1, - "expression": { - "expressionName": "Date of Birth Not Empty Check", - "expressionType": "DATE", - "expressionRule": "" - }, - "errorGroup": "consistency" - }, - { - "expressionId": "01K8RW2ATXRM572BFG19S8TFJ2", - "fieldNameFHIR": "contained|#:Patient|gender", - "fieldNameFlat": "PERSON_GENDER_CODE", - "fieldNumber": 5, - "errorLevel": 1, - "expression": { - "expressionName": "Gender Valid Check", - "expressionType": "STRING", - "expressionRule": "GENDER" - }, - "errorGroup": "consistency" - }, - { - "expressionId": "01K8RW2MYFNE6YZJ99RC8B3XES", - "fieldNameFHIR": "contained|#:Patient|address|#:postalCode|postalCode", - "fieldNameFlat": "PERSON_POSTCODE", - "fieldNumber": 6, - "errorLevel": 2, - "expression": { - "expressionName": "Defaults to", - "expressionType": "STRING", - "expressionRule": "" - }, - "errorGroup": "completeness" - }, - { - "expressionId": "01K8S0WRNSPQ42RBD8420Q9G7Y", - "fieldNameFHIR": "occurrenceDateTime", - "fieldNameFlat": "DATE_AND_TIME", - "fieldNumber": 7, - "errorLevel": 0, - "expression": { - "expressionName": "Date Convert", - "expressionType": "DATETIME", - "expressionRule": "" - }, - "errorGroup": "consistency" - }, - { - "expressionId": "01K5EGR0C8M1MVNKTQCE6MSG68", - "fieldNameFHIR": "performer|#:Organization|actor|identifier|value", - "fieldNameFlat": "SITE_CODE", - "fieldNumber": 8, - "errorLevel": 0, - "expression": { - "expressionName": "Organisation Look Up Check", - "expressionType": "STRING", - "expressionRule": "Organisation" - }, - "errorGroup": "consistency" - }, - { - "expressionId": "01K8S0X5AJ9048PAFEN3XVZ7YC", - "fieldNameFHIR": "performer|#:Organization|actor|identifier|system", - "fieldNameFlat": "SITE_CODE_TYPE_URI", - "fieldNumber": 9, - "errorLevel": 1, - "expression": { - "expressionName": "Defaults to", - "expressionType": "STRING", - "expressionRule": "" - }, - "errorGroup": "consistency" - }, - { - "expressionId": "01K8S0XF2Y2WP22017N9KE6VJA", - "fieldNameFHIR": "identifier|0|value", - "fieldNameFlat": "UNIQUE_ID", - "fieldNumber": 10, - "errorLevel": 0, - "expression": { - "expressionName": "Unique ID Not Empty Check", - "expressionType": "STRING", - "expressionRule": "" - }, - "errorGroup": "validity" - }, - { - "expressionId": "01K8S0XQYHDKMCA1P1GK4W5JHP", - "fieldNameFHIR": "identifier|0|system", - "fieldNameFlat": "UNIQUE_ID_URI", - "fieldNumber": 11, - "errorLevel": 0, - "expression": { - "expressionName": "Unique ID URI Not Empty Check", - "expressionType": "STRING", - "expressionRule": "" - }, - "errorGroup": "validity" - }, - { - "expressionId": "01K8S0Y0TN3NZA6VBB3HM1PDR1", - "fieldNameFHIR": "", - "fieldNameFlat": "ACTION_FLAG", - "fieldNumber": 12, - "errorLevel": 1, - "expression": { - "expressionName": "Action Flag Not Empty Check", - "expressionType": "NOTEMPTY", - "expressionRule": "" - }, - "errorGroup": "validity" - }, - { - "expressionId": "01K5EGR0C8SDQBTNCEP8TJNCCW", - "fieldNameFHIR": "contained|#:Practitioner|name|0|given|0", - "fieldNameFlat": "PERFORMING_PROFESSIONAL_FORENAME", - "fieldNumber": 13, - "errorLevel": 1, - "expression": { - "expressionName": "Practitioner Forename Not Empty Check", - "expressionType": "LIST", - "expressionRule": "" - }, - "errorGroup": "completeness" - }, - { - "expressionId": "01K5EGR0C8T3Z6X6h3W7D1F4VY", - "fieldNameFHIR": "contained|#:Practitioner|name|0|family", - "fieldNameFlat": "PERFORMING_PROFESSIONAL_SURNAME", - "fieldNumber": 14, - "errorLevel": 1, - "expression": { - "expressionName": "Practitioner Surname Not Empty Check", - "expressionType": "STRING", - "expressionRule": "" - }, - "errorGroup": "completeness" - }, - { - "expressionId": "01K8S0Y8TX8HTX6YGW61RDCATK", - "fieldNameFHIR": "recorded", - "fieldNameFlat": "RECORDED_DATE", - "fieldNumber": 15, - "errorLevel": 1, - "expression": { - "expressionName": "Recorded Date Convert", - "expressionType": "DATETIME", - "expressionRule": "false-strict-timezone" - }, - "errorGroup": "consistency" - }, - { - "expressionId": "01K5EGR0C84CCDRR0VFSWQNFZP", - "fieldNameFHIR": "primarySource", - "fieldNameFlat": "PRIMARY_SOURCE", - "fieldNumber": 16, - "errorLevel": 0, - "expression": { - "expressionName": "Primary Source Not Empty Check", - "expressionType": "BOOLEAN", - "expressionRule": "" - }, - "errorGroup": "completeness" - }, - { - "expressionId": "01K8S0YYGDFWJXN2W3THYG24EZ", - "fieldNameFHIR": "extension|0|valueCodeableConcept|coding|0|code", - "fieldNameFlat": "VACCINATION_PROCEDURE_CODE", - "fieldNumber": 17, - "errorLevel": 0, - "expression": { - "expressionName": "Procedure Code Not Empty Check", - "expressionType": "STRING", - "expressionRule": "" - }, - "errorGroup": "completeness" - }, - { - "expressionId": "01K5EGR0C85HY6MDNN6TTR1K48", - "fieldNameFHIR": "extension|0|valueCodeableConcept|coding|0|display", - "fieldNameFlat": "VACCINATION_PROCEDURE_TERM", - "fieldNumber": 18, - "errorLevel": 1, - "expression": { - "expressionName": "Procedure Term Not Empty Check", - "expressionType": "STRING", - "expressionRule": "" - }, - "errorGroup": "completeness" - }, - { - "expressionId": "01K5EGR0C84DDGW567G14AYBC6", - "fieldNameFHIR": "protocolApplied|0|doseNumberPositiveInt", - "fieldNameFlat": "DOSE_SEQUENCE", - "fieldNumber": 19, - "errorLevel": 1, - "expression": { - "expressionName": "Dose Sequence Not Empty Check", - "expressionType": "POSITIVEINTEGER", - "expressionRule": "" - }, - "errorGroup": "completeness" - }, - { - "expressionId": "01K5EGR0C8W3HXFYR80ENW73SS", - "fieldNameFHIR": "vaccineCode|coding|#:http://snomed.info/sct|code", - "fieldNameFlat": "VACCINE_PRODUCT_CODE", - "fieldNumber": 20, - "errorLevel": 0, - "expression": { - "expressionName": "Produce Code Not Empty Check", - "expressionType": "STRING", - "expressionRule": "" - }, - "errorGroup": "completeness" - }, - { - "expressionId": "01K5EGR0C885N7MMW2J5JKHTT2", - "fieldNameFHIR": "vaccineCode|coding|#:http://snomed.info/sct|display", - "fieldNameFlat": "VACCINE_PRODUCT_TERM", - "fieldNumber": 21, - "errorLevel": 1, - "expression": { - "expressionName": "Produce Term Not Empty Check", - "expressionType": "STRING", - "expressionRule": "" - }, - "errorGroup": "completeness" - }, - { - "expressionId": "01K5EGR0C86XN0AF0M9DJYFGCD", - "fieldNameFHIR": "manufacturer|display", - "fieldNameFlat": "VACCINE_MANUFACTURER", - "fieldNumber": 22, - "errorLevel": 0, - "expression": { - "expressionName": "Manufacturer Display Not Empty Check", - "expressionType": "STRING", - "expressionRule": "" - }, - "errorGroup": "completeness" - }, - { - "expressionId": "01K5EGR0C89M4CV68B7XAKDCHG", - "fieldNameFHIR": "lotNumber", - "fieldNameFlat": "BATCH_NUMBER", - "fieldNumber": 23, - "errorLevel": 0, - "expression": { - "expressionName": "Batch Number Not Empty Check", - "expressionType": "STRING", - "expressionRule": "" - }, - "errorGroup": "completeness" - }, - { - "expressionId": "01K8S2AG0CR7S28QB29XY14J71", - "fieldNameFHIR": "expirationDate", - "fieldNameFlat": "EXPIRY_DATE", - "fieldNumber": 24, - "errorLevel": 1, - "expression": { - "expressionName": "Date Convert", - "expressionType": "DATE", - "expressionRule": "" - }, - "errorGroup": "consistency" - }, - { - "expressionId": "01K8S2AS6TD0146EZN8ZDM9AGD", - "fieldNameFHIR": "site|coding|#:http://snomed.info/sct|code", - "fieldNameFlat": "SITE_OF_VACCINATION_CODE", - "fieldNumber": 25, - "errorLevel": 0, - "expression": { - "expressionName": "Site of Vaccination Code Not Empty Check", - "expressionType": "STRING", - "expressionRule": "" - }, - "errorGroup": "completeness" - }, - { - "expressionId": "01K8S2AZQ7XXTTF2AF7ZMM609C", - "fieldNameFHIR": "site|coding|#:http://snomed.info/sct|display", - "fieldNameFlat": "SITE_OF_VACCINATION_TERM", - "fieldNumber": 26, - "errorLevel": 1, - "expression": { - "expressionName": "Site of Vaccination Term Lookup Check", - "expressionType": "STRING", - "expressionRule": "" - }, - "errorGroup": "consistency" - }, - { - "expressionId": "01K8S2B78X58EB9XAZYX3M4VPP", - "fieldNameFHIR": "route|coding|#:http://snomed.info/sct|code", - "fieldNameFlat": "ROUTE_OF_VACCINATION_CODE", - "fieldNumber": 27, - "errorLevel": 0, - "expression": { - "expressionName": "Route of Vaccination Code Not Empty Check", - "expressionType": "STRING", - "expressionRule": "UniqueList" - }, - "errorGroup": "completeness" - }, - { - "expressionId": "01K8S2BEP9C9KHNJTKPP6SH1G0", - "fieldNameFHIR": "route|coding|#:http://snomed.info/sct|display", - "fieldNameFlat": "ROUTE_OF_VACCINATION_TERM", - "fieldNumber": 28, - "errorLevel": 1, - "expression": { - "expressionName": "Route of Vaccination Term Lookup Check", - "expressionType": "STRING", - "expressionRule": "" - }, - "errorGroup": "consistency" - }, - { - "expressionId": "01K8S2BNT5MET8E29GBT83AP0P", - "fieldNameFHIR": "doseQuantity|value", - "fieldNameFlat": "DOSE_AMOUNT", - "fieldNumber": 29, - "errorLevel": 1, - "expression": { - "expressionName": "Dose Amount Default Check", - "expressionType": "INTDECIMAL", - "expressionRule": "" - }, - "errorGroup": "completeness" - }, - { - "expressionId": "01K8S2BY1S0TXETJY78H418XQG", - "fieldNameFHIR": "doseQuantity|code", - "fieldNameFlat": "DOSE_UNIT_CODE", - "fieldNumber": 30, - "errorLevel": 1, - "expression": { - "expressionName": "Dose Unit Only If System Check", - "expressionType": "STRING", - "expressionRule": "" - }, - "errorGroup": "consistency" - }, - { - "expressionId": "01K8S2C3XTDW9RK9Y2FQ9YM5WJ", - "fieldNameFHIR": "doseQuantity|unit", - "fieldNameFlat": "DOSE_UNIT_TERM", - "fieldNumber": 31, - "errorLevel": 0, - "expression": { - "expressionName": "Dose Unit Term Not Empty Check", - "expressionType": "STRING", - "expressionRule": "" - }, - "errorGroup": "completeness" - }, - { - "expressionId": "01K8S2CANK2PFNDANX3D04W2NR", - "fieldNameFHIR": "reasonCode|#:http://snomed.info/sct|coding|#:http://snomed.info/sct|code", - "fieldNameFlat": "INDICATION_CODE", - "fieldNumber": 32, - "errorLevel": 0, - "expression": { - "expressionName": "Indication Code Not Empty Check", - "expressionType": "STRING", - "expressionRule": "" - }, - "errorGroup": "completeness" - }, - { - "expressionId": "01K8S2CKK8FCVJ6049EW5G563P", - "fieldNameFHIR": "location|identifier|value", - "fieldNameFlat": "LOCATION_CODE", - "fieldNumber": 33, - "errorLevel": 1, - "expression": { - "expressionName": "Location Code Default Check", - "expressionType": "STRING", - "expressionRule": "" - }, - "errorGroup": "consistency" - }, - { - "expressionId": "01K8S2CSWYXJ5WDS7K59A045JR", - "fieldNameFHIR": "location|identifier|system", - "fieldNameFlat": "LOCATION_CODE_TYPE_URI", - "fieldNumber": 34, - "errorLevel": 1, - "expression": { - "expressionName": "Location Code Type URI Default Check", - "expressionType": "STRING", - "expressionRule": "" - }, - "errorGroup": "consistency" } ] } diff --git a/lambdas/shared/tests/test_common/validator/test_validation_csv_row.py b/lambdas/shared/tests/test_common/validator/test_validation_csv_row.py index e89c43642..09a0be94f 100644 --- a/lambdas/shared/tests/test_common/validator/test_validation_csv_row.py +++ b/lambdas/shared/tests/test_common/validator/test_validation_csv_row.py @@ -32,8 +32,9 @@ def test_run_validation_on_invalid_csv_row(self): self.assertTrue(len(error_list) > 0) messages = [(e.name, e.message, e.details) for e in error_list] - expected_error = "NHS Number String Check" - self.assertIn(expected_error, messages) + expected_error = "NHS_NUMBER must be 10 characters" + print(f"test_messages {messages}") + self.assertTrue(any(expected_error in msg[2] for msg in messages)) csv_parser = Mock() csv_parser.extract_field_value.return_value = "2025-11-06T12:00:00Z" From 9d3ea262a5c23e0406f93966e5d941e4b08ccaa4 Mon Sep 17 00:00:00 2001 From: Akol125 Date: Thu, 13 Nov 2025 14:07:01 +0000 Subject: [PATCH 07/18] refactor validation for lists --- .../common/validator/expression_checker.py | 20 ++++---- .../src/common/validator/expression_rule.py | 8 ++-- .../validator/test_csv_line_parser.py | 10 ++-- .../validator/test_expression_checker.py | 48 +++++++++++++++---- .../validator/test_schemas/test_schema.json | 28 ++++++++++- .../validator/test_validation_csv_row.py | 1 + .../test_common/validator/test_validator.py | 1 + .../validator/testing_utils/constants.py | 2 +- 8 files changed, 89 insertions(+), 29 deletions(-) diff --git a/lambdas/shared/src/common/validator/expression_checker.py b/lambdas/shared/src/common/validator/expression_checker.py index 6f9c3b721..84bf6ff11 100644 --- a/lambdas/shared/src/common/validator/expression_checker.py +++ b/lambdas/shared/src/common/validator/expression_checker.py @@ -161,9 +161,8 @@ def validation_for_list(self, expression_rule: str, field_name: str, field_value """ rules = expression_rule_per_field(expression_rule) if expression_rule else {} defined_length: Optional[int] = rules.get("defined_length", None) - max_length: Optional[int] = rules.get("max_length", None) + array_max_length: Optional[int] = rules.get("array_max_length", None) elements_are_strings: bool = rules.get("elements_are_strings", False) - string_element_max_length: Optional[int] = rules.get("string_element_max_length", None) elements_are_dicts: bool = rules.get("elements_are_dicts", False) try: @@ -177,14 +176,14 @@ def validation_for_list(self, expression_rule: str, field_name: str, field_value if len(field_value) == 0: raise ValueError(f"{field_name} must be a non-empty array") - if max_length is not None and len(field_value) > max_length: - raise ValueError(f"{field_name} must be an array of maximum length {max_length}") + if array_max_length is not None and len(field_value) > array_max_length: + raise ValueError(f"{field_name} must be an array of maximum length {array_max_length}") if elements_are_strings: for idx, element in enumerate(field_value): - self._validate_for_string_values.for_string( - element, f"{field_name}[{idx}]", max_length=string_element_max_length - ) + error_report = self.validation_for_string_values(expression_rule, f"{field_name}[{idx}]", element) + if error_report is not None: + return error_report if elements_are_dicts: for element in field_value: @@ -192,10 +191,15 @@ def validation_for_list(self, expression_rule: str, field_name: str, field_value raise TypeError(f"{field_name} must be an array of objects") if len(element) == 0: raise ValueError(f"{field_name} must be an array of non-empty objects") + except (TypeError, ValueError) as e: + code = ExceptionLevels.RECORD_CHECK_FAILED + message = MESSAGES[ExceptionLevels.RECORD_CHECK_FAILED] + details = str(e) + return ErrorReport(code, message, None, field_name, details) except Exception as e: if self.report_unexpected_exception: message = MESSAGES[ExceptionLevels.UNEXPECTED_EXCEPTION] % (e.__class__.__name__, e) - return ErrorReport(ExceptionLevels.UNEXPECTED_EXCEPTION, message, field_name) + return ErrorReport(ExceptionLevels.UNEXPECTED_EXCEPTION, message, None, field_name, "") def validation_for_date_time( self, expression_rule: str, field_name: str, field_value: str, row: dict, strict_timezone: bool = True diff --git a/lambdas/shared/src/common/validator/expression_rule.py b/lambdas/shared/src/common/validator/expression_rule.py index 79065117e..60b6cefb0 100644 --- a/lambdas/shared/src/common/validator/expression_rule.py +++ b/lambdas/shared/src/common/validator/expression_rule.py @@ -6,13 +6,13 @@ def expression_rule_per_field(expression_type: str) -> dict: case "NHS_NUMBER": return {"defined_length": Constants.NHS_NUMBER_LENGTH, "spaces_allowed": False} case "PERSON_NAME": - return {"max_length": Constants.PERSON_NAME_ELEMENT_MAX_LENGTH} - case "PERSON_SURNAME": return { "elements_are_strings": True, - "max_length": 5, - "string_element_max_length": Constants.PERSON_NAME_ELEMENT_MAX_LENGTH, + "array_max_length": 5, + "max_length": Constants.PERSON_NAME_ELEMENT_MAX_LENGTH, } + case "PERSON_SURNAME": + return {"max_length": Constants.PERSON_NAME_ELEMENT_MAX_LENGTH} case "GENDER": return {"predefined_values": Constants.GENDERS} case _: diff --git a/lambdas/shared/tests/test_common/validator/test_csv_line_parser.py b/lambdas/shared/tests/test_common/validator/test_csv_line_parser.py index f35364601..8320fed63 100644 --- a/lambdas/shared/tests/test_common/validator/test_csv_line_parser.py +++ b/lambdas/shared/tests/test_common/validator/test_csv_line_parser.py @@ -22,12 +22,12 @@ def test_extra_values_ignored(self): Ignore values that do not have a corresponding key """ csv_parsers = CSVLineParser() - csv_parsers.parse_csv_line({"NHS_NUMBER": "9000000009", "PERSON_FORENAME": "Alex", "": "Trent"}) + csv_parsers.parse_csv_line({"NHS_NUMBER": "9000000009", "PERSON_FORENAME": ["Alex"], "": "Trent"}) self.assertEqual( csv_parsers.csv_file_data, - {"NHS_NUMBER": "9000000009", "PERSON_FORENAME": "Alex", "": "Trent"}, + {"NHS_NUMBER": "9000000009", "PERSON_FORENAME": ["Alex"], "": "Trent"}, ) - self.assertEqual(csv_parsers.get_key_value("PERSON_FORENAME"), "Alex") + self.assertEqual(csv_parsers.get_key_value("PERSON_FORENAME"), ["Alex"]) def test_fewer_values_than_keys(self): """ @@ -35,7 +35,7 @@ def test_fewer_values_than_keys(self): raises an error when accessing key without value """ csv_parsers = CSVLineParser() - csv_parsers.parse_csv_line({"NHS_NUMBER": "9000000009", "PERSON_FORENAME": "Alex"}) + csv_parsers.parse_csv_line({"NHS_NUMBER": "9000000009", "PERSON_FORENAME": ["Alex"]}) self.assertIn("NHS_NUMBER", csv_parsers.csv_file_data) self.assertIn("PERSON_FORENAME", csv_parsers.csv_file_data) self.assertNotIn("PERSON_SURNAME", csv_parsers.csv_file_data) @@ -46,7 +46,7 @@ def test_get_missing_key_raises(self): """ Test that accessing a non-existent key raises KeyError""" csv_parsers = CSVLineParser() - csv_parsers.parse_csv_line({"NHS_NUMBER": "9000000009", "PERSON_FORENAME": "Alex"}) + csv_parsers.parse_csv_line({"NHS_NUMBER": "9000000009", "PERSON_FORENAME": ["Alex"]}) with self.assertRaises(KeyError): _ = csv_parsers.get_key_value("VACCINE_TYPE") diff --git a/lambdas/shared/tests/test_common/validator/test_expression_checker.py b/lambdas/shared/tests/test_common/validator/test_expression_checker.py index e7b3ee1c4..cafd369e1 100644 --- a/lambdas/shared/tests/test_common/validator/test_expression_checker.py +++ b/lambdas/shared/tests/test_common/validator/test_expression_checker.py @@ -32,6 +32,14 @@ def test_string_valid_and_invalid(self): "9876543210", ) ) + self.assertIsNone( + checker.validate_expression( + "STRING", + "NHS_NUMBER", + "contained|#:Patient|identifier|#:https://fhir.nhs.uk/Id/nhs-number|value", + "9876543210", + ) + ) # Empty should fail NHS number string rule self.assertIsInstance( checker.validate_expression( @@ -40,18 +48,38 @@ def test_string_valid_and_invalid(self): ErrorReport, ) - # LIST - def test_list_valid_and_invalid(self): - checker = self.make_checker() - self.assertIsNone(checker.validate_expression("LIST", "PERSON_NAME", "PERSON_NAME", ["Alice"])) - self.assertIsInstance(checker.validate_expression("LIST", "PERSON_NAME", "PERSON_NAME", []), ErrorReport) - self.assertIsInstance(checker.validate_expression("LIST", "", "PERSON_NAME", "Alice"), ErrorReport) + # VALID PERSON_SURNAME STRING + self.assertIsNone( + checker.validate_expression( + "STRING", "PERSON_SURNAME", "contained|#:Patient|name|#:official|family", "Smith" + ) + ) + self.assertIsNone(checker.validate_expression("STRING", "PERSON_SURNAME", "PERSON_SURNAME", "Taylor")) + # INVALID PERSON_SURNAME STRING (too long) + self.assertIsInstance( + checker.validate_expression( + "STRING", "PERSON_SURNAME", "contained|#:Patient|name|#:official|family", "Stan" * 51 + ), + ErrorReport, + ) - # DATE - def test_date_valid_and_invalid(self): + # LIST PERSON_FORENAME + def test_list_valid_and_invalid(self): checker = self.make_checker() - self.assertIsNone(checker.validate_expression("DATE", "", "date_field", "2025-01-01")) - self.assertIsInstance(checker.validate_expression("DATE", "", "date_field", "2025-13-01"), ErrorReport) + self.assertIsNone(checker.validate_expression("LIST", "PERSON_NAME", "PERSON_FORENAME", ["Alice"])) + self.assertIsNone( + checker.validate_expression( + "LIST", "PERSON_NAME", "contained|#:Patient|name|#:official|given|0", ["Bethany"] + ) + ) + self.assertIsInstance(checker.validate_expression("LIST", "PERSON_NAME", "PERSON_FORENAME", []), ErrorReport) + self.assertIsInstance(checker.validate_expression("LIST", "", "PERSON_FORENAME", "Alice"), ErrorReport) + + # # DATE + # def test_date_valid_and_invalid(self): + # checker = self.make_checker() + # self.assertIsNone(checker.validate_expression("DATE", "", "contained|#:Patient|name|#:official|given|0", "2025-01-01")) + # self.assertIsInstance(checker.validate_expression("DATE", "", "date_field", "2025-13-01"), ErrorReport) # # DATETIME diff --git a/lambdas/shared/tests/test_common/validator/test_schemas/test_schema.json b/lambdas/shared/tests/test_common/validator/test_schemas/test_schema.json index 76d47c651..ed85a722a 100644 --- a/lambdas/shared/tests/test_common/validator/test_schemas/test_schema.json +++ b/lambdas/shared/tests/test_common/validator/test_schemas/test_schema.json @@ -24,11 +24,37 @@ "fieldNumber": 2, "errorLevel": 0, "expression": { - "expressionName": "Person Forname List Check", + "expressionName": "Person Forename List Check", "expressionType": "LIST", "expressionRule": "PERSON_NAME" }, "errorGroup": "completeness" + }, + { + "expressionId": "01K5EGR0C7RRG9F6FVHJ8HE4QX", + "fieldNameFHIR": "contained|#:Patient|name|#:official|family", + "fieldNameFlat": "PERSON_SURNAME", + "fieldNumber": 3, + "errorLevel": 0, + "expression": { + "expressionName": "Person Surname String Check", + "expressionType": "STRING", + "expressionRule": "PERSON_SURNAME" + }, + "errorGroup": "completeness" + }, + { + "expressionId": "01K8RW1DF635S4FRZ1WF9GDS1T", + "fieldNameFHIR": "contained|#:Patient|birthDate", + "fieldNameFlat": "PERSON_DOB", + "fieldNumber": 4, + "errorLevel": 1, + "expression": { + "expressionName": "Date of Birth Not Empty Check", + "expressionType": "DATE", + "expressionRule": "" + }, + "errorGroup": "consistency" } ] } diff --git a/lambdas/shared/tests/test_common/validator/test_validation_csv_row.py b/lambdas/shared/tests/test_common/validator/test_validation_csv_row.py index 09a0be94f..709012e32 100644 --- a/lambdas/shared/tests/test_common/validator/test_validation_csv_row.py +++ b/lambdas/shared/tests/test_common/validator/test_validation_csv_row.py @@ -24,6 +24,7 @@ def setUp(self): def test_run_validation_on_valid_csv_row(self): error_list = self.validator.validate_csv_row(CSV_VALUES, True, True, True) + print(f"Run Validation Errors for valid CSV row: {error_list}") self.assertEqual(error_list, []) def test_run_validation_on_invalid_csv_row(self): diff --git a/lambdas/shared/tests/test_common/validator/test_validator.py b/lambdas/shared/tests/test_common/validator/test_validator.py index 30c544790..5d1fba81c 100644 --- a/lambdas/shared/tests/test_common/validator/test_validator.py +++ b/lambdas/shared/tests/test_common/validator/test_validator.py @@ -27,6 +27,7 @@ def test_validate_csv(self): self.assertIsInstance(result, list) self.assertTrue(all(isinstance(err, ErrorReport) for err in result)) + print(f"CSV Validation Errors: {result}") self.assertEqual(len(result), 0, "Expected no validation errors for valid CSV row") def test_has_validation_failed_detects_critical_error(self): diff --git a/lambdas/shared/tests/test_common/validator/testing_utils/constants.py b/lambdas/shared/tests/test_common/validator/testing_utils/constants.py index 6fc454cbf..5a710a079 100644 --- a/lambdas/shared/tests/test_common/validator/testing_utils/constants.py +++ b/lambdas/shared/tests/test_common/validator/testing_utils/constants.py @@ -14,7 +14,7 @@ "NHS_NUMBER": "9000000009", "PERSON_FORENAME": ["JOHN"], "PERSON_SURNAME": "DOE", - "PERSON_DOB": "19800101", + "PERSON_DOB": "1980-01-01", "PERSON_GENDER_CODE": "M", "PERSON_POSTCODE": "AB12 3CD", "DATE_AND_TIME": "20250306T13281701", From b08c49f4e315f4bdf7e192b940e76c19f83e06df Mon Sep 17 00:00:00 2001 From: Akol125 Date: Thu, 13 Nov 2025 16:16:23 +0000 Subject: [PATCH 08/18] add more expression rules and fields --- .../fhir_immunization_pre_validators.py | 49 ++++++++-- .../common/validator/constants/constants.py | 25 +++++ .../validator/error_report/record_error.py | 3 + .../common/validator/expression_checker.py | 94 ++++++++----------- .../src/common/validator/expression_rule.py | 2 + .../validator/test_expression_checker.py | 71 +++++++++++--- .../validator/test_schemas/test_schema.json | 26 +++++ .../validator/testing_utils/constants.py | 4 +- 8 files changed, 196 insertions(+), 78 deletions(-) diff --git a/backend/src/models/fhir_immunization_pre_validators.py b/backend/src/models/fhir_immunization_pre_validators.py index 23f10eab9..8f45585c9 100644 --- a/backend/src/models/fhir_immunization_pre_validators.py +++ b/backend/src/models/fhir_immunization_pre_validators.py @@ -96,7 +96,9 @@ def validate(self): self.pre_validate_value_codeable_concept, self.pre_validate_extension_length, self.pre_validate_vaccination_procedure_code, + self.pre_validate_vaccination_procedure_display, self.pre_validate_vaccine_code, + self.pre_validate_vaccine_display, ] for method in validation_methods: @@ -590,7 +592,7 @@ def pre_validate_vaccination_procedure_code(self, values: dict) -> dict: (legacy CSV field name: VACCINATION_PROCEDURE_CODE) exists, then it is a non-empty string """ url = "https://fhir.hl7.org.uk/StructureDefinition/Extension-UKCore-" + "VaccinationProcedure" - system = "http://snomed.info/sct" + system = Urls.snomed field_type = "code" field_location = generate_field_location_for_extension(url, system, field_type) try: @@ -600,6 +602,22 @@ def pre_validate_vaccination_procedure_code(self, values: dict) -> dict: except (KeyError, IndexError): pass + def pre_validate_vaccination_procedure_display(self, values: dict) -> dict: + """ + Pre-validate that, if extension[?(@.url=='https://fhir.hl7.org.uk/StructureDefinition/Extension-UKCore- + VaccinationProcedure')].valueCodeableConcept.coding[?(@.system=='http://snomed.info/sct')].display + (legacy CSV field name: VACCINATION_PROCEDURE_TERM) exists, then it is a non-empty string + """ + url = "https://fhir.hl7.org.uk/StructureDefinition/Extension-UKCore-" + "VaccinationProcedure" + system = Urls.snomed + field_type = "display" + field_location = generate_field_location_for_extension(url, system, field_type) + try: + field_value = get_generic_extension_value(values, url, system, field_type) + PreValidation.for_string(field_value, field_location) + except (KeyError, IndexError): + pass + def pre_validate_vaccination_situation_code(self, values: dict) -> dict: """ Pre-validate that, if extension[?(@.url=='https://fhir.hl7.org.uk/StructureDefinition/Extension-UKCore- @@ -607,7 +625,7 @@ def pre_validate_vaccination_situation_code(self, values: dict) -> dict: (legacy CSV field name: VACCINATION_SITUATION_CODE) exists, then it is a non-empty string """ url = "https://fhir.hl7.org.uk/StructureDefinition/Extension-UKCore-VaccinationSituation" - system = "http://snomed.info/sct" + system = Urls.snomed field_type = "code" field_location = generate_field_location_for_extension(url, system, field_type) try: @@ -623,7 +641,7 @@ def pre_validate_vaccination_situation_display(self, values: dict) -> dict: (legacy CSV field name: VACCINATION_SITUATION_TERM) exists, then it is a non-empty string """ url = "https://fhir.hl7.org.uk/StructureDefinition/Extension-UKCore-VaccinationSituation" - system = "http://snomed.info/sct" + system = Urls.snomed field_type = "display" field_location = generate_field_location_for_extension(url, system, field_type) try: @@ -702,7 +720,7 @@ def pre_validate_disease_type_coding_codes(self, values: dict) -> dict: Pre-validate that, if protocolApplied[0].targetDisease[{i}].coding[?(@.system=='http://snomed.info/sct')].code exists, then it is a non-empty string """ - url = "http://snomed.info/sct" + url = Urls.snomed try: for i in range(len(values["protocolApplied"][0]["targetDisease"])): field_location = f"protocolApplied[0].targetDisease[{i}].coding[?(@.system=='{url}')].code" @@ -761,7 +779,7 @@ def pre_validate_site_coding_code(self, values: dict) -> dict: Pre-validate that, if site.coding[?(@.system=='http://snomed.info/sct')].code (legacy CSV field name: SITE_OF_VACCINATION_CODE) exists, then it is a non-empty string """ - url = "http://snomed.info/sct" + url = Urls.snomed field_location = f"site.coding[?(@.system=='{url}')].code" try: site_coding_code = [x for x in values["site"]["coding"] if x.get("system") == url][0]["code"] @@ -774,7 +792,7 @@ def pre_validate_site_coding_display(self, values: dict) -> dict: Pre-validate that, if site.coding[?(@.system=='http://snomed.info/sct')].display (legacy CSV field name: SITE_OF_VACCINATION_TERM) exists, then it is a non-empty string """ - url = "http://snomed.info/sct" + url = Urls.snomed field_location = f"site.coding[?(@.system=='{url}')].display" try: field_value = [x for x in values["site"]["coding"] if x.get("system") == url][0]["display"] @@ -795,7 +813,7 @@ def pre_validate_route_coding_code(self, values: dict) -> dict: Pre-validate that, if route.coding[?(@.system=='http://snomed.info/sct')].code (legacy CSV field name: ROUTE_OF_VACCINATION_CODE) exists, then it is a non-empty string """ - url = "http://snomed.info/sct" + url = Urls.snomed field_location = f"route.coding[?(@.system=='{url}')].code" try: field_value = [x for x in values["route"]["coding"] if x.get("system") == url][0]["code"] @@ -808,7 +826,7 @@ def pre_validate_route_coding_display(self, values: dict) -> dict: Pre-validate that, if route.coding[?(@.system=='http://snomed.info/sct')].display (legacy CSV field name: ROUTE_OF_VACCINATION_TERM) exists, then it is a non-empty string """ - url = "http://snomed.info/sct" + url = Urls.snomed field_location = f"route.coding[?(@.system=='{url}')].display" try: field_value = [x for x in values["route"]["coding"] if x.get("system") == url][0]["display"] @@ -951,7 +969,7 @@ def pre_validate_vaccine_code(self, values: dict) -> dict: NOTE: vaccineCode is a mandatory FHIR field. A value of None will be rejected by the FHIR model before pre-validators are run. """ - url = "http://snomed.info/sct" + url = Urls.snomed field_location = f"vaccineCode.coding[?(@.system=='{url}')].code" try: field_value = [x for x in values["vaccineCode"]["coding"] if x.get("system") == url][0]["code"] @@ -959,3 +977,16 @@ def pre_validate_vaccine_code(self, values: dict) -> dict: PreValidation.for_snomed_code(field_value, field_location) except (KeyError, IndexError): pass + + def pre_validate_vaccine_display(self, values: dict) -> dict: + """ + Pre-validate that, if vaccineCode.coding[?(@.system=='http://snomed.info/sct')].display + (legacy CSV field : VACCINE_PRODUCT_TERM) exists, then it is a non-empty string + """ + url = Urls.snomed + field_location = f"vaccineCode.coding[?(@.system=='{url}')].display" + try: + field_value = [x for x in values["vaccineCode"]["coding"] if x.get("system") == url][0]["display"] + PreValidation.for_string(field_value, field_location) + except (KeyError, IndexError): + pass diff --git a/lambdas/shared/src/common/validator/constants/constants.py b/lambdas/shared/src/common/validator/constants/constants.py index a11be49c4..bc9c7d216 100644 --- a/lambdas/shared/src/common/validator/constants/constants.py +++ b/lambdas/shared/src/common/validator/constants/constants.py @@ -2,3 +2,28 @@ class Constants: NHS_NUMBER_LENGTH = 10 PERSON_NAME_ELEMENT_MAX_LENGTH = 35 GENDERS = ["male", "female", "other", "unknown"] + DATETIME_FORMAT = "%Y-%m-%dT%H:%M" + DATETIME_FORMAT = [ + "%Y-%m-%d", + "%Y-%m-%dT%H:%M:%S%z", + "%Y-%m-%dT%H:%M:%S.%f%z", + ] + ALLOWED_SUFFIXES = { + "+00:00", + "+01:00", + "+0000", + "+0100", + } + field_name = "FIELD_TO_REPLACE" + + DATETIME_ERROR_MESSAGE = ( + f"{field_name} must be a valid datetime in one of the following formats:" + "- 'YYYY-MM-DD' — Full date only" + "- 'YYYY-MM-DDThh:mm:ss%z' — Full date and time with timezone (e.g. +00:00 or +01:00)" + "- 'YYYY-MM-DDThh:mm:ss.f%z' — Full date and time with milliseconds and timezone" + "- Date must not be in the future." + ) + STRICT_DATETIME_ERROR_MESSAGE = ( + "Only '+00:00' and '+01:00' are accepted as valid timezone offsets.\n" + f"Note that partial dates are not allowed for {field_name} in this service.\n" + ) diff --git a/lambdas/shared/src/common/validator/error_report/record_error.py b/lambdas/shared/src/common/validator/error_report/record_error.py index f9bd0442e..f92efc2f6 100644 --- a/lambdas/shared/src/common/validator/error_report/record_error.py +++ b/lambdas/shared/src/common/validator/error_report/record_error.py @@ -21,6 +21,9 @@ def __init__( self.id = None self.error_level = error_level + def __repr__(self): + return f"" + # function to return the object as a dictionary def to_dict(self): ret = {"code": self.code, "message": self.message} diff --git a/lambdas/shared/src/common/validator/expression_checker.py b/lambdas/shared/src/common/validator/expression_checker.py index 84bf6ff11..789452910 100644 --- a/lambdas/shared/src/common/validator/expression_checker.py +++ b/lambdas/shared/src/common/validator/expression_checker.py @@ -9,6 +9,7 @@ from common.validator.lookup_expressions.key_data import KeyData from common.validator.lookup_expressions.lookup_data import LookUpData from common.validator.validation_utils import check_if_future_date +from src.common.validator.constants.constants import Constants class ExpressionChecker: @@ -51,7 +52,7 @@ def validate_expression( return "Schema expression not found! Check your expression type : " + expression_type # ISO 8601 date/datetime validate (currently date-only) - def validation_for_date(self, _expression_rule, field_name, field_value): + def validation_for_date(self, _expression_rule, field_name, field_value) -> ErrorReport: """ Apply pre-validation to a date field to ensure that it is a string (JSON dates must be written as strings) containing a valid date in the format "YYYY-MM-DD" @@ -201,63 +202,50 @@ def validation_for_list(self, expression_rule: str, field_name: str, field_value message = MESSAGES[ExceptionLevels.UNEXPECTED_EXCEPTION] % (e.__class__.__name__, e) return ErrorReport(ExceptionLevels.UNEXPECTED_EXCEPTION, message, None, field_name, "") - def validation_for_date_time( - self, expression_rule: str, field_name: str, field_value: str, row: dict, strict_timezone: bool = True - ): + def validation_for_date_time(self, expression_rule: str, field_name: str, field_value: str) -> ErrorReport: """ Apply pre-validation to a datetime field to ensure that it is a string (JSON dates must be written as strings) containing a valid datetime. Note that partial dates are valid for FHIR, but are not allowed for this API. - Valid formats are any of the following: - * 'YYYY-MM-DD' - Full date only - * 'YYYY-MM-DDThh:mm:ss%z' - Full date, time without milliseconds, timezone + Valid formats are any of the following: * 'YYYY-MM-DD' - Full date only * 'YYYY-MM-DDThh:mm:ss%z' - Full date, time without milliseconds, timezone * 'YYYY-MM-DDThh:mm:ss.f%z' - Full date, time with milliseconds (any level of precision), timezone """ + rules = expression_rule_per_field(expression_rule) if expression_rule else {} + strict_timezone = rules.get("strict_time_zone", False) + try: + if not isinstance(field_value, str): + raise TypeError(f"{field_name} must be a string") - if not isinstance(field_value, str): - raise TypeError(f"{field_name} must be a string") - - error_message = ( - f"{field_name} must be a valid datetime in one of the following formats:" - "- 'YYYY-MM-DD' — Full date only" - "- 'YYYY-MM-DDThh:mm:ss%z' — Full date and time with timezone (e.g. +00:00 or +01:00)" - "- 'YYYY-MM-DDThh:mm:ss.f%z' — Full date and time with milliseconds and timezone" - "- Date must not be in the future." - ) - if strict_timezone: - error_message += ( - "Only '+00:00' and '+01:00' are accepted as valid timezone offsets.\n" - f"Note that partial dates are not allowed for {field_name} in this service.\n" - ) - - allowed_suffixes = { - "+00:00", - "+01:00", - "+0000", - "+0100", - } - - # List of accepted strict formats - formats = [ - "%Y-%m-%d", - "%Y-%m-%dT%H:%M:%S%z", - "%Y-%m-%dT%H:%M:%S.%f%z", - ] - - for fmt in formats: - try: - fhir_date = datetime.strptime(field_value, fmt) - # Enforce future-date rule using central checker after successful parse - if check_if_future_date(fhir_date): - raise ValueError(f"{field_name} must not be in the future") - # After successful parse, enforce timezone and future-date rules - if strict_timezone and fhir_date.tzinfo is not None: - if not any(field_value.endswith(suffix) for suffix in allowed_suffixes): - raise ValueError(error_message) - return fhir_date.isoformat() - except ValueError: - continue - - raise ValueError(error_message) + error_message = Constants.DATETIME_ERROR_MESSAGE.replace("FIELD_TO_REPLACE", field_name) + if strict_timezone: + error_message += Constants.STRICT_DATETIME_ERROR_MESSAGE.replace("FIELD_TO_REPLACE", field_name) + + # List of accepted strict formats and suffixes + allowed_suffixes = Constants.ALLOWED_SUFFIXES + formats = Constants.DATETIME_FORMAT + + for fmt in formats: + try: + fhir_date = datetime.strptime(field_value, fmt) + # Enforce future-date rule using central checker after successful parse + if check_if_future_date(fhir_date): + raise ValueError(f"{field_name} must not be in the future") + # After successful parse, enforce timezone and future-date rules + if strict_timezone and fhir_date.tzinfo is not None: + if not any(field_value.endswith(suffix) for suffix in allowed_suffixes): + raise ValueError(error_message) + return None + except ValueError: + continue + raise ValueError(error_message) + except (TypeError, ValueError) as e: + code = ExceptionLevels.RECORD_CHECK_FAILED + message = MESSAGES[ExceptionLevels.RECORD_CHECK_FAILED] + details = str(e) + return ErrorReport(code, message, None, field_name, details) + except Exception as e: + if self.report_unexpected_exception: + message = MESSAGES[ExceptionLevels.UNEXPECTED_EXCEPTION] % (e.__class__.__name__, e) + return ErrorReport(ExceptionLevels.UNEXPECTED_EXCEPTION, message, None, field_name) # Not Equal Validate def _validate_not_equal(self, expression_rule: str, field_name: str, field_value: str, row: dict) -> ErrorReport: @@ -365,7 +353,7 @@ def validation_for_string_values(self, expression_rule: str, field_name: str, fi defined_length = rules.get("defined_length", None) max_length = rules.get("max_length", None) predefined_values = rules.get("predefined_values", None) - spaces_allowed = rules.get("spaces_allowed", None) + spaces_allowed = rules.get("spaces_allowed", True) try: if not isinstance(field_value, str): diff --git a/lambdas/shared/src/common/validator/expression_rule.py b/lambdas/shared/src/common/validator/expression_rule.py index 60b6cefb0..56fb75fed 100644 --- a/lambdas/shared/src/common/validator/expression_rule.py +++ b/lambdas/shared/src/common/validator/expression_rule.py @@ -15,5 +15,7 @@ def expression_rule_per_field(expression_type: str) -> dict: return {"max_length": Constants.PERSON_NAME_ELEMENT_MAX_LENGTH} case "GENDER": return {"predefined_values": Constants.GENDERS} + case "DATETIME": + return {"strict_time_zone": True} case _: raise ValueError(f"Expression rule not found for type: {expression_type}") diff --git a/lambdas/shared/tests/test_common/validator/test_expression_checker.py b/lambdas/shared/tests/test_common/validator/test_expression_checker.py index cafd369e1..9df633a01 100644 --- a/lambdas/shared/tests/test_common/validator/test_expression_checker.py +++ b/lambdas/shared/tests/test_common/validator/test_expression_checker.py @@ -75,24 +75,67 @@ def test_list_valid_and_invalid(self): self.assertIsInstance(checker.validate_expression("LIST", "PERSON_NAME", "PERSON_FORENAME", []), ErrorReport) self.assertIsInstance(checker.validate_expression("LIST", "", "PERSON_FORENAME", "Alice"), ErrorReport) - # # DATE - # def test_date_valid_and_invalid(self): - # checker = self.make_checker() - # self.assertIsNone(checker.validate_expression("DATE", "", "contained|#:Patient|name|#:official|given|0", "2025-01-01")) - # self.assertIsInstance(checker.validate_expression("DATE", "", "date_field", "2025-13-01"), ErrorReport) - + # DATE + def test_date_valid_and_invalid(self): + checker = self.make_checker() + self.assertIsNone(checker.validate_expression("DATE", "", "contained|#:Patient|birthDate", "2025-01-01")) + self.assertIsNone(checker.validate_expression("DATE", "", "PERSON_DOB", "2025-01-01")) + self.assertIsInstance( + checker.validate_expression("DATE", "", "contained|#:Patient|birthDate", "2025-13-01"), ErrorReport + ) + self.assertIsInstance(checker.validate_expression("DATE", "", "PERSON_DOB", "2025-02-30"), ErrorReport) -# # DATETIME -# def test_datetime_valid_and_invalid(self): -# checker = self.make_checker() -# # Full date only allowed -# self.assertIsNone(checker.validate_expression("DATETIME", "", "dt_field", "2025-01-01", 1)) -# # Bad format should raise -# with self.assertRaises(Exception): -# checker.validate_expression("DATETIME", "", "dt_field", "2025-01-01T10:00:00Z", 1) + # DATETIME + def test_datetime_valid_and_invalid(self): + checker = self.make_checker() + # Full date only allowed + self.assertIsNone( + checker.validate_expression("DATETIME", "DATETIME", "occurrenceDateTime", "2025-01-01T05:00:00+00:00") + ) + self.assertIsNone( + checker.validate_expression("DATETIME", "DATETIME", "DATE_AND_TIME", "2025-01-01T05:00:00+00:00") + ) + # Bad format should raise + self.assertIsInstance( + checker.validate_expression("DATETIME", "", "occurrenceDateTime", "2026-01-01T10:00:00Z"), ErrorReport + ) + self.assertIsInstance( + checker.validate_expression("DATETIME", "", "DATE_AND_TIME", "2026-01-01T10:00:00Z"), ErrorReport + ) # # BOOLEAN + +# # STRING with GENDER rule on real field +# def test_gender_string_rule_valid_and_invalid(self): +# checker = self.make_checker() +# field_path = "contained|#:Patient|gender" +# # Valid genders per schema constants (male, female, other, unknown) +# self.assertIsNone(checker.validate_expression("STRING", "GENDER", field_path, "male")) +# self.assertIsNone(checker.validate_expression("STRING", "GENDER", field_path, "female")) +# # Invalid values should error +# self.assertIsInstance( +# checker.validate_expression("STRING", "GENDER", field_path, "M"), +# ErrorReport, +# ) + +# # STRING with no rule for PERSON_POSTCODE on real field +# def test_postcode_string_rule_valid_and_invalid(self): +# checker = self.make_checker() +# field_path = "contained|#:Patient|address|#:postalCode|postalCode" +# # With empty rule, generic string constraints apply: non-empty and no spaces +# self.assertIsNone(checker.validate_expression("STRING", "", field_path, "SW1A1AA")) +# # Real-world postcode with a space should fail as spaces are not allowed without a rule override +# field_path = "POST_CODE" +# self.assertIsInstance( +# checker.validate_expression("STRING", "", field_path, "AB12 3CD"), +# ErrorReport, +# ) +# # Empty should also fail +# self.assertIsInstance( +# checker.validate_expression("STRING", "", field_path, ""), +# ErrorReport, +# ) # def test_boolean_valid_and_invalid(self): # checker = self.make_checker() # self.assertIsNone(checker.validate_expression("BOOLEAN", "", "bool_field", True, 1)) diff --git a/lambdas/shared/tests/test_common/validator/test_schemas/test_schema.json b/lambdas/shared/tests/test_common/validator/test_schemas/test_schema.json index ed85a722a..8e8784e2b 100644 --- a/lambdas/shared/tests/test_common/validator/test_schemas/test_schema.json +++ b/lambdas/shared/tests/test_common/validator/test_schemas/test_schema.json @@ -55,6 +55,32 @@ "expressionRule": "" }, "errorGroup": "consistency" + }, + { + "expressionId": "01K8RW2MYFNE6YZJ99RC8B3XES", + "fieldNameFHIR": "contained|#:Patient|address|#:postalCode|postalCode", + "fieldNameFlat": "PERSON_POSTCODE", + "fieldNumber": 6, + "errorLevel": 2, + "expression": { + "expressionName": "Postcode String Check", + "expressionType": "STRING", + "expressionRule": "" + }, + "errorGroup": "completeness" + }, + { + "expressionId": "01K8S0WRNSPQ42RBD8420Q9G7Y", + "fieldNameFHIR": "occurrenceDateTime", + "fieldNameFlat": "DATE_AND_TIME", + "fieldNumber": 7, + "errorLevel": 0, + "expression": { + "expressionName": "Date Time Conversion", + "expressionType": "DATETIME", + "expressionRule": "DATETIME" + }, + "errorGroup": "consistency" } ] } diff --git a/lambdas/shared/tests/test_common/validator/testing_utils/constants.py b/lambdas/shared/tests/test_common/validator/testing_utils/constants.py index 5a710a079..6cf909804 100644 --- a/lambdas/shared/tests/test_common/validator/testing_utils/constants.py +++ b/lambdas/shared/tests/test_common/validator/testing_utils/constants.py @@ -15,9 +15,9 @@ "PERSON_FORENAME": ["JOHN"], "PERSON_SURNAME": "DOE", "PERSON_DOB": "1980-01-01", - "PERSON_GENDER_CODE": "M", + "PERSON_GENDER_CODE": "Male", "PERSON_POSTCODE": "AB12 3CD", - "DATE_AND_TIME": "20250306T13281701", + "DATE_AND_TIME": "2025-03-06T05:10:25+00:00", "SITE_CODE": "RJ1", "SITE_CODE_TYPE_URI": "https://fhir.nhs.uk/Id/ods-organization-code", "UNIQUE_ID": "ACME-vacc123456", From 9e18d90536a8e6b060f242db65d08f9993db2496 Mon Sep 17 00:00:00 2001 From: Akol125 Date: Thu, 13 Nov 2025 18:23:16 +0000 Subject: [PATCH 09/18] setup schema and validation for 15 fields --- .../validator/test_expression_checker.py | 145 ++++++++++++++---- .../validator/test_schemas/test_schema.json | 104 +++++++++++++ .../validator/testing_utils/constants.py | 6 +- 3 files changed, 220 insertions(+), 35 deletions(-) diff --git a/lambdas/shared/tests/test_common/validator/test_expression_checker.py b/lambdas/shared/tests/test_common/validator/test_expression_checker.py index 9df633a01..aa223d550 100644 --- a/lambdas/shared/tests/test_common/validator/test_expression_checker.py +++ b/lambdas/shared/tests/test_common/validator/test_expression_checker.py @@ -103,39 +103,120 @@ def test_datetime_valid_and_invalid(self): checker.validate_expression("DATETIME", "", "DATE_AND_TIME", "2026-01-01T10:00:00Z"), ErrorReport ) + # STRING with SITE_CODE + def test_site_code_string_valid_and_invalid(self): + checker = self.make_checker() + field_path = "performer|#:Organization|actor|identifier|value" + # Valid: non-empty, no spaces + self.assertIsNone(checker.validate_expression("STRING", "", field_path, "RJ1")) + # Invalid: empty + self.assertIsInstance(checker.validate_expression("STRING", "", field_path, ""), ErrorReport) + # Invalid: contains spaces + self.assertIsInstance(checker.validate_expression("STRING", "", field_path, 1234), ErrorReport) + + # STRING with SITE_CODE_TYPE_URI rule + def test_site_code_type_uri_string_valid_and_invalid(self): + checker = self.make_checker() + field_path = "performer|#:Organization|actor|identifier|system" + valid_uri = "https://fhir.nhs.uk/Id/ods-organization-code" + # Valid: non-empty, no spaces + self.assertIsNone( + checker.validate_expression("STRING", "", field_path, valid_uri), + ) + # Invalid: empty + self.assertIsInstance(checker.validate_expression("STRING", "", field_path, ""), ErrorReport) + # Invalid: contains spaces + self.assertIsInstance(checker.validate_expression("STRING", "", field_path, 123), ErrorReport) + + # BOOLEAN + + # STRING with UNIQUE_ID rule (empty rule -> generic non-empty string) + def test_unique_id_string_valid_and_invalid(self): + checker = self.make_checker() + field_path = "identifier|0|value" + # Valid: non-empty string + self.assertIsNone(checker.validate_expression("STRING", "", field_path, "ABC-123-XYZ")) + # Invalid: empty string + self.assertIsInstance(checker.validate_expression("STRING", "", field_path, ""), ErrorReport) + # Invalid: non-string value + self.assertIsInstance(checker.validate_expression("STRING", "", field_path, 987654), ErrorReport) + + # STRING with UNIQUE_ID_URI rule (empty rule -> generic non-empty string) + def test_unique_id_uri_string_valid_and_invalid(self): + checker = self.make_checker() + field_path = "identifier|0|system" + valid_system = "https://example.org/unique-id-system" + # Valid: non-empty string + self.assertIsNone(checker.validate_expression("STRING", "", field_path, valid_system)) + # Invalid: empty string + self.assertIsInstance(checker.validate_expression("STRING", "", field_path, ""), ErrorReport) + # Invalid: non-string value + self.assertIsInstance(checker.validate_expression("STRING", "", field_path, 42), ErrorReport) + + # STRING with GENDER rule on real field + def test_gender_string_rule_valid_and_invalid(self): + checker = self.make_checker() + field_path = "contained|#:Patient|gender" + # Valid genders per schema constants (male, female, other, unknown) + self.assertIsNone(checker.validate_expression("STRING", "GENDER", field_path, "male")) + self.assertIsNone(checker.validate_expression("STRING", "GENDER", field_path, "female")) + # Invalid values should error + self.assertIsInstance(checker.validate_expression("STRING", "GENDER", field_path, "M"), ErrorReport) + + # LIST with PERFORMING_PROFESSIONAL_FORENAME (empty rule -> non-empty list) + def test_practitioner_forename_list_valid_and_invalid(self): + checker = self.make_checker() + field_path = "contained|#:Practitioner|name|0|given|0" + # Valid: non-empty list + self.assertIsNone(checker.validate_expression("LIST", "", field_path, ["Alice"])) + # Invalid: empty list + self.assertIsInstance(checker.validate_expression("LIST", "", field_path, []), ErrorReport) + # Invalid: non-list value + self.assertIsInstance(checker.validate_expression("LIST", "", field_path, "Alice"), ErrorReport) + + # STRING with PERFORMING_PROFESSIONAL_SURNAME (empty rule -> non-empty string) + def test_practitioner_surname_string_valid_and_invalid(self): + checker = self.make_checker() + field_path = "contained|#:Practitioner|name|0|family" + # Valid: non-empty string + self.assertIsNone(checker.validate_expression("STRING", "", field_path, "Smith")) + # Invalid: empty string + self.assertIsInstance(checker.validate_expression("STRING", "", field_path, ""), ErrorReport) + # Invalid: non-string + self.assertIsInstance(checker.validate_expression("STRING", "", field_path, 123), ErrorReport) + + # DATETIME with RECORDED_DATE (schema rule says 'false-strict-timezone' but we use default non-strict here) + def test_recorded_date_datetime_valid_and_invalid(self): + checker = self.make_checker() + field_path = "recorded" + # Valid: timezone offset other than +00:00 or +01:00 should be allowed when non-strict + self.assertIsNone(checker.validate_expression("DATETIME", "", field_path, "2025-01-01T10:00:00+02:00")) + # Valid: full date only also allowed per formats + self.assertIsNone(checker.validate_expression("DATETIME", "", field_path, "2025-01-01")) + # Invalid: Zulu timezone not in accepted formats + self.assertIsInstance( + checker.validate_expression("DATETIME", "", field_path, "2026-01-01T10:00:00Z"), ErrorReport + ) + + # STRING with no rule for PERSON_POSTCODE on real field + def test_postcode_string_rule_valid_and_invalid(self): + checker = self.make_checker() + field_path = "contained|#:Patient|address|#:postalCode|postalCode" + # With empty rule, generic string constraints apply: non-empty and no spaces + self.assertIsNone(checker.validate_expression("STRING", "", field_path, "SW1A 1AA")) + # Real-world postcode with a space should fail as spaces are not allowed without a rule override + field_path = "POST_CODE" + self.assertIsInstance( + checker.validate_expression("STRING", "", field_path, 123), + ErrorReport, + ) + # Empty should also fail + self.assertIsInstance( + checker.validate_expression("STRING", "", field_path, ""), + ErrorReport, + ) + -# # BOOLEAN - -# # STRING with GENDER rule on real field -# def test_gender_string_rule_valid_and_invalid(self): -# checker = self.make_checker() -# field_path = "contained|#:Patient|gender" -# # Valid genders per schema constants (male, female, other, unknown) -# self.assertIsNone(checker.validate_expression("STRING", "GENDER", field_path, "male")) -# self.assertIsNone(checker.validate_expression("STRING", "GENDER", field_path, "female")) -# # Invalid values should error -# self.assertIsInstance( -# checker.validate_expression("STRING", "GENDER", field_path, "M"), -# ErrorReport, -# ) - -# # STRING with no rule for PERSON_POSTCODE on real field -# def test_postcode_string_rule_valid_and_invalid(self): -# checker = self.make_checker() -# field_path = "contained|#:Patient|address|#:postalCode|postalCode" -# # With empty rule, generic string constraints apply: non-empty and no spaces -# self.assertIsNone(checker.validate_expression("STRING", "", field_path, "SW1A1AA")) -# # Real-world postcode with a space should fail as spaces are not allowed without a rule override -# field_path = "POST_CODE" -# self.assertIsInstance( -# checker.validate_expression("STRING", "", field_path, "AB12 3CD"), -# ErrorReport, -# ) -# # Empty should also fail -# self.assertIsInstance( -# checker.validate_expression("STRING", "", field_path, ""), -# ErrorReport, -# ) # def test_boolean_valid_and_invalid(self): # checker = self.make_checker() # self.assertIsNone(checker.validate_expression("BOOLEAN", "", "bool_field", True, 1)) diff --git a/lambdas/shared/tests/test_common/validator/test_schemas/test_schema.json b/lambdas/shared/tests/test_common/validator/test_schemas/test_schema.json index 8e8784e2b..bfc385809 100644 --- a/lambdas/shared/tests/test_common/validator/test_schemas/test_schema.json +++ b/lambdas/shared/tests/test_common/validator/test_schemas/test_schema.json @@ -56,6 +56,19 @@ }, "errorGroup": "consistency" }, + { + "expressionId": "01K8RW2ATXRM572BFG19S8TFJ2", + "fieldNameFHIR": "contained|#:Patient|gender", + "fieldNameFlat": "PERSON_GENDER_CODE", + "fieldNumber": 5, + "errorLevel": 1, + "expression": { + "expressionName": "Gender Valid Check", + "expressionType": "STRING", + "expressionRule": "GENDER" + }, + "errorGroup": "consistency" + }, { "expressionId": "01K8RW2MYFNE6YZJ99RC8B3XES", "fieldNameFHIR": "contained|#:Patient|address|#:postalCode|postalCode", @@ -81,6 +94,97 @@ "expressionRule": "DATETIME" }, "errorGroup": "consistency" + }, + { + "expressionId": "01K5EGR0C8M1MVNKTQCE6MSG68", + "fieldNameFHIR": "performer|#:Organization|actor|identifier|value", + "fieldNameFlat": "SITE_CODE", + "fieldNumber": 8, + "errorLevel": 0, + "expression": { + "expressionName": "Organisation String Check", + "expressionType": "STRING", + "expressionRule": "" + }, + "errorGroup": "consistency" + }, + { + "expressionId": "01K8S0X5AJ9048PAFEN3XVZ7YC", + "fieldNameFHIR": "performer|#:Organization|actor|identifier|system", + "fieldNameFlat": "SITE_CODE_TYPE_URI", + "fieldNumber": 9, + "errorLevel": 1, + "expression": { + "expressionName": "Organisation Code Type URI Check", + "expressionType": "STRING", + "expressionRule": "" + }, + "errorGroup": "consistency" + }, + { + "expressionId": "01K8S0XF2Y2WP22017N9KE6VJA", + "fieldNameFHIR": "identifier|0|value", + "fieldNameFlat": "UNIQUE_ID", + "fieldNumber": 10, + "errorLevel": 0, + "expression": { + "expressionName": "Unique ID Not Empty Check", + "expressionType": "STRING", + "expressionRule": "" + }, + "errorGroup": "validity" + }, + { + "expressionId": "01K8S0XQYHDKMCA1P1GK4W5JHP", + "fieldNameFHIR": "identifier|0|system", + "fieldNameFlat": "UNIQUE_ID_URI", + "fieldNumber": 11, + "errorLevel": 0, + "expression": { + "expressionName": "Unique ID URI Not Empty Check", + "expressionType": "STRING", + "expressionRule": "" + }, + "errorGroup": "validity" + }, + { + "expressionId": "01K5EGR0C8SDQBTNCEP8TJNCCW", + "fieldNameFHIR": "contained|#:Practitioner|name|0|given|0", + "fieldNameFlat": "PERFORMING_PROFESSIONAL_FORENAME", + "fieldNumber": 13, + "errorLevel": 1, + "expression": { + "expressionName": "Practitioner Forename Not Empty Check", + "expressionType": "LIST", + "expressionRule": "" + }, + "errorGroup": "completeness" + }, + { + "expressionId": "01K5EGR0C8T3Z6X6h3W7D1F4VY", + "fieldNameFHIR": "contained|#:Practitioner|name|0|family", + "fieldNameFlat": "PERFORMING_PROFESSIONAL_SURNAME", + "fieldNumber": 14, + "errorLevel": 1, + "expression": { + "expressionName": "Practitioner Surname Not Empty Check", + "expressionType": "STRING", + "expressionRule": "" + }, + "errorGroup": "completeness" + }, + { + "expressionId": "01K8S0Y8TX8HTX6YGW61RDCATK", + "fieldNameFHIR": "recorded", + "fieldNameFlat": "RECORDED_DATE", + "fieldNumber": 15, + "errorLevel": 1, + "expression": { + "expressionName": "Recorded Date Convert", + "expressionType": "DATETIME", + "expressionRule": "" + }, + "errorGroup": "consistency" } ] } diff --git a/lambdas/shared/tests/test_common/validator/testing_utils/constants.py b/lambdas/shared/tests/test_common/validator/testing_utils/constants.py index 6cf909804..771cf1f97 100644 --- a/lambdas/shared/tests/test_common/validator/testing_utils/constants.py +++ b/lambdas/shared/tests/test_common/validator/testing_utils/constants.py @@ -15,7 +15,7 @@ "PERSON_FORENAME": ["JOHN"], "PERSON_SURNAME": "DOE", "PERSON_DOB": "1980-01-01", - "PERSON_GENDER_CODE": "Male", + "PERSON_GENDER_CODE": "male", "PERSON_POSTCODE": "AB12 3CD", "DATE_AND_TIME": "2025-03-06T05:10:25+00:00", "SITE_CODE": "RJ1", @@ -23,9 +23,9 @@ "UNIQUE_ID": "ACME-vacc123456", "UNIQUE_ID_URI": "https://supplierABC/identifiers/vacc", "ACTION_FLAG": "UPDATE", - "PERFORMING_PROFESSIONAL_FORENAME": "ALICE", + "PERFORMING_PROFESSIONAL_FORENAME": ["ALICE"], "PERFORMING_PROFESSIONAL_SURNAME": "SMITH", - "RECORDED_DATE": "20250306", + "RECORDED_DATE": "2025-03-06", "PRIMARY_SOURCE": "true", "VACCINATION_PROCEDURE_CODE": "PROC123", "VACCINATION_PROCEDURE_TERM": "Procedure Term", From 32e2313bdfa769a3055b5d4d9b009170dde63876 Mon Sep 17 00:00:00 2001 From: Akol125 Date: Thu, 13 Nov 2025 20:59:54 +0000 Subject: [PATCH 10/18] schema rules and test on 30 fhir fields --- .../common/validator/constants/constants.py | 1 + .../common/validator/expression_checker.py | 94 ++++---- .../src/common/validator/expression_rule.py | 2 + .../validator/test_expression_checker.py | 220 ++++++++++++++++-- .../validator/test_schemas/test_schema.json | 195 ++++++++++++++++ .../validator/testing_utils/constants.py | 9 +- 6 files changed, 450 insertions(+), 71 deletions(-) diff --git a/lambdas/shared/src/common/validator/constants/constants.py b/lambdas/shared/src/common/validator/constants/constants.py index bc9c7d216..f180b6640 100644 --- a/lambdas/shared/src/common/validator/constants/constants.py +++ b/lambdas/shared/src/common/validator/constants/constants.py @@ -27,3 +27,4 @@ class Constants: "Only '+00:00' and '+01:00' are accepted as valid timezone offsets.\n" f"Note that partial dates are not allowed for {field_name} in this service.\n" ) + MAXIMUM_DOSE_NUMBER_VALUE = 9 diff --git a/lambdas/shared/src/common/validator/expression_checker.py b/lambdas/shared/src/common/validator/expression_checker.py index 789452910..6cbf806fd 100644 --- a/lambdas/shared/src/common/validator/expression_checker.py +++ b/lambdas/shared/src/common/validator/expression_checker.py @@ -80,36 +80,57 @@ def validation_for_date(self, _expression_rule, field_name, field_value) -> Erro message = MESSAGES[ExceptionLevels.UNEXPECTED_EXCEPTION] % (e.__class__.__name__, e) return ErrorReport(ExceptionLevels.UNEXPECTED_EXCEPTION, message, None, field_name) - def validation_for_positive_integer(self, _expression_rule, field_name, field_value, row): + def validation_for_positive_integer(self, expression_rule, field_name, field_value) -> ErrorReport: + rules = expression_rule_per_field(expression_rule) if expression_rule else {} + max_value = rules.get("max_value", None) """ Apply pre-validation to an integer field to ensure that it is a positive integer, which does not exceed the maximum allowed value (if applicable) """ - max_value: int = None - # This check uses type() instead of isinstance() because bool is a subclass of int. - if type(field_value) is not int: # pylint: disable=unidiomatic-typecheck - raise TypeError(f"{field_name} must be a positive integer") + try: + # This check uses type() instead of isinstance() because bool is a subclass of int. + if type(field_value) is not int: # pylint: disable=unidiomatic-typecheck + raise TypeError(f"{field_name} must be a positive integer") - if field_value <= 0: - raise ValueError(f"{field_name} must be a positive integer") + if field_value <= 0: + raise ValueError(f"{field_name} must be a positive integer") - if max_value: - if field_value > max_value: - raise ValueError(f"{field_name} must be an integer in the range 1 to {max_value}") + if max_value: + if field_value > max_value: + raise ValueError(f"{field_name} must be an integer in the range 1 to {max_value}") + except (TypeError, ValueError) as e: + code = ExceptionLevels.RECORD_CHECK_FAILED + message = MESSAGES[ExceptionLevels.RECORD_CHECK_FAILED] + details = str(e) + return ErrorReport(code, message, None, field_name, details) + except Exception as e: + if self.report_unexpected_exception: + message = MESSAGES[ExceptionLevels.UNEXPECTED_EXCEPTION] % (e.__class__.__name__, e) + return ErrorReport(ExceptionLevels.UNEXPECTED_EXCEPTION, message, None, field_name) def validation_for_integer_or_decimal( - self, _expression_rule, field_value: Union[int, Decimal], field_name: str, row: dict - ): + self, _expression_rule, field_name: str, field_value: Union[int, Decimal] + ) -> ErrorReport: """ Apply pre-validation to a decimal field to ensure that it is an integer or decimal, which does not exceed the maximum allowed number of decimal places (if applicable) """ - if not ( - # This check uses type() instead of isinstance() because bool is a subclass of int. - type(field_value) is int # pylint: disable=unidiomatic-typecheck - or type(field_value) is Decimal # pylint: disable=unidiomatic-typecheck - ): - raise TypeError(f"{field_name} must be a number") + try: + if not ( + # This check uses type() instead of isinstance() because bool is a subclass of int. + type(field_value) is int # pylint: disable=unidiomatic-typecheck + or type(field_value) is Decimal # pylint: disable=unidiomatic-typecheck + ): + raise TypeError(f"{field_name} must be a number") + except (TypeError, ValueError) as e: + code = ExceptionLevels.RECORD_CHECK_FAILED + message = MESSAGES[ExceptionLevels.RECORD_CHECK_FAILED] + details = str(e) + return ErrorReport(code, message, None, field_name, details) + except Exception as e: + if self.report_unexpected_exception: + message = MESSAGES[ExceptionLevels.UNEXPECTED_EXCEPTION] % (e.__class__.__name__, e) + return ErrorReport(ExceptionLevels.UNEXPECTED_EXCEPTION, message, None, field_name) def validation_for_unique_list( list_to_check: list, @@ -150,10 +171,20 @@ def _validate_regex(self, expression_rule: str, field_name: str, field_value: st message = MESSAGES[ExceptionLevels.UNEXPECTED_EXCEPTION] % (e.__class__.__name__, e) return ErrorReport(ExceptionLevels.UNEXPECTED_EXCEPTION, message, row, field_name, "", self.summarise) - def validation_for_boolean(self, expression_rule: str, field_name: str, field_value: str, row: dict): + def validation_for_boolean(self, expression_rule: str, field_name: str, field_value: str) -> ErrorReport: """Apply pre-validation to a boolean field to ensure that it is a boolean""" - if not isinstance(field_value, bool): - raise TypeError(f"{field_name} must be a boolean") + try: + if not isinstance(field_value, bool): + raise TypeError(f"{field_name} must be a boolean") + except (TypeError, ValueError) as e: + code = ExceptionLevels.RECORD_CHECK_FAILED + message = MESSAGES[ExceptionLevels.RECORD_CHECK_FAILED] + details = str(e) + return ErrorReport(code, message, None, field_name, details) + except Exception as e: + if self.report_unexpected_exception: + message = MESSAGES[ExceptionLevels.UNEXPECTED_EXCEPTION] % (e.__class__.__name__, e) + return ErrorReport(ExceptionLevels.UNEXPECTED_EXCEPTION, message, None, field_name, "") def validation_for_list(self, expression_rule: str, field_name: str, field_value: list): """ @@ -407,27 +438,6 @@ def _validate_not_empty(self, _expression_rule: str, field_name: str, field_valu message = MESSAGES[ExceptionLevels.UNEXPECTED_EXCEPTION] % (e.__class__.__name__, e) return ErrorReport(ExceptionLevels.UNEXPECTED_EXCEPTION, message, None, field_name) - # Positive Validate - def _validate_positive(self, _expression_rule: str, field_name: str, field_value: str, row: dict) -> ErrorReport: - try: - value = float(field_value) - if value < 0: - raise RecordError( - ExceptionLevels.RECORD_CHECK_FAILED, - "Value is not positive failure", - "Value is not positive as expected, data- " + field_value, - ) - except RecordError as e: - code = e.code if e.code is not None else ExceptionLevels.RECORD_CHECK_FAILED - message = e.message if e.message is not None else MESSAGES[ExceptionLevels.RECORD_CHECK_FAILED] - if e.details is not None: - details = e.details - return ErrorReport(code, message, None, field_name, details, self.summarise) - except Exception as e: - if self.report_unexpected_exception: - message = MESSAGES[ExceptionLevels.UNEXPECTED_EXCEPTION] % (e.__class__.__name__, e) - return ErrorReport(ExceptionLevels.UNEXPECTED_EXCEPTION, message, row, field_name, "", self.summarise) - # NHSNumber Validate def _validate_nhs_number(self, _expression_rule: str, field_name: str, field_value: str, row: dict) -> ErrorReport: try: diff --git a/lambdas/shared/src/common/validator/expression_rule.py b/lambdas/shared/src/common/validator/expression_rule.py index 56fb75fed..8657dba4e 100644 --- a/lambdas/shared/src/common/validator/expression_rule.py +++ b/lambdas/shared/src/common/validator/expression_rule.py @@ -17,5 +17,7 @@ def expression_rule_per_field(expression_type: str) -> dict: return {"predefined_values": Constants.GENDERS} case "DATETIME": return {"strict_time_zone": True} + case "DOSE_NUMBER": + return {"max_value": Constants.MAXIMUM_DOSE_NUMBER_VALUE} case _: raise ValueError(f"Expression rule not found for type: {expression_type}") diff --git a/lambdas/shared/tests/test_common/validator/test_expression_checker.py b/lambdas/shared/tests/test_common/validator/test_expression_checker.py index aa223d550..d41ae6ef5 100644 --- a/lambdas/shared/tests/test_common/validator/test_expression_checker.py +++ b/lambdas/shared/tests/test_common/validator/test_expression_checker.py @@ -1,4 +1,5 @@ import unittest +from decimal import Decimal from common.validator.error_report.record_error import ErrorReport from common.validator.expression_checker import ExpressionChecker @@ -163,6 +164,199 @@ def test_gender_string_rule_valid_and_invalid(self): # Invalid values should error self.assertIsInstance(checker.validate_expression("STRING", "GENDER", field_path, "M"), ErrorReport) + # BOOLEAN with PRIMARY_SOURCE + def test_primary_source_boolean_valid_and_invalid(self): + checker = self.make_checker() + field_path = "primarySource" + # Valid: boolean True + self.assertIsNone(checker.validate_expression("BOOLEAN", "", field_path, True)) + # Invalid: non-boolean should raise TypeError per implementation + self.assertIsInstance(checker.validate_expression("BOOLEAN", "", field_path, "true"), ErrorReport) + + # STRING with VACCINATION_PROCEDURE_CODE + def test_vaccination_procedure_code_string_valid_and_invalid(self): + checker = self.make_checker() + field_path = "extension|0|valueCodeableConcept|coding|0|code" + # Valid: non-empty string + self.assertIsNone(checker.validate_expression("STRING", "", field_path, "123456")) + # Invalid: empty string + self.assertIsInstance(checker.validate_expression("STRING", "", field_path, ""), ErrorReport) + # Invalid: non-string value + self.assertIsInstance(checker.validate_expression("STRING", "", field_path, 123456), ErrorReport) + + # STRING with VACCINATION_PROCEDURE_TERM + def test_vaccination_procedure_term_string_valid_and_invalid(self): + checker = self.make_checker() + field_path = "extension|0|valueCodeableConcept|coding|0|display" + # Valid: non-empty string + self.assertIsNone(checker.validate_expression("STRING", "", field_path, "COVID-19 vaccination")) + # Invalid: empty string + self.assertIsInstance(checker.validate_expression("STRING", "", field_path, ""), ErrorReport) + # Invalid: non-string value + self.assertIsInstance(checker.validate_expression("STRING", "", field_path, 999), ErrorReport) + + # POSITIVEINTEGER with DOSE_SEQUENCE + def test_dose_sequence_positiveinteger_valid_and_invalid(self): + checker = self.make_checker() + field_path = "protocolApplied|0|doseNumberPositiveInt" + # Valid: positive integer + self.assertIsNone(checker.validate_expression("POSITIVEINTEGER", "", field_path, 2)) + # Invalid: zero -> ValueError + self.assertIsInstance(checker.validate_expression("POSITIVEINTEGER", "", field_path, 0), ErrorReport) + # Invalid: negative -> ValueError + self.assertIsInstance(checker.validate_expression("POSITIVEINTEGER", "", field_path, -1), ErrorReport) + # Invalid: non-int -> TypeError + self.assertIsInstance(checker.validate_expression("POSITIVEINTEGER", "", field_path, "2"), ErrorReport) + + # STRING with VACCINE_PRODUCT_CODE + def test_vaccine_product_code_string_valid_and_invalid(self): + checker = self.make_checker() + field_path = "vaccineCode|coding|#:http://snomed.info/sct|code" + # Valid: non-empty string + self.assertIsNone(checker.validate_expression("STRING", "", field_path, "1119349007")) + # Invalid: empty + self.assertIsInstance(checker.validate_expression("STRING", "", field_path, ""), ErrorReport) + # Invalid: non-string + self.assertIsInstance(checker.validate_expression("STRING", "", field_path, 1119349007), ErrorReport) + + # STRING with VACCINE_PRODUCT_TERM + def test_vaccine_product_term_string_valid_and_invalid(self): + checker = self.make_checker() + field_path = "vaccineCode|coding|#:http://snomed.info/sct|display" + # Valid: non-empty string (spaces allowed by default) + self.assertIsNone(checker.validate_expression("STRING", "", field_path, "COVID-19 mRNA vaccine")) + # Invalid: empty + self.assertIsInstance(checker.validate_expression("STRING", "", field_path, ""), ErrorReport) + # Invalid: non-string + self.assertIsInstance(checker.validate_expression("STRING", "", field_path, 12345), ErrorReport) + + # STRING with VACCINE_MANUFACTURER + def test_vaccine_manufacturer_string_valid_and_invalid(self): + checker = self.make_checker() + field_path = "manufacturer|display" + # Valid: non-empty string + self.assertIsNone(checker.validate_expression("STRING", "", field_path, "Pfizer")) + # Invalid: empty + self.assertIsInstance(checker.validate_expression("STRING", "", field_path, ""), ErrorReport) + # Invalid: non-string + self.assertIsInstance(checker.validate_expression("STRING", "", field_path, 101), ErrorReport) + + # STRING with SITE_OF_VACCINATION_CODE + def test_site_of_vaccination_code_string_valid_and_invalid(self): + checker = self.make_checker() + field_path = "site|coding|#:http://snomed.info/sct|code" + self.assertIsNone( + checker.validate_expression("STRING", "", field_path, "123456"), + msg=f"fieldPath={field_path}", + ) + self.assertIsInstance( + checker.validate_expression("STRING", "", field_path, ""), + ErrorReport, + msg=f"fieldPath={field_path}", + ) + self.assertIsInstance( + checker.validate_expression("STRING", "", field_path, 123456), + ErrorReport, + msg=f"fieldPath={field_path}", + ) + + # STRING with SITE_OF_VACCINATION_TERM + def test_site_of_vaccination_term_string_valid_and_invalid(self): + checker = self.make_checker() + field_path = "site|coding|#:http://snomed.info/sct|display" + self.assertIsNone( + checker.validate_expression("STRING", "", field_path, "Left deltoid"), + msg=f"fieldPath={field_path}", + ) + self.assertIsInstance( + checker.validate_expression("STRING", "", field_path, ""), + ErrorReport, + msg=f"fieldPath={field_path}", + ) + self.assertIsInstance( + checker.validate_expression("STRING", "", field_path, 999), + ErrorReport, + msg=f"fieldPath={field_path}", + ) + + # STRING with ROUTE_OF_VACCINATION_CODE + def test_route_of_vaccination_code_string_valid_and_invalid(self): + checker = self.make_checker() + field_path = "route|coding|#:http://snomed.info/sct|code" + self.assertIsNone( + checker.validate_expression("STRING", "", field_path, "1234"), + msg=f"fieldPath={field_path}", + ) + self.assertIsInstance( + checker.validate_expression("STRING", "", field_path, ""), + ErrorReport, + msg=f"fieldPath={field_path}", + ) + self.assertIsInstance( + checker.validate_expression("STRING", "", field_path, 1234), + ErrorReport, + msg=f"fieldPath={field_path}", + ) + + # STRING with ROUTE_OF_VACCINATION_TERM + def test_route_of_vaccination_term_string_valid_and_invalid(self): + checker = self.make_checker() + field_path = "route|coding|#:http://snomed.info/sct|display" + self.assertIsNone( + checker.validate_expression("STRING", "", field_path, "Intramuscular"), + msg=f"fieldPath={field_path}", + ) + self.assertIsInstance( + checker.validate_expression("STRING", "", field_path, ""), + ErrorReport, + msg=f"fieldPath={field_path}", + ) + self.assertIsInstance( + checker.validate_expression("STRING", "", field_path, 12), + ErrorReport, + msg=f"fieldPath={field_path}", + ) + + # INTDECIMAL with DOSE_AMOUNT + def test_dose_amount_intdecimal_valid_and_invalid(self): + checker = self.make_checker() + field_path = "doseQuantity|value" + # Valid: int + self.assertIsNone( + checker.validate_expression("INTDECIMAL", "", field_path, 1), + msg=f"fieldPath={field_path}", + ) + # Valid: Decimal + self.assertIsNone( + checker.validate_expression("INTDECIMAL", "", field_path, Decimal("0.5")), + msg=f"fieldPath={field_path}", + ) + # Invalid: string + self.assertIsInstance( + checker.validate_expression("INTDECIMAL", "", field_path, "0.5"), + ErrorReport, + msg=f"fieldPath={field_path}", + ) + + # STRING with DOSE_UNIT_CODE + def test_dose_unit_code_string_valid_and_invalid(self): + checker = self.make_checker() + field_path = "doseQuantity|code" + self.assertIsNone( + checker.validate_expression("STRING", "", field_path, "ml"), + msg=f"fieldPath={field_path}", + ) + self.assertIsInstance( + checker.validate_expression("STRING", "", field_path, ""), + ErrorReport, + msg=f"fieldPath={field_path}", + ) + self.assertIsInstance( + checker.validate_expression("STRING", "", field_path, 1), + ErrorReport, + msg=f"fieldPath={field_path}", + ) + # LIST with PERFORMING_PROFESSIONAL_FORENAME (empty rule -> non-empty list) def test_practitioner_forename_list_valid_and_invalid(self): checker = self.make_checker() @@ -211,31 +405,7 @@ def test_postcode_string_rule_valid_and_invalid(self): ErrorReport, ) # Empty should also fail - self.assertIsInstance( - checker.validate_expression("STRING", "", field_path, ""), - ErrorReport, - ) - - -# def test_boolean_valid_and_invalid(self): -# checker = self.make_checker() -# self.assertIsNone(checker.validate_expression("BOOLEAN", "", "bool_field", True, 1)) -# self.assertIsInstance(checker.validate_expression("BOOLEAN", "", "bool_field", "true", 1), ErrorReport) - -# # POSITIVEINTEGER -# def test_positive_integer_valid_and_invalid(self): -# checker = self.make_checker() -# self.assertIsNone(checker.validate_expression("POSITIVEINTEGER", "", "pos_field", "2", 1)) -# self.assertIsInstance(checker.validate_expression("POSITIVEINTEGER", "", "pos_field", "0", 1), ErrorReport) -# self.assertIsInstance(checker.validate_expression("POSITIVEINTEGER", "", "pos_field", "-5", 1), ErrorReport) -# self.assertIsInstance(checker.validate_expression("POSITIVEINTEGER", "", "pos_field", "abc", 1), ErrorReport) - -# # INTDECIMAL -# def test_intdecimal_valid_and_invalid(self): -# checker = self.make_checker() -# self.assertIsNone(checker.validate_expression("INTDECIMAL", "", "num_field", "1.23", 1)) -# self.assertIsNone(checker.validate_expression("INTDECIMAL", "", "num_field", 3, 1)) -# self.assertIsInstance(checker.validate_expression("INTDECIMAL", "", "num_field", "abc", 1), ErrorReport) + self.assertIsInstance(checker.validate_expression("STRING", "", field_path, ""), ErrorReport) # class DummyParserEx: diff --git a/lambdas/shared/tests/test_common/validator/test_schemas/test_schema.json b/lambdas/shared/tests/test_common/validator/test_schemas/test_schema.json index bfc385809..edef4c7ea 100644 --- a/lambdas/shared/tests/test_common/validator/test_schemas/test_schema.json +++ b/lambdas/shared/tests/test_common/validator/test_schemas/test_schema.json @@ -185,6 +185,201 @@ "expressionRule": "" }, "errorGroup": "consistency" + }, + { + "expressionId": "01K5EGR0C84CCDRR0VFSWQNFZP", + "fieldNameFHIR": "primarySource", + "fieldNameFlat": "PRIMARY_SOURCE", + "fieldNumber": 16, + "errorLevel": 0, + "expression": { + "expressionName": "Primary Source Not Empty Check", + "expressionType": "BOOLEAN", + "expressionRule": "" + }, + "errorGroup": "completeness" + }, + { + "expressionId": "01K8S0YYGDFWJXN2W3THYG24EZ", + "fieldNameFHIR": "extension|0|valueCodeableConcept|coding|0|code", + "fieldNameFlat": "VACCINATION_PROCEDURE_CODE", + "fieldNumber": 17, + "errorLevel": 0, + "expression": { + "expressionName": "Procedure Code Not Empty Check", + "expressionType": "STRING", + "expressionRule": "" + }, + "errorGroup": "completeness" + }, + { + "expressionId": "01K5EGR0C85HY6MDNN6TTR1K48", + "fieldNameFHIR": "extension|0|valueCodeableConcept|coding|0|display", + "fieldNameFlat": "VACCINATION_PROCEDURE_TERM", + "fieldNumber": 18, + "errorLevel": 1, + "expression": { + "expressionName": "Procedure Term Not Empty Check", + "expressionType": "STRING", + "expressionRule": "" + }, + "errorGroup": "completeness" + }, + { + "expressionId": "01K5EGR0C84DDGW567G14AYBC6", + "fieldNameFHIR": "protocolApplied|0|doseNumberPositiveInt", + "fieldNameFlat": "DOSE_SEQUENCE", + "fieldNumber": 19, + "errorLevel": 1, + "expression": { + "expressionName": "Dose Sequence Not Empty Check", + "expressionType": "POSITIVEINTEGER", + "expressionRule": "DOSE_NUMBER" + }, + "errorGroup": "completeness" + }, + { + "expressionId": "01K5EGR0C8W3HXFYR80ENW73SS", + "fieldNameFHIR": "vaccineCode|coding|#:http://snomed.info/sct|code", + "fieldNameFlat": "VACCINE_PRODUCT_CODE", + "fieldNumber": 20, + "errorLevel": 0, + "expression": { + "expressionName": "Produce Code Not Empty Check", + "expressionType": "STRING", + "expressionRule": "" + }, + "errorGroup": "completeness" + }, + { + "expressionId": "01K5EGR0C885N7MMW2J5JKHTT2", + "fieldNameFHIR": "vaccineCode|coding|#:http://snomed.info/sct|display", + "fieldNameFlat": "VACCINE_PRODUCT_TERM", + "fieldNumber": 21, + "errorLevel": 1, + "expression": { + "expressionName": "Produce Term Not Empty Check", + "expressionType": "STRING", + "expressionRule": "" + }, + "errorGroup": "completeness" + }, + { + "expressionId": "01K5EGR0C86XN0AF0M9DJYFGCD", + "fieldNameFHIR": "manufacturer|display", + "fieldNameFlat": "VACCINE_MANUFACTURER", + "fieldNumber": 22, + "errorLevel": 0, + "expression": { + "expressionName": "Manufacturer Display Not Empty Check", + "expressionType": "STRING", + "expressionRule": "" + }, + "errorGroup": "completeness" + }, + { + "expressionId": "01K5EGR0C89M4CV68B7XAKDCHG", + "fieldNameFHIR": "lotNumber", + "fieldNameFlat": "BATCH_NUMBER", + "fieldNumber": 23, + "errorLevel": 0, + "expression": { + "expressionName": "Batch Number String Check", + "expressionType": "STRING", + "expressionRule": "" + }, + "errorGroup": "completeness" + }, + { + "expressionId": "01K8S2AG0CR7S28QB29XY14J71", + "fieldNameFHIR": "expirationDate", + "fieldNameFlat": "EXPIRY_DATE", + "fieldNumber": 24, + "errorLevel": 1, + "expression": { + "expressionName": "Date Convert", + "expressionType": "DATE", + "expressionRule": "" + }, + "errorGroup": "consistency" + }, + { + "expressionId": "01K8S2AS6TD0146EZN8ZDM9AGD", + "fieldNameFHIR": "site|coding|#:http://snomed.info/sct|code", + "fieldNameFlat": "SITE_OF_VACCINATION_CODE", + "fieldNumber": 25, + "errorLevel": 0, + "expression": { + "expressionName": "Site of Vaccination Code String Check", + "expressionType": "STRING", + "expressionRule": "" + }, + "errorGroup": "completeness" + }, + { + "expressionId": "01K8S2AZQ7XXTTF2AF7ZMM609C", + "fieldNameFHIR": "site|coding|#:http://snomed.info/sct|display", + "fieldNameFlat": "SITE_OF_VACCINATION_TERM", + "fieldNumber": 26, + "errorLevel": 1, + "expression": { + "expressionName": "Site of Vaccination Term String Check", + "expressionType": "STRING", + "expressionRule": "" + }, + "errorGroup": "consistency" + }, + { + "expressionId": "01K8S2B78X58EB9XAZYX3M4VPP", + "fieldNameFHIR": "route|coding|#:http://snomed.info/sct|code", + "fieldNameFlat": "ROUTE_OF_VACCINATION_CODE", + "fieldNumber": 27, + "errorLevel": 0, + "expression": { + "expressionName": "Route of Vaccination Code String Check", + "expressionType": "STRING", + "expressionRule": "" + }, + "errorGroup": "completeness" + }, + { + "expressionId": "01K8S2BEP9C9KHNJTKPP6SH1G0", + "fieldNameFHIR": "route|coding|#:http://snomed.info/sct|display", + "fieldNameFlat": "ROUTE_OF_VACCINATION_TERM", + "fieldNumber": 28, + "errorLevel": 1, + "expression": { + "expressionName": "Route of Vaccination Term String Check", + "expressionType": "STRING", + "expressionRule": "" + }, + "errorGroup": "consistency" + }, + { + "expressionId": "01K8S2BNT5MET8E29GBT83AP0P", + "fieldNameFHIR": "doseQuantity|value", + "fieldNameFlat": "DOSE_AMOUNT", + "fieldNumber": 29, + "errorLevel": 1, + "expression": { + "expressionName": "Dose Amount Int-decimal Check", + "expressionType": "INTDECIMAL", + "expressionRule": "" + }, + "errorGroup": "completeness" + }, + { + "expressionId": "01K8S2BY1S0TXETJY78H418XQG", + "fieldNameFHIR": "doseQuantity|code", + "fieldNameFlat": "DOSE_UNIT_CODE", + "fieldNumber": 30, + "errorLevel": 1, + "expression": { + "expressionName": "Dose Unit String Check", + "expressionType": "STRING", + "expressionRule": "" + }, + "errorGroup": "consistency" } ] } diff --git a/lambdas/shared/tests/test_common/validator/testing_utils/constants.py b/lambdas/shared/tests/test_common/validator/testing_utils/constants.py index 771cf1f97..d82c69ac3 100644 --- a/lambdas/shared/tests/test_common/validator/testing_utils/constants.py +++ b/lambdas/shared/tests/test_common/validator/testing_utils/constants.py @@ -1,4 +1,5 @@ # Comma-separated header string for building CSV test rows +from decimal import Decimal CSV_HEADER = ( "NHS_NUMBER,PERSON_FORENAME,PERSON_SURNAME,PERSON_DOB,PERSON_GENDER_CODE,PERSON_POSTCODE,DATE_AND_TIME,SITE_CODE," @@ -26,20 +27,20 @@ "PERFORMING_PROFESSIONAL_FORENAME": ["ALICE"], "PERFORMING_PROFESSIONAL_SURNAME": "SMITH", "RECORDED_DATE": "2025-03-06", - "PRIMARY_SOURCE": "true", + "PRIMARY_SOURCE": True, "VACCINATION_PROCEDURE_CODE": "PROC123", "VACCINATION_PROCEDURE_TERM": "Procedure Term", - "DOSE_SEQUENCE": "1", + "DOSE_SEQUENCE": 1, "VACCINE_PRODUCT_CODE": "VACC123", "VACCINE_PRODUCT_TERM": "Vaccine Term", "VACCINE_MANUFACTURER": "Manufacturer XYZ", "BATCH_NUMBER": "BATCH001", - "EXPIRY_DATE": "20250702", + "EXPIRY_DATE": "2025-07-02", "SITE_OF_VACCINATION_CODE": "368208006", "SITE_OF_VACCINATION_TERM": "Left upper arm structure (body structure)", "ROUTE_OF_VACCINATION_CODE": "78421000", "ROUTE_OF_VACCINATION_TERM": "Intramuscular route (qualifier value)", - "DOSE_AMOUNT": "0.5", + "DOSE_AMOUNT": Decimal("0.5"), "DOSE_UNIT_CODE": "ml", "DOSE_UNIT_TERM": "milliliter", "INDICATION_CODE": "443684005", From f5917e70506000aa00c897bce94e16467f4f9d82 Mon Sep 17 00:00:00 2001 From: Akol125 Date: Thu, 13 Nov 2025 22:13:56 +0000 Subject: [PATCH 11/18] shema for all 34 fields --- .../common/validator/expression_checker.py | 208 +----------------- .../validator/test_expression_checker.py | 76 +++++++ .../validator/test_schemas/test_schema.json | 52 +++++ 3 files changed, 130 insertions(+), 206 deletions(-) diff --git a/lambdas/shared/src/common/validator/expression_checker.py b/lambdas/shared/src/common/validator/expression_checker.py index 6cbf806fd..47e02a9ad 100644 --- a/lambdas/shared/src/common/validator/expression_checker.py +++ b/lambdas/shared/src/common/validator/expression_checker.py @@ -1,10 +1,9 @@ -import re from datetime import datetime from decimal import Decimal from typing import Optional, Union -from common.validator.constants.enums import MESSAGES, ExceptionLevels, MessageLabel -from common.validator.error_report.record_error import ErrorReport, RecordError +from common.validator.constants.enums import MESSAGES, ExceptionLevels +from common.validator.error_report.record_error import ErrorReport from common.validator.expression_rule import expression_rule_per_field from common.validator.lookup_expressions.key_data import KeyData from common.validator.lookup_expressions.lookup_data import LookUpData @@ -44,10 +43,6 @@ def validate_expression( return self.validation_for_boolean(expression_rule, field_name, field_value) case "INTDECIMAL": return self.validation_for_integer_or_decimal(expression_rule, field_name, field_value) - case "POSTCODE": - return self._validate_post_code(expression_rule, field_name, field_value) - case "GENDER": - return self._validate_gender(expression_rule, field_name, field_value) case _: return "Schema expression not found! Check your expression type : " + expression_type @@ -150,27 +145,6 @@ def validation_for_unique_list( found.append(item[unique_value_in_list]) - # Regex Validate - def _validate_regex(self, expression_rule: str, field_name: str, field_value: str, row: dict) -> ErrorReport: - try: - result = re.search(expression_rule, field_value) - if not result: - raise RecordError( - ExceptionLevels.RECORD_CHECK_FAILED, - "String REGEX check failed", - "Value does not meet regex rules", - ) - except RecordError as e: - code = e.code if e.code is not None else ExceptionLevels.RECORD_CHECK_FAILED - message = e.message if e.message is not None else MESSAGES[ExceptionLevels.RECORD_CHECK_FAILED] - if e.details is not None: - details = e.details - return ErrorReport(code, message, row, field_name, details, self.summarise) - except Exception as e: - if self.report_unexpected_exception: - message = MESSAGES[ExceptionLevels.UNEXPECTED_EXCEPTION] % (e.__class__.__name__, e) - return ErrorReport(ExceptionLevels.UNEXPECTED_EXCEPTION, message, row, field_name, "", self.summarise) - def validation_for_boolean(self, expression_rule: str, field_name: str, field_value: str) -> ErrorReport: """Apply pre-validation to a boolean field to ensure that it is a boolean""" try: @@ -278,101 +252,6 @@ def validation_for_date_time(self, expression_rule: str, field_name: str, field_ message = MESSAGES[ExceptionLevels.UNEXPECTED_EXCEPTION] % (e.__class__.__name__, e) return ErrorReport(ExceptionLevels.UNEXPECTED_EXCEPTION, message, None, field_name) - # Not Equal Validate - def _validate_not_equal(self, expression_rule: str, field_name: str, field_value: str, row: dict) -> ErrorReport: - try: - if field_value == expression_rule: - raise RecordError( - ExceptionLevels.RECORD_CHECK_FAILED, - "Value not equals check failed", - "Value equals expected value when it should not, Expected- " - + expression_rule - + MessageLabel.FOUND_LABEL - + field_value, - ) - except RecordError as e: - code = e.code if e.code is not None else ExceptionLevels.RECORD_CHECK_FAILED - message = e.message if e.message is not None else MESSAGES[ExceptionLevels.RECORD_CHECK_FAILED] - if e.details is not None: - details = e.details - return ErrorReport(code, message, row, field_name, details, self.summarise) - except Exception as e: - if self.report_unexpected_exception: - message = MESSAGES[ExceptionLevels.UNEXPECTED_EXCEPTION] % (e.__class__.__name__, e) - return ErrorReport(ExceptionLevels.UNEXPECTED_EXCEPTION, message, row, field_name, "", self.summarise) - - # Starts With Validate - def _validate_starts_with(self, expression_rule: str, field_name: str, field_value: str, row: dict) -> ErrorReport: - try: - result = field_value.startswith(expression_rule) - if not result: - raise RecordError( - ExceptionLevels.RECORD_CHECK_FAILED, - "Value starts with failure", - "Value does not start as expected, " - + MessageLabel.EXPECTED_LABEL - + expression_rule - + " " - + MessageLabel.FOUND_LABEL - + field_value, - ) - except RecordError as e: - code = e.code if e.code is not None else ExceptionLevels.RECORD_CHECK_FAILED - message = e.message if e.message is not None else MESSAGES[ExceptionLevels.RECORD_CHECK_FAILED] - if e.details is not None: - details = e.details - return ErrorReport(code, message, row, field_name, details, self.summarise) - except Exception as e: - if self.report_unexpected_exception: - message = MESSAGES[ExceptionLevels.UNEXPECTED_EXCEPTION] % (e.__class__.__name__, e) - return ErrorReport(ExceptionLevels.UNEXPECTED_EXCEPTION, message, row, field_name, "", self.summarise) - - # Ends With Validate - def _validate_ends_with(self, expression_rule: str, field_name: str, field_value: str, row: dict) -> ErrorReport: - try: - result = field_value.endswith(expression_rule) - if not result: - raise RecordError( - ExceptionLevels.RECORD_CHECK_FAILED, - "Value ends with failure", - "Value does not end as expected, " - + MessageLabel.EXPECTED_LABEL - + expression_rule - + " " - + MessageLabel.FOUND_LABEL - + field_value, - ) - except RecordError as e: - code = e.code if e.code is not None else ExceptionLevels.RECORD_CHECK_FAILED - message = e.message if e.message is not None else MESSAGES[ExceptionLevels.RECORD_CHECK_FAILED] - if e.details is not None: - details = e.details - return ErrorReport(code, message, row, field_name, details, self.summarise) - except Exception as e: - if self.report_unexpected_exception: - message = MESSAGES[ExceptionLevels.UNEXPECTED_EXCEPTION] % (e.__class__.__name__, e) - return ErrorReport(ExceptionLevels.UNEXPECTED_EXCEPTION, message, row, field_name, "", self.summarise) - - # Empty Validate - def _validate_empty(self, _expression_rule: str, field_name: str, field_value: str, row: dict) -> ErrorReport: - try: - if field_value: - raise RecordError( - ExceptionLevels.RECORD_CHECK_FAILED, - "Value is empty failure", - "Value has data, not as expected, data- " + field_value, - ) - except RecordError as e: - code = e.code if e.code is not None else ExceptionLevels.RECORD_CHECK_FAILED - message = e.message if e.message is not None else MESSAGES[ExceptionLevels.RECORD_CHECK_FAILED] - if e.details is not None: - details = e.details - return ErrorReport(code, message, row, field_name, details, self.summarise) - except Exception as e: - if self.report_unexpected_exception: - message = MESSAGES[ExceptionLevels.UNEXPECTED_EXCEPTION] % (e.__class__.__name__, e) - return ErrorReport(ExceptionLevels.UNEXPECTED_EXCEPTION, message, row, field_name, "", self.summarise) - # String Pre-Validation def validation_for_string_values(self, expression_rule: str, field_name: str, field_value: str) -> ErrorReport: """ @@ -419,86 +298,3 @@ def validation_for_string_values(self, expression_rule: str, field_name: str, fi if self.report_unexpected_exception: message = MESSAGES[ExceptionLevels.UNEXPECTED_EXCEPTION] % (e.__class__.__name__, e) return ErrorReport(ExceptionLevels.UNEXPECTED_EXCEPTION, message, None, field_name) - - # Not Empty Validate - def _validate_not_empty(self, _expression_rule: str, field_name: str, field_value: str) -> ErrorReport: - try: - if not field_value: - raise RecordError( - ExceptionLevels.RECORD_CHECK_FAILED, "Value not empty failure", "Value is empty, not as expected" - ) - except RecordError as e: - code = e.code if e.code is not None else ExceptionLevels.RECORD_CHECK_FAILED - message = e.message if e.message is not None else MESSAGES[ExceptionLevels.RECORD_CHECK_FAILED] - if e.details is not None: - details = e.details - return ErrorReport(code, message, None, field_name, details, self.summarise) - except Exception as e: - if self.report_unexpected_exception: - message = MESSAGES[ExceptionLevels.UNEXPECTED_EXCEPTION] % (e.__class__.__name__, e) - return ErrorReport(ExceptionLevels.UNEXPECTED_EXCEPTION, message, None, field_name) - - # NHSNumber Validate - def _validate_nhs_number(self, _expression_rule: str, field_name: str, field_value: str, row: dict) -> ErrorReport: - try: - regex_rule = r"^6\d{10}$" - result = re.search(regex_rule, field_value) - if not result: - raise RecordError( - ExceptionLevels.RECORD_CHECK_FAILED, - "NHS Number check failed", - "NHS Number does not meet regex rules, data- " + field_value, - ) - except RecordError as e: - code = e.code if e.code is not None else ExceptionLevels.RECORD_CHECK_FAILED - message = e.message if e.message is not None else MESSAGES[ExceptionLevels.RECORD_CHECK_FAILED] - if e.details is not None: - details = e.details - return ErrorReport(code, message, row, field_name, details, self.summarise) - except Exception as e: - if self.report_unexpected_exception: - message = MESSAGES[ExceptionLevels.UNEXPECTED_EXCEPTION] % (e.__class__.__name__, e) - return ErrorReport(ExceptionLevels.UNEXPECTED_EXCEPTION, message, row, field_name, "", self.summarise) - - # Gender Validate - def _validate_gender(self, _expression_rule: str, field_name: str, field_value: str) -> ErrorReport: - try: - rule_list = ["0", "1", "2", "9"] - - if field_value not in rule_list: - raise RecordError( - ExceptionLevels.RECORD_CHECK_FAILED, - "Gender check failed", - "Gender value not found in array, data- " + field_value, - ) - except RecordError as e: - code = e.code if e.code is not None else ExceptionLevels.RECORD_CHECK_FAILED - message = e.message if e.message is not None else MESSAGES[ExceptionLevels.RECORD_CHECK_FAILED] - if e.details is not None: - details = e.details - return ErrorReport(code, message, None, field_name, details, self.summarise) - except Exception as e: - if self.report_unexpected_exception: - message = MESSAGES[ExceptionLevels.UNEXPECTED_EXCEPTION] % (e.__class__.__name__, e) - return ErrorReport(ExceptionLevels.UNEXPECTED_EXCEPTION, message, None, field_name) - - # PostCode Validate - def _validate_post_code(self, _expression_rule: str, field_name: str, field_value: str) -> ErrorReport: - try: - # UK postcode regex (allows optional space) - regex_rule = r"^[A-Z]{1,2}\d[A-Z\d]?\s?\d[A-Z]{2}$" - result = re.search(regex_rule, field_value) - if not result: - raise RecordError( - ExceptionLevels.RECORD_CHECK_FAILED, "Postcode check failed", "Postcode does not meet regex rules" - ) - except RecordError as e: - code = e.code if e.code is not None else ExceptionLevels.RECORD_CHECK_FAILED - message = e.message if e.message is not None else MESSAGES[ExceptionLevels.RECORD_CHECK_FAILED] - if e.details is not None: - details = e.details - return ErrorReport(code, message, None, field_name, details, self.summarise) - except Exception as e: - if self.report_unexpected_exception: - message = MESSAGES[ExceptionLevels.UNEXPECTED_EXCEPTION] % (e.__class__.__name__, e) - return ErrorReport(ExceptionLevels.UNEXPECTED_EXCEPTION, message, None, field_name, "", self.summarise) diff --git a/lambdas/shared/tests/test_common/validator/test_expression_checker.py b/lambdas/shared/tests/test_common/validator/test_expression_checker.py index d41ae6ef5..8db76a956 100644 --- a/lambdas/shared/tests/test_common/validator/test_expression_checker.py +++ b/lambdas/shared/tests/test_common/validator/test_expression_checker.py @@ -357,6 +357,82 @@ def test_dose_unit_code_string_valid_and_invalid(self): msg=f"fieldPath={field_path}", ) + # STRING with DOSE_UNIT_TERM + def test_dose_unit_term_string_valid_and_invalid(self): + checker = self.make_checker() + field_path = "doseQuantity|unit" + self.assertIsNone( + checker.validate_expression("STRING", "", field_path, "milliliter"), + msg=f"fieldPath={field_path}", + ) + self.assertIsInstance( + checker.validate_expression("STRING", "", field_path, ""), + ErrorReport, + msg=f"fieldPath={field_path}", + ) + self.assertIsInstance( + checker.validate_expression("STRING", "", field_path, 1), + ErrorReport, + msg=f"fieldPath={field_path}", + ) + + # STRING with INDICATION_CODE + def test_indication_code_string_valid_and_invalid(self): + checker = self.make_checker() + field_path = "reasonCode|#:http://snomed.info/sct|coding|#:http://snomed.info/sct|code" + self.assertIsNone( + checker.validate_expression("STRING", "", field_path, "987654"), + msg=f"fieldPath={field_path}", + ) + self.assertIsInstance( + checker.validate_expression("STRING", "", field_path, ""), + ErrorReport, + msg=f"fieldPath={field_path}", + ) + self.assertIsInstance( + checker.validate_expression("STRING", "", field_path, 987654), + ErrorReport, + msg=f"fieldPath={field_path}", + ) + + # STRING with LOCATION_CODE + def test_location_code_string_valid_and_invalid(self): + checker = self.make_checker() + field_path = "location|identifier|value" + self.assertIsNone( + checker.validate_expression("STRING", "", field_path, "LOC-123"), + msg=f"fieldPath={field_path}", + ) + self.assertIsInstance( + checker.validate_expression("STRING", "", field_path, ""), + ErrorReport, + msg=f"fieldPath={field_path}", + ) + self.assertIsInstance( + checker.validate_expression("STRING", "", field_path, 321), + ErrorReport, + msg=f"fieldPath={field_path}", + ) + + # STRING with LOCATION_CODE_TYPE_URI + def test_location_code_type_uri_string_valid_and_invalid(self): + checker = self.make_checker() + field_path = "location|identifier|system" + self.assertIsNone( + checker.validate_expression("STRING", "", field_path, "https://example.org/location-code-system"), + msg=f"fieldPath={field_path}", + ) + self.assertIsInstance( + checker.validate_expression("STRING", "", field_path, ""), + ErrorReport, + msg=f"fieldPath={field_path}", + ) + self.assertIsInstance( + checker.validate_expression("STRING", "", field_path, 0), + ErrorReport, + msg=f"fieldPath={field_path}", + ) + # LIST with PERFORMING_PROFESSIONAL_FORENAME (empty rule -> non-empty list) def test_practitioner_forename_list_valid_and_invalid(self): checker = self.make_checker() diff --git a/lambdas/shared/tests/test_common/validator/test_schemas/test_schema.json b/lambdas/shared/tests/test_common/validator/test_schemas/test_schema.json index edef4c7ea..d7575c6ac 100644 --- a/lambdas/shared/tests/test_common/validator/test_schemas/test_schema.json +++ b/lambdas/shared/tests/test_common/validator/test_schemas/test_schema.json @@ -380,6 +380,58 @@ "expressionRule": "" }, "errorGroup": "consistency" + }, + { + "expressionId": "01K8S2C3XTDW9RK9Y2FQ9YM5WJ", + "fieldNameFHIR": "doseQuantity|unit", + "fieldNameFlat": "DOSE_UNIT_TERM", + "fieldNumber": 31, + "errorLevel": 0, + "expression": { + "expressionName": "Dose Unit Term Not Empty Check", + "expressionType": "STRING", + "expressionRule": "" + }, + "errorGroup": "completeness" + }, + { + "expressionId": "01K8S2CANK2PFNDANX3D04W2NR", + "fieldNameFHIR": "reasonCode|#:http://snomed.info/sct|coding|#:http://snomed.info/sct|code", + "fieldNameFlat": "INDICATION_CODE", + "fieldNumber": 32, + "errorLevel": 0, + "expression": { + "expressionName": "Indication Code Not Empty Check", + "expressionType": "STRING", + "expressionRule": "" + }, + "errorGroup": "completeness" + }, + { + "expressionId": "01K8S2CKK8FCVJ6049EW5G563P", + "fieldNameFHIR": "location|identifier|value", + "fieldNameFlat": "LOCATION_CODE", + "fieldNumber": 33, + "errorLevel": 1, + "expression": { + "expressionName": "Location Code Default Check", + "expressionType": "STRING", + "expressionRule": "" + }, + "errorGroup": "consistency" + }, + { + "expressionId": "01K8S2CSWYXJ5WDS7K59A045JR", + "fieldNameFHIR": "location|identifier|system", + "fieldNameFlat": "LOCATION_CODE_TYPE_URI", + "fieldNumber": 34, + "errorLevel": 1, + "expression": { + "expressionName": "Location Code Type URI Default Check", + "expressionType": "STRING", + "expressionRule": "" + }, + "errorGroup": "consistency" } ] } From e5f43ba6e112722b8905f42e8559783bbd1dc180 Mon Sep 17 00:00:00 2001 From: Akol125 Date: Thu, 13 Nov 2025 22:58:11 +0000 Subject: [PATCH 12/18] nhs mod 11 check and schema per environment folder --- .../validator/config_schema/dev_schema.json | 451 ++++++++++++++++++ .../config_schema/preprod_schema.json | 0 .../validator/config_schema/prod_schema.json | 0 .../common/validator/expression_checker.py | 23 +- .../validator/test_expression_checker.py | 178 ++----- .../validator/test_schemas/test_schema.json | 14 + 6 files changed, 527 insertions(+), 139 deletions(-) create mode 100644 lambdas/shared/src/common/validator/config_schema/dev_schema.json create mode 100644 lambdas/shared/src/common/validator/config_schema/preprod_schema.json create mode 100644 lambdas/shared/src/common/validator/config_schema/prod_schema.json diff --git a/lambdas/shared/src/common/validator/config_schema/dev_schema.json b/lambdas/shared/src/common/validator/config_schema/dev_schema.json new file mode 100644 index 000000000..8e11e1480 --- /dev/null +++ b/lambdas/shared/src/common/validator/config_schema/dev_schema.json @@ -0,0 +1,451 @@ +{ + "id": "01K5EGR0C85TPNZT71MJ10VKYY", + "schemaName": "Base Vaccination Validation", + "version": 1.0, + "releaseDate": "2024-07-17T00:00:00.000Z", + "expressions": [ + { + "expressionId": "01K5EGR0C7Y1WJ0BC803SQDWK4", + "fieldNameFHIR": "contained|#:Patient|identifier|#:https://fhir.nhs.uk/Id/nhs-number|value", + "fieldNameFlat": "NHS_NUMBER", + "fieldNumber": 1, + "errorLevel": 0, + "expression": { + "expressionName": "NHS Number String Check", + "expressionType": "STRING", + "expressionRule": "NHS_NUMBER" + }, + "errorGroup": "validity" + }, + { + "expressionId": "01K9ZMX49TKDKQB36ENJTGJT6J", + "parentExpressionId": "01K5EGR0C7Y1WJ0BC803SQDWK4", + "fieldNameFHIR": "contained|#:Patient|identifier|#:https://fhir.nhs.uk/Id/nhs-number|value", + "fieldNameFlat": "NHS_NUMBER", + "fieldNumber": 1, + "errorLevel": 0, + "expression": { + "expressionName": "NHS Number MOD 11 Check", + "expressionType": "NHS_NUMBER", + "expressionRule": "" + }, + "errorGroup": "validity" + }, + { + "expressionId": "01K5EGR0C7QCEJMWH1R4MBPGQA", + "fieldNameFHIR": "contained|#:Patient|name|#:official|given|0", + "fieldNameFlat": "PERSON_FORENAME", + "fieldNumber": 2, + "errorLevel": 0, + "expression": { + "expressionName": "Person Forename List Check", + "expressionType": "LIST", + "expressionRule": "PERSON_NAME" + }, + "errorGroup": "completeness" + }, + { + "expressionId": "01K5EGR0C7RRG9F6FVHJ8HE4QX", + "fieldNameFHIR": "contained|#:Patient|name|#:official|family", + "fieldNameFlat": "PERSON_SURNAME", + "fieldNumber": 3, + "errorLevel": 0, + "expression": { + "expressionName": "Person Surname String Check", + "expressionType": "STRING", + "expressionRule": "PERSON_SURNAME" + }, + "errorGroup": "completeness" + }, + { + "expressionId": "01K8RW1DF635S4FRZ1WF9GDS1T", + "fieldNameFHIR": "contained|#:Patient|birthDate", + "fieldNameFlat": "PERSON_DOB", + "fieldNumber": 4, + "errorLevel": 1, + "expression": { + "expressionName": "Date of Birth Not Empty Check", + "expressionType": "DATE", + "expressionRule": "" + }, + "errorGroup": "consistency" + }, + { + "expressionId": "01K8RW2ATXRM572BFG19S8TFJ2", + "fieldNameFHIR": "contained|#:Patient|gender", + "fieldNameFlat": "PERSON_GENDER_CODE", + "fieldNumber": 5, + "errorLevel": 1, + "expression": { + "expressionName": "Gender Valid Check", + "expressionType": "STRING", + "expressionRule": "GENDER" + }, + "errorGroup": "consistency" + }, + { + "expressionId": "01K8RW2MYFNE6YZJ99RC8B3XES", + "fieldNameFHIR": "contained|#:Patient|address|#:postalCode|postalCode", + "fieldNameFlat": "PERSON_POSTCODE", + "fieldNumber": 6, + "errorLevel": 2, + "expression": { + "expressionName": "Postcode String Check", + "expressionType": "STRING", + "expressionRule": "" + }, + "errorGroup": "completeness" + }, + { + "expressionId": "01K8S0WRNSPQ42RBD8420Q9G7Y", + "fieldNameFHIR": "occurrenceDateTime", + "fieldNameFlat": "DATE_AND_TIME", + "fieldNumber": 7, + "errorLevel": 0, + "expression": { + "expressionName": "Date Time Conversion", + "expressionType": "DATETIME", + "expressionRule": "DATETIME" + }, + "errorGroup": "consistency" + }, + { + "expressionId": "01K5EGR0C8M1MVNKTQCE6MSG68", + "fieldNameFHIR": "performer|#:Organization|actor|identifier|value", + "fieldNameFlat": "SITE_CODE", + "fieldNumber": 8, + "errorLevel": 0, + "expression": { + "expressionName": "Organisation String Check", + "expressionType": "STRING", + "expressionRule": "" + }, + "errorGroup": "consistency" + }, + { + "expressionId": "01K8S0X5AJ9048PAFEN3XVZ7YC", + "fieldNameFHIR": "performer|#:Organization|actor|identifier|system", + "fieldNameFlat": "SITE_CODE_TYPE_URI", + "fieldNumber": 9, + "errorLevel": 1, + "expression": { + "expressionName": "Organisation Code Type URI Check", + "expressionType": "STRING", + "expressionRule": "" + }, + "errorGroup": "consistency" + }, + { + "expressionId": "01K8S0XF2Y2WP22017N9KE6VJA", + "fieldNameFHIR": "identifier|0|value", + "fieldNameFlat": "UNIQUE_ID", + "fieldNumber": 10, + "errorLevel": 0, + "expression": { + "expressionName": "Unique ID Not Empty Check", + "expressionType": "STRING", + "expressionRule": "" + }, + "errorGroup": "validity" + }, + { + "expressionId": "01K8S0XQYHDKMCA1P1GK4W5JHP", + "fieldNameFHIR": "identifier|0|system", + "fieldNameFlat": "UNIQUE_ID_URI", + "fieldNumber": 11, + "errorLevel": 0, + "expression": { + "expressionName": "Unique ID URI Not Empty Check", + "expressionType": "STRING", + "expressionRule": "" + }, + "errorGroup": "validity" + }, + { + "expressionId": "01K5EGR0C8SDQBTNCEP8TJNCCW", + "fieldNameFHIR": "contained|#:Practitioner|name|0|given|0", + "fieldNameFlat": "PERFORMING_PROFESSIONAL_FORENAME", + "fieldNumber": 13, + "errorLevel": 1, + "expression": { + "expressionName": "Practitioner Forename Not Empty Check", + "expressionType": "LIST", + "expressionRule": "" + }, + "errorGroup": "completeness" + }, + { + "expressionId": "01K5EGR0C8T3Z6X6h3W7D1F4VY", + "fieldNameFHIR": "contained|#:Practitioner|name|0|family", + "fieldNameFlat": "PERFORMING_PROFESSIONAL_SURNAME", + "fieldNumber": 14, + "errorLevel": 1, + "expression": { + "expressionName": "Practitioner Surname Not Empty Check", + "expressionType": "STRING", + "expressionRule": "" + }, + "errorGroup": "completeness" + }, + { + "expressionId": "01K8S0Y8TX8HTX6YGW61RDCATK", + "fieldNameFHIR": "recorded", + "fieldNameFlat": "RECORDED_DATE", + "fieldNumber": 15, + "errorLevel": 1, + "expression": { + "expressionName": "Recorded Date Convert", + "expressionType": "DATETIME", + "expressionRule": "" + }, + "errorGroup": "consistency" + }, + { + "expressionId": "01K5EGR0C84CCDRR0VFSWQNFZP", + "fieldNameFHIR": "primarySource", + "fieldNameFlat": "PRIMARY_SOURCE", + "fieldNumber": 16, + "errorLevel": 0, + "expression": { + "expressionName": "Primary Source Not Empty Check", + "expressionType": "BOOLEAN", + "expressionRule": "" + }, + "errorGroup": "completeness" + }, + { + "expressionId": "01K8S0YYGDFWJXN2W3THYG24EZ", + "fieldNameFHIR": "extension|0|valueCodeableConcept|coding|0|code", + "fieldNameFlat": "VACCINATION_PROCEDURE_CODE", + "fieldNumber": 17, + "errorLevel": 0, + "expression": { + "expressionName": "Procedure Code Not Empty Check", + "expressionType": "STRING", + "expressionRule": "" + }, + "errorGroup": "completeness" + }, + { + "expressionId": "01K5EGR0C85HY6MDNN6TTR1K48", + "fieldNameFHIR": "extension|0|valueCodeableConcept|coding|0|display", + "fieldNameFlat": "VACCINATION_PROCEDURE_TERM", + "fieldNumber": 18, + "errorLevel": 1, + "expression": { + "expressionName": "Procedure Term Not Empty Check", + "expressionType": "STRING", + "expressionRule": "" + }, + "errorGroup": "completeness" + }, + { + "expressionId": "01K5EGR0C84DDGW567G14AYBC6", + "fieldNameFHIR": "protocolApplied|0|doseNumberPositiveInt", + "fieldNameFlat": "DOSE_SEQUENCE", + "fieldNumber": 19, + "errorLevel": 1, + "expression": { + "expressionName": "Dose Sequence Not Empty Check", + "expressionType": "POSITIVEINTEGER", + "expressionRule": "DOSE_NUMBER" + }, + "errorGroup": "completeness" + }, + { + "expressionId": "01K5EGR0C8W3HXFYR80ENW73SS", + "fieldNameFHIR": "vaccineCode|coding|#:http://snomed.info/sct|code", + "fieldNameFlat": "VACCINE_PRODUCT_CODE", + "fieldNumber": 20, + "errorLevel": 0, + "expression": { + "expressionName": "Produce Code Not Empty Check", + "expressionType": "STRING", + "expressionRule": "" + }, + "errorGroup": "completeness" + }, + { + "expressionId": "01K5EGR0C885N7MMW2J5JKHTT2", + "fieldNameFHIR": "vaccineCode|coding|#:http://snomed.info/sct|display", + "fieldNameFlat": "VACCINE_PRODUCT_TERM", + "fieldNumber": 21, + "errorLevel": 1, + "expression": { + "expressionName": "Produce Term Not Empty Check", + "expressionType": "STRING", + "expressionRule": "" + }, + "errorGroup": "completeness" + }, + { + "expressionId": "01K5EGR0C86XN0AF0M9DJYFGCD", + "fieldNameFHIR": "manufacturer|display", + "fieldNameFlat": "VACCINE_MANUFACTURER", + "fieldNumber": 22, + "errorLevel": 0, + "expression": { + "expressionName": "Manufacturer Display Not Empty Check", + "expressionType": "STRING", + "expressionRule": "" + }, + "errorGroup": "completeness" + }, + { + "expressionId": "01K5EGR0C89M4CV68B7XAKDCHG", + "fieldNameFHIR": "lotNumber", + "fieldNameFlat": "BATCH_NUMBER", + "fieldNumber": 23, + "errorLevel": 0, + "expression": { + "expressionName": "Batch Number String Check", + "expressionType": "STRING", + "expressionRule": "" + }, + "errorGroup": "completeness" + }, + { + "expressionId": "01K8S2AG0CR7S28QB29XY14J71", + "fieldNameFHIR": "expirationDate", + "fieldNameFlat": "EXPIRY_DATE", + "fieldNumber": 24, + "errorLevel": 1, + "expression": { + "expressionName": "Date Convert", + "expressionType": "DATE", + "expressionRule": "" + }, + "errorGroup": "consistency" + }, + { + "expressionId": "01K8S2AS6TD0146EZN8ZDM9AGD", + "fieldNameFHIR": "site|coding|#:http://snomed.info/sct|code", + "fieldNameFlat": "SITE_OF_VACCINATION_CODE", + "fieldNumber": 25, + "errorLevel": 0, + "expression": { + "expressionName": "Site of Vaccination Code String Check", + "expressionType": "STRING", + "expressionRule": "" + }, + "errorGroup": "completeness" + }, + { + "expressionId": "01K8S2AZQ7XXTTF2AF7ZMM609C", + "fieldNameFHIR": "site|coding|#:http://snomed.info/sct|display", + "fieldNameFlat": "SITE_OF_VACCINATION_TERM", + "fieldNumber": 26, + "errorLevel": 1, + "expression": { + "expressionName": "Site of Vaccination Term String Check", + "expressionType": "STRING", + "expressionRule": "" + }, + "errorGroup": "consistency" + }, + { + "expressionId": "01K8S2B78X58EB9XAZYX3M4VPP", + "fieldNameFHIR": "route|coding|#:http://snomed.info/sct|code", + "fieldNameFlat": "ROUTE_OF_VACCINATION_CODE", + "fieldNumber": 27, + "errorLevel": 0, + "expression": { + "expressionName": "Route of Vaccination Code String Check", + "expressionType": "STRING", + "expressionRule": "" + }, + "errorGroup": "completeness" + }, + { + "expressionId": "01K8S2BEP9C9KHNJTKPP6SH1G0", + "fieldNameFHIR": "route|coding|#:http://snomed.info/sct|display", + "fieldNameFlat": "ROUTE_OF_VACCINATION_TERM", + "fieldNumber": 28, + "errorLevel": 1, + "expression": { + "expressionName": "Route of Vaccination Term String Check", + "expressionType": "STRING", + "expressionRule": "" + }, + "errorGroup": "consistency" + }, + { + "expressionId": "01K8S2BNT5MET8E29GBT83AP0P", + "fieldNameFHIR": "doseQuantity|value", + "fieldNameFlat": "DOSE_AMOUNT", + "fieldNumber": 29, + "errorLevel": 1, + "expression": { + "expressionName": "Dose Amount Int-decimal Check", + "expressionType": "INTDECIMAL", + "expressionRule": "" + }, + "errorGroup": "completeness" + }, + { + "expressionId": "01K8S2BY1S0TXETJY78H418XQG", + "fieldNameFHIR": "doseQuantity|code", + "fieldNameFlat": "DOSE_UNIT_CODE", + "fieldNumber": 30, + "errorLevel": 1, + "expression": { + "expressionName": "Dose Unit String Check", + "expressionType": "STRING", + "expressionRule": "" + }, + "errorGroup": "consistency" + }, + { + "expressionId": "01K8S2C3XTDW9RK9Y2FQ9YM5WJ", + "fieldNameFHIR": "doseQuantity|unit", + "fieldNameFlat": "DOSE_UNIT_TERM", + "fieldNumber": 31, + "errorLevel": 0, + "expression": { + "expressionName": "Dose Unit Term Not Empty Check", + "expressionType": "STRING", + "expressionRule": "" + }, + "errorGroup": "completeness" + }, + { + "expressionId": "01K8S2CANK2PFNDANX3D04W2NR", + "fieldNameFHIR": "reasonCode|#:http://snomed.info/sct|coding|#:http://snomed.info/sct|code", + "fieldNameFlat": "INDICATION_CODE", + "fieldNumber": 32, + "errorLevel": 0, + "expression": { + "expressionName": "Indication Code Not Empty Check", + "expressionType": "STRING", + "expressionRule": "" + }, + "errorGroup": "completeness" + }, + { + "expressionId": "01K8S2CKK8FCVJ6049EW5G563P", + "fieldNameFHIR": "location|identifier|value", + "fieldNameFlat": "LOCATION_CODE", + "fieldNumber": 33, + "errorLevel": 1, + "expression": { + "expressionName": "Location Code Default Check", + "expressionType": "STRING", + "expressionRule": "" + }, + "errorGroup": "consistency" + }, + { + "expressionId": "01K8S2CSWYXJ5WDS7K59A045JR", + "fieldNameFHIR": "location|identifier|system", + "fieldNameFlat": "LOCATION_CODE_TYPE_URI", + "fieldNumber": 34, + "errorLevel": 1, + "expression": { + "expressionName": "Location Code Type URI Default Check", + "expressionType": "STRING", + "expressionRule": "" + }, + "errorGroup": "consistency" + } + ] +} diff --git a/lambdas/shared/src/common/validator/config_schema/preprod_schema.json b/lambdas/shared/src/common/validator/config_schema/preprod_schema.json new file mode 100644 index 000000000..e69de29bb diff --git a/lambdas/shared/src/common/validator/config_schema/prod_schema.json b/lambdas/shared/src/common/validator/config_schema/prod_schema.json new file mode 100644 index 000000000..e69de29bb diff --git a/lambdas/shared/src/common/validator/expression_checker.py b/lambdas/shared/src/common/validator/expression_checker.py index 47e02a9ad..94871b66a 100644 --- a/lambdas/shared/src/common/validator/expression_checker.py +++ b/lambdas/shared/src/common/validator/expression_checker.py @@ -2,13 +2,13 @@ from decimal import Decimal from typing import Optional, Union +from common.validator.constants.constants import Constants from common.validator.constants.enums import MESSAGES, ExceptionLevels from common.validator.error_report.record_error import ErrorReport from common.validator.expression_rule import expression_rule_per_field from common.validator.lookup_expressions.key_data import KeyData from common.validator.lookup_expressions.lookup_data import LookUpData -from common.validator.validation_utils import check_if_future_date -from src.common.validator.constants.constants import Constants +from common.validator.validation_utils import check_if_future_date, nhs_number_mod11_check class ExpressionChecker: @@ -31,6 +31,8 @@ def validate_expression( return self.validation_for_string_values(expression_rule, field_name, field_value) case "LIST": return self.validation_for_list(expression_rule, field_name, field_value) + case "NHS_NUMBER": + return self.validation_for_nhs_number(expression_rule, field_name, field_value) case "DATE": return self.validation_for_date(expression_rule, field_name, field_value) case "DATETIME": @@ -298,3 +300,20 @@ def validation_for_string_values(self, expression_rule: str, field_name: str, fi if self.report_unexpected_exception: message = MESSAGES[ExceptionLevels.UNEXPECTED_EXCEPTION] % (e.__class__.__name__, e) return ErrorReport(ExceptionLevels.UNEXPECTED_EXCEPTION, message, None, field_name) + + def validation_for_nhs_number(self, expression_rule: str, field_name: str, field_value: str) -> ErrorReport: + """ + Apply pre-validation to an NHS number to ensure that it is a valid NHS number + """ + try: + if not nhs_number_mod11_check(field_value): + raise ValueError(f"{field_name} is not a valid NHS number") + except (ValueError, TypeError) as e: + code = ExceptionLevels.RECORD_CHECK_FAILED + message = MESSAGES[ExceptionLevels.RECORD_CHECK_FAILED] + details = str(e) + return ErrorReport(code, message, None, field_name, details) + except Exception as e: + if self.report_unexpected_exception: + message = MESSAGES[ExceptionLevels.UNEXPECTED_EXCEPTION] % (e.__class__.__name__, e) + return ErrorReport(ExceptionLevels.UNEXPECTED_EXCEPTION, message, None, field_name) diff --git a/lambdas/shared/tests/test_common/validator/test_expression_checker.py b/lambdas/shared/tests/test_common/validator/test_expression_checker.py index 8db76a956..a5030b503 100644 --- a/lambdas/shared/tests/test_common/validator/test_expression_checker.py +++ b/lambdas/shared/tests/test_common/validator/test_expression_checker.py @@ -64,6 +64,17 @@ def test_string_valid_and_invalid(self): ErrorReport, ) + # NHS_NUMBER expression type (MOD 11 check) + def test_nhs_number_mod11_valid_and_invalid(self): + checker = self.make_checker() + field_path = "contained|#:Patient|identifier|#:https://fhir.nhs.uk/Id/nhs-number|value" + # Known valid NHS number + self.assertIsNone(checker.validate_expression("NHS_NUMBER", "", field_path, "9736592677")) + # Invalid: wrong check digit + self.assertIsInstance(checker.validate_expression("NHS_NUMBER", "", field_path, "9434765918"), ErrorReport) + # Invalid: non-digit / wrong length + self.assertIsInstance(checker.validate_expression("NHS_NUMBER", "", field_path, "123456789A"), ErrorReport) + # LIST PERSON_FORENAME def test_list_valid_and_invalid(self): checker = self.make_checker() @@ -245,193 +256,86 @@ def test_vaccine_manufacturer_string_valid_and_invalid(self): def test_site_of_vaccination_code_string_valid_and_invalid(self): checker = self.make_checker() field_path = "site|coding|#:http://snomed.info/sct|code" - self.assertIsNone( - checker.validate_expression("STRING", "", field_path, "123456"), - msg=f"fieldPath={field_path}", - ) - self.assertIsInstance( - checker.validate_expression("STRING", "", field_path, ""), - ErrorReport, - msg=f"fieldPath={field_path}", - ) - self.assertIsInstance( - checker.validate_expression("STRING", "", field_path, 123456), - ErrorReport, - msg=f"fieldPath={field_path}", - ) + self.assertIsNone(checker.validate_expression("STRING", "", field_path, "123456")) + self.assertIsInstance(checker.validate_expression("STRING", "", field_path, ""), ErrorReport) + self.assertIsInstance(checker.validate_expression("STRING", "", field_path, 123456), ErrorReport) # STRING with SITE_OF_VACCINATION_TERM def test_site_of_vaccination_term_string_valid_and_invalid(self): checker = self.make_checker() field_path = "site|coding|#:http://snomed.info/sct|display" - self.assertIsNone( - checker.validate_expression("STRING", "", field_path, "Left deltoid"), - msg=f"fieldPath={field_path}", - ) - self.assertIsInstance( - checker.validate_expression("STRING", "", field_path, ""), - ErrorReport, - msg=f"fieldPath={field_path}", - ) - self.assertIsInstance( - checker.validate_expression("STRING", "", field_path, 999), - ErrorReport, - msg=f"fieldPath={field_path}", - ) + self.assertIsNone(checker.validate_expression("STRING", "", field_path, "Left deltoid")) + self.assertIsInstance(checker.validate_expression("STRING", "", field_path, ""), ErrorReport) + self.assertIsInstance(checker.validate_expression("STRING", "", field_path, 999), ErrorReport) # STRING with ROUTE_OF_VACCINATION_CODE def test_route_of_vaccination_code_string_valid_and_invalid(self): checker = self.make_checker() field_path = "route|coding|#:http://snomed.info/sct|code" - self.assertIsNone( - checker.validate_expression("STRING", "", field_path, "1234"), - msg=f"fieldPath={field_path}", - ) - self.assertIsInstance( - checker.validate_expression("STRING", "", field_path, ""), - ErrorReport, - msg=f"fieldPath={field_path}", - ) - self.assertIsInstance( - checker.validate_expression("STRING", "", field_path, 1234), - ErrorReport, - msg=f"fieldPath={field_path}", - ) + self.assertIsNone(checker.validate_expression("STRING", "", field_path, "1234")) + self.assertIsInstance(checker.validate_expression("STRING", "", field_path, ""), ErrorReport) + self.assertIsInstance(checker.validate_expression("STRING", "", field_path, 1234), ErrorReport) # STRING with ROUTE_OF_VACCINATION_TERM def test_route_of_vaccination_term_string_valid_and_invalid(self): checker = self.make_checker() field_path = "route|coding|#:http://snomed.info/sct|display" - self.assertIsNone( - checker.validate_expression("STRING", "", field_path, "Intramuscular"), - msg=f"fieldPath={field_path}", - ) - self.assertIsInstance( - checker.validate_expression("STRING", "", field_path, ""), - ErrorReport, - msg=f"fieldPath={field_path}", - ) - self.assertIsInstance( - checker.validate_expression("STRING", "", field_path, 12), - ErrorReport, - msg=f"fieldPath={field_path}", - ) + self.assertIsNone(checker.validate_expression("STRING", "", field_path, "Intramuscular")) + self.assertIsInstance(checker.validate_expression("STRING", "", field_path, ""), ErrorReport) + self.assertIsInstance(checker.validate_expression("STRING", "", field_path, 12), ErrorReport) # INTDECIMAL with DOSE_AMOUNT def test_dose_amount_intdecimal_valid_and_invalid(self): checker = self.make_checker() field_path = "doseQuantity|value" # Valid: int - self.assertIsNone( - checker.validate_expression("INTDECIMAL", "", field_path, 1), - msg=f"fieldPath={field_path}", - ) + self.assertIsNone(checker.validate_expression("INTDECIMAL", "", field_path, 1)) # Valid: Decimal - self.assertIsNone( - checker.validate_expression("INTDECIMAL", "", field_path, Decimal("0.5")), - msg=f"fieldPath={field_path}", - ) + self.assertIsNone(checker.validate_expression("INTDECIMAL", "", field_path, Decimal("0.5"))) # Invalid: string - self.assertIsInstance( - checker.validate_expression("INTDECIMAL", "", field_path, "0.5"), - ErrorReport, - msg=f"fieldPath={field_path}", - ) + self.assertIsInstance(checker.validate_expression("INTDECIMAL", "", field_path, "0.5"), ErrorReport) # STRING with DOSE_UNIT_CODE def test_dose_unit_code_string_valid_and_invalid(self): checker = self.make_checker() field_path = "doseQuantity|code" - self.assertIsNone( - checker.validate_expression("STRING", "", field_path, "ml"), - msg=f"fieldPath={field_path}", - ) - self.assertIsInstance( - checker.validate_expression("STRING", "", field_path, ""), - ErrorReport, - msg=f"fieldPath={field_path}", - ) - self.assertIsInstance( - checker.validate_expression("STRING", "", field_path, 1), - ErrorReport, - msg=f"fieldPath={field_path}", - ) + self.assertIsNone(checker.validate_expression("STRING", "", field_path, "ml")) + self.assertIsInstance(checker.validate_expression("STRING", "", field_path, ""), ErrorReport) + self.assertIsInstance(checker.validate_expression("STRING", "", field_path, 1), ErrorReport) # STRING with DOSE_UNIT_TERM def test_dose_unit_term_string_valid_and_invalid(self): checker = self.make_checker() field_path = "doseQuantity|unit" - self.assertIsNone( - checker.validate_expression("STRING", "", field_path, "milliliter"), - msg=f"fieldPath={field_path}", - ) - self.assertIsInstance( - checker.validate_expression("STRING", "", field_path, ""), - ErrorReport, - msg=f"fieldPath={field_path}", - ) - self.assertIsInstance( - checker.validate_expression("STRING", "", field_path, 1), - ErrorReport, - msg=f"fieldPath={field_path}", - ) + self.assertIsNone(checker.validate_expression("STRING", "", field_path, "milliliter")) + self.assertIsInstance(checker.validate_expression("STRING", "", field_path, ""), ErrorReport) + self.assertIsInstance(checker.validate_expression("STRING", "", field_path, 1), ErrorReport) # STRING with INDICATION_CODE def test_indication_code_string_valid_and_invalid(self): checker = self.make_checker() field_path = "reasonCode|#:http://snomed.info/sct|coding|#:http://snomed.info/sct|code" - self.assertIsNone( - checker.validate_expression("STRING", "", field_path, "987654"), - msg=f"fieldPath={field_path}", - ) - self.assertIsInstance( - checker.validate_expression("STRING", "", field_path, ""), - ErrorReport, - msg=f"fieldPath={field_path}", - ) - self.assertIsInstance( - checker.validate_expression("STRING", "", field_path, 987654), - ErrorReport, - msg=f"fieldPath={field_path}", - ) + self.assertIsNone(checker.validate_expression("STRING", "", field_path, "987654")) + self.assertIsInstance(checker.validate_expression("STRING", "", field_path, ""), ErrorReport) + self.assertIsInstance(checker.validate_expression("STRING", "", field_path, 987654), ErrorReport) # STRING with LOCATION_CODE def test_location_code_string_valid_and_invalid(self): checker = self.make_checker() field_path = "location|identifier|value" - self.assertIsNone( - checker.validate_expression("STRING", "", field_path, "LOC-123"), - msg=f"fieldPath={field_path}", - ) - self.assertIsInstance( - checker.validate_expression("STRING", "", field_path, ""), - ErrorReport, - msg=f"fieldPath={field_path}", - ) - self.assertIsInstance( - checker.validate_expression("STRING", "", field_path, 321), - ErrorReport, - msg=f"fieldPath={field_path}", - ) + self.assertIsNone(checker.validate_expression("STRING", "", field_path, "LOC-123")) + self.assertIsInstance(checker.validate_expression("STRING", "", field_path, ""), ErrorReport) + self.assertIsInstance(checker.validate_expression("STRING", "", field_path, 321), ErrorReport) # STRING with LOCATION_CODE_TYPE_URI def test_location_code_type_uri_string_valid_and_invalid(self): checker = self.make_checker() field_path = "location|identifier|system" self.assertIsNone( - checker.validate_expression("STRING", "", field_path, "https://example.org/location-code-system"), - msg=f"fieldPath={field_path}", - ) - self.assertIsInstance( - checker.validate_expression("STRING", "", field_path, ""), - ErrorReport, - msg=f"fieldPath={field_path}", - ) - self.assertIsInstance( - checker.validate_expression("STRING", "", field_path, 0), - ErrorReport, - msg=f"fieldPath={field_path}", + checker.validate_expression("STRING", "", field_path, "https://example.org/location-code-system") ) + self.assertIsInstance(checker.validate_expression("STRING", "", field_path, ""), ErrorReport) + self.assertIsInstance(checker.validate_expression("STRING", "", field_path, 0), ErrorReport) # LIST with PERFORMING_PROFESSIONAL_FORENAME (empty rule -> non-empty list) def test_practitioner_forename_list_valid_and_invalid(self): diff --git a/lambdas/shared/tests/test_common/validator/test_schemas/test_schema.json b/lambdas/shared/tests/test_common/validator/test_schemas/test_schema.json index d7575c6ac..8e11e1480 100644 --- a/lambdas/shared/tests/test_common/validator/test_schemas/test_schema.json +++ b/lambdas/shared/tests/test_common/validator/test_schemas/test_schema.json @@ -17,6 +17,20 @@ }, "errorGroup": "validity" }, + { + "expressionId": "01K9ZMX49TKDKQB36ENJTGJT6J", + "parentExpressionId": "01K5EGR0C7Y1WJ0BC803SQDWK4", + "fieldNameFHIR": "contained|#:Patient|identifier|#:https://fhir.nhs.uk/Id/nhs-number|value", + "fieldNameFlat": "NHS_NUMBER", + "fieldNumber": 1, + "errorLevel": 0, + "expression": { + "expressionName": "NHS Number MOD 11 Check", + "expressionType": "NHS_NUMBER", + "expressionRule": "" + }, + "errorGroup": "validity" + }, { "expressionId": "01K5EGR0C7QCEJMWH1R4MBPGQA", "fieldNameFHIR": "contained|#:Patient|name|#:official|given|0", From fb424151117c6fc79caf824e967bfeffdbc1711e Mon Sep 17 00:00:00 2001 From: Akol125 Date: Fri, 14 Nov 2025 13:01:30 +0000 Subject: [PATCH 13/18] schema rules for vaccination procedure code --- .../src/models/utils/pre_validator_utils.py | 26 +++++++---------- .../common/validator/expression_checker.py | 25 +++++++++++++++-- .../src/common/validator/validation_utils.py | 28 +++++++++---------- .../validator/test_expression_checker.py | 24 ++++++++-------- .../validator/test_schemas/test_schema.json | 14 ++++++++++ .../validator/testing_utils/constants.py | 2 +- 6 files changed, 74 insertions(+), 45 deletions(-) diff --git a/backend/src/models/utils/pre_validator_utils.py b/backend/src/models/utils/pre_validator_utils.py index d68d45149..c7d9bf6cb 100644 --- a/backend/src/models/utils/pre_validator_utils.py +++ b/backend/src/models/utils/pre_validator_utils.py @@ -1,6 +1,6 @@ from datetime import date, datetime from decimal import Decimal -from typing import Optional, Union +from typing import Union from .generic_utils import is_valid_simple_snomed, nhs_number_mod11_check @@ -23,9 +23,6 @@ def for_string( if not isinstance(field_value, str): raise TypeError(f"{field_location} must be a string") - if field_value.isspace(): - raise ValueError(f"{field_location} must be a non-empty string") - if defined_length: if len(field_value) != defined_length: raise ValueError(f"{field_location} must be {defined_length} characters") @@ -49,15 +46,14 @@ def for_string( def for_list( field_value: list, field_location: str, - defined_length: Optional[int] = None, - max_length: Optional[int] = None, + defined_length: int = None, elements_are_strings: bool = False, - string_element_max_length: Optional[int] = None, elements_are_dicts: bool = False, ): """ - Apply pre-validation to a list field to ensure it is a non-empty list which meets the length requirements and - requirements, if applicable, for each list element to be a non-empty string or non-empty dictionary + Apply pre-validation to a list field to ensure it is a non-empty list which meets the length + requirements and requirements, if applicable, for each list element to be a non-empty string + or non-empty dictionary """ if not isinstance(field_value, list): raise TypeError(f"{field_location} must be an array") @@ -69,12 +65,12 @@ def for_list( if len(field_value) == 0: raise ValueError(f"{field_location} must be a non-empty array") - if max_length is not None and len(field_value) > max_length: - raise ValueError(f"{field_location} must be an array of maximum length {max_length}") - if elements_are_strings: - for idx, element in enumerate(field_value): - PreValidation.for_string(element, f"{field_location}[{idx}]", max_length=string_element_max_length) + for element in field_value: + if not isinstance(element, str): + raise TypeError(f"{field_location} must be an array of strings") + if len(element) == 0: + raise ValueError(f"{field_location} must be an array of non-empty strings") if elements_are_dicts: for element in field_value: @@ -185,7 +181,6 @@ def for_positive_integer(field_value: int, field_location: str, max_value: int = Apply pre-validation to an integer field to ensure that it is a positive integer, which does not exceed the maximum allowed value (if applicable) """ - # This check uses type() instead of isinstance() because bool is a subclass of int. if type(field_value) is not int: # pylint: disable=unidiomatic-typecheck raise TypeError(f"{field_location} must be a positive integer") @@ -203,7 +198,6 @@ def for_integer_or_decimal(field_value: Union[int, Decimal], field_location: str which does not exceed the maximum allowed number of decimal places (if applicable) """ if not ( - # This check uses type() instead of isinstance() because bool is a subclass of int. type(field_value) is int # pylint: disable=unidiomatic-typecheck or type(field_value) is Decimal # pylint: disable=unidiomatic-typecheck ): diff --git a/lambdas/shared/src/common/validator/expression_checker.py b/lambdas/shared/src/common/validator/expression_checker.py index 94871b66a..2acef0cda 100644 --- a/lambdas/shared/src/common/validator/expression_checker.py +++ b/lambdas/shared/src/common/validator/expression_checker.py @@ -8,7 +8,7 @@ from common.validator.expression_rule import expression_rule_per_field from common.validator.lookup_expressions.key_data import KeyData from common.validator.lookup_expressions.lookup_data import LookUpData -from common.validator.validation_utils import check_if_future_date, nhs_number_mod11_check +from common.validator.validation_utils import check_if_future_date, is_valid_simple_snomed, nhs_number_mod11_check class ExpressionChecker: @@ -31,8 +31,6 @@ def validate_expression( return self.validation_for_string_values(expression_rule, field_name, field_value) case "LIST": return self.validation_for_list(expression_rule, field_name, field_value) - case "NHS_NUMBER": - return self.validation_for_nhs_number(expression_rule, field_name, field_value) case "DATE": return self.validation_for_date(expression_rule, field_name, field_value) case "DATETIME": @@ -45,6 +43,10 @@ def validate_expression( return self.validation_for_boolean(expression_rule, field_name, field_value) case "INTDECIMAL": return self.validation_for_integer_or_decimal(expression_rule, field_name, field_value) + case "NHS_NUMBER": + return self.validation_for_nhs_number(expression_rule, field_name, field_value) + case "SNOMED_CODE": + return self.validation_for_snomed_code(expression_rule, field_name, field_value) case _: return "Schema expression not found! Check your expression type : " + expression_type @@ -317,3 +319,20 @@ def validation_for_nhs_number(self, expression_rule: str, field_name: str, field if self.report_unexpected_exception: message = MESSAGES[ExceptionLevels.UNEXPECTED_EXCEPTION] % (e.__class__.__name__, e) return ErrorReport(ExceptionLevels.UNEXPECTED_EXCEPTION, message, None, field_name) + + def validation_for_snomed_code(self, expression_rule: str, field_location: str, field_value: str): + """ + Apply prevalidation to snomed code to ensure that its a valid one. + """ + + error_message = f"{field_location} is not a valid snomed code" + + try: + is_valid = is_valid_simple_snomed(field_value) + if not is_valid: + raise ValueError(error_message) + except ValueError as e: + code = ExceptionLevels.RECORD_CHECK_FAILED + message = MESSAGES[ExceptionLevels.RECORD_CHECK_FAILED] + details = str(e) + return ErrorReport(code, message, None, field_location, details) diff --git a/lambdas/shared/src/common/validator/validation_utils.py b/lambdas/shared/src/common/validator/validation_utils.py index e216ca55a..547d01d71 100644 --- a/lambdas/shared/src/common/validator/validation_utils.py +++ b/lambdas/shared/src/common/validator/validation_utils.py @@ -16,20 +16,6 @@ def check_if_future_date(parsed_value: date | datetime): return False -def is_valid_simple_snomed(simple_snomed: str) -> bool: - "check the snomed code valid or not." - min_snomed_length = 6 - max_snomed_length = 18 - return ( - simple_snomed is not None - and simple_snomed.isdigit() - and simple_snomed[0] != "0" - and min_snomed_length <= len(simple_snomed) <= max_snomed_length - and validate(simple_snomed) - and (simple_snomed[-3:-1] in ("00", "10")) - ) - - def nhs_number_mod11_check(nhs_number: str) -> bool: """ Parameters:- @@ -56,3 +42,17 @@ def nhs_number_mod11_check(nhs_number: str) -> bool: is_mod11 = check_digit == int(nhs_number[-1]) return is_mod11 + + +def is_valid_simple_snomed(simple_snomed: str) -> bool: + "check the snomed code valid or not." + min_snomed_length = 6 + max_snomed_length = 18 + return ( + simple_snomed is not None + and simple_snomed.isdigit() + and simple_snomed[0] != "0" + and min_snomed_length <= len(simple_snomed) <= max_snomed_length + and validate(simple_snomed) + and (simple_snomed[-3:-1] in ("00", "10")) + ) diff --git a/lambdas/shared/tests/test_common/validator/test_expression_checker.py b/lambdas/shared/tests/test_common/validator/test_expression_checker.py index a5030b503..8332dd98d 100644 --- a/lambdas/shared/tests/test_common/validator/test_expression_checker.py +++ b/lambdas/shared/tests/test_common/validator/test_expression_checker.py @@ -195,6 +195,19 @@ def test_vaccination_procedure_code_string_valid_and_invalid(self): # Invalid: non-string value self.assertIsInstance(checker.validate_expression("STRING", "", field_path, 123456), ErrorReport) + # After parent check succeeds - SNOMED_CODE for VACCINATION_PROCEDURE_CODE + def test_vaccination_procedure_snomed_code_valid_and_invalid(self): + checker = self.make_checker() + field_path = "extension|0|valueCodeableConcept|coding|0|code" + # Valid SNOMED example (passes Verhoeff, doesn't start with 0, length ok, suffix rule) + self.assertIsNone(checker.validate_expression("SNOMED_CODE", "", field_path, "1119349007")) + # Invalid: empty + self.assertIsInstance(checker.validate_expression("SNOMED_CODE", "", field_path, ""), ErrorReport) + # Invalid: non-digit + self.assertIsInstance(checker.validate_expression("SNOMED_CODE", "", field_path, "ABC123"), ErrorReport) + # Invalid: starts with 0 + self.assertIsInstance(checker.validate_expression("SNOMED_CODE", "", field_path, "012345"), ErrorReport) + # STRING with VACCINATION_PROCEDURE_TERM def test_vaccination_procedure_term_string_valid_and_invalid(self): checker = self.make_checker() @@ -388,16 +401,5 @@ def test_postcode_string_rule_valid_and_invalid(self): self.assertIsInstance(checker.validate_expression("STRING", "", field_path, ""), ErrorReport) -# class DummyParserEx: -# def __init__(self, data=None, raise_on_get=False): -# self._data = data or {} -# self._raise_on_get = raise_on_get - -# def get_key_value(self, field_name): -# if self._raise_on_get: -# raise RuntimeError("boom") -# return [self._data.get(field_name, "")] - - if __name__ == "__main__": unittest.main() diff --git a/lambdas/shared/tests/test_common/validator/test_schemas/test_schema.json b/lambdas/shared/tests/test_common/validator/test_schemas/test_schema.json index 8e11e1480..d57bafd73 100644 --- a/lambdas/shared/tests/test_common/validator/test_schemas/test_schema.json +++ b/lambdas/shared/tests/test_common/validator/test_schemas/test_schema.json @@ -226,6 +226,20 @@ }, "errorGroup": "completeness" }, + { + "expressionId": "01K9ZR2QZ6D9MTX3T7SQMK37C7", + "parentExpressionId": "01K8S0YYGDFWJXN2W3THYG24EZ", + "fieldNameFHIR": "extension|0|valueCodeableConcept|coding|0|code", + "fieldNameFlat": "VACCINATION_PROCEDURE_CODE", + "fieldNumber": 17, + "errorLevel": 0, + "expression": { + "expressionName": "Procedure Code Not Empty Check", + "expressionType": "SNOMED_CODE", + "expressionRule": "" + }, + "errorGroup": "completeness" + }, { "expressionId": "01K5EGR0C85HY6MDNN6TTR1K48", "fieldNameFHIR": "extension|0|valueCodeableConcept|coding|0|display", diff --git a/lambdas/shared/tests/test_common/validator/testing_utils/constants.py b/lambdas/shared/tests/test_common/validator/testing_utils/constants.py index d82c69ac3..d0df96fef 100644 --- a/lambdas/shared/tests/test_common/validator/testing_utils/constants.py +++ b/lambdas/shared/tests/test_common/validator/testing_utils/constants.py @@ -28,7 +28,7 @@ "PERFORMING_PROFESSIONAL_SURNAME": "SMITH", "RECORDED_DATE": "2025-03-06", "PRIMARY_SOURCE": True, - "VACCINATION_PROCEDURE_CODE": "PROC123", + "VACCINATION_PROCEDURE_CODE": "1324681000000101", "VACCINATION_PROCEDURE_TERM": "Procedure Term", "DOSE_SEQUENCE": 1, "VACCINE_PRODUCT_CODE": "VACC123", From e6437444ac20b606142911269902f08bb37ad18a Mon Sep 17 00:00:00 2001 From: Akol125 Date: Fri, 14 Nov 2025 14:21:19 +0000 Subject: [PATCH 14/18] resolve sonar issues --- sonar-project.properties | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sonar-project.properties b/sonar-project.properties index 5abedcf21..6edad3f55 100644 --- a/sonar-project.properties +++ b/sonar-project.properties @@ -4,7 +4,7 @@ sonar.organization=nhsdigital sonar.host.url=https://sonarcloud.io sonar.python.version=3.11 sonar.exclusions=**/e2e/**,**/e2e_batch/**,**/devtools/**,**/proxies/**,**/utilities/scripts/**,**/infrastructure/account/**,**/infrastructure/instance/**,**/infrastructure/grafana/**,**/terraform_aws_backup/**,**/tests/** -sonar.python.coverage.reportPaths=backend-coverage.xml,delta-coverage.xml,ack-lambda-coverage.xml,filenameprocessor-coverage.xml,recordforwarder-coverage.xml,recordprocessor-coverage.xml,mesh_processor-coverage.xml,redis_sync-coverage.xml,mns_subscription-coverage.xml,id_sync-coverage.xml,shared-coverage.xml,batchprocessorfilter-coverage.xml +sonar.python.coverage.reportPaths=backend-coverage.xml,delta-coverage.xml,ack-lambda-coverage.xml,filenameprocessor-coverage.xml,recordforwarder-coverage.xml,recordprocessor-coverage.xml,mesh_processor-coverage.xml,redis_sync-coverage.xml,mns_subscription-coverage.xml,id_sync-coverage.xml,shared-coverage.xml,batchprocessorfilter-coverage.xml,**/backend/** sonar.cpd.exclusions=**/cache.py,**/authentication.py,**/test_cache.py,**/test_authentication.py,**/mns_service.py,**/errors.py,**/Dockerfile,lambdas/shared/src/common/**,filenameprocessor/src/logging_decorator.py sonar.issue.ignore.multicriteria=exclude_snomed_urls,exclude_hl7_urls sonar.issue.ignore.multicriteria.exclude_snomed_urls.ruleKey=python:S5332 From b878b22f9cedcb8432b087dfa8bc2812b63f7285 Mon Sep 17 00:00:00 2001 From: Akol125 Date: Fri, 14 Nov 2025 14:45:29 +0000 Subject: [PATCH 15/18] add removed code back --- .../src/models/utils/pre_validator_utils.py | 26 ++++++++++++------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/backend/src/models/utils/pre_validator_utils.py b/backend/src/models/utils/pre_validator_utils.py index c7d9bf6cb..d68d45149 100644 --- a/backend/src/models/utils/pre_validator_utils.py +++ b/backend/src/models/utils/pre_validator_utils.py @@ -1,6 +1,6 @@ from datetime import date, datetime from decimal import Decimal -from typing import Union +from typing import Optional, Union from .generic_utils import is_valid_simple_snomed, nhs_number_mod11_check @@ -23,6 +23,9 @@ def for_string( if not isinstance(field_value, str): raise TypeError(f"{field_location} must be a string") + if field_value.isspace(): + raise ValueError(f"{field_location} must be a non-empty string") + if defined_length: if len(field_value) != defined_length: raise ValueError(f"{field_location} must be {defined_length} characters") @@ -46,14 +49,15 @@ def for_string( def for_list( field_value: list, field_location: str, - defined_length: int = None, + defined_length: Optional[int] = None, + max_length: Optional[int] = None, elements_are_strings: bool = False, + string_element_max_length: Optional[int] = None, elements_are_dicts: bool = False, ): """ - Apply pre-validation to a list field to ensure it is a non-empty list which meets the length - requirements and requirements, if applicable, for each list element to be a non-empty string - or non-empty dictionary + Apply pre-validation to a list field to ensure it is a non-empty list which meets the length requirements and + requirements, if applicable, for each list element to be a non-empty string or non-empty dictionary """ if not isinstance(field_value, list): raise TypeError(f"{field_location} must be an array") @@ -65,12 +69,12 @@ def for_list( if len(field_value) == 0: raise ValueError(f"{field_location} must be a non-empty array") + if max_length is not None and len(field_value) > max_length: + raise ValueError(f"{field_location} must be an array of maximum length {max_length}") + if elements_are_strings: - for element in field_value: - if not isinstance(element, str): - raise TypeError(f"{field_location} must be an array of strings") - if len(element) == 0: - raise ValueError(f"{field_location} must be an array of non-empty strings") + for idx, element in enumerate(field_value): + PreValidation.for_string(element, f"{field_location}[{idx}]", max_length=string_element_max_length) if elements_are_dicts: for element in field_value: @@ -181,6 +185,7 @@ def for_positive_integer(field_value: int, field_location: str, max_value: int = Apply pre-validation to an integer field to ensure that it is a positive integer, which does not exceed the maximum allowed value (if applicable) """ + # This check uses type() instead of isinstance() because bool is a subclass of int. if type(field_value) is not int: # pylint: disable=unidiomatic-typecheck raise TypeError(f"{field_location} must be a positive integer") @@ -198,6 +203,7 @@ def for_integer_or_decimal(field_value: Union[int, Decimal], field_location: str which does not exceed the maximum allowed number of decimal places (if applicable) """ if not ( + # This check uses type() instead of isinstance() because bool is a subclass of int. type(field_value) is int # pylint: disable=unidiomatic-typecheck or type(field_value) is Decimal # pylint: disable=unidiomatic-typecheck ): From b5ae016f6530bdef40e955d6741ddeaf18104ada Mon Sep 17 00:00:00 2001 From: Akol125 Date: Fri, 14 Nov 2025 14:49:44 +0000 Subject: [PATCH 16/18] exclude backend --- sonar-project.properties | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sonar-project.properties b/sonar-project.properties index 6edad3f55..60d2bf186 100644 --- a/sonar-project.properties +++ b/sonar-project.properties @@ -3,8 +3,8 @@ sonar.projectKey=NHSDigital_immunisation-fhir-api sonar.organization=nhsdigital sonar.host.url=https://sonarcloud.io sonar.python.version=3.11 -sonar.exclusions=**/e2e/**,**/e2e_batch/**,**/devtools/**,**/proxies/**,**/utilities/scripts/**,**/infrastructure/account/**,**/infrastructure/instance/**,**/infrastructure/grafana/**,**/terraform_aws_backup/**,**/tests/** -sonar.python.coverage.reportPaths=backend-coverage.xml,delta-coverage.xml,ack-lambda-coverage.xml,filenameprocessor-coverage.xml,recordforwarder-coverage.xml,recordprocessor-coverage.xml,mesh_processor-coverage.xml,redis_sync-coverage.xml,mns_subscription-coverage.xml,id_sync-coverage.xml,shared-coverage.xml,batchprocessorfilter-coverage.xml,**/backend/** +sonar.exclusions=**/e2e/**,**/e2e_batch/**,**/devtools/**,**/proxies/**,**/utilities/scripts/**,**/infrastructure/account/**,**/infrastructure/instance/**,**/infrastructure/grafana/**,**/terraform_aws_backup/**,**/tests/**,**/backend/** +sonar.python.coverage.reportPaths=backend-coverage.xml,delta-coverage.xml,ack-lambda-coverage.xml,filenameprocessor-coverage.xml,recordforwarder-coverage.xml,recordprocessor-coverage.xml,mesh_processor-coverage.xml,redis_sync-coverage.xml,mns_subscription-coverage.xml,id_sync-coverage.xml,shared-coverage.xml sonar.cpd.exclusions=**/cache.py,**/authentication.py,**/test_cache.py,**/test_authentication.py,**/mns_service.py,**/errors.py,**/Dockerfile,lambdas/shared/src/common/**,filenameprocessor/src/logging_decorator.py sonar.issue.ignore.multicriteria=exclude_snomed_urls,exclude_hl7_urls sonar.issue.ignore.multicriteria.exclude_snomed_urls.ruleKey=python:S5332 From a705b80a65108d42c1e736ffdf8aa23f4bf3782e Mon Sep 17 00:00:00 2001 From: Akol125 Date: Fri, 14 Nov 2025 16:52:06 +0000 Subject: [PATCH 17/18] resolve sonar issues --- .../common/validator/expression_checker.py | 28 +++---------------- .../shared/src/common/validator/validator.py | 2 +- 2 files changed, 5 insertions(+), 25 deletions(-) diff --git a/lambdas/shared/src/common/validator/expression_checker.py b/lambdas/shared/src/common/validator/expression_checker.py index 2acef0cda..fb8fef390 100644 --- a/lambdas/shared/src/common/validator/expression_checker.py +++ b/lambdas/shared/src/common/validator/expression_checker.py @@ -1,6 +1,6 @@ from datetime import datetime from decimal import Decimal -from typing import Optional, Union +from typing import Optional from common.validator.constants.constants import Constants from common.validator.constants.enums import MESSAGES, ExceptionLevels @@ -37,8 +37,6 @@ def validate_expression( return self.validation_for_date_time(expression_rule, field_name, field_value) case "POSITIVEINTEGER": return self.validation_for_positive_integer(expression_rule, field_name, field_value) - case "UNIQUELIST": - return self.validation_for_unique_list(expression_rule, field_name, field_value) case "BOOLEAN": return self.validation_for_boolean(expression_rule, field_name, field_value) case "INTDECIMAL": @@ -108,7 +106,7 @@ def validation_for_positive_integer(self, expression_rule, field_name, field_val return ErrorReport(ExceptionLevels.UNEXPECTED_EXCEPTION, message, None, field_name) def validation_for_integer_or_decimal( - self, _expression_rule, field_name: str, field_value: Union[int, Decimal] + self, _expression_rule, field_name: str, field_value: int | Decimal ) -> ErrorReport: """ Apply pre-validation to a decimal field to ensure that it is an integer or decimal, @@ -131,24 +129,6 @@ def validation_for_integer_or_decimal( message = MESSAGES[ExceptionLevels.UNEXPECTED_EXCEPTION] % (e.__class__.__name__, e) return ErrorReport(ExceptionLevels.UNEXPECTED_EXCEPTION, message, None, field_name) - def validation_for_unique_list( - list_to_check: list, - unique_value_in_list: str, - field_location: str, - ): - """ - Apply pre-validation to a list of dictionaries to ensure that a specified value in each - dictionary is unique across the list - """ - found = [] - for item in list_to_check: - if item[unique_value_in_list] in found: - raise ValueError( - f"{field_location.replace('FIELD_TO_REPLACE', item[unique_value_in_list])}" + " must be unique" - ) - - found.append(item[unique_value_in_list]) - def validation_for_boolean(self, expression_rule: str, field_name: str, field_value: str) -> ErrorReport: """Apply pre-validation to a boolean field to ensure that it is a boolean""" try: @@ -303,7 +283,7 @@ def validation_for_string_values(self, expression_rule: str, field_name: str, fi message = MESSAGES[ExceptionLevels.UNEXPECTED_EXCEPTION] % (e.__class__.__name__, e) return ErrorReport(ExceptionLevels.UNEXPECTED_EXCEPTION, message, None, field_name) - def validation_for_nhs_number(self, expression_rule: str, field_name: str, field_value: str) -> ErrorReport: + def validation_for_nhs_number(self, _expression_rule: str, field_name: str, field_value: str) -> ErrorReport: """ Apply pre-validation to an NHS number to ensure that it is a valid NHS number """ @@ -320,7 +300,7 @@ def validation_for_nhs_number(self, expression_rule: str, field_name: str, field message = MESSAGES[ExceptionLevels.UNEXPECTED_EXCEPTION] % (e.__class__.__name__, e) return ErrorReport(ExceptionLevels.UNEXPECTED_EXCEPTION, message, None, field_name) - def validation_for_snomed_code(self, expression_rule: str, field_location: str, field_value: str): + def validation_for_snomed_code(self, _expression_rule: str, field_location: str, field_value: str): """ Apply prevalidation to snomed code to ensure that its a valid one. """ diff --git a/lambdas/shared/src/common/validator/validator.py b/lambdas/shared/src/common/validator/validator.py index 21c846098..41a3be8c6 100644 --- a/lambdas/shared/src/common/validator/validator.py +++ b/lambdas/shared/src/common/validator/validator.py @@ -16,7 +16,7 @@ class Validator: - def __init__(self, schema_file: dict = {}): + def __init__(self, schema_file: dict = None): self.schema_file = schema_file self.schema_parser = SchemaParser() From 56b813dba497488db497b54985ea11a17290d971 Mon Sep 17 00:00:00 2001 From: Akol125 Date: Fri, 14 Nov 2025 17:09:33 +0000 Subject: [PATCH 18/18] resolve sonar issues 2 --- .../common/validator/expression_checker.py | 32 ++++++++----------- 1 file changed, 14 insertions(+), 18 deletions(-) diff --git a/lambdas/shared/src/common/validator/expression_checker.py b/lambdas/shared/src/common/validator/expression_checker.py index fb8fef390..207390030 100644 --- a/lambdas/shared/src/common/validator/expression_checker.py +++ b/lambdas/shared/src/common/validator/expression_checker.py @@ -92,9 +92,8 @@ def validation_for_positive_integer(self, expression_rule, field_name, field_val if field_value <= 0: raise ValueError(f"{field_name} must be a positive integer") - if max_value: - if field_value > max_value: - raise ValueError(f"{field_name} must be an integer in the range 1 to {max_value}") + if max_value and field_value > max_value: + raise ValueError(f"{field_name} must be an integer in the range 1 to {max_value}") except (TypeError, ValueError) as e: code = ExceptionLevels.RECORD_CHECK_FAILED message = MESSAGES[ExceptionLevels.RECORD_CHECK_FAILED] @@ -129,7 +128,7 @@ def validation_for_integer_or_decimal( message = MESSAGES[ExceptionLevels.UNEXPECTED_EXCEPTION] % (e.__class__.__name__, e) return ErrorReport(ExceptionLevels.UNEXPECTED_EXCEPTION, message, None, field_name) - def validation_for_boolean(self, expression_rule: str, field_name: str, field_value: str) -> ErrorReport: + def validation_for_boolean(self, _expression_rule: str, field_name: str, field_value: str) -> ErrorReport: """Apply pre-validation to a boolean field to ensure that it is a boolean""" try: if not isinstance(field_value, bool): @@ -259,20 +258,17 @@ def validation_for_string_values(self, expression_rule: str, field_name: str, fi if defined_length: if len(field_value) != defined_length: raise ValueError(f"{field_name} must be {defined_length} characters") - else: - if len(field_value) == 0: - raise ValueError(f"{field_name} must be a non-empty string") - - if max_length: - if len(field_value) > max_length: - raise ValueError(f"{field_name} must be {max_length} or fewer characters") - if predefined_values: - if field_value not in predefined_values: - raise ValueError(f"{field_name} must be one of the following: " + str(", ".join(predefined_values))) - - if not spaces_allowed: - if " " in field_value: - raise ValueError(f"{field_name} must not contain spaces") + elif len(field_value) == 0: + raise ValueError(f"{field_name} must be a non-empty string") + + if max_length and len(field_value) > max_length: + raise ValueError(f"{field_name} must be {max_length} or fewer characters") + + if predefined_values and field_value not in predefined_values: + raise ValueError(f"{field_name} must be one of the following: " + str(", ".join(predefined_values))) + + if not spaces_allowed and " " in field_value: + raise ValueError(f"{field_name} must not contain spaces") except (ValueError, TypeError) as e: code = ExceptionLevels.RECORD_CHECK_FAILED message = MESSAGES[ExceptionLevels.RECORD_CHECK_FAILED]