NHSDigital · Akol125 · Nov 11, 2025 · Nov 12, 2025 · Nov 12, 2025 · Nov 12, 2025
diff --git a/lambdas/shared/src/common/validator/config_schema/dev_schema.json b/lambdas/shared/src/common/validator/config_schema/dev_schema.json
diff --git a/lambdas/shared/src/common/validator/config_schema/preprod_schema.json b/lambdas/shared/src/common/validator/config_schema/preprod_schema.json
diff --git a/lambdas/shared/src/common/validator/config_schema/prod_schema.json b/lambdas/shared/src/common/validator/config_schema/prod_schema.json
diff --git a/lambdas/shared/src/common/validator/constants/config.json b/lambdas/shared/src/common/validator/constants/config.json
diff --git a/lambdas/shared/src/common/validator/constants/constants.py b/lambdas/shared/src/common/validator/constants/constants.py
@@ -0,0 +1,30 @@
+class Constants:
+    """Limits, allowed values and error-message templates for field validation."""
+
+    NHS_NUMBER_LENGTH = 10
+    PERSON_NAME_ELEMENT_MAX_LENGTH = 35
+    GENDERS = ["male", "female", "other", "unknown"]
+    # Accepted datetime patterns, in strptime syntax.
+    DATETIME_FORMAT = [
+        "%Y-%m-%d",
+        "%Y-%m-%dT%H:%M:%S%z",
+        "%Y-%m-%dT%H:%M:%S.%f%z",
+    ]
+    ALLOWED_SUFFIXES = {"+00:00", "+01:00", "+0000", "+0100"}
+    # Placeholder baked into the message templates below; presumably swapped for the real field name by callers.
+    field_name = "FIELD_TO_REPLACE"
+
+    # NOTE(review): the adjacent literals below are concatenated with no separator, so the
+    # bullet points run together in the rendered message — confirm whether that is intended.
+    DATETIME_ERROR_MESSAGE = (
+        f"{field_name} must be a valid datetime in one of the following formats:"
+        "- 'YYYY-MM-DD' — Full date only"
+        "- 'YYYY-MM-DDThh:mm:ss%z' — Full date and time with timezone (e.g. +00:00 or +01:00)"
+        "- 'YYYY-MM-DDThh:mm:ss.f%z' — Full date and time with milliseconds and timezone"
+        "-  Date must not be in the future."
+    )
+    STRICT_DATETIME_ERROR_MESSAGE = (
+        "Only '+00:00' and '+01:00' are accepted as valid timezone offsets.\n"
+        f"Note that partial dates are not allowed for {field_name} in this service.\n"
+    )
+    MAXIMUM_DOSE_NUMBER_VALUE = 9
diff --git a/lambdas/shared/src/common/validator/error_report/record_error.py b/lambdas/shared/src/common/validator/error_report/record_error.py
@@ -21,6 +21,9 @@ def __init__(
         self.id = None
         self.error_level = error_level
 
+    def __repr__(self):  # debugging aid: one-line summary of code, field, message and details
+        return f"<ErrorReport code={self.code}, field={self.field}, message={self.message!r}, details={self.details!r}>"
+
     # function to return the object as a dictionary
     def to_dict(self):
         ret = {"code": self.code, "message": self.message}

diff --git a/lambdas/shared/src/common/validator/expression_checker.py b/lambdas/shared/src/common/validator/expression_checker.py
diff --git a/lambdas/shared/src/common/validator/expression_rule.py b/lambdas/shared/src/common/validator/expression_rule.py
@@ -0,0 +1,23 @@
+from common.validator.constants.constants import Constants
+
+
+def expression_rule_per_field(expression_type: str) -> dict:
+    """Return the rule settings for expression_type; raise ValueError if the type has no rule."""
+    if expression_type == "NHS_NUMBER":
+        return {"defined_length": Constants.NHS_NUMBER_LENGTH, "spaces_allowed": False}
+    if expression_type == "PERSON_NAME":
+        return {
+            "elements_are_strings": True,
+            "array_max_length": 5,
+            "max_length": Constants.PERSON_NAME_ELEMENT_MAX_LENGTH,
+        }
+    if expression_type == "PERSON_SURNAME":
+        return {"max_length": Constants.PERSON_NAME_ELEMENT_MAX_LENGTH}
+    if expression_type == "GENDER":
+        return {"predefined_values": Constants.GENDERS}
+    if expression_type == "DATETIME":
+        return {"strict_time_zone": True}
+    if expression_type == "DOSE_NUMBER":
+        return {"max_value": Constants.MAXIMUM_DOSE_NUMBER_VALUE}
+    # Anything else has no rule defined.
+    raise ValueError(f"Expression rule not found for type: {expression_type}")
diff --git a/lambdas/shared/src/common/validator/validation_utils.py b/lambdas/shared/src/common/validator/validation_utils.py
@@ -0,0 +1,58 @@
+from datetime import date, datetime
+
+from stdnum.verhoeff import validate
+
+
+def check_if_future_date(parsed_value: date | datetime) -> bool:
+    """
+    Return True if the parsed date or datetime is later than now, False otherwise; TypeError for other types.
+    """
+    if isinstance(parsed_value, datetime):
+        now = datetime.now(parsed_value.tzinfo)  # tzinfo of None yields a naive local "now"
+    elif isinstance(parsed_value, date):
+        now = date.today()
+    else:
+        raise TypeError(f"Expected date or datetime, got {type(parsed_value).__name__}")
+    return parsed_value > now
+
+
+def nhs_number_mod11_check(nhs_number: str) -> bool:
+    """
+    Parameters:-
+    nhs_number: str
+        The NHS number to be checked.
+    Returns:-
+        True if the nhs number passes the mod 11 check, False otherwise.
+
+    Definition of NHS number can be found at:
+    https://www.datadictionary.nhs.uk/attributes/nhs_number.html
+    """
+    is_mod11 = False
+    # isascii() rejects Unicode digits (e.g. "²") that pass isdigit() but would make int() raise ValueError.
+    if nhs_number.isascii() and nhs_number.isdigit() and len(nhs_number) == 10:
+        weighting_factors = range(10, 1, -1)  # weights 10 down to 2 for the first nine digits
+        # Multiply each of the first nine digits by the weighting factor and add the results of each multiplication
+        # together
+        total = sum(int(digit) * weight for digit, weight in zip(nhs_number[:-1], weighting_factors))
+        # Divide the total by 11 and establish the remainder and subtract the remainder from 11 to give the check digit.
+        # If the result is 11 then a check digit of 0 is used. If the result is 10 then the NHS NUMBER is invalid and
+        # not used.
+        check_digit = 0 if (total % 11 == 0) else (11 - (total % 11))
+        # Check the remainder matches the check digit. If it does not, the NHS NUMBER is invalid.
+        is_mod11 = check_digit == int(nhs_number[-1])
+
+    return is_mod11
+
+
+def is_valid_simple_snomed(simple_snomed: str) -> bool:
+    """Return True if simple_snomed passes the length, format and Verhoeff checksum checks, else False."""
+    from stdnum.verhoeff import is_valid  # boolean check; validate() raises on a bad checksum
+    min_snomed_length, max_snomed_length = 6, 18
+    return (
+        simple_snomed is not None
+        and simple_snomed.isdigit()
+        and simple_snomed[0] != "0"
+        and min_snomed_length <= len(simple_snomed) <= max_snomed_length
+        and is_valid(simple_snomed)
+        and (simple_snomed[-3:-1] in ("00", "10"))
+    )
diff --git a/lambdas/shared/src/common/validator/validator.py b/lambdas/shared/src/common/validator/validator.py
@@ -16,7 +16,7 @@
 
 
 class Validator:
-    def __init__(self, schema_file: dict = {}):
+    def __init__(self, schema_file: dict = None):
         self.schema_file = schema_file
         self.schema_parser = SchemaParser()
 
@@ -56,6 +56,7 @@ def _validate_expression(
 
         try:
             expression_values = data_parser.extract_field_values(expression_fieldname)
+            print(f"Validating Expression ID {expression_fieldname} with values: {expression_values}")
         except Exception as e:
             message = f"Data get values Unexpected exception [{e.__class__.__name__}]: {e}"
             error_record = ErrorReport(code=ExceptionLevels.PARSING_ERROR, message=message)
@@ -72,10 +73,14 @@ def _validate_expression(
                 add_error_record(
                     error_records, error_record, expression_error_group, expression_name, expression_id, error_level
                 )
-        except Exception:
-            print(f"Exception validating expression {expression_id} on row {row}: {error_record}")
-        row += 1
-        return row
+        except Exception as e:
+            message = f"Expression Validation Unexpected exception [{e.__class__.__name__}]: {e}"
+            error_record = ErrorReport(code=ExceptionLevels.UNEXPECTED_EXCEPTION, message=message)
+            error_record = ErrorReport(code=ExceptionLevels.PARSING_ERROR, message=message)
+            add_error_record(
+                error_records, error_record, expression_error_group, expression_name, expression_id, error_level
+            )
+            return
 
     def validate_fhir(
         self,

diff --git a/lambdas/shared/tests/test_common/validator/test_csv_line_parser.py b/lambdas/shared/tests/test_common/validator/test_csv_line_parser.py
@@ -22,20 +22,20 @@ def test_extra_values_ignored(self):
         Ignore values that do not have a corresponding key
         """
         csv_parsers = CSVLineParser()
-        csv_parsers.parse_csv_line({"NHS_NUMBER": "9000000009", "PERSON_FORENAME": "Alex", "": "Trent"})
+        csv_parsers.parse_csv_line({"NHS_NUMBER": "9000000009", "PERSON_FORENAME": ["Alex"], "": "Trent"})
         self.assertEqual(
             csv_parsers.csv_file_data,
-            {"NHS_NUMBER": "9000000009", "PERSON_FORENAME": "Alex", "": "Trent"},
+            {"NHS_NUMBER": "9000000009", "PERSON_FORENAME": ["Alex"], "": "Trent"},
         )
-        self.assertEqual(csv_parsers.get_key_value("PERSON_FORENAME"), "Alex")
+        self.assertEqual(csv_parsers.get_key_value("PERSON_FORENAME"), ["Alex"])
 
     def test_fewer_values_than_keys(self):
         """
         Test that fewer values (rows) than keys (columns/headers)
         raises an error when accessing key without value
         """
         csv_parsers = CSVLineParser()
-        csv_parsers.parse_csv_line({"NHS_NUMBER": "9000000009", "PERSON_FORENAME": "Alex"})
+        csv_parsers.parse_csv_line({"NHS_NUMBER": "9000000009", "PERSON_FORENAME": ["Alex"]})
         self.assertIn("NHS_NUMBER", csv_parsers.csv_file_data)
         self.assertIn("PERSON_FORENAME", csv_parsers.csv_file_data)
         self.assertNotIn("PERSON_SURNAME", csv_parsers.csv_file_data)
@@ -46,7 +46,7 @@ def test_get_missing_key_raises(self):
         """
         Test that accessing a non-existent key raises KeyError"""
         csv_parsers = CSVLineParser()
-        csv_parsers.parse_csv_line({"NHS_NUMBER": "9000000009", "PERSON_FORENAME": "Alex"})
+        csv_parsers.parse_csv_line({"NHS_NUMBER": "9000000009", "PERSON_FORENAME": ["Alex"]})
         with self.assertRaises(KeyError):
             _ = csv_parsers.get_key_value("VACCINE_TYPE")