Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
451 changes: 451 additions & 0 deletions lambdas/shared/src/common/validator/config_schema/dev_schema.json
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Where are these schema files used?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These files will be used as per-environment schema files. They are not populated yet; they are meant to be used once we have a fully functional schema for each environment.

Large diffs are not rendered by default.

Empty file.
Empty file.
451 changes: 451 additions & 0 deletions lambdas/shared/src/common/validator/constants/config.json
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Where is this file used?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same

Large diffs are not rendered by default.

30 changes: 30 additions & 0 deletions lambdas/shared/src/common/validator/constants/constants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
class Constants:
    """Shared limits, allowed values and message templates for the validator."""

    # Exact number of characters expected in an NHS number.
    NHS_NUMBER_LENGTH = 10
    # Upper bound on the length of a single person-name element.
    PERSON_NAME_ELEMENT_MAX_LENGTH = 35
    # Accepted gender values.
    GENDERS = ["male", "female", "other", "unknown"]
    # strptime patterns accepted for date / datetime values.
    # (A previous single-string definition was dead code: it was immediately
    # overwritten by this list, so only the list is kept.)
    DATETIME_FORMAT = [
        "%Y-%m-%d",
        "%Y-%m-%dT%H:%M:%S%z",
        "%Y-%m-%dT%H:%M:%S.%f%z",
    ]
    # Timezone offsets accepted when strict timezone checking applies,
    # with and without the colon separator.
    ALLOWED_SUFFIXES = {
        "+00:00",
        "+01:00",
        "+0000",
        "+0100",
    }
    # Placeholder embedded in the message templates below. The f-strings are
    # evaluated once, when the class body runs, so the templates contain this
    # literal text; presumably callers substitute the real field name for it.
    field_name = "FIELD_TO_REPLACE"

    # Each line ends with "\n" so the bullet list renders one item per line,
    # matching STRICT_DATETIME_ERROR_MESSAGE below.
    DATETIME_ERROR_MESSAGE = (
        f"{field_name} must be a valid datetime in one of the following formats:\n"
        "- 'YYYY-MM-DD' — Full date only\n"
        "- 'YYYY-MM-DDThh:mm:ss%z' — Full date and time with timezone (e.g. +00:00 or +01:00)\n"
        "- 'YYYY-MM-DDThh:mm:ss.f%z' — Full date and time with milliseconds and timezone\n"
        "- Date must not be in the future.\n"
    )
    STRICT_DATETIME_ERROR_MESSAGE = (
        "Only '+00:00' and '+01:00' are accepted as valid timezone offsets.\n"
        f"Note that partial dates are not allowed for {field_name} in this service.\n"
    )
    # Largest dose number accepted.
    MAXIMUM_DOSE_NUMBER_VALUE = 9
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@ def __init__(
self.id = None
self.error_level = error_level

def __repr__(self):
    """Return a debugging representation showing code, field, message and details."""
    # code and field are rendered with str(); message and details with repr().
    head = f"code={self.code}, field={self.field}"
    message_text = repr(self.message)
    details_text = repr(self.details)
    return f"<ErrorReport {head}, message={message_text}, details={details_text}>"

# function to return the object as a dictionary
def to_dict(self):
ret = {"code": self.code, "message": self.message}
Expand Down
811 changes: 247 additions & 564 deletions lambdas/shared/src/common/validator/expression_checker.py

Large diffs are not rendered by default.

23 changes: 23 additions & 0 deletions lambdas/shared/src/common/validator/expression_rule.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
from common.validator.constants.constants import Constants


def expression_rule_per_field(expression_type: str) -> dict:
    """Return the validation rule settings for the given expression type.

    A fresh dict is built on every call, so callers may modify the result.

    Raises:
        ValueError: if no rule is defined for ``expression_type``.
    """
    if expression_type == "NHS_NUMBER":
        return {"defined_length": Constants.NHS_NUMBER_LENGTH, "spaces_allowed": False}

    if expression_type == "PERSON_NAME":
        name_rule = {}
        name_rule["elements_are_strings"] = True
        name_rule["array_max_length"] = 5
        name_rule["max_length"] = Constants.PERSON_NAME_ELEMENT_MAX_LENGTH
        return name_rule

    if expression_type == "PERSON_SURNAME":
        return {"max_length": Constants.PERSON_NAME_ELEMENT_MAX_LENGTH}

    if expression_type == "GENDER":
        return {"predefined_values": Constants.GENDERS}

    if expression_type == "DATETIME":
        return {"strict_time_zone": True}

    if expression_type == "DOSE_NUMBER":
        return {"max_value": Constants.MAXIMUM_DOSE_NUMBER_VALUE}

    # No rule matched.
    raise ValueError(f"Expression rule not found for type: {expression_type}")
58 changes: 58 additions & 0 deletions lambdas/shared/src/common/validator/validation_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
from datetime import date, datetime

from stdnum.verhoeff import validate


def check_if_future_date(parsed_value: date | datetime):
"""
Ensure a parsed date or datetime object is not in the future.
"""
if isinstance(parsed_value, datetime):
now = datetime.now(parsed_value.tzinfo) if parsed_value.tzinfo else datetime.now()
elif isinstance(parsed_value, date):
now = datetime.now().date()
if parsed_value > now:
return True
return False


def nhs_number_mod11_check(nhs_number: str) -> bool:
    """
    Parameters:-
    nhs_number: str
        The NHS number to be checked.
    Returns:-
        True if the nhs number passes the mod 11 check, False otherwise
        (including when the input is not a string of exactly ten ASCII digits).

    Definition of NHS number can be found at:
    https://www.datadictionary.nhs.uk/attributes/nhs_number.html
    """
    # str.isdigit() also accepts non-ASCII digit characters (e.g. superscripts)
    # that int() cannot parse, so require ASCII to avoid a ValueError below.
    if not (
        isinstance(nhs_number, str)
        and nhs_number.isascii()
        and nhs_number.isdigit()
        and len(nhs_number) == 10
    ):
        return False

    *leading_digits, supplied_check_digit = (int(char) for char in nhs_number)
    # Weighting factors 10 down to 2, one for each of the first nine digits.
    weighting_factors = range(10, 1, -1)
    # Multiply each of the first nine digits by its weighting factor and sum the results.
    total = sum(digit * weight for digit, weight in zip(leading_digits, weighting_factors))
    # Subtract the remainder of total / 11 from 11 to get the check digit.
    # A result of 11 means a check digit of 0. A result of 10 can never equal
    # a single digit, so such a number is reported as invalid.
    remainder = total % 11
    expected_check_digit = 0 if remainder == 0 else 11 - remainder
    return expected_check_digit == supplied_check_digit


def is_valid_simple_snomed(simple_snomed: str) -> bool:
    """
    Check whether a SNOMED code is valid.

    The code must be a non-None string of 6 to 18 digits with no leading zero,
    its two characters before the last must be "00" or "10", and it must pass
    the Verhoeff checksum.

    Returns:-
        True if every check passes, False otherwise.
    """
    min_snomed_length = 6
    max_snomed_length = 18
    if (
        simple_snomed is None
        or not simple_snomed.isdigit()
        or simple_snomed[0] == "0"
        or not min_snomed_length <= len(simple_snomed) <= max_snomed_length
    ):
        return False

    # NOTE(review): "00" / "10" look like the SNOMED partition identifier — confirm.
    if simple_snomed[-3:-1] not in ("00", "10"):
        return False

    # stdnum's validate() raises on a bad checksum or format rather than
    # returning a falsy value, so translate that into False to honour the
    # boolean contract of this predicate.
    from stdnum.exceptions import ValidationError

    try:
        validate(simple_snomed)
    except ValidationError:
        return False
    return True
15 changes: 10 additions & 5 deletions lambdas/shared/src/common/validator/validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@


class Validator:
def __init__(self, schema_file: dict = {}):
def __init__(self, schema_file: dict = None):
self.schema_file = schema_file
self.schema_parser = SchemaParser()

Expand Down Expand Up @@ -56,6 +56,7 @@ def _validate_expression(

try:
expression_values = data_parser.extract_field_values(expression_fieldname)
print(f"Validating Expression ID {expression_fieldname} with values: {expression_values}")
except Exception as e:
message = f"Data get values Unexpected exception [{e.__class__.__name__}]: {e}"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd make the message "Extract Field Values: Unexpected exception ..." - it's better English and more consistent with the function naming

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is only for testing; it will ultimately be removed.

error_record = ErrorReport(code=ExceptionLevels.PARSING_ERROR, message=message)
Expand All @@ -72,10 +73,14 @@ def _validate_expression(
add_error_record(
error_records, error_record, expression_error_group, expression_name, expression_id, error_level
)
except Exception:
print(f"Exception validating expression {expression_id} on row {row}: {error_record}")
row += 1
return row
except Exception as e:
message = f"Expression Validation Unexpected exception [{e.__class__.__name__}]: {e}"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd make the message "Validate Expression: Unexpected exception ..." (see above)

error_record = ErrorReport(code=ExceptionLevels.UNEXPECTED_EXCEPTION, message=message)
error_record = ErrorReport(code=ExceptionLevels.PARSING_ERROR, message=message)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We're overwriting the error_record written in the previous line.
Do we want to add both of these in add_error_record()?

add_error_record(
error_records, error_record, expression_error_group, expression_name, expression_id, error_level
)
return

def validate_fhir(
self,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,20 +22,20 @@ def test_extra_values_ignored(self):
Ignore values that do not have a corresponding key
"""
csv_parsers = CSVLineParser()
csv_parsers.parse_csv_line({"NHS_NUMBER": "9000000009", "PERSON_FORENAME": "Alex", "": "Trent"})
csv_parsers.parse_csv_line({"NHS_NUMBER": "9000000009", "PERSON_FORENAME": ["Alex"], "": "Trent"})
self.assertEqual(
csv_parsers.csv_file_data,
{"NHS_NUMBER": "9000000009", "PERSON_FORENAME": "Alex", "": "Trent"},
{"NHS_NUMBER": "9000000009", "PERSON_FORENAME": ["Alex"], "": "Trent"},
)
self.assertEqual(csv_parsers.get_key_value("PERSON_FORENAME"), "Alex")
self.assertEqual(csv_parsers.get_key_value("PERSON_FORENAME"), ["Alex"])

def test_fewer_values_than_keys(self):
"""
Test that fewer values (rows) than keys (columns/headers)
raises an error when accessing key without value
"""
csv_parsers = CSVLineParser()
csv_parsers.parse_csv_line({"NHS_NUMBER": "9000000009", "PERSON_FORENAME": "Alex"})
csv_parsers.parse_csv_line({"NHS_NUMBER": "9000000009", "PERSON_FORENAME": ["Alex"]})
self.assertIn("NHS_NUMBER", csv_parsers.csv_file_data)
self.assertIn("PERSON_FORENAME", csv_parsers.csv_file_data)
self.assertNotIn("PERSON_SURNAME", csv_parsers.csv_file_data)
Expand All @@ -46,7 +46,7 @@ def test_get_missing_key_raises(self):
"""
Test that accessing a non-existent key raises KeyError"""
csv_parsers = CSVLineParser()
csv_parsers.parse_csv_line({"NHS_NUMBER": "9000000009", "PERSON_FORENAME": "Alex"})
csv_parsers.parse_csv_line({"NHS_NUMBER": "9000000009", "PERSON_FORENAME": ["Alex"]})
with self.assertRaises(KeyError):
_ = csv_parsers.get_key_value("VACCINE_TYPE")

Expand Down
Loading