-
Notifications
You must be signed in to change notification settings - Fork 3
VED-909: Uplift Schema Expression Rule #982
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: staging/VED-508-validation-refactor-and-dq-feature
Are you sure you want to change the base?
Changes from all commits
17a4874
2493245
9b50fcd
69d8e24
15bdfa6
f44002a
9d3ea26
b08c49f
9e18d90
32e2313
f5917e7
e5f43ba
fb42415
e643744
b878b22
b5ae016
a705b80
56b813d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Large diffs are not rendered by default.
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Where is this file used?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. same |
Large diffs are not rendered by default.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,30 @@ | ||
class Constants:
    """Limits, allowed values and message templates shared by the validation rules."""

    NHS_NUMBER_LENGTH = 10
    PERSON_NAME_ELEMENT_MAX_LENGTH = 35
    GENDERS = ["male", "female", "other", "unknown"]

    # Accepted date/datetime patterns. This attribute used to be assigned twice
    # (first to the single string "%Y-%m-%dT%H:%M", then to this list); only the
    # list ever survived class creation, so the dead string assignment is gone.
    DATETIME_FORMAT = [
        "%Y-%m-%d",
        "%Y-%m-%dT%H:%M:%S%z",
        "%Y-%m-%dT%H:%M:%S.%f%z",
    ]
    # Timezone suffixes, with and without the colon.
    ALLOWED_SUFFIXES = {
        "+00:00",
        "+01:00",
        "+0000",
        "+0100",
    }
    # Placeholder token baked into the message templates below.
    # Presumably callers substitute the real field name - confirm against usage.
    field_name = "FIELD_TO_REPLACE"

    # NOTE(review): the adjacent literals below are concatenated without any
    # separator, so the bullet points run together in the rendered message.
    # Left byte-for-byte unchanged because the text is user-facing.
    DATETIME_ERROR_MESSAGE = (
        f"{field_name} must be a valid datetime in one of the following formats:"
        "- 'YYYY-MM-DD' — Full date only"
        "- 'YYYY-MM-DDThh:mm:ss%z' — Full date and time with timezone (e.g. +00:00 or +01:00)"
        "- 'YYYY-MM-DDThh:mm:ss.f%z' — Full date and time with milliseconds and timezone"
        "- Date must not be in the future."
    )
    STRICT_DATETIME_ERROR_MESSAGE = (
        "Only '+00:00' and '+01:00' are accepted as valid timezone offsets.\n"
        f"Note that partial dates are not allowed for {field_name} in this service.\n"
    )
    MAXIMUM_DOSE_NUMBER_VALUE = 9
Large diffs are not rendered by default.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,23 @@ | ||
| from common.validator.constants.constants import Constants | ||
|
|
||
|
|
||
def expression_rule_per_field(expression_type: str) -> dict:
    """
    Look up the rule settings that belong to an expression type.

    Parameters:-
    expression_type: str
        One of "NHS_NUMBER", "PERSON_NAME", "PERSON_SURNAME", "GENDER",
        "DATETIME" or "DOSE_NUMBER".
    Returns:-
        A dict of rule options for that type.
    Raises:-
        ValueError if the expression type is not one of the names above.
    """
    # Each branch builds its dict only when selected, so the constants are
    # read lazily, exactly as they would be in a match statement.
    if expression_type == "NHS_NUMBER":
        return {"defined_length": Constants.NHS_NUMBER_LENGTH, "spaces_allowed": False}

    if expression_type == "PERSON_NAME":
        return {
            "elements_are_strings": True,
            "array_max_length": 5,
            "max_length": Constants.PERSON_NAME_ELEMENT_MAX_LENGTH,
        }

    if expression_type == "PERSON_SURNAME":
        return {"max_length": Constants.PERSON_NAME_ELEMENT_MAX_LENGTH}

    if expression_type == "GENDER":
        return {"predefined_values": Constants.GENDERS}

    if expression_type == "DATETIME":
        return {"strict_time_zone": True}

    if expression_type == "DOSE_NUMBER":
        return {"max_value": Constants.MAXIMUM_DOSE_NUMBER_VALUE}

    raise ValueError(f"Expression rule not found for type: {expression_type}")
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,58 @@ | ||
| from datetime import date, datetime | ||
|
|
||
| from stdnum.verhoeff import validate | ||
|
|
||
|
|
||
| def check_if_future_date(parsed_value: date | datetime): | ||
| """ | ||
| Ensure a parsed date or datetime object is not in the future. | ||
| """ | ||
| if isinstance(parsed_value, datetime): | ||
| now = datetime.now(parsed_value.tzinfo) if parsed_value.tzinfo else datetime.now() | ||
| elif isinstance(parsed_value, date): | ||
| now = datetime.now().date() | ||
| if parsed_value > now: | ||
| return True | ||
| return False | ||
|
|
||
|
|
||
def nhs_number_mod11_check(nhs_number: str) -> bool:
    """
    Parameters:-
    nhs_number: str
        The NHS number to be checked.
    Returns:-
        True if the nhs number passes the mod 11 check, False otherwise.
        Anything that is not a string of exactly ten ASCII digits is False.

    Definition of NHS number can be found at:
    https://www.datadictionary.nhs.uk/attributes/nhs_number.html
    """
    # str.isdigit() alone also accepts characters such as superscript digits,
    # which int() cannot convert; insisting on ASCII keeps this a pure
    # True/False check instead of raising ValueError on such input.
    if not (
        isinstance(nhs_number, str)
        and len(nhs_number) == 10
        and nhs_number.isascii()
        and nhs_number.isdigit()
    ):
        return False

    # Weighting factors 10, 9, ..., 2 line up with the first nine digits.
    weighting_factors = range(10, 1, -1)
    # Multiply each of the first nine digits by its weighting factor and add
    # the results together.
    total = sum(int(digit) * weight for digit, weight in zip(nhs_number[:-1], weighting_factors))
    # Subtract the remainder of total / 11 from 11 to give the check digit.
    # A result of 11 means a check digit of 0. A result of 10 can never equal
    # a single digit, so such numbers fail the comparison below.
    remainder = total % 11
    check_digit = 0 if remainder == 0 else 11 - remainder
    # The computed check digit must match the tenth digit.
    return check_digit == int(nhs_number[-1])
|
|
||
|
|
||
def is_valid_simple_snomed(simple_snomed: str) -> bool:
    """
    Check whether a SNOMED code is valid.

    Parameters:-
    simple_snomed: str
        The candidate code; None is treated as invalid.
    Returns:-
        True only if the value is all digits, does not start with "0", is
        between 6 and 18 characters long, passes the Verhoeff checksum, and
        has "00" or "10" as the two characters before its final character.
        False in every other case.
    """
    min_snomed_length = 6
    max_snomed_length = 18

    # Cheap structural checks first, in the same order as before.
    if (
        simple_snomed is None
        or not simple_snomed.isdigit()
        or simple_snomed[0] == "0"
        or not min_snomed_length <= len(simple_snomed) <= max_snomed_length
    ):
        return False

    # stdnum's validate() signals a bad checksum by raising rather than by
    # returning a falsy value, so it is translated into False here.
    # Function-scope import keeps the module's import block untouched.
    from stdnum.exceptions import ValidationError

    try:
        validate(simple_snomed)
    except ValidationError:
        return False

    # presumably the partition identifier of the code - confirm against the SNOMED spec
    return simple_snomed[-3:-1] in ("00", "10")
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -16,7 +16,7 @@ | |
|
|
||
|
|
||
| class Validator: | ||
| def __init__(self, schema_file: dict = {}): | ||
| def __init__(self, schema_file: dict = None): | ||
| self.schema_file = schema_file | ||
| self.schema_parser = SchemaParser() | ||
|
|
||
|
|
@@ -56,6 +56,7 @@ def _validate_expression( | |
|
|
||
| try: | ||
| expression_values = data_parser.extract_field_values(expression_fieldname) | ||
| print(f"Validating Expression ID {expression_fieldname} with values: {expression_values}") | ||
| except Exception as e: | ||
| message = f"Data get values Unexpected exception [{e.__class__.__name__}]: {e}" | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'd make the message "Extract Field Values: Unexpected exception ..." - it's better English and more consistent with the function naming
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is for testing only; it will ultimately be removed. |
||
| error_record = ErrorReport(code=ExceptionLevels.PARSING_ERROR, message=message) | ||
|
|
@@ -72,10 +73,14 @@ def _validate_expression( | |
| add_error_record( | ||
| error_records, error_record, expression_error_group, expression_name, expression_id, error_level | ||
| ) | ||
| except Exception: | ||
| print(f"Exception validating expression {expression_id} on row {row}: {error_record}") | ||
| row += 1 | ||
| return row | ||
| except Exception as e: | ||
| message = f"Expression Validation Unexpected exception [{e.__class__.__name__}]: {e}" | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'd make the message "Validate Expression: Unexpected exception ..." (see above) |
||
| error_record = ErrorReport(code=ExceptionLevels.UNEXPECTED_EXCEPTION, message=message) | ||
| error_record = ErrorReport(code=ExceptionLevels.PARSING_ERROR, message=message) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We're overwriting the error_record written in the previous line. |
||
| add_error_record( | ||
| error_records, error_record, expression_error_group, expression_name, expression_id, error_level | ||
| ) | ||
| return | ||
|
|
||
| def validate_fhir( | ||
| self, | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Where are these schema files used?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
These files will serve as the per-environment schema files. They are not populated yet because they are meant to be used once we have a fully functional schema for each environment.