From a95f144698a79803ce1080588e68cdd5f641f886 Mon Sep 17 00:00:00 2001 From: Matt Dean Date: Thu, 12 Dec 2024 16:00:11 +0000 Subject: [PATCH] [NRL-1228] Fix advance care plan naming. Add report script to find invalid pointers --- api/consumer/swagger.yaml | 6 +- api/producer/swagger.yaml | 6 +- layer/nrlf/core/constants.py | 8 +- reports/find_invalid_pointers.py | 81 +++++++++++++++++++ resources/fhir/NRLF-RecordType-ValueSet.json | 2 +- .../createDocumentReference-success.feature | 2 +- tests/performance/environment.py | 2 +- 7 files changed, 94 insertions(+), 13 deletions(-) create mode 100644 reports/find_invalid_pointers.py diff --git a/api/consumer/swagger.yaml b/api/consumer/swagger.yaml index 47c1a072c..632dca1bf 100644 --- a/api/consumer/swagger.yaml +++ b/api/consumer/swagger.yaml @@ -36,7 +36,7 @@ info: * [End of Life Care Coordination Summary](http://snomed.info/sct/861421000000109) * [Emergency health care plan](http://snomed.info/sct/887701000000100) * [Lloyd George record folder](http://snomed.info/sct/16521000000101) - * [Advanced care plan](http://snomed.info/sct/736366004) + * [Advance care plan](http://snomed.info/sct/736366004) * [Treatment escalation plan](http://snomed.info/sct/735324008) * [Summary record]("http://snomed.info/sct|824321000000109") * [Personalised Care and Support Plan]("http://snomed.info/sct|2181441000000107") @@ -1519,8 +1519,8 @@ components: SNOMED_CODES_LLOYD_GEORGE_RECORD_FOLDER: summary: Lloyd George record folder value: http://snomed.info/sct|16521000000101 - SNOMED_CODES_ADVANCED_CARE_PLAN: - summary: Advanced care plan + SNOMED_CODES_ADVANCE_CARE_PLAN: + summary: Advance care plan value: http://snomed.info/sct|736366004 SNOMED_CODES_TREATMENT_ESCALATION_PLAN: summary: Treatment escalation plan diff --git a/api/producer/swagger.yaml b/api/producer/swagger.yaml index bdd0127b8..642932212 100644 --- a/api/producer/swagger.yaml +++ b/api/producer/swagger.yaml @@ -36,7 +36,7 @@ info: * [End of Life Care Coordination Summary](http://snomed.info/sct/861421000000109) * [Emergency health care plan](http://snomed.info/sct/887701000000100) * [Lloyd George record folder](http://snomed.info/sct/16521000000101) - * [Advanced care plan](http://snomed.info/sct/736366004) + * [Advance care plan](http://snomed.info/sct/736366004) * [Treatment escalation plan](http://snomed.info/sct/735324008) * [Summary record]("http://snomed.info/sct|824321000000109") * [Personalised Care and Support Plan]("http://snomed.info/sct|2181441000000107") @@ -2054,8 +2054,8 @@ components: SNOMED_CODES_LLOYD_GEORGE_RECORD_FOLDER: summary: Lloyd George record folder value: http://snomed.info/sct|16521000000101 - SNOMED_CODES_ADVANCED_CARE_PLAN: - summary: Advanced care plan + SNOMED_CODES_ADVANCE_CARE_PLAN: + summary: Advance care plan value: http://snomed.info/sct|736366004 SNOMED_CODES_TREATMENT_ESCALATION_PLAN: summary: Treatment escalation plan diff --git a/layer/nrlf/core/constants.py b/layer/nrlf/core/constants.py index 67971859f..4280f7a6d 100644 --- a/layer/nrlf/core/constants.py +++ b/layer/nrlf/core/constants.py @@ -60,7 +60,7 @@ class PointerTypes(Enum): CONTINGENCY_PLAN = "http://snomed.info/sct|325691000000100" EOL_CARE_PLAN = "http://snomed.info/sct|736373009" LLOYD_GEORGE_FOLDER = "http://snomed.info/sct|16521000000101" - ADVANCED_CARE_PLAN = "http://snomed.info/sct|736366004" + ADVANCE_CARE_PLAN = "http://snomed.info/sct|736366004" TREATMENT_ESCALATION_PLAN = "http://snomed.info/sct|735324008" SUMMARY_RECORD = "http://snomed.info/sct|824321000000109" PERSONALISED_CARE_AND_SUPPORT_PLAN = "http://snomed.info/sct|2181441000000107" @@ -139,8 +139,8 @@ def coding_value(self): PointerTypes.LLOYD_GEORGE_FOLDER.value: { "display": "Lloyd George record folder", }, - PointerTypes.ADVANCED_CARE_PLAN.value: { - "display": "Advanced care plan", + PointerTypes.ADVANCE_CARE_PLAN.value: { + "display": "Advance care plan", }, PointerTypes.TREATMENT_ESCALATION_PLAN.value: { "display": "Treatment escalation plan", @@ -169,7 +169,7 @@ def coding_value(self): PointerTypes.CONTINGENCY_PLAN.value: Categories.CARE_PLAN.value, PointerTypes.EOL_CARE_PLAN.value: Categories.CARE_PLAN.value, PointerTypes.LLOYD_GEORGE_FOLDER.value: Categories.CARE_PLAN.value, - PointerTypes.ADVANCED_CARE_PLAN.value: Categories.CARE_PLAN.value, + PointerTypes.ADVANCE_CARE_PLAN.value: Categories.CARE_PLAN.value, PointerTypes.TREATMENT_ESCALATION_PLAN.value: Categories.CARE_PLAN.value, PointerTypes.PERSONALISED_CARE_AND_SUPPORT_PLAN.value: Categories.CARE_PLAN.value, # diff --git a/reports/find_invalid_pointers.py b/reports/find_invalid_pointers.py new file mode 100644 index 000000000..23bbb8e07 --- /dev/null +++ b/reports/find_invalid_pointers.py @@ -0,0 +1,81 @@ +from datetime import datetime, timedelta, timezone +from typing import Any + +import boto3 +import fire + +from nrlf.consumer.fhir.r4.model import DocumentReference +from nrlf.core.logger import logger +from nrlf.core.validators import DocumentReferenceValidator + +dynamodb = boto3.client("dynamodb") +paginator = dynamodb.get_paginator("scan") + +logger.setLevel("ERROR") + + +def _validate_document(document: str): + docref = DocumentReference.model_validate_json(document) + + validator = DocumentReferenceValidator() + result = validator.validate(data=docref) + + if not result.is_valid: + raise Exception("Failed to validate document: " + str(result.issues)) + + +def _find_invalid_pointers(table_name: str) -> dict[str, float]: + """ + Find pointers in the given table that are invalid. + Parameters: + - table_name: The name of the pointers table to use. + """ + + print(f"Finding invalid pointers in table {table_name}....") # noqa + + params: dict[str, Any] = { + "TableName": table_name, + "PaginationConfig": {"PageSize": 50}, + } + + invalid_pointers = [] + total_scanned_count = 0 + + start_time = datetime.now(tz=timezone.utc) + + for page in paginator.paginate(**params): + for item in page["Items"]: + id = item.get("id", {}).get("S") + document = item.get("document", {}).get("S", "") + try: + _validate_document(document) + except Exception as exc: + invalid_pointers.append((id, exc)) + + total_scanned_count += page["ScannedCount"] + + if total_scanned_count % 1000 == 0: + print(".", end="", flush=True) # noqa + + if total_scanned_count % 100000 == 0: + print( # noqa + f"scanned={total_scanned_count} invalid={len(invalid_pointers)}" + ) + + end_time = datetime.now(tz=timezone.utc) + + print("Writing invalid_pointers to file ./invalid_pointers.txt ...") # noqa + with open("invalid_pointers.txt", "w") as f: + for id, err in invalid_pointers: + f.write(f"{id}: {err}\n") + + print(" Done") # noqa + return { + "invalid_pointers": len(invalid_pointers), + "scanned_count": total_scanned_count, + "took-secs": timedelta.total_seconds(end_time - start_time), + } + + +if __name__ == "__main__": + fire.Fire(_find_invalid_pointers) diff --git a/resources/fhir/NRLF-RecordType-ValueSet.json b/resources/fhir/NRLF-RecordType-ValueSet.json index 4fda682f0..08652fe5f 100644 --- a/resources/fhir/NRLF-RecordType-ValueSet.json +++ b/resources/fhir/NRLF-RecordType-ValueSet.json @@ -56,7 +56,7 @@ }, { "code": "736366004", - "display": "Advanced care plan" + "display": "Advance care plan" }, { "code": "735324008", diff --git a/tests/features/producer/createDocumentReference-success.feature b/tests/features/producer/createDocumentReference-success.feature index 202595190..8842111fb 100644 --- a/tests/features/producer/createDocumentReference-success.feature +++ b/tests/features/producer/createDocumentReference-success.feature @@ -211,7 +211,7 @@ Feature: Producer - createDocumentReference - Success Scenarios | 325691000000100 | 734163000 | CONTINGENCY_PLAN | | 736373009 | 734163000 | EOL_CARE_PLAN | | 16521000000101 | 734163000 | LLOYD_GEORGE_FOLDER | - | 736366004 | 734163000 | ADVANCED_CARE_PLAN | + | 736366004 | 734163000 | ADVANCE_CARE_PLAN | | 735324008 | 734163000 | TREATMENT_ESCALATION_PLAN | | 2181441000000107 | 734163000 | PERSONALISED_CARE_AND_SUPPORT_PLAN | diff --git a/tests/performance/environment.py b/tests/performance/environment.py index 5baa3f7f0..fc7861bd7 100644 --- a/tests/performance/environment.py +++ b/tests/performance/environment.py @@ -29,7 +29,7 @@ class LogReference: "736373009": "End of life care plan", "861421000000109": "End of life care coordination summary", "887701000000100": "Emergency Health Care Plans", - "736366004": "Advanced Care Plan", + "736366004": "Advance Care Plan", "735324008": "Treatment Escalation Plan", "824321000000109": "Summary Record", "2181441000000107": "Personalised Care and Support Plan",