diff --git a/layer/nrlf/core/constants.py b/layer/nrlf/core/constants.py index 08f58e6e4..793c2458c 100644 --- a/layer/nrlf/core/constants.py +++ b/layer/nrlf/core/constants.py @@ -3,7 +3,8 @@ class Source(Enum): NRLF = "NRLF" - LEGACY = "NRL" + LEGACY = "NRL" # not actually used + PERFTEST = "NFT-SEED" VALID_SOURCES = frozenset(item.value for item in Source.__members__.values()) diff --git a/layer/nrlf/core/dynamodb/model.py b/layer/nrlf/core/dynamodb/model.py index 1cfca8fab..c66bce77f 100644 --- a/layer/nrlf/core/dynamodb/model.py +++ b/layer/nrlf/core/dynamodb/model.py @@ -91,7 +91,10 @@ def dict(self, **kwargs) -> dict[str, Any]: @classmethod def from_document_reference( - cls, resource: DocumentReference, created_on: Optional[str] = None + cls, + resource: DocumentReference, + created_on: Optional[str] = None, + source: str = "NRLF", ) -> "DocumentPointer": resource_id = getattr(resource, "id") @@ -144,7 +147,7 @@ def from_document_reference( type_id=type_id, category=pointer_category, category_id=category_id, - source="NRLF", + source=source, version=1, document=resource.model_dump_json(exclude_none=True), created_on=created_on or create_fhir_instant(), diff --git a/scripts/seed_nft_tables.py b/scripts/seed_nft_tables.py new file mode 100644 index 000000000..84c6e628f --- /dev/null +++ b/scripts/seed_nft_tables.py @@ -0,0 +1,243 @@ +from datetime import datetime, timedelta, timezone +from itertools import cycle +from math import gcd +from random import shuffle +from typing import Any + +import boto3 +import fire + +from nrlf.consumer.fhir.r4.model import DocumentReference +from nrlf.core.constants import ( + CATEGORY_ATTRIBUTES, + SNOMED_SYSTEM_URL, + TYPE_ATTRIBUTES, + TYPE_CATEGORIES, + Categories, + PointerTypes, +) +from nrlf.core.dynamodb.model import DocumentPointer +from nrlf.core.logger import logger +from nrlf.tests.data import load_document_reference + +dynamodb = boto3.client("dynamodb") +resource = boto3.resource("dynamodb") + 
+logger.setLevel("ERROR")
+
+DOC_REF_TEMPLATE = load_document_reference("NFT-template")
+
+# Weights (10 down to 2) applied to the first nine digits of an NHS number
+# when working out its tenth (check) digit.
+CHECKSUM_WEIGHTS = list(range(10, 1, -1))
+
+# These are based on the Nov 7th 2025 pointer stats report
+DEFAULT_TYPE_DISTRIBUTIONS = {
+    "736253002": 65,  # mental health crisis plan
+    "1382601000000107": 5,  # respect form
+    "887701000000100": 15,  # emergency healthcare plan
+    "861421000000109": 5,  # eol care coordination summary
+    "735324008": 5,  # treatment escalation plan
+    "824321000000109": 5,  # summary record
+}
+
+# Relative custodian frequencies, keyed by pointer type code.
+DEFAULT_CUSTODIAN_DISTRIBUTIONS = {
+    "736253002": {
+        "TRPG": 9,
+        "TRHA": 1,
+        "TRRE": 20,
+        "TRAT": 10,
+        "TWR4": 4,
+        "TRKL": 9,
+        "TRW1": 5,
+        "TRH5": 1,
+        "TRP7": 13,
+        "TRWK": 8,
+        "TRQY": 3,
+        "TRV5": 3,
+        "TRJ8": 2,
+        "TRXA": 4,
+        "T11X": 1,
+        "TG6V": 2,
+    },  # mental health crisis plan
+    "1382601000000107": {"T8GX8": 3, "TQUY": 2},  # respect form
+    "887701000000100": {
+        "TV1": 1,
+        "TV2": 2,
+        "TV3": 1,
+        "TV4": 1,
+        "TV5": 3,
+        "TV6": 1,
+    },  # emergency healthcare plan
+    "861421000000109": {
+        "TV1": 2,
+        "TV2": 2,
+        "TV3": 1,
+        "TV4": 1,
+        "TV5": 3,
+        "TV6": 1,
+    },  # eol care coordination summary
+    "735324008": {
+        "TV1": 1,
+        "TV2": 1,
+        "TV3": 1,
+        "TV4": 2,
+        "TV5": 2,
+        "TV6": 1,
+    },  # treatment escalation plan
+    "824321000000109": {
+        "TRXT": 1,
+    },  # summary record currently has only one supplier
+}
+
+# Reference split of pointers-per-patient (per 100 patients). This is exactly
+# what _set_up_count_iterator(1.1) produces; the seeding itself derives its
+# split from the pointers_per_px argument.
+DEFAULT_COUNT_DISTRIBUTIONS = {"1": 91, "2": 8, "3": 1}
+
+
+class TestNhsNumbersIterator:
+    """
+    Iterates, in ascending order, over ten-digit numbers built from the
+    nine-digit prefixes 900000001..999999999 plus a modulus-11 check digit.
+
+    Prefixes whose check digit works out as 10 cannot form a valid number
+    and are skipped.
+    """
+
+    # The "Test" prefix describes the patients, not a test suite:
+    # stop pytest from trying to collect this class.
+    __test__ = False
+
+    FIRST_PREFIX = 900000000  # exclusive: the first candidate is FIRST_PREFIX + 1
+    LAST_PREFIX = 999999999  # inclusive
+
+    def __init__(self):
+        # Initialise here as well as in __iter__ so next() works on a fresh instance
+        self.first9 = self.FIRST_PREFIX
+
+    def __iter__(self):
+        # Restart from the beginning so every run generates the same patients
+        self.first9 = self.FIRST_PREFIX
+        return self
+
+    def __next__(self):
+        # Bounding the loop (rather than checking once up front) guarantees we
+        # never step past the last nine-digit prefix while skipping invalid ones.
+        while self.first9 < self.LAST_PREFIX:
+            self.first9 += 1
+            digits = map(int, str(self.first9))
+            weighted_sum = sum(
+                weight * digit for weight, digit in zip(CHECKSUM_WEIGHTS, digits)
+            )
+            checksum = -weighted_sum % 11  # i.e. (11 - sum % 11) % 11
+            if checksum != 10:
+                return f"{self.first9}{checksum}"
+        raise StopIteration
+
+
+def _make_seed_pointer(
+    type_code: str, custodian: str, nhs_number: str, counter: int
+) -> DocumentPointer:
+    """
+    Populates the example pointer template with test data to create a valid NRL 3.0 pointer
+
+    :param type_code: SNOMED code of the pointer type
+    :param custodian: ODS code written to the custodian identifier
+    :param nhs_number: NHS number written to the subject identifier
+    :param counter: sequence number used to build the pointer id
+    :raises ValueError: if type_code has no display text or category configured
+    """
+    type_url = f"{SNOMED_SYSTEM_URL}|{type_code}"
+    type_attributes = TYPE_ATTRIBUTES.get(type_url)
+    category = TYPE_CATEGORIES.get(type_url)
+    category_attributes = CATEGORY_ATTRIBUTES.get(category)
+    if type_attributes is None or category is None or category_attributes is None:
+        # Fail with a clear message instead of an AttributeError on None
+        raise ValueError(f"Unknown or uncategorised pointer type: {type_code}")
+
+    # Work on a copy so the module-level template is never mutated
+    doc_ref = DOC_REF_TEMPLATE.model_copy(deep=True)
+    doc_ref.id = f"{custodian}-{str(counter).zfill(12)}"  # deterministic to aid perftest script retrieval
+    doc_ref.subject.identifier.value = nhs_number
+    doc_ref.custodian.identifier.value = custodian
+    doc_ref.author[0].identifier.value = "X26NFT"
+    doc_ref.type.coding[0].code = type_code
+    doc_ref.type.coding[0].display = type_attributes.get("display")
+    doc_ref.category[0].coding[0].code = category.split("|")[-1]
+    doc_ref.category[0].coding[0].display = category_attributes.get("display")
+    # "NFT-SEED" is the value of Source.PERFTEST in nrlf.core.constants
+    return DocumentPointer.from_document_reference(doc_ref, source="NFT-SEED")
+
+
+def _populate_seed_table(
+    table_name: str,
+    px_with_pointers: int,
+    pointers_per_px: float = 1.0,
+    type_dists: dict[str, int] = DEFAULT_TYPE_DISTRIBUTIONS,
+    custodian_dists: dict[str, dict[str, int]] = DEFAULT_CUSTODIAN_DISTRIBUTIONS,
+):
+    """
+    Seeds a table with example data for non-functional testing.
+
+    :param table_name: name of the DynamoDB table to write to
+    :param px_with_pointers: number of patients to create pointers for
+    :param pointers_per_px: target average number of pointers per patient
+        (1.0 to 2.11, see _set_up_count_iterator)
+    :param type_dists: relative frequency of each pointer type code
+    :param custodian_dists: relative custodian frequencies per pointer type code
+    :raises ValueError: if pointers_per_px is out of range, or a pointer type
+        has no custodian distribution
+    """
+    if pointers_per_px < 1.0:
+        raise ValueError("Cannot populate table with patients with zero pointers")
+    missing_types = set(type_dists) - set(custodian_dists)
+    if missing_types:
+        raise ValueError(
+            f"No custodian distribution for pointer types: {sorted(missing_types)}"
+        )
+
+    # set up iterations
+    type_iter = _set_up_cyclical_iterator(type_dists)
+    custodian_iters = _set_up_custodian_iterators(custodian_dists)
+    count_iter = _set_up_count_iterator(pointers_per_px)
+    testnum_iter = iter(TestNhsNumbersIterator())
+
+    doc_ref_target = round(pointers_per_px * px_with_pointers)
+    print(
+        f"Will upsert {doc_ref_target} test pointers for {px_with_pointers} patients."
+    )
+
+    px_counter = 0
+    doc_ref_counter = 0
+    start_time = datetime.now(tz=timezone.utc)
+
+    # batch_writer buffers the puts, sends them in batches and re-sends any
+    # unprocessed items, flushing whatever is left when the block exits.
+    with resource.Table(table_name).batch_writer() as batch:
+        # zip stops after px_with_pointers patients, or earlier if the NHS
+        # number range is exhausted.
+        for _, new_px in zip(range(px_with_pointers), testnum_iter):
+            px_counter += 1
+            for _ in range(int(next(count_iter))):
+                new_type = next(type_iter)
+                new_custodian = next(custodian_iters[new_type])
+                doc_ref_counter += 1
+                pointer = _make_seed_pointer(
+                    new_type, new_custodian, new_px, doc_ref_counter
+                )
+                batch.put_item(Item=pointer.model_dump())
+
+    elapsed = (datetime.now(tz=timezone.utc) - start_time).total_seconds()
+    print(
+        f"Created {doc_ref_counter} pointers for {px_counter} patients in {elapsed} seconds."
+    )
+
+
+def _set_up_cyclical_iterator(dists: dict[str, int]) -> cycle:
+    """
+    Given a dict of values and their relative frequencies,
+    returns an iterator that will cycle through the reduced and shuffled set of values.
+    This should result in more live-like data than e.g. creating a bulk amount of each pointer type/custodian in series.
+    It also means each batch will contain a representative sample of the distribution.
+
+    :raises ValueError: if dists is empty, or contains a negative frequency or only zeros
+    """
+    weights = list(dists.values())
+    if not any(weights) or any(weight < 0 for weight in weights):
+        # An empty or all-zero distribution would yield an iterator with nothing to cycle through
+        raise ValueError(
+            "Distribution needs non-negative frequencies and at least one above zero"
+        )
+    # Dividing by the gcd keeps the ratios but makes the cycle as short as possible
+    d = gcd(*weights)
+    value_list = []
+    for entry, weight in dists.items():
+        value_list.extend([entry] * (weight // d))
+    shuffle(value_list)
+    return cycle(value_list)
+
+
+def _set_up_custodian_iterators(
+    custodian_dists: dict[str, dict[str, int]]
+) -> dict[str, cycle]:
+    """
+    Builds one cyclical custodian iterator per pointer type.
+    """
+    return {
+        pointer_type: _set_up_cyclical_iterator(dists)
+        for pointer_type, dists in custodian_dists.items()
+    }
+
+
+def _set_up_count_iterator(pointers_per_px: float) -> cycle:
+    """
+    Given a target average number of pointers per patient,
+    generates a distribution of counts per individual patient.
+
+    Per 100 patients, the pointers above one each are shared out so that a
+    tenth of them (rounded down) become patients with three pointers and the
+    rest become patients with two, e.g. 1.1 gives {"1": 91, "2": 8, "3": 1}.
+
+    :raises ValueError: if the average is below 1.0 or above 2.11, the highest
+        average this 1/2/3 split can represent
+    """
+    # round() rather than int(): (1.15 - 1.0) * 100 is 14.99... in floating point
+    extra_per_hundred = round(
+        (pointers_per_px - 1.0) * 100
+    )  # no patients can have zero pointers
+    threes = extra_per_hundred // 10
+    twos = extra_per_hundred - 2 * threes
+    ones = 100 - twos - threes
+    if extra_per_hundred < 0 or ones < 0:
+        raise ValueError(
+            "pointers_per_px must be between 1.0 and 2.11 to be split into 1, 2 or 3 pointers per patient"
+        )
+    return _set_up_cyclical_iterator({"1": ones, "2": twos, "3": threes})
+
+
+if __name__ == "__main__":
+    fire.Fire(_populate_seed_table)
diff --git a/tests/data/DocumentReference/NFT-template.json b/tests/data/DocumentReference/NFT-template.json
new file mode 100644
index 000000000..8d70ef667
--- /dev/null
+++ b/tests/data/DocumentReference/NFT-template.json
@@ -0,0 +1,123 @@
+{
+  "resourceType": "DocumentReference",
+  "id": "X26-00000000000000000000",
+  "status": "current",
+  "docStatus": "final",
+  "type": {
+    "coding": [
+      {
+        "system": "http://snomed.info/sct",
+        "code": "736253002",
+        "display": "Mental health crisis plan"
+      }
+    ]
+  },
+  "category": [
+    {
+      "coding": [
+        {
+          "system": "http://snomed.info/sct",
+          "code": "734163000",
+          "display": "Care plan"
+        }
+      ]
+    }
+  ],
+  "subject": {
+    "identifier": {
+      "system": "https://fhir.nhs.uk/Id/nhs-number",
+      "value": "9999999999"
+    }
+  },
+  "author": [
+    {
+      "identifier": {
+        "system": "https://fhir.nhs.uk/Id/ods-organization-code",
+        "value": "X26"
+      }
+    }
+  ],
+  "custodian": {
+    "identifier": {
+      "system": "https://fhir.nhs.uk/Id/ods-organization-code",
+      "value": "X26"
+    }
+  },
+  "description": "This is a bulk generated test pointer created to seed a table for performance testing that otherwise resembles the structure of real NRL3 pointers",
+  "securityLabel": [
+    {
+      "coding": [
+        {
+          "system": "http://terminology.hl7.org/CodeSystem/v3-Confidentiality",
+          "code": "V",
+          "display": "very restricted"
+        }
+      ]
+    }
+  ],
+  "content": [
+    {
+      "attachment": {
+        "contentType": "application/pdf",
+        "language": "en-UK",
+        "url": "ssp://nrl-example.thirdparty.nhs.uk/exampledocument/exampleid.pdf",
+        "title": "Example document for NFT testing",
+        "creation": "2025-11-25T10:45:41+11:00"
+      },
+      "format": {
+        "system": "https://fhir.nhs.uk/England/CodeSystem/England-NRLFormatCode",
+        "code": "urn:nhs-ic:unstructured",
+        "display": "Unstructured Document"
+      },
+      "extension": [
+        {
+          "url": "https://fhir.nhs.uk/England/StructureDefinition/Extension-England-ContentStability",
+          "valueCodeableConcept": {
+            "coding": [
+              {
+                "system": "https://fhir.nhs.uk/England/CodeSystem/England-NRLContentStability",
+                "code": "static",
+                "display": "Static"
+              }
+            ]
+          }
+        },
+        {
+          "url": "https://fhir.nhs.uk/England/StructureDefinition/Extension-England-NRLRetrievalMechanism",
+          "valueCodeableConcept": {
+            "coding": [
+              {
+                "system": "https://fhir.nhs.uk/England/CodeSystem/England-NRLRetrievalMechanism",
+                "code": "SSP",
+                "display": "Spine Secure Proxy"
+              }
+            ]
+          }
+        }
+      ]
+    }
+  ],
+  "context": {
+    "practiceSetting": {
+      "coding": [
+        {
+          "system": "http://snomed.info/sct",
+          "code": "788002001",
+          "display": "Adult mental health service"
+        }
+      ]
+    },
+    "related": [
+      {
+        "identifier": {
+          "system": "https://fhir.nhs.uk/Id/nhsSpineASID",
+          "value": "012345678910"
+        }
+      }
+    ]
+  },
+  "masterIdentifier": {
+    "system": "https://fhir.nhs.uk/Id/NFT-document-reference-master-id",
+    "value": "NFT-0000000000"
+  }
+}