From 2ec9887d2c5bcbe06dbbe88f98a652f91f8fd082 Mon Sep 17 00:00:00 2001 From: Sandy Forrester Date: Mon, 17 Nov 2025 16:28:58 +0000 Subject: [PATCH 01/12] NRL-1798 Add delete pointers script --- scripts/delete_pointers_by_id.py | 317 +++++++++++++++++ scripts/tests/test_delete_pointers_by_id.py | 374 ++++++++++++++++++++ 2 files changed, 691 insertions(+) create mode 100755 scripts/delete_pointers_by_id.py create mode 100644 scripts/tests/test_delete_pointers_by_id.py diff --git a/scripts/delete_pointers_by_id.py b/scripts/delete_pointers_by_id.py new file mode 100755 index 000000000..80e0406b3 --- /dev/null +++ b/scripts/delete_pointers_by_id.py @@ -0,0 +1,317 @@ +#!/usr/bin/env python +import json +from datetime import datetime, timedelta, timezone +from typing import Any, Dict, List + +import boto3 +import fire + +dynamodb = boto3.client("dynamodb") + + +def _load_pointers_from_file(pointers_file: str) -> list[str]: + """ + Read pointers from a file. Supports: + - JSON array of objects with an "id" field + - line-delimited plain text (one id per line) + + Returns a list of pointer id strings. Prints a warning for skipped malformed JSON entries. + """ + with open(pointers_file, "r") as fh: + content = fh.read().strip() + + if not content: + return [] + + # JSON path + if content.startswith("[") or content.startswith("{"): + try: + data = json.loads(content) + except json.JSONDecodeError as e: + raise ValueError(f"Failed to parse JSON file {pointers_file}: {e}") from e + + if not isinstance(data, list): + raise ValueError("JSON file must contain an array of objects") + + parsed_ids: list[str] = [] + skipped_count = 0 + for item in data: + if ( + isinstance(item, dict) + and "id" in item + and isinstance(item["id"], str) + and item["id"].strip() + ): + parsed_ids.append(item["id"].strip()) + else: + skipped_count += 1 + + if skipped_count: + print( + f"Warning: skipped {skipped_count} malformed entries in JSON file {pointers_file}" + ) + + return parsed_ids + + # Plain text fallback + return [line.strip() for line in content.splitlines() if line.strip()] + + +def _check_pointers_match_ods_code( + ods_code: str, pointer_ids: List[str] +) -> tuple[List[str], List[str]]: + """ + Validate that pointer IDs are in line with the provided ODS code. + Returns (matched_ids, mismatched_ids) + """ + matched = [] + mismatched = [] + + for pointer_id in pointer_ids: + if pointer_id.startswith(f"{ods_code}-"): + matched.append(pointer_id) + else: + mismatched.append(pointer_id) + + return matched, mismatched + + +def _batch_get_existing_pointers( + table_name: str, pointer_ids: List[str] +) -> tuple[List[str], List[str]]: + """ + Check which pointers exist using BatchGetItem (max 100 items per request). + Returns (existing_ids, not_found_ids) + """ + existing = [] + not_found = [] + + for batch_idx in range(0, len(pointer_ids), 100): + batch_ids = pointer_ids[batch_idx : batch_idx + 100] + + keys = [ + { + "pk": {"S": f"D#{pointer_id}"}, + "sk": {"S": f"D#{pointer_id}"}, + } + for pointer_id in batch_ids + ] + + response = dynamodb.batch_get_item(RequestItems={table_name: {"Keys": keys}}) + + found_ids = { + item["pk"]["S"][2:] + for item in response.get("Responses", {}).get(table_name, []) + } + + for pointer_id in batch_ids: + if pointer_id in found_ids: + existing.append(pointer_id) + else: + not_found.append(pointer_id) + + return existing, not_found + + +def _batch_delete_pointers( + table_name: str, pointer_ids: List[str] +) -> tuple[List[str], List[str]]: + """ + Delete pointers using BatchWriteItem (max 25 items per request). + Returns (deleted_ids, failed_ids) + """ + pointers_deleted = [] + failed_deletes_set: set[str] = set() + + for _batch_id in range(0, len(pointer_ids), 25): + batch_ptrs = pointer_ids[_batch_id : _batch_id + 25] + + batch = [ + { + "DeleteRequest": { + "Key": { + "pk": {"S": f"D#{pointer_id}"}, + "sk": {"S": f"D#{pointer_id}"}, + } + } + } + for pointer_id in batch_ptrs + ] + + result = dynamodb.batch_write_item(RequestItems={table_name: batch}) + unprocessed = result.get("UnprocessedItems", {}).get(table_name, []) + + # Collect unprocessed IDs + for item in unprocessed: + pk_val = item["DeleteRequest"]["Key"]["pk"]["S"] + failed_deletes_set.add(pk_val[2:]) # Remove "D#" + + # Only count successfully deleted items (batch size minus unprocessed) + successfully_deleted = [p for p in batch_ptrs if p not in failed_deletes_set] + pointers_deleted.extend(successfully_deleted) + + if len(pointers_deleted) % 1000 == 0 and len(pointers_deleted) > 0: + print(".", end="", flush=True) + + return pointers_deleted, sorted(failed_deletes_set) + + +def _delete_pointers_by_id( + table_name: str, + ods_code: str, + pointers_to_delete: List[str] | None = None, + pointers_file: str | None = None, +) -> Dict[str, Any]: + """ + Delete pointers from DynamoDB table. + + Can accept pointers as: + - list of strings: pointers_to_delete=["id1", "id2"] + - JSON file: pointers_file=/path/to/pointers.json (array of objects with "id" field) + - text file: pointers_file=/path/to/ids.txt (one id per line) + + Parameters: + - table_name: DynamoDB table name + - ods_code: ODS code of the organisation that the pointers belong to + - pointers_to_delete: list of pointer ids to delete + - pointers_file: path to JSON file (array of objects with "id" field) or text file (one id per line) + """ + if pointers_to_delete is None and pointers_file is None: + raise ValueError("Provide either pointers_to_delete or pointers_file") + + if pointers_to_delete is not None and pointers_file is not None: + raise ValueError("Provide either pointers_to_delete or pointers_file, not both") + + # Load pointers from file if provided + if pointers_file: + pointers_to_delete = _load_pointers_from_file(pointers_file) + + if len(pointers_to_delete) == 0: + return { + "pointers_to_delete": 0, + "ods_code": ods_code, + "ods_code_matched": [], + "ods_code_mismatched": [], + "pointer_not_found": [], + "deleted_pointers": [], + "failed_deletes": [], + "deletes-took-secs": 0, + } + + start_time = datetime.now(tz=timezone.utc) + + print( + f"Validating {len(pointers_to_delete)} pointers against ODS code {ods_code}..." + ) + matched_pointers, mismatched_pointers = _check_pointers_match_ods_code( + ods_code, pointers_to_delete + ) + + print( + f"Validate pointer's ODS code: {len(matched_pointers)} matched, {len(mismatched_pointers)} mismatched" + ) + + if len(matched_pointers) == 0: + print(f"None of the pointer IDs are a match for ODS code {ods_code}. Exiting.") + end_time = datetime.now(tz=timezone.utc) + return { + "pointers_to_delete": len(pointers_to_delete), + "ods_code": ods_code, + "ods_code_matched": { + "count": len(matched_pointers), + "ids": matched_pointers, + }, + "ods_code_mismatched": { + "count": len(mismatched_pointers), + "ids": mismatched_pointers, + }, + "pointer_not_found": { + "count": 0, + "ids": [], + }, + "deleted_pointers": { + "count": 0, + "ids": [], + }, + "failed_deletes": { + "count": 0, + "ids": [], + }, + "deletes-took-secs": timedelta.total_seconds(end_time - start_time), + } + + print(f"Checking existence of {len(matched_pointers)} pointers in {table_name}...") + existing_pointers, not_found_pointers = _batch_get_existing_pointers( + table_name, matched_pointers + ) + + print( + f"Found {len(existing_pointers)} existing pointers to delete, {len(not_found_pointers)} not found." + ) + + if len(existing_pointers) == 0: + print("No pointers found to delete. Exiting.") + end_time = datetime.now(tz=timezone.utc) + return { + "pointers_to_delete": len(pointers_to_delete), + "ods_code": ods_code, + "ods_code_matched": { + "count": len(matched_pointers), + "ids": matched_pointers, + }, + "ods_code_mismatched": { + "count": len(mismatched_pointers), + "ids": mismatched_pointers, + }, + "pointer_not_found": { + "count": len(not_found_pointers), + "ids": not_found_pointers, + }, + "deleted_pointers": { + "count": 0, + "ids": [], + }, + "failed_deletes": { + "count": 0, + "ids": [], + }, + "deletes-took-secs": timedelta.total_seconds(end_time - start_time), + } + + # Proceed with deletion using BatchWriteItem + pointers_deleted, failed_deletes = _batch_delete_pointers( + table_name, existing_pointers + ) + + end_time = datetime.now(tz=timezone.utc) + + print(" Done") + return { + "pointers_to_delete": len(pointers_to_delete), + "ods_code": ods_code, + "ods_code_matched": { + "count": len(matched_pointers), + "ids": matched_pointers, + }, + "ods_code_mismatched": { + "count": len(mismatched_pointers), + "ids": mismatched_pointers, + }, + "pointer_not_found": { + "count": len(not_found_pointers), + "ids": not_found_pointers, + }, + "deleted_pointers": { + "count": len(pointers_deleted), + "ids": pointers_deleted, + }, + "failed_deletes": { + "count": len(failed_deletes), + "ids": failed_deletes, + }, + "deletes-took-secs": timedelta.total_seconds(end_time - start_time), + } + + +if __name__ == "__main__": + fire.Fire(_delete_pointers_by_id) diff --git a/scripts/tests/test_delete_pointers_by_id.py b/scripts/tests/test_delete_pointers_by_id.py new file mode 100644 index 000000000..3c220b94e --- /dev/null +++ b/scripts/tests/test_delete_pointers_by_id.py @@ -0,0 +1,374 @@ +from unittest.mock import patch + +import pytest +from delete_pointers_by_id import ( + _batch_delete_pointers, + _batch_get_existing_pointers, + _check_pointers_match_ods_code, + _delete_pointers_by_id, + _load_pointers_from_file, +) + + +class TestLoadPointersFromFile: + """Tests for file parsing.""" + + @patch("builtins.open", create=True) + def test_load_json_array(self, mock_open): + mock_open.return_value.__enter__.return_value.read.return_value = ( + '[{"id": "G3H9E-id1"}, {"id": "G3H9E-id2"}]' + ) + + result = _load_pointers_from_file("./pointers.json") + + assert len(result) == 2 + assert "G3H9E-id1" in result + assert "G3H9E-id2" in result + + @patch("builtins.open", create=True) + def test_load_json_with_malformed_entries(self, mock_open): + mock_open.return_value.__enter__.return_value.read.return_value = '[{"id": "G3H9E-id1"}, {"patient_number": "123"}, {"id": ""}, {"id": "G3H9E-id2"}]' + + result = _load_pointers_from_file("./pointers.json") + + assert len(result) == 2 + assert "G3H9E-id1" in result + assert "G3H9E-id2" in result + + @patch("builtins.open", create=True) + def test_load_plain_text_file(self, mock_open): + mock_open.return_value.__enter__.return_value.read.return_value = ( + "G3H9E-id1\nG3H9E-id2\nG3H9E-id3" + ) + + result = _load_pointers_from_file("./ids.txt") + + assert len(result) == 3 + assert "G3H9E-id1" in result + assert "G3H9E-id3" in result + + @patch("builtins.open", create=True) + def test_load_plain_text_with_empty_lines(self, mock_open): + mock_open.return_value.__enter__.return_value.read.return_value = ( + "G3H9E-id1\n\nG3H9E-id2\n \nG3H9E-id3" + ) + + result = _load_pointers_from_file("./ids.txt") + + assert len(result) == 3 + assert "G3H9E-id1" in result + + @patch("builtins.open", create=True) + def test_load_empty_file(self, mock_open): + mock_open.return_value.__enter__.return_value.read.return_value = "" + + result = _load_pointers_from_file("./empty.txt") + + assert len(result) == 0 + + @patch("builtins.open", create=True) + def test_invalid_json(self, mock_open): + mock_open.return_value.__enter__.return_value.read.return_value = ( + '[{"id": "G3H9E-id1"' + ) + + with pytest.raises(ValueError, match="Failed to parse JSON file"): + _load_pointers_from_file("./bad.json") + + @patch("builtins.open", create=True) + def test_json_not_array(self, mock_open): + mock_open.return_value.__enter__.return_value.read.return_value = ( + '{"id": "G3H9E-id1"}' + ) + + with pytest.raises( + ValueError, match="JSON file must contain an array of objects" + ): + _load_pointers_from_file("./not_array.json") + + +class TestCheckPointersMatchOdsCode: + """Tests for ODS code validation.""" + + def test_all_pointers_match_ods_code(self): + ods_code = "G3H9E" + pointer_ids = [ + "G3H9E-1794b612-c617-4e0b-8671-ac7baacc4610", + "G3H9E-5d8f1038-19f8-11f0-9d79-2dc2b9c1e380", + ] + matched, mismatched = _check_pointers_match_ods_code(ods_code, pointer_ids) + + assert len(matched) == 2 + assert len(mismatched) == 0 + assert matched == pointer_ids + + def test_no_pointers_match_ods_code(self): + ods_code = "G3H9E" + pointer_ids = [ + "RAT-1794b612-c617-4e0b-8671-ac7baacc4610", + "RKL-5d8f1038-19f8-11f0-9d79-2dc2b9c1e380", + ] + matched, mismatched = _check_pointers_match_ods_code(ods_code, pointer_ids) + + assert len(matched) == 0 + assert len(mismatched) == 2 + assert mismatched == pointer_ids + + def test_mixed_matching_and_mismatching_pointers(self): + ods_code = "G3H9E" + pointer_ids = [ + "G3H9E-1794b612-c617-4e0b-8671-ac7baacc4610", + "RAT-5d8f1038-19f8-11f0-9d79-2dc2b9c1e380", + "G3H9E-abc123", + ] + matched, mismatched = _check_pointers_match_ods_code(ods_code, pointer_ids) + + assert len(matched) == 2 + assert len(mismatched) == 1 + assert "G3H9E-1794b612-c617-4e0b-8671-ac7baacc4610" in matched + assert "G3H9E-abc123" in matched + assert "RAT-5d8f1038-19f8-11f0-9d79-2dc2b9c1e380" in mismatched + + def test_empty_pointer_list(self): + ods_code = "G3H9E" + matched, mismatched = _check_pointers_match_ods_code(ods_code, []) + + assert len(matched) == 0 + assert len(mismatched) == 0 + + +class TestBatchGetExistingPointers: + """Tests for batch existence checking.""" + + @patch("delete_pointers_by_id.dynamodb") + def test_all_pointers_exist(self, mock_dynamodb): + table_name = "test-table" + pointer_ids = ["G3H9E-id1", "G3H9E-id2"] + + mock_dynamodb.batch_get_item.return_value = { + "Responses": { + table_name: [ + {"pk": {"S": "D#G3H9E-id1"}}, + {"pk": {"S": "D#G3H9E-id2"}}, + ] + } + } + + existing, not_found = _batch_get_existing_pointers(table_name, pointer_ids) + + assert len(existing) == 2 + assert len(not_found) == 0 + assert "G3H9E-id1" in existing + assert "G3H9E-id2" in existing + + @patch("delete_pointers_by_id.dynamodb") + def test_no_pointers_exist(self, mock_dynamodb): + table_name = "test-table" + pointer_ids = ["G3H9E-id1", "G3H9E-id2"] + + mock_dynamodb.batch_get_item.return_value = {"Responses": {table_name: []}} + + existing, not_found = _batch_get_existing_pointers(table_name, pointer_ids) + + assert len(existing) == 0 + assert len(not_found) == 2 + assert "G3H9E-id1" in not_found + assert "G3H9E-id2" in not_found + + @patch("delete_pointers_by_id.dynamodb") + def test_mixed_existing_and_not_found(self, mock_dynamodb): + table_name = "test-table" + pointer_ids = ["G3H9E-id1", "G3H9E-id2", "G3H9E-id3"] + + mock_dynamodb.batch_get_item.return_value = { + "Responses": { + table_name: [ + {"pk": {"S": "D#G3H9E-id1"}}, + {"pk": {"S": "D#G3H9E-id3"}}, + ] + } + } + + existing, not_found = _batch_get_existing_pointers(table_name, pointer_ids) + + assert len(existing) == 2 + assert len(not_found) == 1 + assert "G3H9E-id1" in existing + assert "G3H9E-id3" in existing + assert "G3H9E-id2" in not_found + + @patch("delete_pointers_by_id.dynamodb") + def test_batch_processing_large_list(self, mock_dynamodb): + table_name = "test-table" + pointer_ids = [f"G3H9E-id{i}" for i in range(150)] + + mock_dynamodb.batch_get_item.return_value = { + "Responses": { + table_name: [{"pk": {"S": f"D#G3H9E-id{i}"}} for i in range(150)] + } + } + + existing, not_found = _batch_get_existing_pointers(table_name, pointer_ids) + + assert mock_dynamodb.batch_get_item.call_count == 2 + assert len(existing) == 150 + assert len(not_found) == 0 + + +class TestBatchDeletePointers: + """Tests for batch deletion.""" + + @patch("delete_pointers_by_id.dynamodb") + def test_all_pointers_deleted_successfully(self, mock_dynamodb): + table_name = "test-table" + pointer_ids = ["G3H9E-id1", "G3H9E-id2"] + + mock_dynamodb.batch_write_item.return_value = {"UnprocessedItems": {}} + + deleted, failed = _batch_delete_pointers(table_name, pointer_ids) + + assert len(deleted) == 2 + assert len(failed) == 0 + assert "G3H9E-id1" in deleted + assert "G3H9E-id2" in deleted + + @patch("delete_pointers_by_id.dynamodb") + def test_some_pointers_unprocessed(self, mock_dynamodb): + table_name = "test-table" + pointer_ids = ["G3H9E-id1", "G3H9E-id2", "G3H9E-id3"] + + mock_dynamodb.batch_write_item.return_value = { + "UnprocessedItems": { + table_name: [ + { + "DeleteRequest": { + "Key": { + "pk": {"S": "D#G3H9E-id3"}, + "sk": {"S": "D#G3H9E-id3"}, + } + } + } + ] + } + } + + deleted, failed = _batch_delete_pointers(table_name, pointer_ids) + + assert len(deleted) == 2 + assert len(failed) == 1 + assert "G3H9E-id1" in deleted + assert "G3H9E-id2" in deleted + assert "G3H9E-id3" in failed + + @patch("delete_pointers_by_id.dynamodb") + def test_batch_processing_large_list(self, mock_dynamodb): + table_name = "test-table" + pointer_ids = [f"G3H9E-id{i}" for i in range(60)] + + mock_dynamodb.batch_write_item.return_value = {"UnprocessedItems": {}} + + deleted, failed = _batch_delete_pointers(table_name, pointer_ids) + + assert mock_dynamodb.batch_write_item.call_count == 3 + assert len(deleted) == 60 + assert len(failed) == 0 + + +class TestDeletePointersById: + """Tests for main delete function.""" + + def test_missing_required_parameters(self): + with pytest.raises( + ValueError, match="Provide either pointers_to_delete or pointers_file" + ): + _delete_pointers_by_id("test-table", "G3H9E") + + def test_both_parameters_provided(self): + with pytest.raises( + ValueError, + match="Provide either pointers_to_delete or pointers_file, not both", + ): + _delete_pointers_by_id( + "test-table", + "G3H9E", + pointers_to_delete=["G3H9E-id1"], + pointers_file="./ids.json", + ) + + @patch("delete_pointers_by_id._check_pointers_match_ods_code") + @patch("delete_pointers_by_id._batch_get_existing_pointers") + @patch("delete_pointers_by_id._batch_delete_pointers") + def test_no_matched_ods_codes(self, mock_batch_delete, mock_batch_get, mock_check): + mock_check.return_value = ([], ["RAT-id1", "RAT-id2"]) + mock_batch_get.return_value = ([], []) + mock_batch_delete.return_value = ([], []) + + result = _delete_pointers_by_id( + "test-table", "G3H9E", pointers_to_delete=["RAT-id1", "RAT-id2"] + ) + + assert result["ods_code_matched"]["count"] == 0 + assert result["ods_code_mismatched"]["count"] == 2 + assert result["deleted_pointers"]["count"] == 0 + + @patch("delete_pointers_by_id._check_pointers_match_ods_code") + @patch("delete_pointers_by_id._batch_get_existing_pointers") + @patch("delete_pointers_by_id._batch_delete_pointers") + def test_successful_deletion_flow( + self, mock_batch_delete, mock_batch_get, mock_check + ): + pointer_ids = ["G3H9E-id1", "G3H9E-id2", "G3H9E-id3"] + mock_check.return_value = (pointer_ids, []) + mock_batch_get.return_value = (pointer_ids, []) + mock_batch_delete.return_value = (pointer_ids, []) + + result = _delete_pointers_by_id( + "test-table", "G3H9E", pointers_to_delete=pointer_ids + ) + + assert result["pointers_to_delete"] == 3 + assert result["ods_code"] == "G3H9E" + assert result["ods_code_matched"]["count"] == 3 + assert result["ods_code_mismatched"]["count"] == 0 + assert result["pointer_not_found"]["count"] == 0 + assert result["deleted_pointers"]["count"] == 3 + assert result["failed_deletes"]["count"] == 0 + + @patch("delete_pointers_by_id._check_pointers_match_ods_code") + @patch("delete_pointers_by_id._batch_get_existing_pointers") + @patch("delete_pointers_by_id._batch_delete_pointers") + def test_partial_deletion_with_failures( + self, mock_batch_delete, mock_batch_get, mock_check + ): + matched = ["G3H9E-id1", "G3H9E-id2", "G3H9E-id3"] + mock_check.return_value = (matched, ["RAT-id1"]) + mock_batch_get.return_value = (matched, []) + mock_batch_delete.return_value = (["G3H9E-id1", "G3H9E-id2"], ["G3H9E-id3"]) + + result = _delete_pointers_by_id( + "test-table", "G3H9E", pointers_to_delete=matched + ["RAT-id1"] + ) + + assert result["ods_code_mismatched"]["count"] == 1 + assert result["deleted_pointers"]["count"] == 2 + assert result["failed_deletes"]["count"] == 1 + assert "G3H9E-id3" in result["failed_deletes"]["ids"] + + @patch("delete_pointers_by_id._check_pointers_match_ods_code") + @patch("delete_pointers_by_id._batch_get_existing_pointers") + @patch("delete_pointers_by_id._batch_delete_pointers") + def test_some_pointers_not_found( + self, mock_batch_delete, mock_batch_get, mock_check + ): + matched = ["G3H9E-id1", "G3H9E-id2", "G3H9E-id3"] + mock_check.return_value = (matched, []) + mock_batch_get.return_value = (["G3H9E-id1", "G3H9E-id2"], ["G3H9E-id3"]) + mock_batch_delete.return_value = (["G3H9E-id1", "G3H9E-id2"], []) + + result = _delete_pointers_by_id( + "test-table", "G3H9E", pointers_to_delete=matched + ) + + assert result["pointer_not_found"]["count"] == 1 + assert "G3H9E-id3" in result["pointer_not_found"]["ids"] + assert result["deleted_pointers"]["count"] == 2 From ffd911954a5b76c72c49ac34e55f9447edb1eaef Mon Sep 17 00:00:00 2001 From: Sandy Forrester Date: Wed, 19 Nov 2025 16:13:13 +0000 Subject: [PATCH 02/12] NRL-1798 Add output file --- scripts/delete_pointers_by_id.py | 143 ++++++++++++++++++++++--------- 1 file changed, 101 insertions(+), 42 deletions(-) diff --git a/scripts/delete_pointers_by_id.py b/scripts/delete_pointers_by_id.py index 80e0406b3..cc733639b 100755 --- a/scripts/delete_pointers_by_id.py +++ b/scripts/delete_pointers_by_id.py @@ -1,5 +1,7 @@ #!/usr/bin/env python import json +import os +import tempfile from datetime import datetime, timedelta, timezone from typing import Any, Dict, List @@ -156,11 +158,53 @@ def _batch_delete_pointers( return pointers_deleted, sorted(failed_deletes_set) +def _write_result_file(result: Dict[str, Any], output_file: str) -> None: + """ + Atomically write result dict to output_file as JSON. + Raises on failure. + """ + out_dir = os.path.dirname(os.path.abspath(output_file)) or "." + with tempfile.NamedTemporaryFile( + "w", delete=False, dir=out_dir, prefix=".tmp_delete_results_", suffix=".json" + ) as tf: + json.dump(result, tf, indent=2) + tf.flush() + os.fsync(tf.fileno()) + os.replace(tf.name, output_file) + + +def _print_summary(result: Dict[str, Any]) -> None: + """ + Print a concise summary of the result to stdout. + """ + + def count_from(field): + val = result.get(field) + if isinstance(val, dict): + return val.get("count", 0) + if isinstance(val, list): + return len(val) + return 0 + + print("Summary:") + print(f" pointers_to_delete: {result.get('pointers_to_delete')}") + print(f" ods_code_matched: {count_from('ods_code_matched')}") + print(f" ods_code_mismatched:{count_from('ods_code_mismatched')}") + print(f" pointer_not_found: {count_from('pointer_not_found')}") + print(f" deleted_pointers: {count_from('deleted_pointers')}") + print(f" failed_deletes: {count_from('failed_deletes')}") + if "_output_error" in result: + print(f" output_error: {result['_output_error']}") + if "deletes-took-secs" in result: + print(f" deletes-took-secs: {result.get('deletes-took-secs')}") + + def _delete_pointers_by_id( table_name: str, ods_code: str, pointers_to_delete: List[str] | None = None, pointers_file: str | None = None, + output_file: str | None = None, ) -> Dict[str, Any]: """ Delete pointers from DynamoDB table. @@ -175,6 +219,7 @@ def _delete_pointers_by_id( - ods_code: ODS code of the organisation that the pointers belong to - pointers_to_delete: list of pointer ids to delete - pointers_file: path to JSON file (array of objects with "id" field) or text file (one id per line) + - output_file: optional path to write the result JSON to (atomic replace) """ if pointers_to_delete is None and pointers_file is None: raise ValueError("Provide either pointers_to_delete or pointers_file") @@ -187,16 +232,25 @@ def _delete_pointers_by_id( pointers_to_delete = _load_pointers_from_file(pointers_file) if len(pointers_to_delete) == 0: - return { + result = { "pointers_to_delete": 0, "ods_code": ods_code, - "ods_code_matched": [], - "ods_code_mismatched": [], - "pointer_not_found": [], - "deleted_pointers": [], - "failed_deletes": [], + "ods_code_matched": {"count": 0, "ids": []}, + "ods_code_mismatched": {"count": 0, "ids": []}, + "pointer_not_found": {"count": 0, "ids": []}, + "deleted_pointers": {"count": 0, "ids": []}, + "failed_deletes": {"count": 0, "ids": []}, "deletes-took-secs": 0, } + if output_file: + try: + _write_result_file(result, output_file) + except Exception as exc: + result["_output_error"] = ( + f"Failed to write output_file {output_file}: {exc}" + ) + _print_summary(result) + return result start_time = datetime.now(tz=timezone.utc) @@ -214,7 +268,7 @@ def _delete_pointers_by_id( if len(matched_pointers) == 0: print(f"None of the pointer IDs are a match for ODS code {ods_code}. Exiting.") end_time = datetime.now(tz=timezone.utc) - return { + result = { "pointers_to_delete": len(pointers_to_delete), "ods_code": ods_code, "ods_code_matched": { @@ -225,20 +279,20 @@ def _delete_pointers_by_id( "count": len(mismatched_pointers), "ids": mismatched_pointers, }, - "pointer_not_found": { - "count": 0, - "ids": [], - }, - "deleted_pointers": { - "count": 0, - "ids": [], - }, - "failed_deletes": { - "count": 0, - "ids": [], - }, + "pointer_not_found": {"count": 0, "ids": []}, + "deleted_pointers": {"count": 0, "ids": []}, + "failed_deletes": {"count": 0, "ids": []}, "deletes-took-secs": timedelta.total_seconds(end_time - start_time), } + if output_file: + try: + _write_result_file(result, output_file) + except Exception as exc: + result["_output_error"] = ( + f"Failed to write output_file {output_file}: {exc}" + ) + _print_summary(result) + return result print(f"Checking existence of {len(matched_pointers)} pointers in {table_name}...") existing_pointers, not_found_pointers = _batch_get_existing_pointers( @@ -252,7 +306,7 @@ def _delete_pointers_by_id( if len(existing_pointers) == 0: print("No pointers found to delete. Exiting.") end_time = datetime.now(tz=timezone.utc) - return { + result = { "pointers_to_delete": len(pointers_to_delete), "ods_code": ods_code, "ods_code_matched": { @@ -267,16 +321,19 @@ def _delete_pointers_by_id( "count": len(not_found_pointers), "ids": not_found_pointers, }, - "deleted_pointers": { - "count": 0, - "ids": [], - }, - "failed_deletes": { - "count": 0, - "ids": [], - }, + "deleted_pointers": {"count": 0, "ids": []}, + "failed_deletes": {"count": 0, "ids": []}, "deletes-took-secs": timedelta.total_seconds(end_time - start_time), } + if output_file: + try: + _write_result_file(result, output_file) + except Exception as exc: + result["_output_error"] = ( + f"Failed to write output_file {output_file}: {exc}" + ) + _print_summary(result) + return result # Proceed with deletion using BatchWriteItem pointers_deleted, failed_deletes = _batch_delete_pointers( @@ -285,14 +342,10 @@ def _delete_pointers_by_id( end_time = datetime.now(tz=timezone.utc) - print(" Done") - return { + result = { "pointers_to_delete": len(pointers_to_delete), "ods_code": ods_code, - "ods_code_matched": { - "count": len(matched_pointers), - "ids": matched_pointers, - }, + "ods_code_matched": {"count": len(matched_pointers), "ids": matched_pointers}, "ods_code_mismatched": { "count": len(mismatched_pointers), "ids": mismatched_pointers, @@ -301,17 +354,23 @@ def _delete_pointers_by_id( "count": len(not_found_pointers), "ids": not_found_pointers, }, - "deleted_pointers": { - "count": len(pointers_deleted), - "ids": pointers_deleted, - }, - "failed_deletes": { - "count": len(failed_deletes), - "ids": failed_deletes, - }, + "deleted_pointers": {"count": len(pointers_deleted), "ids": pointers_deleted}, + "failed_deletes": {"count": len(failed_deletes), "ids": failed_deletes}, "deletes-took-secs": timedelta.total_seconds(end_time - start_time), } + if output_file: + try: + _write_result_file(result, output_file) + except Exception as exc: + result["_output_error"] = ( + f"Failed to write output_file {output_file}: {exc}" + ) + + _print_summary(result) + print(" Done") + return result + if __name__ == "__main__": fire.Fire(_delete_pointers_by_id) From 99c8f2926e53c84736b4d8dd22655ebcd7e0a26c Mon Sep 17 00:00:00 2001 From: Sandy Forrester Date: Thu, 20 Nov 2025 11:06:26 +0000 Subject: [PATCH 03/12] NRL-1798 Remove output file param --- scripts/delete_pointers_by_id.py | 94 +++--- scripts/tests/test_delete_pointers_by_id.py | 344 +++++--------------- 2 files changed, 135 insertions(+), 303 deletions(-) diff --git a/scripts/delete_pointers_by_id.py b/scripts/delete_pointers_by_id.py index cc733639b..4216ab534 100755 --- a/scripts/delete_pointers_by_id.py +++ b/scripts/delete_pointers_by_id.py @@ -59,6 +59,21 @@ def _load_pointers_from_file(pointers_file: str) -> list[str]: return [line.strip() for line in content.splitlines() if line.strip()] +def _write_result_file(result: Dict[str, Any], output_file: str) -> None: + """ + Atomically write result dict to output_file as JSON. + Raises on failure. + """ + out_dir = os.path.dirname(os.path.abspath(output_file)) or "." + with tempfile.NamedTemporaryFile( + "w", delete=False, dir=out_dir, prefix=".tmp_delete_results_", suffix=".json" + ) as tf: + json.dump(result, tf, indent=2) + tf.flush() + os.fsync(tf.fileno()) + os.replace(tf.name, output_file) + + def _check_pointers_match_ods_code( ods_code: str, pointer_ids: List[str] ) -> tuple[List[str], List[str]]: @@ -158,21 +173,6 @@ def _batch_delete_pointers( return pointers_deleted, sorted(failed_deletes_set) -def _write_result_file(result: Dict[str, Any], output_file: str) -> None: - """ - Atomically write result dict to output_file as JSON. - Raises on failure. - """ - out_dir = os.path.dirname(os.path.abspath(output_file)) or "." - with tempfile.NamedTemporaryFile( - "w", delete=False, dir=out_dir, prefix=".tmp_delete_results_", suffix=".json" - ) as tf: - json.dump(result, tf, indent=2) - tf.flush() - os.fsync(tf.fileno()) - os.replace(tf.name, output_file) - - def _print_summary(result: Dict[str, Any]) -> None: """ Print a concise summary of the result to stdout. @@ -204,7 +204,6 @@ def _delete_pointers_by_id( ods_code: str, pointers_to_delete: List[str] | None = None, pointers_file: str | None = None, - output_file: str | None = None, ) -> Dict[str, Any]: """ Delete pointers from DynamoDB table. @@ -219,7 +218,6 @@ def _delete_pointers_by_id( - ods_code: ODS code of the organisation that the pointers belong to - pointers_to_delete: list of pointer ids to delete - pointers_file: path to JSON file (array of objects with "id" field) or text file (one id per line) - - output_file: optional path to write the result JSON to (atomic replace) """ if pointers_to_delete is None and pointers_file is None: raise ValueError("Provide either pointers_to_delete or pointers_file") @@ -231,6 +229,14 @@ def _delete_pointers_by_id( if pointers_file: pointers_to_delete = _load_pointers_from_file(pointers_file) + # establish start_time early so any early-return can use it for filename + start_time = datetime.now(tz=timezone.utc) + stamp = start_time.strftime("%Y%m%dT%H%M%SZ") + script_dir = os.path.dirname(os.path.abspath(__file__)) or "." + output_file_path = os.path.join( + script_dir, f"delete_results_{ods_code}_{stamp}.json" + ) + if len(pointers_to_delete) == 0: result = { "pointers_to_delete": 0, @@ -242,18 +248,15 @@ def _delete_pointers_by_id( "failed_deletes": {"count": 0, "ids": []}, "deletes-took-secs": 0, } - if output_file: - try: - _write_result_file(result, output_file) - except Exception as exc: - result["_output_error"] = ( - f"Failed to write output_file {output_file}: {exc}" - ) + try: + _write_result_file(result, output_file_path) + except Exception as exc: + result["_output_error"] = ( + f"Failed to write result file {output_file_path}: {exc}" + ) _print_summary(result) return result - start_time = datetime.now(tz=timezone.utc) - print( f"Validating {len(pointers_to_delete)} pointers against ODS code {ods_code}..." ) @@ -284,13 +287,12 @@ def _delete_pointers_by_id( "failed_deletes": {"count": 0, "ids": []}, "deletes-took-secs": timedelta.total_seconds(end_time - start_time), } - if output_file: - try: - _write_result_file(result, output_file) - except Exception as exc: - result["_output_error"] = ( - f"Failed to write output_file {output_file}: {exc}" - ) + try: + _write_result_file(result, output_file_path) + except Exception as exc: + result["_output_error"] = ( + f"Failed to write result file {output_file_path}: {exc}" + ) _print_summary(result) return result @@ -325,13 +327,12 @@ def _delete_pointers_by_id( "failed_deletes": {"count": 0, "ids": []}, "deletes-took-secs": timedelta.total_seconds(end_time - start_time), } - if output_file: - try: - _write_result_file(result, output_file) - except Exception as exc: - result["_output_error"] = ( - f"Failed to write output_file {output_file}: {exc}" - ) + try: + _write_result_file(result, output_file_path) + except Exception as exc: + result["_output_error"] = ( + f"Failed to write result file {output_file_path}: {exc}" + ) _print_summary(result) return result @@ -359,13 +360,12 @@ def _delete_pointers_by_id( "deletes-took-secs": timedelta.total_seconds(end_time - start_time), } - if output_file: - try: - _write_result_file(result, output_file) - except Exception as exc: - result["_output_error"] = ( - f"Failed to write output_file {output_file}: {exc}" - ) + try: + _write_result_file(result, output_file_path) + except Exception as exc: + result["_output_error"] = ( + f"Failed to write result file {output_file_path}: {exc}" + ) _print_summary(result) print(" Done") diff --git a/scripts/tests/test_delete_pointers_by_id.py b/scripts/tests/test_delete_pointers_by_id.py index 3c220b94e..880b27fcf 100644 --- a/scripts/tests/test_delete_pointers_by_id.py +++ b/scripts/tests/test_delete_pointers_by_id.py @@ -1,4 +1,5 @@ -from unittest.mock import patch +import json +from unittest.mock import MagicMock, patch import pytest from delete_pointers_by_id import ( @@ -11,76 +12,47 @@ class TestLoadPointersFromFile: - """Tests for file parsing.""" - @patch("builtins.open", create=True) def test_load_json_array(self, mock_open): mock_open.return_value.__enter__.return_value.read.return_value = ( '[{"id": "G3H9E-id1"}, {"id": "G3H9E-id2"}]' ) - result = _load_pointers_from_file("./pointers.json") - - assert len(result) == 2 - assert "G3H9E-id1" in result - assert "G3H9E-id2" in result + assert result == ["G3H9E-id1", "G3H9E-id2"] @patch("builtins.open", create=True) def test_load_json_with_malformed_entries(self, mock_open): mock_open.return_value.__enter__.return_value.read.return_value = '[{"id": "G3H9E-id1"}, {"patient_number": "123"}, {"id": ""}, {"id": "G3H9E-id2"}]' - result = _load_pointers_from_file("./pointers.json") - - assert len(result) == 2 - assert "G3H9E-id1" in result - assert "G3H9E-id2" in result + assert result == ["G3H9E-id1", "G3H9E-id2"] @patch("builtins.open", create=True) def test_load_plain_text_file(self, mock_open): mock_open.return_value.__enter__.return_value.read.return_value = ( "G3H9E-id1\nG3H9E-id2\nG3H9E-id3" ) - - result = _load_pointers_from_file("./ids.txt") - - assert len(result) == 3 - assert "G3H9E-id1" in result - assert "G3H9E-id3" in result - - @patch("builtins.open", create=True) - def test_load_plain_text_with_empty_lines(self, mock_open): - mock_open.return_value.__enter__.return_value.read.return_value = ( - "G3H9E-id1\n\nG3H9E-id2\n \nG3H9E-id3" - ) - result = _load_pointers_from_file("./ids.txt") - - assert len(result) == 3 - assert "G3H9E-id1" in result + assert result == ["G3H9E-id1", "G3H9E-id2", "G3H9E-id3"] @patch("builtins.open", create=True) def test_load_empty_file(self, mock_open): mock_open.return_value.__enter__.return_value.read.return_value = "" - result = _load_pointers_from_file("./empty.txt") - - assert len(result) == 0 + assert result == [] @patch("builtins.open", create=True) def test_invalid_json(self, mock_open): mock_open.return_value.__enter__.return_value.read.return_value = ( '[{"id": "G3H9E-id1"' ) - with pytest.raises(ValueError, match="Failed to parse JSON file"): - _load_pointers_from_file("./bad.json") + _load_pointers_from_file("./invalid.json") @patch("builtins.open", create=True) def test_json_not_array(self, mock_open): mock_open.return_value.__enter__.return_value.read.return_value = ( '{"id": "G3H9E-id1"}' ) - with pytest.raises( ValueError, match="JSON file must contain an array of objects" ): @@ -88,287 +60,147 @@ def test_json_not_array(self, mock_open): class TestCheckPointersMatchOdsCode: - """Tests for ODS code validation.""" - - def test_all_pointers_match_ods_code(self): - ods_code = "G3H9E" - pointer_ids = [ - "G3H9E-1794b612-c617-4e0b-8671-ac7baacc4610", - "G3H9E-5d8f1038-19f8-11f0-9d79-2dc2b9c1e380", - ] - matched, mismatched = _check_pointers_match_ods_code(ods_code, pointer_ids) - - assert len(matched) == 2 - assert len(mismatched) == 0 - assert matched == pointer_ids - - def test_no_pointers_match_ods_code(self): - ods_code = "G3H9E" - pointer_ids = [ - "RAT-1794b612-c617-4e0b-8671-ac7baacc4610", - "RKL-5d8f1038-19f8-11f0-9d79-2dc2b9c1e380", - ] - matched, mismatched = _check_pointers_match_ods_code(ods_code, pointer_ids) - - assert len(matched) == 0 - assert len(mismatched) == 2 - assert mismatched == pointer_ids - - def test_mixed_matching_and_mismatching_pointers(self): - ods_code = "G3H9E" - pointer_ids = [ - "G3H9E-1794b612-c617-4e0b-8671-ac7baacc4610", - "RAT-5d8f1038-19f8-11f0-9d79-2dc2b9c1e380", - "G3H9E-abc123", - ] - matched, mismatched = _check_pointers_match_ods_code(ods_code, pointer_ids) - - assert len(matched) == 2 - assert len(mismatched) == 1 - assert "G3H9E-1794b612-c617-4e0b-8671-ac7baacc4610" in matched - assert "G3H9E-abc123" in matched - assert "RAT-5d8f1038-19f8-11f0-9d79-2dc2b9c1e380" in mismatched - - def test_empty_pointer_list(self): - ods_code = "G3H9E" - matched, mismatched = _check_pointers_match_ods_code(ods_code, []) - - assert len(matched) == 0 - assert len(mismatched) == 0 + def test_all_match(self): + ods = "G3H9E" + ids = ["G3H9E-a", "G3H9E-b"] + matched, mismatched = _check_pointers_match_ods_code(ods, ids) + assert matched == ids and mismatched == [] + + def test_none_match(self): + ods = "G3H9E" + ids = ["X-a", "Y-b"] + matched, mismatched = _check_pointers_match_ods_code(ods, ids) + assert matched == [] and mismatched == ids + + def test_mixed(self): + ods = "G3H9E" + ids = ["G3H9E-a", "X-b", "G3H9E-c"] + matched, mismatched = _check_pointers_match_ods_code(ods, ids) + assert matched == ["G3H9E-a", "G3H9E-c"] + assert mismatched == ["X-b"] class TestBatchGetExistingPointers: - """Tests for batch existence checking.""" - @patch("delete_pointers_by_id.dynamodb") - def test_all_pointers_exist(self, mock_dynamodb): - table_name = "test-table" - pointer_ids = ["G3H9E-id1", "G3H9E-id2"] - - mock_dynamodb.batch_get_item.return_value = { - "Responses": { - table_name: [ - {"pk": {"S": "D#G3H9E-id1"}}, - {"pk": {"S": "D#G3H9E-id2"}}, - ] - } - } - - existing, not_found = _batch_get_existing_pointers(table_name, pointer_ids) - - assert len(existing) == 2 - assert len(not_found) == 0 - assert "G3H9E-id1" in existing - assert "G3H9E-id2" in existing - - @patch("delete_pointers_by_id.dynamodb") - def test_no_pointers_exist(self, mock_dynamodb): - table_name = "test-table" - pointer_ids = ["G3H9E-id1", "G3H9E-id2"] - - mock_dynamodb.batch_get_item.return_value = {"Responses": {table_name: []}} - - existing, not_found = _batch_get_existing_pointers(table_name, pointer_ids) - - assert len(existing) == 0 - assert len(not_found) == 2 - assert "G3H9E-id1" in not_found - assert "G3H9E-id2" in not_found - - @patch("delete_pointers_by_id.dynamodb") - def test_mixed_existing_and_not_found(self, mock_dynamodb): - table_name = "test-table" - pointer_ids = ["G3H9E-id1", "G3H9E-id2", "G3H9E-id3"] - + def test_all_exist(self, mock_dynamodb): + table = "t" + ids = ["G3H9E-1", "G3H9E-2"] mock_dynamodb.batch_get_item.return_value = { "Responses": { - table_name: [ - {"pk": {"S": "D#G3H9E-id1"}}, - {"pk": {"S": "D#G3H9E-id3"}}, - ] + table: [{"pk": {"S": "D#G3H9E-1"}}, {"pk": {"S": "D#G3H9E-2"}}] } } - - existing, not_found = _batch_get_existing_pointers(table_name, pointer_ids) - - assert len(existing) == 2 - assert len(not_found) == 1 - assert "G3H9E-id1" in existing - assert "G3H9E-id3" in existing - assert "G3H9E-id2" in not_found + existing, not_found = _batch_get_existing_pointers(table, ids) + assert existing == ids and not_found == [] @patch("delete_pointers_by_id.dynamodb") - def test_batch_processing_large_list(self, mock_dynamodb): - table_name = "test-table" - pointer_ids = [f"G3H9E-id{i}" for i in range(150)] - - mock_dynamodb.batch_get_item.return_value = { - "Responses": { - table_name: [{"pk": {"S": f"D#G3H9E-id{i}"}} for i in range(150)] - } - } - - existing, not_found = _batch_get_existing_pointers(table_name, pointer_ids) - - assert mock_dynamodb.batch_get_item.call_count == 2 - assert len(existing) == 150 - assert len(not_found) == 0 + def test_none_exist(self, mock_dynamodb): + table = "t" + ids = ["G3H9E-1", "G3H9E-2"] + mock_dynamodb.batch_get_item.return_value = {"Responses": {table: []}} + existing, not_found = _batch_get_existing_pointers(table, ids) + assert existing == [] and not_found == ids class TestBatchDeletePointers: - """Tests for batch deletion.""" - @patch("delete_pointers_by_id.dynamodb") - def test_all_pointers_deleted_successfully(self, mock_dynamodb): - table_name = "test-table" - pointer_ids = ["G3H9E-id1", "G3H9E-id2"] - + def test_all_deleted(self, mock_dynamodb): + table = "t" + ids = ["G3H9E-1", "G3H9E-2"] mock_dynamodb.batch_write_item.return_value = {"UnprocessedItems": {}} - - deleted, failed = _batch_delete_pointers(table_name, pointer_ids) - - assert len(deleted) == 2 - assert len(failed) == 0 - assert "G3H9E-id1" in deleted - assert "G3H9E-id2" in deleted + deleted, failed = _batch_delete_pointers(table, ids) + assert deleted == ids and failed == [] @patch("delete_pointers_by_id.dynamodb") - def test_some_pointers_unprocessed(self, mock_dynamodb): - table_name = "test-table" - pointer_ids = ["G3H9E-id1", "G3H9E-id2", "G3H9E-id3"] - + def test_some_unprocessed(self, mock_dynamodb): + table = "t" + ids = ["G3H9E-1", "G3H9E-2", "G3H9E-3"] mock_dynamodb.batch_write_item.return_value = { "UnprocessedItems": { - table_name: [ + table: [ { "DeleteRequest": { - "Key": { - "pk": {"S": "D#G3H9E-id3"}, - "sk": {"S": "D#G3H9E-id3"}, - } + "Key": {"pk": {"S": "D#G3H9E-3"}, "sk": {"S": "D#G3H9E-3"}} } } ] } } - - deleted, failed = _batch_delete_pointers(table_name, pointer_ids) - - assert len(deleted) == 2 - assert len(failed) == 1 - assert "G3H9E-id1" in deleted - assert "G3H9E-id2" in deleted - assert "G3H9E-id3" in failed - - @patch("delete_pointers_by_id.dynamodb") - def test_batch_processing_large_list(self, mock_dynamodb): - table_name = "test-table" - pointer_ids = [f"G3H9E-id{i}" for i in range(60)] - - mock_dynamodb.batch_write_item.return_value = {"UnprocessedItems": {}} - - deleted, failed = _batch_delete_pointers(table_name, pointer_ids) - - assert mock_dynamodb.batch_write_item.call_count == 3 - assert len(deleted) == 60 - assert len(failed) == 0 + deleted, failed = _batch_delete_pointers(table, ids) + assert deleted == ["G3H9E-1", "G3H9E-2"] + assert failed == ["G3H9E-3"] class TestDeletePointersById: - """Tests for main delete function.""" - - def test_missing_required_parameters(self): - with pytest.raises( - ValueError, match="Provide either pointers_to_delete or pointers_file" - ): - _delete_pointers_by_id("test-table", "G3H9E") + def test_missing_params(self): + with pytest.raises(ValueError): + _delete_pointers_by_id("t", "G3H9E") - def test_both_parameters_provided(self): - with pytest.raises( - ValueError, - match="Provide either pointers_to_delete or pointers_file, not both", - ): + def test_both_params_provided(self): + with pytest.raises(ValueError): _delete_pointers_by_id( - "test-table", - "G3H9E", - pointers_to_delete=["G3H9E-id1"], - pointers_file="./ids.json", + "t", "G3H9E", pointers_to_delete=["a"], pointers_file="./f" ) + @patch("delete_pointers_by_id._write_result_file") @patch("delete_pointers_by_id._check_pointers_match_ods_code") @patch("delete_pointers_by_id._batch_get_existing_pointers") @patch("delete_pointers_by_id._batch_delete_pointers") - def test_no_matched_ods_codes(self, mock_batch_delete, mock_batch_get, mock_check): - mock_check.return_value = ([], ["RAT-id1", "RAT-id2"]) - mock_batch_get.return_value = ([], []) - mock_batch_delete.return_value = ([], []) - + def test_no_matched_ods_codes(self, mock_delete, mock_get, mock_check, mock_write): + mock_check.return_value = ([], ["RAT-1", "RAT-2"]) result = _delete_pointers_by_id( - "test-table", "G3H9E", pointers_to_delete=["RAT-id1", "RAT-id2"] + "t", "G3H9E", pointers_to_delete=["RAT-1", "RAT-2"] ) - assert result["ods_code_matched"]["count"] == 0 assert result["ods_code_mismatched"]["count"] == 2 - assert result["deleted_pointers"]["count"] == 0 + mock_write.assert_called_once() + @patch("delete_pointers_by_id._write_result_file") @patch("delete_pointers_by_id._check_pointers_match_ods_code") @patch("delete_pointers_by_id._batch_get_existing_pointers") @patch("delete_pointers_by_id._batch_delete_pointers") - def test_successful_deletion_flow( - self, mock_batch_delete, mock_batch_get, mock_check - ): - pointer_ids = ["G3H9E-id1", "G3H9E-id2", "G3H9E-id3"] - mock_check.return_value = (pointer_ids, []) - mock_batch_get.return_value = (pointer_ids, []) - mock_batch_delete.return_value = (pointer_ids, []) - - result = _delete_pointers_by_id( - "test-table", "G3H9E", pointers_to_delete=pointer_ids - ) - - assert result["pointers_to_delete"] == 3 - assert result["ods_code"] == "G3H9E" - assert result["ods_code_matched"]["count"] == 3 - assert result["ods_code_mismatched"]["count"] == 0 + def test_successful_flow(self, mock_delete, mock_get, mock_check, mock_write): + ids = ["G3H9E-1", "G3H9E-2"] + mock_check.return_value = (ids, []) + mock_get.return_value = (ids, []) + mock_delete.return_value = (ids, []) + result = _delete_pointers_by_id("t", "G3H9E", pointers_to_delete=ids) + assert result["pointers_to_delete"] == 2 + assert result["deleted_pointers"]["count"] == 2 assert result["pointer_not_found"]["count"] == 0 - assert result["deleted_pointers"]["count"] == 3 - assert result["failed_deletes"]["count"] == 0 + mock_write.assert_called_once() + @patch("delete_pointers_by_id._write_result_file") @patch("delete_pointers_by_id._check_pointers_match_ods_code") @patch("delete_pointers_by_id._batch_get_existing_pointers") @patch("delete_pointers_by_id._batch_delete_pointers") - def test_partial_deletion_with_failures( - self, mock_batch_delete, mock_batch_get, mock_check - ): - matched = ["G3H9E-id1", "G3H9E-id2", "G3H9E-id3"] - mock_check.return_value = (matched, ["RAT-id1"]) - mock_batch_get.return_value = (matched, []) - mock_batch_delete.return_value = (["G3H9E-id1", "G3H9E-id2"], ["G3H9E-id3"]) - + def test_partial_with_failures(self, mock_delete, mock_get, mock_check, mock_write): + matched = ["G3H9E-1", "G3H9E-2", "G3H9E-3"] + mock_check.return_value = (matched, ["RAT-1"]) + mock_get.return_value = (matched, []) + mock_delete.return_value = (["G3H9E-1", "G3H9E-2"], ["G3H9E-3"]) result = _delete_pointers_by_id( - "test-table", "G3H9E", pointers_to_delete=matched + ["RAT-id1"] + "t", "G3H9E", pointers_to_delete=matched + ["RAT-1"] ) - assert result["ods_code_mismatched"]["count"] == 1 assert result["deleted_pointers"]["count"] == 2 assert result["failed_deletes"]["count"] == 1 - assert "G3H9E-id3" in result["failed_deletes"]["ids"] + assert "G3H9E-3" in result["failed_deletes"]["ids"] + mock_write.assert_called_once() + @patch("delete_pointers_by_id._write_result_file") @patch("delete_pointers_by_id._check_pointers_match_ods_code") @patch("delete_pointers_by_id._batch_get_existing_pointers") @patch("delete_pointers_by_id._batch_delete_pointers") def test_some_pointers_not_found( - self, mock_batch_delete, mock_batch_get, mock_check + self, mock_delete, mock_get, mock_check, mock_write ): - matched = ["G3H9E-id1", "G3H9E-id2", "G3H9E-id3"] + matched = ["G3H9E-1", "G3H9E-2", "G3H9E-3"] mock_check.return_value = (matched, []) - mock_batch_get.return_value = (["G3H9E-id1", "G3H9E-id2"], ["G3H9E-id3"]) - mock_batch_delete.return_value = (["G3H9E-id1", "G3H9E-id2"], []) - - result = _delete_pointers_by_id( - "test-table", "G3H9E", pointers_to_delete=matched - ) - + mock_get.return_value = (["G3H9E-1", "G3H9E-2"], ["G3H9E-3"]) + mock_delete.return_value = (["G3H9E-1", "G3H9E-2"], []) + result = _delete_pointers_by_id("t", "G3H9E", pointers_to_delete=matched) assert result["pointer_not_found"]["count"] == 1 - assert "G3H9E-id3" in result["pointer_not_found"]["ids"] + assert "G3H9E-3" in result["pointer_not_found"]["ids"] assert result["deleted_pointers"]["count"] == 2 + mock_write.assert_called_once() From 2f192acf3f87e1c436f7ba14b27ff50a0fa0f2d0 Mon Sep 17 00:00:00 2001 From: Sandy Forrester Date: Thu, 20 Nov 2025 11:51:41 +0000 Subject: [PATCH 04/12] NRL-1798 Build result in separate function --- scripts/delete_pointers_by_id.py | 188 ++++++++++---------- scripts/tests/test_delete_pointers_by_id.py | 42 +++-- 2 files changed, 120 insertions(+), 110 deletions(-) diff --git a/scripts/delete_pointers_by_id.py b/scripts/delete_pointers_by_id.py index 4216ab534..386532a45 100755 --- a/scripts/delete_pointers_by_id.py +++ b/scripts/delete_pointers_by_id.py @@ -59,6 +59,44 @@ def _load_pointers_from_file(pointers_file: str) -> list[str]: return [line.strip() for line in content.splitlines() if line.strip()] +def _build_and_write_result( + pointers_to_delete, + ods_code, + matched_pointers, + mismatched_pointers, + not_found_pointers, + pointers_deleted, + failed_deletes, + start_time, + end_time, + output_file_path, +): + result = { + "pointers_to_delete": len(pointers_to_delete), + "ods_code": ods_code, + "ods_code_matched": {"count": len(matched_pointers), "ids": matched_pointers}, + "ods_code_mismatched": { + "count": len(mismatched_pointers), + "ids": mismatched_pointers, + }, + "pointer_not_found": { + "count": len(not_found_pointers), + "ids": not_found_pointers, + }, + "deleted_pointers": {"count": len(pointers_deleted), "ids": pointers_deleted}, + "failed_deletes": {"count": len(failed_deletes), "ids": failed_deletes}, + "deletes-took-secs": timedelta.total_seconds(end_time - start_time), + } + try: + _write_result_file(result, output_file_path) + except Exception as exc: + result["_output_error"] = ( + f"Failed to write result file {output_file_path}: {exc}" + ) + _print_summary(result) + return result + + def _write_result_file(result: Dict[str, Any], output_file: str) -> None: """ Atomically write result dict to output_file as JSON. @@ -237,25 +275,20 @@ def _delete_pointers_by_id( script_dir, f"delete_results_{ods_code}_{stamp}.json" ) - if len(pointers_to_delete) == 0: - result = { - "pointers_to_delete": 0, - "ods_code": ods_code, - "ods_code_matched": {"count": 0, "ids": []}, - "ods_code_mismatched": {"count": 0, "ids": []}, - "pointer_not_found": {"count": 0, "ids": []}, - "deleted_pointers": {"count": 0, "ids": []}, - "failed_deletes": {"count": 0, "ids": []}, - "deletes-took-secs": 0, - } - try: - _write_result_file(result, output_file_path) - except Exception as exc: - result["_output_error"] = ( - f"Failed to write result file {output_file_path}: {exc}" - ) - _print_summary(result) - return result + if not pointers_to_delete: + end_time = datetime.now(tz=timezone.utc) + return _build_and_write_result( + pointers_to_delete, + ods_code, + [], + [], + [], + [], + [], + start_time, + end_time, + output_file_path, + ) print( f"Validating {len(pointers_to_delete)} pointers against ODS code {ods_code}..." @@ -268,33 +301,21 @@ def _delete_pointers_by_id( f"Validate pointer's ODS code: {len(matched_pointers)} matched, {len(mismatched_pointers)} mismatched" ) - if len(matched_pointers) == 0: + if not matched_pointers: print(f"None of the pointer IDs are a match for ODS code {ods_code}. Exiting.") end_time = datetime.now(tz=timezone.utc) - result = { - "pointers_to_delete": len(pointers_to_delete), - "ods_code": ods_code, - "ods_code_matched": { - "count": len(matched_pointers), - "ids": matched_pointers, - }, - "ods_code_mismatched": { - "count": len(mismatched_pointers), - "ids": mismatched_pointers, - }, - "pointer_not_found": {"count": 0, "ids": []}, - "deleted_pointers": {"count": 0, "ids": []}, - "failed_deletes": {"count": 0, "ids": []}, - "deletes-took-secs": timedelta.total_seconds(end_time - start_time), - } - try: - _write_result_file(result, output_file_path) - except Exception as exc: - result["_output_error"] = ( - f"Failed to write result file {output_file_path}: {exc}" - ) - _print_summary(result) - return result + return _build_and_write_result( + pointers_to_delete, + ods_code, + matched_pointers, + mismatched_pointers, + [], + [], + [], + start_time, + end_time, + output_file_path, + ) print(f"Checking existence of {len(matched_pointers)} pointers in {table_name}...") existing_pointers, not_found_pointers = _batch_get_existing_pointers( @@ -305,36 +326,21 @@ def _delete_pointers_by_id( f"Found {len(existing_pointers)} existing pointers to delete, {len(not_found_pointers)} not found." ) - if len(existing_pointers) == 0: + if not existing_pointers: print("No pointers found to delete. Exiting.") end_time = datetime.now(tz=timezone.utc) - result = { - "pointers_to_delete": len(pointers_to_delete), - "ods_code": ods_code, - "ods_code_matched": { - "count": len(matched_pointers), - "ids": matched_pointers, - }, - "ods_code_mismatched": { - "count": len(mismatched_pointers), - "ids": mismatched_pointers, - }, - "pointer_not_found": { - "count": len(not_found_pointers), - "ids": not_found_pointers, - }, - "deleted_pointers": {"count": 0, "ids": []}, - "failed_deletes": {"count": 0, "ids": []}, - "deletes-took-secs": timedelta.total_seconds(end_time - start_time), - } - try: - _write_result_file(result, output_file_path) - except Exception as exc: - result["_output_error"] = ( - f"Failed to write result file {output_file_path}: {exc}" - ) - _print_summary(result) - return result + return _build_and_write_result( + pointers_to_delete, + ods_code, + matched_pointers, + mismatched_pointers, + not_found_pointers, + [], + [], + start_time, + end_time, + output_file_path, + ) # Proceed with deletion using BatchWriteItem pointers_deleted, failed_deletes = _batch_delete_pointers( @@ -342,32 +348,18 @@ def _delete_pointers_by_id( ) end_time = datetime.now(tz=timezone.utc) - - result = { - "pointers_to_delete": len(pointers_to_delete), - "ods_code": ods_code, - "ods_code_matched": {"count": len(matched_pointers), "ids": matched_pointers}, - "ods_code_mismatched": { - "count": len(mismatched_pointers), - "ids": mismatched_pointers, - }, - "pointer_not_found": { - "count": len(not_found_pointers), - "ids": not_found_pointers, - }, - "deleted_pointers": {"count": len(pointers_deleted), "ids": pointers_deleted}, - "failed_deletes": {"count": len(failed_deletes), "ids": failed_deletes}, - "deletes-took-secs": timedelta.total_seconds(end_time - start_time), - } - - try: - _write_result_file(result, output_file_path) - except Exception as exc: - result["_output_error"] = ( - f"Failed to write result file {output_file_path}: {exc}" - ) - - _print_summary(result) + result = _build_and_write_result( + pointers_to_delete, + ods_code, + matched_pointers, + mismatched_pointers, + not_found_pointers, + pointers_deleted, + failed_deletes, + start_time, + end_time, + output_file_path, + ) print(" Done") return result diff --git a/scripts/tests/test_delete_pointers_by_id.py b/scripts/tests/test_delete_pointers_by_id.py index 880b27fcf..cddf4a558 100644 --- a/scripts/tests/test_delete_pointers_by_id.py +++ b/scripts/tests/test_delete_pointers_by_id.py @@ -142,43 +142,57 @@ def test_both_params_provided(self): "t", "G3H9E", pointers_to_delete=["a"], pointers_file="./f" ) - @patch("delete_pointers_by_id._write_result_file") + @patch("delete_pointers_by_id._build_and_write_result") @patch("delete_pointers_by_id._check_pointers_match_ods_code") @patch("delete_pointers_by_id._batch_get_existing_pointers") @patch("delete_pointers_by_id._batch_delete_pointers") - def test_no_matched_ods_codes(self, mock_delete, mock_get, mock_check, mock_write): + def test_no_matched_ods_codes(self, mock_delete, mock_get, mock_check, mock_build): mock_check.return_value = ([], ["RAT-1", "RAT-2"]) + mock_build.return_value = { + "ods_code_matched": {"count": 0}, + "ods_code_mismatched": {"count": 2}, + } result = _delete_pointers_by_id( "t", "G3H9E", pointers_to_delete=["RAT-1", "RAT-2"] ) assert result["ods_code_matched"]["count"] == 0 assert result["ods_code_mismatched"]["count"] == 2 - mock_write.assert_called_once() + mock_build.assert_called_once() - @patch("delete_pointers_by_id._write_result_file") + @patch("delete_pointers_by_id._build_and_write_result") @patch("delete_pointers_by_id._check_pointers_match_ods_code") @patch("delete_pointers_by_id._batch_get_existing_pointers") @patch("delete_pointers_by_id._batch_delete_pointers") - def test_successful_flow(self, mock_delete, mock_get, mock_check, mock_write): + def test_successful_flow(self, mock_delete, mock_get, mock_check, mock_build): ids = ["G3H9E-1", "G3H9E-2"] mock_check.return_value = (ids, []) mock_get.return_value = (ids, []) mock_delete.return_value = (ids, []) + mock_build.return_value = { + "pointers_to_delete": 2, + "deleted_pointers": {"count": 2}, + "pointer_not_found": {"count": 0}, + } result = _delete_pointers_by_id("t", "G3H9E", pointers_to_delete=ids) assert result["pointers_to_delete"] == 2 assert result["deleted_pointers"]["count"] == 2 assert result["pointer_not_found"]["count"] == 0 - mock_write.assert_called_once() + mock_build.assert_called_once() - @patch("delete_pointers_by_id._write_result_file") + @patch("delete_pointers_by_id._build_and_write_result") @patch("delete_pointers_by_id._check_pointers_match_ods_code") @patch("delete_pointers_by_id._batch_get_existing_pointers") @patch("delete_pointers_by_id._batch_delete_pointers") - def test_partial_with_failures(self, mock_delete, mock_get, mock_check, mock_write): + def test_partial_with_failures(self, mock_delete, mock_get, mock_check, mock_build): matched = ["G3H9E-1", "G3H9E-2", "G3H9E-3"] mock_check.return_value = (matched, ["RAT-1"]) mock_get.return_value = (matched, []) mock_delete.return_value = (["G3H9E-1", "G3H9E-2"], ["G3H9E-3"]) + mock_build.return_value = { + "ods_code_mismatched": {"count": 1}, + "deleted_pointers": {"count": 2}, + "failed_deletes": {"count": 1, "ids": ["G3H9E-3"]}, + } result = _delete_pointers_by_id( "t", "G3H9E", pointers_to_delete=matched + ["RAT-1"] ) @@ -186,21 +200,25 @@ def test_partial_with_failures(self, mock_delete, mock_get, mock_check, mock_wri assert result["deleted_pointers"]["count"] == 2 assert result["failed_deletes"]["count"] == 1 assert "G3H9E-3" in result["failed_deletes"]["ids"] - mock_write.assert_called_once() + mock_build.assert_called_once() - @patch("delete_pointers_by_id._write_result_file") + @patch("delete_pointers_by_id._build_and_write_result") @patch("delete_pointers_by_id._check_pointers_match_ods_code") @patch("delete_pointers_by_id._batch_get_existing_pointers") @patch("delete_pointers_by_id._batch_delete_pointers") def test_some_pointers_not_found( - self, mock_delete, mock_get, mock_check, mock_write + self, mock_delete, mock_get, mock_check, mock_build ): matched = ["G3H9E-1", "G3H9E-2", "G3H9E-3"] mock_check.return_value = (matched, []) mock_get.return_value = (["G3H9E-1", "G3H9E-2"], ["G3H9E-3"]) mock_delete.return_value = (["G3H9E-1", "G3H9E-2"], []) + mock_build.return_value = { + "pointer_not_found": {"count": 1, "ids": ["G3H9E-3"]}, + "deleted_pointers": {"count": 2}, + } result = _delete_pointers_by_id("t", "G3H9E", pointers_to_delete=matched) assert result["pointer_not_found"]["count"] == 1 assert "G3H9E-3" in result["pointer_not_found"]["ids"] assert result["deleted_pointers"]["count"] == 2 - mock_write.assert_called_once() + mock_build.assert_called_once() From 884a3d06311c315aa77ad237c31ee6761ad8274d Mon Sep 17 00:00:00 2001 From: Sandy Forrester Date: Mon, 24 Nov 2025 17:41:00 +0000 Subject: [PATCH 05/12] NRL-1798 Add dataclass and update tests --- scripts/delete_pointers_by_id.py | 246 +++++++++++--------- scripts/tests/test_delete_pointers_by_id.py | 116 +++++++-- 2 files changed, 236 insertions(+), 126 deletions(-) diff --git a/scripts/delete_pointers_by_id.py b/scripts/delete_pointers_by_id.py index 386532a45..ef6cc558d 100755 --- a/scripts/delete_pointers_by_id.py +++ b/scripts/delete_pointers_by_id.py @@ -2,6 +2,7 @@ import json import os import tempfile +from dataclasses import dataclass from datetime import datetime, timedelta, timezone from typing import Any, Dict, List @@ -19,89 +20,109 @@ def _load_pointers_from_file(pointers_file: str) -> list[str]: Returns a list of pointer id strings. Prints a warning for skipped malformed JSON entries. """ - with open(pointers_file, "r") as fh: - content = fh.read().strip() + with open(pointers_file, "r") as file: + content = file.read().strip() if not content: return [] - # JSON path if content.startswith("[") or content.startswith("{"): - try: - data = json.loads(content) - except json.JSONDecodeError as e: - raise ValueError(f"Failed to parse JSON file {pointers_file}: {e}") from e - - if not isinstance(data, list): - raise ValueError("JSON file must contain an array of objects") - - parsed_ids: list[str] = [] - skipped_count = 0 - for item in data: - if ( - isinstance(item, dict) - and "id" in item - and isinstance(item["id"], str) - and item["id"].strip() - ): - parsed_ids.append(item["id"].strip()) - else: - skipped_count += 1 + return _parse_json_pointers(content, pointers_file) + + return _parse_plain_text_pointers(content) - if skipped_count: - print( - f"Warning: skipped {skipped_count} malformed entries in JSON file {pointers_file}" - ) - return parsed_ids +def _parse_json_pointers(content: str, pointers_file: str) -> list[str]: + + try: + data = json.loads(content) + except json.JSONDecodeError as e: + raise ValueError(f"Failed to parse JSON file {pointers_file}: {e}") from e + + if not isinstance(data, list): + raise ValueError("JSON file must contain an array of objects") + + parsed_ids: list[str] = [] + skipped_count = 0 + + for item in data: + if _is_valid_pointer(item): + parsed_ids.append(item["id"].strip()) + else: + skipped_count += 1 - # Plain text fallback + if skipped_count: + # TODO should we log these? + print( + f"Warning: skipped {skipped_count} malformed entries in JSON file {pointers_file}" + ) + + return parsed_ids + + +def _is_valid_pointer(item: Any) -> bool: + return ( + isinstance(item, dict) + and "id" in item + and isinstance(item["id"], str) + and item["id"].strip() + ) + + +def _parse_plain_text_pointers(content: str) -> list[str]: return [line.strip() for line in content.splitlines() if line.strip()] -def _build_and_write_result( - pointers_to_delete, - ods_code, - matched_pointers, - mismatched_pointers, - not_found_pointers, - pointers_deleted, - failed_deletes, - start_time, - end_time, - output_file_path, -): +@dataclass +class PointerDeletionContext: + pointers_to_delete: list[str] + ods_code: str + matched_pointers: list[str] + mismatched_pointers: list[str] + not_found_pointers: list[str] + pointers_deleted: list[str] + failed_deletes: list[str] + start_time: datetime + end_time: datetime + output_file_path: str + + +def _build_and_write_result(ctx: PointerDeletionContext) -> Dict[str, Any]: + result = { - "pointers_to_delete": len(pointers_to_delete), - "ods_code": ods_code, - "ods_code_matched": {"count": len(matched_pointers), "ids": matched_pointers}, + "pointers_to_delete": len(ctx.pointers_to_delete), + "ods_code": ctx.ods_code, + "ods_code_matched": { + "count": len(ctx.matched_pointers), + "ids": ctx.matched_pointers, + }, "ods_code_mismatched": { - "count": len(mismatched_pointers), - "ids": mismatched_pointers, + "count": len(ctx.mismatched_pointers), + "ids": ctx.mismatched_pointers, }, "pointer_not_found": { - "count": len(not_found_pointers), - "ids": not_found_pointers, + "count": len(ctx.not_found_pointers), + "ids": ctx.not_found_pointers, }, - "deleted_pointers": {"count": len(pointers_deleted), "ids": pointers_deleted}, - "failed_deletes": {"count": len(failed_deletes), "ids": failed_deletes}, - "deletes-took-secs": timedelta.total_seconds(end_time - start_time), + "deleted_pointers": { + "count": len(ctx.pointers_deleted), + "ids": ctx.pointers_deleted, + }, + "failed_deletes": {"count": len(ctx.failed_deletes), "ids": ctx.failed_deletes}, + "deletes-took-secs": timedelta.total_seconds(ctx.end_time - ctx.start_time), } try: - _write_result_file(result, output_file_path) + _write_result_file(result, ctx.output_file_path) except Exception as exc: result["_output_error"] = ( - f"Failed to write result file {output_file_path}: {exc}" + f"Failed to write result file {ctx.output_file_path}: {exc}" ) _print_summary(result) return result def _write_result_file(result: Dict[str, Any], output_file: str) -> None: - """ - Atomically write result dict to output_file as JSON. - Raises on failure. - """ + out_dir = os.path.dirname(os.path.abspath(output_file)) or "." with tempfile.NamedTemporaryFile( "w", delete=False, dir=out_dir, prefix=".tmp_delete_results_", suffix=".json" @@ -115,10 +136,7 @@ def _write_result_file(result: Dict[str, Any], output_file: str) -> None: def _check_pointers_match_ods_code( ods_code: str, pointer_ids: List[str] ) -> tuple[List[str], List[str]]: - """ - Validate that pointer IDs are in line with the provided ODS code. - Returns (matched_ids, mismatched_ids) - """ + matched = [] mismatched = [] @@ -173,7 +191,6 @@ def _batch_delete_pointers( ) -> tuple[List[str], List[str]]: """ Delete pointers using BatchWriteItem (max 25 items per request). - Returns (deleted_ids, failed_ids) """ pointers_deleted = [] failed_deletes_set: set[str] = set() @@ -212,9 +229,6 @@ def _batch_delete_pointers( def _print_summary(result: Dict[str, Any]) -> None: - """ - Print a concise summary of the result to stdout. - """ def count_from(field): val = result.get(field) @@ -256,6 +270,14 @@ def _delete_pointers_by_id( - ods_code: ODS code of the organisation that the pointers belong to - pointers_to_delete: list of pointer ids to delete - pointers_file: path to JSON file (array of objects with "id" field) or text file (one id per line) + + Sample usage: + - Delete by list of ids: + python delete_pointers_by_id.py --table_name MyTable --ods_code ABC123 --pointers_to_delete '["ABC123-12345678910", "ABC123-109876543210"]' + - Delete by JSON file: + python delete_pointers_by_id.py --table_name MyTable --ods_code ABC123 --pointers_file /path/to/pointers.json + - Delete by text file: + python delete_pointers_by_id.py --table_name MyTable --ods_code ABC123 --pointers_file /path/to/ids.txt """ if pointers_to_delete is None and pointers_file is None: raise ValueError("Provide either pointers_to_delete or pointers_file") @@ -263,31 +285,31 @@ def _delete_pointers_by_id( if pointers_to_delete is not None and pointers_file is not None: raise ValueError("Provide either pointers_to_delete or pointers_file, not both") - # Load pointers from file if provided if pointers_file: pointers_to_delete = _load_pointers_from_file(pointers_file) - # establish start_time early so any early-return can use it for filename start_time = datetime.now(tz=timezone.utc) - stamp = start_time.strftime("%Y%m%dT%H%M%SZ") + timestamp = start_time.strftime("%Y%m%dT%H%M%SZ") script_dir = os.path.dirname(os.path.abspath(__file__)) or "." output_file_path = os.path.join( - script_dir, f"delete_results_{ods_code}_{stamp}.json" + script_dir, f"delete_results_{ods_code}_{timestamp}.json" ) if not pointers_to_delete: end_time = datetime.now(tz=timezone.utc) return _build_and_write_result( - pointers_to_delete, - ods_code, - [], - [], - [], - [], - [], - start_time, - end_time, - output_file_path, + PointerDeletionContext( + pointers_to_delete=pointers_to_delete, + ods_code=ods_code, + matched_pointers=[], + mismatched_pointers=[], + not_found_pointers=[], + pointers_deleted=[], + failed_deletes=[], + start_time=start_time, + end_time=end_time, + output_file_path=output_file_path, + ) ) print( @@ -305,16 +327,18 @@ def _delete_pointers_by_id( print(f"None of the pointer IDs are a match for ODS code {ods_code}. Exiting.") end_time = datetime.now(tz=timezone.utc) return _build_and_write_result( - pointers_to_delete, - ods_code, - matched_pointers, - mismatched_pointers, - [], - [], - [], - start_time, - end_time, - output_file_path, + PointerDeletionContext( + pointers_to_delete=pointers_to_delete, + ods_code=ods_code, + matched_pointers=matched_pointers, + mismatched_pointers=mismatched_pointers, + not_found_pointers=[], + pointers_deleted=[], + failed_deletes=[], + start_time=start_time, + end_time=end_time, + output_file_path=output_file_path, + ) ) print(f"Checking existence of {len(matched_pointers)} pointers in {table_name}...") @@ -330,16 +354,18 @@ def _delete_pointers_by_id( print("No pointers found to delete. Exiting.") end_time = datetime.now(tz=timezone.utc) return _build_and_write_result( - pointers_to_delete, - ods_code, - matched_pointers, - mismatched_pointers, - not_found_pointers, - [], - [], - start_time, - end_time, - output_file_path, + PointerDeletionContext( + pointers_to_delete=pointers_to_delete, + ods_code=ods_code, + matched_pointers=matched_pointers, + mismatched_pointers=mismatched_pointers, + not_found_pointers=not_found_pointers, + pointers_deleted=[], + failed_deletes=[], + start_time=start_time, + end_time=end_time, + output_file_path=output_file_path, + ) ) # Proceed with deletion using BatchWriteItem @@ -349,16 +375,18 @@ def _delete_pointers_by_id( end_time = datetime.now(tz=timezone.utc) result = _build_and_write_result( - pointers_to_delete, - ods_code, - matched_pointers, - mismatched_pointers, - not_found_pointers, - pointers_deleted, - failed_deletes, - start_time, - end_time, - output_file_path, + PointerDeletionContext( + pointers_to_delete=pointers_to_delete, + ods_code=ods_code, + matched_pointers=matched_pointers, + mismatched_pointers=mismatched_pointers, + not_found_pointers=not_found_pointers, + pointers_deleted=pointers_deleted, + failed_deletes=failed_deletes, + start_time=start_time, + end_time=end_time, + output_file_path=output_file_path, + ) ) print(" Done") return result diff --git a/scripts/tests/test_delete_pointers_by_id.py b/scripts/tests/test_delete_pointers_by_id.py index cddf4a558..621a26785 100644 --- a/scripts/tests/test_delete_pointers_by_id.py +++ b/scripts/tests/test_delete_pointers_by_id.py @@ -1,13 +1,19 @@ import json +from datetime import datetime, timezone from unittest.mock import MagicMock, patch import pytest from delete_pointers_by_id import ( + PointerDeletionContext, _batch_delete_pointers, _batch_get_existing_pointers, + _build_and_write_result, _check_pointers_match_ods_code, _delete_pointers_by_id, + _is_valid_pointer, _load_pointers_from_file, + _parse_json_pointers, + _parse_plain_text_pointers, ) @@ -20,12 +26,6 @@ def test_load_json_array(self, mock_open): result = _load_pointers_from_file("./pointers.json") assert result == ["G3H9E-id1", "G3H9E-id2"] - @patch("builtins.open", create=True) - def test_load_json_with_malformed_entries(self, mock_open): - mock_open.return_value.__enter__.return_value.read.return_value = '[{"id": "G3H9E-id1"}, {"patient_number": "123"}, {"id": ""}, {"id": "G3H9E-id2"}]' - result = _load_pointers_from_file("./pointers.json") - assert result == ["G3H9E-id1", "G3H9E-id2"] - @patch("builtins.open", create=True) def test_load_plain_text_file(self, mock_open): mock_open.return_value.__enter__.return_value.read.return_value = ( @@ -40,23 +40,95 @@ def test_load_empty_file(self, mock_open): result = _load_pointers_from_file("./empty.txt") assert result == [] - @patch("builtins.open", create=True) - def test_invalid_json(self, mock_open): - mock_open.return_value.__enter__.return_value.read.return_value = ( - '[{"id": "G3H9E-id1"' + +class TestBuildAndWriteResult: + @patch("delete_pointers_by_id._write_result_file") + @patch("delete_pointers_by_id._print_summary") + def test_build_and_write_result(self, mock_print, mock_write): + start_time = datetime.now(tz=timezone.utc) + end_time = datetime.now(tz=timezone.utc) + ctx = PointerDeletionContext( + pointers_to_delete=["G3H9E-1", "G3H9E-2"], + ods_code="G3H9E", + matched_pointers=["G3H9E-1", "G3H9E-2"], + mismatched_pointers=[], + not_found_pointers=[], + pointers_deleted=["G3H9E-1", "G3H9E-2"], + failed_deletes=[], + start_time=start_time, + end_time=end_time, + output_file_path="/tmp/test.json", ) + result = _build_and_write_result(ctx) + + assert result["pointers_to_delete"] == 2 + assert result["deleted_pointers"]["count"] == 2 + assert result["ods_code"] == "G3H9E" + mock_write.assert_called_once() + mock_print.assert_called_once() + + +class TestParseJsonPointers: + def test_invalid_json(self): + content = '[{"id": "G3H9E-id1"' with pytest.raises(ValueError, match="Failed to parse JSON file"): - _load_pointers_from_file("./invalid.json") + _parse_json_pointers(content, "./invalid.json") - @patch("builtins.open", create=True) - def test_json_not_array(self, mock_open): - mock_open.return_value.__enter__.return_value.read.return_value = ( - '{"id": "G3H9E-id1"}' - ) + def test_json_not_array(self): + content = '{"id": "G3H9E-id1"}' with pytest.raises( ValueError, match="JSON file must contain an array of objects" ): - _load_pointers_from_file("./not_array.json") + _parse_json_pointers(content, "./not_array.json") + + def test_valid_json_array(self): + content = '[{"id": "G3H9E-id1"}, {"id": "G3H9E-id2"}]' + result = _parse_json_pointers(content, "./pointers.json") + assert result == ["G3H9E-id1", "G3H9E-id2"] + + def test_json_with_malformed_entries(self): + content = '[{"id": "G3H9E-id1"}, {"patient_number": "123"}, {"id": ""}, {"id": "G3H9E-id2"}]' + result = _parse_json_pointers(content, "./pointers.json") + assert result == ["G3H9E-id1", "G3H9E-id2"] + + +class TestIsValidPointer: + def test_valid_pointer(self): + item = {"id": "G3H9E-id1"} + assert _is_valid_pointer(item) + + def test_missing_id_field(self): + item = {"patient_number": "123"} + assert not _is_valid_pointer(item) + + def test_empty_id_field(self): + item = {"id": ""} + assert not _is_valid_pointer(item) + + def test_non_string_id_field(self): + item = {"id": 123} + assert not _is_valid_pointer(item) + + def test_non_dict_item(self): + item = ["id", "G3H9E-id1"] + assert not _is_valid_pointer(item) + + +class TestParsePlainTextPointers: + def test_valid_plain_text(self): + content = "G3H9E-id1\nG3H9E-id2\nG3H9E-id3" + result = _parse_plain_text_pointers(content) + assert result == ["G3H9E-id1", "G3H9E-id2", "G3H9E-id3"] + + def test_plain_text_with_empty_lines(self): + content = "G3H9E-id1\n\nG3H9E-id2\n \nG3H9E-id3" + result = _parse_plain_text_pointers(content) + assert result == ["G3H9E-id1", "G3H9E-id2", "G3H9E-id3"] + + def test_empty_plain_text(self): + content = "" + result = _parse_plain_text_pointers(content) + assert result == [] class TestCheckPointersMatchOdsCode: @@ -101,6 +173,16 @@ def test_none_exist(self, mock_dynamodb): existing, not_found = _batch_get_existing_pointers(table, ids) assert existing == [] and not_found == ids + @patch("delete_pointers_by_id.dynamodb") + def test_some_exist(self, mock_dynamodb): + table = "t" + ids = ["G3H9E-1", "G3H9E-3"] + mock_dynamodb.batch_get_item.return_value = { + "Responses": {table: [{"pk": {"S": "D#G3H9E-1"}}]} + } + existing, not_found = _batch_get_existing_pointers(table, ids) + assert existing == ["G3H9E-1"] and not_found == ["G3H9E-3"] + class TestBatchDeletePointers: @patch("delete_pointers_by_id.dynamodb") From e11df4873ca39f2f72031847f138a776837ebf16 Mon Sep 17 00:00:00 2001 From: Sandy Forrester Date: Tue, 25 Nov 2025 09:50:51 +0000 Subject: [PATCH 06/12] NRL-1798 Add output file path to summary --- scripts/delete_pointers_by_id.py | 51 ++++++++++++++++++-------------- 1 file changed, 28 insertions(+), 23 deletions(-) diff --git a/scripts/delete_pointers_by_id.py b/scripts/delete_pointers_by_id.py index ef6cc558d..422461f7e 100755 --- a/scripts/delete_pointers_by_id.py +++ b/scripts/delete_pointers_by_id.py @@ -110,6 +110,7 @@ def _build_and_write_result(ctx: PointerDeletionContext) -> Dict[str, Any]: }, "failed_deletes": {"count": len(ctx.failed_deletes), "ids": ctx.failed_deletes}, "deletes-took-secs": timedelta.total_seconds(ctx.end_time - ctx.start_time), + "output_file_path": ctx.output_file_path, } try: _write_result_file(result, ctx.output_file_path) @@ -133,6 +134,33 @@ def _write_result_file(result: Dict[str, Any], output_file: str) -> None: os.replace(tf.name, output_file) +def _print_summary(result: Dict[str, Any]) -> None: + + def count_from(field): + val = result.get(field) + if isinstance(val, dict): + return val.get("count", 0) + if isinstance(val, list): + return len(val) + return 0 + + print("*******************************************************") + print("Summary:") + print(f" pointers_to_delete: {result.get('pointers_to_delete')}") + print(f" ods_code_matched: {count_from('ods_code_matched')}") + print(f" ods_code_mismatched:{count_from('ods_code_mismatched')}") + print(f" pointer_not_found: {count_from('pointer_not_found')}") + print(f" deleted_pointers: {count_from('deleted_pointers')}") + print(f" failed_deletes: {count_from('failed_deletes')}") + if "_output_error" in result: + print(f" output_error: {result['_output_error']}") + if "deletes-took-secs" in result: + print(f" deletes-took-secs: {result.get('deletes-took-secs')}") + if "output_file_path" in result: + print(f" See output file for full results: {result.get('output_file_path')}") + print("*******************************************************") + + def _check_pointers_match_ods_code( ods_code: str, pointer_ids: List[str] ) -> tuple[List[str], List[str]]: @@ -228,29 +256,6 @@ def _batch_delete_pointers( return pointers_deleted, sorted(failed_deletes_set) -def _print_summary(result: Dict[str, Any]) -> None: - - def count_from(field): - val = result.get(field) - if isinstance(val, dict): - return val.get("count", 0) - if isinstance(val, list): - return len(val) - return 0 - - print("Summary:") - print(f" pointers_to_delete: {result.get('pointers_to_delete')}") - print(f" ods_code_matched: {count_from('ods_code_matched')}") - print(f" ods_code_mismatched:{count_from('ods_code_mismatched')}") - print(f" pointer_not_found: {count_from('pointer_not_found')}") - print(f" deleted_pointers: {count_from('deleted_pointers')}") - print(f" failed_deletes: {count_from('failed_deletes')}") - if "_output_error" in result: - print(f" output_error: {result['_output_error']}") - if "deletes-took-secs" in result: - print(f" deletes-took-secs: {result.get('deletes-took-secs')}") - - def _delete_pointers_by_id( table_name: str, ods_code: str, From ff8216d5913e3399b7a8ab9ffe041178092e261a Mon Sep 17 00:00:00 2001 From: Sandy Forrester Date: Tue, 25 Nov 2025 10:10:40 +0000 Subject: [PATCH 07/12] NRL-1798 Remove comment --- scripts/delete_pointers_by_id.py | 1 - 1 file changed, 1 deletion(-) diff --git a/scripts/delete_pointers_by_id.py b/scripts/delete_pointers_by_id.py index 422461f7e..4a8885ace 100755 --- a/scripts/delete_pointers_by_id.py +++ b/scripts/delete_pointers_by_id.py @@ -52,7 +52,6 @@ def _parse_json_pointers(content: str, pointers_file: str) -> list[str]: skipped_count += 1 if skipped_count: - # TODO should we log these? print( f"Warning: skipped {skipped_count} malformed entries in JSON file {pointers_file}" ) From 8b9eac6701c9cc7271d14eca3fec1a40d4f19165 Mon Sep 17 00:00:00 2001 From: Sandy Forrester Date: Tue, 25 Nov 2025 10:21:05 +0000 Subject: [PATCH 08/12] NRL-1798 Update summary with filename instead of path --- scripts/delete_pointers_by_id.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/delete_pointers_by_id.py b/scripts/delete_pointers_by_id.py index 4a8885ace..8e3621c3f 100755 --- a/scripts/delete_pointers_by_id.py +++ b/scripts/delete_pointers_by_id.py @@ -156,7 +156,8 @@ def count_from(field): if "deletes-took-secs" in result: print(f" deletes-took-secs: {result.get('deletes-took-secs')}") if "output_file_path" in result: - print(f" See output file for full results: {result.get('output_file_path')}") + output_filename = os.path.basename(result.get("output_file_path")) + print(f" See output file for full results: {output_filename}") print("*******************************************************") From 81ac1a9abe2092e4565e8a9ddfeb7e11806552b2 Mon Sep 17 00:00:00 2001 From: Sandy Forrester Date: Tue, 25 Nov 2025 14:04:35 +0000 Subject: [PATCH 09/12] NRL-1798 Update result with filename instead of path --- scripts/delete_pointers_by_id.py | 35 +++--- scripts/tests/test_delete_pointers_by_id.py | 114 +++++++++++++------- 2 files changed, 95 insertions(+), 54 deletions(-) diff --git a/scripts/delete_pointers_by_id.py b/scripts/delete_pointers_by_id.py index 8e3621c3f..ef4c0f496 100755 --- a/scripts/delete_pointers_by_id.py +++ b/scripts/delete_pointers_by_id.py @@ -83,7 +83,7 @@ class PointerDeletionContext: failed_deletes: list[str] start_time: datetime end_time: datetime - output_file_path: str + output_filename: str def _build_and_write_result(ctx: PointerDeletionContext) -> Dict[str, Any]: @@ -109,13 +109,17 @@ def _build_and_write_result(ctx: PointerDeletionContext) -> Dict[str, Any]: }, "failed_deletes": {"count": len(ctx.failed_deletes), "ids": ctx.failed_deletes}, "deletes-took-secs": timedelta.total_seconds(ctx.end_time - ctx.start_time), - "output_file_path": ctx.output_file_path, + "output_filename": ctx.output_filename, } + + script_dir = os.path.dirname(os.path.abspath(__file__)) or "." + output_file_path = os.path.join(script_dir, ctx.output_filename) + try: - _write_result_file(result, ctx.output_file_path) + _write_result_file(result, output_file_path) except Exception as exc: result["_output_error"] = ( - f"Failed to write result file {ctx.output_file_path}: {exc}" + f"Failed to write result file {ctx.output_filename}: {exc}" ) _print_summary(result) return result @@ -151,13 +155,13 @@ def count_from(field): print(f" pointer_not_found: {count_from('pointer_not_found')}") print(f" deleted_pointers: {count_from('deleted_pointers')}") print(f" failed_deletes: {count_from('failed_deletes')}") - if "_output_error" in result: - print(f" output_error: {result['_output_error']}") if "deletes-took-secs" in result: print(f" deletes-took-secs: {result.get('deletes-took-secs')}") - if "output_file_path" in result: - output_filename = os.path.basename(result.get("output_file_path")) - print(f" See output file for full results: {output_filename}") + + if "_output_error" in result: + print(f" output_error: {result['_output_error']}") + elif "output_filename" in result: + print(f" See output file for full results: {result.get('output_filename')}") print("*******************************************************") @@ -295,10 +299,7 @@ def _delete_pointers_by_id( start_time = datetime.now(tz=timezone.utc) timestamp = start_time.strftime("%Y%m%dT%H%M%SZ") - script_dir = os.path.dirname(os.path.abspath(__file__)) or "." - output_file_path = os.path.join( - script_dir, f"delete_results_{ods_code}_{timestamp}.json" - ) + output_filename = f"delete_results_{ods_code}_{timestamp}.json" if not pointers_to_delete: end_time = datetime.now(tz=timezone.utc) @@ -313,7 +314,7 @@ def _delete_pointers_by_id( failed_deletes=[], start_time=start_time, end_time=end_time, - output_file_path=output_file_path, + output_filename=output_filename, ) ) @@ -342,7 +343,7 @@ def _delete_pointers_by_id( failed_deletes=[], start_time=start_time, end_time=end_time, - output_file_path=output_file_path, + output_filename=output_filename, ) ) @@ -369,7 +370,7 @@ def _delete_pointers_by_id( failed_deletes=[], start_time=start_time, end_time=end_time, - output_file_path=output_file_path, + output_filename=output_filename, ) ) @@ -390,7 +391,7 @@ def _delete_pointers_by_id( failed_deletes=failed_deletes, start_time=start_time, end_time=end_time, - output_file_path=output_file_path, + output_filename=output_filename, ) ) print(" Done") diff --git a/scripts/tests/test_delete_pointers_by_id.py b/scripts/tests/test_delete_pointers_by_id.py index 621a26785..f6e852a6e 100644 --- a/scripts/tests/test_delete_pointers_by_id.py +++ b/scripts/tests/test_delete_pointers_by_id.py @@ -41,33 +41,6 @@ def test_load_empty_file(self, mock_open): assert result == [] -class TestBuildAndWriteResult: - @patch("delete_pointers_by_id._write_result_file") - @patch("delete_pointers_by_id._print_summary") - def test_build_and_write_result(self, mock_print, mock_write): - start_time = datetime.now(tz=timezone.utc) - end_time = datetime.now(tz=timezone.utc) - ctx = PointerDeletionContext( - pointers_to_delete=["G3H9E-1", "G3H9E-2"], - ods_code="G3H9E", - matched_pointers=["G3H9E-1", "G3H9E-2"], - mismatched_pointers=[], - not_found_pointers=[], - pointers_deleted=["G3H9E-1", "G3H9E-2"], - failed_deletes=[], - start_time=start_time, - end_time=end_time, - output_file_path="/tmp/test.json", - ) - result = _build_and_write_result(ctx) - - assert result["pointers_to_delete"] == 2 - assert result["deleted_pointers"]["count"] == 2 - assert result["ods_code"] == "G3H9E" - mock_write.assert_called_once() - mock_print.assert_called_once() - - class TestParseJsonPointers: def test_invalid_json(self): content = '[{"id": "G3H9E-id1"' @@ -131,6 +104,60 @@ def test_empty_plain_text(self): assert result == [] +class TestBuildAndWriteResult: + @patch("delete_pointers_by_id._write_result_file") + @patch("delete_pointers_by_id._print_summary") + def test_build_and_write_result_success(self, mock_print, mock_write): + start_time = datetime.now(tz=timezone.utc) + end_time = datetime.now(tz=timezone.utc) + ctx = PointerDeletionContext( + pointers_to_delete=["G3H9E-1", "G3H9E-2"], + ods_code="G3H9E", + matched_pointers=["G3H9E-1", "G3H9E-2"], + mismatched_pointers=[], + not_found_pointers=[], + pointers_deleted=["G3H9E-1", "G3H9E-2"], + failed_deletes=[], + start_time=start_time, + end_time=end_time, + output_filename="delete_results_G3H9E_20231125T120000Z.json", + ) + result = _build_and_write_result(ctx) + + assert result["pointers_to_delete"] == 2 + assert result["deleted_pointers"]["count"] == 2 + assert result["ods_code"] == "G3H9E" + assert result["output_filename"] == "delete_results_G3H9E_20231125T120000Z.json" + assert "_output_error" not in result + mock_write.assert_called_once() + mock_print.assert_called_once() + + @patch("delete_pointers_by_id._write_result_file") + @patch("delete_pointers_by_id._print_summary") + def test_build_and_write_result_with_error(self, mock_print, mock_write): + mock_write.side_effect = Exception("Write failed") + start_time = datetime.now(tz=timezone.utc) + end_time = datetime.now(tz=timezone.utc) + ctx = PointerDeletionContext( + pointers_to_delete=["G3H9E-1"], + ods_code="G3H9E", + matched_pointers=["G3H9E-1"], + mismatched_pointers=[], + not_found_pointers=[], + pointers_deleted=["G3H9E-1"], + failed_deletes=[], + start_time=start_time, + end_time=end_time, + output_filename="delete_results_G3H9E_20231125T120000Z.json", + ) + result = _build_and_write_result(ctx) + + assert "_output_error" in result + assert "Write failed" in result["_output_error"] + mock_write.assert_called_once() + mock_print.assert_called_once() + + class TestCheckPointersMatchOdsCode: def test_all_match(self): ods = "G3H9E" @@ -173,16 +200,6 @@ def test_none_exist(self, mock_dynamodb): existing, not_found = _batch_get_existing_pointers(table, ids) assert existing == [] and not_found == ids - @patch("delete_pointers_by_id.dynamodb") - def test_some_exist(self, mock_dynamodb): - table = "t" - ids = ["G3H9E-1", "G3H9E-3"] - mock_dynamodb.batch_get_item.return_value = { - "Responses": {table: [{"pk": {"S": "D#G3H9E-1"}}]} - } - existing, not_found = _batch_get_existing_pointers(table, ids) - assert existing == ["G3H9E-1"] and not_found == ["G3H9E-3"] - class TestBatchDeletePointers: @patch("delete_pointers_by_id.dynamodb") @@ -224,6 +241,23 @@ def test_both_params_provided(self): "t", "G3H9E", pointers_to_delete=["a"], pointers_file="./f" ) + @patch("delete_pointers_by_id._build_and_write_result") + @patch("delete_pointers_by_id._check_pointers_match_ods_code") + @patch("delete_pointers_by_id._batch_get_existing_pointers") + @patch("delete_pointers_by_id._batch_delete_pointers") + def test_empty_pointers_list(self, mock_delete, mock_get, mock_check, mock_build): + mock_build.return_value = { + "pointers_to_delete": 0, + "ods_code_matched": {"count": 0}, + "output_filename": "delete_results_G3H9E_20231125T120000Z.json", + } + result = _delete_pointers_by_id("t", "G3H9E", pointers_to_delete=[]) + assert result["pointers_to_delete"] == 0 + mock_build.assert_called_once() + mock_check.assert_not_called() + mock_get.assert_not_called() + mock_delete.assert_not_called() + @patch("delete_pointers_by_id._build_and_write_result") @patch("delete_pointers_by_id._check_pointers_match_ods_code") @patch("delete_pointers_by_id._batch_get_existing_pointers") @@ -233,6 +267,7 @@ def test_no_matched_ods_codes(self, mock_delete, mock_get, mock_check, mock_buil mock_build.return_value = { "ods_code_matched": {"count": 0}, "ods_code_mismatched": {"count": 2}, + "output_filename": "delete_results_G3H9E_20231125T120000Z.json", } result = _delete_pointers_by_id( "t", "G3H9E", pointers_to_delete=["RAT-1", "RAT-2"] @@ -240,6 +275,8 @@ def test_no_matched_ods_codes(self, mock_delete, mock_get, mock_check, mock_buil assert result["ods_code_matched"]["count"] == 0 assert result["ods_code_mismatched"]["count"] == 2 mock_build.assert_called_once() + mock_get.assert_not_called() + mock_delete.assert_not_called() @patch("delete_pointers_by_id._build_and_write_result") @patch("delete_pointers_by_id._check_pointers_match_ods_code") @@ -254,6 +291,7 @@ def test_successful_flow(self, mock_delete, mock_get, mock_check, mock_build): "pointers_to_delete": 2, "deleted_pointers": {"count": 2}, "pointer_not_found": {"count": 0}, + "output_filename": "delete_results_G3H9E_20231125T120000Z.json", } result = _delete_pointers_by_id("t", "G3H9E", pointers_to_delete=ids) assert result["pointers_to_delete"] == 2 @@ -274,6 +312,7 @@ def test_partial_with_failures(self, mock_delete, mock_get, mock_check, mock_bui "ods_code_mismatched": {"count": 1}, "deleted_pointers": {"count": 2}, "failed_deletes": {"count": 1, "ids": ["G3H9E-3"]}, + "output_filename": "delete_results_G3H9E_20231125T120000Z.json", } result = _delete_pointers_by_id( "t", "G3H9E", pointers_to_delete=matched + ["RAT-1"] @@ -298,6 +337,7 @@ def test_some_pointers_not_found( mock_build.return_value = { "pointer_not_found": {"count": 1, "ids": ["G3H9E-3"]}, "deleted_pointers": {"count": 2}, + "output_filename": "delete_results_G3H9E_20231125T120000Z.json", } result = _delete_pointers_by_id("t", "G3H9E", pointers_to_delete=matched) assert result["pointer_not_found"]["count"] == 1 From d960e695cac1c7c07b8b81e287836b5f865799a1 Mon Sep 17 00:00:00 2001 From: Sandy Forrester Date: Tue, 25 Nov 2025 17:25:29 +0000 Subject: [PATCH 10/12] NRL-1798 Remove return --- scripts/delete_pointers_by_id.py | 14 ++- scripts/tests/test_delete_pointers_by_id.py | 119 +++++++++++--------- 2 files changed, 75 insertions(+), 58 deletions(-) diff --git a/scripts/delete_pointers_by_id.py b/scripts/delete_pointers_by_id.py index ef4c0f496..a7ffc70df 100755 --- a/scripts/delete_pointers_by_id.py +++ b/scripts/delete_pointers_by_id.py @@ -265,7 +265,7 @@ def _delete_pointers_by_id( ods_code: str, pointers_to_delete: List[str] | None = None, pointers_file: str | None = None, -) -> Dict[str, Any]: +) -> None: """ Delete pointers from DynamoDB table. @@ -303,7 +303,7 @@ def _delete_pointers_by_id( if not pointers_to_delete: end_time = datetime.now(tz=timezone.utc) - return _build_and_write_result( + _build_and_write_result( PointerDeletionContext( pointers_to_delete=pointers_to_delete, ods_code=ods_code, @@ -317,6 +317,7 @@ def _delete_pointers_by_id( output_filename=output_filename, ) ) + return print( f"Validating {len(pointers_to_delete)} pointers against ODS code {ods_code}..." @@ -332,7 +333,7 @@ def _delete_pointers_by_id( if not matched_pointers: print(f"None of the pointer IDs are a match for ODS code {ods_code}. Exiting.") end_time = datetime.now(tz=timezone.utc) - return _build_and_write_result( + _build_and_write_result( PointerDeletionContext( pointers_to_delete=pointers_to_delete, ods_code=ods_code, @@ -346,6 +347,7 @@ def _delete_pointers_by_id( output_filename=output_filename, ) ) + return print(f"Checking existence of {len(matched_pointers)} pointers in {table_name}...") existing_pointers, not_found_pointers = _batch_get_existing_pointers( @@ -359,7 +361,7 @@ def _delete_pointers_by_id( if not existing_pointers: print("No pointers found to delete. Exiting.") end_time = datetime.now(tz=timezone.utc) - return _build_and_write_result( + _build_and_write_result( PointerDeletionContext( pointers_to_delete=pointers_to_delete, ods_code=ods_code, @@ -373,6 +375,7 @@ def _delete_pointers_by_id( output_filename=output_filename, ) ) + return # Proceed with deletion using BatchWriteItem pointers_deleted, failed_deletes = _batch_delete_pointers( @@ -380,7 +383,7 @@ def _delete_pointers_by_id( ) end_time = datetime.now(tz=timezone.utc) - result = _build_and_write_result( + _build_and_write_result( PointerDeletionContext( pointers_to_delete=pointers_to_delete, ods_code=ods_code, @@ -395,7 +398,6 @@ def _delete_pointers_by_id( ) ) print(" Done") - return result if __name__ == "__main__": diff --git a/scripts/tests/test_delete_pointers_by_id.py b/scripts/tests/test_delete_pointers_by_id.py index f6e852a6e..23795f432 100644 --- a/scripts/tests/test_delete_pointers_by_id.py +++ b/scripts/tests/test_delete_pointers_by_id.py @@ -1,6 +1,6 @@ import json from datetime import datetime, timezone -from unittest.mock import MagicMock, patch +from unittest.mock import MagicMock, call, patch import pytest from delete_pointers_by_id import ( @@ -232,11 +232,16 @@ def test_some_unprocessed(self, mock_dynamodb): class TestDeletePointersById: def test_missing_params(self): - with pytest.raises(ValueError): + with pytest.raises( + ValueError, match="Provide either pointers_to_delete or pointers_file" + ): _delete_pointers_by_id("t", "G3H9E") def test_both_params_provided(self): - with pytest.raises(ValueError): + with pytest.raises( + ValueError, + match="Provide either pointers_to_delete or pointers_file, not both", + ): _delete_pointers_by_id( "t", "G3H9E", pointers_to_delete=["a"], pointers_file="./f" ) @@ -246,38 +251,36 @@ def test_both_params_provided(self): @patch("delete_pointers_by_id._batch_get_existing_pointers") @patch("delete_pointers_by_id._batch_delete_pointers") def test_empty_pointers_list(self, mock_delete, mock_get, mock_check, mock_build): - mock_build.return_value = { - "pointers_to_delete": 0, - "ods_code_matched": {"count": 0}, - "output_filename": "delete_results_G3H9E_20231125T120000Z.json", - } - result = _delete_pointers_by_id("t", "G3H9E", pointers_to_delete=[]) - assert result["pointers_to_delete"] == 0 + _delete_pointers_by_id("t", "G3H9E", pointers_to_delete=[]) + mock_build.assert_called_once() + mock_check.assert_not_called() mock_get.assert_not_called() mock_delete.assert_not_called() + call_args = mock_build.call_args[0][0] + assert call_args.pointers_to_delete == [] + assert call_args.matched_pointers == [] + @patch("delete_pointers_by_id._build_and_write_result") @patch("delete_pointers_by_id._check_pointers_match_ods_code") @patch("delete_pointers_by_id._batch_get_existing_pointers") @patch("delete_pointers_by_id._batch_delete_pointers") def test_no_matched_ods_codes(self, mock_delete, mock_get, mock_check, mock_build): mock_check.return_value = ([], ["RAT-1", "RAT-2"]) - mock_build.return_value = { - "ods_code_matched": {"count": 0}, - "ods_code_mismatched": {"count": 2}, - "output_filename": "delete_results_G3H9E_20231125T120000Z.json", - } - result = _delete_pointers_by_id( - "t", "G3H9E", pointers_to_delete=["RAT-1", "RAT-2"] - ) - assert result["ods_code_matched"]["count"] == 0 - assert result["ods_code_mismatched"]["count"] == 2 + + _delete_pointers_by_id("t", "G3H9E", pointers_to_delete=["RAT-1", "RAT-2"]) + mock_build.assert_called_once() + mock_check.assert_called_once() mock_get.assert_not_called() mock_delete.assert_not_called() + call_args = mock_build.call_args[0][0] + assert call_args.matched_pointers == [] + assert call_args.mismatched_pointers == ["RAT-1", "RAT-2"] + @patch("delete_pointers_by_id._build_and_write_result") @patch("delete_pointers_by_id._check_pointers_match_ods_code") @patch("delete_pointers_by_id._batch_get_existing_pointers") @@ -287,17 +290,17 @@ def test_successful_flow(self, mock_delete, mock_get, mock_check, mock_build): mock_check.return_value = (ids, []) mock_get.return_value = (ids, []) mock_delete.return_value = (ids, []) - mock_build.return_value = { - "pointers_to_delete": 2, - "deleted_pointers": {"count": 2}, - "pointer_not_found": {"count": 0}, - "output_filename": "delete_results_G3H9E_20231125T120000Z.json", - } - result = _delete_pointers_by_id("t", "G3H9E", pointers_to_delete=ids) - assert result["pointers_to_delete"] == 2 - assert result["deleted_pointers"]["count"] == 2 - assert result["pointer_not_found"]["count"] == 0 + + _delete_pointers_by_id("t", "G3H9E", pointers_to_delete=ids) + mock_build.assert_called_once() + mock_check.assert_called_once() + mock_get.assert_called_once() + mock_delete.assert_called_once() + + call_args = mock_build.call_args[0][0] + assert call_args.pointers_deleted == ids + assert call_args.failed_deletes == [] @patch("delete_pointers_by_id._build_and_write_result") @patch("delete_pointers_by_id._check_pointers_match_ods_code") @@ -308,21 +311,16 @@ def test_partial_with_failures(self, mock_delete, mock_get, mock_check, mock_bui mock_check.return_value = (matched, ["RAT-1"]) mock_get.return_value = (matched, []) mock_delete.return_value = (["G3H9E-1", "G3H9E-2"], ["G3H9E-3"]) - mock_build.return_value = { - "ods_code_mismatched": {"count": 1}, - "deleted_pointers": {"count": 2}, - "failed_deletes": {"count": 1, "ids": ["G3H9E-3"]}, - "output_filename": "delete_results_G3H9E_20231125T120000Z.json", - } - result = _delete_pointers_by_id( - "t", "G3H9E", pointers_to_delete=matched + ["RAT-1"] - ) - assert result["ods_code_mismatched"]["count"] == 1 - assert result["deleted_pointers"]["count"] == 2 - assert result["failed_deletes"]["count"] == 1 - assert "G3H9E-3" in result["failed_deletes"]["ids"] + + _delete_pointers_by_id("t", "G3H9E", pointers_to_delete=matched + ["RAT-1"]) + mock_build.assert_called_once() + call_args = mock_build.call_args[0][0] + assert call_args.mismatched_pointers == ["RAT-1"] + assert call_args.pointers_deleted == ["G3H9E-1", "G3H9E-2"] + assert call_args.failed_deletes == ["G3H9E-3"] + @patch("delete_pointers_by_id._build_and_write_result") @patch("delete_pointers_by_id._check_pointers_match_ods_code") @patch("delete_pointers_by_id._batch_get_existing_pointers") @@ -334,13 +332,30 @@ def test_some_pointers_not_found( mock_check.return_value = (matched, []) mock_get.return_value = (["G3H9E-1", "G3H9E-2"], ["G3H9E-3"]) mock_delete.return_value = (["G3H9E-1", "G3H9E-2"], []) - mock_build.return_value = { - "pointer_not_found": {"count": 1, "ids": ["G3H9E-3"]}, - "deleted_pointers": {"count": 2}, - "output_filename": "delete_results_G3H9E_20231125T120000Z.json", - } - result = _delete_pointers_by_id("t", "G3H9E", pointers_to_delete=matched) - assert result["pointer_not_found"]["count"] == 1 - assert "G3H9E-3" in result["pointer_not_found"]["ids"] - assert result["deleted_pointers"]["count"] == 2 + + _delete_pointers_by_id("t", "G3H9E", pointers_to_delete=matched) + + mock_build.assert_called_once() + + call_args = mock_build.call_args[0][0] + assert call_args.not_found_pointers == ["G3H9E-3"] + assert call_args.pointers_deleted == ["G3H9E-1", "G3H9E-2"] + + @patch("delete_pointers_by_id._build_and_write_result") + @patch("delete_pointers_by_id._check_pointers_match_ods_code") + @patch("delete_pointers_by_id._batch_get_existing_pointers") + @patch("delete_pointers_by_id._batch_delete_pointers") + def test_no_existing_pointers(self, mock_delete, mock_get, mock_check, mock_build): + matched = ["G3H9E-1", "G3H9E-2"] + mock_check.return_value = (matched, []) + mock_get.return_value = ([], matched) + + _delete_pointers_by_id("t", "G3H9E", pointers_to_delete=matched) + mock_build.assert_called_once() + mock_delete.assert_not_called() + + # Verify the context + call_args = mock_build.call_args[0][0] + assert call_args.not_found_pointers == matched + assert call_args.pointers_deleted == [] From 91147306b3d1aeb23d33914a73ec0b956d01bf06 Mon Sep 17 00:00:00 2001 From: Sandy Forrester Date: Fri, 5 Dec 2025 17:46:21 +0000 Subject: [PATCH 11/12] NRL-1798 Update script help notes --- scripts/delete_pointers_by_id.py | 40 ++++++++++++++------------------ 1 file changed, 17 insertions(+), 23 deletions(-) diff --git a/scripts/delete_pointers_by_id.py b/scripts/delete_pointers_by_id.py index a7ffc70df..b22f6fe55 100755 --- a/scripts/delete_pointers_by_id.py +++ b/scripts/delete_pointers_by_id.py @@ -263,36 +263,30 @@ def _batch_delete_pointers( def _delete_pointers_by_id( table_name: str, ods_code: str, - pointers_to_delete: List[str] | None = None, + pointers_to_delete: list[str] | None = None, pointers_file: str | None = None, ) -> None: """ - Delete pointers from DynamoDB table. + Delete DynamoDB pointers by ID with ODS code validation. + + REQUIRED: Provide either --pointers_to_delete OR --pointers_file (but not both) Can accept pointers as: - - list of strings: pointers_to_delete=["id1", "id2"] - - JSON file: pointers_file=/path/to/pointers.json (array of objects with "id" field) - - text file: pointers_file=/path/to/ids.txt (one id per line) - - Parameters: - - table_name: DynamoDB table name - - ods_code: ODS code of the organisation that the pointers belong to - - pointers_to_delete: list of pointer ids to delete - - pointers_file: path to JSON file (array of objects with "id" field) or text file (one id per line) - - Sample usage: - - Delete by list of ids: - python delete_pointers_by_id.py --table_name MyTable --ods_code ABC123 --pointers_to_delete '["ABC123-12345678910", "ABC123-109876543210"]' - - Delete by JSON file: - python delete_pointers_by_id.py --table_name MyTable --ods_code ABC123 --pointers_file /path/to/pointers.json - - Delete by text file: - python delete_pointers_by_id.py --table_name MyTable --ods_code ABC123 --pointers_file /path/to/ids.txt + - list of strings: --pointers_to_delete '["ABC123-12345678910", "ABC123-109876543210"]' + - JSON file: --pointers_file /path/to/pointers.json (array of objects with "id" field) + - text file: --pointers_file /path/to/ids.txt (one id per line) + + Args: + table_name: DynamoDB table name + ods_code: ODS code to validate pointer IDs against + pointers_to_delete: List of pointer IDs as JSON string + pointers_file: Path to file containing pointer IDs """ - if pointers_to_delete is None and pointers_file is None: - raise ValueError("Provide either pointers_to_delete or pointers_file") + if not pointers_to_delete and not pointers_file: + raise ValueError("Must provide either --pointers_to_delete or --pointers_file") - if pointers_to_delete is not None and pointers_file is not None: - raise ValueError("Provide either pointers_to_delete or pointers_file, not both") + if pointers_to_delete and pointers_file: + raise ValueError("Cannot provide both --pointers_to_delete and --pointers_file") if pointers_file: pointers_to_delete = _load_pointers_from_file(pointers_file) From 26fd357459e784270264696a4592294838f7e245 Mon Sep 17 00:00:00 2001 From: Sandy Forrester Date: Fri, 5 Dec 2025 18:10:10 +0000 Subject: [PATCH 12/12] NRL-1798 Update unit test assertions --- scripts/delete_pointers_by_id.py | 4 ++-- scripts/tests/test_delete_pointers_by_id.py | 5 +++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/scripts/delete_pointers_by_id.py b/scripts/delete_pointers_by_id.py index b22f6fe55..1f99cd4e3 100755 --- a/scripts/delete_pointers_by_id.py +++ b/scripts/delete_pointers_by_id.py @@ -282,10 +282,10 @@ def _delete_pointers_by_id( pointers_to_delete: List of pointer IDs as JSON string pointers_file: Path to file containing pointer IDs """ - if not pointers_to_delete and not pointers_file: + if pointers_to_delete is None and pointers_file is None: raise ValueError("Must provide either --pointers_to_delete or --pointers_file") - if pointers_to_delete and pointers_file: + if pointers_to_delete is not None and pointers_file is not None: raise ValueError("Cannot provide both --pointers_to_delete and --pointers_file") if pointers_file: diff --git a/scripts/tests/test_delete_pointers_by_id.py b/scripts/tests/test_delete_pointers_by_id.py index 23795f432..56605fa00 100644 --- a/scripts/tests/test_delete_pointers_by_id.py +++ b/scripts/tests/test_delete_pointers_by_id.py @@ -233,14 +233,15 @@ def test_some_unprocessed(self, mock_dynamodb): class TestDeletePointersById: def test_missing_params(self): with pytest.raises( - ValueError, match="Provide either pointers_to_delete or pointers_file" + ValueError, + match="Must provide either --pointers_to_delete or --pointers_file", ): _delete_pointers_by_id("t", "G3H9E") def test_both_params_provided(self): with pytest.raises( ValueError, - match="Provide either pointers_to_delete or pointers_file, not both", + match="Cannot provide both --pointers_to_delete and --pointers_file", ): _delete_pointers_by_id( "t", "G3H9E", pointers_to_delete=["a"], pointers_file="./f"