diff --git a/lambdas/services/base/s3_service.py b/lambdas/services/base/s3_service.py
index 3ee42b4e2..dcb6c7603 100644
--- a/lambdas/services/base/s3_service.py
+++ b/lambdas/services/base/s3_service.py
@@ -2,6 +2,7 @@
 from datetime import datetime, timedelta, timezone
 from io import BytesIO
 from typing import Any, Mapping
+from urllib import parse
 
 import boto3
 from botocore.client import Config as BotoConfig
@@ -18,6 +19,9 @@ class S3Service:
     EXPIRED_SESSION_WARNING = "Expired session, creating a new role session"
     S3_PREFIX = "s3://"
 
+    DEFAULT_AUTODELETE_TAG_KEY = "autodelete"
+    DEFAULT_AUTODELETE_TAG_VALUE = "true"
+
     def __new__(cls, *args, **kwargs):
         if cls._instance is None:
             cls._instance = super().__new__(cls)
@@ -43,17 +47,54 @@ def __init__(self, custom_aws_role=None):
             self.custom_aws_role, "s3", config=self.config
         )
 
+    def _refresh_custom_session_if_needed(self) -> None:
+        if not self.custom_client:
+            return
+        if datetime.now(timezone.utc) > self.expiration_time - timedelta(minutes=10):
+            logger.info(S3Service.EXPIRED_SESSION_WARNING)
+            self.custom_client, self.expiration_time = self.iam_service.assume_role(
+                self.custom_aws_role, "s3", config=self.config
+            )
+
+    @staticmethod
+    def build_tagging_query(tags: Mapping[str, str] | None) -> str:
+        """
+        PUT Object expects its tag-set URL-encoded, e.g. "autodelete=true&foo=bar"
+        """
+        if not tags:
+            return ""
+        return parse.urlencode(dict(tags))
+
+    @staticmethod
+    def build_tagging_xml(tags: Mapping[str, str]) -> str:
+        """
+        POST Object expects the "tagging" form field as an XML tag-set.
+        Tag keys and values are assumed to be XML-safe here.
+        """
+        tags_xml = "".join(
+            f"<Tag><Key>{k}</Key><Value>{v}</Value></Tag>" for k, v in tags.items()
+        )
+        return f"<Tagging><TagSet>{tags_xml}</TagSet></Tagging>"
+
+    @staticmethod
+    def ensure_autodelete_tag(
+        tags: Mapping[str, str] | None,
+        tag_key: str = DEFAULT_AUTODELETE_TAG_KEY,
+        tag_value: str = DEFAULT_AUTODELETE_TAG_VALUE,
+    ) -> dict[str, str]:
+        out = dict(tags or {})
+        out.setdefault(tag_key, tag_value)
+        return out
+
+
     # S3 Location should be a minimum of a s3_object_key but can also be a directory location in the form of
     # {{directory}}/{{s3_object_key}}
     def create_upload_presigned_url(self, s3_bucket_name: str, s3_object_location: str):
+        """
+        Backwards-compatible wrapper for presigned POST without enforced tags.
+        """
         if self.custom_client:
-            if datetime.now(timezone.utc) > self.expiration_time - timedelta(
-                minutes=10
-            ):
-                logger.info(S3Service.EXPIRED_SESSION_WARNING)
-                self.custom_client, self.expiration_time = self.iam_service.assume_role(
-                    self.custom_aws_role, "s3", config=self.config
-                )
+            self._refresh_custom_session_if_needed()
         return self.custom_client.generate_presigned_post(
             s3_bucket_name,
             s3_object_location,
@@ -61,33 +102,77 @@ def create_upload_presigned_url(self, s3_bucket_name: str, s3_object_location: s
             Conditions=None,
             ExpiresIn=self.presigned_url_expiry,
         )
-
-    def create_put_presigned_url(self, s3_bucket_name: str, file_key: str):
-        if self.custom_client:
-            if datetime.now(timezone.utc) > self.expiration_time - timedelta(
-                minutes=10
-            ):
-                logger.info(S3Service.EXPIRED_SESSION_WARNING)
-                self.custom_client, self.expiration_time = self.iam_service.assume_role(
-                    self.custom_aws_role, "s3", config=self.config
-                )
-            logger.info("Generating presigned URL")
-            return self.custom_client.generate_presigned_url(
-                "put_object",
-                Params={"Bucket": s3_bucket_name, "Key": file_key},
-                ExpiresIn=self.presigned_url_expiry,
-            )
         return None
 
+    def create_upload_presigned_post(
+        self,
+        s3_bucket_name: str,
+        s3_object_location: str,
+        tags: Mapping[str, str] | None = None,
+        require_autodelete: bool = False,
+    ):
+        if not self.custom_client:
+            return None
+
+        self._refresh_custom_session_if_needed()
+
+        final_tags = (
+            self.ensure_autodelete_tag(tags) if require_autodelete else dict(tags or {})
+        )
+
+        fields: dict[str, Any] = {}
+        conditions: list[Any] = []
+
+        if final_tags:
+            # A POST policy takes the tag-set as XML in the "tagging" form field,
+            # unlike PUT, which takes a URL-encoded x-amz-tagging header
+            tagging = self.build_tagging_xml(final_tags)
+            fields["tagging"] = tagging
+            conditions.append({"tagging": tagging})
+
+        return self.custom_client.generate_presigned_post(
+            s3_bucket_name,
+            s3_object_location,
+            Fields=fields or None,
+            Conditions=conditions or None,
+            ExpiresIn=self.presigned_url_expiry,
+        )
+
+    def create_put_presigned_url(
+        self,
+        s3_bucket_name: str,
+        file_key: str,
+        tags: Mapping[str, str] | None = None,
+        require_autodelete: bool = False,
+        extra_params: Mapping[str, Any] | None = None,
+    ):
+        if not self.custom_client:
+            return None
+
+        self._refresh_custom_session_if_needed()
+
+        final_tags = (
+            self.ensure_autodelete_tag(tags) if require_autodelete else dict(tags or {})
+        )
+
+        params: dict[str, Any] = {"Bucket": s3_bucket_name, "Key": file_key}
+
+        if final_tags:
+            params["Tagging"] = self.build_tagging_query(final_tags)
+
+        if extra_params:
+            params.update(extra_params)
+
+        logger.info("Generating presigned URL")
+        return self.custom_client.generate_presigned_url(
+            "put_object",
+            Params=params,
+            ExpiresIn=self.presigned_url_expiry,
+        )
+
     def create_download_presigned_url(self, s3_bucket_name: str, file_key: str):
         if self.custom_client:
-            if datetime.now(timezone.utc) > self.expiration_time - timedelta(
-                minutes=10
-            ):
-                logger.info(S3Service.EXPIRED_SESSION_WARNING)
-                self.custom_client, self.expiration_time = self.iam_service.assume_role(
-                    self.custom_aws_role, "s3", config=self.config
-                )
+            self._refresh_custom_session_if_needed()
             logger.info("Generating presigned URL")
             return self.custom_client.generate_presigned_url(
                 "get_object",
@@ -143,11 +228,9 @@ def copy_across_bucket(
                     if_none_match,
                     False,
                 )
-            else:
-                raise e
-        else:
-            logger.error(f"Copy failed: {e}")
-            raise e
+            raise
+        logger.error(f"Copy failed: {e}")
+        raise
 
     def delete_object(
         self, s3_bucket_name: str, file_key: str, version_id: str | None = None
@@ -159,6 +242,35 @@ def delete_object(
             Bucket=s3_bucket_name, Key=file_key, VersionId=version_id
         )
 
+    def delete_object_hard(self, s3_bucket_name: str, file_key: str) -> None:
+        """
+        Delete ALL versions and delete markers for a given key.
+        """
+        try:
+            paginator = self.client.get_paginator("list_object_versions")
+            to_delete: list[dict[str, str]] = []
+
+            for page in paginator.paginate(Bucket=s3_bucket_name, Prefix=file_key):
+                for v in page.get("Versions", []):
+                    if v.get("Key") == file_key:
+                        to_delete.append({"Key": file_key, "VersionId": v["VersionId"]})
+                for m in page.get("DeleteMarkers", []):
+                    if m.get("Key") == file_key:
+                        to_delete.append({"Key": file_key, "VersionId": m["VersionId"]})
+
+            # delete_objects accepts at most 1000 keys per request
+            for i in range(0, len(to_delete), 1000):
+                chunk = to_delete[i : i + 1000]
+                if chunk:
+                    self.client.delete_objects(
+                        Bucket=s3_bucket_name,
+                        Delete={"Objects": chunk, "Quiet": True},
+                    )
+        except ClientError as e:
+            logger.error(f"Hard delete failed for s3://{s3_bucket_name}/{file_key}: {e}")
+            raise
+
+
     def create_object_tag(
         self, s3_bucket_name: str, file_key: str, tag_key: str, tag_value: str
     ):
@@ -202,7 +314,7 @@ def file_exist_on_s3(self, s3_bucket_name: str, file_key: str) -> bool:
         ):
             return False
         logger.error(str(e), {"Result": "Failed to check if file exists on s3"})
-        raise e
+        raise
 
     def list_all_objects(self, bucket_name: str) -> list[dict]:
         s3_paginator = self.client.get_paginator("list_objects_v2")
@@ -236,20 +348,30 @@ def upload_file_obj(
         s3_bucket_name: str,
         file_key: str,
         extra_args: Mapping[str, Any] = None,
+        require_autodelete: bool = False,
+        tags: Mapping[str, str] | None = None,
     ):
         try:
+            final_extra_args: dict[str, Any] = dict(extra_args or {})
+
+            if require_autodelete:
+                final_tags = self.ensure_autodelete_tag(tags)
+                final_extra_args["Tagging"] = self.build_tagging_query(final_tags)
+            elif tags:
+                final_extra_args["Tagging"] = self.build_tagging_query(tags)
+
             self.client.upload_fileobj(
                 Fileobj=file_obj,
                 Bucket=s3_bucket_name,
                 Key=file_key,
-                ExtraArgs=extra_args or {},
+                ExtraArgs=final_extra_args,
             )
             logger.info(f"Uploaded file object to s3://{s3_bucket_name}/{file_key}")
         except ClientError as e:
             logger.error(
                 f"Failed to upload file object to s3://{s3_bucket_name}/{file_key} - {e}"
             )
-            raise e
+            raise
 
     def save_or_create_file(self, source_bucket: str, file_key: str, body: bytes):
         return self.client.put_object(
diff --git a/lambdas/services/fhir_document_reference_service_base.py b/lambdas/services/fhir_document_reference_service_base.py
index 1ae935e12..7e0fac16c 100644
--- a/lambdas/services/fhir_document_reference_service_base.py
+++ b/lambdas/services/fhir_document_reference_service_base.py
@@ -52,6 +52,7 @@ def _store_binary_in_s3(
             file_obj=binary_file,
             s3_bucket_name=document_reference.s3_bucket_name,
             file_key=document_reference.s3_upload_key,
+            require_autodelete=True,
         )
         logger.info(
             f"Successfully stored binary content in S3: {document_reference.s3_upload_key}"
@@ -77,7 +78,9 @@ def _create_s3_presigned_url(self, document_reference: DocumentReference) -> str
         """Create a pre-signed URL for uploading a file"""
         try:
             response = self.s3_service.create_put_presigned_url(
-                document_reference.s3_bucket_name, document_reference.s3_upload_key
+                s3_bucket_name=document_reference.s3_bucket_name,
+                file_key=document_reference.s3_upload_key,
+                require_autodelete=False,
             )
             logger.info(
                 f"Successfully created pre-signed URL for {document_reference.s3_upload_key}"
@@ -138,10 +141,9 @@ def _get_document_reference(self, document_id: str, table) -> DocumentReference:
         if len(documents) > 0:
             logger.info("Document found for given id")
             return documents[0]
-        else:
-            raise FhirDocumentReferenceException(
-                f"Did not find any documents for document ID {document_id}"
-            )
+        raise FhirDocumentReferenceException(
+            f"Did not find any documents for document ID {document_id}"
+        )
 
     def _determine_document_type(self, fhir_doc: FhirDocumentReference) -> SnomedCode:
         """Determine the document type based on SNOMED code in the FHIR document"""
@@ -190,13 +192,10 @@ def _create_fhir_response(
         presigned_url: str,
     ) -> str:
         """Create a FHIR response document"""
-
         if presigned_url:
             attachment_url = presigned_url
         else:
-            document_retrieve_endpoint = os.getenv(
-                "DOCUMENT_RETRIEVE_ENDPOINT_APIM", ""
-            )
+            document_retrieve_endpoint = os.getenv("DOCUMENT_RETRIEVE_ENDPOINT_APIM", "")
             attachment_url = (
                 document_retrieve_endpoint
                 + "/"
@@ -252,6 +251,7 @@ def _handle_document_save(
             presigned_url = self._create_s3_presigned_url(document_reference)
         except FhirDocumentReferenceException:
             raise DocumentRefException(500, LambdaError.InternalServerError)
+
         try:
             # Save document reference to DynamoDB
             self._save_document_reference_to_dynamo(dynamo_table, document_reference)
diff --git a/lambdas/services/post_document_review_service.py b/lambdas/services/post_document_review_service.py
index 7d5389401..ddbed0dfc 100644
--- a/lambdas/services/post_document_review_service.py
+++ b/lambdas/services/post_document_review_service.py
@@ -154,6 +154,7 @@ def create_review_document_upload_presigned_url(
         presign_url_response = self.s3_service.create_put_presigned_url(
             s3_bucket_name=self.staging_bucket,
             file_key=file_key,
+            require_autodelete=False,
         )
         presigned_id = f"upload/{upload_id}"
         deletion_date = datetime.now(timezone.utc)
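The tag helpers the services rely on are pure functions, so their contract is easy
to pin down in isolation. A short sketch of the expected values (these asserts are
illustrative, not lifted from the test suite):

    from urllib.parse import parse_qs

    from services.base.s3_service import S3Service

    tags = S3Service.ensure_autodelete_tag({"source": "upload"})
    assert tags == {"source": "upload", "autodelete": "true"}

    # setdefault means an explicit opt-out is never overridden
    assert S3Service.ensure_autodelete_tag({"autodelete": "false"}) == {
        "autodelete": "false"
    }

    # the PUT form round-trips through standard querystring parsing
    assert parse_qs(S3Service.build_tagging_query(tags)) == {
        "source": ["upload"],
        "autodelete": ["true"],
    }

    # the POST form is the XML tag-set S3 expects in the "tagging" field
    assert S3Service.build_tagging_xml({"autodelete": "true"}) == (
        "<Tagging><TagSet><Tag><Key>autodelete</Key>"
        "<Value>true</Value></Tag></TagSet></Tagging>"
    )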
diff --git a/lambdas/tests/unit/services/test_fhir_document_reference_service_base.py b/lambdas/tests/unit/services/test_fhir_document_reference_service_base.py
index c91c463c0..c3e2e6aaa 100644
--- a/lambdas/tests/unit/services/test_fhir_document_reference_service_base.py
+++ b/lambdas/tests/unit/services/test_fhir_document_reference_service_base.py
@@ -1,3 +1,4 @@
+import base64
 import json
 
 import pytest
@@ -32,9 +33,8 @@ def mock_service(mocker):
     mocker.patch("services.fhir_document_reference_service_base.S3Service")
     mocker.patch("services.fhir_document_reference_service_base.DynamoDBService")
-
+    mocker.patch("services.document_service.DynamoDBService")
     service = FhirDocumentReferenceServiceBase()
-
     yield service
@@ -143,7 +143,6 @@ def test_dynamo_error(mock_service, mocker):
 
 def test_save_document_reference_to_dynamo_error(mock_service):
     """Test _save_document_reference_to_dynamo method with DynamoDB error."""
-
     mock_service.dynamo_service.create_item.side_effect = ClientError(
         {"Error": {"Code": "InternalServerError", "Message": "Test error"}},
         "CreateItem",
@@ -188,10 +187,8 @@ def test_check_nhs_number_with_pds_success(mock_service, mocker):
     )
     mock_service_object.fetch_patient_details.return_value = mock_pds_patient_details
 
-    # This should not raise an exception
     result = mock_service._check_nhs_number_with_pds("9000000009")
 
-    # Verify the method was called correctly
     mock_service_object.fetch_patient_details.assert_called_once_with("9000000009")
     assert result == mock_pds_patient_details
@@ -238,7 +235,7 @@ def test_create_document_reference_with_author(mock_service, mocker):
     assert result.document_snomed_code_type == "test-code"
     assert result.custodian == "A12345"
     assert result.current_gp_ods == "C13579"
-    assert result.author == "B67890"  # Verify author is set
+    assert result.author == "B67890"
     assert result.version == "2"
@@ -277,9 +274,7 @@ def test_create_document_reference_without_custodian(mock_service, mocker):
         s3_file_key="mock_s3_file_key",
     )
 
-    assert (
-        result.custodian == current_gp_ods
-    )  # Custodian should default to current_gp_ods
+    assert result.custodian == current_gp_ods
 
 
 def test_determine_document_type_with_missing_type(mock_service, mocker):
@@ -321,7 +316,6 @@ def test_save_document_reference_to_dynamo_success(mock_service):
 
 
 def test_process_fhir_document_reference_with_invalid_base64_data(mock_service):
-    """Test process_fhir_document_reference with invalid base64 data."""
     with pytest.raises(FhirDocumentReferenceException):
         mock_service._store_binary_in_s3(
             TEST_DOCUMENT_REFERENCE, b"invalid-base64-data!!!"
@@ -331,8 +325,6 @@ def test_process_fhir_document_reference_with_invalid_base64_data(mock_service):
 def test_determine_document_type_returns_lloyd_george_type(
     mock_service, valid_fhir_doc_object
 ):
-    """Test that determine_document_type returns the lloyd george type for
-    a lloyd george document"""
     result = mock_service._determine_document_type(valid_fhir_doc_object)
 
     assert result == SnomedCodes.LLOYD_GEORGE.value
@@ -341,12 +333,9 @@ def test_determine_document_type_returns_lloyd_george_type(
 def test_determine_document_type_non_snomed_coding(
     mock_service, valid_fhir_doc_object: FhirDocumentReference
 ):
-    """Test that determine_document_type returns the lloyd george type for
-    a lloyd george document"""
     valid_fhir_doc_object.type.coding.append(
         Coding(system="mocked_system", code="mocked_code", display="mocked_display")
     )
-
     valid_fhir_doc_object.type.coding.reverse()
 
     result = mock_service._determine_document_type(valid_fhir_doc_object)
@@ -357,12 +346,9 @@ def test_determine_document_type_non_george_lloyd_code(
     mock_service, valid_fhir_doc_object: FhirDocumentReference
 ):
-    """Test that determine_document_type returns the lloyd george type for
-    a lloyd george document"""
     valid_fhir_doc_object.type.coding.append(
         Coding(system=SNOMED_URL, code="mocked_code", display="mocked_display")
     )
-
     valid_fhir_doc_object.type.coding.reverse()
 
     result = mock_service._determine_document_type(valid_fhir_doc_object)
@@ -371,7 +357,6 @@ def test_get_document_reference_no_documents_found(mocker, mock_service):
-    """Test that get_document_reference raises an error when there are no document results"""
     mock_service.fetch_documents_from_table = mocker.patch(
         "services.fhir_document_reference_service_base.DocumentService.fetch_documents_from_table",
         return_value=[],
@@ -382,7 +367,6 @@ def test_get_document_reference_returns_document_reference(mocker, mock_service):
-    """Test that get_document_reference returns the first document reference from the results"""
     documents = create_test_lloyd_george_doc_store_refs()
 
     mock_service.document_service.fetch_documents_from_table = mocker.patch(
@@ -396,7 +380,6 @@ def test_create_s3_presigned_url_error(mock_service):
-    """Test that create_s3_presigned_url raises a FhirDocumentReferenceException on AWS S3 ClientError"""
     mock_service.s3_service.create_put_presigned_url.side_effect = ClientError(
         {"Error": {}}, ""
     )
@@ -407,7 +390,6 @@ def test_create_s3_presigned_url_returns_url(mock_service):
-    """Test that create_s3_presigned_url returns a url"""
     mock_presigned_url_response = "https://test-bucket.s3.amazonaws.com/"
     mock_service.s3_service.create_put_presigned_url.return_value = (
         mock_presigned_url_response
@@ -417,6 +399,11 @@ def test_create_s3_presigned_url_returns_url(mock_service):
     result = mock_service._create_s3_presigned_url(document)
 
     assert result == mock_presigned_url_response
+    mock_service.s3_service.create_put_presigned_url.assert_called_once_with(
+        s3_bucket_name=document.s3_bucket_name,
+        file_key=document.s3_upload_key,
+        require_autodelete=False,
+    )
 
 
 def test_store_binary_in_s3_success(mock_service, mocker):
@@ -430,12 +417,12 @@ def test_store_binary_in_s3_success(mock_service, mocker):
     mock_service.s3_service.upload_file_obj.assert_called_once_with(
         file_obj=mocker.ANY,
         s3_bucket_name=TEST_DOCUMENT_REFERENCE.s3_bucket_name,
-        file_key=TEST_DOCUMENT_REFERENCE.s3_file_key,
+        file_key=TEST_DOCUMENT_REFERENCE.s3_upload_key,
+        require_autodelete=True,
     )
 
 
 def test_store_binary_in_s3_with_client_error(mock_service):
-    """Test _store_binary_in_s3 method with S3 ClientError."""
     binary_data = b"SGVsbG8gV29ybGQ="
 
     mock_service.s3_service.upload_file_obj.side_effect = ClientError(
@@ -453,9 +440,8 @@ def test_store_binary_in_s3_with_large_binary_data(mock_service):
-    """Test _store_binary_in_s3 method with large binary data."""
-    # Create a large binary data (8MB)
-    binary_data = b"A" * (8 * 1024 * 1024)
+    raw = b"A" * (6 * 1024 * 1024)  # 6MB raw
+    binary_data = base64.b64encode(raw)
 
     mock_service._store_binary_in_s3(TEST_DOCUMENT_REFERENCE, binary_data)
@@ -463,35 +449,30 @@ def test_store_binary_in_s3_on_memory_error(mock_service):
-    """Test that store_binary_in_s3 raises FhirDocumentReferenceException when MemoryError is raised"""
     mock_service.s3_service.upload_file_obj.side_effect = MemoryError()
     document = create_test_lloyd_george_doc_store_refs()[0]
 
     with pytest.raises(FhirDocumentReferenceException):
-        mock_service._store_binary_in_s3(document, bytes())
+        mock_service._store_binary_in_s3(document, b"SGVsbG8=")
 
 
 def test_store_binary_in_s3_on_oserror(mock_service):
-    """Test that store_binary_in_s3 raises FhirDocumentReferenceException when OSError is raised"""
     mock_service.s3_service.upload_file_obj.side_effect = OSError()
     document = create_test_lloyd_george_doc_store_refs()[0]
 
     with pytest.raises(FhirDocumentReferenceException):
-        mock_service._store_binary_in_s3(document, bytes())
+        mock_service._store_binary_in_s3(document, b"SGVsbG8=")
 
 
 def test_store_binary_in_s3_on_ioerror(mock_service):
-    """Test that store_binary_in_s3 raises FhirDocumentReferenceException when IOError is raised"""
     mock_service.s3_service.upload_file_obj.side_effect = IOError()
     document = create_test_lloyd_george_doc_store_refs()[0]
 
     with pytest.raises(FhirDocumentReferenceException):
-        mock_service._store_binary_in_s3(document, bytes())
+        mock_service._store_binary_in_s3(document, b"SGVsbG8=")
 
 
 def test_get_dynamo_table_for_patient_data_doc_type(set_env, mock_service):
-    """Test _get_dynamo_table_for_doc_type method with a non-Lloyd George document type."""
-
     patient_data_code = SnomedCodes.PATIENT_DATA.value
 
     result = mock_service._get_dynamo_table_for_doc_type(patient_data_code)
@@ -499,8 +480,6 @@ def test_get_dynamo_table_for_unsupported_doc_type(set_env, mock_service):
-    """Test _get_dynamo_table_for_doc_type method with a non-Lloyd George document type."""
-
     non_lg_code = SnomedCode(code="non-lg-code", display_name="Non Lloyd George")
 
     with pytest.raises(DocumentRefException) as excinfo:
@@ -511,7 +490,6 @@ def test_get_dynamo_table_for_lloyd_george_doc_type(set_env, mock_service):
-    """Test _get_dynamo_table_for_doc_type method with Lloyd George document type."""
     lg_code = SnomedCodes.LLOYD_GEORGE.value
 
     result = mock_service._get_dynamo_table_for_doc_type(lg_code)
@@ -520,8 +498,6 @@ def test_create_fhir_response_with_presigned_url(mock_service, mocker):
-    """Test _create_fhir_response method with a presigned URL."""
-
     mocker.patch.object(
         SnomedCodes, "find_by_code", return_value=SnomedCodes.LLOYD_GEORGE.value
     )
@@ -547,8 +523,6 @@ def test_create_fhir_response_without_presigned_url(set_env, mock_service, mocker):
-    """Test _create_fhir_response method without a presigned URL (for binary uploads)."""
-
     mocker.patch.object(
         SnomedCodes, "find_by_code", return_value=SnomedCodes.LLOYD_GEORGE.value
     )
@@ -584,8 +558,6 @@ def test_handle_document_save_returns_presigned_url(
     valid_fhir_doc_object,
     mock_document_reference,
 ):
-    """Test _handle_document_save method returns presigned URL for Lloyd George document."""
-
     result = mock_service._handle_document_save(
         mock_document_reference, valid_fhir_doc_object, "test_table"
     )
@@ -605,6 +577,7 @@ def test_handle_document_save_stores_binary_in_s3(
     result = mock_service._handle_document_save(
         mock_document_reference, valid_fhir_doc_object, "test_table"
     )
+
     assert result is None
     mock_service._store_binary_in_s3.assert_called_once()
@@ -642,8 +615,6 @@ def test_handle_document_save_create_s3_failure(
     valid_fhir_doc_object,
     mock_document_reference,
 ):
-    """Test _handle_document_save method raises FhirDocumentReferenceException when S3 presigned URL creation fails."""
-
     mocker.patch.object(
         mock_service,
         "_create_s3_presigned_url",
@@ -668,8 +639,6 @@ def test_save_document_reference_to_dynamo_failure(
     mock_create_s3_presigned_url,
     mock_document_reference,
 ):
-    """Test _handle_document_save method raises FhirDocumentReferenceException when saving to DynamoDB fails."""
-
     mocker.patch.object(
         mock_service,
         "_save_document_reference_to_dynamo",
diff --git a/lambdas/tests/unit/services/test_post_document_review_service.py b/lambdas/tests/unit/services/test_post_document_review_service.py
index 6e13db23e..7003dc1b8 100644
--- a/lambdas/tests/unit/services/test_post_document_review_service.py
+++ b/lambdas/tests/unit/services/test_post_document_review_service.py
@@ -225,6 +225,7 @@ def test_create_presigned_urls_for_review_reference_files_creates_presign_writes
     mock_service.s3_service.create_put_presigned_url.assert_called_with(
         s3_bucket_name=MOCK_STAGING_STORE_BUCKET,
         file_key=f"review/{document_review_reference.id}/{TEST_UUID}",
+        require_autodelete=False,
    )
     mock_service.review_document_service.dynamo_service.create_item.assert_called_once()
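A closing note on the reworked large-binary test: base64 inflates data by a factor
of 4/3, so the 6MB of raw bytes the test now encodes produces exactly the 8MB
payload the old test passed in directly. A standalone check of that arithmetic
(illustrative, not part of the suite):

    import base64

    raw = b"A" * (6 * 1024 * 1024)
    encoded = base64.b64encode(raw)

    # 6MB is divisible by 3, so no padding: 6MB * 4/3 == 8MB exactly
    assert len(encoded) == 8 * 1024 * 1024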