From 84e38fd719e9a1837454bc0d1594380ed353b096 Mon Sep 17 00:00:00 2001
From: NogaNHS <127490765+NogaNHS@users.noreply.github.com>
Date: Wed, 11 Feb 2026 10:54:46 +0000
Subject: [PATCH 01/13] [PRMP-1384] Add file validation for document uploads
and update dependencies
- Implement a check for locked or corrupt files during the document upload process.
- Introduce a new INVALID status for documents that are password protected or corrupted.
- Add a requirements file for the files_lambda_layer that includes msoffcrypto-tool.
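
Illustrative sketch of the new flow (names as in this patch; not an
exact excerpt): after a CLEAN virus scan result, the upload service
inspects the file itself before promoting it out of staging.

    file_type_extension = document_reference.file_name.split(".")[-1].lower()
    if upload_service.is_file_invalid(object_key, file_type_extension):
        # surfaced to clients as DocumentStatus.INVALID / error code UC_4006
        virus_scan_result = VirusScanResult.INVALID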
---
...base-lambda-layer-reusable-publish-all.yml | 12 ++
.../base-lambdas-reusable-deploy-all.yml | 2 +-
Makefile | 4 +
lambdas/enums/document_status.py | 1 +
lambdas/enums/lambda_error.py | 8 +-
lambdas/enums/virus_scan_result.py | 1 +
.../requirements_files_lambda_layer.txt | 1 +
lambdas/requirements/requirements_test.txt | 2 +-
lambdas/services/base/s3_service.py | 35 ++---
.../services/get_document_upload_status.py | 6 +-
lambdas/services/pdf_stitch_service.py | 34 -----
.../upload_document_reference_service.py | 94 +++++++-----
.../unit/services/test_pdf_stitch_service.py | 68 ---------
.../test_upload_document_reference_service.py | 111 +++++++++-----
lambdas/tests/unit/utils/test_file_utils.py | 129 ++++++++++++++++-
lambdas/utils/file_utils.py | 43 +++++-
poetry.lock | 135 ++++++++++++------
pyproject.toml | 3 +
18 files changed, 441 insertions(+), 248 deletions(-)
create mode 100644 lambdas/requirements/layers/requirements_files_lambda_layer.txt
delete mode 100755 lambdas/services/pdf_stitch_service.py
delete mode 100644 lambdas/tests/unit/services/test_pdf_stitch_service.py
diff --git a/.github/workflows/base-lambda-layer-reusable-publish-all.yml b/.github/workflows/base-lambda-layer-reusable-publish-all.yml
index be97edb37f..a7e49b91b9 100644
--- a/.github/workflows/base-lambda-layer-reusable-publish-all.yml
+++ b/.github/workflows/base-lambda-layer-reusable-publish-all.yml
@@ -87,3 +87,15 @@ jobs:
lambda_layer_name: alerting_lambda_layer
secrets:
AWS_ASSUME_ROLE: ${{ secrets.AWS_ASSUME_ROLE }}
+
+ deploy_files_lambda_layer:
+ name: Deploy files_lambda_layer
+ uses: ./.github/workflows/base-lambda-layer-reusable-publish.yml
+ with:
+      environment: ${{ inputs.environment }}
+ python_version: ${{ inputs.python_version }}
+ build_branch: ${{ inputs.build_branch }}
+ sandbox: ${{ inputs.sandbox }}
+ lambda_layer_name: files_lambda_layer
+ secrets:
+ AWS_ASSUME_ROLE: ${{ secrets.AWS_ASSUME_ROLE }}
\ No newline at end of file
diff --git a/.github/workflows/base-lambdas-reusable-deploy-all.yml b/.github/workflows/base-lambdas-reusable-deploy-all.yml
index 495c36fb46..32d347ae45 100644
--- a/.github/workflows/base-lambdas-reusable-deploy-all.yml
+++ b/.github/workflows/base-lambdas-reusable-deploy-all.yml
@@ -695,7 +695,7 @@ jobs:
sandbox: ${{ inputs.sandbox }}
lambda_handler_name: document_reference_virus_scan_handler
lambda_aws_name: DocumentReferenceVirusScanCheck
- lambda_layer_names: "core_lambda_layer"
+ lambda_layer_names: "core_lambda_layer,files_lambda_layer"
secrets:
AWS_ASSUME_ROLE: ${{ secrets.AWS_ASSUME_ROLE }}
diff --git a/Makefile b/Makefile
index f9739a9c22..704f970661 100644
--- a/Makefile
+++ b/Makefile
@@ -7,6 +7,7 @@ GITHUB_REQUIREMENTS=$(REQUIREMENTS_PATH)/requirements_github_runner.txt
TEST_REQUIREMENTS=$(REQUIREMENTS_PATH)/requirements_test.txt
CORE_REQUIREMENTS=$(LAMBDA_LAYER_REQUIREMENTS_PATH)/requirements_core_lambda_layer.txt
DATA_REQUIREMENTS=$(LAMBDA_LAYER_REQUIREMENTS_PATH)/requirements_data_lambda_layer.txt
+FILES_REQUIREMENTS=$(LAMBDA_LAYER_REQUIREMENTS_PATH)/requirements_files_lambda_layer.txt
REPORTS_REQUIREMENTS=$(LAMBDA_LAYER_REQUIREMENTS_PATH)/requirements_reports_lambda_layer.txt
ALERTING_REQUIREMENTS=$(LAMBDA_LAYER_REQUIREMENTS_PATH)/requirements_alerting_lambda_layer.txt
EDGE_REQUIREMENTS=$(REQUIREMENTS_PATH)/requirements_edge_lambda.txt
@@ -98,6 +99,7 @@ sort-requirements:
sort -o $(TEST_REQUIREMENTS) $(TEST_REQUIREMENTS)
sort -o $(CORE_REQUIREMENTS) $(CORE_REQUIREMENTS)
sort -o $(DATA_REQUIREMENTS) $(DATA_REQUIREMENTS)
+ sort -o $(FILES_REQUIREMENTS) $(FILES_REQUIREMENTS)
sort -o $(REPORTS_REQUIREMENTS) $(REPORTS_REQUIREMENTS)
sort -o $(ALERTING_REQUIREMENTS) $(ALERTING_REQUIREMENTS)
@@ -106,6 +108,7 @@ check-packages:
./lambdas/venv/bin/pip-audit -r $(TEST_REQUIREMENTS)
./lambdas/venv/bin/pip-audit -r $(CORE_REQUIREMENTS)
./lambdas/venv/bin/pip-audit -r $(DATA_REQUIREMENTS)
+ ./lambdas/venv/bin/pip-audit -r $(FILES_REQUIREMENTS)
./lambdas/venv/bin/pip-audit -r $(REPORTS_REQUIREMENTS)
./lambdas/venv/bin/pip-audit -r $(ALERTING_REQUIREMENTS)
@@ -206,6 +209,7 @@ env:
@./lambdas/venv/bin/pip3 install -r $(TEST_REQUIREMENTS) --no-cache-dir
@./lambdas/venv/bin/pip3 install -r $(CORE_REQUIREMENTS) --no-cache-dir
@./lambdas/venv/bin/pip3 install -r $(DATA_REQUIREMENTS) --no-cache-dir
+ @./lambdas/venv/bin/pip3 install -r $(FILES_REQUIREMENTS) --no-cache-dir
@./lambdas/venv/bin/pip3 install -r $(REPORTS_REQUIREMENTS) --no-cache-dir
@./lambdas/venv/bin/pip3 install -r $(ALERTING_REQUIREMENTS) --no-cache-dir
@echo " "
diff --git a/lambdas/enums/document_status.py b/lambdas/enums/document_status.py
index 2a16cd5bec..65ccac376b 100644
--- a/lambdas/enums/document_status.py
+++ b/lambdas/enums/document_status.py
@@ -6,6 +6,7 @@ class DocumentStatus(Enum):
FORBIDDEN = ("forbidden", "UC_4003")
NOT_FOUND = ("not-found", "UC_4004")
INFECTED = ("infected", "UC_4005")
+ INVALID = ("invalid", "UC_4006")
@property
def code(self):
diff --git a/lambdas/enums/lambda_error.py b/lambdas/enums/lambda_error.py
index 57563fd3db..95b60ccdfb 100644
--- a/lambdas/enums/lambda_error.py
+++ b/lambdas/enums/lambda_error.py
@@ -43,7 +43,7 @@ def create_error_response(
return error_response
def to_str(
- self, params: Optional[dict] = None, details: Optional[str] = None
+ self, params: Optional[dict] = None, details: Optional[str] = None,
) -> str:
message = self.value["message"]
if "%" in message and params:
@@ -59,7 +59,7 @@ def create_error_body(
**kwargs,
) -> str:
return self.create_error_response(
- params=params, details=details, **kwargs
+ params=params, details=details, **kwargs,
).create()
"""
@@ -440,6 +440,10 @@ def create_error_body(
"err_code": "UC_4005",
"message": "Some of the given document references are not referring to clean files",
}
+ UploadConfirmResultFilesInvalid = {
+ "err_code": "UC_4006",
+ "message": "Some of the given document references are password protected or corrupted",
+ }
UploadConfirmResultAWSFailure = {
"err_code": "UC_5004",
"message": "Error occurred with an AWS service",
diff --git a/lambdas/enums/virus_scan_result.py b/lambdas/enums/virus_scan_result.py
index 484d9e9027..ade769ab5c 100644
--- a/lambdas/enums/virus_scan_result.py
+++ b/lambdas/enums/virus_scan_result.py
@@ -7,6 +7,7 @@ class VirusScanResult(StrEnum):
INFECTED_ALLOWED = "InfectedAllowed"
UNSCANNABLE = "Unscannable"
ERROR = "Error"
+ INVALID = "Invalid"
SCAN_RESULT_TAG_KEY = "scan-result"
diff --git a/lambdas/requirements/layers/requirements_files_lambda_layer.txt b/lambdas/requirements/layers/requirements_files_lambda_layer.txt
new file mode 100644
index 0000000000..0d4bff021d
--- /dev/null
+++ b/lambdas/requirements/layers/requirements_files_lambda_layer.txt
@@ -0,0 +1 @@
+msoffcrypto-tool==6.0.0
\ No newline at end of file
diff --git a/lambdas/requirements/requirements_test.txt b/lambdas/requirements/requirements_test.txt
index 2a3ae5bf28..ac64194651 100644
--- a/lambdas/requirements/requirements_test.txt
+++ b/lambdas/requirements/requirements_test.txt
@@ -9,4 +9,4 @@ pytest-unordered==0.6.0
pytest==7.4.3
requests_mock==1.11.0
ruff==0.12.8
-syrupy==4.9.1
+syrupy==4.9.1
\ No newline at end of file
diff --git a/lambdas/services/base/s3_service.py b/lambdas/services/base/s3_service.py
index 3ee42b4e24..c9de2a9bea 100644
--- a/lambdas/services/base/s3_service.py
+++ b/lambdas/services/base/s3_service.py
@@ -40,7 +40,7 @@ def __init__(self, custom_aws_role=None):
if custom_aws_role:
self.iam_service = IAMService()
self.custom_client, self.expiration_time = self.iam_service.assume_role(
- self.custom_aws_role, "s3", config=self.config
+ self.custom_aws_role, "s3", config=self.config,
)
# S3 Location should be a minimum of a s3_object_key but can also be a directory location in the form of
@@ -48,11 +48,11 @@ def __init__(self, custom_aws_role=None):
def create_upload_presigned_url(self, s3_bucket_name: str, s3_object_location: str):
if self.custom_client:
if datetime.now(timezone.utc) > self.expiration_time - timedelta(
- minutes=10
+ minutes=10,
):
logger.info(S3Service.EXPIRED_SESSION_WARNING)
self.custom_client, self.expiration_time = self.iam_service.assume_role(
- self.custom_aws_role, "s3", config=self.config
+ self.custom_aws_role, "s3", config=self.config,
)
return self.custom_client.generate_presigned_post(
s3_bucket_name,
@@ -65,11 +65,11 @@ def create_upload_presigned_url(self, s3_bucket_name: str, s3_object_location: s
def create_put_presigned_url(self, s3_bucket_name: str, file_key: str):
if self.custom_client:
if datetime.now(timezone.utc) > self.expiration_time - timedelta(
- minutes=10
+ minutes=10,
):
logger.info(S3Service.EXPIRED_SESSION_WARNING)
self.custom_client, self.expiration_time = self.iam_service.assume_role(
- self.custom_aws_role, "s3", config=self.config
+ self.custom_aws_role, "s3", config=self.config,
)
logger.info("Generating presigned URL")
return self.custom_client.generate_presigned_url(
@@ -82,11 +82,11 @@ def create_put_presigned_url(self, s3_bucket_name: str, file_key: str):
def create_download_presigned_url(self, s3_bucket_name: str, file_key: str):
if self.custom_client:
if datetime.now(timezone.utc) > self.expiration_time - timedelta(
- minutes=10
+ minutes=10,
):
logger.info(S3Service.EXPIRED_SESSION_WARNING)
self.custom_client, self.expiration_time = self.iam_service.assume_role(
- self.custom_aws_role, "s3", config=self.config
+ self.custom_aws_role, "s3", config=self.config,
)
logger.info("Generating presigned URL")
return self.custom_client.generate_presigned_url(
@@ -150,17 +150,17 @@ def copy_across_bucket(
raise e
def delete_object(
- self, s3_bucket_name: str, file_key: str, version_id: str | None = None
+ self, s3_bucket_name: str, file_key: str, version_id: str | None = None,
):
if version_id is None:
return self.client.delete_object(Bucket=s3_bucket_name, Key=file_key)
return self.client.delete_object(
- Bucket=s3_bucket_name, Key=file_key, VersionId=version_id
+ Bucket=s3_bucket_name, Key=file_key, VersionId=version_id,
)
def create_object_tag(
- self, s3_bucket_name: str, file_key: str, tag_key: str, tag_value: str
+ self, s3_bucket_name: str, file_key: str, tag_key: str, tag_value: str,
):
return self.client.put_object_tagging(
Bucket=s3_bucket_name,
@@ -168,7 +168,7 @@ def create_object_tag(
Tagging={
"TagSet": [
{"Key": tag_key, "Value": tag_value},
- ]
+ ],
},
)
@@ -182,7 +182,7 @@ def get_tag_value(self, s3_bucket_name: str, file_key: str, tag_key: str) -> str
return key_value_pair["Value"]
raise TagNotFoundException(
- f"Object {file_key} doesn't have a tag of key {tag_key}"
+ f"Object {file_key} doesn't have a tag of key {tag_key}",
)
def file_exist_on_s3(self, s3_bucket_name: str, file_key: str) -> bool:
@@ -218,8 +218,11 @@ def get_file_size(self, s3_bucket_name: str, object_key: str) -> int:
def get_head_object(self, bucket: str, key: str):
return self.client.head_object(Bucket=bucket, Key=key)
- def get_object_stream(self, bucket: str, key: str):
- response = self.client.get_object(Bucket=bucket, Key=key)
+    def get_object_stream(self, bucket: str, key: str, byte_range: str | None = None):
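+        # byte_range, when provided, is forwarded as the S3 Range header,
+        # e.g. "bytes=0-1023"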
+ params = {"Bucket": bucket, "Key": key}
+ if byte_range:
+ params["Range"] = byte_range
+ response = self.client.get_object(**params)
return response.get("Body")
def stream_s3_object_to_memory(self, bucket: str, key: str) -> BytesIO:
@@ -247,11 +250,11 @@ def upload_file_obj(
logger.info(f"Uploaded file object to s3://{s3_bucket_name}/{file_key}")
except ClientError as e:
logger.error(
- f"Failed to upload file object to s3://{s3_bucket_name}/{file_key} - {e}"
+ f"Failed to upload file object to s3://{s3_bucket_name}/{file_key} - {e}",
)
raise e
def save_or_create_file(self, source_bucket: str, file_key: str, body: bytes):
return self.client.put_object(
- Bucket=source_bucket, Key=file_key, Body=BytesIO(body)
+ Bucket=source_bucket, Key=file_key, Body=BytesIO(body),
)
diff --git a/lambdas/services/get_document_upload_status.py b/lambdas/services/get_document_upload_status.py
index 113b08908b..f6c67c6b67 100644
--- a/lambdas/services/get_document_upload_status.py
+++ b/lambdas/services/get_document_upload_status.py
@@ -24,12 +24,14 @@ def _determine_document_status(self, doc_ref, nhs_number):
if doc_ref.doc_status == "cancelled":
if doc_ref.virus_scanner_result == VirusScanResult.INFECTED:
return DocumentStatus.INFECTED.display, DocumentStatus.INFECTED.code
+ elif doc_ref.virus_scanner_result == VirusScanResult.INVALID:
+ return DocumentStatus.INVALID.display, DocumentStatus.INVALID.code
return DocumentStatus.CANCELLED.display, DocumentStatus.CANCELLED.code
return doc_ref.doc_status, None
def get_document_references_by_id(
- self, nhs_number: str, document_ids: list[str]
+ self, nhs_number: str, document_ids: list[str],
) -> dict:
"""
Checks the status of a list of documents for a given patient.
@@ -42,7 +44,7 @@ def get_document_references_by_id(
A dictionary with a list of document IDs and their corresponding statuses.
"""
found_docs = self.document_service.get_batch_document_references_by_id(
- document_ids, SupportedDocumentTypes.LG
+ document_ids, SupportedDocumentTypes.LG,
)
found_docs_by_id = {doc.id: doc for doc in found_docs}
results = {}
diff --git a/lambdas/services/pdf_stitch_service.py b/lambdas/services/pdf_stitch_service.py
deleted file mode 100755
index d08a20d1ab..0000000000
--- a/lambdas/services/pdf_stitch_service.py
+++ /dev/null
@@ -1,34 +0,0 @@
-import os
-from uuid import uuid4
-
-from pypdf import PdfReader, PdfWriter
-from utils.audit_logging_setup import LoggingService
-
-logger = LoggingService(__name__)
-
-
-def stitch_pdf(filenames: list[str], temp_folder: str = "/tmp/") -> str:
- """
- Given a list of local PDF files, stitch them into one file and return the local file path of a resulting file.
-
- Example usage:
- filenames = ["file1.pdf", "file2.pdf", "file3.pdf"]
- tmp_folder = "/tmp/"
- stitch_pdf(filename, tmp_folder)
-
- Result:
- "/tmp/(filename_of_stitched_file).pdf"
- """
- merger = PdfWriter()
- for filename in filenames:
- merger.append(filename)
- output_filename = os.path.join(temp_folder, f"{str(uuid4())}.pdf")
- merger.write(output_filename)
- return output_filename
-
-
-def count_page_number(filename: str) -> int:
- """
- Return the total number of pages in a PDF file
- """
- return len(PdfReader(filename).pages)
diff --git a/lambdas/services/upload_document_reference_service.py b/lambdas/services/upload_document_reference_service.py
index 685d2e1544..eaded11355 100644
--- a/lambdas/services/upload_document_reference_service.py
+++ b/lambdas/services/upload_document_reference_service.py
@@ -1,3 +1,4 @@
+import io
import os
from typing import Optional
@@ -21,6 +22,7 @@
FileProcessingException,
TransactionConflictException,
)
+from utils.file_utils import check_file_locked_or_corrupt
from utils.lambda_exceptions import InvalidDocTypeException
from utils.s3_utils import DocTypeS3BucketRouter
from utils.utilities import get_virus_scan_service
@@ -42,7 +44,7 @@ def __init__(self):
self.bucket_router = DocTypeS3BucketRouter()
def handle_upload_document_reference_request(
- self, object_key: str, object_size: int = 0
+ self, object_key: str, object_size: int = 0,
):
"""Handle the upload document reference request with comprehensive error handling"""
if not object_key:
@@ -58,13 +60,13 @@ def handle_upload_document_reference_request(
self._get_infrastructure_for_document_key(object_parts)
preliminary_document_reference = self._fetch_preliminary_document_reference(
- document_key, nhs_number
+ document_key, nhs_number,
)
if not preliminary_document_reference:
return
self._process_preliminary_document_reference(
- preliminary_document_reference, object_key, object_size
+ preliminary_document_reference, object_key, object_size,
)
except Exception as e:
@@ -86,12 +88,12 @@ def _get_infrastructure_for_document_key(self, object_parts: list[str]) -> None:
self.destination_bucket_name = self.bucket_router.resolve(doc_type)
except KeyError:
logger.error(
- f"SNOMED code {doc_type.code} - {doc_type.display_name} is not supported"
+ f"SNOMED code {doc_type.code} - {doc_type.display_name} is not supported",
)
raise InvalidDocTypeException(400, LambdaError.DocTypeDB)
def _fetch_preliminary_document_reference(
- self, document_key: str, nhs_number: str | None = None
+ self, document_key: str, nhs_number: str | None = None,
) -> Optional[DocumentReference]:
"""Fetch document reference from the database"""
try:
@@ -101,7 +103,7 @@ def _fetch_preliminary_document_reference(
else:
if not nhs_number:
logger.error(
- f"Failed to process object key with ID: {document_key}"
+ f"Failed to process object key with ID: {document_key}",
)
raise FileProcessingException(400, LambdaError.DocRefInvalidFiles)
@@ -117,24 +119,24 @@ def _fetch_preliminary_document_reference(
if not documents:
logger.error(
- f"No document with the following key found in {self.table_name} table: {document_key}"
+ f"No document with the following key found in {self.table_name} table: {document_key}",
)
logger.info("Skipping this object")
return None
if len(documents) > 1:
logger.warning(
- f"Multiple documents found for key {document_key}, using first one"
+ f"Multiple documents found for key {document_key}, using first one",
)
return documents[0]
except ClientError as e:
logger.error(
- f"Error fetching document reference for key {document_key}: {str(e)}"
+ f"Error fetching document reference for key {document_key}: {str(e)}",
)
raise DocumentServiceException(
- f"Failed to fetch document reference: {str(e)}"
+ f"Failed to fetch document reference: {str(e)}",
)
def _process_preliminary_document_reference(
@@ -146,20 +148,29 @@ def _process_preliminary_document_reference(
"""Process the preliminary (uploading) document reference with virus scanning and file operations"""
try:
virus_scan_result = self._perform_virus_scan(
- preliminary_document_reference, object_key
+ preliminary_document_reference, object_key,
)
- preliminary_document_reference.virus_scanner_result = virus_scan_result
if virus_scan_result == VirusScanResult.CLEAN:
- self._process_clean_document(
- preliminary_document_reference,
- object_key,
- )
+                file_type_extension = (
+                    preliminary_document_reference.file_name.split(".")[-1].lower()
+                )
+                if self.is_file_invalid(object_key, file_type_extension):
+                    logger.warning(
+                        f"Document {preliminary_document_reference.id} is password protected or corrupt, "
+                        f"marking as such in database",
+                    )
+                    virus_scan_result = VirusScanResult.INVALID
+                else:
+ self._process_clean_document(
+ preliminary_document_reference,
+ object_key,
+ )
else:
logger.warning(
- f"Document {preliminary_document_reference.id} failed virus scan"
+ f"Document {preliminary_document_reference.id} failed virus scan",
)
+ preliminary_document_reference.virus_scanner_result = virus_scan_result
preliminary_document_reference.file_size = object_size
preliminary_document_reference.uploaded = True
preliminary_document_reference.uploading = False
@@ -173,7 +184,7 @@ def _process_preliminary_document_reference(
and self.doc_type.code != SnomedCodes.PATIENT_DATA.value.code
):
self._finalize_and_supersede_with_transaction(
- preliminary_document_reference
+ preliminary_document_reference,
)
# Update NRL Pointer
@@ -184,7 +195,7 @@ def _process_preliminary_document_reference(
except Exception as e:
logger.error(
- f"Error processing document reference {preliminary_document_reference.id}: {str(e)}"
+ f"Error processing document reference {preliminary_document_reference.id}: {str(e)}",
)
raise
@@ -199,7 +210,7 @@ def _finalize_and_supersede_with_transaction(self, new_document: DocumentReferen
"""
try:
logger.info(
- f"Checking for existing final documents to supersede for NHS number {new_document.nhs_number}"
+ f"Checking for existing final documents to supersede for NHS number {new_document.nhs_number}",
)
existing_docs: list[DocumentReference] = (
@@ -243,7 +254,7 @@ def _finalize_and_supersede_with_transaction(self, new_document: DocumentReferen
# Supersede existing final documents
if existing_docs:
logger.info(
- f"Superseding {len(existing_docs)} existing final document(s) for NHS number {new_document.nhs_number}"
+ f"Superseding {len(existing_docs)} existing final document(s) for NHS number {new_document.nhs_number}",
)
for doc in existing_docs:
@@ -297,28 +308,28 @@ def _finalize_and_supersede_with_transaction(self, new_document: DocumentReferen
f" and superseded {len(existing_docs)} document(s)"
if existing_docs
else ""
- )
+ ),
)
except ClientError as e:
error_code = e.response.get("Error", {}).get("Code", "")
if error_code == "TransactionCanceledException":
logger.error(
- f"Transaction cancelled - concurrent update detected for NHS number {new_document.nhs_number}"
+ f"Transaction cancelled - concurrent update detected for NHS number {new_document.nhs_number}",
)
raise TransactionConflictException(
f"Concurrent update detected while finalizing document for NHS number {new_document.nhs_number}. "
- f"Another process may have already finalized a document for this patient."
+ f"Another process may have already finalized a document for this patient.",
)
raise
except Exception as e:
if isinstance(e, TransactionConflictException):
logger.error(
- f"Cancelling preliminary document {new_document.id} due to transaction conflict"
+ f"Cancelling preliminary document {new_document.id} due to transaction conflict",
)
else:
logger.error(
- f"Unexpected error while finalizing document for {new_document.nhs_number}: {e}"
+ f"Unexpected error while finalizing document for {new_document.nhs_number}: {e}",
)
new_document.doc_status = "cancelled"
@@ -327,47 +338,47 @@ def _finalize_and_supersede_with_transaction(self, new_document: DocumentReferen
new_document.file_size = None
self._update_dynamo_table(new_document)
self.delete_file_from_bucket(
- new_document.file_location, new_document.s3_version_id
+ new_document.file_location, new_document.s3_version_id,
)
def document_reference_key(self, document_id):
return {DocumentReferenceMetadataFields.ID.value: document_id}
def _perform_virus_scan(
- self, document_reference: DocumentReference, object_key: str
+ self, document_reference: DocumentReference, object_key: str,
) -> VirusScanResult:
"""Perform a virus scan on the document"""
try:
return self.virus_scan_service.scan_file(
- object_key, nhs_number=document_reference.nhs_number
+ object_key, nhs_number=document_reference.nhs_number,
)
except Exception as e:
logger.error(
- f"Virus scan failed for document {document_reference.id}: {str(e)}"
+ f"Virus scan failed for document {document_reference.id}: {str(e)}",
)
return VirusScanResult.ERROR
def _process_clean_document(
- self, document_reference: DocumentReference, object_key: str
+ self, document_reference: DocumentReference, object_key: str,
):
"""Process a document that passed virus scanning"""
try:
self.copy_files_from_staging_bucket(document_reference, object_key)
logger.info(
- f"Successfully processed clean document: {document_reference.id}"
+ f"Successfully processed clean document: {document_reference.id}",
)
except Exception as e:
logger.error(
- f"Error processing clean document {document_reference.id}: {str(e)}"
+ f"Error processing clean document {document_reference.id}: {str(e)}",
)
document_reference.doc_status = "cancelled"
raise FileProcessingException(f"Failed to process clean document: {str(e)}")
def copy_files_from_staging_bucket(
- self, document_reference: DocumentReference, source_file_key: str
+ self, document_reference: DocumentReference, source_file_key: str,
):
"""Copy files from staging bucket to destination bucket"""
try:
@@ -389,7 +400,7 @@ def copy_files_from_staging_bucket(
)
document_reference.s3_bucket_name = self.destination_bucket_name
document_reference.file_location = document_reference._build_s3_location(
- self.destination_bucket_name, dest_file_key
+ self.destination_bucket_name, dest_file_key,
)
document_reference.s3_version_id = copy_result.get("VersionId")
return copy_result
@@ -397,7 +408,7 @@ def copy_files_from_staging_bucket(
except ClientError as e:
logger.error(f"Error copying files from staging bucket: {str(e)}")
raise FileProcessingException(
- f"Failed to copy file from staging bucket: {str(e)}"
+ f"Failed to copy file from staging bucket: {str(e)}",
)
def delete_file_from_staging_bucket(self, source_file_key: str):
@@ -413,10 +424,10 @@ def delete_file_from_bucket(self, file_location: str, version_id: str):
"""Delete file from bucket"""
try:
s3_bucket_name, source_file_key = DocumentReference._parse_s3_location(
- file_location
+ file_location,
)
logger.info(
- f"Deleting file from bucket: {s3_bucket_name}/{source_file_key}"
+ f"Deleting file from bucket: {s3_bucket_name}/{source_file_key}",
)
self.s3_service.delete_object(s3_bucket_name, source_file_key, version_id)
@@ -457,5 +468,10 @@ def _update_dynamo_table(
except ClientError as e:
logger.error(f"Error updating DynamoDB table: {str(e)}")
raise DocumentServiceException(
- f"Failed to update document in database: {str(e)}"
+ f"Failed to update document in database: {str(e)}",
)
+
+ def is_file_invalid(self, object_key: str, file_type_extension: str) -> bool:
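+        """Read the uploaded object from the staging bucket and return True
+        if the file is password protected or corrupt."""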
+        entire_object = self.s3_service.get_object_stream(
+            self.staging_s3_bucket_name, object_key,
+        )
+ file_stream = io.BytesIO(entire_object.read())
+ return check_file_locked_or_corrupt(file_stream, file_type_extension)
diff --git a/lambdas/tests/unit/services/test_pdf_stitch_service.py b/lambdas/tests/unit/services/test_pdf_stitch_service.py
deleted file mode 100644
index 2634e1a1fb..0000000000
--- a/lambdas/tests/unit/services/test_pdf_stitch_service.py
+++ /dev/null
@@ -1,68 +0,0 @@
-import os
-import tempfile
-from io import BytesIO
-
-import pytest
-from pypdf import PdfWriter
-from pypdf.errors import PyPdfError
-from services.pdf_stitch_service import count_page_number, stitch_pdf
-
-
-def test_stitch_pdf():
- test_pdf_folder = "tests/unit/helpers/data/pdf/"
- input_test_files = [
- f"{test_pdf_folder}/{filename}"
- for filename in ["file1.pdf", "file2.pdf", "file3.pdf"]
- ]
-
- stitched_file = stitch_pdf(input_test_files)
- assert count_page_number(stitched_file) == sum(
- count_page_number(filepath) for filepath in input_test_files
- )
-
- os.remove(stitched_file)
-
-
-def test_stitch_pdf_with_given_desc_folder():
- test_pdf_folder = "tests/unit/helpers/data/pdf/"
- test_desc_folder = tempfile.mkdtemp()
-
- input_test_files = [
- f"{test_pdf_folder}/{filename}"
- for filename in ["file1.pdf", "file2.pdf", "file3.pdf"]
- ]
-
- stitched_file = stitch_pdf(input_test_files, test_desc_folder)
-
- assert stitched_file.startswith(test_desc_folder)
-
- os.remove(stitched_file)
-
-
-def test_stitch_pdf_raise_error_if_fail_to_perform_stitching():
- test_pdf_folder = "tests/unit/helpers/data/pdf/"
- input_test_files = [
- f"{test_pdf_folder}/{filename}" for filename in ["invalid_pdf.pdf", "file1.pdf"]
- ]
-
- with pytest.raises(PyPdfError):
- stitch_pdf(input_test_files)
-
-
-def test_stitch_pdf_raise_error_when_input_file_not_found():
- test_file = "non-exist-file.pdf"
-
- with pytest.raises(FileNotFoundError):
- stitch_pdf([test_file])
-
-
-def create_in_memory_pdf(page_count: int = 1) -> BytesIO:
- # Creates a PDF in memory with the received number of pages
- writer = PdfWriter()
- for _ in range(page_count):
- writer.add_blank_page(width=72, height=72)
-
- stream = BytesIO()
- writer.write(stream)
- stream.seek(0)
- return stream
diff --git a/lambdas/tests/unit/services/test_upload_document_reference_service.py b/lambdas/tests/unit/services/test_upload_document_reference_service.py
index e030d461fd..3d85e5df70 100644
--- a/lambdas/tests/unit/services/test_upload_document_reference_service.py
+++ b/lambdas/tests/unit/services/test_upload_document_reference_service.py
@@ -11,7 +11,7 @@
FinalOrPreliminaryAndNotSuperseded,
PreliminaryStatus,
)
-from utils.exceptions import DocumentServiceException, FileProcessingException, TransactionConflictException
+from utils.exceptions import DocumentServiceException, FileProcessingException
from utils.lambda_exceptions import InvalidDocTypeException
from lambdas.enums.snomed_codes import SnomedCodes
@@ -23,6 +23,7 @@ def mock_document_reference():
doc_ref = DocumentReference.model_construct()
doc_ref.id = "test-doc-id"
doc_ref.nhs_number = "9000000001"
+ doc_ref.file_name = "test-file.txt"
doc_ref.s3_file_key = "original/test-key"
doc_ref.s3_bucket_name = "original-bucket"
doc_ref.file_location = "original-location"
@@ -31,7 +32,7 @@ def mock_document_reference():
doc_ref.doc_status = "preliminary"
doc_ref.version = "1"
doc_ref._build_s3_location = Mock(
- return_value="s3://test-lg-bucket/9000000001/test-doc-id"
+ return_value="s3://test-lg-bucket/9000000001/test-doc-id",
)
return doc_ref
@@ -41,13 +42,13 @@ def mock_virus_scan_service(
mocker,
):
mock = mocker.patch(
- "services.upload_document_reference_service.get_virus_scan_service"
+ "services.upload_document_reference_service.get_virus_scan_service",
)
yield mock
@pytest.fixture
-def service(set_env, mock_virus_scan_service):
+def service(set_env, mock_virus_scan_service, mocker):
with patch.multiple(
"services.upload_document_reference_service",
DocumentService=Mock(),
@@ -59,6 +60,8 @@ def service(set_env, mock_virus_scan_service):
service.dynamo_service = Mock()
service.virus_scan_service = MockVirusScanService()
service.s3_service = Mock()
+    mocker.patch("io.BytesIO", return_value=None)
+    mocker.patch(
+        "services.upload_document_reference_service.check_file_locked_or_corrupt",
+        return_value=False,
+    )
return service
@@ -71,33 +74,34 @@ def test_handle_upload_document_reference_request_with_empty_object_key(service)
def test_handle_upload_document_reference_request_with_none_object_key(service):
"""Test handling of a None object key"""
     service.handle_upload_document_reference_request(None, 122)
service.document_service.fetch_documents_from_table.assert_not_called()
def test_handle_upload_document_reference_request_success(
- service, mock_document_reference, mocker
+ service, mock_document_reference, mocker,
):
- """Test successful handling of the upload document reference request"""
object_key = "staging/test-doc-id"
object_size = 1111
mock_document_reference2 = Mock(spec=DocumentReference)
+ mock_document_reference2.file_name = "filename2.txt"
mock_document_reference2.id = "another-doc-id"
mock_document_reference2.doc_status = "final"
mock_document_reference2.version = "1"
service.s3_service.copy_across_bucket.return_value = {
- "VersionId": "test-version-id"
+ "VersionId": "test-version-id",
}
-
# First call fetches preliminary doc, second call fetches existing final docs to supersede
service.document_service.fetch_documents_from_table.side_effect = [
[mock_document_reference],
[mock_document_reference2],
]
service.virus_scan_service.scan_file = mocker.MagicMock(
- return_value=VirusScanResult.CLEAN
+ return_value=VirusScanResult.CLEAN,
)
service.handle_upload_document_reference_request(object_key, object_size)
@@ -114,7 +118,7 @@ def test_handle_upload_document_reference_request_with_exception(service):
object_key = "staging/test-doc-id"
service.document_service.fetch_documents_from_table.side_effect = Exception(
- "Test error"
+ "Test error",
)
service.handle_upload_document_reference_request(object_key)
@@ -125,7 +129,7 @@ def test_fetch_preliminary_document_reference_success(service, mock_document_ref
document_key = "test-doc-id"
service.table_name = "dev_LloydGeorgeReferenceMetadata"
service.document_service.fetch_documents_from_table.return_value = [
- mock_document_reference
+ mock_document_reference,
]
result = service._fetch_preliminary_document_reference(document_key)
@@ -150,7 +154,7 @@ def test_fetch_preliminary_document_reference_no_documents_found(service):
def test_fetch_preliminary_document_reference_multiple_documents_warning(
- service, mock_document_reference
+ service, mock_document_reference,
):
"""Test handling when multiple documents are found"""
document_key = "test-doc-id"
@@ -177,23 +181,23 @@ def test_fetch_preliminary_document_reference_exception(service):
def test__process_preliminary_document_reference_clean_virus_scan(
- service, mock_document_reference, mocker
+ service, mock_document_reference, mocker,
):
"""Test processing document reference with a clean virus scan"""
object_key = "staging/test-doc-id"
mocker.patch.object(
- service, "_perform_virus_scan", return_value=VirusScanResult.CLEAN
+ service, "_perform_virus_scan", return_value=VirusScanResult.CLEAN,
)
mock_delete = mocker.patch.object(service, "delete_file_from_staging_bucket")
mock_process_clean = mocker.patch.object(service, "_process_clean_document")
mock_finalize_transaction = mocker.patch.object(
- service, "_finalize_and_supersede_with_transaction"
+ service, "_finalize_and_supersede_with_transaction",
)
service._process_preliminary_document_reference(
- mock_document_reference, object_key, 1222
+ mock_document_reference, object_key, 1222,
)
mock_process_clean.assert_called_once()
@@ -205,20 +209,20 @@ def test__process_preliminary_document_reference_clean_virus_scan(
def test__process_preliminary_document_reference_infected_virus_scan(
- service, mock_document_reference, mocker
+ service, mock_document_reference, mocker,
):
"""Test processing document reference with an infected virus scan"""
object_key = "staging/test-doc-id"
mocker.patch.object(
- service, "_perform_virus_scan", return_value=VirusScanResult.INFECTED
+ service, "_perform_virus_scan", return_value=VirusScanResult.INFECTED,
)
mock_delete = mocker.patch.object(service, "delete_file_from_staging_bucket")
mock_process_clean = mocker.patch.object(service, "_process_clean_document")
mock_update_dynamo = mocker.patch.object(service, "_update_dynamo_table")
service._process_preliminary_document_reference(
- mock_document_reference, object_key, 1222
+ mock_document_reference, object_key, 1222,
)
mock_process_clean.assert_not_called()
@@ -233,7 +237,7 @@ def test_perform_virus_scan_returns_clean_hardcoded(service, mock_document_refer
def test_perform_virus_scan_exception_returns_infected(
- service, mock_document_reference, mocker
+ service, mock_document_reference, mocker,
):
"""Test virus scan exception handling returns INFECTED for safety"""
mock_virus_service = mocker.patch.object(service, "virus_scan_service")
@@ -260,7 +264,7 @@ def test_process_clean_document_success(service, mock_document_reference, mocker
def test_process_clean_document_exception_restores_original_values(
- service, mock_document_reference, mocker
+ service, mock_document_reference, mocker,
):
"""Test that original values are restored when processing fails"""
object_key = "staging/test-doc-id"
@@ -269,7 +273,7 @@ def test_process_clean_document_exception_restores_original_values(
original_location = "original-location"
mocker.patch.object(
- service, "copy_files_from_staging_bucket", side_effect=Exception("Copy failed")
+ service, "copy_files_from_staging_bucket", side_effect=Exception("Copy failed"),
)
with pytest.raises(FileProcessingException):
service._process_clean_document(
@@ -306,7 +310,7 @@ def test_copy_files_from_staging_bucket_client_error(service, mock_document_refe
source_file_key = "staging/test-doc-id"
client_error = ClientError(
error_response={
- "Error": {"Code": "NoSuchBucket", "Message": "Bucket does not exist"}
+ "Error": {"Code": "NoSuchBucket", "Message": "Bucket does not exist"},
},
operation_name="CopyObject",
)
@@ -323,7 +327,7 @@ def test_delete_file_from_staging_bucket_success(service):
service.delete_file_from_staging_bucket(source_file_key)
service.s3_service.delete_object.assert_called_once_with(
- MOCK_STAGING_STORE_BUCKET, source_file_key
+ MOCK_STAGING_STORE_BUCKET, source_file_key,
)
@@ -336,7 +340,7 @@ def test_delete_pdm_file_from_staging_bucket_success(service):
service.delete_file_from_staging_bucket(source_file_key)
service.s3_service.delete_object.assert_called_once_with(
- MOCK_STAGING_STORE_BUCKET, source_file_key
+ MOCK_STAGING_STORE_BUCKET, source_file_key,
)
@@ -345,7 +349,7 @@ def test_delete_file_from_staging_bucket_client_error(service):
source_file_key = "staging/test-doc-id"
client_error = ClientError(
error_response={
- "Error": {"Code": "NoSuchKey", "Message": "Key does not exist"}
+ "Error": {"Code": "NoSuchKey", "Message": "Key does not exist"},
},
operation_name="DeleteObject",
)
@@ -389,7 +393,7 @@ def test_update_dynamo_table_client_error(service, mock_document_reference):
"""Test handling of ClientError during DynamoDB update"""
client_error = ClientError(
error_response={
- "Error": {"Code": "ResourceNotFoundException", "Message": "Table not found"}
+ "Error": {"Code": "ResourceNotFoundException", "Message": "Table not found"},
},
operation_name="UpdateItem",
)
@@ -418,7 +422,7 @@ def test_document_key_extraction_from_object_key_for_lg(
# First call returns preliminary doc, second call returns empty list (no existing finals)
service.s3_service.copy_across_bucket.return_value = {
- "VersionId": "test-version-id"
+ "VersionId": "test-version-id",
}
service.document_service.fetch_documents_from_table.side_effect = [
@@ -446,7 +450,7 @@ def test_document_key_extraction_from_object_key_for_lg(
def test_finalize_and_supersede_with_transaction_with_existing_finals(
- service, mock_document_reference, mocker
+ service, mock_document_reference,
):
"""Test transaction-based finalisation with existing final documents to supersede"""
new_doc = mock_document_reference
@@ -463,7 +467,7 @@ def test_finalize_and_supersede_with_transaction_with_existing_finals(
service.table_name = "dev_LloydGeorgeReferenceMetadata"
service.document_service.fetch_documents_from_table.return_value = [
- existing_final_doc
+ existing_final_doc,
]
mock_build_update = Mock(return_value={"Update": "transaction1"})
@@ -511,7 +515,7 @@ def test_finalize_and_supersede_with_transaction_with_existing_finals(
def test_finalize_and_supersede_with_transaction_no_existing_docs(
- service, mock_document_reference, mocker
+ service, mock_document_reference,
):
"""Test transaction-based finalization when no existing final documents found"""
new_doc = mock_document_reference
@@ -535,7 +539,7 @@ def test_finalize_and_supersede_with_transaction_no_existing_docs(
def test_finalize_and_supersede_with_transaction_multiple_existing(
- service, mock_document_reference, mocker
+ service, mock_document_reference,
):
"""Test transaction-based finalization superseding multiple existing final documents"""
new_doc = mock_document_reference
@@ -571,7 +575,7 @@ def test_finalize_and_supersede_with_transaction_multiple_existing(
def test_finalize_and_supersede_with_transaction_skips_same_id(
- service, mock_document_reference, mocker
+ service, mock_document_reference,
):
"""Test that transaction skips documents with the same ID"""
new_doc = mock_document_reference
@@ -599,7 +603,7 @@ def test_finalize_and_supersede_with_transaction_skips_same_id(
def test_finalize_and_supersede_with_transaction_handles_transaction_cancelled(
- service, mock_document_reference
+ service, mock_document_reference,
):
new_doc = mock_document_reference
@@ -639,21 +643,21 @@ def test_handle_upload_document_reference_request_no_document_found(service):
def test_process_preliminary_document_reference_exception_during_processing(
- service, mock_document_reference, mocker
+ service, mock_document_reference, mocker,
):
"""Test that exceptions during processing are properly raised"""
object_key = "staging/test-doc-id"
mocker.patch.object(
- service, "_perform_virus_scan", return_value=VirusScanResult.CLEAN
+ service, "_perform_virus_scan", return_value=VirusScanResult.CLEAN,
)
mocker.patch.object(
- service, "_process_clean_document", side_effect=Exception("Processing failed")
+ service, "_process_clean_document", side_effect=Exception("Processing failed"),
)
with pytest.raises(Exception) as exc_info:
service._process_preliminary_document_reference(
- mock_document_reference, object_key, 1222
+ mock_document_reference, object_key, 1222,
)
assert "Processing failed" in str(exc_info.value)
@@ -690,3 +694,32 @@ def test_get_infra_invalid_doc_type(monkeypatch, service):
# Call function and assert the exception is raised
with pytest.raises(InvalidDocTypeException):
service._get_infrastructure_for_document_key(["fhir_upload", "999999"])
+
+
+def test_is_file_invalid_calls_correct_functions(service, mocker):
+ """Test that is_file_invalid calls the right functions in the correct order"""
+ object_key = "test-folder/test-file.docx"
+ file_extension = "docx"
+ file_content = b"fake docx file content"
+
+ mock_stream = Mock()
+ mock_stream.read.return_value = file_content
+ service.s3_service.get_object_stream.return_value = mock_stream
+ mock_bytesio = mocker.patch("services.upload_document_reference_service.io.BytesIO")
+ mock_file_stream = Mock()
+ mock_bytesio.return_value = mock_file_stream
+ mock_check = mocker.patch(
+ "services.upload_document_reference_service.check_file_locked_or_corrupt",
+ return_value=True,
+ )
+
+ result = service.is_file_invalid(object_key, file_extension)
+
+ assert result is True
+ service.s3_service.get_object_stream.assert_called_once_with(
+ service.staging_s3_bucket_name, object_key,
+ )
+ mock_stream.read.assert_called_once_with()
+ mock_bytesio.assert_called_once_with(file_content)
+ mock_check.assert_called_once_with(mock_file_stream, file_extension)
diff --git a/lambdas/tests/unit/utils/test_file_utils.py b/lambdas/tests/unit/utils/test_file_utils.py
index 3a225f1841..6bda04566c 100644
--- a/lambdas/tests/unit/utils/test_file_utils.py
+++ b/lambdas/tests/unit/utils/test_file_utils.py
@@ -1,4 +1,11 @@
-from utils.file_utils import convert_csv_dictionary_to_bytes
+from io import BytesIO
+from unittest.mock import MagicMock, Mock, patch
+
+import pytest
+from utils.file_utils import (
+ check_file_locked_or_corrupt,
+ convert_csv_dictionary_to_bytes,
+)
def test_convert_csv_dictionary_to_bytes():
@@ -9,10 +16,128 @@ def test_convert_csv_dictionary_to_bytes():
]
result_bytes = convert_csv_dictionary_to_bytes(
- headers=headers, csv_dict_data=metadata_csv_data, encoding="utf-8"
+ headers=headers, csv_dict_data=metadata_csv_data, encoding="utf-8",
)
result_str = result_bytes.decode("utf-8")
expected_output = "id,name,age\r\n1,Alice,30\r\n2,Bob,25\r\n"
assert result_str == expected_output
+
+@pytest.mark.parametrize(
+ "file_extension,file_content,expected_result",
+ [
+ ("pdf", b"%PDF-1.4\nsome content", False),
+ ("zip", b"PK\x03\x04some zip content", False),
+ ],
+ ids=["pdf_file", "zip_file"],
+)
+def test_skipped_file_types(file_extension, file_content, expected_result):
+ file_stream = BytesIO(file_content)
+ result = check_file_locked_or_corrupt(file_stream, file_extension)
+ assert result == expected_result
+
+@pytest.mark.parametrize(
+ "file_extension,is_encrypted,expected_result",
+ [
+ ("docx", False, False),
+ ("docx", True, True),
+ ("xlsx", False, False),
+ ("xlsx", True, True),
+ ("pptx", False, False),
+ ("pptx", True, True),
+ ("doc", False, False),
+ ("doc", True, True),
+ ("xls", False, False),
+ ("xls", True, True),
+ ("ppt", False, False),
+ ("ppt", True, True),
+ ],
+)
+@patch("utils.file_utils.msoffcrypto.OfficeFile")
+def test_office_files(mock_office_file, file_extension, is_encrypted, expected_result):
+ mock_instance = Mock()
+ mock_instance.is_encrypted.return_value = is_encrypted
+ mock_office_file.return_value = mock_instance
+
+ file_stream = BytesIO(b"fake office content")
+ result = check_file_locked_or_corrupt(file_stream, file_extension)
+
+ assert result == expected_result
+ mock_office_file.assert_called_once_with(file_stream)
+ mock_instance.is_encrypted.assert_called_once()
+
+@pytest.mark.parametrize(
+ "file_extension,file_content,expected_result",
+ [
+ ("rtf", b"{\\rtf1\\ansi\\deff0 {\\fonttbl {\\f0 Times New Roman;}}\\f0\\fs60 Hello!}", False),
+ ("rtf", b"This is not an RTF file", True),
+ ("csv", b"name,age,city\nAlice,30,NYC\nBob,25,LA", False),
+ ("csv", b"\xff\xfe Invalid UTF-8", True),
+ ("json", b'{"key": "value", "number": 123}', False),
+ ("txt", b"This is a simple text file.\nWith multiple lines.", False),
+ ("txt", b"", False),
+        ("xml", b"<root><item>data</item></root>", False),
+ ],
+)
+def test_text_based_files(file_extension, file_content, expected_result):
+ file_stream = BytesIO(file_content)
+ result = check_file_locked_or_corrupt(file_stream, file_extension)
+ assert result == expected_result
+
+@pytest.mark.parametrize(
+ "file_extension",
+    ["jpg", "jpeg", "png", "tiff", "tif"],
+ ids=["jpg", "jpeg", "png", "tiff", "tif"],
+)
+@patch("utils.file_utils.Image.open")
+def test_image_files_valid(mock_image_open, file_extension):
+ mock_img = MagicMock()
+ mock_image_open.return_value.__enter__.return_value = mock_img
+
+ file_stream = BytesIO(b"fake image content")
+ result = check_file_locked_or_corrupt(file_stream, file_extension)
+
+ assert result is False
+ mock_image_open.assert_called_once_with(file_stream)
+ mock_img.verify.assert_called_once()
+
+@pytest.mark.parametrize(
+ "file_extension",
+ ["jpg", "png", "tiff"],
+)
+@patch("utils.file_utils.Image.open")
+def test_image_files_corrupt(mock_image_open, file_extension):
+ mock_image_open.side_effect = Exception("Corrupt image")
+
+ file_stream = BytesIO(b"corrupt image data")
+ result = check_file_locked_or_corrupt(file_stream, file_extension)
+
+ assert result is True
+
+@pytest.mark.parametrize(
+ "file_extension",
+ ["unknown", "mp4", "mp3", "avi", "mov"],
+ ids=["unknown", "mp4", "mp3", "avi", "mov"],
+)
+def test_unsupported_file_extensions(file_extension):
+ file_stream = BytesIO(b"some content")
+ result = check_file_locked_or_corrupt(file_stream, file_extension)
+ assert result is False
+
+@pytest.mark.parametrize(
+ "file_extension",
+ ["docx", "xlsx", "pptx", "doc", "xls"],
+ ids=["docx", "xlsx", "pptx", "doc", "xls"],
+)
+@patch("utils.file_utils.msoffcrypto.OfficeFile")
+def test_office_file_exception_returns_true(mock_office_file, file_extension):
+ mock_office_file.side_effect = Exception("Unable to process file")
+
+ file_stream = BytesIO(b"corrupt office content")
+ result = check_file_locked_or_corrupt(file_stream, file_extension)
+
+ assert result is True
diff --git a/lambdas/utils/file_utils.py b/lambdas/utils/file_utils.py
index ec24c67bc3..4623c3bfeb 100644
--- a/lambdas/utils/file_utils.py
+++ b/lambdas/utils/file_utils.py
@@ -1,9 +1,14 @@
import csv
from io import BytesIO, TextIOWrapper
+import msoffcrypto
+from PIL import Image
+from utils.audit_logging_setup import LoggingService
+
+logger = LoggingService(__name__)
def convert_csv_dictionary_to_bytes(
- headers: list[str], csv_dict_data: list[dict], encoding: str = "utf-8"
+ headers: list[str], csv_dict_data: list[dict], encoding: str = "utf-8",
) -> bytes:
csv_buffer = BytesIO()
csv_text_wrapper = TextIOWrapper(csv_buffer, encoding=encoding, newline="")
@@ -20,3 +25,39 @@ def convert_csv_dictionary_to_bytes(
csv_buffer.close()
return result
+
+
+def check_file_locked_or_corrupt(file_stream: BytesIO, ext: str) -> bool:
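+    """
+    Return True when the file appears password protected or corrupt, False otherwise.
+
+    PDFs and ZIPs are skipped (already covered by the antivirus scan), Office
+    formats are checked for encryption via msoffcrypto, text formats must decode
+    as UTF-8, and images are verified with Pillow. Unsupported extensions are
+    treated as valid.
+    """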
+ file_stream.seek(0)
+    text_exts = ["rtf", "csv", "json", "txt", "xml"]
+    media_exts = ["jpg", "jpeg", "png", "tiff", "tif"]
+ try:
+        if ext in ("pdf", "zip"):
+            # Skipping PDF and ZIP checks, as these are covered by the antivirus scan
+            logger.info(f"Skipping check for {ext} files")
+ return False
+
+        elif ext in ["docx", "xlsx", "pptx", "doc", "xls", "ppt"]:
+            office_file = msoffcrypto.OfficeFile(file_stream)
+            return office_file.is_encrypted()
+
+ elif ext in text_exts:
+            sample = file_stream.read(1024)
+            sample.decode("utf-8")
+            if ext == "rtf" and not sample.startswith(b"{\\rtf1"):
+ return True
+ return False
+
+ elif ext in media_exts:
+ with Image.open(file_stream) as img:
+ img.verify()
+ return False
+
+ else:
+            logger.info(
+                f"File with extension {ext} is not supported for locked/corrupt check, treating as valid.",
+            )
+ return False
+
+ except Exception as e:
+ logger.error(f"Error checking file validity for .{ext}: {type(e).__name__} - {str(e)}")
+ return True
\ No newline at end of file
diff --git a/poetry.lock b/poetry.lock
index c329268964..4bf25323d7 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -205,7 +205,7 @@ version = "2.0.0"
description = "Foreign Function Interface for Python calling C code."
optional = false
python-versions = ">=3.9"
-groups = ["core-lambda"]
+groups = ["core-lambda", "files-lambda"]
markers = "platform_python_implementation != \"PyPy\""
files = [
{file = "cffi-2.0.0-cp310-cp310-macosx_10_13_x86_64.whl", hash = "sha256:0cf2d91ecc3fcc0625c2c530fe004f82c110405f101548512cce44322fa8ac44"},
@@ -517,56 +517,74 @@ toml = ["tomli ; python_full_version <= \"3.11.0a6\""]
[[package]]
name = "cryptography"
-version = "44.0.1"
+version = "46.0.5"
description = "cryptography is a package which provides cryptographic recipes and primitives to Python developers."
optional = false
-python-versions = "!=3.9.0,!=3.9.1,>=3.7"
-groups = ["core-lambda"]
-files = [
- {file = "cryptography-44.0.1-cp37-abi3-macosx_10_9_universal2.whl", hash = "sha256:bf688f615c29bfe9dfc44312ca470989279f0e94bb9f631f85e3459af8efc009"},
- {file = "cryptography-44.0.1-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dd7c7e2d71d908dc0f8d2027e1604102140d84b155e658c20e8ad1304317691f"},
- {file = "cryptography-44.0.1-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:887143b9ff6bad2b7570da75a7fe8bbf5f65276365ac259a5d2d5147a73775f2"},
- {file = "cryptography-44.0.1-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:322eb03ecc62784536bc173f1483e76747aafeb69c8728df48537eb431cd1911"},
- {file = "cryptography-44.0.1-cp37-abi3-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:21377472ca4ada2906bc313168c9dc7b1d7ca417b63c1c3011d0c74b7de9ae69"},
- {file = "cryptography-44.0.1-cp37-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:df978682c1504fc93b3209de21aeabf2375cb1571d4e61907b3e7a2540e83026"},
- {file = "cryptography-44.0.1-cp37-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:eb3889330f2a4a148abead555399ec9a32b13b7c8ba969b72d8e500eb7ef84cd"},
- {file = "cryptography-44.0.1-cp37-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:8e6a85a93d0642bd774460a86513c5d9d80b5c002ca9693e63f6e540f1815ed0"},
- {file = "cryptography-44.0.1-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:6f76fdd6fd048576a04c5210d53aa04ca34d2ed63336d4abd306d0cbe298fddf"},
- {file = "cryptography-44.0.1-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:6c8acf6f3d1f47acb2248ec3ea261171a671f3d9428e34ad0357148d492c7864"},
- {file = "cryptography-44.0.1-cp37-abi3-win32.whl", hash = "sha256:24979e9f2040c953a94bf3c6782e67795a4c260734e5264dceea65c8f4bae64a"},
- {file = "cryptography-44.0.1-cp37-abi3-win_amd64.whl", hash = "sha256:fd0ee90072861e276b0ff08bd627abec29e32a53b2be44e41dbcdf87cbee2b00"},
- {file = "cryptography-44.0.1-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:a2d8a7045e1ab9b9f803f0d9531ead85f90c5f2859e653b61497228b18452008"},
- {file = "cryptography-44.0.1-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b8272f257cf1cbd3f2e120f14c68bff2b6bdfcc157fafdee84a1b795efd72862"},
- {file = "cryptography-44.0.1-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1e8d181e90a777b63f3f0caa836844a1182f1f265687fac2115fcf245f5fbec3"},
- {file = "cryptography-44.0.1-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:436df4f203482f41aad60ed1813811ac4ab102765ecae7a2bbb1dbb66dcff5a7"},
- {file = "cryptography-44.0.1-cp39-abi3-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:4f422e8c6a28cf8b7f883eb790695d6d45b0c385a2583073f3cec434cc705e1a"},
- {file = "cryptography-44.0.1-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:72198e2b5925155497a5a3e8c216c7fb3e64c16ccee11f0e7da272fa93b35c4c"},
- {file = "cryptography-44.0.1-cp39-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:2a46a89ad3e6176223b632056f321bc7de36b9f9b93b2cc1cccf935a3849dc62"},
- {file = "cryptography-44.0.1-cp39-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:53f23339864b617a3dfc2b0ac8d5c432625c80014c25caac9082314e9de56f41"},
- {file = "cryptography-44.0.1-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:888fcc3fce0c888785a4876ca55f9f43787f4c5c1cc1e2e0da71ad481ff82c5b"},
- {file = "cryptography-44.0.1-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:00918d859aa4e57db8299607086f793fa7813ae2ff5a4637e318a25ef82730f7"},
- {file = "cryptography-44.0.1-cp39-abi3-win32.whl", hash = "sha256:9b336599e2cb77b1008cb2ac264b290803ec5e8e89d618a5e978ff5eb6f715d9"},
- {file = "cryptography-44.0.1-cp39-abi3-win_amd64.whl", hash = "sha256:e403f7f766ded778ecdb790da786b418a9f2394f36e8cc8b796cc056ab05f44f"},
- {file = "cryptography-44.0.1-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:1f9a92144fa0c877117e9748c74501bea842f93d21ee00b0cf922846d9d0b183"},
- {file = "cryptography-44.0.1-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:610a83540765a8d8ce0f351ce42e26e53e1f774a6efb71eb1b41eb01d01c3d12"},
- {file = "cryptography-44.0.1-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:5fed5cd6102bb4eb843e3315d2bf25fede494509bddadb81e03a859c1bc17b83"},
- {file = "cryptography-44.0.1-pp310-pypy310_pp73-manylinux_2_34_aarch64.whl", hash = "sha256:f4daefc971c2d1f82f03097dc6f216744a6cd2ac0f04c68fb935ea2ba2a0d420"},
- {file = "cryptography-44.0.1-pp310-pypy310_pp73-manylinux_2_34_x86_64.whl", hash = "sha256:94f99f2b943b354a5b6307d7e8d19f5c423a794462bde2bf310c770ba052b1c4"},
- {file = "cryptography-44.0.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:d9c5b9f698a83c8bd71e0f4d3f9f839ef244798e5ffe96febfa9714717db7af7"},
- {file = "cryptography-44.0.1.tar.gz", hash = "sha256:f51f5705ab27898afda1aaa430f34ad90dc117421057782022edf0600bec5f14"},
+python-versions = "!=3.9.0,!=3.9.1,>=3.8"
+groups = ["core-lambda", "files-lambda"]
+files = [
+ {file = "cryptography-46.0.5-cp311-abi3-macosx_10_9_universal2.whl", hash = "sha256:351695ada9ea9618b3500b490ad54c739860883df6c1f555e088eaf25b1bbaad"},
+ {file = "cryptography-46.0.5-cp311-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:c18ff11e86df2e28854939acde2d003f7984f721eba450b56a200ad90eeb0e6b"},
+ {file = "cryptography-46.0.5-cp311-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4d7e3d356b8cd4ea5aff04f129d5f66ebdc7b6f8eae802b93739ed520c47c79b"},
+ {file = "cryptography-46.0.5-cp311-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:50bfb6925eff619c9c023b967d5b77a54e04256c4281b0e21336a130cd7fc263"},
+ {file = "cryptography-46.0.5-cp311-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:803812e111e75d1aa73690d2facc295eaefd4439be1023fefc4995eaea2af90d"},
+ {file = "cryptography-46.0.5-cp311-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:3ee190460e2fbe447175cda91b88b84ae8322a104fc27766ad09428754a618ed"},
+ {file = "cryptography-46.0.5-cp311-abi3-manylinux_2_31_armv7l.whl", hash = "sha256:f145bba11b878005c496e93e257c1e88f154d278d2638e6450d17e0f31e558d2"},
+ {file = "cryptography-46.0.5-cp311-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:e9251e3be159d1020c4030bd2e5f84d6a43fe54b6c19c12f51cde9542a2817b2"},
+ {file = "cryptography-46.0.5-cp311-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:47fb8a66058b80e509c47118ef8a75d14c455e81ac369050f20ba0d23e77fee0"},
+ {file = "cryptography-46.0.5-cp311-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:4c3341037c136030cb46e4b1e17b7418ea4cbd9dd207e4a6f3b2b24e0d4ac731"},
+ {file = "cryptography-46.0.5-cp311-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:890bcb4abd5a2d3f852196437129eb3667d62630333aacc13dfd470fad3aaa82"},
+ {file = "cryptography-46.0.5-cp311-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:80a8d7bfdf38f87ca30a5391c0c9ce4ed2926918e017c29ddf643d0ed2778ea1"},
+ {file = "cryptography-46.0.5-cp311-abi3-win32.whl", hash = "sha256:60ee7e19e95104d4c03871d7d7dfb3d22ef8a9b9c6778c94e1c8fcc8365afd48"},
+ {file = "cryptography-46.0.5-cp311-abi3-win_amd64.whl", hash = "sha256:38946c54b16c885c72c4f59846be9743d699eee2b69b6988e0a00a01f46a61a4"},
+ {file = "cryptography-46.0.5-cp314-cp314t-macosx_10_9_universal2.whl", hash = "sha256:94a76daa32eb78d61339aff7952ea819b1734b46f73646a07decb40e5b3448e2"},
+ {file = "cryptography-46.0.5-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:5be7bf2fb40769e05739dd0046e7b26f9d4670badc7b032d6ce4db64dddc0678"},
+ {file = "cryptography-46.0.5-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:fe346b143ff9685e40192a4960938545c699054ba11d4f9029f94751e3f71d87"},
+ {file = "cryptography-46.0.5-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:c69fd885df7d089548a42d5ec05be26050ebcd2283d89b3d30676eb32ff87dee"},
+ {file = "cryptography-46.0.5-cp314-cp314t-manylinux_2_28_ppc64le.whl", hash = "sha256:8293f3dea7fc929ef7240796ba231413afa7b68ce38fd21da2995549f5961981"},
+ {file = "cryptography-46.0.5-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:1abfdb89b41c3be0365328a410baa9df3ff8a9110fb75e7b52e66803ddabc9a9"},
+ {file = "cryptography-46.0.5-cp314-cp314t-manylinux_2_31_armv7l.whl", hash = "sha256:d66e421495fdb797610a08f43b05269e0a5ea7f5e652a89bfd5a7d3c1dee3648"},
+ {file = "cryptography-46.0.5-cp314-cp314t-manylinux_2_34_aarch64.whl", hash = "sha256:4e817a8920bfbcff8940ecfd60f23d01836408242b30f1a708d93198393a80b4"},
+ {file = "cryptography-46.0.5-cp314-cp314t-manylinux_2_34_ppc64le.whl", hash = "sha256:68f68d13f2e1cb95163fa3b4db4bf9a159a418f5f6e7242564fc75fcae667fd0"},
+ {file = "cryptography-46.0.5-cp314-cp314t-manylinux_2_34_x86_64.whl", hash = "sha256:a3d1fae9863299076f05cb8a778c467578262fae09f9dc0ee9b12eb4268ce663"},
+ {file = "cryptography-46.0.5-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:c4143987a42a2397f2fc3b4d7e3a7d313fbe684f67ff443999e803dd75a76826"},
+ {file = "cryptography-46.0.5-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:7d731d4b107030987fd61a7f8ab512b25b53cef8f233a97379ede116f30eb67d"},
+ {file = "cryptography-46.0.5-cp314-cp314t-win32.whl", hash = "sha256:c3bcce8521d785d510b2aad26ae2c966092b7daa8f45dd8f44734a104dc0bc1a"},
+ {file = "cryptography-46.0.5-cp314-cp314t-win_amd64.whl", hash = "sha256:4d8ae8659ab18c65ced284993c2265910f6c9e650189d4e3f68445ef82a810e4"},
+ {file = "cryptography-46.0.5-cp38-abi3-macosx_10_9_universal2.whl", hash = "sha256:4108d4c09fbbf2789d0c926eb4152ae1760d5a2d97612b92d508d96c861e4d31"},
+ {file = "cryptography-46.0.5-cp38-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:7d1f30a86d2757199cb2d56e48cce14deddf1f9c95f1ef1b64ee91ea43fe2e18"},
+ {file = "cryptography-46.0.5-cp38-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:039917b0dc418bb9f6edce8a906572d69e74bd330b0b3fea4f79dab7f8ddd235"},
+ {file = "cryptography-46.0.5-cp38-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:ba2a27ff02f48193fc4daeadf8ad2590516fa3d0adeeb34336b96f7fa64c1e3a"},
+ {file = "cryptography-46.0.5-cp38-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:61aa400dce22cb001a98014f647dc21cda08f7915ceb95df0c9eaf84b4b6af76"},
+ {file = "cryptography-46.0.5-cp38-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:3ce58ba46e1bc2aac4f7d9290223cead56743fa6ab94a5d53292ffaac6a91614"},
+ {file = "cryptography-46.0.5-cp38-abi3-manylinux_2_31_armv7l.whl", hash = "sha256:420d0e909050490d04359e7fdb5ed7e667ca5c3c402b809ae2563d7e66a92229"},
+ {file = "cryptography-46.0.5-cp38-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:582f5fcd2afa31622f317f80426a027f30dc792e9c80ffee87b993200ea115f1"},
+ {file = "cryptography-46.0.5-cp38-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:bfd56bb4b37ed4f330b82402f6f435845a5f5648edf1ad497da51a8452d5d62d"},
+ {file = "cryptography-46.0.5-cp38-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:a3d507bb6a513ca96ba84443226af944b0f7f47dcc9a399d110cd6146481d24c"},
+ {file = "cryptography-46.0.5-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:9f16fbdf4da055efb21c22d81b89f155f02ba420558db21288b3d0035bafd5f4"},
+ {file = "cryptography-46.0.5-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:ced80795227d70549a411a4ab66e8ce307899fad2220ce5ab2f296e687eacde9"},
+ {file = "cryptography-46.0.5-cp38-abi3-win32.whl", hash = "sha256:02f547fce831f5096c9a567fd41bc12ca8f11df260959ecc7c3202555cc47a72"},
+ {file = "cryptography-46.0.5-cp38-abi3-win_amd64.whl", hash = "sha256:556e106ee01aa13484ce9b0239bca667be5004efb0aabbed28d353df86445595"},
+ {file = "cryptography-46.0.5-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:3b4995dc971c9fb83c25aa44cf45f02ba86f71ee600d81091c2f0cbae116b06c"},
+ {file = "cryptography-46.0.5-pp311-pypy311_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:bc84e875994c3b445871ea7181d424588171efec3e185dced958dad9e001950a"},
+ {file = "cryptography-46.0.5-pp311-pypy311_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:2ae6971afd6246710480e3f15824ed3029a60fc16991db250034efd0b9fb4356"},
+ {file = "cryptography-46.0.5-pp311-pypy311_pp73-manylinux_2_34_aarch64.whl", hash = "sha256:d861ee9e76ace6cf36a6a89b959ec08e7bc2493ee39d07ffe5acb23ef46d27da"},
+ {file = "cryptography-46.0.5-pp311-pypy311_pp73-manylinux_2_34_x86_64.whl", hash = "sha256:2b7a67c9cd56372f3249b39699f2ad479f6991e62ea15800973b956f4b73e257"},
+ {file = "cryptography-46.0.5-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:8456928655f856c6e1533ff59d5be76578a7157224dbd9ce6872f25055ab9ab7"},
+ {file = "cryptography-46.0.5.tar.gz", hash = "sha256:abace499247268e3757271b2f1e244b36b06f8515cf27c4d49468fc9eb16e93d"},
]
[package.dependencies]
-cffi = {version = ">=1.12", markers = "platform_python_implementation != \"PyPy\""}
+cffi = {version = ">=2.0.0", markers = "python_full_version >= \"3.9.0\" and platform_python_implementation != \"PyPy\""}
[package.extras]
-docs = ["sphinx (>=5.3.0)", "sphinx-rtd-theme (>=3.0.0) ; python_version >= \"3.8\""]
+docs = ["sphinx (>=5.3.0)", "sphinx-inline-tabs", "sphinx-rtd-theme (>=3.0.0)"]
docstest = ["pyenchant (>=3)", "readme-renderer (>=30.0)", "sphinxcontrib-spelling (>=7.3.1)"]
-nox = ["nox (>=2024.4.15)", "nox[uv] (>=2024.3.2) ; python_version >= \"3.8\""]
-pep8test = ["check-sdist ; python_version >= \"3.8\"", "click (>=8.0.1)", "mypy (>=1.4)", "ruff (>=0.3.6)"]
+nox = ["nox[uv] (>=2024.4.15)"]
+pep8test = ["check-sdist", "click (>=8.0.1)", "mypy (>=1.14)", "ruff (>=0.11.11)"]
sdist = ["build (>=1.0.0)"]
ssh = ["bcrypt (>=3.1.5)"]
-test = ["certifi (>=2024)", "cryptography-vectors (==44.0.1)", "pretend (>=0.7)", "pytest (>=7.4.0)", "pytest-benchmark (>=4.0)", "pytest-cov (>=2.10.1)", "pytest-xdist (>=3.5.0)"]
+test = ["certifi (>=2024)", "cryptography-vectors (==46.0.5)", "pretend (>=0.7)", "pytest (>=7.4.0)", "pytest-benchmark (>=4.0)", "pytest-cov (>=2.10.1)", "pytest-xdist (>=3.5.0)"]
test-randomorder = ["pytest-randomly"]
[[package]]
@@ -1190,6 +1208,22 @@ files = [
{file = "msgpack-1.1.2.tar.gz", hash = "sha256:3b60763c1373dd60f398488069bcdc703cd08a711477b5d480eecc9f9626f47e"},
]
+[[package]]
+name = "msoffcrypto-tool"
+version = "6.0.0"
+description = "Python tool and library for decrypting and encrypting MS Office files using a password or other keys"
+optional = false
+python-versions = "<4.0,>=3.10"
+groups = ["files-lambda"]
+files = [
+ {file = "msoffcrypto_tool-6.0.0-py3-none-any.whl", hash = "sha256:46c394ed5d9641e802fc79bf3fb0666a53748b23fa8c4aa634ae9d30d46fe397"},
+ {file = "msoffcrypto_tool-6.0.0.tar.gz", hash = "sha256:9a5ebc4c0096b42e5d7ebc2350afdc92dc511061e935ca188468094fdd032bbe"},
+]
+
+[package.dependencies]
+cryptography = ">=39.0"
+olefile = ">=0.46"
+
[[package]]
name = "mypy-extensions"
version = "1.1.0"
@@ -1219,6 +1253,21 @@ rsa = ["cryptography (>=3.0.0)"]
signals = ["blinker (>=1.4.0)"]
signedtoken = ["cryptography (>=3.0.0)", "pyjwt (>=2.0.0,<3)"]
+[[package]]
+name = "olefile"
+version = "0.47"
+description = "Python package to parse, read and write Microsoft OLE2 files (Structured Storage or Compound Document, Microsoft Office)"
+optional = false
+python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
+groups = ["files-lambda"]
+files = [
+ {file = "olefile-0.47-py2.py3-none-any.whl", hash = "sha256:543c7da2a7adadf21214938bb79c83ea12b473a4b6ee4ad4bf854e7715e13d1f"},
+ {file = "olefile-0.47.zip", hash = "sha256:599383381a0bf3dfbd932ca0ca6515acd174ed48870cbf7fee123d698c192c1c"},
+]
+
+[package.extras]
+tests = ["pytest", "pytest-cov"]
+
[[package]]
name = "openpyxl"
version = "3.1.5"
@@ -1664,7 +1713,7 @@ version = "2.23"
description = "C parser in Python"
optional = false
python-versions = ">=3.8"
-groups = ["core-lambda"]
+groups = ["core-lambda", "files-lambda"]
markers = "platform_python_implementation != \"PyPy\" and implementation_name != \"PyPy\""
files = [
{file = "pycparser-2.23-py3-none-any.whl", hash = "sha256:e5c6e8d3fbad53479cab09ac03729e0a9faf2bee3db8208a550daf5af81a5934"},
@@ -2450,4 +2499,4 @@ requests = "*"
[metadata]
lock-version = "2.1"
python-versions = "^3.11"
-content-hash = "77d0249d2dd6c9fbb02b0e434ade72ac221f4df82e8d98ec49250fe0b7ff74df"
+content-hash = "9d5fc7af2841c90590cd173e32abcf400d064cd303e49a5b2ac33964170a0e9b"
diff --git a/pyproject.toml b/pyproject.toml
index 4abaa1617d..fe89cf8388 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -51,6 +51,9 @@ pikepdf = "8.4.0"
[tool.poetry.group.data_lambda.dependencies]
polars = "1.31.0"
+[tool.poetry.group.files_lambda.dependencies]
+msoffcrypto-tool = "6.0.0"
+
[tool.poetry.group.reports_lambda.dependencies]
openpyxl = "^3.1.5"
reportlab = "^4.3.1"
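The new files_lambda dependency group exists to pull msoffcrypto-tool (plus its olefile and cryptography dependencies) into the files lambda layer for the password-protection check introduced in this series. A minimal sketch of the check the library enables, using only its documented OfficeFile/is_encrypted API:

    import io

    import msoffcrypto

    def office_file_is_encrypted(file_bytes: bytes) -> bool:
        """Return True when an MS Office file is password protected.

        OfficeFile sniffs the container format from the stream;
        is_encrypted() reports whether a password is needed to read it.
        A stream that is not a valid Office container raises instead.
        """
        return msoffcrypto.OfficeFile(io.BytesIO(file_bytes)).is_encrypted()
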
From 8e33f200f5913e29dc127d367a2c2fef23f0e506 Mon Sep 17 00:00:00 2001
From: NogaNHS <127490765+NogaNHS@users.noreply.github.com>
Date: Wed, 11 Feb 2026 11:01:46 +0000
Subject: [PATCH 02/13] Fix virus scan result constant to use uppercase
enum member
---
lambdas/requirements/requirements_test.txt | 2 +-
lambdas/services/upload_document_reference_service.py | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/lambdas/requirements/requirements_test.txt b/lambdas/requirements/requirements_test.txt
index ac64194651..2a3ae5bf28 100644
--- a/lambdas/requirements/requirements_test.txt
+++ b/lambdas/requirements/requirements_test.txt
@@ -9,4 +9,4 @@ pytest-unordered==0.6.0
pytest==7.4.3
requests_mock==1.11.0
ruff==0.12.8
-syrupy==4.9.1
\ No newline at end of file
+syrupy==4.9.1
diff --git a/lambdas/services/upload_document_reference_service.py b/lambdas/services/upload_document_reference_service.py
index eaded11355..bccbcf91c6 100644
--- a/lambdas/services/upload_document_reference_service.py
+++ b/lambdas/services/upload_document_reference_service.py
@@ -159,7 +159,7 @@ def _process_preliminary_document_reference(
f"Document {preliminary_document_reference.id} is password protected or corrupt, "
f"marking as such in database",
)
- virus_scan_result = VirusScanResult.Invalid
+ virus_scan_result = VirusScanResult.INVALID
else:
self._process_clean_document(
preliminary_document_reference,
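The fix matters because Enum member access is attribute lookup: VirusScanResult.Invalid raises AttributeError at runtime when the member is declared as INVALID. A minimal sketch, assuming VirusScanResult is a str-valued Enum whose members follow the uppercase convention seen elsewhere in this series (CLEAN, INFECTED, ERROR); the string values below are illustrative only:

    from enum import Enum

    class VirusScanResult(str, Enum):
        CLEAN = "Clean"        # values are placeholders
        INFECTED = "Infected"
        ERROR = "Error"
        INVALID = "Invalid"    # the member added for this feature

    VirusScanResult.INVALID    # OK
    # VirusScanResult.Invalid  -> AttributeError: Invalid
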
From 04a44db8a181aff7c32db3ef9494c82f54d8d9ef Mon Sep 17 00:00:00 2001
From: adamwhitingnhs
Date: Wed, 11 Feb 2026 11:33:40 +0000
Subject: [PATCH 03/13] fix changed-files queries in the lint and format steps
---
.github/workflows/automated-pr-validator.yml | 2 +-
Makefile | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/.github/workflows/automated-pr-validator.yml b/.github/workflows/automated-pr-validator.yml
index a290624edd..2fc24de896 100644
--- a/.github/workflows/automated-pr-validator.yml
+++ b/.github/workflows/automated-pr-validator.yml
@@ -218,7 +218,7 @@ jobs:
id: changed-files
run: |
git remote set-branches origin main && git fetch --depth 1 origin main && git branch main origin/main
- echo "CHANGED_FILES=$(git diff main --name-only | grep '.py$' | tr '\n' ' ')" >> $GITHUB_OUTPUT
+ echo "CHANGED_FILES=$(git diff main --name-status | grep -E '^[^D].*\.py$' | tr '\n' ' ')" >> $GITHUB_OUTPUT
- name: Run black
id: black
diff --git a/Makefile b/Makefile
index 704f970661..a08c2187bb 100644
--- a/Makefile
+++ b/Makefile
@@ -87,7 +87,7 @@ format:
@if [ $(FORMAT_ALL) = true ]; then \
CHANGED_FILES=''; \
else \
- CHANGED_FILES=$$(git diff main --name-only | grep '.py$$' | xargs); \
+ CHANGED_FILES=$$(git diff main --name-status | grep -E '^[^D].*\.py$$' | xargs); \
echo $$CHANGED_FILES; \
if [ -z "$$CHANGED_FILES" ]; then echo "No changed files to format"; exit 0; fi; \
fi; \
From 0bce0f0e3f0a3ea93d1b62c031af9af9efee5233 Mon Sep 17 00:00:00 2001
From: NogaNHS <127490765+NogaNHS@users.noreply.github.com>
Date: Wed, 11 Feb 2026 11:47:30 +0000
Subject: [PATCH 04/13] Refactor virus scan result handling to check for file
name presence before processing
---
.../upload_document_reference_service.py | 17 +++++++++--------
1 file changed, 9 insertions(+), 8 deletions(-)
diff --git a/lambdas/services/upload_document_reference_service.py b/lambdas/services/upload_document_reference_service.py
index bccbcf91c6..762133df50 100644
--- a/lambdas/services/upload_document_reference_service.py
+++ b/lambdas/services/upload_document_reference_service.py
@@ -152,14 +152,15 @@ def _process_preliminary_document_reference(
)
if virus_scan_result == VirusScanResult.CLEAN:
- file_type_extension = preliminary_document_reference.file_name.split('.')[-1].lower()
- is_file_protected = self.is_file_invalid(object_key, file_type_extension)
- if is_file_protected:
- logger.warning(
- f"Document {preliminary_document_reference.id} is password protected or corrupt, "
- f"marking as such in database",
- )
- virus_scan_result = VirusScanResult.INVALID
+ if getattr(preliminary_document_reference, "file_name", None):
+ file_type_extension = preliminary_document_reference.file_name.split('.')[-1].lower()
+ is_file_protected = self.is_file_invalid(object_key, file_type_extension)
+ if is_file_protected:
+ logger.warning(
+ f"Document {preliminary_document_reference.id} is password protected or corrupt, "
+ f"marking as such in database",
+ )
+ virus_scan_result = VirusScanResult.INVALID
else:
self._process_clean_document(
preliminary_document_reference,
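The getattr guard protects against document references persisted without a file_name, where the previous code would have raised on None.split. Worth noting alongside it: str.split(".")[-1] returns the whole name when there is no dot, so an extensionless file yields itself as its "extension" and falls through to the unsupported-extension branch of the check rather than erroring. A standalone sketch of the lookup's edge cases:

    def file_extension(file_name: str | None) -> str | None:
        """Mirror the guarded lookup: lowercased text after the last dot."""
        if not file_name:
            return None
        return file_name.split(".")[-1].lower()

    assert file_extension("Report.DOCX") == "docx"
    assert file_extension("archive.tar.gz") == "gz"
    assert file_extension("no_extension") == "no_extension"  # no dot
    assert file_extension(None) is None
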
From e8aa8e97d50d113eb01cc36249a3b0e571d09c9a Mon Sep 17 00:00:00 2001
From: adamwhitingnhs
Date: Wed, 11 Feb 2026 12:26:31 +0000
Subject: [PATCH 05/13] fix format ordering
---
.github/workflows/automated-pr-validator.yml | 2 +-
.lintstagedrc | 2 +-
Makefile | 4 ++--
3 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/.github/workflows/automated-pr-validator.yml b/.github/workflows/automated-pr-validator.yml
index 2fc24de896..ba656d5869 100644
--- a/.github/workflows/automated-pr-validator.yml
+++ b/.github/workflows/automated-pr-validator.yml
@@ -218,7 +218,7 @@ jobs:
id: changed-files
run: |
git remote set-branches origin main && git fetch --depth 1 origin main && git branch main origin/main
- echo "CHANGED_FILES=$(git diff main --name-status | grep -E '^[^D].*\.py$' | tr '\n' ' ')" >> $GITHUB_OUTPUT
+ echo "CHANGED_FILES=$(git diff main --name-status | grep -E '^[^D].*\.py$' | cut -f2 | tr '\n' ' ')" >> $GITHUB_OUTPUT
- name: Run black
id: black
diff --git a/.lintstagedrc b/.lintstagedrc
index 5ac098b07a..0cfe966e6a 100644
--- a/.lintstagedrc
+++ b/.lintstagedrc
@@ -7,8 +7,8 @@
"./app/node_modules/prettier/bin/prettier.cjs --write"
],
"*.py": [
+ "./lambdas/venv/bin/ruff check --fix",
"./lambdas/venv/bin/python3 -m black",
- "./lambdas/venv/bin/ruff check ./lambdas",
"./lambdas/venv/bin/python3 -m isort --profile black",
]
}
\ No newline at end of file
diff --git a/Makefile b/Makefile
index a08c2187bb..7dd4df5c7c 100644
--- a/Makefile
+++ b/Makefile
@@ -87,12 +87,12 @@ format:
@if [ $(FORMAT_ALL) = true ]; then \
CHANGED_FILES=''; \
else \
- CHANGED_FILES=$$(git diff main --name-status | grep -E '^[^D].*\.py$$' | xargs); \
+ CHANGED_FILES=$$(git diff main --name-status | grep -E '^[^D].*\.py$$' | cut -f2 | xargs); \
echo $$CHANGED_FILES; \
if [ -z "$$CHANGED_FILES" ]; then echo "No changed files to format"; exit 0; fi; \
fi; \
- $(VENV_PATH_PREFIX)/bin/python3 -m black $$CHANGED_FILES; \
$(VENV_PATH_PREFIX)/bin/ruff check $$CHANGED_FILES --fix; \
+ $(VENV_PATH_PREFIX)/bin/python3 -m black $$CHANGED_FILES; \
$(VENV_PATH_PREFIX)/bin/python3 -m isort --profile black $$CHANGED_FILES
sort-requirements:
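The cut -f2 added here completes patch 03's switch to --name-status: git emits a status letter, a tab, then the path (e.g. "M<TAB>lambdas/utils/file_utils.py"), so without the cut the status column was being fed to black and ruff along with the path. A small Python sketch of the same filter, shown only to pin down the line format (rename entries, which carry two paths, are ignored here):

    def changed_python_files(name_status_output: str) -> list[str]:
        """Paths of non-deleted .py files from `git diff --name-status`."""
        files = []
        for line in name_status_output.splitlines():
            status, _, path = line.partition("\t")
            if status and not status.startswith("D") and path.endswith(".py"):
                files.append(path)
        return files

    sample = "M\tlambdas/utils/file_utils.py\nD\tlambdas/services/pdf_stitch_service.py"
    assert changed_python_files(sample) == ["lambdas/utils/file_utils.py"]
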
From bd7eee79b37bd836fd9ae302f364ce7258b2959b Mon Sep 17 00:00:00 2001
From: NogaNHS <127490765+NogaNHS@users.noreply.github.com>
Date: Wed, 11 Feb 2026 13:02:55 +0000
Subject: [PATCH 06/13] Refactor virus scan result handling to improve file
protection checks
---
.../services/upload_document_reference_service.py | 13 +++++++------
1 file changed, 7 insertions(+), 6 deletions(-)
diff --git a/lambdas/services/upload_document_reference_service.py b/lambdas/services/upload_document_reference_service.py
index 762133df50..f869b26eb1 100644
--- a/lambdas/services/upload_document_reference_service.py
+++ b/lambdas/services/upload_document_reference_service.py
@@ -152,15 +152,16 @@ def _process_preliminary_document_reference(
)
if virus_scan_result == VirusScanResult.CLEAN:
+ is_file_protected = False
if getattr(preliminary_document_reference, "file_name", None):
file_type_extension = preliminary_document_reference.file_name.split('.')[-1].lower()
is_file_protected = self.is_file_invalid(object_key, file_type_extension)
- if is_file_protected:
- logger.warning(
- f"Document {preliminary_document_reference.id} is password protected or corrupt, "
- f"marking as such in database",
- )
- virus_scan_result = VirusScanResult.INVALID
+ if is_file_protected:
+ logger.warning(
+ f"Document {preliminary_document_reference.id} is password protected or corrupt, "
+ f"marking as such in database",
+ )
+ virus_scan_result = VirusScanResult.INVALID
else:
self._process_clean_document(
preliminary_document_reference,
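After this refactor the CLEAN path reads top to bottom: default to not-protected, run the content check only when a file name (and therefore an extension) is available, then downgrade the scan result to INVALID if the check trips. A condensed sketch of the decision, with string stand-ins for the enum values and a plain callable standing in for the S3-backed is_file_invalid above:

    def final_scan_result(scan_result: str, file_name: str | None, is_file_invalid) -> str:
        """Downgrade CLEAN to INVALID when the locked/corrupt check trips."""
        if scan_result != "Clean":
            return scan_result
        is_file_protected = False
        if file_name:
            ext = file_name.split(".")[-1].lower()
            is_file_protected = is_file_invalid(ext)
        return "Invalid" if is_file_protected else "Clean"

    assert final_scan_result("Clean", "notes.docx", lambda ext: True) == "Invalid"
    assert final_scan_result("Clean", None, lambda ext: True) == "Clean"
    assert final_scan_result("Infected", "notes.docx", lambda ext: True) == "Infected"
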
From 24dd1a5c691c76d3782e2a8d38d9051252e09f34 Mon Sep 17 00:00:00 2001
From: NogaNHS <127490765+NogaNHS@users.noreply.github.com>
Date: Wed, 11 Feb 2026 13:04:34 +0000
Subject: [PATCH 07/13] format
---
lambdas/enums/lambda_error.py | 8 +-
lambdas/services/base/s3_service.py | 35 ++++--
.../services/get_document_upload_status.py | 7 +-
.../upload_document_reference_service.py | 55 ++++++---
.../test_upload_document_reference_service.py | 106 +++++++++++++-----
lambdas/tests/unit/utils/test_file_utils.py | 28 ++++-
lambdas/utils/file_utils.py | 27 +++--
7 files changed, 193 insertions(+), 73 deletions(-)
diff --git a/lambdas/enums/lambda_error.py b/lambdas/enums/lambda_error.py
index 95b60ccdfb..d5c2a62e5f 100644
--- a/lambdas/enums/lambda_error.py
+++ b/lambdas/enums/lambda_error.py
@@ -43,7 +43,9 @@ def create_error_response(
return error_response
def to_str(
- self, params: Optional[dict] = None, details: Optional[str] = None,
+ self,
+ params: Optional[dict] = None,
+ details: Optional[str] = None,
) -> str:
message = self.value["message"]
if "%" in message and params:
@@ -59,7 +61,9 @@ def create_error_body(
**kwargs,
) -> str:
return self.create_error_response(
- params=params, details=details, **kwargs,
+ params=params,
+ details=details,
+ **kwargs,
).create()
"""
diff --git a/lambdas/services/base/s3_service.py b/lambdas/services/base/s3_service.py
index c9de2a9bea..1da0a82f68 100644
--- a/lambdas/services/base/s3_service.py
+++ b/lambdas/services/base/s3_service.py
@@ -40,7 +40,9 @@ def __init__(self, custom_aws_role=None):
if custom_aws_role:
self.iam_service = IAMService()
self.custom_client, self.expiration_time = self.iam_service.assume_role(
- self.custom_aws_role, "s3", config=self.config,
+ self.custom_aws_role,
+ "s3",
+ config=self.config,
)
# S3 Location should be a minimum of a s3_object_key but can also be a directory location in the form of
@@ -52,7 +54,9 @@ def create_upload_presigned_url(self, s3_bucket_name: str, s3_object_location: s
):
logger.info(S3Service.EXPIRED_SESSION_WARNING)
self.custom_client, self.expiration_time = self.iam_service.assume_role(
- self.custom_aws_role, "s3", config=self.config,
+ self.custom_aws_role,
+ "s3",
+ config=self.config,
)
return self.custom_client.generate_presigned_post(
s3_bucket_name,
@@ -69,7 +73,9 @@ def create_put_presigned_url(self, s3_bucket_name: str, file_key: str):
):
logger.info(S3Service.EXPIRED_SESSION_WARNING)
self.custom_client, self.expiration_time = self.iam_service.assume_role(
- self.custom_aws_role, "s3", config=self.config,
+ self.custom_aws_role,
+ "s3",
+ config=self.config,
)
logger.info("Generating presigned URL")
return self.custom_client.generate_presigned_url(
@@ -86,7 +92,9 @@ def create_download_presigned_url(self, s3_bucket_name: str, file_key: str):
):
logger.info(S3Service.EXPIRED_SESSION_WARNING)
self.custom_client, self.expiration_time = self.iam_service.assume_role(
- self.custom_aws_role, "s3", config=self.config,
+ self.custom_aws_role,
+ "s3",
+ config=self.config,
)
logger.info("Generating presigned URL")
return self.custom_client.generate_presigned_url(
@@ -150,17 +158,26 @@ def copy_across_bucket(
raise e
def delete_object(
- self, s3_bucket_name: str, file_key: str, version_id: str | None = None,
+ self,
+ s3_bucket_name: str,
+ file_key: str,
+ version_id: str | None = None,
):
if version_id is None:
return self.client.delete_object(Bucket=s3_bucket_name, Key=file_key)
return self.client.delete_object(
- Bucket=s3_bucket_name, Key=file_key, VersionId=version_id,
+ Bucket=s3_bucket_name,
+ Key=file_key,
+ VersionId=version_id,
)
def create_object_tag(
- self, s3_bucket_name: str, file_key: str, tag_key: str, tag_value: str,
+ self,
+ s3_bucket_name: str,
+ file_key: str,
+ tag_key: str,
+ tag_value: str,
):
return self.client.put_object_tagging(
Bucket=s3_bucket_name,
@@ -256,5 +273,7 @@ def upload_file_obj(
def save_or_create_file(self, source_bucket: str, file_key: str, body: bytes):
return self.client.put_object(
- Bucket=source_bucket, Key=file_key, Body=BytesIO(body),
+ Bucket=source_bucket,
+ Key=file_key,
+ Body=BytesIO(body),
)
diff --git a/lambdas/services/get_document_upload_status.py b/lambdas/services/get_document_upload_status.py
index f6c67c6b67..7c06d6fac9 100644
--- a/lambdas/services/get_document_upload_status.py
+++ b/lambdas/services/get_document_upload_status.py
@@ -31,7 +31,9 @@ def _determine_document_status(self, doc_ref, nhs_number):
return doc_ref.doc_status, None
def get_document_references_by_id(
- self, nhs_number: str, document_ids: list[str],
+ self,
+ nhs_number: str,
+ document_ids: list[str],
) -> dict:
"""
Checks the status of a list of documents for a given patient.
@@ -44,7 +46,8 @@ def get_document_references_by_id(
A dictionary with a list of document IDs and their corresponding statuses.
"""
found_docs = self.document_service.get_batch_document_references_by_id(
- document_ids, SupportedDocumentTypes.LG,
+ document_ids,
+ SupportedDocumentTypes.LG,
)
found_docs_by_id = {doc.id: doc for doc in found_docs}
results = {}
diff --git a/lambdas/services/upload_document_reference_service.py b/lambdas/services/upload_document_reference_service.py
index f869b26eb1..56804f28b5 100644
--- a/lambdas/services/upload_document_reference_service.py
+++ b/lambdas/services/upload_document_reference_service.py
@@ -44,7 +44,9 @@ def __init__(self):
self.bucket_router = DocTypeS3BucketRouter()
def handle_upload_document_reference_request(
- self, object_key: str, object_size: int = 0,
+ self,
+ object_key: str,
+ object_size: int = 0,
):
"""Handle the upload document reference request with comprehensive error handling"""
if not object_key:
@@ -60,13 +62,16 @@ def handle_upload_document_reference_request(
self._get_infrastructure_for_document_key(object_parts)
preliminary_document_reference = self._fetch_preliminary_document_reference(
- document_key, nhs_number,
+ document_key,
+ nhs_number,
)
if not preliminary_document_reference:
return
self._process_preliminary_document_reference(
- preliminary_document_reference, object_key, object_size,
+ preliminary_document_reference,
+ object_key,
+ object_size,
)
except Exception as e:
@@ -93,7 +98,9 @@ def _get_infrastructure_for_document_key(self, object_parts: list[str]) -> None:
raise InvalidDocTypeException(400, LambdaError.DocTypeDB)
def _fetch_preliminary_document_reference(
- self, document_key: str, nhs_number: str | None = None,
+ self,
+ document_key: str,
+ nhs_number: str | None = None,
) -> Optional[DocumentReference]:
"""Fetch document reference from the database"""
try:
@@ -148,14 +155,19 @@ def _process_preliminary_document_reference(
"""Process the preliminary (uploading) document reference with virus scanning and file operations"""
try:
virus_scan_result = self._perform_virus_scan(
- preliminary_document_reference, object_key,
+ preliminary_document_reference,
+ object_key,
)
if virus_scan_result == VirusScanResult.CLEAN:
is_file_protected = False
if getattr(preliminary_document_reference, "file_name", None):
- file_type_extension = preliminary_document_reference.file_name.split('.')[-1].lower()
- is_file_protected = self.is_file_invalid(object_key, file_type_extension)
+ file_type_extension = (
+ preliminary_document_reference.file_name.split(".")[-1].lower()
+ )
+ is_file_protected = self.is_file_invalid(
+ object_key, file_type_extension
+ )
if is_file_protected:
logger.warning(
f"Document {preliminary_document_reference.id} is password protected or corrupt, "
@@ -333,26 +345,30 @@ def _finalize_and_supersede_with_transaction(self, new_document: DocumentReferen
logger.error(
f"Unexpected error while finalizing document for {new_document.nhs_number}: {e}",
)
-
+
new_document.doc_status = "cancelled"
new_document.uploaded = False
new_document.uploading = False
new_document.file_size = None
self._update_dynamo_table(new_document)
self.delete_file_from_bucket(
- new_document.file_location, new_document.s3_version_id,
+ new_document.file_location,
+ new_document.s3_version_id,
)
def document_reference_key(self, document_id):
return {DocumentReferenceMetadataFields.ID.value: document_id}
def _perform_virus_scan(
- self, document_reference: DocumentReference, object_key: str,
+ self,
+ document_reference: DocumentReference,
+ object_key: str,
) -> VirusScanResult:
"""Perform a virus scan on the document"""
try:
return self.virus_scan_service.scan_file(
- object_key, nhs_number=document_reference.nhs_number,
+ object_key,
+ nhs_number=document_reference.nhs_number,
)
except Exception as e:
@@ -362,12 +378,14 @@ def _perform_virus_scan(
return VirusScanResult.ERROR
def _process_clean_document(
- self, document_reference: DocumentReference, object_key: str,
+ self,
+ document_reference: DocumentReference,
+ object_key: str,
):
"""Process a document that passed virus scanning"""
try:
self.copy_files_from_staging_bucket(document_reference, object_key)
-
+
logger.info(
f"Successfully processed clean document: {document_reference.id}",
)
@@ -380,7 +398,9 @@ def _process_clean_document(
raise FileProcessingException(f"Failed to process clean document: {str(e)}")
def copy_files_from_staging_bucket(
- self, document_reference: DocumentReference, source_file_key: str,
+ self,
+ document_reference: DocumentReference,
+ source_file_key: str,
):
"""Copy files from staging bucket to destination bucket"""
try:
@@ -402,7 +422,8 @@ def copy_files_from_staging_bucket(
)
document_reference.s3_bucket_name = self.destination_bucket_name
document_reference.file_location = document_reference._build_s3_location(
- self.destination_bucket_name, dest_file_key,
+ self.destination_bucket_name,
+ dest_file_key,
)
document_reference.s3_version_id = copy_result.get("VersionId")
return copy_result
@@ -474,6 +495,8 @@ def _update_dynamo_table(
)
def is_file_invalid(self, object_key: str, file_type_extension: str) -> bool:
- entire_object = self.s3_service.get_object_stream(self.staging_s3_bucket_name, object_key)
+ entire_object = self.s3_service.get_object_stream(
+ self.staging_s3_bucket_name, object_key
+ )
file_stream = io.BytesIO(entire_object.read())
return check_file_locked_or_corrupt(file_stream, file_type_extension)
diff --git a/lambdas/tests/unit/services/test_upload_document_reference_service.py b/lambdas/tests/unit/services/test_upload_document_reference_service.py
index 3d85e5df70..d8b6a5e865 100644
--- a/lambdas/tests/unit/services/test_upload_document_reference_service.py
+++ b/lambdas/tests/unit/services/test_upload_document_reference_service.py
@@ -61,7 +61,10 @@ def service(set_env, mock_virus_scan_service, mocker):
service.virus_scan_service = MockVirusScanService()
service.s3_service = Mock()
mocker.patch("io.BytesIO", return_value=None)
- mocker.patch("services.upload_document_reference_service.check_file_locked_or_corrupt", return_value=False)
+ mocker.patch(
+ "services.upload_document_reference_service.check_file_locked_or_corrupt",
+ return_value=False,
+ )
return service
@@ -80,7 +83,9 @@ def test_handle_upload_document_reference_request_with_none_object_key(service):
def test_handle_upload_document_reference_request_success(
- service, mock_document_reference, mocker,
+ service,
+ mock_document_reference,
+ mocker,
):
object_key = "staging/test-doc-id"
object_size = 1111
@@ -94,7 +99,10 @@ def test_handle_upload_document_reference_request_success(
"VersionId": "test-version-id",
}
mocker.patch("io.BytesIO", return_value=None)
- mocker.patch("services.upload_document_reference_service.check_file_locked_or_corrupt", return_value=False)
+ mocker.patch(
+ "services.upload_document_reference_service.check_file_locked_or_corrupt",
+ return_value=False,
+ )
# First call fetches preliminary doc, second call fetches existing final docs to supersede
service.document_service.fetch_documents_from_table.side_effect = [
[mock_document_reference],
@@ -154,7 +162,8 @@ def test_fetch_preliminary_document_reference_no_documents_found(service):
def test_fetch_preliminary_document_reference_multiple_documents_warning(
- service, mock_document_reference,
+ service,
+ mock_document_reference,
):
"""Test handling when multiple documents are found"""
document_key = "test-doc-id"
@@ -181,23 +190,30 @@ def test_fetch_preliminary_document_reference_exception(service):
def test__process_preliminary_document_reference_clean_virus_scan(
- service, mock_document_reference, mocker,
+ service,
+ mock_document_reference,
+ mocker,
):
"""Test processing document reference with a clean virus scan"""
object_key = "staging/test-doc-id"
mocker.patch.object(
- service, "_perform_virus_scan", return_value=VirusScanResult.CLEAN,
+ service,
+ "_perform_virus_scan",
+ return_value=VirusScanResult.CLEAN,
)
mock_delete = mocker.patch.object(service, "delete_file_from_staging_bucket")
mock_process_clean = mocker.patch.object(service, "_process_clean_document")
mock_finalize_transaction = mocker.patch.object(
- service, "_finalize_and_supersede_with_transaction",
+ service,
+ "_finalize_and_supersede_with_transaction",
)
service._process_preliminary_document_reference(
- mock_document_reference, object_key, 1222,
+ mock_document_reference,
+ object_key,
+ 1222,
)
mock_process_clean.assert_called_once()
@@ -209,26 +225,33 @@ def test__process_preliminary_document_reference_clean_virus_scan(
def test__process_preliminary_document_reference_infected_virus_scan(
- service, mock_document_reference, mocker,
+ service,
+ mock_document_reference,
+ mocker,
):
"""Test processing document reference with an infected virus scan"""
object_key = "staging/test-doc-id"
mocker.patch.object(
- service, "_perform_virus_scan", return_value=VirusScanResult.INFECTED,
+ service,
+ "_perform_virus_scan",
+ return_value=VirusScanResult.INFECTED,
)
mock_delete = mocker.patch.object(service, "delete_file_from_staging_bucket")
mock_process_clean = mocker.patch.object(service, "_process_clean_document")
mock_update_dynamo = mocker.patch.object(service, "_update_dynamo_table")
service._process_preliminary_document_reference(
- mock_document_reference, object_key, 1222,
+ mock_document_reference,
+ object_key,
+ 1222,
)
mock_process_clean.assert_not_called()
mock_update_dynamo.assert_called_once()
mock_delete.assert_called_once_with(object_key)
+
def test_perform_virus_scan_returns_clean_hardcoded(service, mock_document_reference):
"""Test virus scan returns hardcoded CLEAN result"""
object_key = "staging/test-doc-id"
@@ -237,7 +260,9 @@ def test_perform_virus_scan_returns_clean_hardcoded(service, mock_document_refer
def test_perform_virus_scan_exception_returns_infected(
- service, mock_document_reference, mocker,
+ service,
+ mock_document_reference,
+ mocker,
):
"""Test virus scan exception handling returns INFECTED for safety"""
mock_virus_service = mocker.patch.object(service, "virus_scan_service")
@@ -264,7 +289,9 @@ def test_process_clean_document_success(service, mock_document_reference, mocker
def test_process_clean_document_exception_restores_original_values(
- service, mock_document_reference, mocker,
+ service,
+ mock_document_reference,
+ mocker,
):
"""Test that original values are restored when processing fails"""
object_key = "staging/test-doc-id"
@@ -273,7 +300,9 @@ def test_process_clean_document_exception_restores_original_values(
original_location = "original-location"
mocker.patch.object(
- service, "copy_files_from_staging_bucket", side_effect=Exception("Copy failed"),
+ service,
+ "copy_files_from_staging_bucket",
+ side_effect=Exception("Copy failed"),
)
with pytest.raises(FileProcessingException):
service._process_clean_document(
@@ -327,7 +356,8 @@ def test_delete_file_from_staging_bucket_success(service):
service.delete_file_from_staging_bucket(source_file_key)
service.s3_service.delete_object.assert_called_once_with(
- MOCK_STAGING_STORE_BUCKET, source_file_key,
+ MOCK_STAGING_STORE_BUCKET,
+ source_file_key,
)
@@ -340,7 +370,8 @@ def test_delete_pdm_file_from_staging_bucket_success(service):
service.delete_file_from_staging_bucket(source_file_key)
service.s3_service.delete_object.assert_called_once_with(
- MOCK_STAGING_STORE_BUCKET, source_file_key,
+ MOCK_STAGING_STORE_BUCKET,
+ source_file_key,
)
@@ -393,7 +424,10 @@ def test_update_dynamo_table_client_error(service, mock_document_reference):
"""Test handling of ClientError during DynamoDB update"""
client_error = ClientError(
error_response={
- "Error": {"Code": "ResourceNotFoundException", "Message": "Table not found"},
+ "Error": {
+ "Code": "ResourceNotFoundException",
+ "Message": "Table not found",
+ },
},
operation_name="UpdateItem",
)
@@ -450,7 +484,8 @@ def test_document_key_extraction_from_object_key_for_lg(
def test_finalize_and_supersede_with_transaction_with_existing_finals(
- service, mock_document_reference,
+ service,
+ mock_document_reference,
):
"""Test transaction-based finalisation with existing final documents to supersede"""
new_doc = mock_document_reference
@@ -515,7 +550,8 @@ def test_finalize_and_supersede_with_transaction_with_existing_finals(
def test_finalize_and_supersede_with_transaction_no_existing_docs(
- service, mock_document_reference,
+ service,
+ mock_document_reference,
):
"""Test transaction-based finalization when no existing final documents found"""
new_doc = mock_document_reference
@@ -539,7 +575,8 @@ def test_finalize_and_supersede_with_transaction_no_existing_docs(
def test_finalize_and_supersede_with_transaction_multiple_existing(
- service, mock_document_reference,
+ service,
+ mock_document_reference,
):
"""Test transaction-based finalization superseding multiple existing final documents"""
new_doc = mock_document_reference
@@ -575,7 +612,8 @@ def test_finalize_and_supersede_with_transaction_multiple_existing(
def test_finalize_and_supersede_with_transaction_skips_same_id(
- service, mock_document_reference,
+ service,
+ mock_document_reference,
):
"""Test that transaction skips documents with the same ID"""
new_doc = mock_document_reference
@@ -603,7 +641,8 @@ def test_finalize_and_supersede_with_transaction_skips_same_id(
def test_finalize_and_supersede_with_transaction_handles_transaction_cancelled(
- service, mock_document_reference,
+ service,
+ mock_document_reference,
):
new_doc = mock_document_reference
@@ -643,25 +682,34 @@ def test_handle_upload_document_reference_request_no_document_found(service):
def test_process_preliminary_document_reference_exception_during_processing(
- service, mock_document_reference, mocker,
+ service,
+ mock_document_reference,
+ mocker,
):
"""Test that exceptions during processing are properly raised"""
object_key = "staging/test-doc-id"
mocker.patch.object(
- service, "_perform_virus_scan", return_value=VirusScanResult.CLEAN,
+ service,
+ "_perform_virus_scan",
+ return_value=VirusScanResult.CLEAN,
)
mocker.patch.object(
- service, "_process_clean_document", side_effect=Exception("Processing failed"),
+ service,
+ "_process_clean_document",
+ side_effect=Exception("Processing failed"),
)
with pytest.raises(Exception) as exc_info:
service._process_preliminary_document_reference(
- mock_document_reference, object_key, 1222,
+ mock_document_reference,
+ object_key,
+ 1222,
)
assert "Processing failed" in str(exc_info.value)
+
def test_get_infrastructure_for_document_key_non_pdm(service):
assert service.table_name == ""
infra = service._get_infrastructure_for_document_key(object_parts=["1234", "123"])
@@ -695,6 +743,7 @@ def test_get_infra_invalid_doc_type(monkeypatch, service):
with pytest.raises(InvalidDocTypeException):
service._get_infrastructure_for_document_key(["fhir_upload", "999999"])
+
def test_is_file_invalid_calls_correct_functions(service, mocker):
"""Test that is_file_invalid calls the right functions in the correct order"""
object_key = "test-folder/test-file.docx"
@@ -716,10 +765,9 @@ def test_is_file_invalid_calls_correct_functions(service, mocker):
assert result is True
service.s3_service.get_object_stream.assert_called_once_with(
- service.staging_s3_bucket_name, object_key,
+ service.staging_s3_bucket_name,
+ object_key,
)
mock_stream.read.assert_called_once_with()
mock_bytesio.assert_called_once_with(file_content)
mock_check.assert_called_once_with(mock_file_stream, file_extension)
-
-
diff --git a/lambdas/tests/unit/utils/test_file_utils.py b/lambdas/tests/unit/utils/test_file_utils.py
index 6bda04566c..a6dd202289 100644
--- a/lambdas/tests/unit/utils/test_file_utils.py
+++ b/lambdas/tests/unit/utils/test_file_utils.py
@@ -16,7 +16,9 @@ def test_convert_csv_dictionary_to_bytes():
]
result_bytes = convert_csv_dictionary_to_bytes(
- headers=headers, csv_dict_data=metadata_csv_data, encoding="utf-8",
+ headers=headers,
+ csv_dict_data=metadata_csv_data,
+ encoding="utf-8",
)
result_str = result_bytes.decode("utf-8")
@@ -24,6 +26,7 @@ def test_convert_csv_dictionary_to_bytes():
assert result_str == expected_output
+
@pytest.mark.parametrize(
"file_extension,file_content,expected_result",
[
@@ -37,6 +40,7 @@ def test_skipped_file_types(file_extension, file_content, expected_result):
result = check_file_locked_or_corrupt(file_stream, file_extension)
assert result == expected_result
+
@pytest.mark.parametrize(
"file_extension,is_encrypted,expected_result",
[
@@ -67,10 +71,15 @@ def test_office_files(mock_office_file, file_extension, is_encrypted, expected_r
mock_office_file.assert_called_once_with(file_stream)
mock_instance.is_encrypted.assert_called_once()
+
@pytest.mark.parametrize(
"file_extension,file_content,expected_result",
[
- ("rtf", b"{\\rtf1\\ansi\\deff0 {\\fonttbl {\\f0 Times New Roman;}}\\f0\\fs60 Hello!}", False),
+ (
+ "rtf",
+ b"{\\rtf1\\ansi\\deff0 {\\fonttbl {\\f0 Times New Roman;}}\\f0\\fs60 Hello!}",
+ False,
+ ),
("rtf", b"This is not an RTF file", True),
("csv", b"name,age,city\nAlice,30,NYC\nBob,25,LA", False),
("csv", b"\xff\xfe Invalid UTF-8", True),
@@ -85,9 +94,16 @@ def test_text_based_files(file_extension, file_content, expected_result):
result = check_file_locked_or_corrupt(file_stream, file_extension)
assert result == expected_result
+
@pytest.mark.parametrize(
"file_extension",
- ["jpg", "jpeg", "png", "tiff", "tif",],
+ [
+ "jpg",
+ "jpeg",
+ "png",
+ "tiff",
+ "tif",
+ ],
ids=["jpg", "jpeg", "png", "tiff", "tif"],
)
@patch("utils.file_utils.Image.open")
@@ -102,6 +118,7 @@ def test_image_files_valid(mock_image_open, file_extension):
mock_image_open.assert_called_once_with(file_stream)
mock_img.verify.assert_called_once()
+
@pytest.mark.parametrize(
"file_extension",
["jpg", "png", "tiff"],
@@ -115,6 +132,7 @@ def test_image_files_corrupt(mock_image_open, file_extension):
assert result is True
+
@pytest.mark.parametrize(
"file_extension",
["unknown", "mp4", "mp3", "avi", "mov"],
@@ -125,6 +143,7 @@ def test_unsupported_file_extensions(file_extension):
result = check_file_locked_or_corrupt(file_stream, file_extension)
assert result is False
+
@pytest.mark.parametrize(
"file_extension",
["docx", "xlsx", "pptx", "doc", "xls"],
@@ -138,6 +157,3 @@ def test_office_file_exception_returns_true(mock_office_file, file_extension):
result = check_file_locked_or_corrupt(file_stream, file_extension)
assert result is True
-
-
-
diff --git a/lambdas/utils/file_utils.py b/lambdas/utils/file_utils.py
index 4623c3bfeb..48578b729d 100644
--- a/lambdas/utils/file_utils.py
+++ b/lambdas/utils/file_utils.py
@@ -7,8 +7,11 @@
logger = LoggingService(__name__)
+
def convert_csv_dictionary_to_bytes(
- headers: list[str], csv_dict_data: list[dict], encoding: str = "utf-8",
+ headers: list[str],
+ csv_dict_data: list[dict],
+ encoding: str = "utf-8",
) -> bytes:
csv_buffer = BytesIO()
csv_text_wrapper = TextIOWrapper(csv_buffer, encoding=encoding, newline="")
@@ -29,23 +32,23 @@ def convert_csv_dictionary_to_bytes(
def check_file_locked_or_corrupt(file_stream, ext):
file_stream.seek(0)
- text_exts = ['rtf', 'csv', 'json', 'txt', 'xml']
- media_exts = ['jpg', 'jpeg', 'png', 'tiff', 'tif']
+ text_exts = ["rtf", "csv", "json", "txt", "xml"]
+ media_exts = ["jpg", "jpeg", "png", "tiff", "tif"]
try:
- if ext == 'pdf' or ext == 'zip':
+ if ext == "pdf" or ext == "zip":
# Skipping PDF check, as this is covered by the antivirus scan
logger.info(f"Skipping check for {ext} files")
return False
- elif ext in ['docx', 'xlsx', 'pptx', 'doc', 'xls', 'ppt']:
+ elif ext in ["docx", "xlsx", "pptx", "doc", "xls", "ppt"]:
office_file = msoffcrypto.OfficeFile(file_stream)
encrypt = office_file.is_encrypted()
return encrypt
elif ext in text_exts:
sample = file_stream.read(1024)
- sample.decode('utf-8')
- if ext == 'rtf' and not sample.startswith(b'{\\rtf1'):
+ sample.decode("utf-8")
+ if ext == "rtf" and not sample.startswith(b"{\\rtf1"):
return True
return False
@@ -55,9 +58,13 @@ def check_file_locked_or_corrupt(file_stream, ext):
return False
else:
- logger.info(f"File with extension {ext} is not supported for locked/corrupt check, treating as valid.")
+ logger.info(
+ f"File with extension {ext} is not supported for locked/corrupt check, treating as valid."
+ )
return False
except Exception as e:
- logger.error(f"Error checking file validity for .{ext}: {type(e).__name__} - {str(e)}")
- return True
\ No newline at end of file
+ logger.error(
+ f"Error checking file validity for .{ext}: {type(e).__name__} - {str(e)}"
+ )
+ return True
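With the formatting settled, the behaviour of check_file_locked_or_corrupt is easiest to see by example. The sketch below exercises the branches exactly as defined in the hunk above, using in-memory streams only (no S3 involved):

    import io

    from utils.file_utils import check_file_locked_or_corrupt

    # A well-formed RTF header passes; text mislabelled as RTF is flagged.
    assert check_file_locked_or_corrupt(io.BytesIO(b"{\\rtf1 hello}"), "rtf") is False
    assert check_file_locked_or_corrupt(io.BytesIO(b"plain text"), "rtf") is True

    # PDFs/ZIPs are skipped (covered by the antivirus scan), and unknown
    # extensions are treated as valid by design.
    assert check_file_locked_or_corrupt(io.BytesIO(b"%PDF-1.7"), "pdf") is False
    assert check_file_locked_or_corrupt(io.BytesIO(b"anything"), "mp4") is False
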
From e38cd2c3fb554b5d7f5252715ed7a2e4417ad7ce Mon Sep 17 00:00:00 2001
From: NogaNHS <127490765+NogaNHS@users.noreply.github.com>
Date: Wed, 11 Feb 2026 13:35:25 +0000
Subject: [PATCH 08/13] add test for document upload status service
---
.../upload_document_reference_service.py | 6 +-
.../test_get_document_upload_status.py | 82 ++++++++++++++-----
lambdas/utils/file_utils.py | 4 +-
3 files changed, 69 insertions(+), 23 deletions(-)
diff --git a/lambdas/services/upload_document_reference_service.py b/lambdas/services/upload_document_reference_service.py
index 56804f28b5..5917047d9a 100644
--- a/lambdas/services/upload_document_reference_service.py
+++ b/lambdas/services/upload_document_reference_service.py
@@ -166,7 +166,8 @@ def _process_preliminary_document_reference(
preliminary_document_reference.file_name.split(".")[-1].lower()
)
is_file_protected = self.is_file_invalid(
- object_key, file_type_extension
+ object_key,
+ file_type_extension,
)
if is_file_protected:
logger.warning(
@@ -496,7 +497,8 @@ def _update_dynamo_table(
def is_file_invalid(self, object_key: str, file_type_extension: str) -> bool:
entire_object = self.s3_service.get_object_stream(
- self.staging_s3_bucket_name, object_key
+ self.staging_s3_bucket_name,
+ object_key,
)
file_stream = io.BytesIO(entire_object.read())
return check_file_locked_or_corrupt(file_stream, file_type_extension)
diff --git a/lambdas/tests/unit/services/test_get_document_upload_status.py b/lambdas/tests/unit/services/test_get_document_upload_status.py
index b646a327f0..3000e11a6c 100644
--- a/lambdas/tests/unit/services/test_get_document_upload_status.py
+++ b/lambdas/tests/unit/services/test_get_document_upload_status.py
@@ -15,9 +15,12 @@ def mock_document_service():
@pytest.fixture
-def get_document_upload_status_service(mock_document_service):
+def get_document_upload_status_service(mock_document_service, mocker):
+ mocker.patch(
+ "services.get_document_upload_status.DocumentService",
+ return_value=mock_document_service,
+ )
service = GetDocumentUploadStatusService()
- service.document_service = mock_document_service
return service
@@ -80,11 +83,13 @@ def test_get_document_references_by_id_found_documents(
)
result = get_document_upload_status_service.get_document_references_by_id(
- nhs_number, document_ids
+ nhs_number,
+ document_ids,
)
mock_document_service.get_batch_document_references_by_id.assert_called_once_with(
- document_ids, SupportedDocumentTypes.LG
+ document_ids,
+ SupportedDocumentTypes.LG,
)
assert len(result) == 2
assert result["doc-id-1"]["status"] == "final"
@@ -101,11 +106,12 @@ def test_get_document_references_by_id_not_found_documents(
nhs_number = "1234567890"
document_ids = ["doc-id-1", "non-existent-id"]
mock_document_service.get_batch_document_references_by_id.return_value = [
- sample_document_references[0]
+ sample_document_references[0],
]
result = get_document_upload_status_service.get_document_references_by_id(
- nhs_number, document_ids
+ nhs_number,
+ document_ids,
)
assert len(result) == 2
@@ -123,11 +129,12 @@ def test_get_document_references_by_id_access_denied(
nhs_number = "1234567890"
document_ids = ["doc-id-3"]
mock_document_service.get_batch_document_references_by_id.return_value = [
- sample_document_references[2]
+ sample_document_references[2],
]
result = get_document_upload_status_service.get_document_references_by_id(
- nhs_number, document_ids
+ nhs_number,
+ document_ids,
)
assert len(result) == 1
@@ -143,11 +150,12 @@ def test_get_document_references_by_id_infected_document(
nhs_number = "1234567890"
document_ids = ["doc-id-4"]
mock_document_service.get_batch_document_references_by_id.return_value = [
- sample_document_references[3]
+ sample_document_references[3],
]
result = get_document_upload_status_service.get_document_references_by_id(
- nhs_number, document_ids
+ nhs_number,
+ document_ids,
)
assert len(result) == 1
@@ -155,8 +163,38 @@ def test_get_document_references_by_id_infected_document(
assert result["doc-id-4"]["error_code"] == DocumentStatus.INFECTED.code
+def test_get_document_references_by_id_invalid_document(
+ get_document_upload_status_service,
+ mock_document_service,
+):
+ nhs_number = "1234567890"
+ document_ids = ["doc-id-invalid"]
+
+ cancelled_doc = DocumentReference(
+ id="doc-id-invalid",
+ nhs_number="1234567890",
+ file_name="invalid_file.pdf",
+ doc_status=DocumentStatus.CANCELLED.display,
+ virus_scanner_result=VirusScanResult.INVALID,
+ )
+
+ mock_document_service.get_batch_document_references_by_id.return_value = [
+ cancelled_doc,
+ ]
+
+ result = get_document_upload_status_service.get_document_references_by_id(
+ nhs_number,
+ document_ids,
+ )
+
+ assert len(result) == 1
+ assert result["doc-id-invalid"]["status"] == DocumentStatus.INVALID.display
+ assert result["doc-id-invalid"]["error_code"] == DocumentStatus.INVALID.code
+
+
def test_get_document_references_by_id_cancelled_document(
- get_document_upload_status_service, mock_document_service
+ get_document_upload_status_service,
+ mock_document_service,
):
nhs_number = "1234567890"
document_ids = ["doc-id-cancelled"]
@@ -171,11 +209,12 @@ def test_get_document_references_by_id_cancelled_document(
)
mock_document_service.get_batch_document_references_by_id.return_value = [
- cancelled_doc
+ cancelled_doc,
]
result = get_document_upload_status_service.get_document_references_by_id(
- nhs_number, document_ids
+ nhs_number,
+ document_ids,
)
assert len(result) == 1
@@ -191,11 +230,12 @@ def test_get_document_references_by_id_deleted_document(
nhs_number = "1234567890"
document_ids = ["doc-id-5"]
mock_document_service.get_batch_document_references_by_id.return_value = [
- sample_document_references[4]
+ sample_document_references[4],
]
result = get_document_upload_status_service.get_document_references_by_id(
- nhs_number, document_ids
+ nhs_number,
+ document_ids,
)
assert len(result) == 0
@@ -216,7 +256,8 @@ def test_get_document_references_by_id_multiple_mixed_statuses(
]
result = get_document_upload_status_service.get_document_references_by_id(
- nhs_number, document_ids
+ nhs_number,
+ document_ids,
)
assert len(result) == 4
@@ -236,18 +277,21 @@ def test_get_document_references_by_id_multiple_mixed_statuses(
def test_get_document_references_by_id_no_results(
- get_document_upload_status_service, mock_document_service
+ get_document_upload_status_service,
+ mock_document_service,
):
nhs_number = "1234567890"
document_ids = ["doc-id-6"]
mock_document_service.get_batch_document_references_by_id.return_value = []
result = get_document_upload_status_service.get_document_references_by_id(
- nhs_number, document_ids
+ nhs_number,
+ document_ids,
)
mock_document_service.get_batch_document_references_by_id.assert_called_once_with(
- document_ids, SupportedDocumentTypes.LG
+ document_ids,
+ SupportedDocumentTypes.LG,
)
assert result["doc-id-6"]["status"] == DocumentStatus.NOT_FOUND.display
assert result["doc-id-6"]["error_code"] == DocumentStatus.NOT_FOUND.code
diff --git a/lambdas/utils/file_utils.py b/lambdas/utils/file_utils.py
index 48578b729d..3ffa85b15a 100644
--- a/lambdas/utils/file_utils.py
+++ b/lambdas/utils/file_utils.py
@@ -59,12 +59,12 @@ def check_file_locked_or_corrupt(file_stream, ext):
else:
logger.info(
- f"File with extension {ext} is not supported for locked/corrupt check, treating as valid."
+ f"File with extension {ext} is not supported for locked/corrupt check, treating as valid.",
)
return False
except Exception as e:
logger.error(
- f"Error checking file validity for .{ext}: {type(e).__name__} - {str(e)}"
+ f"Error checking file validity for .{ext}: {type(e).__name__} - {str(e)}",
)
return True
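The fixture change here is the notable part: patching services.get_document_upload_status.DocumentService replaces the class at the name the module under test actually resolves, so the mock is wired in while GetDocumentUploadStatusService() constructs itself, rather than being assigned onto the instance afterwards. A minimal pytest-mock sketch of the pattern, with myapp.service.Dependency as a placeholder module path:

    # Illustrative only; myapp.service and its names are placeholders.
    def test_patch_at_lookup_site(mocker):
        mock_dep = mocker.Mock()
        # Patch where the class is looked up, not where it is defined,
        # so the constructor under test receives the mock.
        mocker.patch("myapp.service.Dependency", return_value=mock_dep)

        from myapp.service import Service

        service = Service()  # __init__ calls Dependency() -> mock_dep
        assert service.dependency is mock_dep
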
From 1d288398da911663bd9727db49e51e9b0cece4a4 Mon Sep 17 00:00:00 2001
From: adamwhitingnhs
Date: Thu, 12 Feb 2026 16:25:33 +0000
Subject: [PATCH 09/13] address PR review comments
---
lambdas/services/base/s3_service.py | 2 +-
lambdas/services/get_document_upload_status.py | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/lambdas/services/base/s3_service.py b/lambdas/services/base/s3_service.py
index 1da0a82f68..a97e403d7c 100644
--- a/lambdas/services/base/s3_service.py
+++ b/lambdas/services/base/s3_service.py
@@ -235,7 +235,7 @@ def get_file_size(self, s3_bucket_name: str, object_key: str) -> int:
def get_head_object(self, bucket: str, key: str):
return self.client.head_object(Bucket=bucket, Key=key)
- def get_object_stream(self, bucket: str, key: str, byte_range: str = None):
+ def get_object_stream(self, bucket: str, key: str, byte_range: str | None = None):
params = {"Bucket": bucket, "Key": key}
if byte_range:
params["Range"] = byte_range
diff --git a/lambdas/services/get_document_upload_status.py b/lambdas/services/get_document_upload_status.py
index 7c06d6fac9..734671a3ec 100644
--- a/lambdas/services/get_document_upload_status.py
+++ b/lambdas/services/get_document_upload_status.py
@@ -24,7 +24,7 @@ def _determine_document_status(self, doc_ref, nhs_number):
if doc_ref.doc_status == "cancelled":
if doc_ref.virus_scanner_result == VirusScanResult.INFECTED:
return DocumentStatus.INFECTED.display, DocumentStatus.INFECTED.code
- elif doc_ref.virus_scanner_result == VirusScanResult.INVALID:
+ if doc_ref.virus_scanner_result == VirusScanResult.INVALID:
return DocumentStatus.INVALID.display, DocumentStatus.INVALID.code
return DocumentStatus.CANCELLED.display, DocumentStatus.CANCELLED.code
From ab75e936c42fdcf9cc536975c18efa9be06fc0a2 Mon Sep 17 00:00:00 2001
From: adamwhitingnhs
Date: Thu, 12 Feb 2026 16:47:50 +0000
Subject: [PATCH 10/13] move file extensions to constants
---
lambdas/utils/constants/file_extensions.py | 2 ++
lambdas/utils/file_utils.py | 7 +++----
2 files changed, 5 insertions(+), 4 deletions(-)
create mode 100644 lambdas/utils/constants/file_extensions.py
diff --git a/lambdas/utils/constants/file_extensions.py b/lambdas/utils/constants/file_extensions.py
new file mode 100644
index 0000000000..bab8f38492
--- /dev/null
+++ b/lambdas/utils/constants/file_extensions.py
@@ -0,0 +1,2 @@
+TEXT_FILE_EXTENSIONS = ["rtf", "csv", "json", "txt", "xml"]
+MEDIA_FILE_EXTENSIONS = ["jpg", "jpeg", "png", "tiff", "tif"]
diff --git a/lambdas/utils/file_utils.py b/lambdas/utils/file_utils.py
index 3ffa85b15a..be1fd3f893 100644
--- a/lambdas/utils/file_utils.py
+++ b/lambdas/utils/file_utils.py
@@ -4,6 +4,7 @@
import msoffcrypto
from PIL import Image
from utils.audit_logging_setup import LoggingService
+from utils.constants.file_extensions import MEDIA_FILE_EXTENSIONS, TEXT_FILE_EXTENSIONS
logger = LoggingService(__name__)
@@ -32,8 +33,6 @@ def convert_csv_dictionary_to_bytes(
def check_file_locked_or_corrupt(file_stream, ext):
file_stream.seek(0)
- text_exts = ["rtf", "csv", "json", "txt", "xml"]
- media_exts = ["jpg", "jpeg", "png", "tiff", "tif"]
try:
if ext == "pdf" or ext == "zip":
# Skipping PDF check, as this is covered by the antivirus scan
@@ -45,14 +44,14 @@ def check_file_locked_or_corrupt(file_stream, ext):
encrypt = office_file.is_encrypted()
return encrypt
- elif ext in text_exts:
+ elif ext in TEXT_FILE_EXTENSIONS:
sample = file_stream.read(1024)
sample.decode("utf-8")
if ext == "rtf" and not sample.startswith(b"{\\rtf1"):
return True
return False
- elif ext in media_exts:
+ elif ext in MEDIA_FILE_EXTENSIONS:
with Image.open(file_stream) as img:
img.verify()
return False
From 84c4ed8edf2f0591bb56b1a0d2e1c8f8ca8cb4dd Mon Sep 17 00:00:00 2001
From: NogaNHS <127490765+NogaNHS@users.noreply.github.com>
Date: Thu, 12 Feb 2026 16:56:34 +0000
Subject: [PATCH 11/13] [PRMP-1384] Add Microsoft Office file extensions
to constants
---
lambdas/utils/constants/file_extensions.py | 1 +
lambdas/utils/file_utils.py | 5 +++--
2 files changed, 4 insertions(+), 2 deletions(-)
diff --git a/lambdas/utils/constants/file_extensions.py b/lambdas/utils/constants/file_extensions.py
index bab8f38492..4e04dfd004 100644
--- a/lambdas/utils/constants/file_extensions.py
+++ b/lambdas/utils/constants/file_extensions.py
@@ -1,2 +1,3 @@
TEXT_FILE_EXTENSIONS = ["rtf", "csv", "json", "txt", "xml"]
MEDIA_FILE_EXTENSIONS = ["jpg", "jpeg", "png", "tiff", "tif"]
+MICROSOFT_OFFICE_FILE_EXTENSIONS = ["docx", "xlsx", "pptx", "doc", "xls", "ppt"]
diff --git a/lambdas/utils/file_utils.py b/lambdas/utils/file_utils.py
index be1fd3f893..26445077b9 100644
--- a/lambdas/utils/file_utils.py
+++ b/lambdas/utils/file_utils.py
@@ -4,7 +4,8 @@
import msoffcrypto
from PIL import Image
from utils.audit_logging_setup import LoggingService
-from utils.constants.file_extensions import MEDIA_FILE_EXTENSIONS, TEXT_FILE_EXTENSIONS
+from utils.constants.file_extensions import MEDIA_FILE_EXTENSIONS, TEXT_FILE_EXTENSIONS, \
+ MICROSOFT_OFFICE_FILE_EXTENSIONS
logger = LoggingService(__name__)
@@ -39,7 +40,7 @@ def check_file_locked_or_corrupt(file_stream, ext):
logger.info(f"Skipping check for {ext} files")
return False
- elif ext in ["docx", "xlsx", "pptx", "doc", "xls", "ppt"]:
+ elif ext in MICROSOFT_OFFICE_FILE_EXTENSIONS:
office_file = msoffcrypto.OfficeFile(file_stream)
encrypt = office_file.is_encrypted()
return encrypt
From 1ab851a73763ba62e61160960e2e68aceefaa467 Mon Sep 17 00:00:00 2001
From: adamwhitingnhs
Date: Thu, 12 Feb 2026 17:07:40 +0000
Subject: [PATCH 12/13] fix linting
---
lambdas/ruff.toml | 2 +-
lambdas/services/base/s3_service.py | 3 +--
lambdas/utils/file_utils.py | 15 +++++++--------
3 files changed, 9 insertions(+), 11 deletions(-)
diff --git a/lambdas/ruff.toml b/lambdas/ruff.toml
index 10a91ceaf7..9b92d3839a 100644
--- a/lambdas/ruff.toml
+++ b/lambdas/ruff.toml
@@ -33,7 +33,7 @@ line-length = 130
# Unlike Flake8, Ruff doesn't enable pycodestyle warnings (`W`) or
# McCabe complexity (`C901`) by default.
# COM812: Enforce trailing commas on multi-line constructs.
-select = ["E", "F", "COM812"]
+select = ["E", "F", "COM812", "RET505"]
ignore = []
# Allow autofix for all enabled rules (when `--fix`) is provided.
diff --git a/lambdas/services/base/s3_service.py b/lambdas/services/base/s3_service.py
index a97e403d7c..ef04724e61 100644
--- a/lambdas/services/base/s3_service.py
+++ b/lambdas/services/base/s3_service.py
@@ -151,8 +151,7 @@ def copy_across_bucket(
if_none_match,
False,
)
- else:
- raise e
+ raise e
else:
logger.error(f"Copy failed: {e}")
raise e
diff --git a/lambdas/utils/file_utils.py b/lambdas/utils/file_utils.py
index 26445077b9..a0f6bf2375 100644
--- a/lambdas/utils/file_utils.py
+++ b/lambdas/utils/file_utils.py
@@ -40,28 +40,27 @@ def check_file_locked_or_corrupt(file_stream, ext):
logger.info(f"Skipping check for {ext} files")
return False
- elif ext in MICROSOFT_OFFICE_FILE_EXTENSIONS:
+ if ext in MICROSOFT_OFFICE_FILE_EXTENSIONS:
office_file = msoffcrypto.OfficeFile(file_stream)
encrypt = office_file.is_encrypted()
return encrypt
- elif ext in TEXT_FILE_EXTENSIONS:
+ if ext in TEXT_FILE_EXTENSIONS:
sample = file_stream.read(1024)
sample.decode("utf-8")
if ext == "rtf" and not sample.startswith(b"{\\rtf1"):
return True
return False
- elif ext in MEDIA_FILE_EXTENSIONS:
+ if ext in MEDIA_FILE_EXTENSIONS:
with Image.open(file_stream) as img:
img.verify()
return False
- else:
- logger.info(
- f"File with extension {ext} is not supported for locked/corrupt check, treating as valid.",
- )
- return False
+ logger.info(
+ f"File with extension {ext} is not supported for locked/corrupt check, treating as valid.",
+ )
+ return False
except Exception as e:
logger.error(
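RET505 (flake8-return, via ruff) flags elif/else branches that follow a return, which is exactly what the two hunks above remove. The transform is behaviour-preserving; a minimal before/after sketch:

    # Before (flagged by RET505): superfluous elif/else after return.
    def before(ext: str) -> str:
        if ext == "pdf":
            return "skip"
        elif ext == "docx":
            return "office-check"
        else:
            return "unsupported"

    # After: flat guard clauses, identical behaviour, one less indent level.
    def after(ext: str) -> str:
        if ext == "pdf":
            return "skip"
        if ext == "docx":
            return "office-check"
        return "unsupported"
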
From c332c3d7c755baf73bf7c128dad497a86fb70a19 Mon Sep 17 00:00:00 2001
From: adamwhitingnhs
Date: Thu, 12 Feb 2026 17:13:15 +0000
Subject: [PATCH 13/13] fix import order
---
lambdas/utils/file_utils.py | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/lambdas/utils/file_utils.py b/lambdas/utils/file_utils.py
index a0f6bf2375..90e5d9ed0b 100644
--- a/lambdas/utils/file_utils.py
+++ b/lambdas/utils/file_utils.py
@@ -4,8 +4,11 @@
import msoffcrypto
from PIL import Image
from utils.audit_logging_setup import LoggingService
-from utils.constants.file_extensions import MEDIA_FILE_EXTENSIONS, TEXT_FILE_EXTENSIONS, \
- MICROSOFT_OFFICE_FILE_EXTENSIONS
+from utils.constants.file_extensions import (
+ MEDIA_FILE_EXTENSIONS,
+ MICROSOFT_OFFICE_FILE_EXTENSIONS,
+ TEXT_FILE_EXTENSIONS,
+)
logger = LoggingService(__name__)