33from __future__ import annotations
44
55import hashlib
6- import json
76import logging
87import os
98from typing import TYPE_CHECKING
1312import boto3
1413from botocore .exceptions import ClientError
1514
15+ from server .models import S3Metadata
16+
1617if TYPE_CHECKING :
1718 from botocore .client import BaseClient
1819
20+
1921# Initialize logger for this module
2022logger = logging .getLogger (__name__ )
2123
@@ -232,13 +234,13 @@ def upload_to_s3(content: str, s3_file_path: str, ingest_id: UUID) -> str:
232234 return public_url
233235
234236
235- def upload_metadata_to_s3 (metadata : dict , s3_file_path : str , ingest_id : UUID ) -> str :
237+ def upload_metadata_to_s3 (metadata : S3Metadata , s3_file_path : str , ingest_id : UUID ) -> str :
236238 """Upload metadata JSON to S3 alongside the digest file.
237239
238240 Parameters
239241 ----------
240- metadata : dict
241- The metadata dictionary containing summary, tree, and content.
242+ metadata : S3Metadata
243+ The metadata struct containing summary, tree, and content.
242244 s3_file_path : str
243245 The S3 file path for the digest (metadata will use .json extension).
244246 ingest_id : UUID
@@ -272,7 +274,7 @@ def upload_metadata_to_s3(metadata: dict, s3_file_path: str, ingest_id: UUID) ->
272274 "bucket_name" : bucket_name ,
273275 "metadata_file_path" : metadata_file_path ,
274276 "ingest_id" : str (ingest_id ),
275- "metadata_size" : len (json . dumps ( metadata )),
277+ "metadata_size" : len (metadata . model_dump_json ( )),
276278 }
277279
278280 # Log upload attempt
@@ -283,7 +285,7 @@ def upload_metadata_to_s3(metadata: dict, s3_file_path: str, ingest_id: UUID) ->
283285 s3_client .put_object (
284286 Bucket = bucket_name ,
285287 Key = metadata_file_path ,
286- Body = json . dumps ( metadata , indent = 2 ).encode ("utf-8" ),
288+ Body = metadata . model_dump_json ( indent = 2 ).encode ("utf-8" ),
287289 ContentType = "application/json" ,
288290 Tagging = f"ingest_id={ ingest_id !s} " ,
289291 )
@@ -331,7 +333,7 @@ def upload_metadata_to_s3(metadata: dict, s3_file_path: str, ingest_id: UUID) ->
331333 return public_url
332334
333335
334- def get_metadata_from_s3 (s3_file_path : str ) -> dict | None :
336+ def get_metadata_from_s3 (s3_file_path : str ) -> S3Metadata | None :
335337 """Retrieve metadata JSON from S3.
336338
337339 Parameters
@@ -341,8 +343,8 @@ def get_metadata_from_s3(s3_file_path: str) -> dict | None:
341343
342344 Returns
343345 -------
344- dict | None
345- The metadata dictionary if found, None otherwise.
346+ S3Metadata | None
347+ The metadata struct if found, None otherwise.
346348
347349 """
348350 if not is_s3_enabled ():
@@ -359,7 +361,7 @@ def get_metadata_from_s3(s3_file_path: str) -> dict | None:
359361 response = s3_client .get_object (Bucket = bucket_name , Key = metadata_file_path )
360362 metadata_content = response ["Body" ].read ().decode ("utf-8" )
361363
362- return json . loads (metadata_content )
364+ return S3Metadata . model_validate_json (metadata_content )
363365 except ClientError as err :
364366 # Object doesn't exist if we get a 404 error
365367 error_code = err .response .get ("Error" , {}).get ("Code" )
0 commit comments