Skip to content

Commit 9af0dcc

Browse files
committed
feat: add S3Metadata model for structured metadata handling
1 parent 579894d commit 9af0dcc

File tree

3 files changed

+40

-25

lines changed

src/server/models.py

Lines changed: 19 additions & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -116,6 +116,25 @@ class IngestErrorResponse(BaseModel):
116116
IngestResponse = Union[IngestSuccessResponse, IngestErrorResponse]
117117

118118

119+
class S3Metadata(BaseModel):
    """Structured metadata stored alongside a digest file in S3.

    Serialized to JSON when cached and validated back into this model on
    retrieval, replacing the previous untyped ``dict`` representation.

    Attributes
    ----------
    summary : str
        Summary of the ingestion process including token estimates.
    tree : str
        File tree structure of the repository.
    content : str
        Processed content from the repository files.

    """

    summary: str = Field(..., description="Ingestion summary with token estimates")
    tree: str = Field(..., description="File tree structure")
    content: str = Field(..., description="Processed file content")
136+
137+
119138
class QueryForm(BaseModel):
120139
"""Form data for the query.
121140

src/server/query_processor.py

Lines changed: 9 additions & 15 deletions
Original file line number · Diff line number · Diff line change
@@ -11,7 +11,7 @@
1111
from gitingest.query_parser import parse_remote_repo
1212
from gitingest.utils.git_utils import resolve_commit, validate_github_token
1313
from gitingest.utils.pattern_utils import process_patterns
14-
from server.models import IngestErrorResponse, IngestResponse, IngestSuccessResponse, PatternType
14+
from server.models import IngestErrorResponse, IngestResponse, IngestSuccessResponse, PatternType, S3Metadata
1515
from server.s3_utils import (
1616
_build_s3_url,
1717
check_s3_object_exists,
@@ -94,15 +94,9 @@ async def _check_s3_cache(
9494

9595
if metadata:
9696
# Use cached metadata if available
97-
summary = metadata.get(
98-
"summary",
99-
"Digest served from cache (S3). Download the full digest to see content details.",
100-
)
101-
tree = metadata.get("tree", "Digest served from cache. Download the full digest to see the file tree.")
102-
content = metadata.get(
103-
"content",
104-
"Digest served from cache. Download the full digest to see the content.",
105-
)
97+
summary = metadata.summary
98+
tree = metadata.tree
99+
content = metadata.content
106100
else:
107101
# Fallback to placeholder messages if metadata not available
108102
summary = "Digest served from cache (S3). Download the full digest to see content details."
@@ -166,11 +160,11 @@ def _store_digest_content(
166160
s3_url = upload_to_s3(content=digest_content, s3_file_path=s3_file_path, ingest_id=query.id)
167161

168162
# Also upload metadata JSON for caching
169-
metadata = {
170-
"summary": summary,
171-
"tree": tree,
172-
"content": content,
173-
}
163+
metadata = S3Metadata(
164+
summary=summary,
165+
tree=tree,
166+
content=content,
167+
)
174168
try:
175169
upload_metadata_to_s3(metadata=metadata, s3_file_path=s3_file_path, ingest_id=query.id)
176170
logger.debug("Successfully uploaded metadata to S3")

src/server/s3_utils.py

Lines changed: 12 additions & 10 deletions
Original file line number · Diff line number · Diff line change
@@ -3,7 +3,6 @@
33
from __future__ import annotations
44

55
import hashlib
6-
import json
76
import logging
87
import os
98
from typing import TYPE_CHECKING
@@ -13,9 +12,12 @@
1312
import boto3
1413
from botocore.exceptions import ClientError
1514

15+
from server.models import S3Metadata
16+
1617
if TYPE_CHECKING:
1718
from botocore.client import BaseClient
1819

20+
1921
# Initialize logger for this module
2022
logger = logging.getLogger(__name__)
2123

@@ -232,13 +234,13 @@ def upload_to_s3(content: str, s3_file_path: str, ingest_id: UUID) -> str:
232234
return public_url
233235

234236

235-
def upload_metadata_to_s3(metadata: dict, s3_file_path: str, ingest_id: UUID) -> str:
237+
def upload_metadata_to_s3(metadata: S3Metadata, s3_file_path: str, ingest_id: UUID) -> str:
236238
"""Upload metadata JSON to S3 alongside the digest file.
237239
238240
Parameters
239241
----------
240-
metadata : dict
241-
The metadata dictionary containing summary, tree, and content.
242+
metadata : S3Metadata
243+
The metadata struct containing summary, tree, and content.
242244
s3_file_path : str
243245
The S3 file path for the digest (metadata will use .json extension).
244246
ingest_id : UUID
@@ -272,7 +274,7 @@ def upload_metadata_to_s3(metadata: dict, s3_file_path: str, ingest_id: UUID) ->
272274
"bucket_name": bucket_name,
273275
"metadata_file_path": metadata_file_path,
274276
"ingest_id": str(ingest_id),
275-
"metadata_size": len(json.dumps(metadata)),
277+
"metadata_size": len(metadata.model_dump_json()),
276278
}
277279

278280
# Log upload attempt
@@ -283,7 +285,7 @@ def upload_metadata_to_s3(metadata: dict, s3_file_path: str, ingest_id: UUID) ->
283285
s3_client.put_object(
284286
Bucket=bucket_name,
285287
Key=metadata_file_path,
286-
Body=json.dumps(metadata, indent=2).encode("utf-8"),
288+
Body=metadata.model_dump_json(indent=2).encode("utf-8"),
287289
ContentType="application/json",
288290
Tagging=f"ingest_id={ingest_id!s}",
289291
)
@@ -331,7 +333,7 @@ def upload_metadata_to_s3(metadata: dict, s3_file_path: str, ingest_id: UUID) ->
331333
return public_url
332334

333335

334-
def get_metadata_from_s3(s3_file_path: str) -> dict | None:
336+
def get_metadata_from_s3(s3_file_path: str) -> S3Metadata | None:
335337
"""Retrieve metadata JSON from S3.
336338
337339
Parameters
@@ -341,8 +343,8 @@ def get_metadata_from_s3(s3_file_path: str) -> dict | None:
341343
342344
Returns
343345
-------
344-
dict | None
345-
The metadata dictionary if found, None otherwise.
346+
S3Metadata | None
347+
The metadata struct if found, None otherwise.
346348
347349
"""
348350
if not is_s3_enabled():
@@ -359,7 +361,7 @@ def get_metadata_from_s3(s3_file_path: str) -> dict | None:
359361
response = s3_client.get_object(Bucket=bucket_name, Key=metadata_file_path)
360362
metadata_content = response["Body"].read().decode("utf-8")
361363

362-
return json.loads(metadata_content)
364+
return S3Metadata.model_validate_json(metadata_content)
363365
except ClientError as err:
364366
# Object doesn't exist if we get a 404 error
365367
error_code = err.response.get("Error", {}).get("Code")

0 commit comments

Comments (0)