Skip to content

Commit 18ff94b

Browse files
committed
test
1 parent d1f8a80 commit 18ff94b

File tree

5 files changed

+93
-25
lines changed

5 files changed

+93
-25
lines changed

src/gitingest/entrypoint.py

Lines changed: 21 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -27,6 +27,7 @@
2727
from types import TracebackType
2828

2929
from gitingest.schemas import IngestionQuery
30+
from gitingest.schemas import Context
3031

3132
# Initialize logger for this module
3233
logger = get_logger(__name__)
@@ -51,6 +52,8 @@ async def ingest_async(
5152
and processes its files according to the specified query parameters. It returns a summary, a tree-like
5253
structure of the files, and the content of the files. The results can optionally be written to an output file.
5354
55+
The output is generated lazily using a Context object and its .generate_digest() method.
56+
5457
Parameters
5558
----------
5659
source : str
@@ -142,6 +145,8 @@ async def ingest_async(
142145

143146
if output:
144147
logger.debug("Writing output to file", extra={"output_path": output})
148+
context = ingest_query(query)
149+
summary, tree, content = context.generate_digest()
145150
await _write_output(tree, content=content, target=output)
146151

147152
logger.info("Ingestion completed successfully")
@@ -167,6 +172,8 @@ def ingest(
167172
and processes its files according to the specified query parameters. It returns a summary, a tree-like
168173
structure of the files, and the content of the files. The results can optionally be written to an output file.
169174
175+
The output is generated lazily using a Context object and its .generate_digest() method.
176+
170177
Parameters
171178
----------
172179
source : str
@@ -206,20 +213,20 @@ def ingest(
206213
``ingest_async`` : The asynchronous version of this function.
207214
208215
"""
209-
return asyncio.run(
210-
ingest_async(
211-
source=source,
212-
max_file_size=max_file_size,
213-
include_patterns=include_patterns,
214-
exclude_patterns=exclude_patterns,
215-
branch=branch,
216-
tag=tag,
217-
include_gitignored=include_gitignored,
218-
include_submodules=include_submodules,
219-
token=token,
220-
output=output,
221-
),
222-
)
216+
import asyncio
217+
context = asyncio.run(ingest_async(
218+
source,
219+
max_file_size=max_file_size,
220+
include_patterns=include_patterns,
221+
exclude_patterns=exclude_patterns,
222+
branch=branch,
223+
tag=tag,
224+
include_gitignored=include_gitignored,
225+
include_submodules=include_submodules,
226+
token=token,
227+
output=output,
228+
))
229+
return context.generate_digest()
223230

224231

225232
def _override_branch_and_tag(query: IngestionQuery, branch: str | None, tag: str | None) -> None:

src/gitingest/ingestion.py

Lines changed: 7 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@
77

88
from gitingest.config import MAX_DIRECTORY_DEPTH, MAX_FILES, MAX_TOTAL_SIZE_BYTES
99
from gitingest.output_formatter import format_node
10-
from gitingest.schemas import FileSystemNode, FileSystemNodeType, FileSystemStats
10+
from gitingest.schemas import FileSystemNode, FileSystemNodeType, FileSystemStats, Context
1111
from gitingest.utils.ingestion_utils import _should_exclude, _should_include
1212
from gitingest.utils.logging_config import get_logger
1313

@@ -18,12 +18,11 @@
1818
logger = get_logger(__name__)
1919

2020

21-
def ingest_query(query: IngestionQuery) -> tuple[str, str, str]:
21+
def ingest_query(query: IngestionQuery) -> Context:
2222
"""Run the ingestion process for a parsed query.
2323
2424
This is the main entry point for analyzing a codebase directory or single file. It processes the query
25-
parameters, reads the file or directory content, and generates a summary, directory structure, and file content,
26-
along with token estimations.
25+
parameters, reads the file or directory content, and returns a Context object that can generate the final output digest on demand.
2726
2827
Parameters
2928
----------
@@ -32,8 +31,8 @@ def ingest_query(query: IngestionQuery) -> tuple[str, str, str]:
3231
3332
Returns
3433
-------
35-
tuple[str, str, str]
36-
A tuple containing the summary, directory structure, and file contents.
34+
Context
35+
A Context object representing the ingested file system nodes. Call .generate_digest() to get the summary, directory structure, and file contents.
3736
3837
Raises
3938
------
@@ -91,7 +90,7 @@ def ingest_query(query: IngestionQuery) -> tuple[str, str, str]:
9190
"file_size": file_node.size,
9291
},
9392
)
94-
return format_node(file_node, query=query)
93+
return Context([file_node])
9594

9695
logger.info("Processing directory", extra={"directory_path": str(path)})
9796

@@ -117,7 +116,7 @@ def ingest_query(query: IngestionQuery) -> tuple[str, str, str]:
117116
},
118117
)
119118

120-
return format_node(root_node, query=query)
119+
return Context([root_node])
121120

122121

123122
def _process_node(node: FileSystemNode, query: IngestionQuery, stats: FileSystemStats) -> None:

src/gitingest/schemas/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
"""Module containing the schemas for the Gitingest package."""
22

33
from gitingest.schemas.cloning import CloneConfig
4-
from gitingest.schemas.filesystem import FileSystemNode, FileSystemNodeType, FileSystemStats
4+
from gitingest.schemas.filesystem import Context, FileSystemNode, FileSystemNodeType, FileSystemStats
55
from gitingest.schemas.ingestion import IngestionQuery
66

7-
__all__ = ["CloneConfig", "FileSystemNode", "FileSystemNodeType", "FileSystemStats", "IngestionQuery"]
7+
__all__ = ["CloneConfig", "FileSystemNode", "FileSystemNodeType", "FileSystemStats", "IngestionQuery", "Context"]

src/gitingest/schemas/filesystem.py

Lines changed: 58 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -159,3 +159,61 @@ def content(self) -> str: # pylint: disable=too-many-return-statements
159159
return fp.read()
160160
except (OSError, UnicodeDecodeError) as exc:
161161
return f"Error reading file with {good_enc!r}: {exc}"
162+
163+
164+
@dataclass
class Context:
    """Hold a list of FileSystemNode objects and build a digest from them on demand.

    Attributes
    ----------
    nodes : list[FileSystemNode]
        The nodes (files, directories or symlinks) the digest is generated from.

    """

    nodes: list[FileSystemNode]

    def generate_digest(self) -> tuple[str, str, str]:
        """Generate a summary, directory structure, and file contents for the context's nodes.

        Returns
        -------
        tuple[str, str, str]
            A tuple containing the summary, directory structure, and file contents.

        """
        # Directories contribute their own ``file_count``; plain files count as one.
        # Any other node type (e.g. a symlink) is not counted.
        total_files = 0
        for node in self.nodes:
            if node.type == FileSystemNodeType.DIRECTORY:
                total_files += node.file_count
            elif node.type == FileSystemNodeType.FILE:
                total_files += 1
        summary = "\n".join(["Context Digest", f"Files analyzed: {total_files}"])

        # Directory structure: one rendered subtree per node, under a fixed header line.
        tree = "\n".join(
            ["Directory structure:", *(self._create_tree_structure(node) for node in self.nodes)],
        )

        # File contents of every node, in node order.
        content = "\n".join(self._gather_file_contents(node) for node in self.nodes)

        return summary, tree, content

    def _gather_file_contents(self, node: FileSystemNode) -> str:
        """Return the content of ``node``, recursing into directories.

        Parameters
        ----------
        node : FileSystemNode
            The node to read. Non-directory nodes yield their ``content_string``; directories yield the
            newline-joined contents of their children.

        Returns
        -------
        str
            The collected content.

        """
        if node.type != FileSystemNodeType.DIRECTORY:
            return node.content_string
        return "\n".join(self._gather_file_contents(child) for child in node.children)

    def _create_tree_structure(self, node: FileSystemNode, prefix: str = "", is_last: bool = True) -> str:
        """Render ``node`` and its descendants as a tree drawn with box characters.

        Parameters
        ----------
        node : FileSystemNode
            The node to render.
        prefix : str
            Text placed before the connector on this node's line (default: ``""``).
        is_last : bool
            Whether ``node`` is the last child of its parent, which selects the connector (default: ``True``).

        Returns
        -------
        str
            One line per rendered node, each terminated by a newline.

        """
        connector = "└── " if is_last else "├── "

        display_name = node.name
        if node.type == FileSystemNodeType.DIRECTORY:
            display_name += "/"
        elif node.type == FileSystemNodeType.SYMLINK:
            # NOTE(review): ``readlink`` must be available in module scope - confirm the import.
            display_name += " -> " + readlink(node.path).name

        rendered = [f"{prefix}{connector}{display_name}\n"]

        if node.type == FileSystemNodeType.DIRECTORY and node.children:
            # The continuation prefix is as wide as the four-character connector so that
            # children line up underneath their parent's name.
            child_prefix = prefix + ("    " if is_last else "│   ")
            last_index = len(node.children) - 1
            rendered.extend(
                self._create_tree_structure(child, prefix=child_prefix, is_last=index == last_index)
                for index, child in enumerate(node.children)
            )

        return "".join(rendered)

src/server/query_processor.py

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -21,6 +21,7 @@
2121
upload_metadata_to_s3,
2222
upload_to_s3,
2323
)
24+
from gitingest.schemas import Context
2425
from server.server_config import MAX_DISPLAY_SIZE
2526

2627
# Initialize logger for this module
@@ -287,7 +288,10 @@ async def process_query(
287288
raise RuntimeError(msg)
288289

289290
try:
290-
summary, tree, content = ingest_query(query)
291+
context = ingest_query(query)
292+
summary, tree, content = context.generate_digest()
293+
294+
# Prepare the digest content (tree + content)
291295
digest_content = tree + "\n" + content
292296
_store_digest_content(query, clone_config, digest_content, summary, tree, content)
293297
except Exception as exc:

0 commit comments

Comments
 (0)