Skip to content

Commit f38cebe

Browse files
committed
test
1 parent 148f171 commit f38cebe

File tree

5 files changed

+91
-26
lines changed

5 files changed

+91
-26
lines changed

src/gitingest/entrypoint.py

Lines changed: 21 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
from types import TracebackType
2828

2929
from gitingest.schemas import IngestionQuery
30+
from gitingest.schemas import Context
3031

3132

3233
async def ingest_async(
@@ -48,6 +49,8 @@ async def ingest_async(
4849
and processes its files according to the specified query parameters. It returns a summary, a tree-like
4950
structure of the files, and the content of the files. The results can optionally be written to an output file.
5051
52+
The digest is built from a ``Context`` object by calling its ``generate_digest()`` method before this function returns.
53+
5154
Parameters
5255
----------
5356
source : str
@@ -112,7 +115,8 @@ async def ingest_async(
112115
async with _clone_repo_if_remote(query, token=token):
113116
if not include_gitignored:
114117
_apply_gitignores(query)
115-
summary, tree, content = ingest_query(query)
118+
context = ingest_query(query)
119+
summary, tree, content = context.generate_digest()
116120
await _write_output(tree, content=content, target=output)
117121
return summary, tree, content
118122

@@ -136,6 +140,8 @@ def ingest(
136140
and processes its files according to the specified query parameters. It returns a summary, a tree-like
137141
structure of the files, and the content of the files. The results can optionally be written to an output file.
138142
143+
Internally, the digest is built from a ``Context`` object via its ``generate_digest()`` method; callers receive the finished tuple.
144+
139145
Parameters
140146
----------
141147
source : str
@@ -175,20 +181,20 @@ def ingest(
175181
``ingest_async`` : The asynchronous version of this function.
176182
177183
"""
178-
return asyncio.run(
179-
ingest_async(
180-
source=source,
181-
max_file_size=max_file_size,
182-
include_patterns=include_patterns,
183-
exclude_patterns=exclude_patterns,
184-
branch=branch,
185-
tag=tag,
186-
include_gitignored=include_gitignored,
187-
include_submodules=include_submodules,
188-
token=token,
189-
output=output,
190-
),
191-
)
184+
import asyncio
185+
context = asyncio.run(ingest_async(
186+
source,
187+
max_file_size=max_file_size,
188+
include_patterns=include_patterns,
189+
exclude_patterns=exclude_patterns,
190+
branch=branch,
191+
tag=tag,
192+
include_gitignored=include_gitignored,
193+
include_submodules=include_submodules,
194+
token=token,
195+
output=output,
196+
))
197+
return context.generate_digest()
192198

193199

194200
def _override_branch_and_tag(query: IngestionQuery, branch: str | None, tag: str | None) -> None:

src/gitingest/ingestion.py

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -7,19 +7,18 @@
77

88
from gitingest.config import MAX_DIRECTORY_DEPTH, MAX_FILES, MAX_TOTAL_SIZE_BYTES
99
from gitingest.output_formatter import format_node
10-
from gitingest.schemas import FileSystemNode, FileSystemNodeType, FileSystemStats
10+
from gitingest.schemas import FileSystemNode, FileSystemNodeType, FileSystemStats, Context
1111
from gitingest.utils.ingestion_utils import _should_exclude, _should_include
1212

1313
if TYPE_CHECKING:
1414
from gitingest.schemas import IngestionQuery
1515

1616

17-
def ingest_query(query: IngestionQuery) -> tuple[str, str, str]:
17+
def ingest_query(query: IngestionQuery) -> Context:
1818
"""Run the ingestion process for a parsed query.
1919
2020
This is the main entry point for analyzing a codebase directory or single file. It processes the query
21-
parameters, reads the file or directory content, and generates a summary, directory structure, and file content,
22-
along with token estimations.
21+
parameters, reads the file or directory content, and returns a Context object that can generate the final output digest on demand.
2322
2423
Parameters
2524
----------
@@ -28,8 +27,8 @@ def ingest_query(query: IngestionQuery) -> tuple[str, str, str]:
2827
2928
Returns
3029
-------
31-
tuple[str, str, str]
32-
A tuple containing the summary, directory structure, and file contents.
30+
Context
31+
A Context object representing the ingested file system nodes. Call .generate_digest() to get the summary, directory structure, and file contents.
3332
3433
Raises
3534
------
@@ -65,7 +64,7 @@ def ingest_query(query: IngestionQuery) -> tuple[str, str, str]:
6564
msg = f"File {file_node.name} has no content"
6665
raise ValueError(msg)
6766

68-
return format_node(file_node, query=query)
67+
return Context([file_node])
6968

7069
root_node = FileSystemNode(
7170
name=path.name,
@@ -78,7 +77,7 @@ def ingest_query(query: IngestionQuery) -> tuple[str, str, str]:
7877

7978
_process_node(node=root_node, query=query, stats=stats)
8079

81-
return format_node(root_node, query=query)
80+
return Context([root_node])
8281

8382

8483
def _process_node(node: FileSystemNode, query: IngestionQuery, stats: FileSystemStats) -> None:

src/gitingest/schemas/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
"""Module containing the schemas for the Gitingest package."""
22

33
from gitingest.schemas.cloning import CloneConfig
4-
from gitingest.schemas.filesystem import FileSystemNode, FileSystemNodeType, FileSystemStats
4+
from gitingest.schemas.filesystem import Context, FileSystemNode, FileSystemNodeType, FileSystemStats
55
from gitingest.schemas.ingestion import IngestionQuery
66

7-
__all__ = ["CloneConfig", "FileSystemNode", "FileSystemNodeType", "FileSystemStats", "IngestionQuery"]
7+
__all__ = ["CloneConfig", "FileSystemNode", "FileSystemNodeType", "FileSystemStats", "IngestionQuery", "Context"]

src/gitingest/schemas/filesystem.py

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -159,3 +159,61 @@ def content(self) -> str: # pylint: disable=too-many-return-statements
159159
return fp.read()
160160
except (OSError, UnicodeDecodeError) as exc:
161161
return f"Error reading file with {good_enc!r}: {exc}"
162+
163+
164+
@dataclass
class Context:
    """Hold ingested root nodes and build their textual digest on demand.

    Attributes
    ----------
    nodes : list[FileSystemNode]
        The root nodes (files or directories) that make up this context.

    """

    nodes: list[FileSystemNode]

    def generate_digest(self) -> tuple[str, str, str]:
        """Generate a summary, directory structure, and file contents for the context's nodes.

        Returns
        -------
        tuple[str, str, str]
            A tuple containing the summary, directory structure, and file contents.

        """
        # Directories contribute their aggregated ``file_count``; plain files count as one.
        # Any other node type (e.g. symlinks) is not counted.
        total_files = 0
        for node in self.nodes:
            if node.type == FileSystemNodeType.DIRECTORY:
                total_files += node.file_count
            elif node.type == FileSystemNodeType.FILE:
                total_files += 1
        summary = "\n".join(["Context Digest", f"Files analyzed: {total_files}"])

        # Directory structure: a header line followed by one rendered tree per root node.
        tree = "\n".join(
            ["Directory structure:", *(self._create_tree_structure(node) for node in self.nodes)],
        )

        # File contents: the concatenated content of every root node.
        content = "\n".join(self._gather_file_contents(node) for node in self.nodes)

        return summary, tree, content

    def _gather_file_contents(self, node: FileSystemNode) -> str:
        """Return the content of ``node``, recursing through directories.

        Non-directory nodes yield their ``content_string``; a directory yields the
        newline-joined contents of its children, in order.
        """
        if node.type != FileSystemNodeType.DIRECTORY:
            return node.content_string
        return "\n".join(self._gather_file_contents(child) for child in node.children)

    def _create_tree_structure(self, node: FileSystemNode, prefix: str = "", is_last: bool = True) -> str:
        """Render ``node`` and its descendants as a tree drawn with box characters.

        Parameters
        ----------
        node : FileSystemNode
            The node to render.
        prefix : str
            The indentation accumulated from the ancestors of ``node``.
        is_last : bool
            Whether ``node`` is the last child of its parent; selects the connector glyph.

        Returns
        -------
        str
            One line per rendered node, each terminated by a newline.

        """
        connector = "└── " if is_last else "├── "

        display_name = node.name
        if node.type == FileSystemNodeType.DIRECTORY:
            display_name += "/"
        elif node.type == FileSystemNodeType.SYMLINK:
            # NOTE(review): ``readlink`` must be in scope in this module - confirm the import.
            display_name += " -> " + readlink(node.path).name

        lines = [f"{prefix}{connector}{display_name}\n"]

        if node.type == FileSystemNodeType.DIRECTORY and node.children:
            # The child prefix is as wide as the four-column connector so that nested
            # entries line up under their parent.
            child_prefix = prefix + ("    " if is_last else "│   ")
            last_index = len(node.children) - 1
            lines.extend(
                self._create_tree_structure(child, prefix=child_prefix, is_last=index == last_index)
                for index, child in enumerate(node.children)
            )

        return "".join(lines)

src/server/query_processor.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
from gitingest.query_parser import parse_remote_repo
1111
from gitingest.utils.git_utils import validate_github_token
1212
from gitingest.utils.pattern_utils import process_patterns
13+
from gitingest.schemas import Context
1314
from server.models import IngestErrorResponse, IngestResponse, IngestSuccessResponse, PatternType
1415
from server.s3_utils import generate_s3_file_path, is_s3_enabled, upload_to_s3
1516
from server.server_config import MAX_DISPLAY_SIZE
@@ -82,7 +83,8 @@ async def process_query(
8283
raise RuntimeError(msg)
8384

8485
try:
85-
summary, tree, content = ingest_query(query)
86+
context = ingest_query(query)
87+
summary, tree, content = context.generate_digest()
8688

8789
# Prepare the digest content (tree + content)
8890
digest_content = tree + "\n" + content

0 commit comments

Comments
 (0)