Skip to content

Commit f82619a

Browse files
committed
wip before jinja
1 parent 18ff94b commit f82619a

File tree

7 files changed

+160
-234
lines changed

7 files changed

+160
-234
lines changed

src/gitingest/entrypoint.py

Lines changed: 11 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -45,12 +45,11 @@ async def ingest_async(
4545
include_submodules: bool = False,
4646
token: str | None = None,
4747
output: str | None = None,
48-
) -> tuple[str, str, str]:
48+
) -> str:
4949
"""Ingest a source and process its contents.
5050
5151
This function analyzes a source (URL or local path), clones the corresponding repository (if applicable),
52-
and processes its files according to the specified query parameters. It returns a summary, a tree-like
53-
structure of the files, and the content of the files. The results can optionally be written to an output file.
52+
and processes its files according to the specified query parameters. It returns a single digest string.
5453
5554
The output is generated lazily using a Context object and its .generate_digest() method.
5655
@@ -82,11 +81,8 @@ async def ingest_async(
8281
8382
Returns
8483
-------
85-
tuple[str, str, str]
86-
A tuple containing:
87-
- A summary string of the analyzed repository or directory.
88-
- A tree-like string representation of the file structure.
89-
- The content of the files in the repository or directory.
84+
str
85+
The full digest string.
9086
9187
"""
9288
logger.info("Starting ingestion process", extra={"source": source})
@@ -146,11 +142,10 @@ async def ingest_async(
146142
if output:
147143
logger.debug("Writing output to file", extra={"output_path": output})
148144
context = ingest_query(query)
149-
summary, tree, content = context.generate_digest()
150-
await _write_output(tree, content=content, target=output)
151-
145+
digest = context.generate_digest()
146+
await _write_output(digest, content=None, target=output)
152147
logger.info("Ingestion completed successfully")
153-
return summary, tree, content
148+
return digest
154149

155150

156151
def ingest(
@@ -165,12 +160,11 @@ def ingest(
165160
include_submodules: bool = False,
166161
token: str | None = None,
167162
output: str | None = None,
168-
) -> tuple[str, str, str]:
163+
) -> str:
169164
"""Provide a synchronous wrapper around ``ingest_async``.
170165
171166
This function analyzes a source (URL or local path), clones the corresponding repository (if applicable),
172-
and processes its files according to the specified query parameters. It returns a summary, a tree-like
173-
structure of the files, and the content of the files. The results can optionally be written to an output file.
167+
and processes its files according to the specified query parameters. It returns a single digest string.
174168
175169
The output is generated lazily using a Context object and its .generate_digest() method.
176170
@@ -202,18 +196,14 @@ def ingest(
202196
203197
Returns
204198
-------
205-
tuple[str, str, str]
206-
A tuple containing:
207-
- A summary string of the analyzed repository or directory.
208-
- A tree-like string representation of the file structure.
209-
- The content of the files in the repository or directory.
199+
str
200+
The full digest string.
210201
211202
See Also
212203
--------
213204
``ingest_async`` : The asynchronous version of this function.
214205
215206
"""
216-
import asyncio
217207
context = asyncio.run(ingest_async(
218208
source,
219209
max_file_size=max_file_size,

src/gitingest/ingestion.py

Lines changed: 25 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,9 @@
66
from typing import TYPE_CHECKING
77

88
from gitingest.config import MAX_DIRECTORY_DEPTH, MAX_FILES, MAX_TOTAL_SIZE_BYTES
9-
from gitingest.output_formatter import format_node
10-
from gitingest.schemas import FileSystemNode, FileSystemNodeType, FileSystemStats, Context
9+
from gitingest.output_formatter import DefaultFormatter
10+
from gitingest.schemas import FileSystemNode, FileSystemStats, Context
11+
from gitingest.schemas.filesystem import FileSystemDirectory, FileSystemFile, FileSystemSymlink
1112
from gitingest.utils.ingestion_utils import _should_exclude, _should_include
1213
from gitingest.utils.logging_config import get_logger
1314

@@ -69,11 +70,16 @@ def ingest_query(query: IngestionQuery) -> Context:
6970

7071
relative_path = path.relative_to(query.local_path)
7172

72-
file_node = FileSystemNode(
73+
# file_node = FileSystemNode(
74+
# name=path.name,
75+
# type=FileSystemNodeType.FILE,
76+
# size=path.stat().st_size,
77+
# file_count=1,
78+
# path_str=str(relative_path),
79+
# path=path,
80+
# )
81+
file_node = FileSystemFile(
7382
name=path.name,
74-
type=FileSystemNodeType.FILE,
75-
size=path.stat().st_size,
76-
file_count=1,
7783
path_str=str(relative_path),
7884
path=path,
7985
)
@@ -90,13 +96,15 @@ def ingest_query(query: IngestionQuery) -> Context:
9096
"file_size": file_node.size,
9197
},
9298
)
93-
return Context([file_node])
9499

95-
logger.info("Processing directory", extra={"directory_path": str(path)})
96-
97-
root_node = FileSystemNode(
100+
# root_node = FileSystemNode(
101+
# name=path.name,
102+
# type=FileSystemNodeType.DIRECTORY,
103+
# path_str=str(path.relative_to(query.local_path)),
104+
# path=path,
105+
# )
106+
root_node = FileSystemDirectory(
98107
name=path.name,
99-
type=FileSystemNodeType.DIRECTORY,
100108
path_str=str(path.relative_to(query.local_path)),
101109
path=path,
102110
)
@@ -116,7 +124,7 @@ def ingest_query(query: IngestionQuery) -> Context:
116124
},
117125
)
118126

119-
return Context([root_node])
127+
return Context([root_node], DefaultFormatter(), query)
120128

121129

122130
def _process_node(node: FileSystemNode, query: IngestionQuery, stats: FileSystemStats) -> None:
@@ -160,9 +168,8 @@ def _process_node(node: FileSystemNode, query: IngestionQuery, stats: FileSystem
160168
continue
161169
_process_file(path=sub_path, parent_node=node, stats=stats, local_path=query.local_path)
162170
elif sub_path.is_dir():
163-
child_directory_node = FileSystemNode(
171+
child_directory_node = FileSystemDirectory(
164172
name=sub_path.name,
165-
type=FileSystemNodeType.DIRECTORY,
166173
path_str=str(sub_path.relative_to(query.local_path)),
167174
path=sub_path,
168175
depth=node.depth + 1,
@@ -200,9 +207,8 @@ def _process_symlink(path: Path, parent_node: FileSystemNode, stats: FileSystemS
200207
The base path of the repository or directory being processed.
201208
202209
"""
203-
child = FileSystemNode(
210+
child = FileSystemSymlink(
204211
name=path.name,
205-
type=FileSystemNodeType.SYMLINK,
206212
path_str=str(path.relative_to(local_path)),
207213
path=path,
208214
depth=parent_node.depth + 1,
@@ -212,7 +218,7 @@ def _process_symlink(path: Path, parent_node: FileSystemNode, stats: FileSystemS
212218
parent_node.file_count += 1
213219

214220

215-
def _process_file(path: Path, parent_node: FileSystemNode, stats: FileSystemStats, local_path: Path) -> None:
221+
def _process_file(path: Path, parent_node: FileSystemDirectory, stats: FileSystemStats, local_path: Path) -> None:
216222
"""Process a file in the file system.
217223
218224
This function checks the file's size, increments the statistics, and reads its content.
@@ -222,7 +228,7 @@ def _process_file(path: Path, parent_node: FileSystemNode, stats: FileSystemStat
222228
----------
223229
path : Path
224230
The full path of the file.
225-
parent_node : FileSystemNode
231+
parent_node : FileSystemDirectory
226232
The parent directory node that accumulates the results.
227233
stats : FileSystemStats
228234
Statistics tracking object for the total file count and size.
@@ -257,11 +263,8 @@ def _process_file(path: Path, parent_node: FileSystemNode, stats: FileSystemStat
257263
stats.total_files += 1
258264
stats.total_size += file_size
259265

260-
child = FileSystemNode(
266+
child = FileSystemFile(
261267
name=path.name,
262-
type=FileSystemNodeType.FILE,
263-
size=file_size,
264-
file_count=1,
265268
path_str=str(path.relative_to(local_path)),
266269
path=path,
267270
depth=parent_node.depth + 1,

src/gitingest/output_formatter.py

Lines changed: 39 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,11 @@
88
import requests.exceptions
99
import tiktoken
1010

11-
from gitingest.schemas import FileSystemNode, FileSystemNodeType
11+
from gitingest.schemas import FileSystemNode
1212
from gitingest.utils.compat_func import readlink
13+
from functools import singledispatchmethod
14+
from gitingest.schemas import Source, FileSystemFile, FileSystemDirectory, FileSystemSymlink
15+
from gitingest.schemas.filesystem import SEPARATOR
1316
from gitingest.utils.logging_config import get_logger
1417

1518
if TYPE_CHECKING:
@@ -24,43 +27,41 @@
2427
]
2528

2629

27-
def format_node(node: FileSystemNode, query: IngestionQuery) -> tuple[str, str, str]:
28-
"""Generate a summary, directory structure, and file contents for a given file system node.
29-
30-
If the node represents a directory, the function will recursively process its contents.
31-
32-
Parameters
33-
----------
34-
node : FileSystemNode
35-
The file system node to be summarized.
36-
query : IngestionQuery
37-
The parsed query object containing information about the repository and query parameters.
38-
39-
Returns
40-
-------
41-
tuple[str, str, str]
42-
A tuple containing the summary, directory structure, and file contents.
43-
44-
"""
45-
is_single_file = node.type == FileSystemNodeType.FILE
46-
summary = _create_summary_prefix(query, single_file=is_single_file)
47-
48-
if node.type == FileSystemNodeType.DIRECTORY:
49-
summary += f"Files analyzed: {node.file_count}\n"
50-
elif node.type == FileSystemNodeType.FILE:
51-
summary += f"File: {node.name}\n"
52-
summary += f"Lines: {len(node.content.splitlines()):,}\n"
53-
54-
tree = "Directory structure:\n" + _create_tree_structure(query, node=node)
55-
56-
content = _gather_file_contents(node)
57-
58-
token_estimate = _format_token_count(tree + content)
59-
if token_estimate:
60-
summary += f"\nEstimated tokens: {token_estimate}"
61-
62-
return summary, tree, content
63-
30+
class Formatter:
31+
@singledispatchmethod
32+
def format(self, node: Source, query):
33+
return f"{getattr(node, 'content', '')}"
34+
35+
@format.register
36+
def _(self, node: FileSystemFile, query):
37+
return (
38+
f"{SEPARATOR}\n"
39+
f"{node.name}\n"
40+
f"{SEPARATOR}\n\n"
41+
f"{node.content}"
42+
)
43+
44+
@format.register
45+
def _(self, node: FileSystemDirectory, query):
46+
formatted = []
47+
for child in node.children:
48+
formatted.append(self.format(child, query))
49+
return "\n".join(formatted)
50+
51+
@format.register
52+
def _(self, node: FileSystemSymlink, query):
53+
target = getattr(node, 'target', None)
54+
target_str = f" -> {target}" if target else ""
55+
return (
56+
f"{SEPARATOR}\n"
57+
f"{node.name}{target_str}\n"
58+
f"{SEPARATOR}\n"
59+
)
60+
61+
class DefaultFormatter(Formatter):
62+
pass
63+
64+
# Backward compatibility
6465

6566
def _create_summary_prefix(query: IngestionQuery, *, single_file: bool = False) -> str:
6667
"""Create a prefix string for summarizing a repository or local directory.

src/gitingest/schemas/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
"""Module containing the schemas for the Gitingest package."""
22

33
from gitingest.schemas.cloning import CloneConfig
4-
from gitingest.schemas.filesystem import Context, FileSystemNode, FileSystemNodeType, FileSystemStats
4+
from gitingest.schemas.filesystem import FileSystemNode, FileSystemFile, FileSystemDirectory, FileSystemSymlink, FileSystemStats, Context, Source
55
from gitingest.schemas.ingestion import IngestionQuery
66

7-
__all__ = ["CloneConfig", "FileSystemNode", "FileSystemNodeType", "FileSystemStats", "IngestionQuery", "Context"]
7+
__all__ = ["CloneConfig", "FileSystemNode", "FileSystemFile", "FileSystemDirectory", "FileSystemSymlink", "FileSystemStats", "IngestionQuery", "Context"]

0 commit comments

Comments
 (0)