Skip to content

Commit 1ec52b7

Browse files
committed
wip before jinja
1 parent f38cebe commit 1ec52b7

File tree

7 files changed

+161
-231
lines changed

7 files changed

+161
-231
lines changed

src/gitingest/entrypoint.py

Lines changed: 11 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -42,12 +42,11 @@ async def ingest_async(
4242
include_submodules: bool = False,
4343
token: str | None = None,
4444
output: str | None = None,
45-
) -> tuple[str, str, str]:
45+
) -> str:
4646
"""Ingest a source and process its contents.
4747
4848
This function analyzes a source (URL or local path), clones the corresponding repository (if applicable),
49-
and processes its files according to the specified query parameters. It returns a summary, a tree-like
50-
structure of the files, and the content of the files. The results can optionally be written to an output file.
49+
and processes its files according to the specified query parameters. It returns a single digest string.
5150
5251
The output is generated lazily using a Context object and its .generate_digest() method.
5352
@@ -79,11 +78,8 @@ async def ingest_async(
7978
8079
Returns
8180
-------
82-
tuple[str, str, str]
83-
A tuple containing:
84-
- A summary string of the analyzed repository or directory.
85-
- A tree-like string representation of the file structure.
86-
- The content of the files in the repository or directory.
81+
str
82+
The full digest string.
8783
8884
"""
8985
token = resolve_token(token)
@@ -116,9 +112,9 @@ async def ingest_async(
116112
if not include_gitignored:
117113
_apply_gitignores(query)
118114
context = ingest_query(query)
119-
summary, tree, content = context.generate_digest()
120-
await _write_output(tree, content=content, target=output)
121-
return summary, tree, content
115+
digest = context.generate_digest()
116+
await _write_output(digest, content=None, target=output)
117+
return digest
122118

123119

124120
def ingest(
@@ -133,12 +129,11 @@ def ingest(
133129
include_submodules: bool = False,
134130
token: str | None = None,
135131
output: str | None = None,
136-
) -> tuple[str, str, str]:
132+
) -> str:
137133
"""Provide a synchronous wrapper around ``ingest_async``.
138134
139135
This function analyzes a source (URL or local path), clones the corresponding repository (if applicable),
140-
and processes its files according to the specified query parameters. It returns a summary, a tree-like
141-
structure of the files, and the content of the files. The results can optionally be written to an output file.
136+
and processes its files according to the specified query parameters. It returns a single digest string.
142137
143138
The output is generated lazily using a Context object and its .generate_digest() method.
144139
@@ -170,18 +165,14 @@ def ingest(
170165
171166
Returns
172167
-------
173-
tuple[str, str, str]
174-
A tuple containing:
175-
- A summary string of the analyzed repository or directory.
176-
- A tree-like string representation of the file structure.
177-
- The content of the files in the repository or directory.
168+
str
169+
The full digest string.
178170
179171
See Also
180172
--------
181173
``ingest_async`` : The asynchronous version of this function.
182174
183175
"""
184-
import asyncio
185176
context = asyncio.run(ingest_async(
186177
source,
187178
max_file_size=max_file_size,

src/gitingest/ingestion.py

Lines changed: 26 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,9 @@
66
from typing import TYPE_CHECKING
77

88
from gitingest.config import MAX_DIRECTORY_DEPTH, MAX_FILES, MAX_TOTAL_SIZE_BYTES
9-
from gitingest.output_formatter import format_node
10-
from gitingest.schemas import FileSystemNode, FileSystemNodeType, FileSystemStats, Context
9+
from gitingest.output_formatter import DefaultFormatter
10+
from gitingest.schemas import FileSystemNode, FileSystemStats, Context
11+
from gitingest.schemas.filesystem import FileSystemDirectory, FileSystemFile, FileSystemSymlink
1112
from gitingest.utils.ingestion_utils import _should_exclude, _should_include
1213

1314
if TYPE_CHECKING:
@@ -51,11 +52,16 @@ def ingest_query(query: IngestionQuery) -> Context:
5152

5253
relative_path = path.relative_to(query.local_path)
5354

54-
file_node = FileSystemNode(
55+
# file_node = FileSystemNode(
56+
# name=path.name,
57+
# type=FileSystemNodeType.FILE,
58+
# size=path.stat().st_size,
59+
# file_count=1,
60+
# path_str=str(relative_path),
61+
# path=path,
62+
# )
63+
file_node = FileSystemFile(
5564
name=path.name,
56-
type=FileSystemNodeType.FILE,
57-
size=path.stat().st_size,
58-
file_count=1,
5965
path_str=str(relative_path),
6066
path=path,
6167
)
@@ -64,11 +70,16 @@ def ingest_query(query: IngestionQuery) -> Context:
6470
msg = f"File {file_node.name} has no content"
6571
raise ValueError(msg)
6672

67-
return Context([file_node])
73+
return Context([file_node], DefaultFormatter(), query)
6874

69-
root_node = FileSystemNode(
75+
# root_node = FileSystemNode(
76+
# name=path.name,
77+
# type=FileSystemNodeType.DIRECTORY,
78+
# path_str=str(path.relative_to(query.local_path)),
79+
# path=path,
80+
# )
81+
root_node = FileSystemDirectory(
7082
name=path.name,
71-
type=FileSystemNodeType.DIRECTORY,
7283
path_str=str(path.relative_to(query.local_path)),
7384
path=path,
7485
)
@@ -77,7 +88,7 @@ def ingest_query(query: IngestionQuery) -> Context:
7788

7889
_process_node(node=root_node, query=query, stats=stats)
7990

80-
return Context([root_node])
91+
return Context([root_node], DefaultFormatter(), query)
8192

8293

8394
def _process_node(node: FileSystemNode, query: IngestionQuery, stats: FileSystemStats) -> None:
@@ -114,9 +125,8 @@ def _process_node(node: FileSystemNode, query: IngestionQuery, stats: FileSystem
114125
continue
115126
_process_file(path=sub_path, parent_node=node, stats=stats, local_path=query.local_path)
116127
elif sub_path.is_dir():
117-
child_directory_node = FileSystemNode(
128+
child_directory_node = FileSystemDirectory(
118129
name=sub_path.name,
119-
type=FileSystemNodeType.DIRECTORY,
120130
path_str=str(sub_path.relative_to(query.local_path)),
121131
path=sub_path,
122132
depth=node.depth + 1,
@@ -154,9 +164,8 @@ def _process_symlink(path: Path, parent_node: FileSystemNode, stats: FileSystemS
154164
The base path of the repository or directory being processed.
155165
156166
"""
157-
child = FileSystemNode(
167+
child = FileSystemSymlink(
158168
name=path.name,
159-
type=FileSystemNodeType.SYMLINK,
160169
path_str=str(path.relative_to(local_path)),
161170
path=path,
162171
depth=parent_node.depth + 1,
@@ -166,7 +175,7 @@ def _process_symlink(path: Path, parent_node: FileSystemNode, stats: FileSystemS
166175
parent_node.file_count += 1
167176

168177

169-
def _process_file(path: Path, parent_node: FileSystemNode, stats: FileSystemStats, local_path: Path) -> None:
178+
def _process_file(path: Path, parent_node: FileSystemDirectory, stats: FileSystemStats, local_path: Path) -> None:
170179
"""Process a file in the file system.
171180
172181
This function checks the file's size, increments the statistics, and reads its content.
@@ -176,7 +185,7 @@ def _process_file(path: Path, parent_node: FileSystemNode, stats: FileSystemStat
176185
----------
177186
path : Path
178187
The full path of the file.
179-
parent_node : FileSystemNode
188+
parent_node : FileSystemDirectory
180189
The parent directory node that accumulates the results.
181190
stats : FileSystemStats
182191
Statistics tracking object for the total file count and size.
@@ -196,11 +205,8 @@ def _process_file(path: Path, parent_node: FileSystemNode, stats: FileSystemStat
196205
stats.total_files += 1
197206
stats.total_size += file_size
198207

199-
child = FileSystemNode(
208+
child = FileSystemFile(
200209
name=path.name,
201-
type=FileSystemNodeType.FILE,
202-
size=file_size,
203-
file_count=1,
204210
path_str=str(path.relative_to(local_path)),
205211
path=path,
206212
depth=parent_node.depth + 1,

src/gitingest/output_formatter.py

Lines changed: 39 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,11 @@
66

77
import tiktoken
88

9-
from gitingest.schemas import FileSystemNode, FileSystemNodeType
9+
from gitingest.schemas import FileSystemNode
1010
from gitingest.utils.compat_func import readlink
11+
from functools import singledispatchmethod
12+
from gitingest.schemas import Source, FileSystemFile, FileSystemDirectory, FileSystemSymlink
13+
from gitingest.schemas.filesystem import SEPARATOR
1114

1215
if TYPE_CHECKING:
1316
from gitingest.schemas import IngestionQuery
@@ -18,43 +21,41 @@
1821
]
1922

2023

21-
def format_node(node: FileSystemNode, query: IngestionQuery) -> tuple[str, str, str]:
22-
"""Generate a summary, directory structure, and file contents for a given file system node.
23-
24-
If the node represents a directory, the function will recursively process its contents.
25-
26-
Parameters
27-
----------
28-
node : FileSystemNode
29-
The file system node to be summarized.
30-
query : IngestionQuery
31-
The parsed query object containing information about the repository and query parameters.
32-
33-
Returns
34-
-------
35-
tuple[str, str, str]
36-
A tuple containing the summary, directory structure, and file contents.
37-
38-
"""
39-
is_single_file = node.type == FileSystemNodeType.FILE
40-
summary = _create_summary_prefix(query, single_file=is_single_file)
41-
42-
if node.type == FileSystemNodeType.DIRECTORY:
43-
summary += f"Files analyzed: {node.file_count}\n"
44-
elif node.type == FileSystemNodeType.FILE:
45-
summary += f"File: {node.name}\n"
46-
summary += f"Lines: {len(node.content.splitlines()):,}\n"
47-
48-
tree = "Directory structure:\n" + _create_tree_structure(query, node=node)
49-
50-
content = _gather_file_contents(node)
51-
52-
token_estimate = _format_token_count(tree + content)
53-
if token_estimate:
54-
summary += f"\nEstimated tokens: {token_estimate}"
55-
56-
return summary, tree, content
57-
24+
class Formatter:
25+
@singledispatchmethod
26+
def format(self, node: Source, query):
27+
return f"{getattr(node, 'content', '')}"
28+
29+
@format.register
30+
def _(self, node: FileSystemFile, query):
31+
return (
32+
f"{SEPARATOR}\n"
33+
f"{node.name}\n"
34+
f"{SEPARATOR}\n\n"
35+
f"{node.content}"
36+
)
37+
38+
@format.register
39+
def _(self, node: FileSystemDirectory, query):
40+
formatted = []
41+
for child in node.children:
42+
formatted.append(self.format(child, query))
43+
return "\n".join(formatted)
44+
45+
@format.register
46+
def _(self, node: FileSystemSymlink, query):
47+
target = getattr(node, 'target', None)
48+
target_str = f" -> {target}" if target else ""
49+
return (
50+
f"{SEPARATOR}\n"
51+
f"{node.name}{target_str}\n"
52+
f"{SEPARATOR}\n"
53+
)
54+
55+
class DefaultFormatter(Formatter):
56+
pass
57+
58+
# Backward compatibility
5859

5960
def _create_summary_prefix(query: IngestionQuery, *, single_file: bool = False) -> str:
6061
"""Create a prefix string for summarizing a repository or local directory.

src/gitingest/schemas/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
"""Module containing the schemas for the Gitingest package."""
22

33
from gitingest.schemas.cloning import CloneConfig
4-
from gitingest.schemas.filesystem import Context, FileSystemNode, FileSystemNodeType, FileSystemStats
4+
from gitingest.schemas.filesystem import FileSystemNode, FileSystemFile, FileSystemDirectory, FileSystemSymlink, FileSystemStats, Context, Source
55
from gitingest.schemas.ingestion import IngestionQuery
66

7-
__all__ = ["CloneConfig", "FileSystemNode", "FileSystemNodeType", "FileSystemStats", "IngestionQuery", "Context"]
7+
__all__ = ["CloneConfig", "FileSystemNode", "FileSystemFile", "FileSystemDirectory", "FileSystemSymlink", "FileSystemStats", "IngestionQuery", "Context"]

0 commit comments

Comments
 (0)