Skip to content

Commit c5d83ff

Browse files
fix max_file_size limit
1 parent 4b62650 commit c5d83ff

File tree

2 files changed

12 additions and 18 deletions

2 files changed

12 additions and 18 deletions

src/gitingest/ingestion.py

Lines changed: 12 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -65,7 +65,7 @@ def ingest_query(query: IngestionQuery) -> tuple[str, str, str]:
6565
msg = f"File {file_node.name} has no content"
6666
raise ValueError(msg)
6767

68-
return format_node(file_node, query)
68+
return format_node(file_node, query=query)
6969

7070
root_node = FileSystemNode(
7171
name=path.name,
@@ -76,13 +76,9 @@ def ingest_query(query: IngestionQuery) -> tuple[str, str, str]:
7676

7777
stats = FileSystemStats()
7878

79-
_process_node(
80-
node=root_node,
81-
query=query,
82-
stats=stats,
83-
)
79+
_process_node(node=root_node, query=query, stats=stats)
8480

85-
return format_node(root_node, query)
81+
return format_node(root_node, query=query)
8682

8783

8884
def _process_node(node: FileSystemNode, query: IngestionQuery, stats: FileSystemStats) -> None:
@@ -101,7 +97,7 @@ def _process_node(node: FileSystemNode, query: IngestionQuery, stats: FileSystem
10197
Statistics tracking object for the total file count and size.
10298
10399
"""
104-
if limit_exceeded(stats, node.depth):
100+
if limit_exceeded(stats, depth=node.depth):
105101
return
106102

107103
for sub_path in node.path.iterdir():
@@ -114,6 +110,9 @@ def _process_node(node: FileSystemNode, query: IngestionQuery, stats: FileSystem
114110
if sub_path.is_symlink():
115111
_process_symlink(path=sub_path, parent_node=node, stats=stats, local_path=query.local_path)
116112
elif sub_path.is_file():
113+
if sub_path.stat().st_size > query.max_file_size:
114+
print(f"Skipping file {sub_path}: would exceed max file size limit")
115+
continue
117116
_process_file(path=sub_path, parent_node=node, stats=stats, local_path=query.local_path)
118117
elif sub_path.is_dir():
119118
child_directory_node = FileSystemNode(
@@ -124,11 +123,7 @@ def _process_node(node: FileSystemNode, query: IngestionQuery, stats: FileSystem
124123
depth=node.depth + 1,
125124
)
126125

127-
_process_node(
128-
node=child_directory_node,
129-
query=query,
130-
stats=stats,
131-
)
126+
_process_node(node=child_directory_node, query=query, stats=stats)
132127

133128
if not child_directory_node.children:
134129
continue
@@ -190,6 +185,10 @@ def _process_file(path: Path, parent_node: FileSystemNode, stats: FileSystemStat
190185
The base path of the repository or directory being processed.
191186
192187
"""
188+
if stats.total_files + 1 > MAX_FILES:
189+
print(f"Maximum file limit ({MAX_FILES}) reached")
190+
return
191+
193192
file_size = path.stat().st_size
194193
if stats.total_size + file_size > MAX_TOTAL_SIZE_BYTES:
195194
print(f"Skipping file {path}: would exceed total size limit")
@@ -198,10 +197,6 @@ def _process_file(path: Path, parent_node: FileSystemNode, stats: FileSystemStat
198197
stats.total_files += 1
199198
stats.total_size += file_size
200199

201-
if stats.total_files > MAX_FILES:
202-
print(f"Maximum file limit ({MAX_FILES}) reached")
203-
return
204-
205200
child = FileSystemNode(
206201
name=path.name,
207202
type=FileSystemNodeType.FILE,

src/gitingest/schemas/filesystem.py

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -29,7 +29,6 @@ class FileSystemNodeType(Enum):
2929
class FileSystemStats:
3030
"""Class for tracking statistics during file system traversal."""
3131

32-
visited: set[Path] = field(default_factory=set)
3332
total_files: int = 0
3433
total_size: int = 0
3534

0 commit comments

Comments
 (0)