@@ -65,7 +65,7 @@ def ingest_query(query: IngestionQuery) -> tuple[str, str, str]:
6565 msg = f"File { file_node .name } has no content"
6666 raise ValueError (msg )
6767
68- return format_node (file_node , query )
68+ return format_node (file_node , query = query )
6969
7070 root_node = FileSystemNode (
7171 name = path .name ,
@@ -76,13 +76,9 @@ def ingest_query(query: IngestionQuery) -> tuple[str, str, str]:
7676
7777 stats = FileSystemStats ()
7878
79- _process_node (
80- node = root_node ,
81- query = query ,
82- stats = stats ,
83- )
79+ _process_node (node = root_node , query = query , stats = stats )
8480
85- return format_node (root_node , query )
81+ return format_node (root_node , query = query )
8682
8783
8884def _process_node (node : FileSystemNode , query : IngestionQuery , stats : FileSystemStats ) -> None :
@@ -101,7 +97,7 @@ def _process_node(node: FileSystemNode, query: IngestionQuery, stats: FileSystem
10197 Statistics tracking object for the total file count and size.
10298
10399 """
104- if limit_exceeded (stats , node .depth ):
100+ if limit_exceeded (stats , depth = node .depth ):
105101 return
106102
107103 for sub_path in node .path .iterdir ():
@@ -114,6 +110,9 @@ def _process_node(node: FileSystemNode, query: IngestionQuery, stats: FileSystem
114110 if sub_path .is_symlink ():
115111 _process_symlink (path = sub_path , parent_node = node , stats = stats , local_path = query .local_path )
116112 elif sub_path .is_file ():
113+ if sub_path .stat ().st_size > query .max_file_size :
114+ print (f"Skipping file { sub_path } : would exceed max file size limit" )
115+ continue
117116 _process_file (path = sub_path , parent_node = node , stats = stats , local_path = query .local_path )
118117 elif sub_path .is_dir ():
119118 child_directory_node = FileSystemNode (
@@ -124,11 +123,7 @@ def _process_node(node: FileSystemNode, query: IngestionQuery, stats: FileSystem
124123 depth = node .depth + 1 ,
125124 )
126125
127- _process_node (
128- node = child_directory_node ,
129- query = query ,
130- stats = stats ,
131- )
126+ _process_node (node = child_directory_node , query = query , stats = stats )
132127
133128 if not child_directory_node .children :
134129 continue
@@ -190,6 +185,10 @@ def _process_file(path: Path, parent_node: FileSystemNode, stats: FileSystemStat
190185 The base path of the repository or directory being processed.
191186
192187 """
188+ if stats .total_files + 1 > MAX_FILES :
189+ print (f"Maximum file limit ({ MAX_FILES } ) reached" )
190+ return
191+
193192 file_size = path .stat ().st_size
194193 if stats .total_size + file_size > MAX_TOTAL_SIZE_BYTES :
195194 print (f"Skipping file { path } : would exceed total size limit" )
@@ -198,10 +197,6 @@ def _process_file(path: Path, parent_node: FileSystemNode, stats: FileSystemStat
198197 stats .total_files += 1
199198 stats .total_size += file_size
200199
201- if stats .total_files > MAX_FILES :
202- print (f"Maximum file limit ({ MAX_FILES } ) reached" )
203- return
204-
205200 child = FileSystemNode (
206201 name = path .name ,
207202 type = FileSystemNodeType .FILE ,
0 commit comments