77from enum import Enum , auto
88from pathlib import Path
99
10- from gitingest .exceptions import InvalidNotebookError
1110from gitingest .utils .ingestion_utils import _get_encoding_list
1211from gitingest .utils .notebook_utils import process_notebook
1312from gitingest .utils .textfile_checker_utils import is_textfile
1413
# Horizontal rule printed above and below each "File: ..." header.
# Kept at exactly 48 characters: per the original author's note, tiktoken (the
# tokenizer OpenAI uses) counts a longer run of "=" as 2 tokens.
# Deliberately has no trailing newline; the code that assembles the output is
# responsible for joining the pieces with "\n".
SEPARATOR = "=" * 48
1615
1716
1817class FileSystemNodeType (Enum ):
@@ -36,108 +35,105 @@ class FileSystemNode: # pylint: disable=too-many-instance-attributes
3635 """
3736 Class representing a node in the file system (either a file or directory).
3837
39- This class has more than the recommended number of attributes because it needs to
40- track various properties of files and directories for comprehensive analysis.
38+ Tracks properties of files/directories for comprehensive analysis.
4139 """
4240
4341 name : str
44- type : FileSystemNodeType # e.g., "directory" or "file"
42+ type : FileSystemNodeType
4543 path_str : str
4644 path : Path
4745 size : int = 0
4846 file_count : int = 0
4947 dir_count : int = 0
5048 depth : int = 0
51- children : list [FileSystemNode ] = field (default_factory = list ) # Using default_factory instead of empty list
49+ children : list [FileSystemNode ] = field (default_factory = list )
5250
def sort_children(self) -> None:
    """
    Sort the children nodes of a directory in place, according to a specific order.

    Order of sorting:
      1. README.md first (file name matched case-insensitively)
      2. Regular files (not starting with dot)
      3. Hidden files (starting with dot)
      4. Regular directories (not starting with dot)
      5. Hidden directories (starting with dot)

    All groups are sorted alphanumerically within themselves, comparing
    lower-cased names. Any child whose type is not ``FILE`` is ordered with
    the directory groups.

    Raises
    ------
    ValueError
        If the node is not a directory.
    """
    if self.type != FileSystemNodeType.DIRECTORY:
        raise ValueError("Cannot sort children of a non-directory node")

    def _sort_key(child: FileSystemNode) -> tuple[int, str]:
        # Return (group, name); lower group numbers sort first.
        # Groups: 0=README, 1=regular file, 2=hidden file, 3=regular dir, 4=hidden dir
        name = child.name.lower()
        hidden = name.startswith(".")
        if child.type == FileSystemNodeType.FILE:
            if name == "readme.md":
                return (0, name)
            return (2 if hidden else 1, name)
        return (4 if hidden else 3, name)

    # list.sort is stable, so children whose names differ only by case keep
    # their existing relative order.
    self.children.sort(key=_sort_key)
8682
@property
def content_string(self) -> str:
    """
    Render the node as a banner-delimited text section.

    The section consists of a separator line, a ``File: <path>`` line, a second
    separator line, the node's content, and a trailing blank line.

    Returns
    -------
    str
        A string representation of the node's content.
    """
    # Use forward slashes in output paths regardless of the host OS.
    display_path = str(self.path_str).replace(os.sep, "/")
    header = f"{SEPARATOR}\nFile: {display_path}\n{SEPARATOR}\n"
    return f"{header}{self.content}\n\n"
106101
@property
def content(self) -> str:  # pylint: disable=too-many-return-statements
    """
    Return the text of the file this node points at.

    Files rejected by ``is_textfile`` yield a placeholder. ``.ipynb`` files are
    passed to ``process_notebook``. Everything else is decoded by trying each
    encoding from ``_get_encoding_list()`` in turn. Failures are reported as a
    message in the returned string rather than raised.

    Returns
    -------
    str
        The content of the file, or an error message if the file could not be read.

    Raises
    ------
    ValueError
        If the node is a directory.
    """
    if self.type == FileSystemNodeType.DIRECTORY:
        raise ValueError("Cannot read content of a directory node")

    file_path = self.path

    # The text check runs first, so a notebook is only processed if it passes it.
    if not is_textfile(file_path):
        return "[Non-text file]"

    if file_path.suffix == ".ipynb":
        try:
            notebook_text = process_notebook(file_path)
        except Exception as exc:
            # Best effort: any notebook failure becomes a message, not a crash.
            return f"Error processing notebook: {exc}"
        else:
            return notebook_text

    # Try each candidate encoding; the first one that decodes cleanly wins.
    for candidate in _get_encoding_list():
        try:
            with file_path.open(encoding=candidate) as handle:
                text = handle.read()
        except UnicodeDecodeError:
            # Wrong encoding for this file; move on to the next candidate.
            continue
        except OSError as exc:
            return f"Error reading file: {exc}"
        else:
            return text

    return "Error: Unable to decode file with available encodings"
0 commit comments