|
1 | 1 | """ Functions to ingest and analyze a codebase directory or single file. """ |
2 | 2 |
|
3 | | -from fnmatch import fnmatch |
4 | | -from pathlib import Path |
5 | | -from typing import Any |
6 | 3 | import locale |
7 | 4 | import os |
8 | 5 | import platform |
| 6 | +from fnmatch import fnmatch |
| 7 | +from pathlib import Path |
| 8 | +from typing import Any |
9 | 9 |
|
10 | 10 | import tiktoken |
11 | 11 |
|
|
20 | 20 | from gitingest.query_parser import ParsedQuery |
21 | 21 |
|
22 | 22 | try: |
23 | | - locale.setlocale(locale.LC_ALL, '') |
| 23 | + locale.setlocale(locale.LC_ALL, "") |
24 | 24 | except locale.Error: |
25 | | - locale.setlocale(locale.LC_ALL, 'C') |
| 25 | + locale.setlocale(locale.LC_ALL, "C") |
| 26 | + |
26 | 27 |
|
27 | 28 | def _normalize_path(path: Path) -> Path: |
28 | | - """Normalize path for cross-platform compatibility.""" |
| 29 | + """ |
| 30 | + Normalize path for cross-platform compatibility. |
| 31 | +
|
| 32 | + Parameters |
| 33 | + ---------- |
| 34 | + path : Path |
| 35 | + The Path object to normalize. |
| 36 | +
|
| 37 | + Returns |
| 38 | + ------- |
| 39 | + Path |
| 40 | + The lexically normalized path with platform-specific separators (as produced by os.path.normpath; symlinks are not resolved). |
| 41 | + """ |
29 | 42 | return Path(os.path.normpath(str(path))) |
30 | 43 |
|
| 44 | + |
31 | 45 | def _normalize_path_str(path: str | Path) -> str: |
32 | | - """Convert path to string with forward slashes for consistent output.""" |
33 | | - return str(path).replace(os.sep, '/') |
| 46 | + """ |
| 47 | + Convert path to string with forward slashes for consistent output. |
| 48 | +
|
| 49 | + Parameters |
| 50 | + ---------- |
| 51 | + path : str | Path |
| 52 | + The path to convert, can be string or Path object. |
| 53 | +
|
| 54 | + Returns |
| 55 | + ------- |
| 56 | + str |
| 57 | + The normalized path string with forward slashes as separators. |
| 58 | + """ |
| 59 | + return str(path).replace(os.sep, "/") |
| 60 | + |
34 | 61 |
|
35 | 62 | def _get_encoding_list() -> list[str]: |
36 | | - """Get list of encodings to try, prioritized for the current platform.""" |
37 | | - encodings = ['utf-8', 'utf-8-sig'] |
38 | | - if platform.system() == 'Windows': |
39 | | - encodings.extend(['cp1252', 'iso-8859-1']) |
| 63 | + """ |
| 64 | + Get list of encodings to try, prioritized for the current platform. |
| 65 | +
|
| 66 | + Returns |
| 67 | + ------- |
| 68 | + list[str] |
| 69 | + List of encoding names to try in priority order: "utf-8" and "utf-8-sig" first, |
| 70 | + then "cp1252" and "iso-8859-1" on Windows only, and finally the locale's preferred encoding. |
| 71 | + """ |
| 72 | + encodings = ["utf-8", "utf-8-sig"] |
| 73 | + if platform.system() == "Windows": |
| 74 | + encodings.extend(["cp1252", "iso-8859-1"]) |
40 | 75 | return encodings + [locale.getpreferredencoding()] |
41 | 76 |
|
| 77 | + |
42 | 78 | def _should_include(path: Path, base_path: Path, include_patterns: set[str]) -> bool: |
43 | 79 | """ |
44 | 80 | Determine if the given file or directory path matches any of the include patterns. |
@@ -129,13 +165,13 @@ def _is_safe_symlink(symlink_path: Path, base_path: Path) -> bool: |
129 | 165 | `True` if the symlink points within the base directory, `False` otherwise. |
130 | 166 | """ |
131 | 167 | try: |
132 | | - if platform.system() == 'Windows': |
| 168 | + if platform.system() == "Windows": |
133 | 169 | if not os.path.islink(str(symlink_path)): |
134 | 170 | return False |
135 | | - |
| 171 | + |
136 | 172 | target_path = _normalize_path(symlink_path.resolve()) |
137 | 173 | base_resolved = _normalize_path(base_path.resolve()) |
138 | | - |
| 174 | + |
139 | 175 | return base_resolved in target_path.parents or target_path == base_resolved |
140 | 176 | except (OSError, ValueError): |
141 | 177 | # If there's any error resolving the paths, consider it unsafe |
@@ -201,7 +237,7 @@ def _read_file_content(file_path: Path) -> str: |
201 | 237 | continue |
202 | 238 | except OSError as e: |
203 | 239 | return f"Error reading file: {e}" |
204 | | - |
| 240 | + |
205 | 241 | return "Error: Unable to decode file with available encodings" |
206 | 242 |
|
207 | 243 | except (OSError, InvalidNotebookError) as e: |
|
0 commit comments