Skip to content

Commit 8b8ad97

Browse files
refactor: prefix helper functions with an underscore
1 parent d77741b commit 8b8ad97

File tree

2 files changed

+76
-54
lines changed

2 files changed

+76
-54
lines changed

src/gitingest/exceptions.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
class InvalidPatternError(ValueError):
2+
def __init__(self, pattern: str) -> None:
3+
super().__init__(
4+
f"Pattern '{pattern}' contains invalid characters. Only alphanumeric characters, dash (-), "
5+
"underscore (_), dot (.), forward slash (/), plus (+), and asterisk (*) are allowed."
6+
)

src/gitingest/parse_query.py

Lines changed: 70 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,71 @@
55
from typing import Any
66
from urllib.parse import unquote
77

8+
from gitingest.exceptions import InvalidPatternError
89
from gitingest.ignore_patterns import DEFAULT_IGNORE_PATTERNS
910

1011
TMP_BASE_PATH: str = "../tmp"
1112
HEX_DIGITS = set(string.hexdigits)
1213

1314

15+
def parse_query(
16+
source: str,
17+
max_file_size: int,
18+
from_web: bool,
19+
include_patterns: list[str] | str | None = None,
20+
ignore_patterns: list[str] | str | None = None,
21+
) -> dict[str, Any]:
22+
"""
23+
Parses the input source to construct a query dictionary with specified parameters.
24+
25+
Parameters
26+
----------
27+
source : str
28+
The source URL or file path to parse.
29+
max_file_size : int
30+
The maximum file size in bytes to include.
31+
from_web : bool
32+
Flag indicating whether the source is a web URL.
33+
include_patterns : Optional[Union[List[str], str]], optional
34+
Patterns to include, by default None. Can be a list of strings or a single string.
35+
ignore_patterns : Optional[Union[List[str], str]], optional
36+
Patterns to ignore, by default None. Can be a list of strings or a single string.
37+
38+
Returns
39+
-------
40+
Dict[str, Any]
41+
A dictionary containing the parsed query parameters, including 'max_file_size',
42+
'ignore_patterns', and 'include_patterns'.
43+
"""
44+
# Determine the parsing method based on the source type
45+
if from_web or source.startswith("https://") or "github.com" in source:
46+
query = _parse_url(source)
47+
else:
48+
query = _parse_path(source)
49+
50+
# Process ignore patterns
51+
ignore_patterns_list = DEFAULT_IGNORE_PATTERNS.copy()
52+
if ignore_patterns:
53+
ignore_patterns_list += _parse_patterns(ignore_patterns)
54+
55+
# Process include patterns and override ignore patterns accordingly
56+
if include_patterns:
57+
parsed_include = _parse_patterns(include_patterns)
58+
ignore_patterns_list = _override_ignore_patterns(ignore_patterns_list, include_patterns=parsed_include)
59+
else:
60+
parsed_include = None
61+
62+
# Update the query dictionary with max_file_size and processed patterns
63+
query.update(
64+
{
65+
"max_file_size": max_file_size,
66+
"ignore_patterns": ignore_patterns_list,
67+
"include_patterns": parsed_include,
68+
}
69+
)
70+
return query
71+
72+
1473
def _parse_url(url: str) -> dict[str, Any]:
1574
url = url.split(" ")[0]
1675
url = unquote(url) # Decode URL-encoded characters
@@ -96,12 +155,13 @@ def _parse_patterns(pattern: list[str] | str) -> list[str]:
96155
97156
Raises
98157
------
99-
ValueError
158+
InvalidPatternError
100159
If any pattern contains invalid characters. Only alphanumeric characters,
101160
dash (-), underscore (_), dot (.), forward slash (/), plus (+), and
102161
asterisk (*) are allowed.
103162
"""
104163
patterns = pattern if isinstance(pattern, list) else [pattern]
164+
patterns = [p.strip() for p in patterns]
105165

106166
parsed_patterns = []
107167
for p in patterns:
@@ -110,11 +170,8 @@ def _parse_patterns(pattern: list[str] | str) -> list[str]:
110170
parsed_patterns = [p for p in parsed_patterns if p != ""]
111171

112172
for p in parsed_patterns:
113-
if not all(c.isalnum() or c in "-_./+*" for c in p):
114-
raise ValueError(
115-
f"Pattern '{p}' contains invalid characters. Only alphanumeric characters, dash (-), "
116-
"underscore (_), dot (.), forward slash (/), plus (+), and asterisk (*) are allowed."
117-
)
173+
if not _is_valid_pattern(p):
174+
raise InvalidPatternError(p)
118175

119176
return [_normalize_pattern(p) for p in parsed_patterns]
120177

@@ -149,59 +206,18 @@ def _parse_path(path: str) -> dict[str, Any]:
149206
return query
150207

151208

152-
def parse_query(
153-
source: str,
154-
max_file_size: int,
155-
from_web: bool,
156-
include_patterns: list[str] | str | None = None,
157-
ignore_patterns: list[str] | str | None = None,
158-
) -> dict[str, Any]:
209+
def _is_valid_pattern(pattern: str) -> bool:
159210
"""
160-
Parses the input source to construct a query dictionary with specified parameters.
211+
Check whether a pattern consists only of allowed characters.
161212
162213
Parameters
163214
----------
164-
source : str
165-
The source URL or file path to parse.
166-
max_file_size : int
167-
The maximum file size in bytes to include.
168-
from_web : bool
169-
Flag indicating whether the source is a web URL.
170-
include_patterns : Optional[Union[List[str], str]], optional
171-
Patterns to include, by default None. Can be a list of strings or a single string.
172-
ignore_patterns : Optional[Union[List[str], str]], optional
173-
Patterns to ignore, by default None. Can be a list of strings or a single string.
215+
pattern : str
216+
The pattern string to validate.
174217
175218
Returns
176219
-------
177-
Dict[str, Any]
178-
A dictionary containing the parsed query parameters, including 'max_file_size',
179-
'ignore_patterns', and 'include_patterns'.
220+
bool
221+
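True if every character is alphanumeric or one of dash (-), underscore (_), dot (.), forward slash (/), plus (+), or asterisk (*); False otherwise.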
180222
"""
181-
# Determine the parsing method based on the source type
182-
if from_web or source.startswith("https://") or "github.com" in source:
183-
query = _parse_url(source)
184-
else:
185-
query = _parse_path(source)
186-
187-
# Process ignore patterns
188-
ignore_patterns_list = DEFAULT_IGNORE_PATTERNS.copy()
189-
if ignore_patterns:
190-
ignore_patterns_list += _parse_patterns(ignore_patterns)
191-
192-
# Process include patterns and override ignore patterns accordingly
193-
if include_patterns:
194-
parsed_include = _parse_patterns(include_patterns)
195-
ignore_patterns_list = _override_ignore_patterns(ignore_patterns_list, include_patterns=parsed_include)
196-
else:
197-
parsed_include = None
198-
199-
# Update the query dictionary with max_file_size and processed patterns
200-
query.update(
201-
{
202-
"max_file_size": max_file_size,
203-
"ignore_patterns": ignore_patterns_list,
204-
"include_patterns": parsed_include,
205-
}
206-
)
207-
return query
223+
return all(c.isalnum() or c in "-_./+*" for c in pattern)

0 commit comments

Comments
 (0)