55from typing import Any
66from urllib .parse import unquote
77
8+ from gitingest .exceptions import InvalidPatternError
89from gitingest .ignore_patterns import DEFAULT_IGNORE_PATTERNS
910
1011TMP_BASE_PATH : str = "../tmp"
1112HEX_DIGITS = set (string .hexdigits )
1213
1314
def parse_query(
    source: str,
    max_file_size: int,
    from_web: bool,
    include_patterns: list[str] | str | None = None,
    ignore_patterns: list[str] | str | None = None,
) -> dict[str, Any]:
    """
    Parse the input source to construct a query dictionary with specified parameters.

    Parameters
    ----------
    source : str
        The source URL or file path to parse.
    max_file_size : int
        The maximum file size in bytes to include.
    from_web : bool
        Flag indicating whether the source is a web URL.
    include_patterns : list[str] | str | None, optional
        Patterns to include, by default None. Can be a list of strings or a single string.
    ignore_patterns : list[str] | str | None, optional
        Patterns to ignore, by default None. Can be a list of strings or a single string.

    Returns
    -------
    dict[str, Any]
        A dictionary containing the parsed query parameters, including 'max_file_size',
        'ignore_patterns', and 'include_patterns'.
    """
    # Determine the parsing method based on the source type: an explicit flag,
    # an https:// scheme, or any github.com reference means a web URL;
    # everything else is treated as a local path.
    if from_web or source.startswith("https://") or "github.com" in source:
        query = _parse_url(source)
    else:
        query = _parse_path(source)

    # Start from the default ignore patterns and extend with any
    # caller-supplied ones (copy() so the module-level default isn't mutated).
    ignore_patterns_list = DEFAULT_IGNORE_PATTERNS.copy()
    if ignore_patterns:
        ignore_patterns_list += _parse_patterns(ignore_patterns)

    # Include patterns take precedence: remove any ignore entry that would
    # exclude an explicitly included path.
    if include_patterns:
        parsed_include = _parse_patterns(include_patterns)
        ignore_patterns_list = _override_ignore_patterns(ignore_patterns_list, include_patterns=parsed_include)
    else:
        parsed_include = None

    # Merge the size limit and processed pattern lists into the parsed query.
    query.update(
        {
            "max_file_size": max_file_size,
            "ignore_patterns": ignore_patterns_list,
            "include_patterns": parsed_include,
        }
    )
    return query
71+
72+
1473def _parse_url (url : str ) -> dict [str , Any ]:
1574 url = url .split (" " )[0 ]
1675 url = unquote (url ) # Decode URL-encoded characters
@@ -96,12 +155,13 @@ def _parse_patterns(pattern: list[str] | str) -> list[str]:
96155
97156 Raises
98157 ------
99- ValueError
158+ InvalidPatternError
100159 If any pattern contains invalid characters. Only alphanumeric characters,
101160 dash (-), underscore (_), dot (.), forward slash (/), plus (+), and
102161 asterisk (*) are allowed.
103162 """
104163 patterns = pattern if isinstance (pattern , list ) else [pattern ]
164+ patterns = [p .strip () for p in patterns ]
105165
106166 parsed_patterns = []
107167 for p in patterns :
@@ -110,11 +170,8 @@ def _parse_patterns(pattern: list[str] | str) -> list[str]:
110170 parsed_patterns = [p for p in parsed_patterns if p != "" ]
111171
112172 for p in parsed_patterns :
113- if not all (c .isalnum () or c in "-_./+*" for c in p ):
114- raise ValueError (
115- f"Pattern '{ p } ' contains invalid characters. Only alphanumeric characters, dash (-), "
116- "underscore (_), dot (.), forward slash (/), plus (+), and asterisk (*) are allowed."
117- )
173+ if not _is_valid_pattern (p ):
174+ raise InvalidPatternError (p )
118175
119176 return [_normalize_pattern (p ) for p in parsed_patterns ]
120177
@@ -149,59 +206,18 @@ def _parse_path(path: str) -> dict[str, Any]:
149206 return query
150207
151208
152- def parse_query (
153- source : str ,
154- max_file_size : int ,
155- from_web : bool ,
156- include_patterns : list [str ] | str | None = None ,
157- ignore_patterns : list [str ] | str | None = None ,
158- ) -> dict [str , Any ]:
209+ def _is_valid_pattern (pattern : str ) -> bool :
159210 """
160- Parses the input source to construct a query dictionary with specified parameters.
211+ _summary_
161212
162213 Parameters
163214 ----------
164- source : str
165- The source URL or file path to parse.
166- max_file_size : int
167- The maximum file size in bytes to include.
168- from_web : bool
169- Flag indicating whether the source is a web URL.
170- include_patterns : Optional[Union[List[str], str]], optional
171- Patterns to include, by default None. Can be a list of strings or a single string.
172- ignore_patterns : Optional[Union[List[str], str]], optional
173- Patterns to ignore, by default None. Can be a list of strings or a single string.
215+ pattern : str
216+ _description_
174217
175218 Returns
176219 -------
177- Dict[str, Any]
178- A dictionary containing the parsed query parameters, including 'max_file_size',
179- 'ignore_patterns', and 'include_patterns'.
220+ bool
221+ _description_
180222 """
181- # Determine the parsing method based on the source type
182- if from_web or source .startswith ("https://" ) or "github.com" in source :
183- query = _parse_url (source )
184- else :
185- query = _parse_path (source )
186-
187- # Process ignore patterns
188- ignore_patterns_list = DEFAULT_IGNORE_PATTERNS .copy ()
189- if ignore_patterns :
190- ignore_patterns_list += _parse_patterns (ignore_patterns )
191-
192- # Process include patterns and override ignore patterns accordingly
193- if include_patterns :
194- parsed_include = _parse_patterns (include_patterns )
195- ignore_patterns_list = _override_ignore_patterns (ignore_patterns_list , include_patterns = parsed_include )
196- else :
197- parsed_include = None
198-
199- # Update the query dictionary with max_file_size and processed patterns
200- query .update (
201- {
202- "max_file_size" : max_file_size ,
203- "ignore_patterns" : ignore_patterns_list ,
204- "include_patterns" : parsed_include ,
205- }
206- )
207- return query
223+ return all (c .isalnum () or c in "-_./+*" for c in pattern )
0 commit comments