22import string
33import uuid
44from typing import Any , Dict , List , Optional , Union
5+ from urllib .parse import unquote
56
67from gitingest .ignore_patterns import DEFAULT_IGNORE_PATTERNS
78
@@ -25,7 +26,7 @@ def parse_url(url: str) -> Dict[str, Any]:
2526
2627 url = url .split (" " )[0 ]
2728 url = unquote (url ) # Decode URL-encoded characters
28-
29+
2930 if not url .startswith ('https://' ):
3031 url = 'https://' + url
3132
@@ -49,7 +50,7 @@ def parse_url(url: str) -> Dict[str, Any]:
4950 if len (path_parts ) > 3 :
5051
5152 parsed ["type" ] = path_parts [2 ] # Usually 'tree' or 'blob'
52-
53+
5354 # Find the commit hash or reconstruct the branch name
5455 remaining_parts = path_parts [3 :]
5556 if remaining_parts [0 ] and len (remaining_parts [0 ]) == 40 and all (c in HEX_DIGITS for c in remaining_parts [0 ]):
@@ -61,14 +62,15 @@ def parse_url(url: str) -> Dict[str, Any]:
6162 if part in ('tree' , 'blob' ):
6263 # Found another type indicator, everything before this was the branch name
6364 parsed ["branch" ] = "/" .join (remaining_parts [:i ])
64- parsed ["subpath" ] = "/" + "/" .join (remaining_parts [i + 2 :]) if len (remaining_parts ) > i + 2 else "/"
65+ parsed ["subpath" ] = (
66+ "/" + "/" .join (remaining_parts [i + 2 :]) if len (remaining_parts ) > i + 2 else "/"
67+ )
6568 break
6669 else :
6770 # No additional type indicator found, assume everything is part of the branch name
6871 parsed ["branch" ] = "/" .join (remaining_parts )
6972 parsed ["subpath" ] = "/"
7073
71-
7274 return parsed
7375
7476
@@ -130,7 +132,6 @@ def parse_query(
130132 include_patterns : Optional [Union [List [str ], str ]] = None ,
131133 ignore_patterns : Optional [Union [List [str ], str ]] = None ,
132134) -> Dict [str , Any ]:
133-
134135 """
135136 Parses the input source to construct a query dictionary with specified parameters.
136137
0 commit comments