1010
1111
1212def parse_url (url : str ) -> Dict [str , Any ]:
13+ parsed = {
14+ "user_name" : None ,
15+ "repo_name" : None ,
16+ "type" : None ,
17+ "branch" : None ,
18+ "commit" : None ,
19+ "subpath" : "/" ,
20+ "local_path" : None ,
21+ "url" : None ,
22+ "slug" : None ,
23+ "id" : None ,
24+ }
25+
1326 url = url .split (" " )[0 ]
27+ url = unquote (url ) # Decode URL-encoded characters
28+
1429 if not url .startswith ('https://' ):
1530 url = 'https://' + url
1631
@@ -22,33 +37,37 @@ def parse_url(url: str) -> Dict[str, Any]:
2237 if len (path_parts ) < 2 :
2338 raise ValueError ("Invalid repository URL. Please provide a valid Git repository URL." )
2439
25- user_name = path_parts [0 ]
26- repo_name = path_parts [1 ]
27- slug = f"{ user_name } -{ repo_name } "
28- _id = str (uuid .uuid4 ())
40+ parsed ["user_name" ] = path_parts [0 ]
41+ parsed ["repo_name" ] = path_parts [1 ]
2942
30- parsed = {
31- "url" : f"https://{ domain } /{ user_name } /{ repo_name } " ,
32- "local_path" : f"{ TMP_BASE_PATH } /{ _id } /{ slug } " ,
33- "commit" : None ,
34- "branch" : None ,
35- "user_name" : user_name ,
36- "repo_name" : repo_name ,
37- "type" : None ,
38- "subpath" : "/" ,
39- "slug" : slug ,
40- "id" : _id ,
41- }
43+ # Keep original URL format but with decoded components
44+ parsed ["url" ] = f"https://{ domain } /{ parsed ['user_name' ]} /{ parsed ['repo_name' ]} "
45+ parsed ['slug' ] = f"{ parsed ['user_name' ]} -{ parsed ['repo_name' ]} "
46+ parsed ["id" ] = str (uuid .uuid4 ())
47+ parsed ["local_path" ] = f"{ TMP_BASE_PATH } /{ parsed ['id' ]} /{ parsed ['slug' ]} "
4248
4349 if len (path_parts ) > 3 :
44- parsed ["type" ] = path_parts [2 ]
45- branch = path_parts [3 ]
4650
47- parsed ["branch" ] = branch
48- if len (branch ) == 40 and all (c in HEX_DIGITS for c in branch ):
49- parsed ["commit" ] = branch
51+ parsed ["type" ] = path_parts [2 ] # Usually 'tree' or 'blob'
52+
53+ # Find the commit hash or reconstruct the branch name
54+ remaining_parts = path_parts [3 :]
55+ if remaining_parts [0 ] and len (remaining_parts [0 ]) == 40 and all (c in HEX_DIGITS for c in remaining_parts [0 ]):
56+ parsed ["commit" ] = remaining_parts [0 ]
57+ parsed ["subpath" ] = "/" + "/" .join (remaining_parts [1 :]) if len (remaining_parts ) > 1 else "/"
58+ else :
59+ # Handle branch names with slashes and special characters
60+ for i , part in enumerate (remaining_parts ):
61+ if part in ('tree' , 'blob' ):
62+ # Found another type indicator, everything before this was the branch name
63+ parsed ["branch" ] = "/" .join (remaining_parts [:i ])
64+ parsed ["subpath" ] = "/" + "/" .join (remaining_parts [i + 2 :]) if len (remaining_parts ) > i + 2 else "/"
65+ break
66+ else :
67+ # No additional type indicator found, assume everything is part of the branch name
68+ parsed ["branch" ] = "/" .join (remaining_parts )
69+ parsed ["subpath" ] = "/"
5070
51- parsed ["subpath" ] += "/" .join (path_parts [4 :])
5271
5372 return parsed
5473
@@ -111,6 +130,7 @@ def parse_query(
111130 include_patterns : Optional [Union [List [str ], str ]] = None ,
112131 ignore_patterns : Optional [Union [List [str ], str ]] = None ,
113132) -> Dict [str , Any ]:
133+
114134 """
115135 Parses the input source to construct a query dictionary with specified parameters.
116136
@@ -159,5 +179,4 @@ def parse_query(
159179 'include_patterns' : parsed_include ,
160180 }
161181 )
162-
163182 return query
0 commit comments