1111
1212
1313def parse_url (url : str ) -> Dict [str , Any ]:
14- parsed = {
15- "user_name" : None ,
16- "repo_name" : None ,
17- "type" : None ,
18- "branch" : None ,
19- "commit" : None ,
20- "subpath" : "/" ,
21- "local_path" : None ,
22- "url" : None ,
23- "slug" : None ,
24- "id" : None ,
25- }
26-
2714 url = url .split (" " )[0 ]
2815 url = unquote (url ) # Decode URL-encoded characters
2916
@@ -38,42 +25,62 @@ def parse_url(url: str) -> Dict[str, Any]:
3825 if len (path_parts ) < 2 :
3926 raise ValueError ("Invalid repository URL. Please provide a valid Git repository URL." )
4027
41- parsed ["user_name" ] = path_parts [0 ]
42- parsed ["repo_name" ] = path_parts [1 ]
43-
44- # Keep original URL format but with decoded components
45- parsed ["url" ] = f"https://{ domain } /{ parsed ['user_name' ]} /{ parsed ['repo_name' ]} "
46- parsed ['slug' ] = f"{ parsed ['user_name' ]} -{ parsed ['repo_name' ]} "
47- parsed ["id" ] = str (uuid .uuid4 ())
48- parsed ["local_path" ] = f"{ TMP_BASE_PATH } /{ parsed ['id' ]} /{ parsed ['slug' ]} "
49-
50- if len (path_parts ) > 3 :
51-
52- parsed ["type" ] = path_parts [2 ] # Usually 'tree' or 'blob'
53-
54- # Find the commit hash or reconstruct the branch name
55- remaining_parts = path_parts [3 :]
56- if remaining_parts [0 ] and len (remaining_parts [0 ]) == 40 and all (c in HEX_DIGITS for c in remaining_parts [0 ]):
57- parsed ["commit" ] = remaining_parts [0 ]
58- parsed ["subpath" ] = "/" + "/" .join (remaining_parts [1 :]) if len (remaining_parts ) > 1 else "/"
59- else :
60- # Handle branch names with slashes and special characters
61- for i , part in enumerate (remaining_parts ):
62- if part in ('tree' , 'blob' ):
63- # Found another type indicator, everything before this was the branch name
64- parsed ["branch" ] = "/" .join (remaining_parts [:i ])
65- parsed ["subpath" ] = (
66- "/" + "/" .join (remaining_parts [i + 2 :]) if len (remaining_parts ) > i + 2 else "/"
67- )
68- break
69- else :
70- # No additional type indicator found, assume everything is part of the branch name
71- parsed ["branch" ] = "/" .join (remaining_parts )
72- parsed ["subpath" ] = "/"
28+ user_name = path_parts [0 ]
29+ repo_name = path_parts [1 ]
30+ _id = str (uuid .uuid4 ())
31+ slug = f"{ user_name } -{ repo_name } "
32+
33+ parsed = {
34+ "user_name" : user_name ,
35+ "repo_name" : repo_name ,
36+ "type" : None ,
37+ "branch" : None ,
38+ "commit" : None ,
39+ "subpath" : "/" ,
40+ "local_path" : f"{ TMP_BASE_PATH } /{ _id } /{ slug } " ,
41+ # Keep original URL format but with decoded components
42+ "url" : f"https://{ domain } /{ user_name } /{ repo_name } " ,
43+ "slug" : slug ,
44+ "id" : _id ,
45+ }
46+
47+ if len (path_parts ) < 4 :
48+ return parsed
49+
50+ parsed ["type" ] = path_parts [2 ] # Usually 'tree' or 'blob'
51+ commit = path_parts [3 ]
52+
53+ # Find the commit hash or reconstruct the branch name
54+ remaining_parts = path_parts [3 :]
55+
56+ if _is_valid_git_commit_hash (commit ):
57+ parsed ["commit" ] = commit
58+ if len (remaining_parts ) > 1 :
59+ parsed ["subpath" ] += "/" .join (remaining_parts [1 :])
60+ return parsed
61+
62+ # Handle branch names with slashes and special characters
63+
64+ # Find the index of the first type indicator ('tree' or 'blob'), if any
65+ type_indicator_index = next ((i for i , part in enumerate (remaining_parts ) if part in ('tree' , 'blob' )), None )
66+
67+ if type_indicator_index is None :
68+ # No type indicator found; assume the entire input is the branch name
69+ parsed ["branch" ] = "/" .join (remaining_parts )
70+ return parsed
71+
72+ # Found a type indicator; update branch and subpath
73+ parsed ["branch" ] = "/" .join (remaining_parts [:type_indicator_index ])
74+ if len (remaining_parts ) > type_indicator_index + 2 :
75+ parsed ["subpath" ] += "/" .join (remaining_parts [type_indicator_index + 2 :])
7376
7477 return parsed
7578
7679
def _is_valid_git_commit_hash(commit: str) -> bool:
    """Return whether *commit* has the shape of a full-length commit hash.

    The string qualifies only when it is exactly 40 characters long and
    every one of its characters is a member of ``HEX_DIGITS``.
    """
    # Cheap length check first so obviously wrong inputs are rejected
    # without scanning their characters.
    if len(commit) != 40:
        return False

    for char in commit:
        if char not in HEX_DIGITS:
            return False
    return True
82+
83+
7784def normalize_pattern (pattern : str ) -> str :
7885 pattern = pattern .lstrip (os .sep )
7986 if pattern .endswith (os .sep ):
0 commit comments