11from typing import List , Union
22import uuid
33import os
4+ from urllib .parse import unquote
45
56
67DEFAULT_IGNORE_PATTERNS = [
@@ -64,6 +65,8 @@ def parse_url(url: str) -> dict:
6465 }
6566
6667 url = url .split (" " )[0 ]
68+ url = unquote (url ) # Decode URL-encoded characters
69+
6770 if not url .startswith ('https://' ):
6871 url = 'https://' + url
6972
@@ -78,7 +81,7 @@ def parse_url(url: str) -> dict:
7881 parsed ["user_name" ] = path_parts [0 ]
7982 parsed ["repo_name" ] = path_parts [1 ]
8083
81- # Keep original URL format
84+ # Keep original URL format but with decoded components
8285 parsed ["url" ] = f"https://{ domain } /{ parsed ['user_name' ]} /{ parsed ['repo_name' ]} "
8386 parsed ['slug' ] = f"{ parsed ['user_name' ]} -{ parsed ['repo_name' ]} "
8487 parsed ["id" ] = str (uuid .uuid4 ())
@@ -93,7 +96,7 @@ def parse_url(url: str) -> dict:
9396 parsed ["commit" ] = remaining_parts [0 ]
9497 parsed ["subpath" ] = "/" + "/" .join (remaining_parts [1 :]) if len (remaining_parts ) > 1 else "/"
9598 else :
96- # Handle branch names with slashes
99+ # Handle branch names with slashes and special characters
97100 for i , part in enumerate (remaining_parts ):
98101 if part in ('tree' , 'blob' ):
99102 # Found another type indicator, everything before this was the branch name
0 commit comments