Skip to content

Commit f95e529

Browse files
Decode URL-encoded characters in parse_url function and update comments for clarity
1 parent 6b57689 commit f95e529

File tree

1 file changed

+5
-2
lines changed

1 file changed

+5
-2
lines changed

src/gitingest/parse_query.py

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
from typing import List, Union
22
import uuid
33
import os
4+
from urllib.parse import unquote
45

56

67
DEFAULT_IGNORE_PATTERNS = [
@@ -64,6 +65,8 @@ def parse_url(url: str) -> dict:
6465
}
6566

6667
url = url.split(" ")[0]
68+
url = unquote(url) # Decode URL-encoded characters
69+
6770
if not url.startswith('https://'):
6871
url = 'https://' + url
6972

@@ -78,7 +81,7 @@ def parse_url(url: str) -> dict:
7881
parsed["user_name"] = path_parts[0]
7982
parsed["repo_name"] = path_parts[1]
8083

81-
# Keep original URL format
84+
# Keep original URL format but with decoded components
8285
parsed["url"] = f"https://{domain}/{parsed['user_name']}/{parsed['repo_name']}"
8386
parsed['slug'] = f"{parsed['user_name']}-{parsed['repo_name']}"
8487
parsed["id"] = str(uuid.uuid4())
@@ -93,7 +96,7 @@ def parse_url(url: str) -> dict:
9396
parsed["commit"] = remaining_parts[0]
9497
parsed["subpath"] = "/" + "/".join(remaining_parts[1:]) if len(remaining_parts) > 1 else "/"
9598
else:
96-
# Handle branch names with slashes
99+
# Handle branch names with slashes and special characters
97100
for i, part in enumerate(remaining_parts):
98101
if part in ('tree', 'blob'):
99102
# Found another type indicator, everything before this was the branch name

0 commit comments

Comments
 (0)