Skip to content

Commit 91d5f0f

Browse files
committed
Fix clone not working on urls with query parameters
1 parent 93d789e commit 91d5f0f

File tree

2 files changed

+13
-1
lines changed

2 files changed

+13
-1
lines changed

src/gitingest/parse_query.py

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,7 @@
33
import string
44
import uuid
55
from typing import Any
6-
from urllib.parse import unquote
6+
from urllib.parse import unquote, urlparse
77

88
from gitingest.exceptions import InvalidPatternError
99
from gitingest.ignore_patterns import DEFAULT_IGNORE_PATTERNS
@@ -103,6 +103,10 @@ def _parse_url(url: str) -> dict[str, Any]:
103103
if not url.startswith(("https://", "http://")):
104104
url = "https://" + url
105105

106+
# Parse URL and reconstruct it without query parameters and fragments
107+
parsed_url = urlparse(url)
108+
url = f"{parsed_url.scheme}://{parsed_url.netloc}{parsed_url.path}"
109+
106110
# Extract domain and path
107111
url_parts = url.split("/")
108112
domain = url_parts[2]

src/gitingest/tests/test_parse_query.py

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -153,3 +153,11 @@ def test_parse_query_uuid_uniqueness() -> None:
153153
result1 = parse_query(path, max_file_size=100, from_web=False)
154154
result2 = parse_query(path, max_file_size=100, from_web=False)
155155
assert result1["id"] != result2["id"]
156+
157+
158+
def test_parse_url_with_query_and_fragment() -> None:
159+
url = "https://github.com/user/repo?arg=value#fragment"
160+
result = _parse_url(url)
161+
assert result["user_name"] == "user"
162+
assert result["repo_name"] == "repo"
163+
assert result["url"] == "https://github.com/user/repo" # URL should be cleaned

0 commit comments

Comments
 (0)