Skip to content

Commit 5344fb5

Browse files
committed
Fully fix backward compatibility with existing tests and add GitHub Enterprise (GHE) tests
1 parent 219a6cd commit 5344fb5

File tree

3 files changed

+153
-9
lines changed

3 files changed

+153
-9
lines changed

src/gitingest/cloning.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
from pathlib import Path
44
from typing import Optional
5+
from urllib.parse import urlparse
56

67
from gitingest.config import DEFAULT_TIMEOUT
78
from gitingest.schemas import CloneConfig
@@ -61,7 +62,13 @@ async def clone_repo(config: CloneConfig, token: Optional[str] = None) -> None:
6162

6263
clone_cmd = ["git"]
6364
if token and _is_github_host(url):
64-
clone_cmd += ["-c", create_git_auth_header(token, url)]
65+
# Pass the URL only when the host is not the default github.com (e.g. GitHub Enterprise), to maintain backward compatibility
66+
67+
parsed = urlparse(url)
68+
if parsed.hostname == "github.com":
69+
clone_cmd += ["-c", create_git_auth_header(token)]
70+
else:
71+
clone_cmd += ["-c", create_git_auth_header(token, url)]
6572

6673
clone_cmd += ["clone", "--single-branch"]
6774
# TODO: Re-enable --recurse-submodules when submodule support is needed

src/gitingest/utils/git_utils.py

Lines changed: 23 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import base64
55
import re
66
from typing import List, Optional, Tuple
7+
from urllib.parse import urlparse
78

89
from gitingest.utils.exceptions import InvalidGitHubTokenError
910

@@ -13,18 +14,17 @@
1314
def _is_github_host(url: str) -> bool:
1415
"""
1516
Check if a URL is from a GitHub host (github.com or GitHub Enterprise).
16-
17+
1718
Parameters
1819
----------
1920
url : str
2021
The URL to check
21-
22+
2223
Returns
2324
-------
2425
bool
2526
True if the URL is from a GitHub host, False otherwise
2627
"""
27-
from urllib.parse import urlparse
2828
parsed = urlparse(url)
2929
hostname = parsed.hostname or ""
3030
return hostname == "github.com" or hostname.startswith("github.")
@@ -159,8 +159,8 @@ async def _check_github_repo_exists(url: str, token: Optional[str] = None) -> bo
159159
owner, repo = m.groups()
160160
api = f"https://api.github.com/repos/{owner}/{repo}"
161161
else:
162-
domain, owner, repo = m.groups()
163-
from urllib.parse import urlparse
162+
_, owner, repo = m.groups()
163+
164164
parsed = urlparse(url)
165165
api = f"https://{parsed.hostname}/api/v3/repos/{owner}/{repo}"
166166
cmd = [
@@ -216,7 +216,13 @@ async def fetch_remote_branch_list(url: str, token: Optional[str] = None) -> Lis
216216

217217
# Add authentication if needed
218218
if token and _is_github_host(url):
219-
fetch_branches_command += ["-c", create_git_auth_header(token, url)]
219+
# Pass the URL only when the host is not the default github.com (e.g. GitHub Enterprise), to maintain backward compatibility
220+
221+
parsed = urlparse(url)
222+
if parsed.hostname == "github.com":
223+
fetch_branches_command += ["-c", create_git_auth_header(token)]
224+
else:
225+
fetch_branches_command += ["-c", create_git_auth_header(token, url)]
220226

221227
fetch_branches_command += ["ls-remote", "--heads", url]
222228

@@ -253,7 +259,13 @@ def create_git_command(base_cmd: List[str], local_path: str, url: str, token: Op
253259
cmd = base_cmd + ["-C", local_path]
254260
if token and _is_github_host(url):
255261
validate_github_token(token)
256-
cmd += ["-c", create_git_auth_header(token, url)]
262+
# Pass the URL only when the host is not the default github.com (e.g. GitHub Enterprise), to maintain backward compatibility
263+
264+
parsed = urlparse(url)
265+
if parsed.hostname == "github.com":
266+
cmd += ["-c", create_git_auth_header(token)]
267+
else:
268+
cmd += ["-c", create_git_auth_header(token, url)]
257269
return cmd
258270

259271

@@ -264,13 +276,16 @@ def create_git_auth_header(token: str, url: str = "https://github.com") -> str:
264276
----------
265277
token : str
266278
GitHub personal access token
279+
url : str
280+
The GitHub URL to create the authentication header for.
281+
Defaults to "https://github.com".
267282
268283
Returns
269284
-------
270285
str
271286
The git config command for setting the authentication header
272287
"""
273-
from urllib.parse import urlparse
288+
274289
parsed = urlparse(url)
275290
hostname = parsed.hostname or "github.com"
276291
basic = base64.b64encode(f"x-oauth-basic:{token}".encode()).decode()

tests/test_git_utils.py

Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111

1212
from gitingest.utils.exceptions import InvalidGitHubTokenError
1313
from gitingest.utils.git_utils import (
14+
_is_github_host,
1415
create_git_auth_header,
1516
create_git_command,
1617
validate_github_token,
@@ -140,3 +141,124 @@ def test_create_git_command_helper_calls(mocker, url, token, should_call):
140141
header_mock.assert_not_called()
141142
# HEADER should not be included in command list
142143
assert "HEADER" not in cmd
144+
145+
146+
@pytest.mark.parametrize(
147+
"url, expected",
148+
[
149+
# GitHub.com URLs
150+
("https://github.com/owner/repo.git", True),
151+
("http://github.com/owner/repo.git", True),
152+
("https://github.com/owner/repo", True),
153+
# GitHub Enterprise URLs
154+
("https://github.company.com/owner/repo.git", True),
155+
("https://github.enterprise.org/owner/repo.git", True),
156+
("http://github.internal/owner/repo.git", True),
157+
("https://github.example.co.uk/owner/repo.git", True),
158+
# Non-GitHub URLs
159+
("https://gitlab.com/owner/repo.git", False),
160+
("https://bitbucket.org/owner/repo.git", False),
161+
("https://git.example.com/owner/repo.git", False),
162+
("https://mygithub.com/owner/repo.git", False), # doesn't start with "github."
163+
("https://subgithub.com/owner/repo.git", False),
164+
("https://example.com/github/repo.git", False),
165+
# Edge cases
166+
("", False),
167+
("not-a-url", False),
168+
("ftp://github.com/owner/repo.git", True), # Different protocol but still github.com
169+
],
170+
)
171+
def test_is_github_host(url, expected):
172+
"""_is_github_host should correctly identify GitHub and GitHub Enterprise URLs."""
173+
assert _is_github_host(url) == expected
174+
175+
176+
@pytest.mark.parametrize(
177+
"token, url, expected_hostname",
178+
[
179+
# GitHub.com URLs (default)
180+
("ghp_" + "a" * 36, "https://github.com", "github.com"),
181+
("ghp_" + "a" * 36, "https://github.com/owner/repo.git", "github.com"),
182+
# GitHub Enterprise URLs
183+
("ghp_" + "b" * 36, "https://github.company.com", "github.company.com"),
184+
("ghp_" + "c" * 36, "https://github.enterprise.org/owner/repo.git", "github.enterprise.org"),
185+
("ghp_" + "d" * 36, "http://github.internal", "github.internal"),
186+
],
187+
)
188+
def test_create_git_auth_header_with_ghe_url(token, url, expected_hostname):
189+
"""create_git_auth_header should handle GitHub Enterprise URLs correctly."""
190+
header = create_git_auth_header(token, url)
191+
expected_basic = base64.b64encode(f"x-oauth-basic:{token}".encode()).decode()
192+
expected = f"http.https://{expected_hostname}/.extraheader=Authorization: Basic {expected_basic}"
193+
assert header == expected
194+
195+
196+
@pytest.mark.parametrize(
197+
"base_cmd, local_path, url, token, expected_auth_hostname",
198+
[
199+
# GitHub.com URLs - should use default hostname
200+
(
201+
["git", "clone"],
202+
"/some/path",
203+
"https://github.com/owner/repo.git",
204+
"ghp_" + "a" * 36,
205+
"github.com",
206+
),
207+
# GitHub Enterprise URLs - should use custom hostname
208+
(
209+
["git", "clone"],
210+
"/some/path",
211+
"https://github.company.com/owner/repo.git",
212+
"ghp_" + "b" * 36,
213+
"github.company.com",
214+
),
215+
(
216+
["git", "clone"],
217+
"/some/path",
218+
"https://github.enterprise.org/owner/repo.git",
219+
"ghp_" + "c" * 36,
220+
"github.enterprise.org",
221+
),
222+
(
223+
["git", "clone"],
224+
"/some/path",
225+
"http://github.internal/owner/repo.git",
226+
"ghp_" + "d" * 36,
227+
"github.internal",
228+
),
229+
],
230+
)
231+
def test_create_git_command_with_ghe_urls(base_cmd, local_path, url, token, expected_auth_hostname):
232+
"""create_git_command should handle GitHub Enterprise URLs correctly."""
233+
cmd = create_git_command(base_cmd, local_path, url, token)
234+
235+
# Should have base command and -C option
236+
expected_prefix = base_cmd + ["-C", local_path]
237+
assert cmd[: len(expected_prefix)] == expected_prefix
238+
239+
# Should have -c and auth header
240+
assert "-c" in cmd
241+
auth_header_index = cmd.index("-c") + 1
242+
auth_header = cmd[auth_header_index]
243+
244+
# Verify the auth header contains the expected hostname
245+
assert f"http.https://{expected_auth_hostname}/" in auth_header
246+
assert "Authorization: Basic" in auth_header
247+
248+
249+
@pytest.mark.parametrize(
250+
"base_cmd, local_path, url, token",
251+
[
252+
# Should NOT add auth headers for non-GitHub URLs
253+
(["git", "clone"], "/some/path", "https://gitlab.com/owner/repo.git", "ghp_" + "a" * 36),
254+
(["git", "clone"], "/some/path", "https://bitbucket.org/owner/repo.git", "ghp_" + "b" * 36),
255+
(["git", "clone"], "/some/path", "https://git.example.com/owner/repo.git", "ghp_" + "c" * 36),
256+
],
257+
)
258+
def test_create_git_command_ignores_non_github_urls(base_cmd, local_path, url, token):
259+
"""create_git_command should not add auth headers for non-GitHub URLs."""
260+
cmd = create_git_command(base_cmd, local_path, url, token)
261+
262+
# Should only have base command and -C option, no auth headers
263+
expected = base_cmd + ["-C", local_path]
264+
assert cmd == expected

0 commit comments

Comments
 (0)