Skip to content

Commit 33170f0

Browse files
committed
Merge branch 'fix-cli-help-text' of https://github.com/Sendi0011/gitingest into fix-cli-help-text
2 parents c818dc1 + 3a9bdf0 commit 33170f0

File tree

7 files changed

+265
-6050
lines changed

7 files changed

+265
-6050
lines changed

current_help.txt

Lines changed: 0 additions & 36 deletions
This file was deleted.

src/gitingest/cloning.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,12 @@
22

33
from pathlib import Path
44
from typing import Optional
5+
from urllib.parse import urlparse
56

67
from gitingest.config import DEFAULT_TIMEOUT
78
from gitingest.schemas import CloneConfig
89
from gitingest.utils.git_utils import (
10+
_is_github_host,
911
check_repo_exists,
1012
create_git_auth_header,
1113
create_git_command,
@@ -48,7 +50,7 @@ async def clone_repo(config: CloneConfig, token: Optional[str] = None) -> None:
4850
partial_clone: bool = config.subpath != "/"
4951

5052
# Validate token if provided
51-
if token and url.startswith("https://github.com"):
53+
if token and _is_github_host(url):
5254
validate_github_token(token)
5355

5456
# Create parent directory if it doesn't exist
@@ -59,8 +61,14 @@ async def clone_repo(config: CloneConfig, token: Optional[str] = None) -> None:
5961
raise ValueError("Repository not found. Make sure it is public or that you have provided a valid token.")
6062

6163
clone_cmd = ["git"]
62-
if token and url.startswith("https://github.com"):
63-
clone_cmd += ["-c", create_git_auth_header(token)]
64+
if token and _is_github_host(url):
65+
# Only pass URL if it's not the default github.com to maintain backward compatibility
66+
67+
parsed = urlparse(url)
68+
if parsed.hostname == "github.com":
69+
clone_cmd += ["-c", create_git_auth_header(token)]
70+
else:
71+
clone_cmd += ["-c", create_git_auth_header(token, url)]
6472

6573
clone_cmd += ["clone", "--single-branch"]
6674
# TODO: Re-enable --recurse-submodules when submodule support is needed

src/gitingest/utils/git_utils.py

Lines changed: 56 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,32 @@
44
import base64
55
import re
66
from typing import List, Optional, Tuple
7+
from urllib.parse import urlparse
78

89
from gitingest.utils.exceptions import InvalidGitHubTokenError
910

1011
GITHUB_PAT_PATTERN = r"^(?:github_pat_|ghp_)[A-Za-z0-9_]{36,}$"
1112

1213

14+
def _is_github_host(url: str) -> bool:
    """
    Check if a URL is from a GitHub host (github.com or GitHub Enterprise).

    This is a hostname-prefix heuristic: the parsed hostname must start with ``github.``, which covers
    ``github.com`` itself and Enterprise-style hosts such as ``github.company.com``. Note that *any* host
    whose name begins with ``github.`` is accepted, so callers that attach credentials based on this result
    are trusting that prefix.

    Parameters
    ----------
    url : str
        The URL to check

    Returns
    -------
    bool
        True if the URL is from a GitHub host, False otherwise. URLs without a hostname (for example a
        scheme-less ``github.com/owner/repo``) and URLs that cannot be parsed also yield False.
    """
    try:
        hostname = urlparse(url).hostname or ""
    except ValueError:
        # urlparse rejects some malformed authorities (e.g. an unbalanced IPv6 bracket). A predicate should
        # answer "no" rather than propagate a parsing error to every call site.
        return False
    # "github.com" itself starts with "github.", so a single prefix test covers both cases.
    return hostname.startswith("github.")
31+
32+
1333
async def run_command(*args: str) -> Tuple[bytes, bytes]:
1434
"""
1535
Execute a shell command asynchronously and return (stdout, stderr) bytes.
@@ -80,7 +100,7 @@ async def check_repo_exists(url: str, token: Optional[str] = None) -> bool:
80100
RuntimeError
81101
If the curl command returns an unexpected status code.
82102
"""
83-
if token and "github.com" in url:
103+
if token and _is_github_host(url):
84104
return await _check_github_repo_exists(url, token)
85105

86106
proc = await asyncio.create_subprocess_exec(
@@ -131,12 +151,18 @@ async def _check_github_repo_exists(url: str, token: Optional[str] = None) -> bo
131151
RuntimeError
132152
If the repository is not found, if the provided URL is invalid, or if the token format is invalid.
133153
"""
134-
m = re.match(r"https?://github\.com/([^/]+)/([^/]+?)(?:\.git)?/?$", url)
154+
m = re.match(r"https?://github\.([^/]*)/([^/]+)/([^/]+?)(?:\.git)?/?$", url)
135155
if not m:
136-
raise ValueError(f"Un-recognised GitHub URL: {url!r}")
137-
owner, repo = m.groups()
138-
139-
api = f"https://api.github.com/repos/{owner}/{repo}"
156+
m = re.match(r"https?://github\.com/([^/]+)/([^/]+?)(?:\.git)?/?$", url)
157+
if not m:
158+
raise ValueError(f"Un-recognised GitHub URL: {url!r}")
159+
owner, repo = m.groups()
160+
api = f"https://api.github.com/repos/{owner}/{repo}"
161+
else:
162+
_, owner, repo = m.groups()
163+
164+
parsed = urlparse(url)
165+
api = f"https://{parsed.hostname}/api/v3/repos/{owner}/{repo}"
140166
cmd = [
141167
"curl",
142168
"--silent",
@@ -189,8 +215,14 @@ async def fetch_remote_branch_list(url: str, token: Optional[str] = None) -> Lis
189215
fetch_branches_command = ["git"]
190216

191217
# Add authentication if needed
192-
if token and "github.com" in url:
193-
fetch_branches_command += ["-c", create_git_auth_header(token)]
218+
if token and _is_github_host(url):
219+
# Only pass URL if it's not the default github.com to maintain backward compatibility
220+
221+
parsed = urlparse(url)
222+
if parsed.hostname == "github.com":
223+
fetch_branches_command += ["-c", create_git_auth_header(token)]
224+
else:
225+
fetch_branches_command += ["-c", create_git_auth_header(token, url)]
194226

195227
fetch_branches_command += ["ls-remote", "--heads", url]
196228

@@ -225,27 +257,39 @@ def create_git_command(base_cmd: List[str], local_path: str, url: str, token: Op
225257
The git command with authentication if needed
226258
"""
227259
cmd = base_cmd + ["-C", local_path]
228-
if token and url.startswith("https://github.com"):
260+
if token and _is_github_host(url):
229261
validate_github_token(token)
230-
cmd += ["-c", create_git_auth_header(token)]
262+
# Only pass URL if it's not the default github.com to maintain backward compatibility
263+
264+
parsed = urlparse(url)
265+
if parsed.hostname == "github.com":
266+
cmd += ["-c", create_git_auth_header(token)]
267+
else:
268+
cmd += ["-c", create_git_auth_header(token, url)]
231269
return cmd
232270

233271

234-
def create_git_auth_header(token: str, url: str = "https://github.com") -> str:
    """Create a Basic authentication header for GitHub git operations.

    The header is scoped to a single host through git's ``http.<url>.extraheader`` setting, so it is only
    sent to the host named in ``url``. The scheme in the config key is always ``https``.

    Parameters
    ----------
    token : str
        GitHub personal access token
    url : str
        The GitHub URL to create the authentication header for.
        Defaults to "https://github.com". If the URL has no hostname, "github.com" is used. An explicit
        non-default port in the URL is kept in the config key.

    Returns
    -------
    str
        The git config command for setting the authentication header

    """
    parsed = urlparse(url)
    hostname = parsed.hostname or "github.com"

    try:
        port = parsed.port
    except ValueError:
        # A non-numeric or out-of-range port cannot be represented in the key; fall back to host only.
        port = None

    # git only applies an http.<url>.* setting when the port matches the request URL, so an explicit
    # non-default port must be kept. 443 is the https default and is left implicit.
    authority = f"{hostname}:{port}" if port and port != 443 else hostname

    basic = base64.b64encode(f"x-oauth-basic:{token}".encode()).decode()
    return f"http.https://{authority}/.extraheader=Authorization: Basic {basic}"
249293

250294

251295
def validate_github_token(token: str) -> None:

src/gitingest/utils/query_parser_utils.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -85,8 +85,8 @@ def _looks_like_git_host(host: str) -> bool:
8585
"""
8686
Check if the given host looks like a Git host.
8787
88-
The current heuristic returns `True` when the host starts with `git.` (e.g. `git.example.com`) or starts with
89-
`gitlab.` (e.g. `gitlab.company.com`).
88+
The current heuristic returns `True` when the host starts with `git.` (e.g. `git.example.com`), starts with
89+
`gitlab.` (e.g. `gitlab.company.com`), or starts with `github.` (e.g. `github.company.com` for GitHub Enterprise).
9090
9191
Parameters
9292
----------
@@ -99,7 +99,7 @@ def _looks_like_git_host(host: str) -> bool:
9999
True if the host looks like a Git host, otherwise False.
100100
"""
101101
host = host.lower()
102-
return host.startswith(("git.", "gitlab."))
102+
return host.startswith(("git.", "gitlab.", "github."))
103103

104104

105105
def _validate_url_scheme(scheme: str) -> None:

0 commit comments

Comments
 (0)