|
4 | 4 | import base64 |
5 | 5 | import re |
6 | 6 | from typing import List, Optional, Tuple |
| 7 | +from urllib.parse import urlparse |
7 | 8 |
|
8 | 9 | from gitingest.utils.exceptions import InvalidGitHubTokenError |
9 | 10 |
|
10 | 11 | GITHUB_PAT_PATTERN = r"^(?:github_pat_|ghp_)[A-Za-z0-9_]{36,}$" |
11 | 12 |
|
12 | 13 |
|
def _is_github_host(url: str) -> bool:
    """
    Check if a URL is from a GitHub host (github.com or GitHub Enterprise).

    Only the host component of the URL is inspected (via ``urlparse``), never
    the path or query string. A host counts as GitHub when it is
    ``github.com`` or any other name starting with ``github.``
    (e.g. ``github.example.com``).

    Parameters
    ----------
    url : str
        The URL to check

    Returns
    -------
    bool
        True if the URL is from a GitHub host, False otherwise. URLs with no
        parsable hostname (empty string, missing scheme, malformed netloc)
        yield False.
    """
    try:
        hostname = urlparse(url).hostname or ""
    except ValueError:
        # urlparse rejects malformed netlocs (e.g. an unbalanced "[" of an
        # IPv6 literal); a URL that cannot be parsed is not a GitHub host.
        return False

    # "github.com" itself satisfies the prefix test, so one check covers both
    # the public host and Enterprise-style hosts.
    # NOTE(review): the prefix test accepts *any* domain beginning with
    # "github." -- callers attach tokens based on this result, so confirm that
    # this permissiveness is intended.
    return hostname.startswith("github.")
| 31 | + |
| 32 | + |
13 | 33 | async def run_command(*args: str) -> Tuple[bytes, bytes]: |
14 | 34 | """ |
15 | 35 | Execute a shell command asynchronously and return (stdout, stderr) bytes. |
@@ -80,7 +100,7 @@ async def check_repo_exists(url: str, token: Optional[str] = None) -> bool: |
80 | 100 | RuntimeError |
81 | 101 | If the curl command returns an unexpected status code. |
82 | 102 | """ |
83 | | - if token and "github.com" in url: |
| 103 | + if token and _is_github_host(url): |
84 | 104 | return await _check_github_repo_exists(url, token) |
85 | 105 |
|
86 | 106 | proc = await asyncio.create_subprocess_exec( |
@@ -131,12 +151,18 @@ async def _check_github_repo_exists(url: str, token: Optional[str] = None) -> bo |
131 | 151 | RuntimeError |
132 | 152 | If the repository is not found, if the provided URL is invalid, or if the token format is invalid. |
133 | 153 | """ |
134 | | - m = re.match(r"https?://github\.com/([^/]+)/([^/]+?)(?:\.git)?/?$", url) |
| 154 | + m = re.match(r"https?://github\.([^/]*)/([^/]+)/([^/]+?)(?:\.git)?/?$", url) |
135 | 155 | if not m: |
136 | | - raise ValueError(f"Un-recognised GitHub URL: {url!r}") |
137 | | - owner, repo = m.groups() |
138 | | - |
139 | | - api = f"https://api.github.com/repos/{owner}/{repo}" |
| 156 | + m = re.match(r"https?://github\.com/([^/]+)/([^/]+?)(?:\.git)?/?$", url) |
| 157 | + if not m: |
| 158 | + raise ValueError(f"Un-recognised GitHub URL: {url!r}") |
| 159 | + owner, repo = m.groups() |
| 160 | + api = f"https://api.github.com/repos/{owner}/{repo}" |
| 161 | + else: |
| 162 | + _, owner, repo = m.groups() |
| 163 | + |
| 164 | + parsed = urlparse(url) |
| 165 | + api = f"https://{parsed.hostname}/api/v3/repos/{owner}/{repo}" |
140 | 166 | cmd = [ |
141 | 167 | "curl", |
142 | 168 | "--silent", |
@@ -189,8 +215,14 @@ async def fetch_remote_branch_list(url: str, token: Optional[str] = None) -> Lis |
189 | 215 | fetch_branches_command = ["git"] |
190 | 216 |
|
191 | 217 | # Add authentication if needed |
192 | | - if token and "github.com" in url: |
193 | | - fetch_branches_command += ["-c", create_git_auth_header(token)] |
| 218 | + if token and _is_github_host(url): |
| 219 | + # Only pass URL if it's not the default github.com to maintain backward compatibility |
| 220 | + |
| 221 | + parsed = urlparse(url) |
| 222 | + if parsed.hostname == "github.com": |
| 223 | + fetch_branches_command += ["-c", create_git_auth_header(token)] |
| 224 | + else: |
| 225 | + fetch_branches_command += ["-c", create_git_auth_header(token, url)] |
194 | 226 |
|
195 | 227 | fetch_branches_command += ["ls-remote", "--heads", url] |
196 | 228 |
|
@@ -225,27 +257,39 @@ def create_git_command(base_cmd: List[str], local_path: str, url: str, token: Op |
225 | 257 | The git command with authentication if needed |
226 | 258 | """ |
227 | 259 | cmd = base_cmd + ["-C", local_path] |
228 | | - if token and url.startswith("https://github.com"): |
| 260 | + if token and _is_github_host(url): |
229 | 261 | validate_github_token(token) |
230 | | - cmd += ["-c", create_git_auth_header(token)] |
| 262 | + # Only pass URL if it's not the default github.com to maintain backward compatibility |
| 263 | + |
| 264 | + parsed = urlparse(url) |
| 265 | + if parsed.hostname == "github.com": |
| 266 | + cmd += ["-c", create_git_auth_header(token)] |
| 267 | + else: |
| 268 | + cmd += ["-c", create_git_auth_header(token, url)] |
231 | 269 | return cmd |
232 | 270 |
|
233 | 271 |
|
def create_git_auth_header(token: str, url: str = "https://github.com") -> str:
    """Create a Basic authentication header for GitHub git operations.

    The header is emitted as a ``http.<url>.extraheader`` git config entry
    scoped to the host (and explicit port, if any) of ``url``, always using
    the ``https`` scheme.

    Parameters
    ----------
    token : str
        GitHub personal access token
    url : str
        The GitHub URL to create the authentication header for.
        Defaults to "https://github.com". If no hostname can be extracted
        from it, "github.com" is used.

    Returns
    -------
    str
        The git config command for setting the authentication header
    """
    parsed = urlparse(url)
    authority = parsed.hostname or "github.com"

    # urlparse strips the brackets from IPv6 literals; restore them so the
    # config key remains a well-formed URL.
    if ":" in authority:
        authority = f"[{authority}]"

    # Git matches http.<url>.* keys on the exact port, so an explicit port in
    # the repository URL must be carried into the key or the header is never
    # applied to that host.
    try:
        port = parsed.port
    except ValueError:
        # Non-numeric or out-of-range port: fall back to a host-only key.
        port = None
    if port is not None:
        authority = f"{authority}:{port}"

    basic = base64.b64encode(f"x-oauth-basic:{token}".encode()).decode()
    return f"http.https://{authority}/.extraheader=Authorization: Basic {basic}"
249 | 293 |
|
250 | 294 |
|
251 | 295 | def validate_github_token(token: str) -> None: |
|
0 commit comments