|
5 | 5 | import asyncio |
6 | 6 | import base64 |
7 | 7 | import re |
8 | | -from typing import Final |
| 8 | +from pathlib import Path |
| 9 | +from typing import TYPE_CHECKING, Final, Iterable, Literal |
9 | 10 | from urllib.parse import urlparse |
10 | 11 |
|
11 | 12 | import httpx |
|
14 | 15 | from gitingest.utils.compat_func import removesuffix |
15 | 16 | from gitingest.utils.exceptions import InvalidGitHubTokenError |
16 | 17 |
|
| 18 | +if TYPE_CHECKING: |
| 19 | + from gitingest.schemas import CloneConfig |
| 20 | + |
17 | 21 | # GitHub Personal-Access tokens (classic + fine-grained). |
18 | 22 | # - ghp_ / gho_ / ghu_ / ghs_ / ghr_ → 36 alphanumerics |
19 | 23 | # - github_pat_ → 22 alphanumerics + "_" + 59 alphanumerics |
@@ -303,3 +307,146 @@ def validate_github_token(token: str) -> None: |
303 | 307 | """ |
304 | 308 | if not re.fullmatch(_GITHUB_PAT_PATTERN, token): |
305 | 309 | raise InvalidGitHubTokenError |
| 310 | + |
| 311 | + |
async def checkout_partial_clone(config: CloneConfig, token: str | None) -> None:
    """Restrict the working tree of a partial clone to the requested subpath.

    Runs ``git sparse-checkout set <path>`` through the command prefix built by
    ``create_git_command`` for ``config.local_path`` / ``config.url``.

    Parameters
    ----------
    config : CloneConfig
        Clone settings; ``subpath``, ``blob``, ``local_path`` and ``url`` are read.
    token : str | None
        GitHub personal access token (PAT), forwarded to ``create_git_command``.

    """
    # Leading slashes are dropped before the path is handed to git.
    sparse_path = config.subpath.lstrip("/")

    if config.blob:
        # A blob URL (e.g. blob/branch/path/file.txt) names a file, so check out
        # its parent directory instead of the file itself.
        sparse_path = Path(sparse_path).parent.as_posix()

    git_prefix = create_git_command(["git"], config.local_path, config.url, token)
    await run_command(*git_prefix, "sparse-checkout", "set", sparse_path)
| 329 | + |
| 330 | + |
async def resolve_commit(config: CloneConfig, url: str, token: str | None) -> str:
    """Determine which commit the clone should be pinned to.

    Precedence: an explicit ``config.commit`` wins and is returned as-is;
    otherwise ``config.tag``, then ``config.branch``, and finally the remote
    ``HEAD`` is resolved through ``_resolve_ref_to_sha``.

    Parameters
    ----------
    config : CloneConfig
        Clone settings; ``commit``, ``tag`` and ``branch`` are read.
    url : str
        The URL of the remote repository.
    token : str | None
        GitHub personal access token (PAT) for accessing private repositories.

    Returns
    -------
    str
        ``config.commit`` when set, otherwise the value returned by
        ``_resolve_ref_to_sha`` for the selected ref.

    """
    # An explicit commit needs no remote lookup.
    if config.commit:
        return config.commit

    ref_name: str
    ref_kind: Literal["branch", "tag"]
    if config.tag:
        ref_name, ref_kind = config.tag, "tag"
    elif config.branch:
        ref_name, ref_kind = config.branch, "branch"
    else:
        # Nothing requested: fall back to the remote HEAD.
        ref_name, ref_kind = "HEAD", "branch"

    return await _resolve_ref_to_sha(url, ref=ref_name, kind=ref_kind, token=token)
| 357 | + |
| 358 | + |
async def _resolve_ref_to_sha(
    url: str,
    ref: str,
    kind: Literal["branch", "tag"],
    *,
    token: str | None = None,
) -> str:
    """Return the commit SHA that <kind>/<ref> points to in <url>.

    * Branch → the ``git ls-remote`` line whose ref is exactly ``refs/heads/<ref>``.
    * Tag → if annotated, prefer the peeled ``^{}`` line (commit).
    * ``ref == "HEAD"`` → the line whose ref is exactly ``HEAD``.

    Only lines whose ref name matches the requested ref exactly are considered,
    so a tag such as ``v1.0`` is never confused with ``v1.0.1`` or ``v1.0-rc1``
    that the wildcard query also returns.

    Parameters
    ----------
    url : str
        The URL of the remote repository.
    ref : str
        The reference to resolve to a commit SHA.
    kind : Literal["branch", "tag"]
        The kind of reference to resolve to a commit SHA.
    token : str | None
        GitHub personal access token (PAT) for accessing private repositories.

    Returns
    -------
    str
        The commit SHA.

    Raises
    ------
    ValueError
        If the ref does not exist in the remote repository.

    """
    await ensure_git_installed()

    # Build: git [-c http.<host>/.extraheader=Auth...] ls-remote <url> <pattern>
    cmd: list[str] = ["git"]
    if token and is_github_host(url):
        cmd += ["-c", create_git_auth_header(token, url=url)]

    # ``pattern`` is what we ask the remote for; ``wanted`` is the set of exact
    # ref names we accept from the answer.
    if ref == "HEAD":
        pattern = "HEAD"
        wanted = {"HEAD"}
    elif kind == "branch":
        pattern = f"refs/heads/{ref}"
        wanted = {pattern}
    else:  # tag
        tag_ref = f"refs/tags/{ref}"
        # The trailing "*" is needed to also receive the peeled "<tag>^{}" line
        # of an annotated tag, but it matches every tag sharing this prefix too.
        pattern = f"{tag_ref}*"
        wanted = {tag_ref, tag_ref + "^{}"}

    cmd += ["ls-remote", url, pattern]
    stdout, _ = await run_command(*cmd)

    # Drop rows for other refs that merely matched the pattern; without this a
    # peeled line of a *different* tag could be picked as the result.
    matching: list[str] = []
    for line in stdout.decode().splitlines():
        fields = line.split()
        if len(fields) == 2 and fields[1] in wanted:
            matching.append(line)

    sha = _pick_commit_sha(matching)
    if not sha:
        msg = f"{kind} {ref!r} not found in {url}"
        raise ValueError(msg)

    return sha
| 418 | + |
| 419 | + |
def _pick_commit_sha(lines: Iterable[str]) -> str | None:
    """Return a commit SHA from ``git ls-remote`` output.

    * Annotated tag → prefer the peeled line (``<sha> refs/tags/x^{}``).
    * Branch / lightweight tag → first non-peeled line.

    Blank lines and lines that are not exactly ``<sha> <ref>`` (two
    whitespace-separated fields) are ignored rather than raising.

    Parameters
    ----------
    lines : Iterable[str]
        The lines of a ``git ls-remote`` output.

    Returns
    -------
    str | None
        The commit SHA, or ``None`` if no commit SHA is found.

    """
    first_non_peeled: str | None = None

    for ln in lines:
        # Splitting on all whitespace also strips trailing blanks from the ref,
        # so the "^{}" suffix check below is not defeated by stray whitespace.
        fields = ln.split()
        if len(fields) != 2:
            continue  # blank or malformed line

        sha, ref = fields

        if ref.endswith("^{}"):  # peeled commit of annotated tag
            return sha  # best match, done

        if first_non_peeled is None:  # remember the first ordinary line
            first_non_peeled = sha

    return first_non_peeled  # branch or lightweight tag (or None)
0 commit comments