Skip to content

Commit 65bacc2

Browse files
authored
Merge pull request #478 from Iamrodos/fix-477-fine-grained-pat-attachments
Fix 477 fine grained pat attachments
2 parents c63fb37 + ab0eebb commit 65bacc2

12 files changed

+382
-339
lines changed

README.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -281,6 +281,8 @@ The tool automatically extracts file extensions from HTTP headers to ensure file
281281

282282
**Repository filtering** for repo files/assets handles renamed and transferred repositories gracefully. URLs are included if they either match the current repository name directly, or redirect to it (e.g., ``willmcgugan/rich`` redirects to ``Textualize/rich`` after transfer).
283283

284+
**Fine-grained token limitation:** Due to a GitHub platform limitation, fine-grained personal access tokens (``github_pat_...``) cannot download attachments from private repositories directly. This affects both ``/assets/`` (images) and ``/files/`` (documents) URLs. The tool implements a workaround for image attachments using GitHub's Markdown API, which converts URLs to temporary JWT-signed URLs that can be downloaded. However, this workaround only works for images - document attachments (PDFs, text files, etc.) will fail with 404 errors when using fine-grained tokens on private repos. For full attachment support on private repositories, use a classic token (``-t``) instead of a fine-grained token (``-f``). See `#477 <https://github.com/josegonzalez/python-github-backup/issues/477>`_ for details.
285+
284286

285287
Run in Docker container
286288
-----------------------

github_backup/cli.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,16 @@ def main():
4646
"Use -t/--token or -f/--token-fine to authenticate."
4747
)
4848

49+
# Issue #477: Fine-grained PATs cannot download all attachment types from
50+
# private repos. Image attachments will be retried via Markdown API workaround.
51+
if args.include_attachments and args.token_fine:
52+
logger.warning(
53+
"Using --attachments with fine-grained token. Due to GitHub platform "
54+
"limitations, file attachments (PDFs, etc.) from private repos may fail. "
55+
"Image attachments will be retried via workaround. For full attachment "
56+
"support, use --token-classic instead."
57+
)
58+
4959
if args.quiet:
5060
logger.setLevel(logging.WARNING)
5161

github_backup/github_backup.py

Lines changed: 100 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1062,6 +1062,65 @@ def download_attachment_file(url, path, auth, as_app=False, fine=False):
10621062
return metadata
10631063

10641064

1065+
def get_jwt_signed_url_via_markdown_api(url, token, repo_context):
    """Convert a user-attachments/assets URL to a JWT-signed URL via Markdown API.

    GitHub's Markdown API renders image URLs and returns HTML containing
    JWT-signed private-user-images.githubusercontent.com URLs that work
    without token authentication.

    This is a workaround for issue #477 where fine-grained PATs cannot
    download user-attachments URLs from private repos directly.

    Limitations:
    - Only works for /assets/ URLs (images)
    - Does NOT work for /files/ URLs (PDFs, text files, etc.)
    - JWT URLs expire after ~5 minutes

    This function never raises: every failure (HTTP error, network error,
    undecodable body, missing URL in the response) is logged at debug level
    and reported to the caller as ``None``.

    Args:
        url: The github.com/user-attachments/assets/UUID URL
        token: Raw fine-grained PAT (github_pat_...)
        repo_context: Repository context as "owner/repo"

    Returns:
        str: JWT-signed URL from private-user-images.githubusercontent.com
        None: If conversion fails
    """

    try:
        # Ask GitHub to render a one-image Markdown document in the context
        # of the repository; the rendered <img> carries the signed URL.
        payload = json.dumps(
            {"text": f"![img]({url})", "mode": "gfm", "context": repo_context}
        ).encode("utf-8")

        request = Request("https://api.github.com/markdown", data=payload, method="POST")
        request.add_header("Authorization", f"token {token}")
        request.add_header("Content-Type", "application/json")
        request.add_header("Accept", "application/vnd.github+json")

        response = urlopen(request, timeout=30)
        try:
            html = response.read().decode("utf-8")
        finally:
            # Always release the underlying connection, even when reading or
            # decoding the body fails; this runs once per attachment.
            response.close()

        # Parse JWT-signed URL from HTML response
        # Format: <img src="https://private-user-images.githubusercontent.com/...?jwt=..." ...>
        if match := re.search(
            r'src="(https://private-user-images\.githubusercontent\.com/[^"]+)"', html
        ):
            jwt_url = match.group(1)
            logger.debug("Converted attachment URL to JWT-signed URL via Markdown API")
            return jwt_url

        logger.debug("Markdown API response did not contain JWT-signed URL")
        return None

    except HTTPError as e:
        logger.debug(
            "Markdown API request failed with HTTP {0}: {1}".format(e.code, e.reason)
        )
        return None
    except Exception as e:
        # Deliberately broad: the workaround is best-effort and the caller
        # treats None as "skip this attachment".
        logger.debug("Markdown API request failed: {0}".format(str(e)))
        return None
1123+
10651124
def extract_attachment_urls(item_data, issue_number=None, repository_full_name=None):
10661125
"""Extract GitHub-hosted attachment URLs from issue/PR body and comments.
10671126
@@ -1415,15 +1474,46 @@ def download_attachments(
14151474
filename = get_attachment_filename(url)
14161475
filepath = os.path.join(attachments_dir, filename)
14171476

1418-
# Download and get metadata
1419-
metadata = download_attachment_file(
1420-
url,
1421-
filepath,
1422-
get_auth(args, encode=not args.as_app),
1423-
as_app=args.as_app,
1424-
fine=args.token_fine is not None,
1477+
# Issue #477: Fine-grained PATs cannot download user-attachments/assets
1478+
# from private repos directly (404). Use Markdown API workaround to get
1479+
# a JWT-signed URL. Only works for /assets/ (images), not /files/.
1480+
needs_jwt = (
1481+
args.token_fine is not None
1482+
and repository.get("private", False)
1483+
and "github.com/user-attachments/assets/" in url
14251484
)
14261485

1486+
if not needs_jwt:
1487+
# NORMAL download path
1488+
metadata = download_attachment_file(
1489+
url,
1490+
filepath,
1491+
get_auth(args, encode=not args.as_app),
1492+
as_app=args.as_app,
1493+
fine=args.token_fine is not None,
1494+
)
1495+
elif jwt_url := get_jwt_signed_url_via_markdown_api(
1496+
url, args.token_fine, repository["full_name"]
1497+
):
1498+
# JWT needed and extracted, download via JWT
1499+
metadata = download_attachment_file(
1500+
jwt_url, filepath, auth=None, as_app=False, fine=False
1501+
)
1502+
metadata["url"] = url # Apply back the original URL
1503+
metadata["jwt_workaround"] = True
1504+
else:
1505+
# Markdown API workaround failed - skip download we know will fail
1506+
metadata = {
1507+
"url": url,
1508+
"success": False,
1509+
"skipped_at": datetime.now(timezone.utc).isoformat(),
1510+
"error": "Fine-grained token cannot download private repo attachments. "
1511+
"Markdown API workaround failed. Use --token-classic instead.",
1512+
}
1513+
logger.warning(
1514+
"Skipping attachment {0}: {1}".format(url, metadata["error"])
1515+
)
1516+
14271517
# If download succeeded but we got an extension from Content-Disposition,
14281518
# we may need to rename the file to add the extension
14291519
if metadata["success"] and metadata.get("original_filename"):
@@ -1951,7 +2041,9 @@ def backup_security_advisories(args, repo_cwd, repository, repos_template):
19512041
logger.info("Retrieving {0} security advisories".format(repository["full_name"]))
19522042
mkdir_p(repo_cwd, advisory_cwd)
19532043

1954-
template = "{0}/{1}/security-advisories".format(repos_template, repository["full_name"])
2044+
template = "{0}/{1}/security-advisories".format(
2045+
repos_template, repository["full_name"]
2046+
)
19552047

19562048
_advisories = retrieve_data(args, template)
19572049

tests/conftest.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
"""Shared pytest fixtures for github-backup tests."""
2+
3+
import pytest
4+
5+
from github_backup.github_backup import parse_args
6+
7+
8+
@pytest.fixture
def create_args():
    """Return a factory that builds an args object from the real CLI parser.

    The factory parses a minimal command line (just the user ``testuser``)
    with the project's own ``parse_args`` and then applies any keyword
    overrides as attributes, so options added to the CLI later are picked
    up with their defaults without touching the tests.

    Usage:
        def test_something(self, create_args):
            args = create_args(include_releases=True, user="myuser")
    """

    def _build(**overrides):
        # Start from what the real parser produces for a bare invocation
        parsed = parse_args(["testuser"])
        # Then layer the per-test overrides on top, one attribute at a time
        for name in overrides:
            setattr(parsed, name, overrides[name])
        return parsed

    return _build

tests/test_all_starred.py

Lines changed: 9 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
"""Tests for --all-starred flag behavior (issue #225)."""
22

33
import pytest
4-
from unittest.mock import Mock, patch
4+
from unittest.mock import patch
55

66
from github_backup import github_backup
77

@@ -12,58 +12,14 @@ class TestAllStarredCloning:
1212
Issue #225: --all-starred should clone starred repos without requiring --repositories.
1313
"""
1414

15-
def _create_mock_args(self, **overrides):
16-
"""Create a mock args object with sensible defaults."""
17-
args = Mock()
18-
args.user = "testuser"
19-
args.output_directory = "/tmp/backup"
20-
args.include_repository = False
21-
args.include_everything = False
22-
args.include_gists = False
23-
args.include_starred_gists = False
24-
args.all_starred = False
25-
args.skip_existing = False
26-
args.bare_clone = False
27-
args.lfs_clone = False
28-
args.no_prune = False
29-
args.include_wiki = False
30-
args.include_issues = False
31-
args.include_issue_comments = False
32-
args.include_issue_events = False
33-
args.include_pulls = False
34-
args.include_pull_comments = False
35-
args.include_pull_commits = False
36-
args.include_pull_details = False
37-
args.include_labels = False
38-
args.include_hooks = False
39-
args.include_milestones = False
40-
args.include_security_advisories = False
41-
args.include_releases = False
42-
args.include_assets = False
43-
args.include_attachments = False
44-
args.incremental = False
45-
args.incremental_by_files = False
46-
args.github_host = None
47-
args.prefer_ssh = False
48-
args.token_classic = None
49-
args.token_fine = None
50-
args.as_app = False
51-
args.osx_keychain_item_name = None
52-
args.osx_keychain_item_account = None
53-
54-
for key, value in overrides.items():
55-
setattr(args, key, value)
56-
57-
return args
58-
5915
@patch('github_backup.github_backup.fetch_repository')
6016
@patch('github_backup.github_backup.get_github_repo_url')
61-
def test_all_starred_clones_without_repositories_flag(self, mock_get_url, mock_fetch):
17+
def test_all_starred_clones_without_repositories_flag(self, mock_get_url, mock_fetch, create_args):
6218
"""--all-starred should clone starred repos without --repositories flag.
6319
6420
This is the core fix for issue #225.
6521
"""
66-
args = self._create_mock_args(all_starred=True)
22+
args = create_args(all_starred=True)
6723
mock_get_url.return_value = "https://github.com/otheruser/awesome-project.git"
6824

6925
# A starred repository (is_starred flag set by retrieve_repositories)
@@ -88,9 +44,9 @@ def test_all_starred_clones_without_repositories_flag(self, mock_get_url, mock_f
8844

8945
@patch('github_backup.github_backup.fetch_repository')
9046
@patch('github_backup.github_backup.get_github_repo_url')
91-
def test_starred_repo_not_cloned_without_all_starred_flag(self, mock_get_url, mock_fetch):
47+
def test_starred_repo_not_cloned_without_all_starred_flag(self, mock_get_url, mock_fetch, create_args):
9248
"""Starred repos should NOT be cloned if --all-starred is not set."""
93-
args = self._create_mock_args(all_starred=False)
49+
args = create_args(all_starred=False)
9450
mock_get_url.return_value = "https://github.com/otheruser/awesome-project.git"
9551

9652
starred_repo = {
@@ -111,9 +67,9 @@ def test_starred_repo_not_cloned_without_all_starred_flag(self, mock_get_url, mo
11167

11268
@patch('github_backup.github_backup.fetch_repository')
11369
@patch('github_backup.github_backup.get_github_repo_url')
114-
def test_non_starred_repo_not_cloned_with_only_all_starred(self, mock_get_url, mock_fetch):
70+
def test_non_starred_repo_not_cloned_with_only_all_starred(self, mock_get_url, mock_fetch, create_args):
11571
"""Non-starred repos should NOT be cloned when only --all-starred is set."""
116-
args = self._create_mock_args(all_starred=True)
72+
args = create_args(all_starred=True)
11773
mock_get_url.return_value = "https://github.com/testuser/my-project.git"
11874

11975
# A regular (non-starred) repository
@@ -135,9 +91,9 @@ def test_non_starred_repo_not_cloned_with_only_all_starred(self, mock_get_url, m
13591

13692
@patch('github_backup.github_backup.fetch_repository')
13793
@patch('github_backup.github_backup.get_github_repo_url')
138-
def test_repositories_flag_still_works(self, mock_get_url, mock_fetch):
94+
def test_repositories_flag_still_works(self, mock_get_url, mock_fetch, create_args):
13995
"""--repositories flag should still clone repos as before."""
140-
args = self._create_mock_args(include_repository=True)
96+
args = create_args(include_repository=True)
14197
mock_get_url.return_value = "https://github.com/testuser/my-project.git"
14298

14399
regular_repo = {

0 commit comments

Comments
 (0)