Skip to content

Commit f0df95e

Browse files
CM-57660-Remove PAT token from repository URL
1 parent 46cdd9e commit f0df95e

File tree

5 files changed

+169
-4
lines changed

5 files changed

+169
-4
lines changed

cycode/cli/apps/report/sbom/repository_url/repository_url_command.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,10 @@
88
from cycode.cli.utils.get_api_client import get_report_cycode_client
99
from cycode.cli.utils.progress_bar import SbomReportProgressBarSection
1010
from cycode.cli.utils.sentry import add_breadcrumb
11+
from cycode.cli.utils.url_utils import sanitize_repository_url
12+
from cycode.logger import get_logger
13+
14+
logger = get_logger('Repository URL Command')
1115

1216

1317
def repository_url_command(
@@ -28,8 +32,13 @@ def repository_url_command(
2832
start_scan_time = time.time()
2933
report_execution_id = -1
3034

35+
# Sanitize repository URL to remove any embedded credentials/tokens before sending to API
36+
sanitized_uri = sanitize_repository_url(uri)
37+
if sanitized_uri != uri:
38+
logger.debug('Sanitized repository URL to remove credentials')
39+
3140
try:
32-
report_execution = client.request_sbom_report_execution(report_parameters, repository_url=uri)
41+
report_execution = client.request_sbom_report_execution(report_parameters, repository_url=sanitized_uri)
3342
report_execution_id = report_execution.id
3443

3544
create_sbom_report(progress_bar, client, report_execution_id, output_file, output_format)

cycode/cli/apps/scan/remote_url_resolver.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
from cycode.cli import consts
44
from cycode.cli.utils.git_proxy import git_proxy
55
from cycode.cli.utils.shell_executor import shell
6+
from cycode.cli.utils.url_utils import sanitize_repository_url
67
from cycode.logger import get_logger
78

89
logger = get_logger('Remote URL Resolver')
@@ -102,7 +103,11 @@ def _try_get_git_remote_url(path: str) -> Optional[str]:
102103
repo = git_proxy.get_repo(path, search_parent_directories=True)
103104
remote_url = repo.remotes[0].config_reader.get('url')
104105
logger.debug('Found Git remote URL, %s', {'remote_url': remote_url, 'repo_path': repo.working_dir})
105-
return remote_url
106+
# Sanitize URL to remove any embedded credentials/tokens before returning
107+
sanitized_url = sanitize_repository_url(remote_url)
108+
if sanitized_url != remote_url:
109+
logger.debug('Sanitized repository URL to remove credentials')
110+
return sanitized_url
106111
except Exception as e:
107112
logger.debug('Failed to get Git remote URL. Probably not a Git repository', exc_info=e)
108113
return None
@@ -124,7 +129,9 @@ def get_remote_url_scan_parameter(paths: tuple[str, ...]) -> Optional[str]:
124129
# - len(paths)*2 Plastic SCM subprocess calls
125130
remote_url = _try_get_any_remote_url(path)
126131
if remote_url:
127-
remote_urls.add(remote_url)
132+
# URLs are already sanitized in _try_get_git_remote_url, but sanitize again as safety measure
133+
sanitized_url = sanitize_repository_url(remote_url)
134+
remote_urls.add(sanitized_url)
128135

129136
if len(remote_urls) == 1:
130137
# we are resolving remote_url only if all paths belong to the same repo (identical remote URLs),

cycode/cli/utils/url_utils.py

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
from typing import Optional
2+
from urllib.parse import urlparse, urlunparse
3+
4+
from cycode.logger import get_logger
5+
6+
logger = get_logger('URL Utils')
7+
8+
9+
def sanitize_repository_url(url: Optional[str]) -> Optional[str]:
    """Remove embedded credentials (userinfo) from a repository URL.

    Repository URLs may carry a PAT or ``user:password`` pair in the authority
    part (``https://<token>@host/...``). This function strips that part so the
    secret is never forwarded to the API.

    Handling per URL shape:

    * URLs whose authority has no ``@`` (including scp-like ``git@host:path``
      remotes, which have no ``scheme://`` authority at all) are returned unchanged.
    * ``ssh://`` style URLs keep the username, which identifies the account and
      is not a secret, but lose any ``:password`` part.
    * Every other scheme loses the whole ``userinfo@`` prefix. Host spelling,
      IPv6 brackets and the port text are preserved exactly as written.

    Args:
        url: Repository URL that may contain credentials
            (e.g., https://token@github.com/user/repo.git).

    Returns:
        The URL without credentials (e.g., https://github.com/user/repo.git).
        Falsy input (``None`` or an empty string) is returned as-is.

    Examples:
        >>> sanitize_repository_url('https://token@github.com/user/repo.git')
        'https://github.com/user/repo.git'
        >>> sanitize_repository_url('https://user:token@github.com/user/repo.git')
        'https://github.com/user/repo.git'
        >>> sanitize_repository_url('ssh://git@github.com/user/repo.git')
        'ssh://git@github.com/user/repo.git'
        >>> sanitize_repository_url('git@github.com:user/repo.git')
        'git@github.com:user/repo.git'
        >>> sanitize_repository_url(None) is None
        True
    """
    if not url:
        return url

    try:
        parsed = urlparse(url)
    except ValueError as e:
        # Log only the exception type: both the URL and the exception message
        # may contain the credentials we are trying to remove.
        logger.debug(
            'Failed to parse repository URL, stripping credentials textually, %s',
            {'error': type(e).__name__},
        )
        return _strip_userinfo_textually(url)

    # Split on the LAST "@": an unescaped "@" inside a password must not be
    # mistaken for the userinfo/host separator.
    userinfo, at_sign, host_and_port = parsed.netloc.rpartition('@')
    if not at_sign:
        # Nothing to strip; return the input untouched instead of a rebuilt
        # (and possibly normalized) copy.
        return url

    sanitized_netloc = host_and_port
    if parsed.scheme in ('ssh', 'git+ssh', 'ssh+git'):
        # The SSH login name is required to connect and is not a secret;
        # only a ":password" suffix is dropped.
        username = userinfo.partition(':')[0]
        if username:
            sanitized_netloc = f'{username}@{host_and_port}'

    # The raw netloc text is reused (not parsed.hostname / parsed.port) so an
    # invalid port cannot raise and IPv6 brackets / host case survive.
    return urlunparse(parsed._replace(netloc=sanitized_netloc))


def _strip_userinfo_textually(url: str) -> str:
    """Drop ``userinfo@`` from the authority of *url* without full URL parsing."""
    scheme, separator, remainder = url.partition('://')
    if not separator:
        return url

    # The authority ends at the first "/", "?" or "#" (RFC 3986, section 3.2).
    authority_end = len(remainder)
    for delimiter in '/?#':
        position = remainder.find(delimiter)
        if position != -1:
            authority_end = min(authority_end, position)

    authority = remainder[:authority_end].rpartition('@')[2]
    return f'{scheme}{separator}{authority}{remainder[authority_end:]}'

cycode/cyclient/report_client.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,12 @@
66

77
from cycode.cli.exceptions.custom_exceptions import CycodeError
88
from cycode.cli.files_collector.models.in_memory_zip import InMemoryZip
9+
from cycode.cli.utils.url_utils import sanitize_repository_url
910
from cycode.cyclient import models
1011
from cycode.cyclient.cycode_client_base import CycodeClientBase
12+
from cycode.logger import get_logger
13+
14+
logger = get_logger('Report Client')
1115

1216

1317
@dataclasses.dataclass
@@ -49,7 +53,11 @@ def request_sbom_report_execution(
4953
# entity type required only for zipped-file
5054
request_data = {'report_parameters': params.to_json(without_entity_type=zip_file is None)}
5155
if repository_url:
52-
request_data['repository_url'] = repository_url
56+
# Sanitize repository URL to remove any embedded credentials/tokens before sending to API
57+
sanitized_url = sanitize_repository_url(repository_url)
58+
if sanitized_url != repository_url:
59+
logger.debug('Sanitized repository URL to remove credentials')
60+
request_data['repository_url'] = sanitized_url
5361

5462
request_args = {
5563
'url_path': url_path,

tests/utils/test_url_utils.py

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
import pytest
2+
3+
from cycode.cli.utils.url_utils import sanitize_repository_url
4+
5+
6+
def test_sanitize_repository_url_with_token() -> None:
    """A bare token in the userinfo part of an HTTPS URL is stripped."""
    sanitized = sanitize_repository_url('https://token@github.com/user/repo.git')
    assert sanitized == 'https://github.com/user/repo.git'
11+
12+
13+
def test_sanitize_repository_url_with_username_and_token() -> None:
    """A ``user:token`` pair in an HTTPS URL is stripped entirely."""
    sanitized = sanitize_repository_url('https://user:token@github.com/user/repo.git')
    assert sanitized == 'https://github.com/user/repo.git'
18+
19+
20+
def test_sanitize_repository_url_with_port() -> None:
    """An explicit port survives credential removal."""
    sanitized = sanitize_repository_url('https://token@github.com:443/user/repo.git')
    assert sanitized == 'https://github.com:443/user/repo.git'
25+
26+
27+
def test_sanitize_repository_url_ssh_format() -> None:
    """An scp-like SSH remote (``git@host:path``) comes back unchanged."""
    scp_like_remote = 'git@github.com:user/repo.git'
    sanitized = sanitize_repository_url(scp_like_remote)
    assert sanitized == scp_like_remote
31+
32+
33+
def test_sanitize_repository_url_ssh_protocol() -> None:
    """An ``ssh://`` URL is expected to come back unchanged, login name included."""
    ssh_remote = 'ssh://git@github.com/user/repo.git'
    sanitized = sanitize_repository_url(ssh_remote)
    assert sanitized == ssh_remote
37+
38+
39+
def test_sanitize_repository_url_no_credentials() -> None:
    """A URL that carries no credentials is left exactly as given."""
    clean_remote = 'https://github.com/user/repo.git'
    sanitized = sanitize_repository_url(clean_remote)
    assert sanitized == clean_remote
43+
44+
45+
def test_sanitize_repository_url_none() -> None:
    """``None`` passes straight through as ``None``."""
    sanitized = sanitize_repository_url(None)
    assert sanitized is None
48+
49+
50+
def test_sanitize_repository_url_empty_string() -> None:
    """An empty string passes straight through as an empty string."""
    sanitized = sanitize_repository_url('')
    assert sanitized == ''
53+
54+
55+
def test_sanitize_repository_url_gitlab() -> None:
    """GitLab-style ``oauth2:<token>`` credentials are stripped."""
    sanitized = sanitize_repository_url('https://oauth2:token@gitlab.com/user/repo.git')
    assert sanitized == 'https://gitlab.com/user/repo.git'
60+
61+
62+
def test_sanitize_repository_url_bitbucket() -> None:
    """Bitbucket-style ``x-token-auth:<token>`` credentials are stripped."""
    sanitized = sanitize_repository_url('https://x-token-auth:token@bitbucket.org/user/repo.git')
    assert sanitized == 'https://bitbucket.org/user/repo.git'
67+
68+
69+
def test_sanitize_repository_url_with_path_and_query() -> None:
    """Path, query string and fragment are kept while the token is removed."""
    sanitized = sanitize_repository_url('https://token@github.com/user/repo.git?ref=main#section')
    assert sanitized == 'https://github.com/user/repo.git?ref=main#section'
74+
75+
76+
def test_sanitize_repository_url_invalid_url() -> None:
    """A string that is not a URL is handed back unchanged and nothing is raised."""
    # The bare token has no scheme or authority, so it contains no userinfo
    # to strip; the call must simply return the input.
    not_a_url = 'not-a-valid-url'
    assert sanitize_repository_url(not_a_url) == not_a_url

0 commit comments

Comments
 (0)