Skip to content

Commit ea50974

Browse files
committed
Add batch 2 support for purl to download URL
Signed-off-by: Tushar Goel <tushar.goel.dav@gmail.com>
1 parent 3090030 commit ea50974

File tree

3 files changed

+139
-0
lines changed

3 files changed

+139
-0
lines changed

src/fetchcode/cran.py

Lines changed: 42 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,42 @@
1+
# fetchcode is a free software tool from nexB Inc. and others.
2+
# Visit https://github.com/aboutcode-org/fetchcode for support and download.
3+
#
4+
# Copyright (c) nexB Inc. and others. All rights reserved.
5+
# http://nexb.com and http://aboutcode.org
6+
#
7+
# This software is licensed under the Apache License version 2.0.
8+
#
9+
# You may not use this software except in compliance with the License.
10+
# You may obtain a copy of the License at:
11+
# http://apache.org/licenses/LICENSE-2.0
12+
# Unless required by applicable law or agreed to in writing, software distributed
13+
# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
14+
# CONDITIONS OF ANY KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations under the License.
16+
17+
from packageurl import PackageURL
18+
19+
from fetchcode.utils import _http_exists
20+
21+
22+
class Cran:
    """
    Namespace for resolving CRAN package URLs (PURLs) to download URLs.
    """

    # Declared static because the function takes neither ``self`` nor ``cls``;
    # without the decorator, calling it on an instance would bind the instance
    # to ``purl``. Class-level calls (``Cran.get_download_url(purl)``) are
    # unaffected.
    @staticmethod
    def get_download_url(purl: str):
        """
        Resolve a CRAN PURL to a verified, downloadable source tarball URL.
        Tries current contrib first, then Archive.

        ``purl`` is a package URL string such as ``pkg:cran/dplyr@1.0.0``.
        Return the first candidate URL for which ``_http_exists`` reports
        success. Return None when the PURL has no name or no version, or when
        neither candidate URL exists.
        """
        p = PackageURL.from_string(purl)
        if not p.name or not p.version:
            return None

        contrib_url = "https://cran.r-project.org/src/contrib"
        tarball = f"{p.name}_{p.version}.tar.gz"

        # Order matters: the current contrib location is probed before the
        # per-package Archive directory.
        candidates = (
            f"{contrib_url}/{tarball}",
            f"{contrib_url}/Archive/{p.name}/{tarball}",
        )
        for url in candidates:
            if _http_exists(url):
                return url

        return None

src/fetchcode/utils.py

Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -243,3 +243,14 @@ def get_first_three_md5_hash_characters(podname):
243243
create a hash (using md5) of it and take the first three characters."
244244
"""
245245
return md5_hasher(podname.encode("utf-8")).hexdigest()[0:3]
246+
247+
248+
def _http_exists(url: str) -> bool:
    """
    Return True when probing ``url`` yields an HTTP 200 or 206 status.

    The probe is sent through ``make_head_request`` with a one-byte ``Range``
    header. A None response, or any exception raised while probing or while
    reading the status, is reported as False instead of being propagated.
    """
    # NOTE(review): the helper's name suggests this is a HEAD request carrying
    # a Range header rather than a ranged GET -- confirm against the helper.
    probe_headers = {"Range": "bytes=0-0"}
    try:
        response = make_head_request(url, headers=probe_headers)
        if response is None:
            return False
        return response.status_code in (200, 206)
    except Exception:
        return False

tests/test_cran.py

Lines changed: 86 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,86 @@
1+
# fetchcode is a free software tool from nexB Inc. and others.
2+
# Visit https://github.com/aboutcode-org/fetchcode for support and download.
3+
#
4+
# Copyright (c) nexB Inc. and others. All rights reserved.
5+
# http://nexb.com and http://aboutcode.org
6+
#
7+
# This software is licensed under the Apache License version 2.0.
8+
#
9+
# You may not use this software except in compliance with the License.
10+
# You may obtain a copy of the License at:
11+
# http://apache.org/licenses/LICENSE-2.0
12+
# Unless required by applicable law or agreed to in writing, software distributed
13+
# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
14+
# CONDITIONS OF ANY KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations under the License.
16+
17+
from unittest.mock import patch
18+
19+
import pytest
20+
21+
from fetchcode.cran import Cran
22+
23+
# Module-level alias so the tests below can call the resolver directly
# through the class attribute without repeating the class name.
get_download_url = Cran.get_download_url
24+
25+
26+
@pytest.fixture
def valid_purl():
    """
    Provide a CRAN PURL string that carries both a name and a version.
    """
    purl = "pkg:cran/dplyr@1.0.0"
    return purl
29+
30+
31+
def test_current_url_exists(valid_purl):
    """
    When the first probe succeeds, the contrib URL is returned and it is the
    only URL that was probed.
    """
    expected = "https://cran.r-project.org/src/contrib/dplyr_1.0.0.tar.gz"

    with patch("fetchcode.cran._http_exists", return_value=True) as probe:
        resolved = get_download_url(valid_purl)
        assert resolved == expected
        probe.assert_called_once_with(expected)
38+
39+
40+
def test_fallback_to_archive(valid_purl):
    """
    When only the Archive URL exists, it is returned after both the contrib
    and the Archive URLs have been probed.
    """
    contrib = "https://cran.r-project.org/src/contrib/dplyr_1.0.0.tar.gz"
    archive = "https://cran.r-project.org/src/contrib/Archive/dplyr/dplyr_1.0.0.tar.gz"

    # Only the Archive location is reported as existing.
    only_archive = lambda url: url == archive  # noqa: E731

    with patch("fetchcode.cran._http_exists", side_effect=only_archive) as probe:
        resolved = get_download_url(valid_purl)
        assert resolved == archive
        assert probe.call_count == 2
        probe.assert_any_call(contrib)
        probe.assert_any_call(archive)
53+
54+
55+
def test_neither_url_exists(valid_purl):
    """
    When every probe fails, the resolver returns None after two probes.
    """
    with patch("fetchcode.cran._http_exists", return_value=False) as probe:
        resolved = get_download_url(valid_purl)
        assert resolved is None
        assert probe.call_count == 2
60+
61+
62+
def test_missing_version_returns_none():
    """
    A PURL without a version resolves to None.
    """
    versionless_purl = "pkg:cran/dplyr"
    assert get_download_url(versionless_purl) is None
65+
66+
67+
def test_version_with_dash():
    """
    A version containing a dash is carried into the tarball URL unchanged.
    """
    expected = "https://cran.r-project.org/src/contrib/somepkg_1.2-3.tar.gz"

    with patch("fetchcode.cran._http_exists", return_value=True) as probe:
        resolved = get_download_url("pkg:cran/somepkg@1.2-3")
        assert resolved == expected
        probe.assert_called_once_with(expected)
76+
77+
78+
def test_name_with_dot():
    """
    A package name containing a dot is carried into the tarball URL unchanged.
    """
    expected = "https://cran.r-project.org/src/contrib/foo.bar_2.0.1.tar.gz"

    with patch("fetchcode.cran._http_exists", return_value=True) as probe:
        resolved = get_download_url("pkg:cran/foo.bar@2.0.1")
        assert resolved == expected
        probe.assert_called_once_with(expected)

0 commit comments

Comments
 (0)