33
44require "json"
55require "open3"
6+ require "uri"
67
78ALLOWED_FILES = [
89 "README.md" ,
@@ -75,12 +76,25 @@ def check_json_urls_from_content(filename, content, allowed_prefixes)
7576 urls = extract_urls ( data )
7677
7778 urls . each do |url |
78- unless allowed_prefixes . any? { |prefix | url . start_with? ( prefix ) }
79+ begin
80+ canonical_url = canonicalize_url ( url )
81+ rescue URI ::InvalidURIError
82+ @errors << "#{ filename } : invalid URL #{ url } "
83+ next
84+ end
85+ unless allowed_prefixes . any? { |prefix | canonical_url . start_with? ( prefix ) }
7986 @errors << "#{ filename } : invalid URL #{ url } "
8087 end
8188 end
8289 end
8390
91+ def canonicalize_url ( url )
92+ uri = URI . parse ( url )
93+ # Normalize path to resolve . and .. segments
94+ uri . path = File . expand_path ( uri . path ) if uri . path
95+ uri . to_s
96+ end
97+
8498 def read_file_at_ref ( filename )
8599 output , _ , status = Open3 . capture3 ( "git" , "show" , "#{ @head_ref } :#{ filename } " )
86100 unless status . success?
@@ -90,16 +104,8 @@ def read_file_at_ref(filename)
90104 output
91105 end
92106
93- def extract_urls ( data , urls = [ ] )
94- case data
95- when Hash
96- data . each_value { |v | extract_urls ( v , urls ) }
97- when Array
98- data . each { |v | extract_urls ( v , urls ) }
99- when String
100- urls << data if data . start_with? ( "http://" , "https://" )
101- end
102- urls
107+ def extract_urls ( data )
108+ data . values . flat_map ( &:values )
103109 end
104110
105111 private
@@ -154,10 +160,14 @@ def test_url_extraction_nested
154160 assert_equal 3 , urls . length
155161 end
156162
157- def test_url_extraction_ignores_non_urls
158- data = { "version" => "3.0.0" , "url" => "https://example.com/a.7z" }
163+ def test_url_extraction_extracts_all_values
164+ data = {
165+ "3.0.0" => { "x64" => "https://example.com/a.7z" } ,
166+ "3.1.0" => { "x64" => "not a url" } ,
167+ }
159168 urls = @checker . extract_urls ( data )
160- assert_equal [ "https://example.com/a.7z" ] , urls
169+ assert_includes urls , "https://example.com/a.7z"
170+ assert_includes urls , "not a url"
161171 end
162172
163173 def test_valid_toolchain_urls
@@ -224,6 +234,35 @@ def test_check_json_urls_from_content_with_invalid_urls
224234 assert_match ( /invalid URL/ , @checker . errors . first )
225235 end
226236
237+ def test_path_traversal_urls_are_rejected
238+ # These URLs pass the prefix check but resolve to different locations after canonicalization
239+ malicious_urls = [
240+ "https://github.com/ruby/setup-msys2-gcc/releases/../../evil-repo/releases/download/malware.exe" ,
241+ "https://github.com/ruby/setup-msys2-gcc/releases/./../../evil-repo/releases/download/malware.exe" ,
242+ ]
243+ malicious_urls . each do |url |
244+ checker = AutomergeCheck . new ( "master" )
245+ content = JSON . generate ( { "3.0.0" => { "x64" => url } } )
246+ checker . check_json_urls_from_content ( "test.json" , content , WINDOWS_TOOLCHAIN_URL_PREFIXES )
247+ assert_equal 1 , checker . errors . length , "Expected path traversal URL to be rejected: #{ url } "
248+ end
249+ end
250+
251+ def test_malformed_urls_are_rejected
252+ malformed_urls = [
253+ "https://evil.com/file\x00 .exe" , # null byte causes URI::InvalidURIError
254+ "not a url at all" , # not a valid URI
255+ "https://github.com/ruby/setup-msys2-gcc/releases/\x00 malware.exe" , # matches allowed prefix but malformed
256+ ]
257+ malformed_urls . each do |url |
258+ checker = AutomergeCheck . new ( "master" )
259+ content = JSON . generate ( { "3.0.0" => { "x64" => url } } )
260+ checker . check_json_urls_from_content ( "test.json" , content , WINDOWS_TOOLCHAIN_URL_PREFIXES )
261+ assert_equal 1 , checker . errors . length , "Expected malformed URL to be rejected: #{ url . inspect } "
262+ assert_match ( /invalid URL/ , checker . errors . first )
263+ end
264+ end
265+
227266 def test_read_file_at_ref_returns_nil_for_missing_file
228267 result = @checker . send ( :read_file_at_ref , "nonexistent-file-that-does-not-exist.json" )
229268 assert_nil result
0 commit comments