From eb4c1302abab231b9078ae67736cc0412a724d4f Mon Sep 17 00:00:00 2001 From: Shubhangi Singh Date: Wed, 20 Aug 2025 14:39:17 +0000 Subject: [PATCH 1/9] making crc32c as default --- .../lib/google/cloud/storage/bucket.rb | 1 + .../test/google/cloud/storage/bucket_test.rb | 45 ++++++++++++++++++- 2 files changed, 44 insertions(+), 2 deletions(-) diff --git a/google-cloud-storage/lib/google/cloud/storage/bucket.rb b/google-cloud-storage/lib/google/cloud/storage/bucket.rb index ca8461354bb7..b55485149875 100644 --- a/google-cloud-storage/lib/google/cloud/storage/bucket.rb +++ b/google-cloud-storage/lib/google/cloud/storage/bucket.rb @@ -1805,6 +1805,7 @@ def create_file file, path ||= file.path if file.respond_to? :path path ||= file if file.is_a? String raise ArgumentError, "must provide path" if path.nil? + checksum = :crc32c if checksum == nil && crc32c.nil? && md5.nil? crc32c = crc32c_for file, checksum, crc32c md5 = md5_for file, checksum, md5 diff --git a/google-cloud-storage/test/google/cloud/storage/bucket_test.rb b/google-cloud-storage/test/google/cloud/storage/bucket_test.rb index 203e423467e5..b32f6a23c0e0 100644 --- a/google-cloud-storage/test/google/cloud/storage/bucket_test.rb +++ b/google-cloud-storage/test/google/cloud/storage/bucket_test.rb @@ -101,6 +101,40 @@ _(bucket_complete.autoclass_enabled).must_equal bucket_autoclass_enabled _(bucket_complete.autoclass_terminal_storage_class).must_equal bucket_autoclass_terminal_storage_class end + + it "creates a file with checksum: :crc32c by default" do + new_file_name = random_file_path + + Tempfile.open ["google-cloud", ".txt"] do |tmpfile| + tmpfile.write "Hello world!" + tmpfile.rewind + + crc32c = Google::Cloud::Storage::File::Verifier.crc32c_for tmpfile + + mock = Minitest::Mock.new + mock.expect :insert_object, create_file_gapi(bucket.name, new_file_name), + [bucket.name, empty_file_gapi(crc32c: crc32c)], **insert_object_args(name: new_file_name, upload_source: tmpfile, options: {retries: 0}) + + bucket.service.mocked_service = mock + bucket.create_file tmpfile, new_file_name + + mock.verify + end + end + + it "creates a file with a StringIO and checksum: :crc32c by default" do + new_file_name = random_file_path + new_file_contents = StringIO.new "Hello world" + mock = Minitest::Mock.new + mock.expect :insert_object, create_file_gapi(bucket.name, new_file_name), + [bucket.name, empty_file_gapi(crc32c: "crUfeA==")], **insert_object_args(name: new_file_name, upload_source: new_file_contents, options: {retries: 0}) + + bucket.service.mocked_service = mock + + bucket.create_file new_file_contents, new_file_name + + mock.verify + end it "returns frozen cors" do bucket_complete.cors.each do |cors| @@ -597,7 +631,7 @@ Tempfile.create ["google-cloud", ".txt"] do |tmpfile| mock = Minitest::Mock.new mock.expect :insert_object, create_file_gapi(bucket_user_project.name, new_file_name), - [bucket.name, empty_file_gapi], **insert_object_args(name: new_file_name, upload_source: tmpfile, user_project: "test", options: {retries: 0}) + [bucket.name, empty_file_gapi(crc32c: "AAAAAA==")], **insert_object_args(name: new_file_name, upload_source: tmpfile, user_project: "test", options: {retries: 0}) bucket_user_project.service.mocked_service = mock @@ -610,7 +644,7 @@ it "creates an file with a StringIO" do new_file_name = random_file_path - new_file_contents = StringIO.new + new_file_contents = StringIO.new("Hello world") mock = Minitest::Mock.new mock.expect :insert_object, create_file_gapi(bucket.name, new_file_name), @@ -1417,6 +1451,13 @@ def empty_file_gapi cache_control: nil, content_disposition: nil, content_type: nil, crc32c: nil, md5: nil, metadata: nil, storage_class: nil, temporary_hold: nil, event_based_hold: nil + + # new_file_contents = StringIO.new + # Set crc32c if both md5 and crc32c are not provided + if md5.nil? && crc32c.nil? + crc32c = Google::Cloud::Storage::File::Verifier.crc32c_for(StringIO.new("Hello world")) + end + params = { cache_control: cache_control, content_type: content_type, content_disposition: content_disposition, md5_hash: md5, From a5bb38559c6b328df67dc22c20e05fe2684435b7 Mon Sep 17 00:00:00 2001 From: Shubhangi Singh Date: Wed, 20 Aug 2025 14:53:12 +0000 Subject: [PATCH 2/9] refactoring --- google-cloud-storage/lib/google/cloud/storage/bucket.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/google-cloud-storage/lib/google/cloud/storage/bucket.rb b/google-cloud-storage/lib/google/cloud/storage/bucket.rb index b55485149875..e0cd98b00300 100644 --- a/google-cloud-storage/lib/google/cloud/storage/bucket.rb +++ b/google-cloud-storage/lib/google/cloud/storage/bucket.rb @@ -1805,7 +1805,7 @@ def create_file file, path ||= file.path if file.respond_to? :path path ||= file if file.is_a? String raise ArgumentError, "must provide path" if path.nil? - checksum = :crc32c if checksum == nil && crc32c.nil? && md5.nil? + checksum = :crc32c if checksum.nil? && crc32c.nil? && md5.nil? crc32c = crc32c_for file, checksum, crc32c md5 = md5_for file, checksum, md5 From 9000bdb7327238746db8c1b028be45c1b8071cd6 Mon Sep 17 00:00:00 2001 From: Shubhangi Singh Date: Thu, 21 Aug 2025 09:06:32 +0000 Subject: [PATCH 3/9] modifying tests --- .../test/google/cloud/storage/bucket_test.rb | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/google-cloud-storage/test/google/cloud/storage/bucket_test.rb b/google-cloud-storage/test/google/cloud/storage/bucket_test.rb index b32f6a23c0e0..cf7861a4349a 100644 --- a/google-cloud-storage/test/google/cloud/storage/bucket_test.rb +++ b/google-cloud-storage/test/google/cloud/storage/bucket_test.rb @@ -127,7 +127,7 @@ new_file_contents = StringIO.new "Hello world" mock = Minitest::Mock.new mock.expect :insert_object, create_file_gapi(bucket.name, new_file_name), - [bucket.name, empty_file_gapi(crc32c: "crUfeA==")], **insert_object_args(name: new_file_name, upload_source: new_file_contents, options: {retries: 0}) + [bucket.name, empty_file_gapi], **insert_object_args(name: new_file_name, upload_source: new_file_contents, options: {retries: 0}) bucket.service.mocked_service = mock @@ -629,9 +629,11 @@ new_file_name = random_file_path Tempfile.create ["google-cloud", ".txt"] do |tmpfile| + + crc32c = Google::Cloud::Storage::File::Verifier.crc32c_for tmpfile mock = Minitest::Mock.new mock.expect :insert_object, create_file_gapi(bucket_user_project.name, new_file_name), - [bucket.name, empty_file_gapi(crc32c: "AAAAAA==")], **insert_object_args(name: new_file_name, upload_source: tmpfile, user_project: "test", options: {retries: 0}) + [bucket.name, empty_file_gapi(crc32c: crc32c)], **insert_object_args(name: new_file_name, upload_source: tmpfile, user_project: "test", options: {retries: 0}) bucket_user_project.service.mocked_service = mock From fae8845fe735b19565c46916f59d7856d95a60b7 Mon Sep 17 00:00:00 2001 From: Shubhangi Singh Date: Thu, 21 Aug 2025 09:11:45 +0000 Subject: [PATCH 4/9] removing commented line --- google-cloud-storage/test/google/cloud/storage/bucket_test.rb | 1 - 1 file changed, 1 deletion(-) diff --git a/google-cloud-storage/test/google/cloud/storage/bucket_test.rb b/google-cloud-storage/test/google/cloud/storage/bucket_test.rb index cf7861a4349a..b200f79aff0e 100644 --- a/google-cloud-storage/test/google/cloud/storage/bucket_test.rb +++ b/google-cloud-storage/test/google/cloud/storage/bucket_test.rb @@ -1454,7 +1454,6 @@ def empty_file_gapi cache_control: nil, content_disposition: nil, storage_class: nil, temporary_hold: nil, event_based_hold: nil - # new_file_contents = StringIO.new # Set crc32c if both md5 and crc32c are not provided if md5.nil? && crc32c.nil? crc32c = Google::Cloud::Storage::File::Verifier.crc32c_for(StringIO.new("Hello world")) From 0147623c7fbc8a88e3cc3aca8c833e1f8ce89b0a Mon Sep 17 00:00:00 2001 From: Shubhangi Singh Date: Thu, 21 Aug 2025 11:23:21 +0000 Subject: [PATCH 5/9] modifying test cases --- .../lib/google/cloud/storage/file/verifier.rb | 2 ++ .../cloud/storage/bucket_encryption_test.rb | 4 +++ .../test/google/cloud/storage/bucket_test.rb | 8 +++--- .../google/cloud/storage/lazy/bucket_test.rb | 27 ++++++++++++++++++- 4 files changed, 36 insertions(+), 5 deletions(-) diff --git a/google-cloud-storage/lib/google/cloud/storage/file/verifier.rb b/google-cloud-storage/lib/google/cloud/storage/file/verifier.rb index dcfefc6c616d..52bf667bf976 100644 --- a/google-cloud-storage/lib/google/cloud/storage/file/verifier.rb +++ b/google-cloud-storage/lib/google/cloud/storage/file/verifier.rb @@ -55,6 +55,7 @@ def self.md5_for local_file ::Digest::MD5.file(f).base64digest end else # StringIO + (local_file = ::File.open Pathname(local_file)) unless local_file.respond_to? :rewind local_file.rewind md5 = ::Digest::MD5.base64digest local_file.read local_file.rewind @@ -68,6 +69,7 @@ def self.crc32c_for local_file ::Digest::CRC32c.file(f).base64digest end else # StringIO + (local_file = ::File.open Pathname(local_file)) unless local_file.respond_to? :rewind local_file.rewind crc32c = ::Digest::CRC32c.base64digest local_file.read local_file.rewind diff --git a/google-cloud-storage/test/google/cloud/storage/bucket_encryption_test.rb b/google-cloud-storage/test/google/cloud/storage/bucket_encryption_test.rb index ce617da110e6..7fe89376cfd2 100644 --- a/google-cloud-storage/test/google/cloud/storage/bucket_encryption_test.rb +++ b/google-cloud-storage/test/google/cloud/storage/bucket_encryption_test.rb @@ -131,6 +131,10 @@ def empty_file_gapi cache_control: nil, content_disposition: nil, content_encoding: nil, content_language: nil, content_type: nil, crc32c: nil, md5: nil, metadata: nil, storage_class: nil + # Set crc32c if both md5 and crc32c are not provided + if md5.nil? && crc32c.nil? + crc32c = Google::Cloud::Storage::File::Verifier.crc32c_for(StringIO.new("Hello world")) + end params = { cache_control: cache_control, content_type: content_type, content_disposition: content_disposition, md5_hash: md5, diff --git a/google-cloud-storage/test/google/cloud/storage/bucket_test.rb b/google-cloud-storage/test/google/cloud/storage/bucket_test.rb index b200f79aff0e..7b9685f1dc08 100644 --- a/google-cloud-storage/test/google/cloud/storage/bucket_test.rb +++ b/google-cloud-storage/test/google/cloud/storage/bucket_test.rb @@ -1454,10 +1454,10 @@ def empty_file_gapi cache_control: nil, content_disposition: nil, storage_class: nil, temporary_hold: nil, event_based_hold: nil - # Set crc32c if both md5 and crc32c are not provided - if md5.nil? && crc32c.nil? - crc32c = Google::Cloud::Storage::File::Verifier.crc32c_for(StringIO.new("Hello world")) - end + # Set crc32c if both md5 and crc32c are not provided + if md5.nil? && crc32c.nil? + crc32c = Google::Cloud::Storage::File::Verifier.crc32c_for(StringIO.new("Hello world")) + end params = { cache_control: cache_control, content_type: content_type, diff --git a/google-cloud-storage/test/google/cloud/storage/lazy/bucket_test.rb b/google-cloud-storage/test/google/cloud/storage/lazy/bucket_test.rb index c304f54b3838..f801118a5e54 100644 --- a/google-cloud-storage/test/google/cloud/storage/lazy/bucket_test.rb +++ b/google-cloud-storage/test/google/cloud/storage/lazy/bucket_test.rb @@ -244,6 +244,26 @@ mock.verify end end + + it "creates a file with checksum: :crc32c by default" do + new_file_name = random_file_path + + Tempfile.open ["google-cloud", ".txt"] do |tmpfile| + tmpfile.write "Hello world!" + tmpfile.rewind + + crc32c = Google::Cloud::Storage::File::Verifier.crc32c_for tmpfile + + mock = Minitest::Mock.new + mock.expect :insert_object, create_file_gapi(bucket.name, new_file_name), + [bucket.name, empty_file_gapi(crc32c: crc32c)], **insert_object_args(name: new_file_name, upload_source: tmpfile, options: {retries: 0}) + + bucket.service.mocked_service = mock + bucket.create_file tmpfile, new_file_name + + mock.verify + end + end it "creates a file with attributes" do new_file_name = random_file_path @@ -340,9 +360,10 @@ new_file_name = random_file_path Tempfile.create ["google-cloud", ".txt"] do |tmpfile| + crc32c = Google::Cloud::Storage::File::Verifier.crc32c_for tmpfile mock = Minitest::Mock.new mock.expect :insert_object, create_file_gapi(bucket_user_project.name, new_file_name), - [bucket.name, empty_file_gapi], **insert_object_args(name: new_file_name, upload_source: tmpfile, user_project: "test", options: {retries: 0}) + [bucket.name, empty_file_gapi(crc32c: crc32c)], **insert_object_args(name: new_file_name, upload_source: tmpfile, user_project: "test", options: {retries: 0}) bucket_user_project.service.mocked_service = mock @@ -1091,6 +1112,10 @@ def empty_file_gapi cache_control: nil, content_disposition: nil, content_encoding: nil, content_language: nil, content_type: nil, crc32c: nil, md5: nil, metadata: nil, storage_class: nil + # Set crc32c if both md5 and crc32c are not provided + if md5.nil? && crc32c.nil? + crc32c = Google::Cloud::Storage::File::Verifier.crc32c_for(StringIO.new("Hello world")) + end params = { cache_control: cache_control, content_type: content_type, content_disposition: content_disposition, md5_hash: md5, From 13275c451929e7fb0617f6c0393af73e7aee0b98 Mon Sep 17 00:00:00 2001 From: Shubhangi Singh Date: Thu, 21 Aug 2025 17:30:28 +0530 Subject: [PATCH 6/9] Update bucket_test.rb --- .../test/google/cloud/storage/lazy/bucket_test.rb | 1 - 1 file changed, 1 deletion(-) diff --git a/google-cloud-storage/test/google/cloud/storage/lazy/bucket_test.rb b/google-cloud-storage/test/google/cloud/storage/lazy/bucket_test.rb index f801118a5e54..5f530634f16b 100644 --- a/google-cloud-storage/test/google/cloud/storage/lazy/bucket_test.rb +++ b/google-cloud-storage/test/google/cloud/storage/lazy/bucket_test.rb @@ -253,7 +253,6 @@ tmpfile.rewind crc32c = Google::Cloud::Storage::File::Verifier.crc32c_for tmpfile - mock = Minitest::Mock.new mock.expect :insert_object, create_file_gapi(bucket.name, new_file_name), [bucket.name, empty_file_gapi(crc32c: crc32c)], **insert_object_args(name: new_file_name, upload_source: tmpfile, options: {retries: 0}) From a89ca600de9b24b7b5eab792da974a837861fbf1 Mon Sep 17 00:00:00 2001 From: Shubhangi Singh Date: Thu, 16 Oct 2025 05:23:58 +0000 Subject: [PATCH 7/9] closing the intentionally opened file --- .../lib/google/cloud/storage/file/verifier.rb | 65 +++++++++++++------ 1 file changed, 44 insertions(+), 21 deletions(-) diff --git a/google-cloud-storage/lib/google/cloud/storage/file/verifier.rb b/google-cloud-storage/lib/google/cloud/storage/file/verifier.rb index 52bf667bf976..c6d3626f7a77 100644 --- a/google-cloud-storage/lib/google/cloud/storage/file/verifier.rb +++ b/google-cloud-storage/lib/google/cloud/storage/file/verifier.rb @@ -48,32 +48,55 @@ def self.verify_md5 gcloud_file, local_file def self.verify_crc32c gcloud_file, local_file gcloud_file.crc32c == crc32c_for(local_file) end + # Calculates MD5 digest using either file path or open stream. + def self.md5_for(local_file) + _digest_for(local_file, ::Digest::MD5) + end - def self.md5_for local_file - if local_file.respond_to? :to_path - ::File.open Pathname(local_file).to_path, "rb" do |f| - ::Digest::MD5.file(f).base64digest - end - else # StringIO - (local_file = ::File.open Pathname(local_file)) unless local_file.respond_to? :rewind - local_file.rewind - md5 = ::Digest::MD5.base64digest local_file.read - local_file.rewind - md5 - end + # Calculates CRC32c digest using either file path or open stream. + def self.crc32c_for(local_file) + _digest_for(local_file, ::Digest::CRC32c) end - def self.crc32c_for local_file - if local_file.respond_to? :to_path + private + + # @private + # Computes a base64-encoded digest for a local file or IO stream. + # + # This method handles two types of inputs for `local_file`: + # 1. A file path (String or Pathname): It efficiently streams the file + # to compute the digest without loading the entire file into memory. + # 2. An IO-like stream (e.g., File, StringIO): It reads the stream's + # content to compute the digest. The stream is rewound before and after + # reading to ensure its position is not permanently changed. + # + # @param local_file [String, Pathname, IO] The local file path or IO + # stream for which to compute the digest. + # @param digest_class [Class] The digest class to use for the + # calculation (e.g., `Digest::MD5`). It must respond to `.file` and + # `.base64digest`. + # + # @return [String] The base64-encoded digest of the file's content. + # + def self._digest_for(local_file, digest_class) + if local_file.respond_to?(:to_path) + # Case 1: Input is a file path (or Pathname). Use the safe block form. ::File.open Pathname(local_file).to_path, "rb" do |f| - ::Digest::CRC32c.file(f).base64digest + digest_class.file(f).base64digest + end + else + # Case 2: Input is an open stream (like File or StringIO). + file_to_close = nil + file_to_close = local_file = ::File.open(Pathname(local_file).to_path, "rb") unless local_file.respond_to?(:rewind) + begin + local_file.rewind + digest = digest_class.base64digest local_file.read + local_file.rewind + digest + ensure + # Only close the stream if we explicitly opened it + file_to_close.close if file_to_close.respond_to?(:close) && !file_to_close.closed? end - else # StringIO - (local_file = ::File.open Pathname(local_file)) unless local_file.respond_to? :rewind - local_file.rewind - crc32c = ::Digest::CRC32c.base64digest local_file.read - local_file.rewind - crc32c end end end From cbcbe63f15ed89cbc846a122d37f6542f978588d Mon Sep 17 00:00:00 2001 From: Shubhangi Singh Date: Thu, 16 Oct 2025 05:31:44 +0000 Subject: [PATCH 8/9] lintfix --- .../lib/google/cloud/storage/file/verifier.rb | 22 ++++++++++--------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/google-cloud-storage/lib/google/cloud/storage/file/verifier.rb b/google-cloud-storage/lib/google/cloud/storage/file/verifier.rb index c6d3626f7a77..de807bd2dff8 100644 --- a/google-cloud-storage/lib/google/cloud/storage/file/verifier.rb +++ b/google-cloud-storage/lib/google/cloud/storage/file/verifier.rb @@ -48,18 +48,17 @@ def self.verify_md5 gcloud_file, local_file def self.verify_crc32c gcloud_file, local_file gcloud_file.crc32c == crc32c_for(local_file) end + # Calculates MD5 digest using either file path or open stream. - def self.md5_for(local_file) - _digest_for(local_file, ::Digest::MD5) + def self.md5_for local_file + _digest_for local_file, ::Digest::MD5 end # Calculates CRC32c digest using either file path or open stream. - def self.crc32c_for(local_file) - _digest_for(local_file, ::Digest::CRC32c) + def self.crc32c_for local_file + _digest_for local_file, ::Digest::CRC32c end - private - # @private # Computes a base64-encoded digest for a local file or IO stream. # @@ -78,8 +77,8 @@ def self.crc32c_for(local_file) # # @return [String] The base64-encoded digest of the file's content. # - def self._digest_for(local_file, digest_class) - if local_file.respond_to?(:to_path) + def self._digest_for local_file, digest_class + if local_file.respond_to? :to_path # Case 1: Input is a file path (or Pathname). Use the safe block form. ::File.open Pathname(local_file).to_path, "rb" do |f| digest_class.file(f).base64digest @@ -87,14 +86,17 @@ def self._digest_for(local_file, digest_class) else # Case 2: Input is an open stream (like File or StringIO). file_to_close = nil - file_to_close = local_file = ::File.open(Pathname(local_file).to_path, "rb") unless local_file.respond_to?(:rewind) + unless local_file.respond_to? :rewind + file_to_close = local_file = ::File.open Pathname(local_file).to_path, + "rb" + end begin local_file.rewind digest = digest_class.base64digest local_file.read local_file.rewind digest ensure - # Only close the stream if we explicitly opened it + # Only close the stream if we explicitly opened it file_to_close.close if file_to_close.respond_to?(:close) && !file_to_close.closed? end end From 6158a74c8803d5137938b92e396e0ad27e6d0f4e Mon Sep 17 00:00:00 2001 From: Shubhangi Singh Date: Wed, 19 Nov 2025 04:46:16 +0000 Subject: [PATCH 9/9] refactor --- .../lib/google/cloud/storage/file/verifier.rb | 25 ++++++------------- 1 file changed, 8 insertions(+), 17 deletions(-) diff --git a/google-cloud-storage/lib/google/cloud/storage/file/verifier.rb b/google-cloud-storage/lib/google/cloud/storage/file/verifier.rb index de807bd2dff8..c65f33bb7c08 100644 --- a/google-cloud-storage/lib/google/cloud/storage/file/verifier.rb +++ b/google-cloud-storage/lib/google/cloud/storage/file/verifier.rb @@ -78,27 +78,18 @@ def self.crc32c_for local_file # @return [String] The base64-encoded digest of the file's content. # def self._digest_for local_file, digest_class - if local_file.respond_to? :to_path - # Case 1: Input is a file path (or Pathname). Use the safe block form. + + if local_file.respond_to?(:to_path) || local_file.is_a?(String) + # Case 1: Input is a file path (String, Pathname, or object that responds to :to_path). ::File.open Pathname(local_file).to_path, "rb" do |f| digest_class.file(f).base64digest end else - # Case 2: Input is an open stream (like File or StringIO). - file_to_close = nil - unless local_file.respond_to? :rewind - file_to_close = local_file = ::File.open Pathname(local_file).to_path, - "rb" - end - begin - local_file.rewind - digest = digest_class.base64digest local_file.read - local_file.rewind - digest - ensure - # Only close the stream if we explicitly opened it - file_to_close.close if file_to_close.respond_to?(:close) && !file_to_close.closed? - end + # Case 2: Input is an open stream (File or StringIO). + local_file.rewind + digest = digest_class.base64digest local_file.read + local_file.rewind + digest end end end