diff --git a/gems/aws-sdk-s3/CHANGELOG.md b/gems/aws-sdk-s3/CHANGELOG.md index 945e1e7a102..61e9b8697c0 100644 --- a/gems/aws-sdk-s3/CHANGELOG.md +++ b/gems/aws-sdk-s3/CHANGELOG.md @@ -1,6 +1,8 @@ Unreleased Changes ------------------ +* Issue - Normalize response encoding to UTF-8 for proper XML error parsing in HTTP 200 responses. + 1.210.0 (2026-01-05) ------------------ diff --git a/gems/aws-sdk-s3/lib/aws-sdk-s3/plugins/http_200_errors.rb b/gems/aws-sdk-s3/lib/aws-sdk-s3/plugins/http_200_errors.rb index d8538b84e9b..ca4a649af43 100644 --- a/gems/aws-sdk-s3/lib/aws-sdk-s3/plugins/http_200_errors.rb +++ b/gems/aws-sdk-s3/lib/aws-sdk-s3/plugins/http_200_errors.rb @@ -3,15 +3,28 @@ module Aws module S3 module Plugins - # A handful of Amazon S3 operations will respond with a 200 status # code but will send an error in the response body. This plugin # injects a handler that will parse 200 response bodies for potential # errors, allowing them to be retried. # @api private class Http200Errors < Seahorse::Client::Plugin - class Handler < Seahorse::Client::Handler + # A regular expression to match error codes in the response body + CODE_PATTERN = %r{(.+?)}.freeze + private_constant :CODE_PATTERN + + # A list of encodings we force into UTF-8 + ENCODINGS_TO_FIX = [Encoding::US_ASCII, Encoding::ASCII_8BIT].freeze + private_constant :ENCODINGS_TO_FIX + + # A regular expression to match detect errors in the response body + ERROR_PATTERN = /<\?xml\s[^>]*\?>\s*/.freeze + private_constant :ERROR_PATTERN + + # A regular expression to match an error message in the response body + MESSAGE_PATTERN = %r{(.+?)}.freeze + private_constant :MESSAGE_PATTERN def call(context) @handler.call(context).on(200) do |response| @@ -28,29 +41,37 @@ def call(context) private - # Streaming outputs are not subject to 200 errors. - def streaming_output?(output) - if (payload = output[:payload_member]) - # checking ref and shape - payload['streaming'] || payload.shape['streaming'] || - payload.eventstream - else - false + def build_error(context, code, message) + S3::Errors.error_class(code).new(context, message) + end + + def check_for_error(context) + xml = normalize_encoding(context.http_response.body_contents) + + if xml.match?(ERROR_PATTERN) + error_code = xml.match(CODE_PATTERN)[1] + error_message = xml.match(MESSAGE_PATTERN)[1] + build_error(context, error_code, error_message) + elsif incomplete_xml_body?(xml, context.operation.output) + Seahorse::Client::NetworkingError.new( + build_error(context, 'InternalError', 'Empty or incomplete response body') + ) end end + # Must have a member in the body and have the start of an XML Tag. + # Other incomplete xml bodies will result in an XML ParsingError. + def incomplete_xml_body?(xml, output) + members_in_body?(output) && !xml.match(/<\w/) + end + # Checks if the output shape is a structure shape and has members that # are in the body for the case of a payload and a normal structure. A # non-structure shape will not have members in the body. In the case # of a string or blob, the body contents would have been checked first # before this method is called in incomplete_xml_body?. def members_in_body?(output) - shape = - if output[:payload_member] - output[:payload_member].shape - else - output.shape - end + shape = resolve_shape(output) if structure_shape?(shape) shape.members.any? { |_, k| k.location.nil? } @@ -59,30 +80,33 @@ def members_in_body?(output) end end - def structure_shape?(shape) - shape.is_a?(Seahorse::Model::Shapes::StructureShape) + # Fixes encoding issues when S3 returns UTF-8 content with missing charset in Content-Type header or omits + # Content-Type header entirely. Net::HTTP defaults to US-ASCII or ASCII-8BIT when charset is unspecified. + def normalize_encoding(xml) + return xml unless xml.is_a?(String) && ENCODINGS_TO_FIX.include?(xml.encoding) + + xml.force_encoding('UTF-8') end - # Must have a member in the body and have the start of an XML Tag. - # Other incomplete xml bodies will result in an XML ParsingError. - def incomplete_xml_body?(xml, output) - members_in_body?(output) && !xml.match(/<\w/) + def resolve_shape(output) + return output.shape unless output[:payload_member] + + output[:payload_member].shape end - def check_for_error(context) - xml = context.http_response.body_contents - if xml.match(/<\?xml\s[^>]*\?>\s*/) - error_code = xml.match(%r{(.+?)})[1] - error_message = xml.match(%r{(.+?)})[1] - S3::Errors.error_class(error_code).new(context, error_message) - elsif incomplete_xml_body?(xml, context.operation.output) - Seahorse::Client::NetworkingError.new( - S3::Errors - .error_class('InternalError') - .new(context, 'Empty or incomplete response body') - ) + # Streaming outputs are not subject to 200 errors. + def streaming_output?(output) + if (payload = output[:payload_member]) + # checking ref and shape + payload['streaming'] || payload.shape['streaming'] || payload.eventstream + else + false end end + + def structure_shape?(shape) + shape.is_a?(Seahorse::Model::Shapes::StructureShape) + end end handler(Handler, step: :sign) diff --git a/gems/aws-sdk-s3/spec/plugins/http_200_errors_spec.rb b/gems/aws-sdk-s3/spec/plugins/http_200_errors_spec.rb new file mode 100644 index 00000000000..6b51f7e2977 --- /dev/null +++ b/gems/aws-sdk-s3/spec/plugins/http_200_errors_spec.rb @@ -0,0 +1,58 @@ +# frozen_string_literal: true + +require_relative '../spec_helper' + +module Aws + module S3 + module Plugins + describe Http200Errors do + let(:creds) { Aws::Credentials.new('akid', 'secret') } + let(:client) { S3::Client.new(region: 'us-east-1', credentials: creds, retry_limit: 0) } + + it 'correctly parses error in response body with proper encoding' do + error_xml = <<~XML + + + NoSuchKey + The specified key does not exist. + + XML + + stub_request(:post, 'https://test-bucket.s3.amazonaws.com/?delete') + .to_return(status: 200, body: error_xml, headers: { 'content-type' => 'application/xml' }) + + expect { client.delete_objects(bucket: 'test-bucket', delete: { objects: [{ key: 'test' }] }) } + .to raise_error(Errors::NoSuchKey) + end + + it 'correctly parses error when incomplete body is given' do + stub_request(:post, 'https://test-bucket.s3.amazonaws.com/?delete') + .to_return(status: 200, body: '', headers: { 'content-type' => 'application/xml' }) + + expect { client.delete_objects(bucket: 'test-bucket', delete: { objects: [{ key: 'test' }] }) } + .to raise_error(Seahorse::Client::NetworkingError, /Empty or incomplete response body/) + end + + it 'gracefully handle non-UTF encoding' do + response = <<~XML + + + + test + "abc123" + + + XML + + # No headers to replicate omitted Content-Type header + stub_request(:post, 'https://test-bucket.s3.amazonaws.com/?delete') + .to_return(status: 200, body: response, headers: {}) + + expect { client.delete_objects(bucket: 'test-bucket', delete: { objects: [{ key: 'test' }] }) } + .not_to raise_error + end + + end + end + end +end