Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changes/next-release/enhancement-s3-54404.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{
"type": "enhancement",
"category": "``s3``",
"description": "Adds new parameter ``--case-conflict`` that configures how case conflicts are handled on case-insensitive filesystems"
}
4 changes: 4 additions & 0 deletions awscli/customizations/s3/filegenerator.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,8 @@ def __init__(
operation_name=None,
response_data=None,
etag=None,
case_conflict_submitted=None,
case_conflict_key=None,
):
self.src = src
self.dest = dest
Expand All @@ -120,6 +122,8 @@ def __init__(
self.operation_name = operation_name
self.response_data = response_data
self.etag = etag
self.case_conflict_submitted = case_conflict_submitted
self.case_conflict_key = case_conflict_key


class FileGenerator:
Expand Down
4 changes: 4 additions & 0 deletions awscli/customizations/s3/fileinfo.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,8 @@ def __init__(
is_stream=False,
associated_response_data=None,
etag=None,
case_conflict_submitted=None,
case_conflict_key=None,
):
self.src = src
self.src_type = src_type
Expand All @@ -73,6 +75,8 @@ def __init__(
self.is_stream = is_stream
self.associated_response_data = associated_response_data
self.etag = etag
self.case_conflict_submitted = case_conflict_submitted
self.case_conflict_key = case_conflict_key

def is_glacier_compatible(self):
"""Determines if a file info object is glacier compatible
Expand Down
6 changes: 6 additions & 0 deletions awscli/customizations/s3/fileinfobuilder.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,12 @@ def _inject_info(self, file_base):
file_info_attr['is_stream'] = self._is_stream
file_info_attr['associated_response_data'] = file_base.response_data
file_info_attr['etag'] = file_base.etag
file_info_attr['case_conflict_submitted'] = getattr(
file_base, 'case_conflict_submitted', None
)
file_info_attr['case_conflict_key'] = getattr(
file_base, 'case_conflict_key', None
)

# This is a bit quirky. The below conditional hinges on the --delete
# flag being set, which only occurs during a sync command. The source
Expand Down
8 changes: 8 additions & 0 deletions awscli/customizations/s3/s3handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
SuccessResult,
)
from awscli.customizations.s3.subscribers import (
CaseConflictCleanupSubscriber,
CopyPropsSubscriberFactory,
DeleteCopySourceObjectSubscriber,
DeleteSourceFileSubscriber,
Expand Down Expand Up @@ -425,6 +426,13 @@ def _add_additional_subscribers(self, subscribers, fileinfo):
subscribers.append(
DeleteSourceObjectSubscriber(fileinfo.source_client)
)
if fileinfo.case_conflict_submitted is not None:
subscribers.append(
CaseConflictCleanupSubscriber(
fileinfo.case_conflict_submitted,
fileinfo.case_conflict_key,
)
)

def _submit_transfer_request(self, fileinfo, extra_args, subscribers):
bucket, key = find_bucket_key(fileinfo.src)
Expand Down
138 changes: 132 additions & 6 deletions awscli/customizations/s3/subcommands.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,12 @@
from awscli.customizations.s3.filters import create_filter
from awscli.customizations.s3.s3handler import S3TransferHandlerFactory
from awscli.customizations.s3.syncstrategy.base import (
AlwaysSync,
MissingFileSync,
NeverSync,
SizeAndLastModifiedSync,
)
from awscli.customizations.s3.syncstrategy.caseconflict import CaseConflictSync
from awscli.customizations.s3.utils import (
AppendFilter,
RequestParamsMapper,
Expand Down Expand Up @@ -543,9 +545,7 @@
'name': 'progress-multiline',
'dest': 'progress_multiline',
'action': 'store_true',
'help_text': (
'Show progress on multiple lines.'
),
'help_text': ('Show progress on multiple lines.'),
}


Expand Down Expand Up @@ -664,6 +664,35 @@
),
}


CASE_CONFLICT = {
'name': 'case-conflict',
'choices': [
'ignore',
'skip',
'warn',
'error',
],
'default': 'warn',
'help_text': (
"Configures behavior when attempting to download multiple objects "
"whose keys differ only by case, which can cause undefined behavior "
"on case-insensitive filesystems. "
"This parameter only applies for commands that perform multiple S3 "
"to local downloads. "
f"See <a href='{CaseConflictSync.DOC_URI}'>Handling case "
"conflicts</a> for details. Valid values are: "
"<ul>"
"<li>``error`` - Raise an error and abort downloads.</li>"
"<li>``warn`` - The default value. Emit a warning and download "
"the object.</li>"
"<li>``skip`` - Skip downloading the object.</li>"
"<li>``ignore`` - Ignore the conflict and download the object.</li>"
"</ul>"
),
}


TRANSFER_ARGS = [
DRYRUN,
QUIET,
Expand Down Expand Up @@ -1081,7 +1110,14 @@ class CpCommand(S3TransferCommand):
}
]
+ TRANSFER_ARGS
+ [METADATA, COPY_PROPS, METADATA_DIRECTIVE, EXPECTED_SIZE, RECURSIVE]
+ [
METADATA,
COPY_PROPS,
METADATA_DIRECTIVE,
EXPECTED_SIZE,
RECURSIVE,
CASE_CONFLICT,
]
)


Expand All @@ -1105,6 +1141,7 @@ class MvCommand(S3TransferCommand):
METADATA_DIRECTIVE,
RECURSIVE,
VALIDATE_SAME_S3_PATHS,
CASE_CONFLICT,
]
)

Expand Down Expand Up @@ -1150,7 +1187,7 @@ class SyncCommand(S3TransferCommand):
}
]
+ TRANSFER_ARGS
+ [METADATA, COPY_PROPS, METADATA_DIRECTIVE]
+ [METADATA, COPY_PROPS, METADATA_DIRECTIVE, CASE_CONFLICT]
)


Expand Down Expand Up @@ -1307,7 +1344,16 @@ def choose_sync_strategies(self):
sync_strategies['file_at_src_and_dest_sync_strategy'] = (
SizeAndLastModifiedSync()
)
sync_strategies['file_not_at_dest_sync_strategy'] = MissingFileSync()
if self._should_handle_case_conflicts():
sync_strategies['file_not_at_dest_sync_strategy'] = (
CaseConflictSync(
on_case_conflict=self.parameters['case_conflict']
)
)
else:
sync_strategies['file_not_at_dest_sync_strategy'] = (
MissingFileSync()
)
sync_strategies['file_not_at_src_sync_strategy'] = NeverSync()

# Determine what strategies to override if any.
Expand Down Expand Up @@ -1440,6 +1486,12 @@ def run(self):
'file_info_builder': [file_info_builder],
's3_handler': [s3_transfer_handler],
}
if self._should_handle_case_conflicts():
self._handle_case_conflicts(
command_dict,
rev_files,
rev_generator,
)
elif self.cmd == 'rm':
command_dict = {
'setup': [files],
Expand All @@ -1456,6 +1508,12 @@ def run(self):
'file_info_builder': [file_info_builder],
's3_handler': [s3_transfer_handler],
}
if self._should_handle_case_conflicts():
self._handle_case_conflicts(
command_dict,
rev_files,
rev_generator,
)

files = command_dict['setup']
while self.instructions:
Expand Down Expand Up @@ -1517,6 +1575,74 @@ def _map_sse_c_params(self, request_parameters, paths_type):
},
)

def _should_handle_case_conflicts(self):
return (
self.cmd in {'sync', 'cp', 'mv'}
and self.parameters.get('paths_type') == 's3local'
and self.parameters['case_conflict'] != 'ignore'
and self.parameters.get('dir_op')
)

def _handle_case_conflicts(self, command_dict, rev_files, rev_generator):
# Objects are not returned in lexicographical order when
# operated on S3 Express directory buckets. This is required
# for sync operations to behave correctly, which is what
# recursive copies and moves fall back to so potential case
# conflicts can be detected and handled.
if not is_s3express_bucket(
split_s3_bucket_key(self.parameters['src'])[0]
):
self._modify_instructions_for_case_conflicts(
command_dict, rev_files, rev_generator
)
return
# `skip` and `error` are not valid choices in this case because
# it's not possible to detect case conflicts.
if self.parameters['case_conflict'] not in {'ignore', 'warn'}:
raise ParamValidationError(
f"`{self.parameters['case_conflict']}` is not a valid value "
"for `--case-conflict` when operating on S3 Express "
"directory buckets. Valid values: `warn`, `ignore`."
)
msg = (
"warning: Recursive copies/moves from an S3 Express "
"directory bucket to a case-insensitive local filesystem "
"may result in undefined behavior if there are "
"S3 object key names that differ only by case. To disable "
"this warning, set the `--case-conflict` parameter to `ignore`. "
f"For more information, see {CaseConflictSync.DOC_URI}."
)
uni_print(msg, sys.stderr)

def _modify_instructions_for_case_conflicts(
self, command_dict, rev_files, rev_generator
):
# Command will perform recursive S3 to local downloads.
# Checking for potential case conflicts requires knowledge
# of local files. Instead of writing a separate validation
# mechanism for recursive downloads, we modify the instructions
# to mimic a sync command.
sync_strategies = {
# Local filename exists with exact case match. Always sync
# because it's a copy operation.
'file_at_src_and_dest_sync_strategy': AlwaysSync(),
# Local filename either doesn't exist or differs only by case.
# Let `CaseConflictSync` determine which it is and handle it
# according to configured `--case-conflict` parameter.
'file_not_at_dest_sync_strategy': CaseConflictSync(
on_case_conflict=self.parameters['case_conflict']
),
# Copy is one-way so never sync if not at source.
'file_not_at_src_sync_strategy': NeverSync(),
}
command_dict['setup'].append(rev_files)
command_dict['file_generator'].append(rev_generator)
command_dict['filters'].append(create_filter(self.parameters))
command_dict['comparator'] = [Comparator(**sync_strategies)]
self.instructions.insert(
self.instructions.index('file_info_builder'), 'comparator'
)


# TODO: This class is fairly quirky in the sense that it is both a builder
# and a data object. In the future we should make the following refactorings
Expand Down
14 changes: 14 additions & 0 deletions awscli/customizations/s3/subscribers.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,20 @@ def on_queued(self, future, **kwargs):
)


class CaseConflictCleanupSubscriber(BaseSubscriber):
"""
A subscriber which removes object compare key from case conflict set
when download finishes.
"""

def __init__(self, submitted, case_conflict_key):
self._submitted = submitted
self._key = case_conflict_key

def on_done(self, future, **kwargs):
self._submitted.discard(self._key)


class DeleteSourceSubscriber(OnDoneFilteredSubscriber):
"""A subscriber which deletes the source of the transfer."""

Expand Down
9 changes: 9 additions & 0 deletions awscli/customizations/s3/syncstrategy/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -264,3 +264,12 @@ def determine_should_sync(self, src_file, dest_file):
src_file.dest,
)
return True


class AlwaysSync(BaseSync):
def __init__(self, sync_type='file_at_src_and_dest'):
super(AlwaysSync, self).__init__(sync_type)

def determine_should_sync(self, src_file, dest_file):
LOG.debug(f"syncing: {src_file.src} -> {src_file.dest}")
return True
Loading
Loading