diff --git a/.changes/next-release/enhancement-cloudtrail-89408.json b/.changes/next-release/enhancement-cloudtrail-89408.json new file mode 100644 index 000000000000..a752bebf8659 --- /dev/null +++ b/.changes/next-release/enhancement-cloudtrail-89408.json @@ -0,0 +1,5 @@ +{ + "type": "enhancement", + "category": "cloudtrail", + "description": "Fixed performance issue in cloudtrail validate-logs command by scoping S3 digest file listing to the trail's region instead of processing digest files from all regions." +} diff --git a/awscli/customizations/cloudtrail/validation.py b/awscli/customizations/cloudtrail/validation.py index f4229bade451..d5255f4b8438 100644 --- a/awscli/customizations/cloudtrail/validation.py +++ b/awscli/customizations/cloudtrail/validation.py @@ -274,9 +274,10 @@ def load_digest_keys_in_range(self, bucket, prefix, start_date, end_date): """ digests = [] marker = self._create_digest_key(start_date, prefix) + s3_digest_files_prefix = self._create_digest_prefix(start_date, prefix) client = self._client_provider.get_client(bucket) paginator = client.get_paginator('list_objects') - page_iterator = paginator.paginate(Bucket=bucket, Marker=marker) + page_iterator = paginator.paginate(Bucket=bucket, Marker=marker, Prefix=s3_digest_files_prefix) key_filter = page_iterator.search('Contents[*].Key') # Create a target start end end date target_start_date = format_date(normalize_date(start_date)) @@ -287,7 +288,7 @@ def load_digest_keys_in_range(self, bucket, prefix, start_date, end_date): # Ensure digests are from the same trail. digest_key_regex = re.compile(self._create_digest_key_regex(prefix)) for key in key_filter: - if digest_key_regex.match(key): + if key and digest_key_regex.match(key): # Use a lexicographic comparison to know when to stop. extracted_date = extract_digest_key_date(key) if extracted_date > target_end_date: @@ -358,6 +359,25 @@ def _create_digest_key(self, start_date, key_prefix): key = key_prefix + '/' + key return key + def _create_digest_prefix(self, start_date, key_prefix): + """Creates an S3 prefix to scope listing to trail's region. + + :return: Returns a prefix string to limit S3 listing scope. + """ + template = 'AWSLogs/' + template_params = { + 'account_id': self.account_id, + 'source_region': self.trail_source_region + } + if self.organization_id: + template += '{organization_id}/' + template_params['organization_id'] = self.organization_id + template += '{account_id}/CloudTrail-Digest/{source_region}' + prefix = template.format(**template_params) + if key_prefix: + prefix = key_prefix + '/' + prefix + return prefix + def _create_digest_key_regex(self, key_prefix): """Creates a regular expression used to match against S3 keys""" template = 'AWSLogs/' diff --git a/tests/unit/customizations/cloudtrail/test_validation.py b/tests/unit/customizations/cloudtrail/test_validation.py index 6c9a28599994..41a8f485cb4a 100644 --- a/tests/unit/customizations/cloudtrail/test_validation.py +++ b/tests/unit/customizations/cloudtrail/test_validation.py @@ -595,15 +595,16 @@ def test_calls_list_objects_correctly(self): mock_search = mock_paginate.return_value.search mock_search.return_value = [] provider = self._get_mock_provider(s3_client) - provider.load_digest_keys_in_range('1', 'prefix', START_DATE, END_DATE) - marker = ( - 'prefix/AWSLogs/{account}/CloudTrail-Digest/us-east-1/' - '2014/08/09/{account}_CloudTrail-Digest_us-east-1_foo_' - 'us-east-1_20140809T235900Z.json.gz' - ) + provider.load_digest_keys_in_range( + '1', 'prefix', START_DATE, END_DATE) + marker = ('prefix/AWSLogs/{account}/CloudTrail-Digest/us-east-1/' + '2014/08/09/{account}_CloudTrail-Digest_us-east-1_foo_' + 'us-east-1_20140809T235900Z.json.gz') + prefix = 'prefix/AWSLogs/{account}/CloudTrail-Digest/us-east-1' mock_paginate.assert_called_once_with( - Bucket='1', Marker=marker.format(account=TEST_ACCOUNT_ID) - ) + Bucket='1', + Marker=marker.format(account=TEST_ACCOUNT_ID), + Prefix=prefix.format(account=TEST_ACCOUNT_ID)) def test_calls_list_objects_correctly_org_trails(self): s3_client = mock.Mock() @@ -627,14 +628,62 @@ def test_calls_list_objects_correctly_org_trails(self): '2014/08/09/{member_account}_CloudTrail-Digest_us-east-1_foo_' 'us-east-1_20140809T235900Z.json.gz' ) + prefix = ( + 'prefix/AWSLogs/{organization_id}/{member_account}/' + 'CloudTrail-Digest/us-east-1' + ) mock_paginate.assert_called_once_with( Bucket='1', Marker=marker.format( member_account=TEST_ORGANIZATION_ACCOUNT_ID, - organization_id=TEST_ORGANIZATION_ID, + organization_id=TEST_ORGANIZATION_ID ), + Prefix=prefix.format( + member_account=TEST_ORGANIZATION_ACCOUNT_ID, + organization_id=TEST_ORGANIZATION_ID + ) ) + def test_create_digest_prefix_without_key_prefix(self): + mock_s3_client_provider = mock.Mock() + provider = DigestProvider( + mock_s3_client_provider, TEST_ACCOUNT_ID, 'foo', 'us-east-1') + prefix = provider._create_digest_prefix(START_DATE, None) + expected = 'AWSLogs/{account}/CloudTrail-Digest/us-east-1'.format( + account=TEST_ACCOUNT_ID) + self.assertEqual(expected, prefix) + + def test_create_digest_prefix_with_key_prefix(self): + mock_s3_client_provider = mock.Mock() + provider = DigestProvider( + mock_s3_client_provider, TEST_ACCOUNT_ID, 'foo', 'us-east-1') + prefix = provider._create_digest_prefix(START_DATE, 'my-prefix') + expected = 'my-prefix/AWSLogs/{account}/CloudTrail-Digest/us-east-1'.format( + account=TEST_ACCOUNT_ID) + self.assertEqual(expected, prefix) + + def test_create_digest_prefix_org_trail(self): + mock_s3_client_provider = mock.Mock() + provider = DigestProvider( + mock_s3_client_provider, TEST_ORGANIZATION_ACCOUNT_ID, + 'foo', 'us-east-1', 'us-east-1', TEST_ORGANIZATION_ID) + prefix = provider._create_digest_prefix(START_DATE, None) + expected = 'AWSLogs/{org}/{account}/CloudTrail-Digest/us-east-1'.format( + org=TEST_ORGANIZATION_ID, + account=TEST_ORGANIZATION_ACCOUNT_ID) + self.assertEqual(expected, prefix) + + def test_create_digest_prefix_org_trail_with_key_prefix(self): + mock_s3_client_provider = mock.Mock() + provider = DigestProvider( + mock_s3_client_provider, TEST_ORGANIZATION_ACCOUNT_ID, + 'foo', 'us-east-1', 'us-east-1', TEST_ORGANIZATION_ID) + prefix = provider._create_digest_prefix(START_DATE, 'custom-prefix') + expected = 'custom-prefix/AWSLogs/{org}/{account}/CloudTrail-Digest/us-east-1'.format( + org=TEST_ORGANIZATION_ID, + account=TEST_ORGANIZATION_ACCOUNT_ID) + self.assertEqual(expected, prefix) + def test_ensures_digest_has_proper_metadata(self): out = BytesIO() f = gzip.GzipFile(fileobj=out, mode="wb")