diff --git a/.gitignore b/.gitignore
index 4bf7237e8..981f56859 100644
--- a/.gitignore
+++ b/.gitignore
@@ -42,6 +42,9 @@ override.tf.json
 *_override.tf
 *_override.tf.json
 
+# Ignore output of data object
+terraform/account-wide-infrastructure/modules/glue/files/src.zip
+
 # Include override files you do wish to add to version control using negated pattern
 #
 # !example_override.tf
diff --git a/terraform/account-wide-infrastructure/dev/athena.tf b/terraform/account-wide-infrastructure/dev/athena.tf
new file mode 100644
index 000000000..cb7e78a97
--- /dev/null
+++ b/terraform/account-wide-infrastructure/dev/athena.tf
@@ -0,0 +1,5 @@
+module "dev-athena" {
+  source             = "../modules/athena"
+  name_prefix        = "nhsd-nrlf--dev"
+  target_bucket_name = module.dev-glue.target_bucket_name
+}
diff --git a/terraform/account-wide-infrastructure/dev/glue.tf b/terraform/account-wide-infrastructure/dev/glue.tf
new file mode 100644
index 000000000..e8fbd713a
--- /dev/null
+++ b/terraform/account-wide-infrastructure/dev/glue.tf
@@ -0,0 +1,5 @@
+module "dev-glue" {
+  source         = "../modules/glue"
+  name_prefix    = "nhsd-nrlf--dev"
+  python_version = 3
+}
diff --git a/terraform/account-wide-infrastructure/modules/athena/athena.tf b/terraform/account-wide-infrastructure/modules/athena/athena.tf
new file mode 100644
index 000000000..b5765e113
--- /dev/null
+++ b/terraform/account-wide-infrastructure/modules/athena/athena.tf
@@ -0,0 +1,32 @@
+resource "aws_athena_database" "reporting-db" {
+  name = var.database
+
+  bucket = var.target_bucket_name
+
+  encryption_configuration {
+    encryption_option = "SSE_KMS"
+    kms_key           = aws_kms_key.athena.arn
+  }
+
+  force_destroy = true
+}
+
+resource "aws_athena_workgroup" "athena" {
+  name = "${var.name_prefix}-athena-wg"
+
+  configuration {
+    enforce_workgroup_configuration    = true
+    publish_cloudwatch_metrics_enabled = true
+
+    result_configuration {
+      # Query results land in this module's encrypted results bucket.
+      output_location = "s3://${aws_s3_bucket.athena.bucket}/output/"
+
+      encryption_configuration {
+
encryption_option = "SSE_KMS" + kms_key_arn = aws_kms_key.athena.arn + } + } + } + +} diff --git a/terraform/account-wide-infrastructure/modules/athena/kms.tf b/terraform/account-wide-infrastructure/modules/athena/kms.tf new file mode 100644 index 000000000..e53d7c96a --- /dev/null +++ b/terraform/account-wide-infrastructure/modules/athena/kms.tf @@ -0,0 +1,7 @@ +resource "aws_kms_key" "athena" { +} + +resource "aws_kms_alias" "athena" { + name = "alias/${var.name_prefix}-athena" + target_key_id = aws_kms_key.athena.key_id +} diff --git a/terraform/account-wide-infrastructure/modules/athena/outputs.tf b/terraform/account-wide-infrastructure/modules/athena/outputs.tf new file mode 100644 index 000000000..574aeb3f8 --- /dev/null +++ b/terraform/account-wide-infrastructure/modules/athena/outputs.tf @@ -0,0 +1,11 @@ +output "workgroup" { + value = aws_athena_workgroup.athena +} + +output "bucket" { + value = aws_s3_bucket.athena +} + +output "database" { + value = aws_athena_database.reporting-db +} diff --git a/terraform/account-wide-infrastructure/modules/athena/s3.tf b/terraform/account-wide-infrastructure/modules/athena/s3.tf new file mode 100644 index 000000000..ea0d144c1 --- /dev/null +++ b/terraform/account-wide-infrastructure/modules/athena/s3.tf @@ -0,0 +1,52 @@ +resource "aws_s3_bucket" "athena" { + bucket = "${var.name_prefix}-athena" +} + +resource "aws_s3_bucket_policy" "athena" { + bucket = "${var.name_prefix}-athena" + + policy = jsonencode({ + Version = "2012-10-17" + Id = "athena-policy" + Statement = [ + { + Sid = "HTTPSOnly" + Effect = "Deny" + Principal = { + "AWS" : "*" + } + Action = "s3:*" + Resource = [ + aws_s3_bucket.athena.arn, + "${aws_s3_bucket.athena.arn}/*", + ] + Condition = { + Bool = { + "aws:SecureTransport" = "false" + } + } + }, + ] + }) +} + +resource "aws_s3_bucket_public_access_block" "athena-public-access-block" { + bucket = aws_s3_bucket.athena.id + + block_public_acls = true + block_public_policy = true + ignore_public_acls = 
true
+  restrict_public_buckets = true
+}
+
+
+resource "aws_s3_bucket_server_side_encryption_configuration" "athena" {
+  bucket = aws_s3_bucket.athena.bucket
+  rule {
+    apply_server_side_encryption_by_default {
+      sse_algorithm     = "aws:kms"
+      kms_master_key_id = aws_kms_key.athena.arn
+    }
+  }
+
+}
diff --git a/terraform/account-wide-infrastructure/modules/athena/vars.tf b/terraform/account-wide-infrastructure/modules/athena/vars.tf
new file mode 100644
index 000000000..d09d6f65c
--- /dev/null
+++ b/terraform/account-wide-infrastructure/modules/athena/vars.tf
@@ -0,0 +1,13 @@
+variable "database" {
+  description = "What the db will be called"
+  default     = "nrl_reporting"
+}
+
+variable "name_prefix" {
+  type        = string
+  description = "The prefix to apply to all resources in the module."
+}
+
+variable "target_bucket_name" {
+  type = string
+}
diff --git a/terraform/account-wide-infrastructure/modules/glue/glue.tf b/terraform/account-wide-infrastructure/modules/glue/glue.tf
new file mode 100644
index 000000000..64cca24f6
--- /dev/null
+++ b/terraform/account-wide-infrastructure/modules/glue/glue.tf
@@ -0,0 +1,60 @@
+# Create Glue Data Catalog Database
+resource "aws_glue_catalog_database" "raw_log_database" {
+  name         = "${var.name_prefix}-raw_log"
+  location_uri = "s3://${aws_s3_bucket.source-data-bucket.id}/"
+}
+
+# Create Glue Crawler
+resource "aws_glue_crawler" "raw_log_crawler" {
+  name          = "${var.name_prefix}-raw-log-crawler"
+  database_name = aws_glue_catalog_database.raw_log_database.name
+  role          = aws_iam_role.glue_service_role.name
+  s3_target {
+    # Crawler targets must be full s3:// URIs, not bare bucket names.
+    path = "s3://${aws_s3_bucket.source-data-bucket.id}/"
+  }
+  schema_change_policy {
+    delete_behavior = "LOG"
+  }
+  configuration = jsonencode({
+    "Version" : 1.0,
+    "Grouping" : {
+      "TableGroupingPolicy" : "CombineCompatibleSchemas"
+    }
+  })
+}
+resource "aws_glue_trigger" "raw_log_trigger" {
+  name = "${var.name_prefix}-org-report-trigger"
+  type = "ON_DEMAND"
+  actions {
+    crawler_name = aws_glue_crawler.raw_log_crawler.name
+  }
+}
+
+resource "aws_glue_job" "glue_job" {
+  name              = "${var.name_prefix}-glue-job"
+  role_arn          = aws_iam_role.glue_service_role.arn
+  description       = "Transfer logs from source to bucket"
+  glue_version      = "4.0"
+  worker_type       = "G.1X"
+  timeout           = 2880
+  max_retries       = 1
+  number_of_workers = 2
+  command {
+    name            = "glueetl"
+    python_version  = var.python_version
+    script_location = "s3://${aws_s3_bucket.code-bucket.id}/main.py"
+  }
+
+  default_arguments = {
+    "--enable-auto-scaling"              = "true"
+    "--enable-continuous-cloudwatch-log" = "true"
+    "--datalake-formats"                 = "delta"
+    "--SOURCE_PATH"                      = "s3://${aws_s3_bucket.source-data-bucket.id}/" # Must match getResolvedOptions(["SOURCE_PATH"]) in src/main.py
+    "--TARGET_PATH"                      = "s3://${aws_s3_bucket.target-data-bucket.id}/" # Must match getResolvedOptions(["TARGET_PATH"]) in src/main.py
+    "--job-name"                         = "poc-glue-job"
+    "--enable-continuous-log-filter"     = "true"
+    "--enable-metrics"                   = "true"
+    "--extra-py-files"                   = "s3://${aws_s3_bucket.code-bucket.id}/src.zip"
+  }
+}
diff --git a/terraform/account-wide-infrastructure/modules/glue/iam.tf b/terraform/account-wide-infrastructure/modules/glue/iam.tf
new file mode 100644
index 000000000..890b47593
--- /dev/null
+++ b/terraform/account-wide-infrastructure/modules/glue/iam.tf
@@ -0,0 +1,21 @@
+resource "aws_iam_role" "glue_service_role" {
+  name = "${var.name_prefix}-glue_service_role"
+
+  assume_role_policy = jsonencode({
+    "Version" : "2012-10-17",
+    "Statement" : [
+      {
+        "Effect" : "Allow",
+        "Principal" : {
+          "Service" : "glue.amazonaws.com"
+        },
+        "Action" : "sts:AssumeRole"
+      }
+    ]
+  })
+}
+
+resource "aws_iam_role_policy_attachment" "glue_service" {
+  role       = aws_iam_role.glue_service_role.id
+  policy_arn = "arn:aws:iam::aws:policy/service-role/AWSGlueServiceRole"
+}
diff --git a/terraform/account-wide-infrastructure/modules/glue/kms.tf b/terraform/account-wide-infrastructure/modules/glue/kms.tf
new file mode 100644
index 000000000..067c1ad5a
--- /dev/null
+++ b/terraform/account-wide-infrastructure/modules/glue/kms.tf
@@ -0,0 +1,7 @@
+resource "aws_kms_key" "glue" {
+}
+
+resource "aws_kms_alias" "glue" {
+  name          = "alias/${var.name_prefix}-glue"
+  target_key_id = aws_kms_key.glue.key_id
+}
diff --git a/terraform/account-wide-infrastructure/modules/glue/outputs.tf b/terraform/account-wide-infrastructure/modules/glue/outputs.tf
new file mode 100644
index 000000000..d17fc4d09
--- /dev/null
+++ b/terraform/account-wide-infrastructure/modules/glue/outputs.tf
@@ -0,0 +1,13 @@
+output "target_bucket_name" {
+  description = "Name of destination bucket"
+  value       = aws_s3_bucket.target-data-bucket.id
+}
+
+output "source_bucket_name" {
+  description = "Name of source bucket"
+  value       = aws_s3_bucket.source-data-bucket.id
+}
+
+output "glue_crawler_name" {
+  value = aws_glue_crawler.raw_log_crawler.name
+}
diff --git a/terraform/account-wide-infrastructure/modules/glue/s3.tf b/terraform/account-wide-infrastructure/modules/glue/s3.tf
new file mode 100644
index 000000000..4695f2b5b
--- /dev/null
+++ b/terraform/account-wide-infrastructure/modules/glue/s3.tf
@@ -0,0 +1,179 @@
+# S3 Bucket for Raw Data
+resource "aws_s3_bucket" "source-data-bucket" {
+  bucket = "${var.name_prefix}-source-data-bucket"
+}
+
+resource "aws_s3_bucket_policy" "source-data-bucket" {
+  bucket = "${var.name_prefix}-source-data-bucket"
+
+  policy = jsonencode({
+    Version = "2012-10-17"
+    Id      = "source-data-bucket-policy"
+    Statement = [
+      {
+        Sid    = "HTTPSOnly"
+        Effect = "Deny"
+        Principal = {
+          "AWS" : "*"
+        }
+        Action = "s3:*"
+        Resource = [
+          aws_s3_bucket.source-data-bucket.arn,
+          "${aws_s3_bucket.source-data-bucket.arn}/*",
+        ]
+        Condition = {
+          Bool = {
+            "aws:SecureTransport" = "false"
+          }
+        }
+      },
+    ]
+  })
+}
+
+resource "aws_s3_bucket_server_side_encryption_configuration" "source-data-bucket" {
+  bucket = aws_s3_bucket.source-data-bucket.bucket
+
+  rule {
+    apply_server_side_encryption_by_default {
+      kms_master_key_id = aws_kms_key.glue.arn
+      sse_algorithm     = "aws:kms"
+    }
+  }
+}
+
+resource
"aws_s3_bucket_public_access_block" "source-data-bucket-public-access-block" { + bucket = aws_s3_bucket.source-data-bucket.id + + block_public_acls = true + block_public_policy = true + ignore_public_acls = true + restrict_public_buckets = true +} + + +# S3 Bucket for Processed Data +resource "aws_s3_bucket" "target-data-bucket" { + bucket = "${var.name_prefix}-target-data-bucket" +} + +resource "aws_s3_bucket_policy" "target-data-bucket" { + bucket = "${var.name_prefix}-target-data-bucket" + + policy = jsonencode({ + Version = "2012-10-17" + Id = "target-data-bucket-policy" + Statement = [ + { + Sid = "HTTPSOnly" + Effect = "Deny" + Principal = { + "AWS" : "*" + } + Action = "s3:*" + Resource = [ + aws_s3_bucket.target-data-bucket.arn, + "${aws_s3_bucket.target-data-bucket.arn}/*", + ] + Condition = { + Bool = { + "aws:SecureTransport" = "false" + } + } + }, + ] + }) +} + +resource "aws_s3_bucket_server_side_encryption_configuration" "target-data-bucket" { + bucket = aws_s3_bucket.target-data-bucket.bucket + + rule { + apply_server_side_encryption_by_default { + kms_master_key_id = aws_kms_key.glue.arn + sse_algorithm = "aws:kms" + } + } +} + +resource "aws_s3_bucket_public_access_block" "target-data-bucket-public-access-block" { + bucket = aws_s3_bucket.target-data-bucket.id + + block_public_acls = true + block_public_policy = true + ignore_public_acls = true + restrict_public_buckets = true +} + +# S3 Bucket for Code +resource "aws_s3_bucket" "code-bucket" { + bucket = "${var.name_prefix}-code-bucket" +} + +resource "aws_s3_bucket_policy" "code-bucket" { + bucket = "${var.name_prefix}-code-bucket" + + policy = jsonencode({ + Version = "2012-10-17" + Id = "code-bucket-policy" + Statement = [ + { + Sid = "HTTPSOnly" + Effect = "Deny" + Principal = { + "AWS" : "*" + } + Action = "s3:*" + Resource = [ + aws_s3_bucket.code-bucket.arn, + "${aws_s3_bucket.code-bucket.arn}/*", + ] + Condition = { + Bool = { + "aws:SecureTransport" = "false" + } + } + }, + ] + }) +} + 
+resource "aws_s3_bucket_server_side_encryption_configuration" "code-bucket" {
+  bucket = aws_s3_bucket.code-bucket.bucket
+
+  rule {
+    apply_server_side_encryption_by_default {
+      kms_master_key_id = aws_kms_key.glue.arn
+      sse_algorithm     = "aws:kms"
+    }
+  }
+}
+
+resource "aws_s3_bucket_public_access_block" "code-bucket-public-access-block" {
+  bucket = aws_s3_bucket.code-bucket.id
+
+  block_public_acls       = true
+  block_public_policy     = true
+  ignore_public_acls      = true
+  restrict_public_buckets = true
+}
+
+resource "aws_s3_object" "script" {
+  bucket = aws_s3_bucket.code-bucket.bucket
+  key    = "main.py"
+  source = "${path.module}/src/main.py"
+  etag   = filemd5("${path.module}/src/main.py")
+}
+
+data "archive_file" "python" {
+  type        = "zip"
+  output_path = "${path.module}/files/src.zip"
+
+  source_dir = "${path.module}/src"
+}
+
+resource "aws_s3_object" "zip" {
+  bucket = aws_s3_bucket.code-bucket.bucket
+  key    = "src.zip" # Referenced by the glue job's --extra-py-files; must not collide with the "script" object's key.
+  source = "${path.module}/files/src.zip"
+}
diff --git a/terraform/account-wide-infrastructure/modules/glue/src/__init__.py b/terraform/account-wide-infrastructure/modules/glue/src/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/terraform/account-wide-infrastructure/modules/glue/src/instances.py b/terraform/account-wide-infrastructure/modules/glue/src/instances.py
new file mode 100644
index 000000000..335ba7ecd
--- /dev/null
+++ b/terraform/account-wide-infrastructure/modules/glue/src/instances.py
@@ -0,0 +1,30 @@
+import logging
+
+from awsglue.context import GlueContext
+from pyspark.sql import SparkSession
+
+
+class GlueContextSingleton:
+    """Singleton for GlueContext and SparkSession"""
+
+    _instance = None
+
+    def __new__(cls, spark_context):
+        if not cls._instance:
+            cls._instance = super().__new__(cls)
+            cls._instance.spark = SparkSession.builder.getOrCreate()
+            cls._instance.context = GlueContext(spark_context)
+        return cls._instance
+
+
+class LoggerSingleton:
+    """Singleton for logger"""
+
+    _instance = None
+
+
def __new__(cls):
+        if not cls._instance:
+            cls._instance = super().__new__(cls)
+            cls._instance.logger = logging.getLogger("ETLLogger")
+            cls._instance.logger.setLevel(logging.INFO)
+        return cls._instance
diff --git a/terraform/account-wide-infrastructure/modules/glue/src/main.py b/terraform/account-wide-infrastructure/modules/glue/src/main.py
new file mode 100644
index 000000000..a29ef78d8
--- /dev/null
+++ b/terraform/account-wide-infrastructure/modules/glue/src/main.py
@@ -0,0 +1,34 @@
+import sys
+
+from awsglue.utils import getResolvedOptions
+from pyspark.context import SparkContext
+from src.pipeline import LogPipeline
+from src.transformations import placeholder
+
+# Required job arguments; these names must match the "--<NAME>" keys in the
+# Glue job's default_arguments (see modules/glue/glue.tf).
+args = getResolvedOptions(sys.argv, ["JOB_NAME", "SOURCE_PATH", "TARGET_PATH"])
+
+# PARTITION_COLS is optional. getResolvedOptions raises on a missing argument,
+# so it must not be listed as required; resolve it only when it was passed.
+if "--PARTITION_COLS" in sys.argv:
+    partition_cols = getResolvedOptions(sys.argv, ["PARTITION_COLS"])[
+        "PARTITION_COLS"
+    ].split(",")
+else:
+    partition_cols = []
+
+# Start Glue context
+sc = SparkContext()
+
+# Initialize ETL process
+etl_job = LogPipeline(
+    spark_context=sc,
+    source_path=args["SOURCE_PATH"],
+    target_path=args["TARGET_PATH"],
+    partition_cols=partition_cols,
+    transformations=[placeholder],
+)
+
+# Run the job
+etl_job.run()
diff --git a/terraform/account-wide-infrastructure/modules/glue/src/pipeline.py b/terraform/account-wide-infrastructure/modules/glue/src/pipeline.py
new file mode 100644
index 000000000..50c34af23
--- /dev/null
+++ b/terraform/account-wide-infrastructure/modules/glue/src/pipeline.py
@@ -0,0 +1,53 @@
+from src.instances import GlueContextSingleton, LoggerSingleton
+
+
+class LogPipeline:
+    def __init__(
+        self,
+        spark_context,
+        source_path,
+        target_path,
+        partition_cols=None,
+        transformations=[],
+    ):
+        """Initialize Glue context, Spark session, logger, and paths"""
+        self.glue_context = GlueContextSingleton(spark_context).context
+        self.spark = GlueContextSingleton(spark_context).spark
+        self.logger = LoggerSingleton().logger
+        self.source_path = source_path
+        self.target_path =
target_path
+        # Guard against the partition_cols=None default: load() unpacks this
+        # list, so it must always be a list.
+        self.partition_cols = partition_cols or []
+        self.transformations = transformations
+
+    def run(self):
+        """Runs ETL"""
+        try:
+            self.logger.info("ETL Process started.")
+            df = self.extract()
+            self.logger.info(f"Data extracted from {self.source_path}.")
+            df = self.transform(df)
+            self.logger.info("Data transformed successfully.")
+            self.load(df)
+            self.logger.info(f"Data loaded into {self.target_path}.")
+        except Exception as e:
+            self.logger.error(f"ETL process failed: {e}")
+            raise e
+
+    def extract(self):
+        """Extract JSON data from S3"""
+        self.logger.info(f"Extracting data from {self.source_path} as JSON")
+        return self.spark.read.json(self.source_path)
+
+    def transform(self, dataframe):
+        """Apply a list of transformations on the dataframe"""
+        for transformation in self.transformations:
+            self.logger.info(f"Applying transformation: {transformation.__name__}")
+            dataframe = transformation(dataframe)
+        return dataframe
+
+    def load(self, dataframe):
+        """Load transformed data into Parquet format"""
+        self.logger.info(f"Loading data into {self.target_path} as Parquet")
+        dataframe.write.mode("overwrite").partitionBy(*self.partition_cols).parquet(
+            self.target_path
+        )
diff --git a/terraform/account-wide-infrastructure/modules/glue/src/transformations.py b/terraform/account-wide-infrastructure/modules/glue/src/transformations.py
new file mode 100644
index 000000000..1d59d52bc
--- /dev/null
+++ b/terraform/account-wide-infrastructure/modules/glue/src/transformations.py
@@ -0,0 +1,3 @@
+def placeholder(dataframe):
+    """No-op transformation: LogPipeline.transform calls each entry as f(df)."""
+    return dataframe
diff --git a/terraform/account-wide-infrastructure/modules/glue/vars.tf b/terraform/account-wide-infrastructure/modules/glue/vars.tf
new file mode 100644
index 000000000..cb03095bf
--- /dev/null
+++ b/terraform/account-wide-infrastructure/modules/glue/vars.tf
@@ -0,0 +1,24 @@
+variable "name_prefix" {
+  type        = string
+  description = "The prefix to apply to all resources in the module."
+} + +variable "python_version" { + type = number + description = "Python version to run in script" +} + +variable "source_bucket" { + description = "S3 bucket for source data" + default = "source-data-bucket" +} + +variable "target_bucket" { + description = "S3 bucket for target data" + default = "target-data-bucket" +} + +variable "code_bucket" { + description = "S3 bucket for Glue job scripts" + default = "code-bucket" +} diff --git a/terraform/infrastructure/data.tf b/terraform/infrastructure/data.tf index e66b237ed..2c3512fa3 100644 --- a/terraform/infrastructure/data.tf +++ b/terraform/infrastructure/data.tf @@ -41,3 +41,8 @@ data "external" "current-info" { "../../scripts/get-current-info.sh", ] } + +data "aws_s3_bucket" "source-data-bucket" { + count = local.is_dev_env ? 1 : 0 + bucket = "${local.shared_prefix}-source-data-bucket" +} diff --git a/terraform/infrastructure/firehose.tf b/terraform/infrastructure/firehose.tf index 2a0ebd221..fea8712ee 100644 --- a/terraform/infrastructure/firehose.tf +++ b/terraform/infrastructure/firehose.tf @@ -1,11 +1,13 @@ module "firehose__processor" { - source = "./modules/firehose" - assume_account = local.aws_account_id - prefix = local.prefix - region = local.region - environment = local.environment - cloudwatch_kms_arn = module.kms__cloudwatch.kms_arn - splunk_environment = local.splunk_environment - splunk_index = local.splunk_index - destination = "splunk" + source = "./modules/firehose" + assume_account = local.aws_account_id + prefix = local.prefix + region = local.region + environment = local.environment + cloudwatch_kms_arn = module.kms__cloudwatch.kms_arn + splunk_environment = local.splunk_environment + splunk_index = local.splunk_index + destination = "splunk" + reporting_bucket_arn = local.reporting_bucket_arn + reporting_infra_toggle = local.is_dev_env } diff --git a/terraform/infrastructure/locals.tf b/terraform/infrastructure/locals.tf index 998bd8ed1..dd1cd0f06 100644 --- 
a/terraform/infrastructure/locals.tf +++ b/terraform/infrastructure/locals.tf @@ -22,11 +22,15 @@ locals { dynamodb_timeout_seconds = "3" is_sandbox_env = length(regexall("-sandbox-", local.stack_name)) > 0 + is_dev_env = var.account_name == "dev" environment = local.is_sandbox_env ? "${var.account_name}-sandbox" : var.account_name shared_prefix = "${local.project}--${local.environment}" public_domain = local.is_sandbox_env ? var.public_sandbox_domain : var.public_domain + # Logic / vars for reporting + reporting_bucket_arn = local.is_dev_env ? data.aws_s3_bucket.source-data-bucket[0].arn : null + # Logic / vars for splunk environment splunk_environment = local.is_sandbox_env ? "${var.account_name}sandbox" : var.account_name splunk_index = "aws_recordlocator_${local.splunk_environment}" diff --git a/terraform/infrastructure/modules/firehose/cloudwatch.tf b/terraform/infrastructure/modules/firehose/cloudwatch.tf index 8e0a1ddac..86aff3fd2 100644 --- a/terraform/infrastructure/modules/firehose/cloudwatch.tf +++ b/terraform/infrastructure/modules/firehose/cloudwatch.tf @@ -7,3 +7,15 @@ resource "aws_cloudwatch_log_stream" "firehose" { name = "${var.prefix}-firehose" log_group_name = aws_cloudwatch_log_group.firehose.name } + +resource "aws_cloudwatch_log_group" "firehose_reporting" { + count = var.reporting_infra_toggle ? 1 : 0 + name = "/aws/kinesisfirehose/${var.prefix}-firehose-reporting" + retention_in_days = local.cloudwatch.retention.days +} + +resource "aws_cloudwatch_log_stream" "firehose_reporting" { + count = var.reporting_infra_toggle ? 
1 : 0
+  name           = "${var.prefix}-firehose-reporting"
+  log_group_name = aws_cloudwatch_log_group.firehose_reporting[0].name
+}
diff --git a/terraform/infrastructure/modules/firehose/iam_firehose.tf b/terraform/infrastructure/modules/firehose/iam_firehose.tf
index 991d4d84f..89e72587d 100644
--- a/terraform/infrastructure/modules/firehose/iam_firehose.tf
+++ b/terraform/infrastructure/modules/firehose/iam_firehose.tf
@@ -27,10 +27,13 @@ data "aws_iam_policy_document" "firehose" {
       "s3:PutObject",
     ]
 
-    resources = [
+    resources = compact([
       aws_s3_bucket.firehose.arn,
       "${aws_s3_bucket.firehose.arn}/*",
-    ]
+      var.reporting_bucket_arn,
+      # s3:PutObject is object-level, so the "<bucket>/*" ARN is required too.
+      var.reporting_bucket_arn != null ? "${var.reporting_bucket_arn}/*" : null,
+    ])
 
     effect = "Allow"
   }
@@ -70,10 +73,12 @@ data "aws_iam_policy_document" "firehose" {
     actions = [
       "logs:PutLogEvents",
     ]
-    resources = [
+    resources = compact([
       aws_cloudwatch_log_group.firehose.arn,
-      aws_cloudwatch_log_stream.firehose.arn
-    ]
+      aws_cloudwatch_log_stream.firehose.arn,
+      local.iam_firehose.cloudwatch_reporting_log_group_arn,
+      local.iam_firehose.cloudwatch_reporting_log_stream_arn,
+    ])
     effect = "Allow"
   }
 }
diff --git a/terraform/infrastructure/modules/firehose/iam_subscriptions.tf b/terraform/infrastructure/modules/firehose/iam_subscriptions.tf
index 05006243b..3fe217ac3 100644
--- a/terraform/infrastructure/modules/firehose/iam_subscriptions.tf
+++ b/terraform/infrastructure/modules/firehose/iam_subscriptions.tf
@@ -20,9 +20,10 @@ data "aws_iam_policy_document" "firehose_subscription" {
       "firehose:*",
     ]
     effect = "Allow"
-    resources = [
+    resources = compact([
       aws_kinesis_firehose_delivery_stream.firehose.arn,
-    ]
+      local.iam_subscriptions.firehose_reporting_stream_arn,
+    ])
   }
   statement {
     actions = [
diff --git a/terraform/infrastructure/modules/firehose/kinesis.tf b/terraform/infrastructure/modules/firehose/kinesis.tf
index de9a65162..7c0c4a288 100644
--- a/terraform/infrastructure/modules/firehose/kinesis.tf
+++ b/terraform/infrastructure/modules/firehose/kinesis.tf
@@ -56,3 +56,24 @@ resource
"aws_kinesis_firehose_delivery_stream" "firehose" { } } } + +resource "aws_kinesis_firehose_delivery_stream" "reporting_stream" { + count = var.reporting_infra_toggle ? 1 : 0 + name = "${var.prefix}--cloudwatch-reporting-delivery-stream" + destination = "extended_s3" + + extended_s3_configuration { + role_arn = aws_iam_role.firehose.arn + bucket_arn = var.reporting_bucket_arn + + processing_configuration { + enabled = "false" + } + + cloudwatch_logging_options { + enabled = true + log_group_name = aws_cloudwatch_log_group.firehose_reporting[0].name + log_stream_name = aws_cloudwatch_log_stream.firehose_reporting[0].name + } + } +} diff --git a/terraform/infrastructure/modules/firehose/locals.tf b/terraform/infrastructure/modules/firehose/locals.tf index 04b405d77..4658e993a 100644 --- a/terraform/infrastructure/modules/firehose/locals.tf +++ b/terraform/infrastructure/modules/firehose/locals.tf @@ -31,4 +31,13 @@ locals { compression_format = "GZIP" } + iam_firehose = { + cloudwatch_reporting_log_group_arn = var.reporting_infra_toggle ? aws_cloudwatch_log_group.firehose_reporting[0].arn : null + cloudwatch_reporting_log_stream_arn = var.reporting_infra_toggle ? aws_cloudwatch_log_stream.firehose_reporting[0].arn : null + } + + iam_subscriptions = { + firehose_reporting_stream_arn = var.reporting_infra_toggle ? aws_kinesis_firehose_delivery_stream.reporting_stream[0].arn : null + } + } diff --git a/terraform/infrastructure/modules/firehose/vars.tf b/terraform/infrastructure/modules/firehose/vars.tf index 9d9a70385..dec876c12 100644 --- a/terraform/infrastructure/modules/firehose/vars.tf +++ b/terraform/infrastructure/modules/firehose/vars.tf @@ -34,3 +34,12 @@ variable "error_prefix" { type = string default = "errors" } + +variable "reporting_bucket_arn" { + type = string + default = null +} + +variable "reporting_infra_toggle" { + type = bool +}