From c3c803a4c40b9f9c3a5a43ef247ea99ad502d889 Mon Sep 17 00:00:00 2001 From: jackleary Date: Mon, 9 Jun 2025 15:31:13 +0100 Subject: [PATCH 01/27] NRL-1479 update firehose buffer time for quicker testing --- terraform/infrastructure/modules/firehose/kinesis.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/terraform/infrastructure/modules/firehose/kinesis.tf b/terraform/infrastructure/modules/firehose/kinesis.tf index 144b8fa29..c246492d8 100644 --- a/terraform/infrastructure/modules/firehose/kinesis.tf +++ b/terraform/infrastructure/modules/firehose/kinesis.tf @@ -66,7 +66,7 @@ resource "aws_kinesis_firehose_delivery_stream" "reporting_stream" { role_arn = aws_iam_role.firehose.arn bucket_arn = var.reporting_bucket_arn buffering_size = 64 - buffering_interval = 600 + buffering_interval = 180 processing_configuration { enabled = "true" From aca8513b4451cae3eb2845d3aad03bfb8ff129bf Mon Sep 17 00:00:00 2001 From: jackleary Date: Mon, 9 Jun 2025 17:12:08 +0100 Subject: [PATCH 02/27] NRL-1479 update logging set up --- .../modules/glue/src/instances.py | 32 ------------------- .../modules/glue/src/main.py | 19 ++++++++--- .../modules/glue/src/pipeline.py | 11 ++++--- 3 files changed, 20 insertions(+), 42 deletions(-) delete mode 100644 terraform/account-wide-infrastructure/modules/glue/src/instances.py diff --git a/terraform/account-wide-infrastructure/modules/glue/src/instances.py b/terraform/account-wide-infrastructure/modules/glue/src/instances.py deleted file mode 100644 index a94bcc459..000000000 --- a/terraform/account-wide-infrastructure/modules/glue/src/instances.py +++ /dev/null @@ -1,32 +0,0 @@ -import logging - -from awsglue.context import GlueContext -from pyspark.sql import SparkSession - - -class GlueContextSingleton: - """Singleton for GlueContext and SparkSession""" - - _instance = None - - def __new__(cls, spark_context): - if not cls._instance: - cls._instance = super().__new__(cls) - cls._instance.spark = SparkSession.builder.config( - "spark.sql.caseSensitive", "true" - ).getOrCreate() - cls._instance.context = GlueContext(spark_context) - return cls._instance - - -class LoggerSingleton: - """Singleton for logger""" - - _instance = None - - def __new__(cls): - if not cls._instance: - cls._instance = super().__new__(cls) - cls._instance.logger = logging.getLogger("ETLLogger") - cls._instance.logger.setLevel(logging.INFO) - return cls._instance diff --git a/terraform/account-wide-infrastructure/modules/glue/src/main.py b/terraform/account-wide-infrastructure/modules/glue/src/main.py index 64f616b59..872ff0baf 100644 --- a/terraform/account-wide-infrastructure/modules/glue/src/main.py +++ b/terraform/account-wide-infrastructure/modules/glue/src/main.py @@ -1,18 +1,25 @@ +import logging import sys +from awsglue.context import GlueContext from awsglue.utils import getResolvedOptions from pipeline import LogPipeline -from pyspark.context import SparkContext +from pyspark.sql import SparkSession from transformations import dtype_conversion, rename_cols, resolve_dupes +# Spark and Glue Context initialization +spark = SparkSession.builder.config("spark.sql.caseSensitive", "true").getOrCreate() +glue_context = GlueContext(spark.sparkContext) + +# Logger setup +logger = logging.getLogger("ETLLogger") +logger.setLevel(logging.INFO) + # Get arguments from AWS Glue job args = getResolvedOptions( sys.argv, ["job_name", "source_path", "target_path", "partition_cols"] ) -# Start Glue context -sc = SparkContext() - partition_cols = args["partition_cols"].split(",") if "partition_cols" in args else [] host_prefixes = [ @@ -31,7 +38,9 @@ # Initialize ETL process etl_job = LogPipeline( - spark_context=sc, + glue_context=glue_context, + spark=spark, + logger=logger, source_path=args["source_path"], target_path=args["target_path"], host_prefixes=host_prefixes, diff --git a/terraform/account-wide-infrastructure/modules/glue/src/pipeline.py b/terraform/account-wide-infrastructure/modules/glue/src/pipeline.py index f018911ad..c0ce21e6b 100644 --- a/terraform/account-wide-infrastructure/modules/glue/src/pipeline.py +++ b/terraform/account-wide-infrastructure/modules/glue/src/pipeline.py @@ -1,13 +1,14 @@ import time import boto3 -from instances import GlueContextSingleton, LoggerSingleton class LogPipeline: def __init__( self, - spark_context, + glue_context, + spark, + logger, source_path, target_path, host_prefixes, @@ -16,9 +17,9 @@ def __init__( transformations=[], ): """Initialize Glue context, Spark session, logger, and paths""" - self.glue_context = GlueContextSingleton(spark_context).context - self.spark = GlueContextSingleton(spark_context).spark - self.logger = LoggerSingleton().logger + self.glue_context = glue_context + self.spark = spark + self.logger = logger self.source_path = source_path self.target_path = target_path self.host_prefixes = host_prefixes From 67152e7aecd70b13bbc46e51fb95e845e61eaa87 Mon Sep 17 00:00:00 2001 From: jackleary Date: Tue, 10 Jun 2025 13:55:06 +0100 Subject: [PATCH 03/27] NRL-1479 Update logging --- terraform/account-wide-infrastructure/modules/glue/src/main.py | 3 +++ .../account-wide-infrastructure/modules/glue/src/pipeline.py | 3 +++ 2 files changed, 6 insertions(+) diff --git a/terraform/account-wide-infrastructure/modules/glue/src/main.py b/terraform/account-wide-infrastructure/modules/glue/src/main.py index 872ff0baf..712be8877 100644 --- a/terraform/account-wide-infrastructure/modules/glue/src/main.py +++ b/terraform/account-wide-infrastructure/modules/glue/src/main.py @@ -12,6 +12,9 @@ glue_context = GlueContext(spark.sparkContext) # Logger setup +MSG_FORMAT = "%(asctime)s %(levelname)s %(name)s: %(message)s" +DATETIME_FORMAT = "%Y-%m-%d %H:%M:%S" +logging.basicConfig(format=MSG_FORMAT, datefmt=DATETIME_FORMAT) logger = logging.getLogger("ETLLogger") logger.setLevel(logging.INFO) diff --git a/terraform/account-wide-infrastructure/modules/glue/src/pipeline.py b/terraform/account-wide-infrastructure/modules/glue/src/pipeline.py index c0ce21e6b..8429af37b 100644 --- a/terraform/account-wide-infrastructure/modules/glue/src/pipeline.py +++ b/terraform/account-wide-infrastructure/modules/glue/src/pipeline.py @@ -106,6 +106,9 @@ def load(self, data): for name, dataframe in data.items(): name = name.replace("--", "_") try: + self.logger.info( + f"Attempting to load dataframe {name} into {self.target_path}{name}" + ) dataframe.coalesce(1).write.mode("append").partitionBy( *self.partition_cols ).parquet(f"{self.target_path}{name}") From da6b10d33914d89dcf3ceaac0ca76dbedb06da79 Mon Sep 17 00:00:00 2001 From: jackleary Date: Wed, 11 Jun 2025 15:42:05 +0100 Subject: [PATCH 04/27] NRL-1479 Update iam role to allow notebooks to run --- terraform/account-wide-infrastructure/modules/glue/iam.tf | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/terraform/account-wide-infrastructure/modules/glue/iam.tf b/terraform/account-wide-infrastructure/modules/glue/iam.tf index 097fe0386..e24851ef6 100644 --- a/terraform/account-wide-infrastructure/modules/glue/iam.tf +++ b/terraform/account-wide-infrastructure/modules/glue/iam.tf @@ -98,7 +98,8 @@ data "aws_iam_policy_document" "glue_service" { ] effect = "Allow" resources = [ - "arn:aws:iam::*:role/AWSGlueServiceRole*" + "arn:aws:iam::*:role/AWSGlueServiceRole*", + aws_iam_role.glue_service_role.arn, ] } } From 2c6322c24f3f7246c08573b0d781fdcbf635fe92 Mon Sep 17 00:00:00 2001 From: jackleary Date: Wed, 11 Jun 2025 15:44:54 +0100 Subject: [PATCH 05/27] NRL-1479 Update logging and lower all columns before writing to parquet --- .../modules/glue/src/pipeline.py | 2 +- .../modules/glue/src/transformations.py | 54 +++++++++++-------- 2 files changed, 34 insertions(+), 22 deletions(-) diff --git a/terraform/account-wide-infrastructure/modules/glue/src/pipeline.py b/terraform/account-wide-infrastructure/modules/glue/src/pipeline.py index 8429af37b..809fb6575 100644 --- a/terraform/account-wide-infrastructure/modules/glue/src/pipeline.py +++ b/terraform/account-wide-infrastructure/modules/glue/src/pipeline.py @@ -97,7 +97,7 @@ def transform(self, dataframe, name): ) for transformation in self.transformations: self.logger.info(f"Applying transformation: {transformation.__name__}") - dataframe = transformation(dataframe) + dataframe = transformation(dataframe, self.logger) return dataframe def load(self, data): diff --git a/terraform/account-wide-infrastructure/modules/glue/src/transformations.py b/terraform/account-wide-infrastructure/modules/glue/src/transformations.py index 3b23d6515..425e5a061 100644 --- a/terraform/account-wide-infrastructure/modules/glue/src/transformations.py +++ b/terraform/account-wide-infrastructure/modules/glue/src/transformations.py @@ -1,3 +1,6 @@ +from collections import defaultdict +from functools import reduce + from pyspark.sql.functions import ( coalesce, col, @@ -12,34 +15,41 @@ from pyspark.sql.types import NullType -def resolve_dupes(df): - drop = [] - for i in range(len(df.columns)): - for j in range(i + 1, len(df.columns)): - if df.columns[i].lower() == df.columns[j].lower(): - df = df.withColumn( - df.columns[i].lower() + "_", - when( - col(df.columns[i]).isNull() | col(df.columns[j]).isNull(), - coalesce(col(df.columns[i]), col(df.columns[j])), - ).otherwise( - concat(col(df.columns[i]), lit(","), col(df.columns[j])) - ), - ) - drop.extend([df.columns[i], df.columns[j]]) - df = df.drop(*drop) +def resolve_dupes(df, logger): + column_groups = defaultdict(list) + for column_name in df.columns: + normalised_name = column_name.lower().rstrip("_") + column_groups[normalised_name].append(column_name) - return df + final_select_exprs = [] + for lower_name, original_names in column_groups.items(): + + if len(original_names) == 1: + final_select_exprs.append(col(original_names[0]).alias(lower_name)) + else: + logger.info(f"Resolving duplicate group '{lower_name}': {original_names}") + + merge_logic = lambda col1, col2: when( + col1.isNull() | col2.isNull(), coalesce(col1, col2) + ).otherwise(concat(col1, lit(", "), col2)) + + merged_column_expr = reduce(merge_logic, [col(c) for c in original_names]) + + final_select_exprs.append(merged_column_expr.alias(lower_name)) + + return df.select(*final_select_exprs) -def rename_cols(df): +def rename_cols(df, logger): + logger.info("Replacing '.' with '_'") for col_name in df.columns: df = df.withColumnRenamed(col_name, col_name.replace(".", "_")) return df -def dtype_conversion(df): +def dtype_conversion(df, logger): try: + logger.info("Formatting event_timestamp") df = ( df.withColumn( "event_timestamp_cleaned", @@ -56,13 +66,15 @@ def dtype_conversion(df): ) df = df.drop("event_timestamp_cleaned") - except: - ... + except Exception as e: + logger.info(f"Failed formatting of timestamp column with error: {e}") + logger.info("Handling Null Type columns") select_exprs = [] for column_name in df.columns: column_type = df.schema[column_name].dataType if isinstance(column_type, NullType): + logger.info(f"Converting {column_name} to string") select_exprs.append(col(column_name).cast("string").alias(column_name)) else: select_exprs.append(col(column_name)) From cfa08a5a846faca74c1563d19e465d6077a1fffa Mon Sep 17 00:00:00 2001 From: Matt Dean Date: Wed, 11 Jun 2025 18:04:47 +0100 Subject: [PATCH 06/27] [NRL-1386] Add reporting infra to all envs --- .../prod/athena.tf | 5 +++ .../account-wide-infrastructure/prod/ec2.tf | 23 ++++++++++ .../account-wide-infrastructure/prod/glue.tf | 5 +++ .../account-wide-infrastructure/prod/vars.tf | 42 +++++++++++++++++++ .../test/athena.tf | 18 ++++++++ .../account-wide-infrastructure/test/ec2.tf | 23 ++++++++++ .../account-wide-infrastructure/test/glue.tf | 18 ++++++++ .../account-wide-infrastructure/test/vars.tf | 42 +++++++++++++++++++ terraform/infrastructure/data.tf | 2 - terraform/infrastructure/firehose.tf | 23 +++++----- terraform/infrastructure/locals.tf | 9 ++-- .../modules/firehose/cloudwatch.tf | 2 - .../modules/firehose/kinesis.tf | 1 - .../infrastructure/modules/firehose/locals.tf | 8 ++-- .../infrastructure/modules/firehose/output.tf | 4 +- .../infrastructure/modules/firehose/vars.tf | 4 -- 16 files changed, 196 insertions(+), 33 deletions(-) create mode 100644 terraform/account-wide-infrastructure/prod/athena.tf create mode 100644 terraform/account-wide-infrastructure/prod/ec2.tf create mode 100644 terraform/account-wide-infrastructure/prod/glue.tf create mode 100644 terraform/account-wide-infrastructure/test/ec2.tf diff --git a/terraform/account-wide-infrastructure/prod/athena.tf b/terraform/account-wide-infrastructure/prod/athena.tf new file mode 100644 index 000000000..d5d59c9b7 --- /dev/null +++ b/terraform/account-wide-infrastructure/prod/athena.tf @@ -0,0 +1,5 @@ +module "prod-athena" { + source = "../modules/athena" + name_prefix = "nhsd-nrlf--prod" + target_bucket_name = module.prod-glue.target_bucket_name +} diff --git a/terraform/account-wide-infrastructure/prod/ec2.tf b/terraform/account-wide-infrastructure/prod/ec2.tf new file mode 100644 index 000000000..73e76d3c0 --- /dev/null +++ b/terraform/account-wide-infrastructure/prod/ec2.tf @@ -0,0 +1,23 @@ +module "vpc" { + source = "../modules/vpc" + vpc_cidr_block = var.vpc_cidr_block + enable_dns_hostnames = var.enable_dns_hostnames + vpc_public_subnets_cidr_block = var.vpc_public_subnets_cidr_block + vpc_private_subnets_cidr_block = var.vpc_private_subnets_cidr_block + aws_azs = var.aws_azs + name_prefix = "nhsd-nrlf--prod" +} + +module "powerbi_gw_instance_v2" { + source = "../modules/ec2" + use_custom_ami = true + instance_type = var.instance_type + name_prefix = "nhsd-nrlf--test-powerbi-gw-v2" + target_bucket_arn = module.prod-glue.target_bucket_arn + glue_kms_key_arn = module.prod-glue.aws_kms_key_arn + athena_kms_key_arn = module.prod-athena.kms_key_arn + athena_bucket_arn = module.prod-athena.bucket_arn + + subnet_id = module.vpc.private_subnet_id + security_groups = [module.vpc.powerbi_gw_security_group_id] +} diff --git a/terraform/account-wide-infrastructure/prod/glue.tf b/terraform/account-wide-infrastructure/prod/glue.tf new file mode 100644 index 000000000..7a03d7fa3 --- /dev/null +++ b/terraform/account-wide-infrastructure/prod/glue.tf @@ -0,0 +1,5 @@ +module "prod-glue" { + source = "../modules/glue" + name_prefix = "nhsd-nrlf--prod" + python_version = 3 +} diff --git a/terraform/account-wide-infrastructure/prod/vars.tf b/terraform/account-wide-infrastructure/prod/vars.tf index 3a6abb601..d7d33ba64 100644 --- a/terraform/account-wide-infrastructure/prod/vars.tf +++ b/terraform/account-wide-infrastructure/prod/vars.tf @@ -8,3 +8,45 @@ variable "prod_api_domain_name" { description = "The internal DNS name of the API Gateway for the prod environment" default = "prod.api.record-locator.national.nhs.uk" } + +variable "aws_azs" { + type = string + description = "AWS Availability Zones" + default = "eu-west-2a" +} + +variable "enable_dns_hostnames" { + type = bool + description = "Enable DNS hostnames in VPC" + default = true +} + +variable "vpc_cidr_block" { + type = string + description = "Base CIDR Block for VPC" + default = "10.0.0.0/16" +} + +variable "vpc_public_subnets_cidr_block" { + type = string + description = "CIDR Block for Public Subnets in VPC" + default = "10.0.0.0/24" +} + +variable "vpc_private_subnets_cidr_block" { + type = string + description = "CIDR Block for Private Subnets in VPC" + default = "10.0.1.0/24" +} + +variable "instance_type" { + type = string + description = "Type for EC2 Instance" + default = "t2.micro" +} + +variable "use_custom_ami" { + type = bool + description = "Use custom image" + default = false +} diff --git a/terraform/account-wide-infrastructure/test/athena.tf b/terraform/account-wide-infrastructure/test/athena.tf index b64111d44..dd1702f22 100644 --- a/terraform/account-wide-infrastructure/test/athena.tf +++ b/terraform/account-wide-infrastructure/test/athena.tf @@ -1,5 +1,23 @@ +module "qa-athena" { + source = "../modules/athena" + name_prefix = "nhsd-nrlf--qa" + target_bucket_name = module.qa-glue.target_bucket_name +} + module "int-athena" { source = "../modules/athena" name_prefix = "nhsd-nrlf--int" target_bucket_name = module.int-glue.target_bucket_name } + +module "int-sandbox-athena" { + source = "../modules/athena" + name_prefix = "nhsd-nrlf--int-sandbox" + target_bucket_name = module.int-sandbox-glue.target_bucket_name +} + +module "ref-athena" { + source = "../modules/athena" + name_prefix = "nhsd-nrlf--ref" + target_bucket_name = module.ref-glue.target_bucket_name +} diff --git a/terraform/account-wide-infrastructure/test/ec2.tf b/terraform/account-wide-infrastructure/test/ec2.tf new file mode 100644 index 000000000..2ed6a2246 --- /dev/null +++ b/terraform/account-wide-infrastructure/test/ec2.tf @@ -0,0 +1,23 @@ +module "vpc" { + source = "../modules/vpc" + vpc_cidr_block = var.vpc_cidr_block + enable_dns_hostnames = var.enable_dns_hostnames + vpc_public_subnets_cidr_block = var.vpc_public_subnets_cidr_block + vpc_private_subnets_cidr_block = var.vpc_private_subnets_cidr_block + aws_azs = var.aws_azs + name_prefix = "nhsd-nrlf--dev" +} + +module "powerbi_gw_instance_v2" { + source = "../modules/ec2" + use_custom_ami = true + instance_type = var.instance_type + name_prefix = "nhsd-nrlf--dev-powerbi-gw-v2" + target_bucket_arn = module.dev-glue.target_bucket_arn + glue_kms_key_arn = module.dev-glue.aws_kms_key_arn + athena_kms_key_arn = module.dev-athena.kms_key_arn + athena_bucket_arn = module.dev-athena.bucket_arn + + subnet_id = module.vpc.private_subnet_id + security_groups = [module.vpc.powerbi_gw_security_group_id] +} diff --git a/terraform/account-wide-infrastructure/test/glue.tf b/terraform/account-wide-infrastructure/test/glue.tf index 57e2e82e9..d40394f9d 100644 --- a/terraform/account-wide-infrastructure/test/glue.tf +++ b/terraform/account-wide-infrastructure/test/glue.tf @@ -3,3 +3,21 @@ module "int-glue" { name_prefix = "nhsd-nrlf--int" python_version = 3 } + +module "int-glue" { + source = "../modules/glue" + name_prefix = "nhsd-nrlf--int-sandbox" + python_version = 3 +} + +module "int-glue" { + source = "../modules/glue" + name_prefix = "nhsd-nrlf--qa" + python_version = 3 +} + +module "int-glue" { + source = "../modules/glue" + name_prefix = "nhsd-nrlf--ref" + python_version = 3 +} diff --git a/terraform/account-wide-infrastructure/test/vars.tf b/terraform/account-wide-infrastructure/test/vars.tf index be722db5e..adc7a1bcd 100644 --- a/terraform/account-wide-infrastructure/test/vars.tf +++ b/terraform/account-wide-infrastructure/test/vars.tf @@ -28,3 +28,45 @@ variable "ref_api_domain_name" { description = "The internal DNS name of the API Gateway for the ref environment" default = "ref.api.record-locator.ref.national.nhs.uk" } + +variable "aws_azs" { + type = string + description = "AWS Availability Zones" + default = "eu-west-2a" +} + +variable "enable_dns_hostnames" { + type = bool + description = "Enable DNS hostnames in VPC" + default = true +} + +variable "vpc_cidr_block" { + type = string + description = "Base CIDR Block for VPC" + default = "10.0.0.0/16" +} + +variable "vpc_public_subnets_cidr_block" { + type = string + description = "CIDR Block for Public Subnets in VPC" + default = "10.0.0.0/24" +} + +variable "vpc_private_subnets_cidr_block" { + type = string + description = "CIDR Block for Private Subnets in VPC" + default = "10.0.1.0/24" +} + +variable "instance_type" { + type = string + description = "Type for EC2 Instance" + default = "t2.micro" +} + +variable "use_custom_ami" { + type = bool + description = "Use custom image" + default = false +} diff --git a/terraform/infrastructure/data.tf b/terraform/infrastructure/data.tf index e2d2d23d0..506a60af9 100644 --- a/terraform/infrastructure/data.tf +++ b/terraform/infrastructure/data.tf @@ -43,11 +43,9 @@ data "external" "current-info" { } data "aws_s3_bucket" "source-data-bucket" { - count = local.is_dev_env && !local.is_sandbox_env ? 1 : 0 bucket = "${local.shared_prefix}-source-data-bucket" } data "aws_kms_key" "glue" { - count = local.is_dev_env && !local.is_sandbox_env ? 1 : 0 key_id = "alias/${local.shared_prefix}-glue" } diff --git a/terraform/infrastructure/firehose.tf b/terraform/infrastructure/firehose.tf index db063e6f4..b2cdf7aab 100644 --- a/terraform/infrastructure/firehose.tf +++ b/terraform/infrastructure/firehose.tf @@ -1,14 +1,13 @@ module "firehose__processor" { - source = "./modules/firehose" - assume_account = local.aws_account_id - prefix = local.prefix - region = local.region - environment = local.environment - cloudwatch_kms_arn = module.kms__cloudwatch.kms_arn - splunk_environment = local.splunk_environment - splunk_index = local.splunk_index - destination = "splunk" - reporting_bucket_arn = local.reporting_bucket_arn - reporting_kms_arn = local.reporting_kms_arn - reporting_infra_toggle = local.is_dev_env && !local.is_sandbox_env + source = "./modules/firehose" + assume_account = local.aws_account_id + prefix = local.prefix + region = local.region + environment = local.environment + cloudwatch_kms_arn = module.kms__cloudwatch.kms_arn + splunk_environment = local.splunk_environment + splunk_index = local.splunk_index + destination = "splunk" + reporting_bucket_arn = local.reporting_bucket_arn + reporting_kms_arn = local.reporting_kms_arn } diff --git a/terraform/infrastructure/locals.tf b/terraform/infrastructure/locals.tf index 74882da5a..298c6a42d 100644 --- a/terraform/infrastructure/locals.tf +++ b/terraform/infrastructure/locals.tf @@ -22,20 +22,17 @@ locals { dynamodb_timeout_seconds = "3" is_sandbox_env = length(regexall("-sandbox-", local.stack_name)) > 0 - is_dev_env = var.account_name == "dev" || var.account_name == "int" environment = local.is_sandbox_env ? "${var.account_name}-sandbox" : var.account_name shared_prefix = "${local.project}--${local.environment}" public_domain = local.is_sandbox_env ? var.public_sandbox_domain : var.public_domain # Logic / vars for reporting - reporting_bucket_arn = local.is_dev_env && !local.is_sandbox_env ? data.aws_s3_bucket.source-data-bucket[0].arn : null - reporting_kms_arn = local.is_dev_env && !local.is_sandbox_env ? data.aws_kms_key.glue[0].arn : null - firehose_lambda_subscriptions = local.is_dev_env && !local.is_sandbox_env ? [ + reporting_bucket_arn = data.aws_s3_bucket.source-data-bucket[0].arn + reporting_kms_arn = data.aws_kms_key.glue[0].arn + firehose_lambda_subscriptions = [ module.firehose__processor.firehose_subscription, module.firehose__processor.firehose_reporting_subscription - ] : [ - module.firehose__processor.firehose_subscription ] # Logic / vars for splunk environment diff --git a/terraform/infrastructure/modules/firehose/cloudwatch.tf b/terraform/infrastructure/modules/firehose/cloudwatch.tf index 86aff3fd2..29df97a3f 100644 --- a/terraform/infrastructure/modules/firehose/cloudwatch.tf +++ b/terraform/infrastructure/modules/firehose/cloudwatch.tf @@ -9,13 +9,11 @@ resource "aws_cloudwatch_log_stream" "firehose" { } resource "aws_cloudwatch_log_group" "firehose_reporting" { - count = var.reporting_infra_toggle ? 1 : 0 name = "/aws/kinesisfirehose/${var.prefix}-firehose-reporting" retention_in_days = local.cloudwatch.retention.days } resource "aws_cloudwatch_log_stream" "firehose_reporting" { - count = var.reporting_infra_toggle ? 1 : 0 name = "${var.prefix}-firehose-reporting" log_group_name = aws_cloudwatch_log_group.firehose_reporting[0].name } diff --git a/terraform/infrastructure/modules/firehose/kinesis.tf b/terraform/infrastructure/modules/firehose/kinesis.tf index 144b8fa29..316edf863 100644 --- a/terraform/infrastructure/modules/firehose/kinesis.tf +++ b/terraform/infrastructure/modules/firehose/kinesis.tf @@ -58,7 +58,6 @@ resource "aws_kinesis_firehose_delivery_stream" "firehose" { } resource "aws_kinesis_firehose_delivery_stream" "reporting_stream" { - count = var.reporting_infra_toggle ? 1 : 0 name = "${var.prefix}--cloudwatch-reporting-delivery-stream" destination = "extended_s3" diff --git a/terraform/infrastructure/modules/firehose/locals.tf b/terraform/infrastructure/modules/firehose/locals.tf index 80a0f3367..558586ead 100644 --- a/terraform/infrastructure/modules/firehose/locals.tf +++ b/terraform/infrastructure/modules/firehose/locals.tf @@ -32,13 +32,13 @@ locals { } iam_firehose = { - cloudwatch_reporting_log_group_arn = var.reporting_infra_toggle ? aws_cloudwatch_log_group.firehose_reporting[0].arn : null - cloudwatch_reporting_log_stream_arn = var.reporting_infra_toggle ? aws_cloudwatch_log_stream.firehose_reporting[0].arn : null - reporting_s3_arn = var.reporting_infra_toggle ? "${var.reporting_bucket_arn}/*" : null + cloudwatch_reporting_log_group_arn = aws_cloudwatch_log_group.firehose_reporting[0].arn + cloudwatch_reporting_log_stream_arn = aws_cloudwatch_log_stream.firehose_reporting[0].arn + reporting_s3_arn = "${var.reporting_bucket_arn}/*" } iam_subscriptions = { - firehose_reporting_stream_arn = var.reporting_infra_toggle ? aws_kinesis_firehose_delivery_stream.reporting_stream[0].arn : null + firehose_reporting_stream_arn = aws_kinesis_firehose_delivery_stream.reporting_stream[0].arn } iam_kms_resources = compact([ diff --git a/terraform/infrastructure/modules/firehose/output.tf b/terraform/infrastructure/modules/firehose/output.tf index a0b594642..d8b8766a0 100644 --- a/terraform/infrastructure/modules/firehose/output.tf +++ b/terraform/infrastructure/modules/firehose/output.tf @@ -33,7 +33,7 @@ output "firehose_subscription" { } output "firehose_reporting_subscription" { - value = var.reporting_infra_toggle ? { + value = { destination = { arn = local.iam_subscriptions.firehose_reporting_stream_arn } @@ -44,5 +44,5 @@ output "firehose_reporting_subscription" { # At least two items, and the first not any of INIT_START, START, END, REPORT pattern = "[first_item_on_this_log_line != \"INIT_START\" && first_item_on_this_log_line != \"START\" && first_item_on_this_log_line != \"END\" && first_item_on_this_log_line != \"REPORT\", everything_else_on_this_log_line]" } - } : null + } } diff --git a/terraform/infrastructure/modules/firehose/vars.tf b/terraform/infrastructure/modules/firehose/vars.tf index e98affd1d..cce26b76c 100644 --- a/terraform/infrastructure/modules/firehose/vars.tf +++ b/terraform/infrastructure/modules/firehose/vars.tf @@ -44,7 +44,3 @@ variable "reporting_kms_arn" { type = string default = null } - -variable "reporting_infra_toggle" { - type = bool -} From 7a349591f121f03a99887456aeb36556a5092564 Mon Sep 17 00:00:00 2001 From: jackleary Date: Thu, 12 Jun 2025 13:28:28 +0100 Subject: [PATCH 07/27] NRL-1479 Add views as saved queries --- .../modules/athena/athena.tf | 14 ++ .../modules/athena/sql/rep_consumer.sql | 123 +++++++++++ .../modules/athena/sql/rep_producer.sql | 192 ++++++++++++++++++ .../modules/glue/outputs.tf | 4 + 4 files changed, 333 insertions(+) create mode 100644 terraform/account-wide-infrastructure/modules/athena/sql/rep_consumer.sql create mode 100644 terraform/account-wide-infrastructure/modules/athena/sql/rep_producer.sql diff --git a/terraform/account-wide-infrastructure/modules/athena/athena.tf b/terraform/account-wide-infrastructure/modules/athena/athena.tf index d111611e5..0916f6892 100644 --- a/terraform/account-wide-infrastructure/modules/athena/athena.tf +++ b/terraform/account-wide-infrastructure/modules/athena/athena.tf @@ -16,3 +16,17 @@ resource "aws_athena_workgroup" "athena" { } } + +resource "aws_athena_named_query" "rep_consumer" { + name = "rep_consumer" + workgroup = aws_athena_workgroup.athena.id + database = module.dev-glue.glue_database + query = file("${path.module}/sql/rep_consumer.sql") +} + +resource "aws_athena_named_query" "rep_producer" { + name = "rep_producer" + workgroup = aws_athena_workgroup.athena.id + database = module.dev-glue.glue_database + query = file("${path.module}/sql/rep_producer.sql") +} diff --git a/terraform/account-wide-infrastructure/modules/athena/sql/rep_consumer.sql b/terraform/account-wide-infrastructure/modules/athena/sql/rep_consumer.sql new file mode 100644 index 000000000..2e28aaaa8 --- /dev/null +++ b/terraform/account-wide-infrastructure/modules/athena/sql/rep_consumer.sql @@ -0,0 +1,123 @@ +CREATE OR REPLACE VIEW "rep_consumer" AS +WITH + cc AS ( + SELECT + time + , event_timestamp + , date + , host + , event_log_reference + , event_level + , event_location + , event_message + , event_service + , event_function_request_id + , event_correlation_id + , event_xray_trace_id + , event_pointer_types + , '' event_custodian + , COALESCE("event_headers_nhsd-end-user-organisation-ods", event_metadata_ods_code) user_ods + FROM + consumer_countdocumentreference +) +, cr AS ( + SELECT + time + , event_timestamp + , date + , host + , event_log_reference + , event_level + , event_location + , event_message + , event_service + , event_function_request_id + , event_correlation_id + , event_xray_trace_id + , event_pointer_types + , event_custodian + , COALESCE("event_headers_nhsd-end-user-organisation-ods", event_metadata_ods_code) user_ods + FROM + consumer_readdocumentreference +) +, cs AS ( + SELECT + time + , event_timestamp + , date + , host + , event_log_reference + , event_level + , event_location + , event_message + , event_service + , event_function_request_id + , event_correlation_id + , event_xray_trace_id + , event_pointer_types + , event_custodian + , COALESCE("event_headers_nhsd-end-user-organisation-ods", event_metadata_ods_code) user_ods + FROM + consumer_searchdocumentreference +) +, csp AS ( + SELECT + time + , event_timestamp + , date + , host + , event_log_reference + , event_level + , event_location + , event_message + , event_service + , event_function_request_id + , event_correlation_id + , event_xray_trace_id + , event_pointer_types + , event_custodian + , COALESCE("event_headers_nhsd-end-user-organisation-ods", event_metadata_ods_code) user_ods + FROM + consumer_searchpostdocumentreference +) +, base AS ( + SELECT * + FROM + cc +UNION SELECT * + FROM + cr +UNION SELECT * + FROM + cs +UNION SELECT * + FROM + csp +) +, ods_codes AS ( + SELECT DISTINCT + user_ods + , event_xray_trace_id + FROM + base + WHERE (user_ods IS NOT NULL) +) +SELECT + time +, event_timestamp +, date +, host +, event_log_reference +, event_level +, event_location +, event_message +, event_service +, event_function_request_id +, b.event_correlation_id +, b.event_xray_trace_id +, event_pointer_types +, COALESCE(COALESCE(event_custodian, LAG(event_custodian) IGNORE NULLS OVER (PARTITION BY b.event_xray_trace_id ORDER BY event_timestamp ASC)), COALESCE(event_custodian, LEAD(event_custodian) IGNORE NULLS OVER (PARTITION BY b.event_xray_trace_id ORDER BY event_timestamp ASC))) event_custodian +, oc.user_ods +FROM + (base b +LEFT JOIN ods_codes oc ON (b.event_xray_trace_id = oc.event_xray_trace_id)) diff --git a/terraform/account-wide-infrastructure/modules/athena/sql/rep_producer.sql b/terraform/account-wide-infrastructure/modules/athena/sql/rep_producer.sql new file mode 100644 index 000000000..2b01fbdaa --- /dev/null +++ b/terraform/account-wide-infrastructure/modules/athena/sql/rep_producer.sql @@ -0,0 +1,192 @@ +CREATE OR REPLACE VIEW "rep_producer" AS +WITH + pc AS ( + SELECT + time + , event_timestamp + , date + , host + , event_log_reference + , event_level + , event_location + , event_message + , event_service + , event_function_request_id + , event_correlation_id + , event_xray_trace_id + , event_pointer_types + , event_custodian + , COALESCE("event_headers_nhsd-end-user-organisation-ods", event_metadata_ods_code) user_ods + FROM + producer_createdocumentreference +) +, pd AS ( + SELECT + time + , event_timestamp + , date + , host + , event_log_reference + , event_level + , event_location + , event_message + , event_service + , event_function_request_id + , event_correlation_id + , event_xray_trace_id + , event_pointer_types + , event_custodian + , COALESCE("event_headers_nhsd-end-user-organisation-ods", event_metadata_ods_code) user_ods + FROM + producer_deletedocumentreference +) +, pr AS ( + SELECT + time + , event_timestamp + , date + , host + , event_log_reference + , event_level + , event_location + , event_message + , event_service + , event_function_request_id + , event_correlation_id + , event_xray_trace_id + , event_pointer_types + , event_custodian + , COALESCE("event_headers_nhsd-end-user-organisation-ods", event_metadata_ods_code) user_ods + FROM + producer_readdocumentreference +) +, ps AS ( + SELECT + time + , event_timestamp + , date + , host + , event_log_reference + , event_level + , event_location + , event_message + , event_service + , event_function_request_id + , event_correlation_id + , event_xray_trace_id + , event_pointer_types + , event_custodian + , COALESCE("event_headers_nhsd-end-user-organisation-ods", event_metadata_ods_code) user_ods + FROM + producer_searchdocumentreference +) +, psp AS ( + SELECT + time + , event_timestamp + , date + , host + , event_log_reference + , event_level + , event_location + , event_message + , event_service + , event_function_request_id + , event_correlation_id + , event_xray_trace_id + , event_pointer_types + , event_custodian + , COALESCE("event_headers_nhsd-end-user-organisation-ods", event_metadata_ods_code) user_ods + FROM + producer_searchpostdocumentreference +) +, pu AS ( + SELECT + time + , event_timestamp + , date + , host + , event_log_reference + , event_level + , event_location + , event_message + , event_service + , event_function_request_id + , event_correlation_id + , event_xray_trace_id + , event_pointer_types + , event_custodian + , COALESCE("event_headers_nhsd-end-user-organisation-ods", event_metadata_ods_code) user_ods + FROM + producer_updatedocumentreference +) +, pus AS ( + SELECT + time + , event_timestamp + , date + , host + , event_log_reference + , event_level + , event_location + , event_message + , event_service + , event_function_request_id + , event_correlation_id + , event_xray_trace_id + , event_pointer_types + , event_custodian + , COALESCE("event_headers_nhsd-end-user-organisation-ods", event_metadata_ods_code) user_ods + FROM + producer_upsertdocumentreference +) +, base AS ( + SELECT * + FROM + pc +UNION SELECT * + FROM + pd +UNION SELECT * + FROM + pr +UNION SELECT * + FROM + ps +UNION SELECT * + FROM + psp +UNION SELECT * + FROM + pu +UNION SELECT * + FROM + pus +) +, ods_codes AS ( + SELECT DISTINCT + user_ods + , event_xray_trace_id + FROM + base + WHERE (user_ods IS NOT NULL) +) +SELECT + time +, event_timestamp +, date +, host +, event_log_reference +, event_level +, event_location +, event_message +, event_service +, event_function_request_id +, b.event_correlation_id +, b.event_xray_trace_id +, event_pointer_types +, COALESCE(COALESCE(event_custodian, LAG(event_custodian) IGNORE NULLS OVER (PARTITION BY b.event_xray_trace_id ORDER BY event_timestamp ASC)), COALESCE(event_custodian, LEAD(event_custodian) IGNORE NULLS OVER (PARTITION BY b.event_xray_trace_id ORDER BY event_timestamp ASC))) event_custodian +, oc.user_ods +FROM + (base b +LEFT JOIN ods_codes oc ON (b.event_xray_trace_id = oc.event_xray_trace_id)) diff --git a/terraform/account-wide-infrastructure/modules/glue/outputs.tf b/terraform/account-wide-infrastructure/modules/glue/outputs.tf index dfc12029b..b1eee041b 100644 --- a/terraform/account-wide-infrastructure/modules/glue/outputs.tf +++ b/terraform/account-wide-infrastructure/modules/glue/outputs.tf @@ -21,3 +21,7 @@ output "aws_kms_key_arn" { output "glue_crawler_name" { value = "s3//${aws_s3_bucket.source-data-bucket.id}/" } + +output "glue_database" { + value = aws_glue_catalog_database.log_database.name +} From 9cf8f547e50242e53b66c121f2cccb654fcc78b5 Mon Sep 17 00:00:00 2001 From: jackleary Date: Fri, 13 Jun 2025 09:46:39 +0100 Subject: [PATCH 08/27] NRL-1479 Add athena views to deployment --- terraform/account-wide-infrastructure/dev/athena.tf | 1 + .../modules/athena/athena.tf | 4 ++-- .../account-wide-infrastructure/modules/athena/vars.tf | 10 +++++----- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/terraform/account-wide-infrastructure/dev/athena.tf b/terraform/account-wide-infrastructure/dev/athena.tf index cb7e78a97..d0fd98178 100644 --- a/terraform/account-wide-infrastructure/dev/athena.tf +++ b/terraform/account-wide-infrastructure/dev/athena.tf @@ -2,4 +2,5 @@ module "dev-athena" { source = "../modules/athena" name_prefix = "nhsd-nrlf--dev" target_bucket_name = module.dev-glue.target_bucket_name + glue_database = module.dev-glue.glue_database } diff --git a/terraform/account-wide-infrastructure/modules/athena/athena.tf b/terraform/account-wide-infrastructure/modules/athena/athena.tf index 0916f6892..b907541be 100644 --- a/terraform/account-wide-infrastructure/modules/athena/athena.tf +++ b/terraform/account-wide-infrastructure/modules/athena/athena.tf @@ -20,13 +20,13 @@ resource "aws_athena_workgroup" "athena" { resource "aws_athena_named_query" "rep_consumer" { name = "rep_consumer" workgroup = aws_athena_workgroup.athena.id - database = module.dev-glue.glue_database + database = var.glue_database query = file("${path.module}/sql/rep_consumer.sql") } resource "aws_athena_named_query" "rep_producer" { name = "rep_producer" workgroup = aws_athena_workgroup.athena.id - database = module.dev-glue.glue_database + database = var.glue_database query = file("${path.module}/sql/rep_producer.sql") } diff --git a/terraform/account-wide-infrastructure/modules/athena/vars.tf b/terraform/account-wide-infrastructure/modules/athena/vars.tf index d09d6f65c..a1d447972 100644 --- a/terraform/account-wide-infrastructure/modules/athena/vars.tf +++ b/terraform/account-wide-infrastructure/modules/athena/vars.tf @@ -1,8 +1,3 @@ -variable "database" { - description = "What the db will be called" - default = "nrl_reporting" -} - variable "name_prefix" { type = string description = "The prefix to apply to all resources in the module." @@ -11,3 +6,8 @@ variable "name_prefix" { variable "target_bucket_name" { type = string } + +variable "glue_database" { + type = string + description = "The Glue database in use" +} From 53acc81ad29b81979941f61621d5b1cbf7399218 Mon Sep 17 00:00:00 2001 From: Matt Dean Date: Fri, 13 Jun 2025 14:53:12 +0100 Subject: [PATCH 09/27] [NRL-1386] Rename ec2 module, add toggle for powerbi gw deploy. --- .../account-wide-infrastructure/dev/ec2.tf | 14 +++++----- .../account-wide-infrastructure/dev/vars.tf | 16 ++++++++---- .../modules/{ec2 => powerbi-gw-ec2}/data.tf | 0 .../modules/{ec2 => powerbi-gw-ec2}/ec2.tf | 0 .../modules/{ec2 => powerbi-gw-ec2}/iam.tf | 0 .../modules/{ec2 => powerbi-gw-ec2}/locals.tf | 0 .../{ec2 => powerbi-gw-ec2}/outputs.tf | 0 .../scripts/user_data.tpl | 0 .../modules/{ec2 => powerbi-gw-ec2}/vars.tf | 0 .../modules/vpc/vpc.tf | 1 - .../account-wide-infrastructure/prod/ec2.tf | 14 +++++----- .../account-wide-infrastructure/prod/vars.tf | 16 ++++++++++-- .../account-wide-infrastructure/test/ec2.tf | 26 ++++++++++--------- .../account-wide-infrastructure/test/glue.tf | 12 ++++----- .../account-wide-infrastructure/test/vars.tf | 16 ++++++++---- 15 files changed, 72 insertions(+), 43 deletions(-) rename terraform/account-wide-infrastructure/modules/{ec2 => powerbi-gw-ec2}/data.tf (100%) rename terraform/account-wide-infrastructure/modules/{ec2 => powerbi-gw-ec2}/ec2.tf (100%) rename terraform/account-wide-infrastructure/modules/{ec2 => powerbi-gw-ec2}/iam.tf (100%) rename terraform/account-wide-infrastructure/modules/{ec2 => powerbi-gw-ec2}/locals.tf (100%) rename terraform/account-wide-infrastructure/modules/{ec2 => powerbi-gw-ec2}/outputs.tf (100%) rename terraform/account-wide-infrastructure/modules/{ec2 => powerbi-gw-ec2}/scripts/user_data.tpl (100%) rename terraform/account-wide-infrastructure/modules/{ec2 => powerbi-gw-ec2}/vars.tf (100%) diff --git a/terraform/account-wide-infrastructure/dev/ec2.tf b/terraform/account-wide-infrastructure/dev/ec2.tf index 2ed6a2246..2bc3824ad 100644 --- a/terraform/account-wide-infrastructure/dev/ec2.tf +++ b/terraform/account-wide-infrastructure/dev/ec2.tf @@ -1,4 +1,5 @@ module "vpc" { + count = var.enable_powerbi_auto_push ? 1 : 0 source = "../modules/vpc" vpc_cidr_block = var.vpc_cidr_block enable_dns_hostnames = var.enable_dns_hostnames @@ -8,16 +9,17 @@ module "vpc" { name_prefix = "nhsd-nrlf--dev" } -module "powerbi_gw_instance_v2" { - source = "../modules/ec2" - use_custom_ami = true - instance_type = var.instance_type +module "powerbi_gw_instance" { + count = var.enable_powerbi_auto_push ? 1 : 0 + source = "../modules/powerbi-gw-ec2" + use_custom_ami = var.use_powerbi_gw_custom_ami + instance_type = var.powerbi_gw_instance_type name_prefix = "nhsd-nrlf--dev-powerbi-gw-v2" target_bucket_arn = module.dev-glue.target_bucket_arn glue_kms_key_arn = module.dev-glue.aws_kms_key_arn athena_kms_key_arn = module.dev-athena.kms_key_arn athena_bucket_arn = module.dev-athena.bucket_arn - subnet_id = module.vpc.private_subnet_id - security_groups = [module.vpc.powerbi_gw_security_group_id] + subnet_id = module.vpc[0].private_subnet_id + security_groups = [module.vpc[0].powerbi_gw_security_group_id] } diff --git a/terraform/account-wide-infrastructure/dev/vars.tf b/terraform/account-wide-infrastructure/dev/vars.tf index dcbbfd78b..d70175173 100644 --- a/terraform/account-wide-infrastructure/dev/vars.tf +++ b/terraform/account-wide-infrastructure/dev/vars.tf @@ -44,14 +44,20 @@ variable "vpc_private_subnets_cidr_block" { default = "10.0.1.0/24" } -variable "instance_type" { +variable "enable_powerbi_auto_push" { + type = bool + description = "Enable automatic pushing of info into PowerBI" + default = false +} + +variable "powerbi_gw_instance_type" { type = string - description = "Type for EC2 Instance" + description = "Type for PowerBI GW EC2 Instance" default = "t2.micro" } -variable "use_custom_ami" { +variable "use_powerbi_gw_custom_ami" { type = bool - description = "Use custom image" - default = false + description = "Use custom image for PowerBI GW instance" + default = true } diff --git a/terraform/account-wide-infrastructure/modules/ec2/data.tf b/terraform/account-wide-infrastructure/modules/powerbi-gw-ec2/data.tf similarity index 100% rename from terraform/account-wide-infrastructure/modules/ec2/data.tf rename to terraform/account-wide-infrastructure/modules/powerbi-gw-ec2/data.tf diff --git a/terraform/account-wide-infrastructure/modules/ec2/ec2.tf b/terraform/account-wide-infrastructure/modules/powerbi-gw-ec2/ec2.tf similarity index 100% rename from terraform/account-wide-infrastructure/modules/ec2/ec2.tf rename to terraform/account-wide-infrastructure/modules/powerbi-gw-ec2/ec2.tf diff --git a/terraform/account-wide-infrastructure/modules/ec2/iam.tf b/terraform/account-wide-infrastructure/modules/powerbi-gw-ec2/iam.tf similarity index 100% rename from terraform/account-wide-infrastructure/modules/ec2/iam.tf rename to terraform/account-wide-infrastructure/modules/powerbi-gw-ec2/iam.tf diff --git a/terraform/account-wide-infrastructure/modules/ec2/locals.tf b/terraform/account-wide-infrastructure/modules/powerbi-gw-ec2/locals.tf similarity index 100% rename from terraform/account-wide-infrastructure/modules/ec2/locals.tf rename to terraform/account-wide-infrastructure/modules/powerbi-gw-ec2/locals.tf diff --git a/terraform/account-wide-infrastructure/modules/ec2/outputs.tf b/terraform/account-wide-infrastructure/modules/powerbi-gw-ec2/outputs.tf similarity index 100% rename from terraform/account-wide-infrastructure/modules/ec2/outputs.tf rename to terraform/account-wide-infrastructure/modules/powerbi-gw-ec2/outputs.tf diff --git a/terraform/account-wide-infrastructure/modules/ec2/scripts/user_data.tpl b/terraform/account-wide-infrastructure/modules/powerbi-gw-ec2/scripts/user_data.tpl similarity index 100% rename from terraform/account-wide-infrastructure/modules/ec2/scripts/user_data.tpl rename to terraform/account-wide-infrastructure/modules/powerbi-gw-ec2/scripts/user_data.tpl diff --git a/terraform/account-wide-infrastructure/modules/ec2/vars.tf b/terraform/account-wide-infrastructure/modules/powerbi-gw-ec2/vars.tf similarity index 100% rename from terraform/account-wide-infrastructure/modules/ec2/vars.tf rename to terraform/account-wide-infrastructure/modules/powerbi-gw-ec2/vars.tf diff --git a/terraform/account-wide-infrastructure/modules/vpc/vpc.tf b/terraform/account-wide-infrastructure/modules/vpc/vpc.tf index b1fa293ea..b2678bc6c 100644 --- a/terraform/account-wide-infrastructure/modules/vpc/vpc.tf +++ b/terraform/account-wide-infrastructure/modules/vpc/vpc.tf @@ -55,7 +55,6 @@ resource "aws_route_table" "private_rt" { } resource "aws_eip" "natgw-ip" { - domain = "vpc" } resource "aws_nat_gateway" "nat" { diff --git a/terraform/account-wide-infrastructure/prod/ec2.tf b/terraform/account-wide-infrastructure/prod/ec2.tf index 73e76d3c0..648b6af3c 100644 --- a/terraform/account-wide-infrastructure/prod/ec2.tf +++ b/terraform/account-wide-infrastructure/prod/ec2.tf @@ -1,4 +1,5 @@ module "vpc" { + count = var.enable_powerbi_auto_push ? 1 : 0 source = "../modules/vpc" vpc_cidr_block = var.vpc_cidr_block enable_dns_hostnames = var.enable_dns_hostnames @@ -8,16 +9,17 @@ module "vpc" { name_prefix = "nhsd-nrlf--prod" } -module "powerbi_gw_instance_v2" { - source = "../modules/ec2" - use_custom_ami = true - instance_type = var.instance_type +module "powerbi_gw_instance" { + count = var.enable_powerbi_auto_push ? 1 : 0 + source = "../modules/powerbi-gw-ec2" + use_custom_ami = false + instance_type = var.powerbi_gw_instance_type name_prefix = "nhsd-nrlf--test-powerbi-gw-v2" target_bucket_arn = module.prod-glue.target_bucket_arn glue_kms_key_arn = module.prod-glue.aws_kms_key_arn athena_kms_key_arn = module.prod-athena.kms_key_arn athena_bucket_arn = module.prod-athena.bucket_arn - subnet_id = module.vpc.private_subnet_id - security_groups = [module.vpc.powerbi_gw_security_group_id] + subnet_id = module.vpc[0].private_subnet_id + security_groups = [module.vpc[0].powerbi_gw_security_group_id] } diff --git a/terraform/account-wide-infrastructure/prod/vars.tf b/terraform/account-wide-infrastructure/prod/vars.tf index d7d33ba64..666021f18 100644 --- a/terraform/account-wide-infrastructure/prod/vars.tf +++ b/terraform/account-wide-infrastructure/prod/vars.tf @@ -45,8 +45,20 @@ variable "instance_type" { default = "t2.micro" } -variable "use_custom_ami" { +variable "enable_powerbi_auto_push" { type = bool - description = "Use custom image" + description = "Enable automatic pushing of info into PowerBI" default = false } + +variable "powerbi_gw_instance_type" { + type = string + description = "Type for PowerBI GW EC2 Instance" + default = "t2.micro" +} + +variable "use_powerbi_gw_custom_ami" { + type = bool + description = "Use custom image for PowerBI GW instance" + default = true +} diff --git a/terraform/account-wide-infrastructure/test/ec2.tf b/terraform/account-wide-infrastructure/test/ec2.tf index 2ed6a2246..a20444e44 100644 --- a/terraform/account-wide-infrastructure/test/ec2.tf +++ b/terraform/account-wide-infrastructure/test/ec2.tf @@ -1,23 +1,25 @@ module "vpc" { + count = var.enable_powerbi_auto_push ? 1 : 0 source = "../modules/vpc" vpc_cidr_block = var.vpc_cidr_block enable_dns_hostnames = var.enable_dns_hostnames vpc_public_subnets_cidr_block = var.vpc_public_subnets_cidr_block vpc_private_subnets_cidr_block = var.vpc_private_subnets_cidr_block aws_azs = var.aws_azs - name_prefix = "nhsd-nrlf--dev" + name_prefix = "nhsd-nrlf--test" } -module "powerbi_gw_instance_v2" { - source = "../modules/ec2" - use_custom_ami = true - instance_type = var.instance_type - name_prefix = "nhsd-nrlf--dev-powerbi-gw-v2" - target_bucket_arn = module.dev-glue.target_bucket_arn - glue_kms_key_arn = module.dev-glue.aws_kms_key_arn - athena_kms_key_arn = module.dev-athena.kms_key_arn - athena_bucket_arn = module.dev-athena.bucket_arn +module "powerbi_gw_instance" { + count = var.enable_powerbi_auto_push ? 1 : 0 + source = "../modules/powerbi-gw-ec2" + use_custom_ami = var.use_powerbi_gw_custom_ami + instance_type = var.powerbi_gw_instance_type + name_prefix = "nhsd-nrlf--test-powerbi-gw-v2" + target_bucket_arn = module.int-glue.target_bucket_arn + glue_kms_key_arn = module.int-glue.aws_kms_key_arn + athena_kms_key_arn = module.int-athena.kms_key_arn + athena_bucket_arn = module.int-athena.bucket_arn - subnet_id = module.vpc.private_subnet_id - security_groups = [module.vpc.powerbi_gw_security_group_id] + subnet_id = module.vpc[0].private_subnet_id + security_groups = [module.vpc[0].powerbi_gw_security_group_id] } diff --git a/terraform/account-wide-infrastructure/test/glue.tf b/terraform/account-wide-infrastructure/test/glue.tf index d40394f9d..51c721b5f 100644 --- a/terraform/account-wide-infrastructure/test/glue.tf +++ b/terraform/account-wide-infrastructure/test/glue.tf @@ -1,22 +1,22 @@ -module "int-glue" { +module "qa-glue" { source = "../modules/glue" - name_prefix = "nhsd-nrlf--int" + name_prefix = "nhsd-nrlf--qa" python_version = 3 } module "int-glue" { source = "../modules/glue" - name_prefix = "nhsd-nrlf--int-sandbox" + name_prefix = "nhsd-nrlf--int" python_version = 3 } -module "int-glue" { +module "int-sandbox-glue" { source = "../modules/glue" - name_prefix = "nhsd-nrlf--qa" + name_prefix = "nhsd-nrlf--int-sandbox" python_version = 3 } -module "int-glue" { +module "ref-glue" { source = "../modules/glue" name_prefix = "nhsd-nrlf--ref" python_version = 3 diff --git a/terraform/account-wide-infrastructure/test/vars.tf b/terraform/account-wide-infrastructure/test/vars.tf index adc7a1bcd..c71bf86b2 100644 --- a/terraform/account-wide-infrastructure/test/vars.tf +++ b/terraform/account-wide-infrastructure/test/vars.tf @@ -59,14 +59,20 @@ variable "vpc_private_subnets_cidr_block" { default = "10.0.1.0/24" } -variable "instance_type" { +variable "enable_powerbi_auto_push" { + type = bool + description = "Enable automatic pushing of info into PowerBI" + default = false +} + +variable "powerbi_gw_instance_type" { type = string - description = "Type for EC2 Instance" + description = "Type for PowerBI GW EC2 Instance" default = "t2.micro" } -variable "use_custom_ami" { +variable "use_powerbi_gw_custom_ami" { type = bool - description = "Use custom image" - default = false + description = "Use custom image for PowerBI GW instance" + default = true } From 8cdf23876930aa54d51d1b88ffd5fd16e43a8070 Mon Sep 17 00:00:00 2001 From: Matt Dean Date: Fri, 13 Jun 2025 15:03:21 +0100 Subject: [PATCH 10/27] [NRL-1386] Revert unwanted firehose buffering change --- terraform/infrastructure/modules/firehose/kinesis.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/terraform/infrastructure/modules/firehose/kinesis.tf b/terraform/infrastructure/modules/firehose/kinesis.tf index 799bb63cb..316edf863 100644 --- a/terraform/infrastructure/modules/firehose/kinesis.tf +++ b/terraform/infrastructure/modules/firehose/kinesis.tf @@ -65,7 +65,7 @@ resource "aws_kinesis_firehose_delivery_stream" "reporting_stream" { role_arn = aws_iam_role.firehose.arn bucket_arn = var.reporting_bucket_arn buffering_size = 64 - buffering_interval = 180 + buffering_interval = 600 processing_configuration { enabled = "true" From 910dd30b9898cf297469e3a912314cabd6617252 Mon Sep 17 00:00:00 2001 From: Matt Dean Date: Mon, 16 Jun 2025 13:37:25 +0100 Subject: [PATCH 11/27] [NRL-1386] WIP Fixup powerbi gw TF prov issues --- terraform/account-wide-infrastructure/dev/vars.tf | 2 +- .../account-wide-infrastructure/modules/powerbi-gw-ec2/ec2.tf | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/terraform/account-wide-infrastructure/dev/vars.tf b/terraform/account-wide-infrastructure/dev/vars.tf index d70175173..0590f74d3 100644 --- a/terraform/account-wide-infrastructure/dev/vars.tf +++ b/terraform/account-wide-infrastructure/dev/vars.tf @@ -47,7 +47,7 @@ variable "vpc_private_subnets_cidr_block" { variable "enable_powerbi_auto_push" { type = bool description = "Enable automatic pushing of info into PowerBI" - default = false + default = true } variable "powerbi_gw_instance_type" { diff --git a/terraform/account-wide-infrastructure/modules/powerbi-gw-ec2/ec2.tf b/terraform/account-wide-infrastructure/modules/powerbi-gw-ec2/ec2.tf index e5016f13f..0f9d0e572 100644 --- a/terraform/account-wide-infrastructure/modules/powerbi-gw-ec2/ec2.tf +++ b/terraform/account-wide-infrastructure/modules/powerbi-gw-ec2/ec2.tf @@ -5,7 +5,9 @@ resource "aws_instance" "web" { instance_type = var.instance_type key_name = aws_key_pair.ec2_key_pair.key_name subnet_id = var.subnet_id - security_groups = var.security_groups + # TODO WORK OUT WHY THIS IS HAVING SUCH AN ISSUE..... + #security_groups = var.security_groups + vpc_security_group_ids = var.security_groups user_data = file("${path.module}/scripts/user_data.tpl") From 0e8116a0bcabe725a6d243fab0d887aaa77ed38a Mon Sep 17 00:00:00 2001 From: Matt Dean Date: Mon, 16 Jun 2025 16:24:30 +0100 Subject: [PATCH 12/27] [NRL-1386] Add missing glue_database to test+prod. Fix TF apply state issues --- .../account-wide-infrastructure/dev/ec2.tf | 2 +- .../modules/glue/glue.tf | 22 +++++++++---------- .../modules/glue/s3.tf | 16 +++++++------- .../modules/powerbi-gw-ec2/ec2.tf | 6 ++--- .../modules/powerbi-gw-ec2/outputs.tf | 6 +---- .../prod/athena.tf | 1 + .../account-wide-infrastructure/prod/ec2.tf | 2 +- .../test/athena.tf | 4 ++++ .../account-wide-infrastructure/test/ec2.tf | 2 +- 9 files changed, 30 insertions(+), 31 deletions(-) diff --git a/terraform/account-wide-infrastructure/dev/ec2.tf b/terraform/account-wide-infrastructure/dev/ec2.tf index 2bc3824ad..c0748a950 100644 --- a/terraform/account-wide-infrastructure/dev/ec2.tf +++ b/terraform/account-wide-infrastructure/dev/ec2.tf @@ -14,7 +14,7 @@ module "powerbi_gw_instance" { source = "../modules/powerbi-gw-ec2" use_custom_ami = var.use_powerbi_gw_custom_ami instance_type = var.powerbi_gw_instance_type - name_prefix = "nhsd-nrlf--dev-powerbi-gw-v2" + name_prefix = "nhsd-nrlf--dev-powerbi-gw" target_bucket_arn = module.dev-glue.target_bucket_arn glue_kms_key_arn = module.dev-glue.aws_kms_key_arn athena_kms_key_arn = module.dev-athena.kms_key_arn diff --git a/terraform/account-wide-infrastructure/modules/glue/glue.tf b/terraform/account-wide-infrastructure/modules/glue/glue.tf index e36433b5c..530101297 100644 --- a/terraform/account-wide-infrastructure/modules/glue/glue.tf +++ b/terraform/account-wide-infrastructure/modules/glue/glue.tf @@ -10,37 +10,37 @@ resource "aws_glue_crawler" "log_crawler" { database_name = aws_glue_catalog_database.log_database.name role = aws_iam_role.glue_service_role.name s3_target { - path = "${aws_s3_bucket.target-data-bucket.id}/consumer_countDocumentReference/" + path = "s3://${aws_s3_bucket.target-data-bucket.id}/consumer_countDocumentReference/" } s3_target { - path = "${aws_s3_bucket.target-data-bucket.id}/consumer_readDocumentReference/" + path = "s3://${aws_s3_bucket.target-data-bucket.id}/consumer_readDocumentReference/" } s3_target { - path = "${aws_s3_bucket.target-data-bucket.id}/consumer_searchDocumentReference/" + path = "s3://${aws_s3_bucket.target-data-bucket.id}/consumer_searchDocumentReference/" } s3_target { - path = "${aws_s3_bucket.target-data-bucket.id}/consumer_searchPostDocumentReference/" + path = "s3://${aws_s3_bucket.target-data-bucket.id}/consumer_searchPostDocumentReference/" } s3_target { - path = "${aws_s3_bucket.target-data-bucket.id}/producer_createDocumentReference/" + path = "s3://${aws_s3_bucket.target-data-bucket.id}/producer_createDocumentReference/" } s3_target { - path = "${aws_s3_bucket.target-data-bucket.id}/producer_deleteDocumentReference/" + path = "s3://${aws_s3_bucket.target-data-bucket.id}/producer_deleteDocumentReference/" } s3_target { - path = "${aws_s3_bucket.target-data-bucket.id}/producer_readDocumentReference/" + path = "s3://${aws_s3_bucket.target-data-bucket.id}/producer_readDocumentReference/" } s3_target { - path = "${aws_s3_bucket.target-data-bucket.id}/producer_searchDocumentReference/" + path = "s3://${aws_s3_bucket.target-data-bucket.id}/producer_searchDocumentReference/" } s3_target { - path = "${aws_s3_bucket.target-data-bucket.id}/producer_searchPostDocumentReference/" + path = "s3://${aws_s3_bucket.target-data-bucket.id}/producer_searchPostDocumentReference/" } s3_target { - path = "${aws_s3_bucket.target-data-bucket.id}/producer_updateDocumentReference/" + path = "s3://${aws_s3_bucket.target-data-bucket.id}/producer_updateDocumentReference/" } s3_target { - path = "${aws_s3_bucket.target-data-bucket.id}/producer_upsertDocumentReference/" + path = "s3://${aws_s3_bucket.target-data-bucket.id}/producer_upsertDocumentReference/" } schema_change_policy { delete_behavior = "LOG" diff --git a/terraform/account-wide-infrastructure/modules/glue/s3.tf b/terraform/account-wide-infrastructure/modules/glue/s3.tf index 14f7b9824..b4e9133b1 100644 --- a/terraform/account-wide-infrastructure/modules/glue/s3.tf +++ b/terraform/account-wide-infrastructure/modules/glue/s3.tf @@ -180,10 +180,10 @@ resource "aws_s3_bucket_public_access_block" "code-bucket-public-access-block" { } resource "aws_s3_object" "script" { - bucket = aws_s3_bucket.code-bucket.bucket - key = "main.py" - source = "${path.module}/src/main.py" - etag = filemd5("${path.module}/src/main.py") + bucket = aws_s3_bucket.code-bucket.bucket + key = "main.py" + source = "${path.module}/src/main.py" + source_hash = filemd5("${path.module}/src/main.py") } data "archive_file" "python" { @@ -194,8 +194,8 @@ data "archive_file" "python" { } resource "aws_s3_object" "zip" { - bucket = aws_s3_bucket.code-bucket.bucket - key = "src.zip" - source = data.archive_file.python.output_path - etag = filemd5(data.archive_file.python.output_path) + bucket = aws_s3_bucket.code-bucket.bucket + key = "src.zip" + source = data.archive_file.python.output_path + source_hash = filemd5(data.archive_file.python.output_path) } diff --git a/terraform/account-wide-infrastructure/modules/powerbi-gw-ec2/ec2.tf b/terraform/account-wide-infrastructure/modules/powerbi-gw-ec2/ec2.tf index 0f9d0e572..37f160cbf 100644 --- a/terraform/account-wide-infrastructure/modules/powerbi-gw-ec2/ec2.tf +++ b/terraform/account-wide-infrastructure/modules/powerbi-gw-ec2/ec2.tf @@ -1,13 +1,11 @@ -resource "aws_instance" "web" { +resource "aws_instance" "powerbi_gw" { associate_public_ip_address = false iam_instance_profile = aws_iam_instance_profile.powerbi_profile.name ami = local.selected_ami_id instance_type = var.instance_type key_name = aws_key_pair.ec2_key_pair.key_name subnet_id = var.subnet_id - # TODO WORK OUT WHY THIS IS HAVING SUCH AN ISSUE..... - #security_groups = var.security_groups - vpc_security_group_ids = var.security_groups + vpc_security_group_ids = var.security_groups user_data = file("${path.module}/scripts/user_data.tpl") diff --git a/terraform/account-wide-infrastructure/modules/powerbi-gw-ec2/outputs.tf b/terraform/account-wide-infrastructure/modules/powerbi-gw-ec2/outputs.tf index 10e5a82d1..02ba7f310 100644 --- a/terraform/account-wide-infrastructure/modules/powerbi-gw-ec2/outputs.tf +++ b/terraform/account-wide-infrastructure/modules/powerbi-gw-ec2/outputs.tf @@ -1,7 +1,3 @@ output "instance_id" { - value = aws_instance.web.id -} - -output "public_ip" { - value = aws_instance.web.public_ip + value = aws_instance.powerbi_gw.id } diff --git a/terraform/account-wide-infrastructure/prod/athena.tf b/terraform/account-wide-infrastructure/prod/athena.tf index d5d59c9b7..6123b24b6 100644 --- a/terraform/account-wide-infrastructure/prod/athena.tf +++ b/terraform/account-wide-infrastructure/prod/athena.tf @@ -2,4 +2,5 @@ module "prod-athena" { source = "../modules/athena" name_prefix = "nhsd-nrlf--prod" target_bucket_name = module.prod-glue.target_bucket_name + glue_database = module.prod-glue.glue_database } diff --git a/terraform/account-wide-infrastructure/prod/ec2.tf b/terraform/account-wide-infrastructure/prod/ec2.tf index 648b6af3c..e5510bb88 100644 --- a/terraform/account-wide-infrastructure/prod/ec2.tf +++ b/terraform/account-wide-infrastructure/prod/ec2.tf @@ -14,7 +14,7 @@ module "powerbi_gw_instance" { source = "../modules/powerbi-gw-ec2" use_custom_ami = false instance_type = var.powerbi_gw_instance_type - name_prefix = "nhsd-nrlf--test-powerbi-gw-v2" + name_prefix = "nhsd-nrlf--test-powerbi-gw" target_bucket_arn = module.prod-glue.target_bucket_arn glue_kms_key_arn = module.prod-glue.aws_kms_key_arn athena_kms_key_arn = module.prod-athena.kms_key_arn diff --git a/terraform/account-wide-infrastructure/test/athena.tf b/terraform/account-wide-infrastructure/test/athena.tf index dd1702f22..a07c884bf 100644 --- a/terraform/account-wide-infrastructure/test/athena.tf +++ b/terraform/account-wide-infrastructure/test/athena.tf @@ -2,22 +2,26 @@ module "qa-athena" { source = "../modules/athena" name_prefix = "nhsd-nrlf--qa" target_bucket_name = module.qa-glue.target_bucket_name + glue_database = module.qa-glue.glue_database } module "int-athena" { source = "../modules/athena" name_prefix = "nhsd-nrlf--int" target_bucket_name = module.int-glue.target_bucket_name + glue_database = module.int-glue.glue_database } module "int-sandbox-athena" { source = "../modules/athena" name_prefix = "nhsd-nrlf--int-sandbox" target_bucket_name = module.int-sandbox-glue.target_bucket_name + glue_database = module.int-sandbox-glue.glue_database } module "ref-athena" { source = "../modules/athena" name_prefix = "nhsd-nrlf--ref" target_bucket_name = module.ref-glue.target_bucket_name + glue_database = module.ref-glue.glue_database } diff --git a/terraform/account-wide-infrastructure/test/ec2.tf b/terraform/account-wide-infrastructure/test/ec2.tf index a20444e44..aa35f5569 100644 --- a/terraform/account-wide-infrastructure/test/ec2.tf +++ b/terraform/account-wide-infrastructure/test/ec2.tf @@ -14,7 +14,7 @@ module "powerbi_gw_instance" { source = "../modules/powerbi-gw-ec2" use_custom_ami = var.use_powerbi_gw_custom_ami instance_type = var.powerbi_gw_instance_type - name_prefix = "nhsd-nrlf--test-powerbi-gw-v2" + name_prefix = "nhsd-nrlf--test-powerbi-gw" target_bucket_arn = module.int-glue.target_bucket_arn glue_kms_key_arn = module.int-glue.aws_kms_key_arn athena_kms_key_arn = module.int-athena.kms_key_arn From b25f5690f2a28b4e723e65426cb18ebca47f956e Mon Sep 17 00:00:00 2001 From: Matt Dean Date: Mon, 16 Jun 2025 18:51:04 +0100 Subject: [PATCH 13/27] [NRL-1386] Fix glue source bucket lifecycle auto-deletes --- terraform/account-wide-infrastructure/modules/glue/s3.tf | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/terraform/account-wide-infrastructure/modules/glue/s3.tf b/terraform/account-wide-infrastructure/modules/glue/s3.tf index b4e9133b1..09578dcae 100644 --- a/terraform/account-wide-infrastructure/modules/glue/s3.tf +++ b/terraform/account-wide-infrastructure/modules/glue/s3.tf @@ -56,12 +56,16 @@ resource "aws_s3_bucket_lifecycle_configuration" "source-data-bucket-lifecycle" rule { - id = "bucket-versioning-rule" + id = "object-auto-delete-rule" status = "Enabled" expiration { days = local.s3.expiration.days } + + noncurrent_version_expiration { + noncurrent_days = local.s3.expiration.days + } } } From 950d036a2fe126d2df9cb5c6d6629591f8418bec Mon Sep 17 00:00:00 2001 From: Matt Dean Date: Wed, 18 Jun 2025 09:23:09 +0100 Subject: [PATCH 14/27] [NRL-1386] Add var to enable/disable all reporting services per env and account-wide --- .../account-wide-infrastructure/dev/athena.tf | 1 + terraform/account-wide-infrastructure/dev/ec2.tf | 8 ++++---- terraform/account-wide-infrastructure/dev/glue.tf | 1 + terraform/account-wide-infrastructure/dev/vars.tf | 6 ++++++ .../modules/glue/glue.tf | 12 ++++++++++-- .../modules/glue/outputs.tf | 2 +- .../modules/glue/s3.tf | 6 +----- .../modules/glue/vars.tf | 6 ++++++ .../account-wide-infrastructure/prod/athena.tf | 1 + terraform/account-wide-infrastructure/prod/ec2.tf | 8 ++++---- .../account-wide-infrastructure/prod/glue.tf | 1 + .../account-wide-infrastructure/prod/vars.tf | 6 ++++++ .../account-wide-infrastructure/test/athena.tf | 10 +++++++++- terraform/account-wide-infrastructure/test/ec2.tf | 12 ++++++------ .../account-wide-infrastructure/test/glue.tf | 9 ++++++++- .../account-wide-infrastructure/test/vars.tf | 6 ++++++ terraform/infrastructure/data.tf | 4 ++-- terraform/infrastructure/etc/dev.tfvars | 4 +++- terraform/infrastructure/etc/int.tfvars | 6 ++++-- terraform/infrastructure/etc/prod.tfvars | 5 ++++- terraform/infrastructure/etc/qa.tfvars | 4 +++- terraform/infrastructure/etc/ref.tfvars | 5 ++++- terraform/infrastructure/locals.tf | 5 +++-- .../infrastructure/modules/firehose/cloudwatch.tf | 2 +- .../infrastructure/modules/firehose/kinesis.tf | 8 ++++---- .../infrastructure/modules/firehose/locals.tf | 6 +++--- terraform/infrastructure/modules/firehose/vars.tf | 7 +++++++ terraform/infrastructure/vars.tf | 15 ++++++++++++++- 28 files changed, 123 insertions(+), 43 deletions(-) diff --git a/terraform/account-wide-infrastructure/dev/athena.tf b/terraform/account-wide-infrastructure/dev/athena.tf index d0fd98178..77cc82148 100644 --- a/terraform/account-wide-infrastructure/dev/athena.tf +++ b/terraform/account-wide-infrastructure/dev/athena.tf @@ -1,4 +1,5 @@ module "dev-athena" { + count = var.enable_reporting ? 1 : 0 source = "../modules/athena" name_prefix = "nhsd-nrlf--dev" target_bucket_name = module.dev-glue.target_bucket_name diff --git a/terraform/account-wide-infrastructure/dev/ec2.tf b/terraform/account-wide-infrastructure/dev/ec2.tf index c0748a950..bfe9ede22 100644 --- a/terraform/account-wide-infrastructure/dev/ec2.tf +++ b/terraform/account-wide-infrastructure/dev/ec2.tf @@ -1,5 +1,5 @@ module "vpc" { - count = var.enable_powerbi_auto_push ? 1 : 0 + count = var.enable_reporting && var.enable_powerbi_auto_push ? 1 : 0 source = "../modules/vpc" vpc_cidr_block = var.vpc_cidr_block enable_dns_hostnames = var.enable_dns_hostnames @@ -10,15 +10,15 @@ module "vpc" { } module "powerbi_gw_instance" { - count = var.enable_powerbi_auto_push ? 1 : 0 + count = var.enable_reporting && var.enable_powerbi_auto_push ? 1 : 0 source = "../modules/powerbi-gw-ec2" use_custom_ami = var.use_powerbi_gw_custom_ami instance_type = var.powerbi_gw_instance_type name_prefix = "nhsd-nrlf--dev-powerbi-gw" target_bucket_arn = module.dev-glue.target_bucket_arn glue_kms_key_arn = module.dev-glue.aws_kms_key_arn - athena_kms_key_arn = module.dev-athena.kms_key_arn - athena_bucket_arn = module.dev-athena.bucket_arn + athena_kms_key_arn = module.dev-athena[0].kms_key_arn + athena_bucket_arn = module.dev-athena[0].bucket_arn subnet_id = module.vpc[0].private_subnet_id security_groups = [module.vpc[0].powerbi_gw_security_group_id] diff --git a/terraform/account-wide-infrastructure/dev/glue.tf b/terraform/account-wide-infrastructure/dev/glue.tf index e8fbd713a..9f52c1f9a 100644 --- a/terraform/account-wide-infrastructure/dev/glue.tf +++ b/terraform/account-wide-infrastructure/dev/glue.tf @@ -1,4 +1,5 @@ module "dev-glue" { + is_enabled = var.enable_reporting source = "../modules/glue" name_prefix = "nhsd-nrlf--dev" python_version = 3 diff --git a/terraform/account-wide-infrastructure/dev/vars.tf b/terraform/account-wide-infrastructure/dev/vars.tf index 0590f74d3..f6a48433e 100644 --- a/terraform/account-wide-infrastructure/dev/vars.tf +++ b/terraform/account-wide-infrastructure/dev/vars.tf @@ -14,6 +14,12 @@ variable "devsandbox_api_domain_name" { default = "dev-sandbox.api.record-locator.dev.national.nhs.uk" } +variable "enable_reporting" { + type = bool + description = "Enable account-wide reporting services in the dev account" + default = false +} + variable "aws_azs" { type = string description = "AWS Availability Zones" diff --git a/terraform/account-wide-infrastructure/modules/glue/glue.tf b/terraform/account-wide-infrastructure/modules/glue/glue.tf index 530101297..64a3c5d99 100644 --- a/terraform/account-wide-infrastructure/modules/glue/glue.tf +++ b/terraform/account-wide-infrastructure/modules/glue/glue.tf @@ -1,13 +1,17 @@ # Create Glue Data Catalog Database resource "aws_glue_catalog_database" "log_database" { + count = var.is_enabled ? 1 : 0 + name = "${var.name_prefix}-reporting" location_uri = "${aws_s3_bucket.target-data-bucket.id}/" } # Create Glue Crawler resource "aws_glue_crawler" "log_crawler" { + count = var.is_enabled ? 1 : 0 + name = "${var.name_prefix}-log-crawler" - database_name = aws_glue_catalog_database.log_database.name + database_name = aws_glue_catalog_database.log_database[0].name role = aws_iam_role.glue_service_role.name s3_target { path = "s3://${aws_s3_bucket.target-data-bucket.id}/consumer_countDocumentReference/" @@ -53,14 +57,18 @@ resource "aws_glue_crawler" "log_crawler" { }) } resource "aws_glue_trigger" "log_trigger" { + count = var.is_enabled ? 1 : 0 + name = "${var.name_prefix}-org-report-trigger" type = "ON_DEMAND" actions { - crawler_name = aws_glue_crawler.log_crawler.name + crawler_name = aws_glue_crawler.log_crawler[0].name } } resource "aws_glue_job" "glue_job" { + count = var.is_enabled ? 1 : 0 + name = "${var.name_prefix}-glue-job" role_arn = aws_iam_role.glue_service_role.arn description = "Transfer logs from source to bucket" diff --git a/terraform/account-wide-infrastructure/modules/glue/outputs.tf b/terraform/account-wide-infrastructure/modules/glue/outputs.tf index b1eee041b..0c5547fa0 100644 --- a/terraform/account-wide-infrastructure/modules/glue/outputs.tf +++ b/terraform/account-wide-infrastructure/modules/glue/outputs.tf @@ -23,5 +23,5 @@ output "glue_crawler_name" { } output "glue_database" { - value = aws_glue_catalog_database.log_database.name + value = var.is_enabled ? aws_glue_catalog_database.log_database[0].name : "" } diff --git a/terraform/account-wide-infrastructure/modules/glue/s3.tf b/terraform/account-wide-infrastructure/modules/glue/s3.tf index 09578dcae..cff5d1274 100644 --- a/terraform/account-wide-infrastructure/modules/glue/s3.tf +++ b/terraform/account-wide-infrastructure/modules/glue/s3.tf @@ -62,17 +62,13 @@ resource "aws_s3_bucket_lifecycle_configuration" "source-data-bucket-lifecycle" expiration { days = local.s3.expiration.days } - - noncurrent_version_expiration { - noncurrent_days = local.s3.expiration.days - } } } resource "aws_s3_bucket_versioning" "source-data-bucket-versioning" { bucket = aws_s3_bucket.source-data-bucket.id versioning_configuration { - status = "Enabled" + status = "Disabled" } } diff --git a/terraform/account-wide-infrastructure/modules/glue/vars.tf b/terraform/account-wide-infrastructure/modules/glue/vars.tf index cb03095bf..ae3281303 100644 --- a/terraform/account-wide-infrastructure/modules/glue/vars.tf +++ b/terraform/account-wide-infrastructure/modules/glue/vars.tf @@ -22,3 +22,9 @@ variable "code_bucket" { description = "S3 bucket for Glue job scripts" default = "code-bucket" } + +variable "is_enabled" { + type = bool + description = "Flag to enable or disable the Glue module" + default = true +} diff --git a/terraform/account-wide-infrastructure/prod/athena.tf b/terraform/account-wide-infrastructure/prod/athena.tf index 6123b24b6..9242ddd5e 100644 --- a/terraform/account-wide-infrastructure/prod/athena.tf +++ b/terraform/account-wide-infrastructure/prod/athena.tf @@ -1,4 +1,5 @@ module "prod-athena" { + count = var.enable_reporting ? 1 : 0 source = "../modules/athena" name_prefix = "nhsd-nrlf--prod" target_bucket_name = module.prod-glue.target_bucket_name diff --git a/terraform/account-wide-infrastructure/prod/ec2.tf b/terraform/account-wide-infrastructure/prod/ec2.tf index e5510bb88..669b2d5be 100644 --- a/terraform/account-wide-infrastructure/prod/ec2.tf +++ b/terraform/account-wide-infrastructure/prod/ec2.tf @@ -1,5 +1,5 @@ module "vpc" { - count = var.enable_powerbi_auto_push ? 1 : 0 + count = var.enable_reporting && var.enable_powerbi_auto_push ? 1 : 0 source = "../modules/vpc" vpc_cidr_block = var.vpc_cidr_block enable_dns_hostnames = var.enable_dns_hostnames @@ -10,15 +10,15 @@ module "vpc" { } module "powerbi_gw_instance" { - count = var.enable_powerbi_auto_push ? 1 : 0 + count = var.enable_reporting && var.enable_powerbi_auto_push ? 1 : 0 source = "../modules/powerbi-gw-ec2" use_custom_ami = false instance_type = var.powerbi_gw_instance_type name_prefix = "nhsd-nrlf--test-powerbi-gw" target_bucket_arn = module.prod-glue.target_bucket_arn glue_kms_key_arn = module.prod-glue.aws_kms_key_arn - athena_kms_key_arn = module.prod-athena.kms_key_arn - athena_bucket_arn = module.prod-athena.bucket_arn + athena_kms_key_arn = module.prod-athena[0].kms_key_arn + athena_bucket_arn = module.prod-athena[0].bucket_arn subnet_id = module.vpc[0].private_subnet_id security_groups = [module.vpc[0].powerbi_gw_security_group_id] diff --git a/terraform/account-wide-infrastructure/prod/glue.tf b/terraform/account-wide-infrastructure/prod/glue.tf index 7a03d7fa3..34c7f540d 100644 --- a/terraform/account-wide-infrastructure/prod/glue.tf +++ b/terraform/account-wide-infrastructure/prod/glue.tf @@ -1,4 +1,5 @@ module "prod-glue" { + is_enabled = var.enable_reporting source = "../modules/glue" name_prefix = "nhsd-nrlf--prod" python_version = 3 diff --git a/terraform/account-wide-infrastructure/prod/vars.tf b/terraform/account-wide-infrastructure/prod/vars.tf index 666021f18..383e71add 100644 --- a/terraform/account-wide-infrastructure/prod/vars.tf +++ b/terraform/account-wide-infrastructure/prod/vars.tf @@ -21,6 +21,12 @@ variable "enable_dns_hostnames" { default = true } +variable "enable_reporting" { + type = bool + description = "Enable account-wide reporting services in the prod account" + default = false +} + variable "vpc_cidr_block" { type = string description = "Base CIDR Block for VPC" diff --git a/terraform/account-wide-infrastructure/test/athena.tf b/terraform/account-wide-infrastructure/test/athena.tf index a07c884bf..a0390d48b 100644 --- a/terraform/account-wide-infrastructure/test/athena.tf +++ b/terraform/account-wide-infrastructure/test/athena.tf @@ -1,4 +1,4 @@ -module "qa-athena" { +/*module "qa-athena" { source = "../modules/athena" name_prefix = "nhsd-nrlf--qa" target_bucket_name = module.qa-glue.target_bucket_name @@ -24,4 +24,12 @@ module "ref-athena" { name_prefix = "nhsd-nrlf--ref" target_bucket_name = module.ref-glue.target_bucket_name glue_database = module.ref-glue.glue_database +}*/ + +module "test-athena" { + count = var.enable_reporting ? 1 : 0 + source = "../modules/athena" + name_prefix = "nhsd-nrlf--test" + target_bucket_name = module.test-glue.target_bucket_name + glue_database = module.test-glue.glue_database } diff --git a/terraform/account-wide-infrastructure/test/ec2.tf b/terraform/account-wide-infrastructure/test/ec2.tf index aa35f5569..c7f1caf6e 100644 --- a/terraform/account-wide-infrastructure/test/ec2.tf +++ b/terraform/account-wide-infrastructure/test/ec2.tf @@ -1,5 +1,5 @@ module "vpc" { - count = var.enable_powerbi_auto_push ? 1 : 0 + count = var.enable_reporting && var.enable_powerbi_auto_push ? 1 : 0 source = "../modules/vpc" vpc_cidr_block = var.vpc_cidr_block enable_dns_hostnames = var.enable_dns_hostnames @@ -10,15 +10,15 @@ module "vpc" { } module "powerbi_gw_instance" { - count = var.enable_powerbi_auto_push ? 1 : 0 + count = var.enable_reporting && var.enable_powerbi_auto_push ? 1 : 0 source = "../modules/powerbi-gw-ec2" use_custom_ami = var.use_powerbi_gw_custom_ami instance_type = var.powerbi_gw_instance_type name_prefix = "nhsd-nrlf--test-powerbi-gw" - target_bucket_arn = module.int-glue.target_bucket_arn - glue_kms_key_arn = module.int-glue.aws_kms_key_arn - athena_kms_key_arn = module.int-athena.kms_key_arn - athena_bucket_arn = module.int-athena.bucket_arn + target_bucket_arn = module.test-glue.target_bucket_arn + glue_kms_key_arn = module.test-glue.aws_kms_key_arn + athena_kms_key_arn = module.test-athena[0].kms_key_arn + athena_bucket_arn = module.test-athena[0].bucket_arn subnet_id = module.vpc[0].private_subnet_id security_groups = [module.vpc[0].powerbi_gw_security_group_id] diff --git a/terraform/account-wide-infrastructure/test/glue.tf b/terraform/account-wide-infrastructure/test/glue.tf index 51c721b5f..93c1f861a 100644 --- a/terraform/account-wide-infrastructure/test/glue.tf +++ b/terraform/account-wide-infrastructure/test/glue.tf @@ -1,4 +1,4 @@ -module "qa-glue" { +/*module "qa-glue" { source = "../modules/glue" name_prefix = "nhsd-nrlf--qa" python_version = 3 @@ -20,4 +20,11 @@ module "ref-glue" { source = "../modules/glue" name_prefix = "nhsd-nrlf--ref" python_version = 3 +}*/ + +module "test-glue" { + is_enabled = var.enable_reporting + source = "../modules/glue" + name_prefix = "nhsd-nrlf--test" + python_version = 3 } diff --git a/terraform/account-wide-infrastructure/test/vars.tf b/terraform/account-wide-infrastructure/test/vars.tf index c71bf86b2..bbf2e0ad7 100644 --- a/terraform/account-wide-infrastructure/test/vars.tf +++ b/terraform/account-wide-infrastructure/test/vars.tf @@ -29,6 +29,12 @@ variable "ref_api_domain_name" { default = "ref.api.record-locator.ref.national.nhs.uk" } +variable "enable_reporting" { + type = bool + description = "Enable account-wide reporting services in the test account" + default = false +} + variable "aws_azs" { type = string description = "AWS Availability Zones" diff --git a/terraform/infrastructure/data.tf b/terraform/infrastructure/data.tf index 506a60af9..926bd13d2 100644 --- a/terraform/infrastructure/data.tf +++ b/terraform/infrastructure/data.tf @@ -43,9 +43,9 @@ data "external" "current-info" { } data "aws_s3_bucket" "source-data-bucket" { - bucket = "${local.shared_prefix}-source-data-bucket" + bucket = "${local.account_prefix}-source-data-bucket" } data "aws_kms_key" "glue" { - key_id = "alias/${local.shared_prefix}-glue" + key_id = "alias/${local.account_prefix}-glue" } diff --git a/terraform/infrastructure/etc/dev.tfvars b/terraform/infrastructure/etc/dev.tfvars index 285015605..4ba4b986f 100644 --- a/terraform/infrastructure/etc/dev.tfvars +++ b/terraform/infrastructure/etc/dev.tfvars @@ -1,6 +1,8 @@ -account_name = "dev" +account_name = "dev" +aws_account_name = "dev" domain = "api.record-locator.dev.national.nhs.uk" public_domain = "internal-dev.api.service.nhs.uk" public_sandbox_domain = "internal-dev-sandbox.api.service.nhs.uk" log_retention_period = 90 +enable_reporting = true diff --git a/terraform/infrastructure/etc/int.tfvars b/terraform/infrastructure/etc/int.tfvars index 7c8b4b8ed..4baa2a936 100644 --- a/terraform/infrastructure/etc/int.tfvars +++ b/terraform/infrastructure/etc/int.tfvars @@ -1,8 +1,10 @@ -// TODO-NOW - Change this file name to int and update all references in codebase (and github repo config) -account_name = "int" +account_name = "int" +aws_account_name = "test" + domain = "api.record-locator.int.national.nhs.uk" deletion_protection = true public_domain = "int.api.service.nhs.uk" public_sandbox_domain = "sandbox.api.service.nhs.uk" log_retention_period = 90 +enable_reporting = false diff --git a/terraform/infrastructure/etc/prod.tfvars b/terraform/infrastructure/etc/prod.tfvars index 01ba96e83..4f9ca34e9 100644 --- a/terraform/infrastructure/etc/prod.tfvars +++ b/terraform/infrastructure/etc/prod.tfvars @@ -1,5 +1,8 @@ -account_name = "prod" +account_name = "prod" +aws_account_name = "prod" + domain = "api.record-locator.national.nhs.uk" public_domain = "api.service.nhs.uk" deletion_protection = true log_retention_period = 2192 +enable_reporting = false diff --git a/terraform/infrastructure/etc/qa.tfvars b/terraform/infrastructure/etc/qa.tfvars index 39e1ec44a..bfada691e 100644 --- a/terraform/infrastructure/etc/qa.tfvars +++ b/terraform/infrastructure/etc/qa.tfvars @@ -1,6 +1,8 @@ -account_name = "qa" +account_name = "qa" +aws_account_name = "test" domain = "qa.record-locator.national.nhs.uk" public_domain = "internal-qa.api.service.nhs.uk" public_sandbox_domain = "internal-qa-sandbox.api.service.nhs.uk" log_retention_period = 90 +enable_reporting = false diff --git a/terraform/infrastructure/etc/ref.tfvars b/terraform/infrastructure/etc/ref.tfvars index ec7a38035..9647baa00 100644 --- a/terraform/infrastructure/etc/ref.tfvars +++ b/terraform/infrastructure/etc/ref.tfvars @@ -1,4 +1,7 @@ -account_name = "ref" +account_name = "ref" +aws_account_name = "test" + domain = "api.record-locator.ref.national.nhs.uk" public_domain = "ref.api.service.nhs.uk" log_retention_period = 30 +enable_reporting = false diff --git a/terraform/infrastructure/locals.tf b/terraform/infrastructure/locals.tf index 298c6a42d..41942da70 100644 --- a/terraform/infrastructure/locals.tf +++ b/terraform/infrastructure/locals.tf @@ -4,6 +4,7 @@ locals { stack_name = terraform.workspace deletion_protection = var.deletion_protection prefix = "${local.project}--${local.stack_name}" + account_prefix = "${local.project}--${var.aws_account_name}" kms = { deletion_window_in_days = 7 @@ -28,8 +29,8 @@ locals { public_domain = local.is_sandbox_env ? var.public_sandbox_domain : var.public_domain # Logic / vars for reporting - reporting_bucket_arn = data.aws_s3_bucket.source-data-bucket[0].arn - reporting_kms_arn = data.aws_kms_key.glue[0].arn + reporting_bucket_arn = data.aws_s3_bucket.source-data-bucket.arn + reporting_kms_arn = data.aws_kms_key.glue.arn firehose_lambda_subscriptions = [ module.firehose__processor.firehose_subscription, module.firehose__processor.firehose_reporting_subscription diff --git a/terraform/infrastructure/modules/firehose/cloudwatch.tf b/terraform/infrastructure/modules/firehose/cloudwatch.tf index 29df97a3f..5539dffa0 100644 --- a/terraform/infrastructure/modules/firehose/cloudwatch.tf +++ b/terraform/infrastructure/modules/firehose/cloudwatch.tf @@ -15,5 +15,5 @@ resource "aws_cloudwatch_log_group" "firehose_reporting" { resource "aws_cloudwatch_log_stream" "firehose_reporting" { name = "${var.prefix}-firehose-reporting" - log_group_name = aws_cloudwatch_log_group.firehose_reporting[0].name + log_group_name = aws_cloudwatch_log_group.firehose_reporting.name } diff --git a/terraform/infrastructure/modules/firehose/kinesis.tf b/terraform/infrastructure/modules/firehose/kinesis.tf index 316edf863..f8daa163a 100644 --- a/terraform/infrastructure/modules/firehose/kinesis.tf +++ b/terraform/infrastructure/modules/firehose/kinesis.tf @@ -68,7 +68,7 @@ resource "aws_kinesis_firehose_delivery_stream" "reporting_stream" { buffering_interval = 600 processing_configuration { - enabled = "true" + enabled = var.enable_reporting_stream processors { type = "Decompression" @@ -90,9 +90,9 @@ resource "aws_kinesis_firehose_delivery_stream" "reporting_stream" { } cloudwatch_logging_options { - enabled = true - log_group_name = aws_cloudwatch_log_group.firehose_reporting[0].name - log_stream_name = aws_cloudwatch_log_stream.firehose_reporting[0].name + enabled = var.enable_reporting_stream + log_group_name = aws_cloudwatch_log_group.firehose_reporting.name + log_stream_name = aws_cloudwatch_log_stream.firehose_reporting.name } } } diff --git a/terraform/infrastructure/modules/firehose/locals.tf b/terraform/infrastructure/modules/firehose/locals.tf index 558586ead..92f9a796a 100644 --- a/terraform/infrastructure/modules/firehose/locals.tf +++ b/terraform/infrastructure/modules/firehose/locals.tf @@ -32,13 +32,13 @@ locals { } iam_firehose = { - cloudwatch_reporting_log_group_arn = aws_cloudwatch_log_group.firehose_reporting[0].arn - cloudwatch_reporting_log_stream_arn = aws_cloudwatch_log_stream.firehose_reporting[0].arn + cloudwatch_reporting_log_group_arn = aws_cloudwatch_log_group.firehose_reporting.arn + cloudwatch_reporting_log_stream_arn = aws_cloudwatch_log_stream.firehose_reporting.arn reporting_s3_arn = "${var.reporting_bucket_arn}/*" } iam_subscriptions = { - firehose_reporting_stream_arn = aws_kinesis_firehose_delivery_stream.reporting_stream[0].arn + firehose_reporting_stream_arn = aws_kinesis_firehose_delivery_stream.reporting_stream.arn } iam_kms_resources = compact([ diff --git a/terraform/infrastructure/modules/firehose/vars.tf b/terraform/infrastructure/modules/firehose/vars.tf index cce26b76c..df869751f 100644 --- a/terraform/infrastructure/modules/firehose/vars.tf +++ b/terraform/infrastructure/modules/firehose/vars.tf @@ -35,6 +35,13 @@ variable "error_prefix" { default = "errors" } + +variable "enable_reporting_stream" { + type = bool + description = "Enable the reporting delivery stream" + default = false + +} variable "reporting_bucket_arn" { type = string default = null diff --git a/terraform/infrastructure/vars.tf b/terraform/infrastructure/vars.tf index 69ada2bb6..e6ca502fd 100644 --- a/terraform/infrastructure/vars.tf +++ b/terraform/infrastructure/vars.tf @@ -1,5 +1,12 @@ variable "account_name" { - type = string + type = string + description = "The name of the AWS environment in the account, e.g. dev, qa, int, prod" +} + +variable "aws_account_name" { + type = string + description = "The name of the AWS account, e.g. dev, test, prod" + default = "dev" } variable "assume_role_arn" { @@ -48,3 +55,9 @@ variable "log_retention_period" { default = 90 type = number } + +variable "enable_reporting" { + type = bool + description = "Enable reporting for this environment" + default = false +} From e73a0c658d24bd3c25740a4ae414c7620c2fc99e Mon Sep 17 00:00:00 2001 From: Matt Dean Date: Wed, 18 Jun 2025 19:14:25 +0100 Subject: [PATCH 15/27] [NRL-1386] Don't provision infra firehose streams for ephem envs --- terraform/infrastructure/firehose.tf | 1 + terraform/infrastructure/lambda.tf | 26 +++++++++---------- terraform/infrastructure/locals.tf | 11 +++----- .../modules/api_gateway/api_gateway.tf | 1 + .../modules/firehose/kinesis.tf | 4 +-- .../infrastructure/modules/firehose/output.tf | 6 ----- .../infrastructure/modules/firehose/vars.tf | 6 ----- 7 files changed, 21 insertions(+), 34 deletions(-) diff --git a/terraform/infrastructure/firehose.tf b/terraform/infrastructure/firehose.tf index b2cdf7aab..b7a9dd370 100644 --- a/terraform/infrastructure/firehose.tf +++ b/terraform/infrastructure/firehose.tf @@ -1,4 +1,5 @@ module "firehose__processor" { + count = var.use_shared_resources ? 1 : 0 source = "./modules/firehose" assume_account = local.aws_account_id prefix = local.prefix diff --git a/terraform/infrastructure/lambda.tf b/terraform/infrastructure/lambda.tf index 4658b4f78..c934452db 100644 --- a/terraform/infrastructure/lambda.tf +++ b/terraform/infrastructure/lambda.tf @@ -11,7 +11,7 @@ module "consumer__readDocumentReference" { PREFIX = "${local.prefix}--" ENVIRONMENT = local.environment POWERTOOLS_LOG_LEVEL = local.log_level - SPLUNK_INDEX = module.firehose__processor.splunk.index + SPLUNK_INDEX = local.splunk_index AUTH_STORE = local.auth_store_id TABLE_NAME = local.pointers_table_name } @@ -39,7 +39,7 @@ module "consumer__countDocumentReference" { ENVIRONMENT = local.environment AUTH_STORE = local.auth_store_id POWERTOOLS_LOG_LEVEL = local.log_level - SPLUNK_INDEX = module.firehose__processor.splunk.index + SPLUNK_INDEX = local.splunk_index TABLE_NAME = local.pointers_table_name } additional_policies = [ @@ -66,7 +66,7 @@ module "consumer__searchDocumentReference" { ENVIRONMENT = local.environment AUTH_STORE = local.auth_store_id POWERTOOLS_LOG_LEVEL = local.log_level - SPLUNK_INDEX = module.firehose__processor.splunk.index + SPLUNK_INDEX = local.splunk_index TABLE_NAME = local.pointers_table_name } additional_policies = [ @@ -93,7 +93,7 @@ module "consumer__searchPostDocumentReference" { ENVIRONMENT = local.environment AUTH_STORE = local.auth_store_id POWERTOOLS_LOG_LEVEL = local.log_level - SPLUNK_INDEX = module.firehose__processor.splunk.index + SPLUNK_INDEX = local.splunk_index TABLE_NAME = local.pointers_table_name } additional_policies = [ @@ -119,7 +119,7 @@ module "producer__createDocumentReference" { PREFIX = "${local.prefix}--" ENVIRONMENT = local.environment AUTH_STORE = local.auth_store_id - SPLUNK_INDEX = module.firehose__processor.splunk.index + SPLUNK_INDEX = local.splunk_index POWERTOOLS_LOG_LEVEL = local.log_level TABLE_NAME = local.pointers_table_name } @@ -148,7 +148,7 @@ module "producer__deleteDocumentReference" { ENVIRONMENT = local.environment AUTH_STORE = local.auth_store_id POWERTOOLS_LOG_LEVEL = local.log_level - SPLUNK_INDEX = module.firehose__processor.splunk.index + SPLUNK_INDEX = local.splunk_index TABLE_NAME = local.pointers_table_name } additional_policies = [ @@ -176,7 +176,7 @@ module "producer__readDocumentReference" { ENVIRONMENT = local.environment AUTH_STORE = local.auth_store_id POWERTOOLS_LOG_LEVEL = local.log_level - SPLUNK_INDEX = module.firehose__processor.splunk.index + SPLUNK_INDEX = local.splunk_index TABLE_NAME = local.pointers_table_name } additional_policies = [ @@ -203,7 +203,7 @@ module "producer__searchDocumentReference" { ENVIRONMENT = local.environment AUTH_STORE = local.auth_store_id POWERTOOLS_LOG_LEVEL = local.log_level - SPLUNK_INDEX = module.firehose__processor.splunk.index + SPLUNK_INDEX = local.splunk_index TABLE_NAME = local.pointers_table_name } additional_policies = [ @@ -230,7 +230,7 @@ module "producer__searchPostDocumentReference" { ENVIRONMENT = local.environment AUTH_STORE = local.auth_store_id POWERTOOLS_LOG_LEVEL = local.log_level - SPLUNK_INDEX = module.firehose__processor.splunk.index + SPLUNK_INDEX = local.splunk_index TABLE_NAME = local.pointers_table_name } additional_policies = [ @@ -257,7 +257,7 @@ module "producer__updateDocumentReference" { ENVIRONMENT = local.environment AUTH_STORE = local.auth_store_id POWERTOOLS_LOG_LEVEL = local.log_level - SPLUNK_INDEX = module.firehose__processor.splunk.index + SPLUNK_INDEX = local.splunk_index TABLE_NAME = local.pointers_table_name } additional_policies = [ @@ -285,7 +285,7 @@ module "producer__upsertDocumentReference" { ENVIRONMENT = local.environment AUTH_STORE = local.auth_store_id POWERTOOLS_LOG_LEVEL = local.log_level - SPLUNK_INDEX = module.firehose__processor.splunk.index + SPLUNK_INDEX = local.splunk_index TABLE_NAME = local.pointers_table_name } additional_policies = [ @@ -313,7 +313,7 @@ module "consumer__status" { ENVIRONMENT = local.environment AUTH_STORE = local.auth_store_id POWERTOOLS_LOG_LEVEL = local.log_level - SPLUNK_INDEX = module.firehose__processor.splunk.index + SPLUNK_INDEX = local.splunk_index DYNAMODB_TIMEOUT = local.dynamodb_timeout_seconds TABLE_NAME = local.pointers_table_name } @@ -342,7 +342,7 @@ module "producer__status" { ENVIRONMENT = local.environment AUTH_STORE = local.auth_store_id POWERTOOLS_LOG_LEVEL = local.log_level - SPLUNK_INDEX = module.firehose__processor.splunk.index + SPLUNK_INDEX = local.splunk_index DYNAMODB_TIMEOUT = local.dynamodb_timeout_seconds TABLE_NAME = local.pointers_table_name } diff --git a/terraform/infrastructure/locals.tf b/terraform/infrastructure/locals.tf index 41942da70..69a89d433 100644 --- a/terraform/infrastructure/locals.tf +++ b/terraform/infrastructure/locals.tf @@ -6,6 +6,8 @@ locals { prefix = "${local.project}--${local.stack_name}" account_prefix = "${local.project}--${var.aws_account_name}" + aws_account_id = data.aws_caller_identity.current.account_id + kms = { deletion_window_in_days = 7 } @@ -28,22 +30,17 @@ locals { shared_prefix = "${local.project}--${local.environment}" public_domain = local.is_sandbox_env ? var.public_sandbox_domain : var.public_domain - # Logic / vars for reporting reporting_bucket_arn = data.aws_s3_bucket.source-data-bucket.arn reporting_kms_arn = data.aws_kms_key.glue.arn - firehose_lambda_subscriptions = [ + firehose_lambda_subscriptions = var.use_shared_resources ? [ module.firehose__processor.firehose_subscription, module.firehose__processor.firehose_reporting_subscription - ] - - # Logic / vars for splunk environment + ] : [] splunk_environment = local.is_sandbox_env ? "${var.account_name}sandbox" : var.account_name splunk_index = "aws_recordlocator_${local.splunk_environment}" log_level = var.account_name == "dev" || var.account_name == "qa" ? "DEBUG" : "INFO" - aws_account_id = data.aws_caller_identity.current.account_id - auth_store_id = var.use_shared_resources ? data.aws_s3_bucket.authorization-store[0].id : module.ephemeral-s3-permission-store[0].bucket_id auth_store_read_policy_arn = var.use_shared_resources ? data.aws_iam_policy.auth-store-read-policy[0].arn : module.ephemeral-s3-permission-store[0].bucket_read_policy_arn diff --git a/terraform/infrastructure/modules/api_gateway/api_gateway.tf b/terraform/infrastructure/modules/api_gateway/api_gateway.tf index b59636f69..40dca31cf 100644 --- a/terraform/infrastructure/modules/api_gateway/api_gateway.tf +++ b/terraform/infrastructure/modules/api_gateway/api_gateway.tf @@ -112,6 +112,7 @@ resource "aws_api_gateway_method_settings" "api_gateway_method_settings" { resource "aws_api_gateway_gateway_response" "api_access_denied" { rest_api_id = aws_api_gateway_rest_api.api_gateway_rest_api.id + status_code = "403" response_type = "ACCESS_DENIED" response_templates = { "application/json" = jsonencode({ diff --git a/terraform/infrastructure/modules/firehose/kinesis.tf b/terraform/infrastructure/modules/firehose/kinesis.tf index f8daa163a..530c9a636 100644 --- a/terraform/infrastructure/modules/firehose/kinesis.tf +++ b/terraform/infrastructure/modules/firehose/kinesis.tf @@ -68,7 +68,7 @@ resource "aws_kinesis_firehose_delivery_stream" "reporting_stream" { buffering_interval = 600 processing_configuration { - enabled = var.enable_reporting_stream + enabled = true processors { type = "Decompression" @@ -90,7 +90,7 @@ resource "aws_kinesis_firehose_delivery_stream" "reporting_stream" { } cloudwatch_logging_options { - enabled = var.enable_reporting_stream + enabled = true log_group_name = aws_cloudwatch_log_group.firehose_reporting.name log_stream_name = aws_cloudwatch_log_stream.firehose_reporting.name } diff --git a/terraform/infrastructure/modules/firehose/output.tf b/terraform/infrastructure/modules/firehose/output.tf index d8b8766a0..e48eba33d 100644 --- a/terraform/infrastructure/modules/firehose/output.tf +++ b/terraform/infrastructure/modules/firehose/output.tf @@ -11,12 +11,6 @@ output "delivery_stream" { } } -output "splunk" { - value = { - index = var.splunk_index - } -} - output "firehose_subscription" { value = { destination = { diff --git a/terraform/infrastructure/modules/firehose/vars.tf b/terraform/infrastructure/modules/firehose/vars.tf index df869751f..4844674e2 100644 --- a/terraform/infrastructure/modules/firehose/vars.tf +++ b/terraform/infrastructure/modules/firehose/vars.tf @@ -36,12 +36,6 @@ variable "error_prefix" { } -variable "enable_reporting_stream" { - type = bool - description = "Enable the reporting delivery stream" - default = false - -} variable "reporting_bucket_arn" { type = string default = null From 8ef6f0ad0a43cd9491a3753602647959d3cf949d Mon Sep 17 00:00:00 2001 From: Matt Dean Date: Wed, 18 Jun 2025 20:29:47 +0100 Subject: [PATCH 16/27] [NRL-1386] Enable reporting infra in dev by default --- terraform/account-wide-infrastructure/dev/vars.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/terraform/account-wide-infrastructure/dev/vars.tf b/terraform/account-wide-infrastructure/dev/vars.tf index f6a48433e..a5f387251 100644 --- a/terraform/account-wide-infrastructure/dev/vars.tf +++ b/terraform/account-wide-infrastructure/dev/vars.tf @@ -17,7 +17,7 @@ variable "devsandbox_api_domain_name" { variable "enable_reporting" { type = bool description = "Enable account-wide reporting services in the dev account" - default = false + default = true } variable "aws_azs" { From b56610aab598e4c5ad9f5b8f6d3a52e53da6bcd3 Mon Sep 17 00:00:00 2001 From: Matt Dean Date: Thu, 19 Jun 2025 17:39:09 +0100 Subject: [PATCH 17/27] [NRL-1386] Fix TF bug in infra with firehose__processor --- terraform/infrastructure/locals.tf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/terraform/infrastructure/locals.tf b/terraform/infrastructure/locals.tf index 69a89d433..b0739857e 100644 --- a/terraform/infrastructure/locals.tf +++ b/terraform/infrastructure/locals.tf @@ -33,8 +33,8 @@ locals { reporting_bucket_arn = data.aws_s3_bucket.source-data-bucket.arn reporting_kms_arn = data.aws_kms_key.glue.arn firehose_lambda_subscriptions = var.use_shared_resources ? [ - module.firehose__processor.firehose_subscription, - module.firehose__processor.firehose_reporting_subscription + module.firehose__processor[0].firehose_subscription, + module.firehose__processor[0].firehose_reporting_subscription ] : [] splunk_environment = local.is_sandbox_env ? "${var.account_name}sandbox" : var.account_name splunk_index = "aws_recordlocator_${local.splunk_environment}" From f75a740b169f8991960888880299efa293b4bd77 Mon Sep 17 00:00:00 2001 From: Matt Dean Date: Fri, 20 Jun 2025 07:50:19 +0100 Subject: [PATCH 18/27] [NRL-1386] Switch lambda subscriptions to a named map --- terraform/infrastructure/locals.tf | 8 ++++---- .../infrastructure/modules/lambda/cloudwatch.tf | 12 ++++++------ terraform/infrastructure/modules/lambda/vars.tf | 4 +++- 3 files changed, 13 insertions(+), 11 deletions(-) diff --git a/terraform/infrastructure/locals.tf b/terraform/infrastructure/locals.tf index b0739857e..957788553 100644 --- a/terraform/infrastructure/locals.tf +++ b/terraform/infrastructure/locals.tf @@ -32,10 +32,10 @@ locals { reporting_bucket_arn = data.aws_s3_bucket.source-data-bucket.arn reporting_kms_arn = data.aws_kms_key.glue.arn - firehose_lambda_subscriptions = var.use_shared_resources ? [ - module.firehose__processor[0].firehose_subscription, - module.firehose__processor[0].firehose_reporting_subscription - ] : [] + firehose_lambda_subscriptions = var.use_shared_resources ? { + "splunk_subscription" : module.firehose__processor[0].firehose_subscription, + "reports_subscription" : module.firehose__processor[0].firehose_reporting_subscription + } : {} splunk_environment = local.is_sandbox_env ? "${var.account_name}sandbox" : var.account_name splunk_index = "aws_recordlocator_${local.splunk_environment}" diff --git a/terraform/infrastructure/modules/lambda/cloudwatch.tf b/terraform/infrastructure/modules/lambda/cloudwatch.tf index b95fc1bbf..fa6305c30 100644 --- a/terraform/infrastructure/modules/lambda/cloudwatch.tf +++ b/terraform/infrastructure/modules/lambda/cloudwatch.tf @@ -5,11 +5,11 @@ resource "aws_cloudwatch_log_group" "lambda_cloudwatch_log_group" { } resource "aws_cloudwatch_log_subscription_filter" "lambda_log_filter" { - name = "${aws_lambda_function.lambda_function.function_name}_filter" - log_group_name = aws_cloudwatch_log_group.lambda_cloudwatch_log_group.name + for_each = var.firehose_subscriptions - count = length(var.firehose_subscriptions) - role_arn = var.firehose_subscriptions[count.index].role.arn - destination_arn = var.firehose_subscriptions[count.index].destination.arn - filter_pattern = var.firehose_subscriptions[count.index].filter.pattern + name = "${aws_lambda_function.lambda_function.function_name}_filter" + log_group_name = aws_cloudwatch_log_group.lambda_cloudwatch_log_group.name + role_arn = each.value.role.arn + destination_arn = each.value.destination.arn + filter_pattern = each.value.filter.pattern } diff --git a/terraform/infrastructure/modules/lambda/vars.tf b/terraform/infrastructure/modules/lambda/vars.tf index 400cc1282..56f08599a 100644 --- a/terraform/infrastructure/modules/lambda/vars.tf +++ b/terraform/infrastructure/modules/lambda/vars.tf @@ -23,7 +23,9 @@ variable "additional_policies" { variable "handler" {} variable "firehose_subscriptions" { - default = [] + description = "The firehose subscriptions to attach to the lambda logs" + type = map(any) + default = {} } variable "vpc" { From a5dc5be1cd01ae22d6571a85b43c5afcc89a6f76 Mon Sep 17 00:00:00 2001 From: Matt Dean Date: Fri, 20 Jun 2025 16:18:01 +0100 Subject: [PATCH 19/27] [NRL-1386] Re-worded flag TF var descr. Removed unused powerbi-gw module outputs --- terraform/account-wide-infrastructure/dev/vars.tf | 2 +- .../modules/powerbi-gw-ec2/outputs.tf | 3 --- terraform/account-wide-infrastructure/prod/vars.tf | 2 +- terraform/account-wide-infrastructure/test/vars.tf | 2 +- 4 files changed, 3 insertions(+), 6 deletions(-) delete mode 100644 terraform/account-wide-infrastructure/modules/powerbi-gw-ec2/outputs.tf diff --git a/terraform/account-wide-infrastructure/dev/vars.tf b/terraform/account-wide-infrastructure/dev/vars.tf index a5f387251..adb15d24a 100644 --- a/terraform/account-wide-infrastructure/dev/vars.tf +++ b/terraform/account-wide-infrastructure/dev/vars.tf @@ -16,7 +16,7 @@ variable "devsandbox_api_domain_name" { variable "enable_reporting" { type = bool - description = "Enable account-wide reporting services in the dev account" + description = "Enable account-wide reporting processes in the dev account" default = true } diff --git a/terraform/account-wide-infrastructure/modules/powerbi-gw-ec2/outputs.tf b/terraform/account-wide-infrastructure/modules/powerbi-gw-ec2/outputs.tf deleted file mode 100644 index 02ba7f310..000000000 --- a/terraform/account-wide-infrastructure/modules/powerbi-gw-ec2/outputs.tf +++ /dev/null @@ -1,3 +0,0 @@ -output "instance_id" { - value = aws_instance.powerbi_gw.id -} diff --git a/terraform/account-wide-infrastructure/prod/vars.tf b/terraform/account-wide-infrastructure/prod/vars.tf index 383e71add..d999261f5 100644 --- a/terraform/account-wide-infrastructure/prod/vars.tf +++ b/terraform/account-wide-infrastructure/prod/vars.tf @@ -23,7 +23,7 @@ variable "enable_dns_hostnames" { variable "enable_reporting" { type = bool - description = "Enable account-wide reporting services in the prod account" + description = "Enable account-wide reporting processes in the prod account" default = false } diff --git a/terraform/account-wide-infrastructure/test/vars.tf b/terraform/account-wide-infrastructure/test/vars.tf index bbf2e0ad7..144929512 100644 --- a/terraform/account-wide-infrastructure/test/vars.tf +++ b/terraform/account-wide-infrastructure/test/vars.tf @@ -31,7 +31,7 @@ variable "ref_api_domain_name" { variable "enable_reporting" { type = bool - description = "Enable account-wide reporting services in the test account" + description = "Enable account-wide reporting processes in the test account" default = false } From 15263c7e8864cff1c7bde3d6c9d4d1027170a2c9 Mon Sep 17 00:00:00 2001 From: Matt Dean Date: Tue, 24 Jun 2025 15:12:01 +0100 Subject: [PATCH 20/27] [NRL-1386] Fix SNS notifications for backup source --- .../account-wide-infrastructure/modules/backup-source/sns.tf | 4 ++-- .../modules/backup-source/variables.tf | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/terraform/account-wide-infrastructure/modules/backup-source/sns.tf b/terraform/account-wide-infrastructure/modules/backup-source/sns.tf index f91b26b96..c0bb6827c 100644 --- a/terraform/account-wide-infrastructure/modules/backup-source/sns.tf +++ b/terraform/account-wide-infrastructure/modules/backup-source/sns.tf @@ -26,9 +26,9 @@ data "aws_iam_policy_document" "allow_backup_to_sns" { } resource "aws_sns_topic_subscription" "aws_backup_notifications_email_target" { - for_each = var.notification_target_email_addresses + count = length(var.notification_target_email_addresses) topic_arn = aws_sns_topic.backup.arn protocol = "email" - endpoint = each.value + endpoint = var.notification_target_email_addresses[count.index] filter_policy = jsonencode({ "State" : [{ "anything-but" : "COMPLETED" }] }) } diff --git a/terraform/account-wide-infrastructure/modules/backup-source/variables.tf b/terraform/account-wide-infrastructure/modules/backup-source/variables.tf index 72cc612f6..e2c5985d3 100644 --- a/terraform/account-wide-infrastructure/modules/backup-source/variables.tf +++ b/terraform/account-wide-infrastructure/modules/backup-source/variables.tf @@ -10,7 +10,8 @@ variable "environment_name" { variable "notification_target_email_addresses" { description = "The email addresses to which backup notifications will be sent via SNS." - type = set(string) + type = list(string) + sensitive = true default = [] } From 43b2e4e5aaa8cfe5c164a786a17c1607710bc4a2 Mon Sep 17 00:00:00 2001 From: Matt Dean Date: Tue, 24 Jun 2025 15:36:04 +0100 Subject: [PATCH 21/27] [NRL-1386] Remove env-specific reporting infra from account-wide --- .../test/athena.tf | 28 ------------------- .../account-wide-infrastructure/test/glue.tf | 24 ---------------- 2 files changed, 52 deletions(-) diff --git a/terraform/account-wide-infrastructure/test/athena.tf b/terraform/account-wide-infrastructure/test/athena.tf index a0390d48b..c31f4f5af 100644 --- a/terraform/account-wide-infrastructure/test/athena.tf +++ b/terraform/account-wide-infrastructure/test/athena.tf @@ -1,31 +1,3 @@ -/*module "qa-athena" { - source = "../modules/athena" - name_prefix = "nhsd-nrlf--qa" - target_bucket_name = module.qa-glue.target_bucket_name - glue_database = module.qa-glue.glue_database -} - -module "int-athena" { - source = "../modules/athena" - name_prefix = "nhsd-nrlf--int" - target_bucket_name = module.int-glue.target_bucket_name - glue_database = module.int-glue.glue_database -} - -module "int-sandbox-athena" { - source = "../modules/athena" - name_prefix = "nhsd-nrlf--int-sandbox" - target_bucket_name = module.int-sandbox-glue.target_bucket_name - glue_database = module.int-sandbox-glue.glue_database -} - -module "ref-athena" { - source = "../modules/athena" - name_prefix = "nhsd-nrlf--ref" - target_bucket_name = module.ref-glue.target_bucket_name - glue_database = module.ref-glue.glue_database -}*/ - module "test-athena" { count = var.enable_reporting ? 1 : 0 source = "../modules/athena" diff --git a/terraform/account-wide-infrastructure/test/glue.tf b/terraform/account-wide-infrastructure/test/glue.tf index 93c1f861a..86e714de3 100644 --- a/terraform/account-wide-infrastructure/test/glue.tf +++ b/terraform/account-wide-infrastructure/test/glue.tf @@ -1,27 +1,3 @@ -/*module "qa-glue" { - source = "../modules/glue" - name_prefix = "nhsd-nrlf--qa" - python_version = 3 -} - -module "int-glue" { - source = "../modules/glue" - name_prefix = "nhsd-nrlf--int" - python_version = 3 -} - -module "int-sandbox-glue" { - source = "../modules/glue" - name_prefix = "nhsd-nrlf--int-sandbox" - python_version = 3 -} - -module "ref-glue" { - source = "../modules/glue" - name_prefix = "nhsd-nrlf--ref" - python_version = 3 -}*/ - module "test-glue" { is_enabled = var.enable_reporting source = "../modules/glue" From 73bae58a97306a0907d0c2c7096c34677fe5e13c Mon Sep 17 00:00:00 2001 From: jackleary Date: Wed, 25 Jun 2025 00:56:57 +0100 Subject: [PATCH 22/27] NRL-1386 Increase EC2 storage to allow standard gw set up --- .../modules/powerbi-gw-ec2/ec2.tf | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/terraform/account-wide-infrastructure/modules/powerbi-gw-ec2/ec2.tf b/terraform/account-wide-infrastructure/modules/powerbi-gw-ec2/ec2.tf index 37f160cbf..eb106e8fc 100644 --- a/terraform/account-wide-infrastructure/modules/powerbi-gw-ec2/ec2.tf +++ b/terraform/account-wide-infrastructure/modules/powerbi-gw-ec2/ec2.tf @@ -7,6 +7,11 @@ resource "aws_instance" "powerbi_gw" { subnet_id = var.subnet_id vpc_security_group_ids = var.security_groups + root_block_device { + volume_size = 40 + volume_type = "gp2" + } + user_data = file("${path.module}/scripts/user_data.tpl") tags = { From 157a145f0d8204192dbabcfc83bb103b98c5e006 Mon Sep 17 00:00:00 2001 From: jackleary Date: Wed, 25 Jun 2025 00:59:51 +0100 Subject: [PATCH 23/27] NRL-1386 Remove unused col in views --- .../modules/athena/sql/rep_consumer.sql | 5 ----- .../modules/athena/sql/rep_producer.sql | 8 -------- 2 files changed, 13 deletions(-) diff --git a/terraform/account-wide-infrastructure/modules/athena/sql/rep_consumer.sql b/terraform/account-wide-infrastructure/modules/athena/sql/rep_consumer.sql index 2e28aaaa8..7fa4095d0 100644 --- a/terraform/account-wide-infrastructure/modules/athena/sql/rep_consumer.sql +++ b/terraform/account-wide-infrastructure/modules/athena/sql/rep_consumer.sql @@ -15,7 +15,6 @@ WITH , event_correlation_id , event_xray_trace_id , event_pointer_types - , '' event_custodian , COALESCE("event_headers_nhsd-end-user-organisation-ods", event_metadata_ods_code) user_ods FROM consumer_countdocumentreference @@ -35,7 +34,6 @@ WITH , event_correlation_id , event_xray_trace_id , event_pointer_types - , event_custodian , COALESCE("event_headers_nhsd-end-user-organisation-ods", event_metadata_ods_code) user_ods FROM consumer_readdocumentreference @@ -55,7 +53,6 @@ WITH , event_correlation_id , event_xray_trace_id , event_pointer_types - , event_custodian , COALESCE("event_headers_nhsd-end-user-organisation-ods", event_metadata_ods_code) user_ods FROM consumer_searchdocumentreference @@ -75,7 +72,6 @@ WITH , event_correlation_id , event_xray_trace_id , event_pointer_types - , event_custodian , COALESCE("event_headers_nhsd-end-user-organisation-ods", event_metadata_ods_code) user_ods FROM consumer_searchpostdocumentreference @@ -116,7 +112,6 @@ SELECT , b.event_correlation_id , b.event_xray_trace_id , event_pointer_types -, COALESCE(COALESCE(event_custodian, LAG(event_custodian) IGNORE NULLS OVER (PARTITION BY b.event_xray_trace_id ORDER BY event_timestamp ASC)), COALESCE(event_custodian, LEAD(event_custodian) IGNORE NULLS OVER (PARTITION BY b.event_xray_trace_id ORDER BY event_timestamp ASC))) event_custodian , oc.user_ods FROM (base b diff --git a/terraform/account-wide-infrastructure/modules/athena/sql/rep_producer.sql b/terraform/account-wide-infrastructure/modules/athena/sql/rep_producer.sql index 2b01fbdaa..9c294b257 100644 --- a/terraform/account-wide-infrastructure/modules/athena/sql/rep_producer.sql +++ b/terraform/account-wide-infrastructure/modules/athena/sql/rep_producer.sql @@ -15,7 +15,6 @@ WITH , event_correlation_id , event_xray_trace_id , event_pointer_types - , event_custodian , COALESCE("event_headers_nhsd-end-user-organisation-ods", event_metadata_ods_code) user_ods FROM producer_createdocumentreference @@ -35,7 +34,6 @@ WITH , event_correlation_id , event_xray_trace_id , event_pointer_types - , event_custodian , COALESCE("event_headers_nhsd-end-user-organisation-ods", event_metadata_ods_code) user_ods FROM producer_deletedocumentreference @@ -55,7 +53,6 @@ WITH , event_correlation_id , event_xray_trace_id , event_pointer_types - , event_custodian , COALESCE("event_headers_nhsd-end-user-organisation-ods", event_metadata_ods_code) user_ods FROM producer_readdocumentreference @@ -75,7 +72,6 @@ WITH , event_correlation_id , event_xray_trace_id , event_pointer_types - , event_custodian , COALESCE("event_headers_nhsd-end-user-organisation-ods", event_metadata_ods_code) user_ods FROM producer_searchdocumentreference @@ -95,7 +91,6 @@ WITH , event_correlation_id , event_xray_trace_id , event_pointer_types - , event_custodian , COALESCE("event_headers_nhsd-end-user-organisation-ods", event_metadata_ods_code) user_ods FROM producer_searchpostdocumentreference @@ -115,7 +110,6 @@ WITH , event_correlation_id , event_xray_trace_id , event_pointer_types - , event_custodian , COALESCE("event_headers_nhsd-end-user-organisation-ods", event_metadata_ods_code) user_ods FROM producer_updatedocumentreference @@ -135,7 +129,6 @@ WITH , event_correlation_id , event_xray_trace_id , event_pointer_types - , event_custodian , COALESCE("event_headers_nhsd-end-user-organisation-ods", event_metadata_ods_code) user_ods FROM producer_upsertdocumentreference @@ -185,7 +178,6 @@ SELECT , b.event_correlation_id , b.event_xray_trace_id , event_pointer_types -, COALESCE(COALESCE(event_custodian, LAG(event_custodian) IGNORE NULLS OVER (PARTITION BY b.event_xray_trace_id ORDER BY event_timestamp ASC)), COALESCE(event_custodian, LEAD(event_custodian) IGNORE NULLS OVER (PARTITION BY b.event_xray_trace_id ORDER BY event_timestamp ASC))) event_custodian , oc.user_ods FROM (base b From 2dad8a1284e58340e461987616886d733dd1d527 Mon Sep 17 00:00:00 2001 From: jackleary Date: Wed, 25 Jun 2025 11:37:06 +0100 Subject: [PATCH 24/27] NRL-1386 group small files together --- .../modules/glue/src/pipeline.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/terraform/account-wide-infrastructure/modules/glue/src/pipeline.py b/terraform/account-wide-infrastructure/modules/glue/src/pipeline.py index 809fb6575..2aec608f6 100644 --- a/terraform/account-wide-infrastructure/modules/glue/src/pipeline.py +++ b/terraform/account-wide-infrastructure/modules/glue/src/pipeline.py @@ -73,7 +73,12 @@ def extract_dynamic(self): if last_runtime: data[name] = self.glue_context.create_dynamic_frame.from_options( connection_type="s3", - connection_options={"paths": [self.source_path], "recurse": True}, + connection_options={ + "paths": [self.source_path], + "recurse": True, + "groupFiles": "inPartition", + "groupSize": "104857600", + }, format="json", ).filter( f=lambda x, n=name: (x["host"].endswith(n)) @@ -83,7 +88,12 @@ def extract_dynamic(self): else: data[name] = self.glue_context.create_dynamic_frame.from_options( connection_type="s3", - connection_options={"paths": [self.source_path], "recurse": True}, + connection_options={ + "paths": [self.source_path], + "recurse": True, + "groupFiles": "inPartition", + "groupSize": "104857600", + }, format="json", ).filter(f=lambda x, n=name: x["host"].endswith(n)) @@ -109,7 +119,7 @@ def load(self, data): self.logger.info( f"Attempting to load dataframe {name} into {self.target_path}{name}" ) - dataframe.coalesce(1).write.mode("append").partitionBy( + dataframe.write.mode("append").partitionBy( *self.partition_cols ).parquet(f"{self.target_path}{name}") except: From 3f1315d42157691f76296ecda184cc5d6298391e Mon Sep 17 00:00:00 2001 From: jackleary Date: Thu, 26 Jun 2025 15:29:19 +0100 Subject: [PATCH 25/27] NRL-1386 Update group size to 128mb --- .../account-wide-infrastructure/modules/glue/src/pipeline.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/terraform/account-wide-infrastructure/modules/glue/src/pipeline.py b/terraform/account-wide-infrastructure/modules/glue/src/pipeline.py index 2aec608f6..7c1b1597c 100644 --- a/terraform/account-wide-infrastructure/modules/glue/src/pipeline.py +++ b/terraform/account-wide-infrastructure/modules/glue/src/pipeline.py @@ -77,7 +77,7 @@ def extract_dynamic(self): "paths": [self.source_path], "recurse": True, "groupFiles": "inPartition", - "groupSize": "104857600", + "groupSize": "134217728", }, format="json", ).filter( @@ -92,7 +92,7 @@ def extract_dynamic(self): "paths": [self.source_path], "recurse": True, "groupFiles": "inPartition", - "groupSize": "104857600", + "groupSize": "134217728", }, format="json", ).filter(f=lambda x, n=name: x["host"].endswith(n)) From 37005d4d12b71454a71b6321ebbd8c7d5f98cc4b Mon Sep 17 00:00:00 2001 From: jackleary Date: Thu, 26 Jun 2025 15:30:05 +0100 Subject: [PATCH 26/27] NRL-1386 Update readme with updated instructions for standard gw install and power bi refresh set up --- terraform/account-wide-infrastructure/README.md | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/terraform/account-wide-infrastructure/README.md b/terraform/account-wide-infrastructure/README.md index 7e8881a6d..96ed581d4 100644 --- a/terraform/account-wide-infrastructure/README.md +++ b/terraform/account-wide-infrastructure/README.md @@ -134,9 +134,11 @@ If deploying the EC2 set up to a new environment, these steps need to be followe aws ssm start-session --target --document-name AWS-StartPortForwardingSession --parameters "localPortNumber=13389,portNumber=3389" ``` -2. Install Athena ODBC driver and Power BI personal on premises gateway -3. Configure ODBC driver to connect to relevant Athena instance and log in to the gateway using NHS email -4. Log into power bi and test the refresh on the relevant data sources +2. Install Athena ODBC driver and Power BI standard on premises gateway +3. Configure ODBC driver to connect to relevant Athena instance +4. Log in to the gateway using NHS email, name the cluster to nhsd-nrlf-{env}--reporting-gw +5. Log on to power bi, navigate to Manage Connections and Gateways in settings and set up Athena connector with authentication method: Anonymous and privacy level: Private +6. Set dataset to point to this gateway, define schedule as needed ## Tear down account wide resources From 02eff29a34127c42872779b09a9bdfcacd4eb719 Mon Sep 17 00:00:00 2001 From: Matt Dean Date: Fri, 27 Jun 2025 14:18:35 +0100 Subject: [PATCH 27/27] [NRL-1386] Make lambda subscription filters unique per firehose stream --- terraform/infrastructure/modules/lambda/cloudwatch.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/terraform/infrastructure/modules/lambda/cloudwatch.tf b/terraform/infrastructure/modules/lambda/cloudwatch.tf index fa6305c30..f81f4e739 100644 --- a/terraform/infrastructure/modules/lambda/cloudwatch.tf +++ b/terraform/infrastructure/modules/lambda/cloudwatch.tf @@ -7,7 +7,7 @@ resource "aws_cloudwatch_log_group" "lambda_cloudwatch_log_group" { resource "aws_cloudwatch_log_subscription_filter" "lambda_log_filter" { for_each = var.firehose_subscriptions - name = "${aws_lambda_function.lambda_function.function_name}_filter" + name = "${aws_lambda_function.lambda_function.function_name}_${each.key}_filter" log_group_name = aws_cloudwatch_log_group.lambda_cloudwatch_log_group.name role_arn = each.value.role.arn destination_arn = each.value.destination.arn