Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
61 commits
Select commit Hold shift + click to select a range
a53253a
NRL-1188 S3 reporting module
jackleary Nov 27, 2024
372eeb0
NRL-1188 s3 iam
jackleary Nov 29, 2024
9f1c2d1
NRL-1188 reporting kinesis stream
jackleary Nov 29, 2024
2b28630
NRL-1188 glue set up
jackleary Nov 29, 2024
58815a5
NRL-1188 glue set up
jackleary Dec 4, 2024
89c8873
NRL-1188 athena set up
jackleary Dec 4, 2024
7b5cdeb
NRL-1188 s3 configured in glue module
jackleary Dec 4, 2024
84f7ab3
NRL-1188 invoke athena module
jackleary Dec 5, 2024
9b04618
NRL-1188 kinesis set up
jackleary Dec 5, 2024
d99de77
NRL-1188 athena kms
jackleary Dec 5, 2024
1763cfd
NRL-1188 terraform linting and glue python version update
jackleary Dec 10, 2024
a2502fc
NRL-1186 python transformation script update
jackleary Dec 10, 2024
90ab98c
NRL-1188 zip extra python files for use
jackleary Dec 10, 2024
479a7a9
NRL-1186 Lightweight framework for ETL process
jackleary Dec 10, 2024
85e6fa3
NRL-1186 imports and name update
jackleary Dec 11, 2024
b90b97c
NRL-1188 update glue iam policy
jackleary Dec 11, 2024
8535b4d
NRL-1188 Public access block added
jackleary Dec 11, 2024
3e92fbc
NRL-1188 Public access block added
jackleary Dec 11, 2024
47844be
NRL-1188 Update glue iam policy
jackleary Dec 11, 2024
68bce40
NRL-1188 Ensure S3 buckets are compliant
jackleary Dec 11, 2024
f9f93ab
NRL-1188 Reference existing bucket
jackleary Dec 11, 2024
c98847f
NRL-1188 Reference existing bucket
jackleary Dec 11, 2024
58860ee
NRL-1188 Bucket name update
jackleary Dec 11, 2024
c514551
NRL-1186 import function
jackleary Dec 11, 2024
38f0d33
NRL-1188 specify region in reference
jackleary Dec 11, 2024
8e7798b
NRL-1188 remove region reference
jackleary Dec 11, 2024
55176d7
NRL-1188 Resolving comments
jackleary Dec 13, 2024
0434eb9
NRL-1188 Reference S3 bucket
jackleary Dec 13, 2024
6012091
NRL-1188 Fix var name
jackleary Dec 13, 2024
1e7aa5c
NRL-1188 name updates
jackleary Dec 13, 2024
3b35af9
NRL-1188 use default role policy for glue
jackleary Dec 13, 2024
8b88655
NRL-1188 fix tf plan errors
jackleary Dec 16, 2024
75b84f4
NRL-1188 merge develop
jackleary Dec 16, 2024
1e56f9b
NRL-1188 set up encryption
jackleary Dec 16, 2024
ef476d2
NRL-1188 Use latest allowed python version
jackleary Dec 16, 2024
3760cc7
Merge remote-tracking branch 'origin' into feature/jale13-NRL-1188-po…
jackleary Dec 16, 2024
161c6a1
NRL-1188 Python version var is a number not string
jackleary Dec 16, 2024
b974b67
NRL-1188 Python version update
jackleary Dec 16, 2024
cae4726
NRL-1188 Fix destination for new stream
jackleary Dec 16, 2024
0bf3920
NRL-1188 Reference existing s3 bucket in firehose
jackleary Dec 16, 2024
fa3d2c5
NRL-1188 Update var
jackleary Dec 16, 2024
3c24ddf
NRL-1188 Reference existing bucket
jackleary Dec 16, 2024
b75bbd0
NRL-1188 correct prefix for shared resource
jackleary Dec 16, 2024
7bb8242
NRL-1188 Call existing bucket in correct file
jackleary Dec 17, 2024
fa711eb
NRL-1188 Pass s3 bucket arn as var
jackleary Dec 17, 2024
c81bd8a
NRL-1188 Pass s3 bucket arn as var
jackleary Dec 17, 2024
75dddcf
NRL-1188 Pass s3 bucket arn as var
jackleary Dec 17, 2024
d8de638
NRL-1188 Pass s3 bucket arn as var
jackleary Dec 17, 2024
9c19ebe
NRL-1188 Pass s3 bucket arn as var
jackleary Dec 17, 2024
9e4e8a6
NRL-1188 Pass s3 bucket arn as var
jackleary Dec 17, 2024
c994a99
NRL-1188 Pass s3 bucket arn as var
jackleary Dec 17, 2024
c83ec52
NRL-1188 Pass s3 bucket arn as var
jackleary Dec 17, 2024
043ccd3
NRL-1188 Pass s3 bucket arn as var
jackleary Dec 17, 2024
3135c3b
NRL-1188 Add reporting bucket to firehose policy
jackleary Dec 17, 2024
4f3b00c
NRL-1188 env toggle added
jackleary Dec 18, 2024
c4fa754
NRL-1188 add index to instances where count is used
jackleary Dec 18, 2024
2f7e334
NRL-1188 make index conditional also
jackleary Dec 18, 2024
ab785bd
NRL-1188 update env condition
jackleary Dec 18, 2024
467e825
NRL-1188 use compact to ignore nulls
jackleary Dec 18, 2024
160fd5b
NRL-1188 use compact to ignore nulls
jackleary Dec 18, 2024
e331b8f
NRL-1188 Update env condition
jackleary Dec 18, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,9 @@ override.tf.json
*_override.tf
*_override.tf.json

# Ignore output of data object
terraform/account-wide-infrastructure/modules/glue/files/src.zip

# Include override files you do wish to add to version control using negated pattern
#
# !example_override.tf
Expand Down
5 changes: 5 additions & 0 deletions terraform/account-wide-infrastructure/dev/athena.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Athena reporting resources for the dev account.
# The Athena database is pointed at the target bucket exported by the
# dev Glue module, which also gives Terraform the ordering between the two.
module "dev-athena" {
  source = "../modules/athena"

  name_prefix        = "nhsd-nrlf--dev"
  target_bucket_name = module.dev-glue.target_bucket_name
}
5 changes: 5 additions & 0 deletions terraform/account-wide-infrastructure/dev/glue.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Glue reporting resources for the dev account.
module "dev-glue" {
  source = "../modules/glue"

  name_prefix = "nhsd-nrlf--dev"

  # Passed through to the Glue job's command.python_version.
  python_version = 3
}
31 changes: 31 additions & 0 deletions terraform/account-wide-infrastructure/modules/athena/athena.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Athena database for reporting queries, backed by the bucket supplied by the
# caller and encrypted with this module's KMS key.
resource "aws_athena_database" "reporting-db" {
  name   = var.database
  bucket = var.target_bucket_name

  # NOTE(review): force_destroy is enabled - confirm that is acceptable
  # outside of a proof-of-concept environment.
  force_destroy = true

  encryption_configuration {
    encryption_option = "SSE_KMS"
    kms_key           = aws_kms_key.athena.arn
  }
}

# Athena workgroup for reporting queries.
# Query results are written under the "output/" prefix of this module's
# Athena bucket and encrypted with this module's KMS key.
resource "aws_athena_workgroup" "athena" {
  name = "${var.name_prefix}-athena-wg"

  configuration {
    enforce_workgroup_configuration    = true
    publish_cloudwatch_metrics_enabled = true

    result_configuration {
      # The "$" is required for interpolation; without it the literal text
      # "{aws_s3_bucket.athena.bucket}" is used as the bucket name.
      output_location = "s3://${aws_s3_bucket.athena.bucket}/output/"

      encryption_configuration {
        encryption_option = "SSE_KMS"
        kms_key_arn       = aws_kms_key.athena.arn
      }
    }
  }
}
7 changes: 7 additions & 0 deletions terraform/account-wide-infrastructure/modules/athena/kms.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# KMS key used by this module for the Athena database, the workgroup query
# results and the Athena bucket's default encryption.
resource "aws_kms_key" "athena" {
  description = "Encrypts ${var.name_prefix} Athena reporting data and query results"

  # Rotate the key material automatically rather than relying on a single
  # long-lived key.
  enable_key_rotation = true
}

# Human-readable alias for the Athena KMS key.
resource "aws_kms_alias" "athena" {
  target_key_id = aws_kms_key.athena.key_id
  name          = "alias/${var.name_prefix}-athena"
}
11 changes: 11 additions & 0 deletions terraform/account-wide-infrastructure/modules/athena/outputs.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Each output below exposes the complete resource object (not a single
# attribute), so callers select the attributes they need.

# The Athena workgroup resource.
output "workgroup" {
  value = aws_athena_workgroup.athena
}

# The S3 bucket resource created by this module for Athena.
output "bucket" {
  value = aws_s3_bucket.athena
}

# The Athena reporting database resource.
output "database" {
  value = aws_athena_database.reporting-db
}
52 changes: 52 additions & 0 deletions terraform/account-wide-infrastructure/modules/athena/s3.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# Bucket owned by the Athena module; the workgroup writes its query results
# under the "output/" prefix of this bucket.
resource "aws_s3_bucket" "athena" {
  bucket = "${var.name_prefix}-athena"
}

# Bucket policy for the Athena bucket: deny every S3 action on the bucket and
# its objects when the request is not made over TLS.
resource "aws_s3_bucket_policy" "athena" {
  # Reference the bucket resource rather than rebuilding its name from a
  # string, so the policy always follows the bucket if its name changes and
  # matches how the other resources in this file refer to it.
  bucket = aws_s3_bucket.athena.id

  policy = jsonencode({
    Version = "2012-10-17"
    Id      = "athena-policy"
    Statement = [
      {
        Sid    = "HTTPSOnly"
        Effect = "Deny"
        Principal = {
          "AWS" : "*"
        }
        Action = "s3:*"
        Resource = [
          aws_s3_bucket.athena.arn,
          "${aws_s3_bucket.athena.arn}/*",
        ]
        Condition = {
          Bool = {
            "aws:SecureTransport" = "false"
          }
        }
      },
    ]
  })
}

# Turn on all four public-access-block settings for the Athena bucket.
resource "aws_s3_bucket_public_access_block" "athena-public-access-block" {
  bucket = aws_s3_bucket.athena.id

  # ACL-related settings
  block_public_acls  = true
  ignore_public_acls = true

  # Policy-related settings
  block_public_policy     = true
  restrict_public_buckets = true
}


# Default server-side encryption for the Athena bucket, using this module's
# KMS key.
resource "aws_s3_bucket_server_side_encryption_configuration" "athena" {
  bucket = aws_s3_bucket.athena.bucket

  rule {
    apply_server_side_encryption_by_default {
      kms_master_key_id = aws_kms_key.athena.arn
      sse_algorithm     = "aws:kms"
    }
  }
}
13 changes: 13 additions & 0 deletions terraform/account-wide-infrastructure/modules/athena/vars.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
variable "database" {
  type        = string
  description = "Name of the Athena database created by this module."
  default     = "nrl_reporting"
}

# Required: no default is provided, so every caller must set it.
variable "name_prefix" {
  description = "The prefix to apply to all resources in the module."
  type        = string
}

variable "target_bucket_name" {
  type        = string
  description = "Name of the S3 bucket that backs the Athena database."
}
59 changes: 59 additions & 0 deletions terraform/account-wide-infrastructure/modules/glue/glue.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
# Create Glue Data Catalog Database
# The database location is the root of the source data bucket.
resource "aws_glue_catalog_database" "raw_log_database" {
  name = "${var.name_prefix}-raw_log"

  # A location must be a URI; a bare bucket name has no scheme, so prefix it
  # with "s3://" as the Glue job arguments in this module already do.
  location_uri = "s3://${aws_s3_bucket.source-data-bucket.id}/"
}

# Create Glue Crawler
# Crawls the source data bucket and records what it finds in the raw log
# catalog database, running as the Glue service role.
resource "aws_glue_crawler" "raw_log_crawler" {
  name          = "${var.name_prefix}-raw-log-crawler"
  database_name = aws_glue_catalog_database.raw_log_database.name
  role          = aws_iam_role.glue_service_role.name

  s3_target {
    # Use a full S3 URI, consistent with the "--source-path" job argument in
    # this module; the original omitted the "s3://" scheme.
    path = "s3://${aws_s3_bucket.source-data-bucket.id}/"
  }

  schema_change_policy {
    delete_behavior = "LOG"
  }

  configuration = jsonencode({
    "Version" : 1.0,
    "Grouping" : {
      "TableGroupingPolicy" : "CombineCompatibleSchemas"
    }
  })
}
# On-demand trigger whose only action is to start the raw log crawler.
resource "aws_glue_trigger" "raw_log_trigger" {
  type = "ON_DEMAND"
  name = "${var.name_prefix}-org-report-trigger"

  actions {
    crawler_name = aws_glue_crawler.raw_log_crawler.name
  }
}

# Glue ETL job that runs main.py from the code bucket, reading from the source
# data bucket and writing to the target data bucket.
resource "aws_glue_job" "glue_job" {
  name              = "${var.name_prefix}-glue-job"
  role_arn          = aws_iam_role.glue_service_role.arn
  description       = "Transfer logs from source to bucket"
  glue_version      = "4.0"
  worker_type       = "G.1X"
  timeout           = 2880
  max_retries       = 1
  number_of_workers = 2

  command {
    name            = "glueetl"
    python_version  = var.python_version
    script_location = "s3://${aws_s3_bucket.code-bucket.id}/main.py"
  }

  default_arguments = {
    "--enable-auto-scaling" = "true"
    # Fixed spelling: the original key "--enable-continous-cloudwatch-log"
    # is not a recognised Glue argument, so continuous logging was never
    # switched on.
    "--enable-continuous-cloudwatch-log" = "true"
    "--datalake-formats"                 = "delta"
    "--source-path"                      = "s3://${aws_s3_bucket.source-data-bucket.id}/" # Specify the source S3 path
    "--destination-path"                 = "s3://${aws_s3_bucket.target-data-bucket.id}/" # Specify the destination S3 path
    # NOTE(review): hard-coded and different from the job's `name` above -
    # confirm this is what the script expects.
    "--job-name"                     = "poc-glue-job"
    "--enable-continuous-log-filter" = "true"
    "--enable-metrics"               = "true"
    "--extra-py-files"               = "s3://${aws_s3_bucket.code-bucket.id}/src.zip"
  }
}
21 changes: 21 additions & 0 deletions terraform/account-wide-infrastructure/modules/glue/iam.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# IAM role used by the Glue crawler and the Glue job in this module.
# The trust policy lets only the Glue service assume it.
resource "aws_iam_role" "glue_service_role" {
  name = "${var.name_prefix}-glue_service_role"

  assume_role_policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        Effect = "Allow"
        Action = "sts:AssumeRole"
        Principal = {
          Service = "glue.amazonaws.com"
        }
      },
    ]
  })
}

# Attach the AWS-managed Glue service policy to the Glue role.
# NOTE(review): this is the only policy attached to the role in this view -
# confirm it grants access to the module's buckets and KMS key.
resource "aws_iam_role_policy_attachment" "glue_service" {
  policy_arn = "arn:aws:iam::aws:policy/service-role/AWSGlueServiceRole"
  role       = aws_iam_role.glue_service_role.id
}
7 changes: 7 additions & 0 deletions terraform/account-wide-infrastructure/modules/glue/kms.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# KMS key for the Glue module's resources.
resource "aws_kms_key" "glue" {
  description = "KMS key for ${var.name_prefix} Glue reporting resources"

  # Rotate the key material automatically rather than relying on a single
  # long-lived key.
  enable_key_rotation = true
}

# Human-readable alias for the Glue KMS key.
resource "aws_kms_alias" "glue" {
  target_key_id = aws_kms_key.glue.key_id
  name          = "alias/${var.name_prefix}-glue"
}
13 changes: 13 additions & 0 deletions terraform/account-wide-infrastructure/modules/glue/outputs.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Bucket the Glue job writes to (its "--destination-path").
output "target_bucket_name" {
  value       = aws_s3_bucket.target-data-bucket.id
  description = "Name of destination bucket"
}

# Bucket the crawler scans and the Glue job reads from (its "--source-path").
output "source_bucket_name" {
  value       = aws_s3_bucket.source-data-bucket.id
  description = "Name of source bucket"
}

# The original value was "s3//<source bucket>/": a malformed S3 URI (missing
# ":") and not a crawler name. Return what the output's name promises.
output "glue_crawler_name" {
  description = "Name of the Glue crawler that catalogues the source bucket"
  value       = aws_glue_crawler.raw_log_crawler.name
}
Loading