diff --git a/infrastructure/terraform/components/api/README.md b/infrastructure/terraform/components/api/README.md
index 75faa887..fdfbaf53 100644
--- a/infrastructure/terraform/components/api/README.md
+++ b/infrastructure/terraform/components/api/README.md
@@ -43,7 +43,11 @@ No requirements.
| Name | Source | Version |
|------|--------|---------|
+| [apigw\_alarms](#module\_apigw\_alarms) | ../../modules/alarms/alarms-apigw | n/a |
| [authorizer\_lambda](#module\_authorizer\_lambda) | https://github.com/NHSDigital/nhs-notify-shared-modules/releases/download/v2.0.29/terraform-lambda.zip | n/a |
+| [ddb\_alarms\_letters](#module\_ddb\_alarms\_letters) | ../../modules/alarms/alarms-ddb | n/a |
+| [ddb\_alarms\_mi](#module\_ddb\_alarms\_mi) | ../../modules/alarms/alarms-ddb | n/a |
+| [ddb\_alarms\_suppliers](#module\_ddb\_alarms\_suppliers) | ../../modules/alarms/alarms-ddb | n/a |
| [domain\_truststore](#module\_domain\_truststore) | https://github.com/NHSDigital/nhs-notify-shared-modules/releases/download/v2.0.26/terraform-s3bucket.zip | n/a |
| [eventpub](#module\_eventpub) | https://github.com/NHSDigital/nhs-notify-shared-modules/releases/download/v2.0.31/terraform-eventpub.zip | n/a |
| [eventsub](#module\_eventsub) | ../../modules/eventsub | n/a |
@@ -52,6 +56,7 @@ No requirements.
| [get\_letters](#module\_get\_letters) | https://github.com/NHSDigital/nhs-notify-shared-modules/releases/download/v2.0.29/terraform-lambda.zip | n/a |
| [get\_status](#module\_get\_status) | https://github.com/NHSDigital/nhs-notify-shared-modules/releases/download/v2.0.29/terraform-lambda.zip | n/a |
| [kms](#module\_kms) | https://github.com/NHSDigital/nhs-notify-shared-modules/releases/download/v2.0.26/terraform-kms.zip | n/a |
+| [lambda\_alarms](#module\_lambda\_alarms) | ../../modules/alarms/alarms-lambda | n/a |
| [letter\_status\_update](#module\_letter\_status\_update) | https://github.com/NHSDigital/nhs-notify-shared-modules/releases/download/v2.0.29/terraform-lambda.zip | n/a |
| [letter\_status\_updates\_queue](#module\_letter\_status\_updates\_queue) | https://github.com/NHSDigital/nhs-notify-shared-modules/releases/download/v2.0.24/terraform-sqs.zip | n/a |
| [letter\_updates\_transformer](#module\_letter\_updates\_transformer) | https://github.com/NHSDigital/nhs-notify-shared-modules/releases/download/v2.0.29/terraform-lambda.zip | n/a |
@@ -61,6 +66,7 @@ No requirements.
| [post\_letters](#module\_post\_letters) | https://github.com/NHSDigital/nhs-notify-shared-modules/releases/download/v2.0.29/terraform-lambda.zip | n/a |
| [post\_mi](#module\_post\_mi) | https://github.com/NHSDigital/nhs-notify-shared-modules/releases/download/v2.0.29/terraform-lambda.zip | n/a |
| [s3bucket\_test\_letters](#module\_s3bucket\_test\_letters) | https://github.com/NHSDigital/nhs-notify-shared-modules/releases/download/v2.0.26/terraform-s3bucket.zip | n/a |
+| [sqs\_alarms](#module\_sqs\_alarms) | ../../modules/alarms/alarms-sqs | n/a |
| [sqs\_letter\_updates](#module\_sqs\_letter\_updates) | https://github.com/NHSDigital/nhs-notify-shared-modules/releases/download/v2.0.26/terraform-sqs.zip | n/a |
| [supplier\_ssl](#module\_supplier\_ssl) | https://github.com/NHSDigital/nhs-notify-shared-modules/releases/download/v2.0.26/terraform-ssl.zip | n/a |
| [upsert\_letter](#module\_upsert\_letter) | https://github.com/NHSDigital/nhs-notify-shared-modules/releases/download/v2.0.29/terraform-lambda.zip | n/a |
diff --git a/infrastructure/terraform/components/api/alarms.tf b/infrastructure/terraform/components/api/alarms.tf
new file mode 100644
index 00000000..5d4ade60
--- /dev/null
+++ b/infrastructure/terraform/components/api/alarms.tf
@@ -0,0 +1,77 @@
+locals { # inputs for the alarm modules declared in this file
+ lambda_alarm_targets = { # key => Lambda function name; iterated by module.lambda_alarms
+ authorizer_lambda = module.authorizer_lambda.function_name
+ get_letter = module.get_letter.function_name
+ get_letters = module.get_letters.function_name
+ get_letter_data = module.get_letter_data.function_name
+ get_status = module.get_status.function_name
+ patch_letter = module.patch_letter.function_name
+ post_letters = module.post_letters.function_name
+ post_mi = module.post_mi.function_name
+ upsert_letter = module.upsert_letter.function_name
+ letter_status_update = module.letter_status_update.function_name
+ letter_updates_transformer = module.letter_updates_transformer.function_name
+ mi_updates_transformer = module.mi_updates_transformer.function_name
+ }
+
+ sqs_queue_names = { # key => queue settings; iterated by module.sqs_alarms
+ letter_updates = {
+ name = module.sqs_letter_updates.sqs_queue_name
+ age_period_seconds = 900 # 15 minutes
+ }
+ letter_status_updates = {
+ name = module.letter_status_updates_queue.sqs_queue_name
+ age_period_seconds = 900 # 15 minutes
+ }
+ }
+}
+
+module "lambda_alarms" { # one alarms-lambda instance per entry in local.lambda_alarm_targets
+ for_each = local.lambda_alarm_targets
+ source = "../../modules/alarms/alarms-lambda"
+
+ alarm_prefix = local.csi
+ function_name = each.value
+ log_group_name = "/aws/lambda/${each.value}" # log group the module's error-log metric filter is attached to
+ tags = local.default_tags
+}
+
+module "ddb_alarms_letters" { # read/write throttle alarms for the letters table
+ source = "../../modules/alarms/alarms-ddb"
+ alarm_prefix = local.csi
+ table_name = aws_dynamodb_table.letters.name
+ tags = local.default_tags
+}
+
+module "ddb_alarms_mi" { # read/write throttle alarms for the mi table
+ source = "../../modules/alarms/alarms-ddb"
+ alarm_prefix = local.csi
+ table_name = aws_dynamodb_table.mi.name
+ tags = local.default_tags
+}
+
+module "ddb_alarms_suppliers" { # read/write throttle alarms for the suppliers table
+ source = "../../modules/alarms/alarms-ddb"
+ alarm_prefix = local.csi
+ table_name = aws_dynamodb_table.suppliers.name
+ tags = local.default_tags
+}
+
+module "sqs_alarms" { # one alarms-sqs instance per entry in local.sqs_queue_names
+ for_each = local.sqs_queue_names
+ source = "../../modules/alarms/alarms-sqs"
+
+ alarm_prefix = local.csi
+ queue_name = each.value.name
+ dlq_queue_name = replace(each.value.name, "-queue", "-dlq") # NOTE(review): DLQ name is derived by string substitution; if the name contains no "-queue" this equals the main queue name - confirm against the SQS module's naming
+ age_period_seconds = each.value.age_period_seconds
+ tags = local.default_tags
+}
+
+module "apigw_alarms" { # 5xx, latency-threshold and latency-anomaly alarms for the main REST API stage
+ source = "../../modules/alarms/alarms-apigw"
+ alarm_prefix = local.csi
+ api_name = aws_api_gateway_rest_api.main.name
+ stage_name = aws_api_gateway_stage.main.stage_name
+ tags = local.default_tags
+}
diff --git a/infrastructure/terraform/components/api/module_authorizer_lambda.tf b/infrastructure/terraform/components/api/module_authorizer_lambda.tf
index 7e3c94b8..c90a7d30 100644
--- a/infrastructure/terraform/components/api/module_authorizer_lambda.tf
+++ b/infrastructure/terraform/components/api/module_authorizer_lambda.tf
@@ -36,7 +36,7 @@ module "authorizer_lambda" {
lambda_env_vars = {
CLOUDWATCH_NAMESPACE = "/aws/api-gateway/supplier/alarms",
- CLIENT_CERTIFICATE_EXPIRATION_ALERT_DAYS = 14,
+ CLIENT_CERTIFICATE_EXPIRATION_ALERT_DAYS = 30,
APIM_SUPPLIER_ID_HEADER = "NHSD-Supplier-ID",
SUPPLIERS_TABLE_NAME = aws_dynamodb_table.suppliers.name
}
diff --git a/infrastructure/terraform/modules/alarms/README.md b/infrastructure/terraform/modules/alarms/README.md
new file mode 100644
index 00000000..df8c1f5c
--- /dev/null
+++ b/infrastructure/terraform/modules/alarms/README.md
@@ -0,0 +1,19 @@
+
+
+
+
+## Requirements
+
+No requirements.
+## Inputs
+
+No inputs.
+## Modules
+
+No modules.
+## Outputs
+
+No outputs.
+
+
+
diff --git a/infrastructure/terraform/modules/alarms/alarms-apigw/README.md b/infrastructure/terraform/modules/alarms/alarms-apigw/README.md
new file mode 100644
index 00000000..d1de73b5
--- /dev/null
+++ b/infrastructure/terraform/modules/alarms/alarms-apigw/README.md
@@ -0,0 +1,34 @@
+
+
+
+
+## Requirements
+
+| Name | Version |
+|------|---------|
+| [terraform](#requirement\_terraform) | >= 1.9.0 |
+## Inputs
+
+| Name | Description | Type | Default | Required |
+|------|-------------|------|---------|:--------:|
+| [alarm\_prefix](#input\_alarm\_prefix) | n/a | `string` | n/a | yes |
+| [api\_name](#input\_api\_name) | n/a | `string` | n/a | yes |
+| [error\_5xx\_evaluation\_periods](#input\_error\_5xx\_evaluation\_periods) | n/a | `number` | `1` | no |
+| [error\_5xx\_period\_seconds](#input\_error\_5xx\_period\_seconds) | n/a | `number` | `60` | no |
+| [error\_5xx\_threshold](#input\_error\_5xx\_threshold) | n/a | `number` | `0` | no |
+| [latency\_anomaly\_sensitivity](#input\_latency\_anomaly\_sensitivity) | n/a | `number` | `2` | no |
+| [latency\_datapoints\_to\_alarm](#input\_latency\_datapoints\_to\_alarm) | n/a | `number` | `3` | no |
+| [latency\_evaluation\_periods](#input\_latency\_evaluation\_periods) | n/a | `number` | `5` | no |
+| [latency\_period\_seconds](#input\_latency\_period\_seconds) | n/a | `number` | `60` | no |
+| [latency\_threshold\_ms](#input\_latency\_threshold\_ms) | n/a | `number` | `29000` | no |
+| [stage\_name](#input\_stage\_name) | n/a | `string` | n/a | yes |
+| [tags](#input\_tags) | n/a | `map(string)` | `{}` | no |
+## Modules
+
+No modules.
+## Outputs
+
+No outputs.
+
+
+
diff --git a/infrastructure/terraform/modules/alarms/alarms-apigw/main.tf b/infrastructure/terraform/modules/alarms/alarms-apigw/main.tf
new file mode 100644
index 00000000..4c376944
--- /dev/null
+++ b/infrastructure/terraform/modules/alarms/alarms-apigw/main.tf
@@ -0,0 +1,87 @@
+locals { # CloudWatch dimensions shared by the alarms in this module
+ api_dimensions = {
+ ApiName = var.api_name
+ Stage = var.stage_name
+ }
+}
+
+resource "aws_cloudwatch_metric_alarm" "five_xx" { # alarms when the summed 5XXError count for the API stage exceeds the threshold
+ alarm_name = "${var.alarm_prefix}-apigw-5xx"
+ alarm_description = "RELIABILITY: API Gateway 5xx responses"
+
+ namespace = "AWS/ApiGateway"
+ metric_name = "5XXError"
+ statistic = "Sum"
+ period = var.error_5xx_period_seconds
+
+ evaluation_periods = var.error_5xx_evaluation_periods
+ threshold = var.error_5xx_threshold
+ comparison_operator = "GreaterThanThreshold"
+ treat_missing_data = "notBreaching" # periods without datapoints do not count towards the alarm
+
+ dimensions = local.api_dimensions
+
+ actions_enabled = false # alarm state is recorded only; no actions are executed
+ alarm_actions = []
+ ok_actions = []
+ insufficient_data_actions = []
+ tags = var.tags
+}
+
+resource "aws_cloudwatch_metric_alarm" "latency_threshold" { # alarms when average API latency stays above a fixed threshold
+ alarm_name = "${var.alarm_prefix}-apigw-latency-threshold"
+ alarm_description = "RELIABILITY: API Gateway latency above threshold"
+
+ namespace = "AWS/ApiGateway"
+ metric_name = "Latency"
+ statistic = "Average"
+ period = var.latency_period_seconds
+
+ evaluation_periods = var.latency_evaluation_periods # datapoints_to_alarm is not set on this alarm
+ threshold = var.latency_threshold_ms # NOTE(review): compared against the Average statistic; the default is 29000 ms - confirm the mean can realistically exceed it
+ comparison_operator = "GreaterThanThreshold"
+ treat_missing_data = "notBreaching" # periods without datapoints do not count towards the alarm
+
+ dimensions = local.api_dimensions
+
+ actions_enabled = false # alarm state is recorded only; no actions are executed
+ alarm_actions = []
+ ok_actions = []
+ insufficient_data_actions = []
+ tags = var.tags
+}
+
+resource "aws_cloudwatch_metric_alarm" "latency_anomaly" { # alarms when average latency rises above the anomaly-detection band
+ alarm_name = "${var.alarm_prefix}-apigw-latency-anomaly"
+ alarm_description = "RELIABILITY: API Gateway latency anomaly"
+ comparison_operator = "GreaterThanUpperThreshold"
+ evaluation_periods = var.latency_evaluation_periods
+ datapoints_to_alarm = var.latency_datapoints_to_alarm
+ threshold_metric_id = "ad1" # refers to the band expression in the second metric_query below
+ treat_missing_data = "notBreaching" # periods without datapoints do not count towards the alarm
+
+ actions_enabled = false # alarm state is recorded only; no actions are executed
+ alarm_actions = []
+ ok_actions = []
+ insufficient_data_actions = []
+ tags = var.tags
+
+ metric_query { # m1: the raw Latency metric being evaluated
+ id = "m1"
+ metric {
+ metric_name = "Latency"
+ namespace = "AWS/ApiGateway"
+ stat = "Average"
+ period = var.latency_period_seconds
+ dimensions = local.api_dimensions
+ }
+ return_data = true
+ }
+
+ metric_query { # ad1: expected band around m1; the second argument is var.latency_anomaly_sensitivity
+ id = "ad1"
+ expression = "ANOMALY_DETECTION_BAND(m1, ${var.latency_anomaly_sensitivity})"
+ label = "Latency (expected)"
+ return_data = true
+ }
+}
diff --git a/infrastructure/terraform/modules/alarms/alarms-apigw/variables.tf b/infrastructure/terraform/modules/alarms/alarms-apigw/variables.tf
new file mode 100644
index 00000000..70909ad7
--- /dev/null
+++ b/infrastructure/terraform/modules/alarms/alarms-apigw/variables.tf
@@ -0,0 +1,56 @@
+variable "alarm_prefix" { # prepended to every alarm name in this module
+ type = string
+}
+
+variable "api_name" { # value of the ApiName metric dimension
+ type = string
+}
+
+variable "stage_name" { # value of the Stage metric dimension
+ type = string
+}
+
+variable "tags" { # applied to every alarm in this module
+ type = map(string)
+ default = {}
+}
+
+variable "error_5xx_threshold" { # 5xx alarm fires when the summed count is greater than this
+ type = number
+ default = 0
+}
+
+variable "error_5xx_period_seconds" { # metric period for the 5xx alarm
+ type = number
+ default = 60
+}
+
+variable "error_5xx_evaluation_periods" { # evaluation periods for the 5xx alarm
+ type = number
+ default = 1
+}
+
+variable "latency_threshold_ms" { # threshold for the fixed latency alarm
+ type = number
+ default = 29000
+}
+
+variable "latency_period_seconds" { # metric period for both latency alarms
+ type = number
+ default = 60
+}
+
+variable "latency_evaluation_periods" { # evaluation periods for both latency alarms
+ type = number
+ default = 5
+}
+
+variable "latency_datapoints_to_alarm" { # used by the latency anomaly alarm only
+ type = number
+ default = 3
+}
+
+variable "latency_anomaly_sensitivity" { # second argument to ANOMALY_DETECTION_BAND
+ type = number
+ default = 2
+}
diff --git a/infrastructure/terraform/modules/alarms/alarms-apigw/versions.tf b/infrastructure/terraform/modules/alarms/alarms-apigw/versions.tf
new file mode 100644
index 00000000..f8dc86e9
--- /dev/null
+++ b/infrastructure/terraform/modules/alarms/alarms-apigw/versions.tf
@@ -0,0 +1,9 @@
+
+terraform { # requires Terraform >= 1.9.0; the AWS provider is declared without a version constraint
+ required_providers {
+ aws = {
+ source = "hashicorp/aws"
+ }
+ }
+ required_version = ">= 1.9.0"
+}
diff --git a/infrastructure/terraform/modules/alarms/alarms-ddb/README.md b/infrastructure/terraform/modules/alarms/alarms-ddb/README.md
new file mode 100644
index 00000000..b9c3b0c1
--- /dev/null
+++ b/infrastructure/terraform/modules/alarms/alarms-ddb/README.md
@@ -0,0 +1,29 @@
+
+
+
+
+## Requirements
+
+| Name | Version |
+|------|---------|
+| [terraform](#requirement\_terraform) | >= 1.9.0 |
+## Inputs
+
+| Name | Description | Type | Default | Required |
+|------|-------------|------|---------|:--------:|
+| [alarm\_prefix](#input\_alarm\_prefix) | n/a | `string` | n/a | yes |
+| [evaluation\_periods](#input\_evaluation\_periods) | n/a | `number` | `1` | no |
+| [period\_seconds](#input\_period\_seconds) | n/a | `number` | `60` | no |
+| [read\_throttle\_threshold](#input\_read\_throttle\_threshold) | n/a | `number` | `0` | no |
+| [table\_name](#input\_table\_name) | n/a | `string` | n/a | yes |
+| [tags](#input\_tags) | n/a | `map(string)` | `{}` | no |
+| [write\_throttle\_threshold](#input\_write\_throttle\_threshold) | n/a | `number` | `0` | no |
+## Modules
+
+No modules.
+## Outputs
+
+No outputs.
+
+
+
diff --git a/infrastructure/terraform/modules/alarms/alarms-ddb/main.tf b/infrastructure/terraform/modules/alarms/alarms-ddb/main.tf
new file mode 100644
index 00000000..a7a046aa
--- /dev/null
+++ b/infrastructure/terraform/modules/alarms/alarms-ddb/main.tf
@@ -0,0 +1,45 @@
+resource "aws_cloudwatch_metric_alarm" "read_throttle" { # alarms when summed ReadThrottleEvents for the table exceed the threshold
+ alarm_name = "${var.alarm_prefix}-ddb-${var.table_name}-read-throttle"
+ alarm_description = "RELIABILITY: DynamoDB read throttling"
+
+ namespace = "AWS/DynamoDB"
+ metric_name = "ReadThrottleEvents"
+ statistic = "Sum"
+ period = var.period_seconds
+
+ evaluation_periods = var.evaluation_periods
+ threshold = var.read_throttle_threshold
+ comparison_operator = "GreaterThanThreshold"
+ treat_missing_data = "notBreaching" # periods without datapoints do not count towards the alarm
+
+ dimensions = { TableName = var.table_name }
+
+ actions_enabled = false # alarm state is recorded only; no actions are executed
+ alarm_actions = []
+ ok_actions = []
+ insufficient_data_actions = []
+ tags = var.tags
+}
+
+resource "aws_cloudwatch_metric_alarm" "write_throttle" { # alarms when summed WriteThrottleEvents for the table exceed the threshold
+ alarm_name = "${var.alarm_prefix}-ddb-${var.table_name}-write-throttle"
+ alarm_description = "RELIABILITY: DynamoDB write throttling"
+
+ namespace = "AWS/DynamoDB"
+ metric_name = "WriteThrottleEvents"
+ statistic = "Sum"
+ period = var.period_seconds
+
+ evaluation_periods = var.evaluation_periods
+ threshold = var.write_throttle_threshold
+ comparison_operator = "GreaterThanThreshold"
+ treat_missing_data = "notBreaching" # periods without datapoints do not count towards the alarm
+
+ dimensions = { TableName = var.table_name }
+
+ actions_enabled = false # alarm state is recorded only; no actions are executed
+ alarm_actions = []
+ ok_actions = []
+ insufficient_data_actions = []
+ tags = var.tags
+}
diff --git a/infrastructure/terraform/modules/alarms/alarms-ddb/variables.tf b/infrastructure/terraform/modules/alarms/alarms-ddb/variables.tf
new file mode 100644
index 00000000..3895d21e
--- /dev/null
+++ b/infrastructure/terraform/modules/alarms/alarms-ddb/variables.tf
@@ -0,0 +1,32 @@
+variable "alarm_prefix" { # prepended to every alarm name in this module
+ type = string
+}
+
+variable "table_name" { # used in the alarm names and as the TableName dimension
+ type = string
+}
+
+variable "tags" { # applied to every alarm in this module
+ type = map(string)
+ default = {}
+}
+
+variable "period_seconds" { # metric period shared by both throttle alarms
+ type = number
+ default = 60
+}
+
+variable "evaluation_periods" { # evaluation periods shared by both throttle alarms
+ type = number
+ default = 1
+}
+
+variable "read_throttle_threshold" { # read alarm fires when the summed count is greater than this
+ type = number
+ default = 0
+}
+
+variable "write_throttle_threshold" { # write alarm fires when the summed count is greater than this
+ type = number
+ default = 0
+}
diff --git a/infrastructure/terraform/modules/alarms/alarms-ddb/versions.tf b/infrastructure/terraform/modules/alarms/alarms-ddb/versions.tf
new file mode 100644
index 00000000..f8dc86e9
--- /dev/null
+++ b/infrastructure/terraform/modules/alarms/alarms-ddb/versions.tf
@@ -0,0 +1,9 @@
+
+terraform { # requires Terraform >= 1.9.0; the AWS provider is declared without a version constraint
+ required_providers {
+ aws = {
+ source = "hashicorp/aws"
+ }
+ }
+ required_version = ">= 1.9.0"
+}
diff --git a/infrastructure/terraform/modules/alarms/alarms-lambda/README.md b/infrastructure/terraform/modules/alarms/alarms-lambda/README.md
new file mode 100644
index 00000000..a865cb79
--- /dev/null
+++ b/infrastructure/terraform/modules/alarms/alarms-lambda/README.md
@@ -0,0 +1,36 @@
+
+
+
+
+## Requirements
+
+| Name | Version |
+|------|---------|
+| [terraform](#requirement\_terraform) | >= 1.9.0 |
+## Inputs
+
+| Name | Description | Type | Default | Required |
+|------|-------------|------|---------|:--------:|
+| [alarm\_prefix](#input\_alarm\_prefix) | n/a | `string` | n/a | yes |
+| [enable\_error\_log\_metric](#input\_enable\_error\_log\_metric) | n/a | `bool` | `true` | no |
+| [error\_log\_evaluation\_periods](#input\_error\_log\_evaluation\_periods) | n/a | `number` | `1` | no |
+| [error\_log\_metric\_filter\_pattern](#input\_error\_log\_metric\_filter\_pattern) | n/a | `string` | `"{ ($.level = \"50\" \|\| $.level = \"error\") && $.environment = * }"` | no |
+| [error\_log\_metric\_name\_prefix](#input\_error\_log\_metric\_name\_prefix) | n/a | `string` | `"LambdaErrorLogs-"` | no |
+| [error\_log\_metric\_namespace](#input\_error\_log\_metric\_namespace) | n/a | `string` | `"Custom/LambdaErrorLogs"` | no |
+| [error\_log\_threshold](#input\_error\_log\_threshold) | n/a | `number` | `0` | no |
+| [errors\_threshold](#input\_errors\_threshold) | n/a | `number` | `0` | no |
+| [evaluation\_periods](#input\_evaluation\_periods) | n/a | `number` | `1` | no |
+| [function\_name](#input\_function\_name) | n/a | `string` | n/a | yes |
+| [log\_group\_name](#input\_log\_group\_name) | n/a | `string` | `""` | no |
+| [period\_seconds](#input\_period\_seconds) | n/a | `number` | `300` | no |
+| [tags](#input\_tags) | n/a | `map(string)` | `{}` | no |
+| [throttles\_threshold](#input\_throttles\_threshold) | n/a | `number` | `0` | no |
+## Modules
+
+No modules.
+## Outputs
+
+No outputs.
+
+
+
diff --git a/infrastructure/terraform/modules/alarms/alarms-lambda/main.tf b/infrastructure/terraform/modules/alarms/alarms-lambda/main.tf
new file mode 100644
index 00000000..033d1798
--- /dev/null
+++ b/infrastructure/terraform/modules/alarms/alarms-lambda/main.tf
@@ -0,0 +1,80 @@
+resource "aws_cloudwatch_metric_alarm" "errors" { # alarms when the function's summed Errors metric exceeds the threshold
+ alarm_name = "${var.alarm_prefix}-lambda-${var.function_name}-errors"
+ alarm_description = "ERROR: Lambda errors"
+
+ namespace = "AWS/Lambda"
+ metric_name = "Errors"
+ statistic = "Sum"
+ period = var.period_seconds
+
+ evaluation_periods = var.evaluation_periods
+ threshold = var.errors_threshold
+ comparison_operator = "GreaterThanThreshold"
+ treat_missing_data = "notBreaching" # periods without datapoints do not count towards the alarm
+
+ dimensions = { FunctionName = var.function_name }
+
+ actions_enabled = false # alarm state is recorded only; no actions are executed
+ alarm_actions = []
+ ok_actions = []
+ insufficient_data_actions = []
+ tags = var.tags
+}
+
+resource "aws_cloudwatch_metric_alarm" "throttles" { # alarms when the function's summed Throttles metric exceeds the threshold
+ alarm_name = "${var.alarm_prefix}-lambda-${var.function_name}-throttles"
+ alarm_description = "RELIABILITY: Lambda throttles"
+
+ namespace = "AWS/Lambda"
+ metric_name = "Throttles"
+ statistic = "Sum"
+ period = var.period_seconds
+
+ evaluation_periods = var.evaluation_periods
+ threshold = var.throttles_threshold
+ comparison_operator = "GreaterThanThreshold"
+ treat_missing_data = "notBreaching" # periods without datapoints do not count towards the alarm
+
+ dimensions = { FunctionName = var.function_name }
+
+ actions_enabled = false # alarm state is recorded only; no actions are executed
+ alarm_actions = []
+ ok_actions = []
+ insufficient_data_actions = []
+ tags = var.tags
+}
+
+resource "aws_cloudwatch_log_metric_filter" "error_logs" { # turns log events matching the error pattern into a custom metric; created only when enable_error_log_metric is true
+ count = var.enable_error_log_metric ? 1 : 0
+ name = "${var.alarm_prefix}-lambda-${var.function_name}-error-logs"
+ log_group_name = coalesce(var.log_group_name, "/aws/lambda/${var.function_name}") # var.log_group_name defaults to ""; fall back to Lambda's default log group name rather than passing an empty name
+ pattern = var.error_log_metric_filter_pattern
+
+ metric_transformation {
+ name = "${var.error_log_metric_name_prefix}${var.function_name}" # same name the error_logs alarm reads as metric_name
+ namespace = var.error_log_metric_namespace
+ value = "1" # each matching log event contributes 1
+ }
+}
+
+resource "aws_cloudwatch_metric_alarm" "error_logs" { # alarms on the custom metric published by the error_logs log metric filter
+ count = var.enable_error_log_metric ? 1 : 0
+ alarm_name = "${var.alarm_prefix}-lambda-${var.function_name}-error-logs"
+ alarm_description = "ERROR: Lambda error logs detected"
+
+ namespace = var.error_log_metric_namespace
+ metric_name = "${var.error_log_metric_name_prefix}${var.function_name}" # same name the metric filter's metric_transformation publishes
+ statistic = "Sum"
+ period = var.period_seconds
+
+ evaluation_periods = var.error_log_evaluation_periods
+ threshold = var.error_log_threshold
+ comparison_operator = "GreaterThanThreshold"
+ treat_missing_data = "notBreaching" # periods without datapoints do not count towards the alarm
+
+ actions_enabled = false # alarm state is recorded only; no actions are executed
+ alarm_actions = []
+ ok_actions = []
+ insufficient_data_actions = []
+ tags = var.tags
+}
diff --git a/infrastructure/terraform/modules/alarms/alarms-lambda/variables.tf b/infrastructure/terraform/modules/alarms/alarms-lambda/variables.tf
new file mode 100644
index 00000000..5da36fd5
--- /dev/null
+++ b/infrastructure/terraform/modules/alarms/alarms-lambda/variables.tf
@@ -0,0 +1,67 @@
+variable "alarm_prefix" { # prepended to every alarm and metric-filter name in this module
+ type = string
+}
+
+variable "function_name" { # used in resource names and as the FunctionName dimension
+ type = string
+}
+
+variable "log_group_name" { # log group for the error-log metric filter
+ type = string
+ default = ""
+}
+
+variable "tags" { # applied to every alarm in this module
+ type = map(string)
+ default = {}
+}
+
+variable "period_seconds" { # metric period shared by all alarms in this module
+ type = number
+ default = 300
+}
+
+variable "evaluation_periods" { # evaluation periods for the errors and throttles alarms
+ type = number
+ default = 1
+}
+
+variable "errors_threshold" { # errors alarm fires when the summed count is greater than this
+ type = number
+ default = 0
+}
+
+variable "throttles_threshold" { # throttles alarm fires when the summed count is greater than this
+ type = number
+ default = 0
+}
+
+variable "enable_error_log_metric" { # toggles both the log metric filter and its alarm
+ type = bool
+ default = true
+}
+
+variable "error_log_metric_namespace" { # namespace of the custom error-log metric
+ type = string
+ default = "Custom/LambdaErrorLogs"
+}
+
+variable "error_log_metric_name_prefix" { # the custom metric name is this prefix followed by the function name
+ type = string
+ default = "LambdaErrorLogs-"
+}
+
+variable "error_log_metric_filter_pattern" { # filter pattern for the log metric filter; the default matches on $.level and $.environment
+ type = string
+ default = "{ ($.level = \"50\" || $.level = \"error\") && $.environment = * }"
+}
+
+variable "error_log_threshold" { # error-log alarm fires when the summed count is greater than this
+ type = number
+ default = 0
+}
+
+variable "error_log_evaluation_periods" { # evaluation periods for the error-log alarm
+ type = number
+ default = 1
+}
diff --git a/infrastructure/terraform/modules/alarms/alarms-lambda/versions.tf b/infrastructure/terraform/modules/alarms/alarms-lambda/versions.tf
new file mode 100644
index 00000000..f8dc86e9
--- /dev/null
+++ b/infrastructure/terraform/modules/alarms/alarms-lambda/versions.tf
@@ -0,0 +1,9 @@
+
+terraform { # requires Terraform >= 1.9.0; the AWS provider is declared without a version constraint
+ required_providers {
+ aws = {
+ source = "hashicorp/aws"
+ }
+ }
+ required_version = ">= 1.9.0"
+}
diff --git a/infrastructure/terraform/modules/alarms/alarms-sqs/README.md b/infrastructure/terraform/modules/alarms/alarms-sqs/README.md
new file mode 100644
index 00000000..b02b320d
--- /dev/null
+++ b/infrastructure/terraform/modules/alarms/alarms-sqs/README.md
@@ -0,0 +1,31 @@
+
+
+
+
+## Requirements
+
+| Name | Version |
+|------|---------|
+| [terraform](#requirement\_terraform) | >= 1.9.0 |
+## Inputs
+
+| Name | Description | Type | Default | Required |
+|------|-------------|------|---------|:--------:|
+| [age\_anomaly\_datapoints\_to\_alarm](#input\_age\_anomaly\_datapoints\_to\_alarm) | n/a | `number` | `3` | no |
+| [age\_anomaly\_evaluation\_periods](#input\_age\_anomaly\_evaluation\_periods) | n/a | `number` | `5` | no |
+| [age\_anomaly\_sensitivity](#input\_age\_anomaly\_sensitivity) | n/a | `number` | `3` | no |
+| [age\_period\_seconds](#input\_age\_period\_seconds) | n/a | `number` | `60` | no |
+| [alarm\_prefix](#input\_alarm\_prefix) | n/a | `string` | n/a | yes |
+| [dlq\_queue\_name](#input\_dlq\_queue\_name) | n/a | `string` | `null` | no |
+| [dlq\_visible\_threshold](#input\_dlq\_visible\_threshold) | n/a | `number` | `0` | no |
+| [queue\_name](#input\_queue\_name) | n/a | `string` | n/a | yes |
+| [tags](#input\_tags) | n/a | `map(string)` | `{}` | no |
+## Modules
+
+No modules.
+## Outputs
+
+No outputs.
+
+
+
diff --git a/infrastructure/terraform/modules/alarms/alarms-sqs/main.tf b/infrastructure/terraform/modules/alarms/alarms-sqs/main.tf
new file mode 100644
index 00000000..1a706973
--- /dev/null
+++ b/infrastructure/terraform/modules/alarms/alarms-sqs/main.tf
@@ -0,0 +1,62 @@
+locals { # dimension set identifying the main queue in AWS/SQS metrics
+ queue_dimensions = { QueueName = var.queue_name }
+}
+
+resource "aws_cloudwatch_metric_alarm" "age_anomaly" { # alarms when the oldest-message age rises above the anomaly-detection band
+ alarm_name = "${var.alarm_prefix}-sqs-${var.queue_name}-age-anomaly"
+ alarm_description = "RELIABILITY: SQS oldest message age anomaly"
+ comparison_operator = "GreaterThanUpperThreshold"
+ evaluation_periods = var.age_anomaly_evaluation_periods
+ datapoints_to_alarm = var.age_anomaly_datapoints_to_alarm
+ threshold_metric_id = "ad1" # refers to the band expression in the second metric_query below
+ treat_missing_data = "notBreaching" # periods without datapoints do not count towards the alarm
+
+ actions_enabled = false # alarm state is recorded only; no actions are executed
+ alarm_actions = []
+ ok_actions = []
+ insufficient_data_actions = []
+ tags = var.tags
+
+ metric_query { # m1: the raw ApproximateAgeOfOldestMessage metric for the main queue
+ id = "m1"
+ metric {
+ metric_name = "ApproximateAgeOfOldestMessage"
+ namespace = "AWS/SQS"
+ stat = "Maximum"
+ period = var.age_period_seconds
+ dimensions = local.queue_dimensions
+ }
+ return_data = true
+ }
+
+ metric_query { # ad1: expected band around m1; the second argument is var.age_anomaly_sensitivity
+ id = "ad1"
+ expression = "ANOMALY_DETECTION_BAND(m1, ${var.age_anomaly_sensitivity})"
+ label = "AgeOfOldestMessage (expected)"
+ return_data = true
+ }
+}
+
+resource "aws_cloudwatch_metric_alarm" "dlq_depth" { # alarms when the dead-letter queue holds more visible messages than the threshold
+ count = var.dlq_queue_name == null ? 0 : 1 # skipped entirely when no DLQ name is supplied
+ alarm_name = "${var.alarm_prefix}-sqs-${var.dlq_queue_name}-dlq-depth"
+ alarm_description = "RELIABILITY: SQS DLQ has messages"
+
+ namespace = "AWS/SQS"
+ metric_name = "ApproximateNumberOfMessagesVisible"
+ statistic = "Sum"
+ period = 60 # fixed; not driven by var.age_period_seconds
+
+ evaluation_periods = 1
+ threshold = var.dlq_visible_threshold
+ comparison_operator = "GreaterThanThreshold"
+ treat_missing_data = "notBreaching" # periods without datapoints do not count towards the alarm
+
+ dimensions = { QueueName = var.dlq_queue_name }
+
+ actions_enabled = false # alarm state is recorded only; no actions are executed
+ alarm_actions = []
+ ok_actions = []
+ insufficient_data_actions = []
+ tags = var.tags
+}
diff --git a/infrastructure/terraform/modules/alarms/alarms-sqs/variables.tf b/infrastructure/terraform/modules/alarms/alarms-sqs/variables.tf
new file mode 100644
index 00000000..762c15ea
--- /dev/null
+++ b/infrastructure/terraform/modules/alarms/alarms-sqs/variables.tf
@@ -0,0 +1,42 @@
+variable "alarm_prefix" { # prepended to every alarm name in this module
+ type = string
+}
+
+variable "queue_name" { # main queue: used in the age alarm name and as its QueueName dimension
+ type = string
+}
+
+variable "dlq_queue_name" { # dead-letter queue name; when null the DLQ depth alarm is not created
+ type = string
+ default = null
+}
+
+variable "tags" { # applied to every alarm in this module
+ type = map(string)
+ default = {}
+}
+
+variable "age_period_seconds" { # metric period for the oldest-message-age metric
+ type = number
+ default = 60
+}
+
+variable "age_anomaly_sensitivity" { # second argument to ANOMALY_DETECTION_BAND
+ type = number
+ default = 3
+}
+
+variable "age_anomaly_evaluation_periods" { # evaluation periods for the age anomaly alarm
+ type = number
+ default = 5
+}
+
+variable "age_anomaly_datapoints_to_alarm" { # datapoints_to_alarm for the age anomaly alarm
+ type = number
+ default = 3
+}
+
+variable "dlq_visible_threshold" { # DLQ alarm fires when visible messages are greater than this
+ type = number
+ default = 0
+}
diff --git a/infrastructure/terraform/modules/alarms/alarms-sqs/versions.tf b/infrastructure/terraform/modules/alarms/alarms-sqs/versions.tf
new file mode 100644
index 00000000..f8dc86e9
--- /dev/null
+++ b/infrastructure/terraform/modules/alarms/alarms-sqs/versions.tf
@@ -0,0 +1,9 @@
+
+terraform { # requires Terraform >= 1.9.0; the AWS provider is declared without a version constraint
+ required_providers {
+ aws = {
+ source = "hashicorp/aws"
+ }
+ }
+ required_version = ">= 1.9.0"
+}
diff --git a/lambdas/authorizer/src/__tests__/index.test.ts b/lambdas/authorizer/src/__tests__/index.test.ts
index 4020b55e..a3f2fc9f 100644
--- a/lambdas/authorizer/src/__tests__/index.test.ts
+++ b/lambdas/authorizer/src/__tests__/index.test.ts
@@ -17,7 +17,7 @@ const mockedDeps: jest.Mocked = {
} as unknown as pino.Logger,
env: {
CLOUDWATCH_NAMESPACE: "cloudwatch-namespace",
- CLIENT_CERTIFICATE_EXPIRATION_ALERT_DAYS: 14,
+ CLIENT_CERTIFICATE_EXPIRATION_ALERT_DAYS: 30,
APIM_SUPPLIER_ID_HEADER: "NHSD-Supplier-ID",
} as unknown as EnvVars,
supplierRepo: {
@@ -56,10 +56,11 @@ describe("Authorizer Lambda Function", () => {
});
describe("Certificate expiry check", () => {
+ const currentDate = new Date("2025-11-01T14:19:00Z");
beforeEach(() => {
jest
.useFakeTimers({ doNotFake: ["nextTick"] })
- .setSystemTime(new Date("2025-11-03T14:19:00Z"));
+ .setSystemTime(currentDate);
});
afterEach(() => {
@@ -81,7 +82,7 @@ describe("Authorizer Lambda Function", () => {
it("Should log CloudWatch metric when the certificate expiry threshold is reached", async () => {
mockEvent.requestContext.identity.clientCert = buildCertWithExpiry(
- "2025-11-17T14:19:00Z",
+ "2025-12-01T14:19:00Z", // exactly 30 days after currentDate; November has no 31st
);
const handler = createAuthorizerHandler(mockedDeps);
@@ -92,7 +93,7 @@ describe("Authorizer Lambda Function", () => {
expect(mockedInfo.mock.calls.map((call) => call[0])).toContain(
JSON.stringify({
_aws: {
- Timestamp: 1_762_179_540_000,
+ Timestamp: currentDate.getTime(),
CloudWatchMetrics: [
{
Namespace: "cloudwatch-namespace",
@@ -108,7 +109,7 @@ describe("Authorizer Lambda Function", () => {
],
},
SUBJECT_DN: "CN=test-subject",
- NOT_AFTER: "2025-11-17T14:19:00Z",
+ NOT_AFTER: "2025-12-01T14:19:00Z",
"apim-client-certificate-near-expiry": 1,
}),
);
@@ -116,7 +117,7 @@ describe("Authorizer Lambda Function", () => {
it("Should not log CloudWatch metric when the certificate expiry threshold is not yet reached", async () => {
mockEvent.requestContext.identity.clientCert = buildCertWithExpiry(
- "2025-11-18T14:19:00Z",
+ "2026-01-01T14:19:00Z",
);
const handler = createAuthorizerHandler(mockedDeps);