From cf043de3102a0ffdf4a7597fd5e2d9eb26e34eb7 Mon Sep 17 00:00:00 2001
From: Harriet H-W
Date: Fri, 24 Oct 2025 17:16:28 +0100
Subject: [PATCH] Add an Alert on Exceptions for App Insights

We want to trigger an alert whenever there is an exception for
manage-breast-screening - specifically for the Invite team's container
app jobs in the notifications app.

This should trigger whenever the Exception count is above 0 - may want
to tweak that but in theory there should never be an Exception unless
something has completely failed.
---
 infrastructure/modules/app-insights/alerts.tf | 26 ++++++++++++++++++
 infrastructure/modules/app-insights/tfdocs.md | 25 +++++++++++++++++
 .../modules/app-insights/variables.tf         | 27 +++++++++++++++++++
 3 files changed, 78 insertions(+)
 create mode 100644 infrastructure/modules/app-insights/alerts.tf

diff --git a/infrastructure/modules/app-insights/alerts.tf b/infrastructure/modules/app-insights/alerts.tf
new file mode 100644
index 00000000..337b6d13
--- /dev/null
+++ b/infrastructure/modules/app-insights/alerts.tf
@@ -0,0 +1,26 @@
+resource "azurerm_monitor_metric_alert" "exceptions" {
+  count = var.enable_alerting ? 1 : 0
+
+  auto_mitigate       = true
+  description         = "Triggered by any Exception"
+  enabled             = true
+  frequency           = var.alert_frequency
+  name                = "Exceptions"
+  resource_group_name = var.resource_group_name
+  scopes              = [azurerm_application_insights.appins.id]
+  severity            = 1
+  window_size         = local.alert_window_size
+
+  action {
+    action_group_id = var.action_group_id
+  }
+
+  criteria {
+    aggregation            = "Count"
+    metric_name            = "exceptions/count"
+    metric_namespace       = "microsoft.insights/components"
+    operator               = "GreaterThan"
+    skip_metric_validation = false
+    threshold              = 0
+  }
+}
diff --git a/infrastructure/modules/app-insights/tfdocs.md b/infrastructure/modules/app-insights/tfdocs.md
index 85d743de..ad2959f1 100644
--- a/infrastructure/modules/app-insights/tfdocs.md
+++ b/infrastructure/modules/app-insights/tfdocs.md
@@ -38,6 +38,30 @@ Type: `string`
 
 The following input variables are optional (have default values):
 
+### [action\_group\_id](#input\_action\_group\_id)
+
+Description: The ID of the Action Group to use for alerts.
+
+Type: `string`
+
+Default: `null`
+
+### [alert\_frequency](#input\_alert\_frequency)
+
+Description: The frequency at which the alert is evaluated. Must be one of: PT1M, PT5M, PT15M, PT30M, PT1H.
+
+Type: `string`
+
+Default: `"PT5M"`
+
+### [enable\_alerting](#input\_enable\_alerting)
+
+Description: Whether monitoring and alerting is enabled for this module.
+
+Type: `bool`
+
+Default: `false`
+
 ### [tags](#input\_tags)
 
 Description: A mapping of tags to assign to the resource.
@@ -70,3 +94,4 @@ Description: n/a
 The following resources are used by this module:
 
 - [azurerm_application_insights.appins](https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/resources/application_insights) (resource)
+- [azurerm_monitor_metric_alert.exceptions](https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/resources/monitor_metric_alert) (resource)
diff --git a/infrastructure/modules/app-insights/variables.tf b/infrastructure/modules/app-insights/variables.tf
index a4e575cf..3d671f22 100644
--- a/infrastructure/modules/app-insights/variables.tf
+++ b/infrastructure/modules/app-insights/variables.tf
@@ -33,3 +33,30 @@ variable "resource_group_name" {
   type        = string
   description = "The name of the resource group in which the App Insights is created. Changing this forces a new resource to be created."
 }
+
+variable "enable_alerting" {
+  description = "Whether monitoring and alerting is enabled for this module."
+  type        = bool
+  default     = false
+}
+
+variable "alert_frequency" {
+  type     = string
+  nullable = false
+  default  = "PT5M"
+  validation {
+    condition     = contains(["PT1M", "PT5M", "PT15M", "PT30M", "PT1H"], var.alert_frequency)
+    error_message = "The alert_frequency must be one of: PT1M, PT5M, PT15M, PT30M, PT1H"
+  }
+  description = "The frequency at which the alert is evaluated. Must be one of: PT1M, PT5M, PT15M, PT30M, PT1H."
+}
+
+variable "action_group_id" {
+  type        = string
+  description = "The ID of the Action Group to use for alerts."
+  default     = null
+}
+
+locals {
+  alert_window_size = var.alert_frequency
+}