From 2562795e9b8f8da865dae9b5ffad093f81390c71 Mon Sep 17 00:00:00 2001
From: Maciej Murawski
Date: Thu, 20 Nov 2025 20:25:05 +0000
Subject: [PATCH] feat: add monitoring for db performance - alert if CPU usage above 90 percent

---
 infrastructure/modules/sql-server/alerts.tf   | 32 +++++++++++++
 .../modules/sql-server/variables.tf           | 47 +++++++++++++++++++
 2 files changed, 79 insertions(+)
 create mode 100644 infrastructure/modules/sql-server/alerts.tf

diff --git a/infrastructure/modules/sql-server/alerts.tf b/infrastructure/modules/sql-server/alerts.tf
new file mode 100644
index 00000000..6f115eb0
--- /dev/null
+++ b/infrastructure/modules/sql-server/alerts.tf
@@ -0,0 +1,32 @@
+
+
+# Azure Monitor alert for DToSDB CPU Percentage higher than 90% for the past 5 minutes
+resource "azurerm_monitor_metric_alert" "cpu" {
+  count = var.enable_alerting ? 1 : 0
+
+  name                = "${azurerm_mssql_database.defaultdb.name}-cpu"
+  resource_group_name = var.resource_group_name
+  scopes              = [azurerm_mssql_database.defaultdb.id]
+  description         = "Action will be triggered when cpu use is greater than ${var.alert_cpu_threshold}%"
+  window_size         = var.alert_window_size
+  severity            = 2
+  frequency           = local.alert_frequency
+
+  criteria {
+    metric_namespace = "Microsoft.Sql/servers/databases"
+    metric_name      = "cpu_percent"
+    aggregation      = "Average"
+    operator         = "GreaterThan"
+    threshold        = var.alert_cpu_threshold
+  }
+
+  action {
+    action_group_id = var.action_group_id
+  }
+
+  lifecycle {
+    ignore_changes = [
+      tags
+    ]
+  }
+}
diff --git a/infrastructure/modules/sql-server/variables.tf b/infrastructure/modules/sql-server/variables.tf
index 25159472..4c501dd7 100644
--- a/infrastructure/modules/sql-server/variables.tf
+++ b/infrastructure/modules/sql-server/variables.tf
@@ -264,3 +264,50 @@ variable "vulnerability_assessment_enabled" {
   description = "to enable extended auditing policy for server or database"
   default     = false
 }
+
+/* --------------------------------------------------------------------------------------------------
+  Azure Monitor Alerts variables
+-------------------------------------------------------------------------------------------------- */
+
+variable "enable_alerting" {
+  description = "Whether monitoring and alerting is enabled for the Azure SQL Server."
+  type        = bool
+  default     = false
+}
+
+variable "alert_cpu_threshold" {
+  type        = number
+  description = "If alerting is enabled this will control what the cpu threshold will be, default will be 90."
+  default     = 90
+}
+
+variable "alert_window_size" {
+  type     = string
+  nullable = false
+  default  = "PT5M"
+  validation {
+    condition     = contains(["PT1M", "PT5M", "PT15M", "PT30M", "PT1H", "PT6H", "PT12H"], var.alert_window_size)
+    error_message = "The alert_window_size must be one of: PT1M, PT5M, PT15M, PT30M, PT1H, PT6H, PT12H"
+  }
+  description = "The period of time that is used to monitor alert activity e.g. PT1M, PT5M, PT15M, PT30M, PT1H, PT6H, PT12H. The interval between checks is adjusted accordingly."
+}
+
+locals {
+  # Check interval per window size; every value allowed by the alert_window_size validation needs an entry, or the index lookup below fails.
+  alert_frequency_map = {
+    PT1M  = "PT1M"
+    PT5M  = "PT1M"
+    PT15M = "PT1M"
+    PT30M = "PT1M"
+    PT1H  = "PT1M"
+    PT6H  = "PT5M"
+    PT12H = "PT5M"
+  }
+  alert_frequency = local.alert_frequency_map[var.alert_window_size]
+}
+
+variable "action_group_id" {
+  type        = string
+  description = "ID of the action group to notify."
+  default     = null
+}