diff --git a/infrastructure/modules/sql-server/alerts.tf b/infrastructure/modules/sql-server/alerts.tf
new file mode 100644
index 00000000..6f115eb0
--- /dev/null
+++ b/infrastructure/modules/sql-server/alerts.tf
@@ -0,0 +1,34 @@
+# Azure Monitor alert for DToSDB: raised when the average CPU percentage stays above
+# var.alert_cpu_threshold (default 90) over var.alert_window_size (default PT5M).
+# Only created when var.enable_alerting is true.
+resource "azurerm_monitor_metric_alert" "cpu" {
+  count = var.enable_alerting ? 1 : 0
+
+  name                = "${azurerm_mssql_database.defaultdb.name}-cpu"
+  resource_group_name = var.resource_group_name
+  scopes              = [azurerm_mssql_database.defaultdb.id]
+  description         = "Action will be triggered when cpu use is greater than ${var.alert_cpu_threshold}%"
+  window_size         = var.alert_window_size
+  severity            = 2
+  frequency           = local.alert_frequency
+
+  criteria {
+    metric_namespace = "Microsoft.Sql/servers/databases"
+    metric_name      = "cpu_percent"
+    aggregation      = "Average"
+    operator         = "GreaterThan"
+    threshold        = var.alert_cpu_threshold
+  }
+
+  # NOTE(review): var.action_group_id defaults to null; callers that set
+  # enable_alerting = true are presumably expected to pass an action group ID - confirm.
+  action {
+    action_group_id = var.action_group_id
+  }
+
+  lifecycle {
+    ignore_changes = [
+      tags
+    ]
+  }
+}
diff --git a/infrastructure/modules/sql-server/variables.tf b/infrastructure/modules/sql-server/variables.tf
index 25159472..4c501dd7 100644
--- a/infrastructure/modules/sql-server/variables.tf
+++ b/infrastructure/modules/sql-server/variables.tf
@@ -264,3 +264,52 @@ variable "vulnerability_assessment_enabled" {
   description = "to enable extended auditing policy for server or database"
   default     = false
 }
+
+/* --------------------------------------------------------------------------------------------------
+  Azure Monitor Alerts variables
+-------------------------------------------------------------------------------------------------- */
+
+variable "enable_alerting" {
+  description = "Whether monitoring and alerting is enabled for the Azure SQL Server."
+  type        = bool
+  default     = false
+}
+
+variable "alert_cpu_threshold" {
+  type        = number
+  description = "If alerting is enabled this will control what the cpu threshold will be, default will be 90."
+  default     = 90
+}
+
+variable "alert_window_size" {
+  type     = string
+  nullable = false
+  default  = "PT5M"
+  validation {
+    condition     = contains(["PT1M", "PT5M", "PT15M", "PT30M", "PT1H", "PT6H", "PT12H"], var.alert_window_size)
+    error_message = "The alert_window_size must be one of: PT1M, PT5M, PT15M, PT30M, PT1H, PT6H, PT12H"
+  }
+  description = "The period of time that is used to monitor alert activity e.g. PT1M, PT5M, PT15M, PT30M, PT1H, PT6H, PT12H. The interval between checks is adjusted accordingly."
+}
+
+# Evaluation frequency used for each permitted alert_window_size. Every value accepted
+# by the alert_window_size validation must have a key here, otherwise the index lookup
+# below fails for that value.
+locals {
+  alert_frequency_map = {
+    PT1M  = "PT1M"
+    PT5M  = "PT1M"
+    PT15M = "PT1M"
+    PT30M = "PT1M"
+    PT1H  = "PT1M"
+    PT6H  = "PT5M"
+    PT12H = "PT5M"
+  }
+  alert_frequency = local.alert_frequency_map[var.alert_window_size]
+}
+
+variable "action_group_id" {
+  type        = string
+  description = "ID of the action group to notify."
+  default     = null
+}