Skip to content

Commit 6144dfb

Browse files
authored
feat: add monitoring for db performance - alert if CPU usage above 90 percent (#263)
1 parent cf15363 commit 6144dfb

File tree

2 files changed

+77
-0
lines changed

2 files changed

+77
-0
lines changed
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
2+
3+
# Azure Monitor metric alert on the CPU percentage of the default database
# (azurerm_mssql_database.defaultdb).
#
# Fires when the "cpu_percent" metric, averaged over var.alert_window_size, is
# greater than var.alert_cpu_threshold. The alert is only created when
# var.enable_alerting is true.
resource "azurerm_monitor_metric_alert" "cpu" {
  count = var.enable_alerting ? 1 : 0

  name                = "${azurerm_mssql_database.defaultdb.name}-cpu"
  resource_group_name = var.resource_group_name
  scopes              = [azurerm_mssql_database.defaultdb.id]
  description         = "Action will be triggered when cpu use is greater than ${var.alert_cpu_threshold}%"
  window_size         = var.alert_window_size
  severity            = 2
  frequency           = local.alert_frequency

  criteria {
    metric_namespace = "Microsoft.Sql/servers/databases"
    metric_name      = "cpu_percent"
    aggregation      = "Average"
    operator         = "GreaterThan"
    threshold        = var.alert_cpu_threshold
  }

  # var.action_group_id defaults to null, while an action block needs an
  # action_group_id. Only emit the block when an action group was supplied so
  # that alerting can be enabled without one.
  dynamic "action" {
    for_each = var.action_group_id == null ? [] : [var.action_group_id]

    content {
      action_group_id = action.value
    }
  }

  # Tags are managed outside this module; do not treat tag drift as a change.
  # NOTE(review): presumed reason for ignoring tags - confirm with the module owners.
  lifecycle {
    ignore_changes = [
      tags
    ]
  }
}

infrastructure/modules/sql-server/variables.tf

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -264,3 +264,48 @@ variable "vulnerability_assessment_enabled" {
264264
description = "to enable extended auditing policy for server or database"
265265
default = false
266266
}
267+
268+
/* --------------------------------------------------------------------------------------------------
269+
Azure Monitor Alerts variables
270+
-------------------------------------------------------------------------------------------------- */
271+
272+
# Feature switch for the Azure Monitor alert resources in this module.
# Used as the condition of the alert's `count` expression; off by default.
variable "enable_alerting" {
  type        = bool
  default     = false
  description = "Whether monitoring and alerting is enabled for the Azure SQL Server."
}
277+
278+
# Threshold that the database's average "cpu_percent" metric is compared
# against (with the GreaterThan operator) by the CPU metric alert.
variable "alert_cpu_threshold" {
  description = "If alerting is enabled this will control what the cpu threshold will be, default will be 90."
  type        = number
  default     = 90
}
283+
284+
# ISO 8601 duration over which the alert metric is aggregated.
# The value is also used as the key into local.alert_frequency_map to derive
# how often the alert is evaluated.
variable "alert_window_size" {
  description = "The period of time that is used to monitor alert activity e.g. PT1M, PT5M, PT15M, PT30M, PT1H, PT6H, PT12H. The interval between checks is adjusted accordingly."
  type        = string
  default     = "PT5M"
  nullable    = false

  # Reject anything outside the supported set of window sizes at plan time.
  validation {
    condition     = contains(["PT1M", "PT5M", "PT15M", "PT30M", "PT1H", "PT6H", "PT12H"], var.alert_window_size)
    error_message = "The alert_window_size must be one of: PT1M, PT5M, PT15M, PT30M, PT1H, PT6H, PT12H"
  }
}
294+
295+
# Derives the alert evaluation frequency from the configured window size.
#
# alert_frequency_map: window size (var.alert_window_size) -> evaluation frequency.
# Every value accepted by the alert_window_size validation needs a key here;
# a missing key makes the index expression below fail with an invalid-index
# error even though the input passed validation. "PT1M" was previously missing.
locals {
  alert_frequency_map = {
    PT1M  = "PT1M"
    PT5M  = "PT1M"
    PT15M = "PT1M"
    PT30M = "PT1M"
    PT1H  = "PT1M"
    PT6H  = "PT5M"
    PT12H = "PT5M"
  }

  # Frequency passed to the metric alert for the selected window size.
  alert_frequency = local.alert_frequency_map[var.alert_window_size]
}
306+
307+
# Action group referenced by the metric alert's action; null when none is supplied.
variable "action_group_id" {
  description = "ID of the action group to notify."
  type        = string
  default     = null
}

0 commit comments

Comments
 (0)