From bb8726c9f7822eadba10da2be54354acaf732cec Mon Sep 17 00:00:00 2001 From: Mauro Servienti Date: Mon, 10 Mar 2025 17:54:31 +0100 Subject: [PATCH 01/21] Add an initial draft of a dirty memory custom check --- .../CustomChecks/CheckDirtyMemory.cs | 48 +++++++++++++++++++ .../MemoryInformationRetriever.cs | 32 +++++++++++++ 2 files changed, 80 insertions(+) create mode 100644 src/ServiceControl.Audit.Persistence.RavenDB/CustomChecks/CheckDirtyMemory.cs create mode 100644 src/ServiceControl.Audit.Persistence.RavenDB/MemoryInformationRetriever.cs diff --git a/src/ServiceControl.Audit.Persistence.RavenDB/CustomChecks/CheckDirtyMemory.cs b/src/ServiceControl.Audit.Persistence.RavenDB/CustomChecks/CheckDirtyMemory.cs new file mode 100644 index 0000000000..39b980b277 --- /dev/null +++ b/src/ServiceControl.Audit.Persistence.RavenDB/CustomChecks/CheckDirtyMemory.cs @@ -0,0 +1,48 @@ +namespace ServiceControl.Audit.Persistence.RavenDB.CustomChecks; + +using System; +using System.Collections.Generic; +using System.Threading; +using System.Threading.Tasks; +using NServiceBus.CustomChecks; + +class CheckDirtyMemory(IRavenDocumentStoreProvider documentStoreProvider) : CustomCheck("ServiceControl.Audit database", "Dirty memory trends", TimeSpan.FromMinutes(5)) +{ + readonly List lastDirtyMemoryReads = []; + public override async Task PerformCheck(CancellationToken cancellationToken = default) + { + var retriever = await GetMemoryRetriever(cancellationToken); + var memoryInfo = await retriever.GetMemoryInformation(cancellationToken); + + if (memoryInfo.IsHighDirty) + { + //log warning + return CheckResult.Failed("There is a high level of dirty memory. 
Check the ServiceControl " + + "troubleshooting guide for guidance on how to mitigate the issue."); + } + + lastDirtyMemoryReads.Add(memoryInfo.DirtyMemory); + if (lastDirtyMemoryReads.Count > 20) + { + //cap the list at 20 + lastDirtyMemoryReads.RemoveAt(lastDirtyMemoryReads.Count - 1); + } + + // evaluate the trends + // if the amount of dirty memory is constantly growing log a warning and fail the check + + return CheckResult.Pass; + } + + MemoryInformationRetriever _retriever; + async Task GetMemoryRetriever(CancellationToken cancellationToken = default) + { + if (_retriever == null) + { + var documentStore = await documentStoreProvider.GetDocumentStore(cancellationToken); + var serverUrl = documentStore.Urls[0]; //TODO is there a better way to get the RavenDB server URL? + _retriever = new MemoryInformationRetriever(serverUrl); + } + return _retriever; + } +} \ No newline at end of file diff --git a/src/ServiceControl.Audit.Persistence.RavenDB/MemoryInformationRetriever.cs b/src/ServiceControl.Audit.Persistence.RavenDB/MemoryInformationRetriever.cs new file mode 100644 index 0000000000..4ff9f236c4 --- /dev/null +++ b/src/ServiceControl.Audit.Persistence.RavenDB/MemoryInformationRetriever.cs @@ -0,0 +1,32 @@ +namespace ServiceControl.Audit.Persistence.RavenDB; + +using System; +using System.Linq; +using System.Net.Http; +using System.Text.Json; +using System.Threading; +using System.Threading.Tasks; + +class MemoryInformationRetriever(string serverUrl) +{ + readonly HttpClient client = new() { BaseAddress = new Uri(serverUrl) }; + + record ResponseDto + { + public MemoryInformation MemoryInformation { get; set; } + } + + record MemoryInformation + { + public bool IsHighDirty { get; set; } + public string DirtyMemory { get; set; } + } + + public async Task<(bool IsHighDirty, int DirtyMemory)> GetMemoryInformation(CancellationToken cancellationToken = default) + { + var httpResponse = await client.GetAsync("/admin/debug/memory/stats", cancellationToken); + var 
responseDto = JsonSerializer.Deserialize(await httpResponse.Content.ReadAsStringAsync(cancellationToken)); + + return (responseDto.MemoryInformation.IsHighDirty, int.Parse(responseDto.MemoryInformation.DirtyMemory.Split(' ').First())); + } +} \ No newline at end of file From 784581db22a313e5f62bf3665f471bba005f450b Mon Sep 17 00:00:00 2001 From: Mauro Servienti Date: Mon, 10 Mar 2025 18:33:45 +0100 Subject: [PATCH 02/21] Add some memory analysis --- .../CustomChecks/CheckDirtyMemory.cs | 55 ++++++++++++++++++- 1 file changed, 53 insertions(+), 2 deletions(-) diff --git a/src/ServiceControl.Audit.Persistence.RavenDB/CustomChecks/CheckDirtyMemory.cs b/src/ServiceControl.Audit.Persistence.RavenDB/CustomChecks/CheckDirtyMemory.cs index 39b980b277..2074069d83 100644 --- a/src/ServiceControl.Audit.Persistence.RavenDB/CustomChecks/CheckDirtyMemory.cs +++ b/src/ServiceControl.Audit.Persistence.RavenDB/CustomChecks/CheckDirtyMemory.cs @@ -28,8 +28,10 @@ public override async Task PerformCheck(CancellationToken cancellat lastDirtyMemoryReads.RemoveAt(lastDirtyMemoryReads.Count - 1); } - // evaluate the trends - // if the amount of dirty memory is constantly growing log a warning and fail the check + if (lastDirtyMemoryReads.Count > 3 && AnalyzeTrendUsingRegression(lastDirtyMemoryReads) == TrendDirection.Increasing) // Three means we'll be observing for 15 minutes before calculating the trend + { + // log a warning and fail the check + } return CheckResult.Pass; } @@ -45,4 +47,53 @@ async Task GetMemoryRetriever(CancellationToken canc } return _retriever; } + + static TrendDirection AnalyzeTrendUsingRegression(List values) + { + if (values == null || values.Count <= 1) + { + throw new ArgumentException("Need at least two values to determine a trend"); + } + + // Calculate slope using linear regression + double n = values.Count; + double sumX = 0; + double sumY = 0; + double sumXY = 0; + double sumXX = 0; + + for (int i = 0; i < values.Count; i++) + { + double x = i; + double 
y = values[i]; + + sumX += x; + sumY += y; + sumXY += x * y; + sumXX += x * x; + } + + double slope = (n * sumXY - sumX * sumY) / (n * sumXX - sumX * sumX); + + // Determine trend based on slope + if (Math.Abs(slope) < 0.001) // Small threshold to handle floating-point precision + { + return TrendDirection.Flat; + } + + if (slope > 0) + { + return TrendDirection.Increasing; + } + + return TrendDirection.Decreasing; + } + + enum TrendDirection + { + Increasing, + Decreasing, + Flat, + Mixed + } } \ No newline at end of file From cfbf439370616b7a38ef1d1c317ef5a6f0a7f45c Mon Sep 17 00:00:00 2001 From: Mauro Servienti Date: Mon, 10 Mar 2025 19:30:58 +0100 Subject: [PATCH 03/21] Use the database configuration to get the server URL --- .../CustomChecks/CheckDirtyMemory.cs | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/src/ServiceControl.Audit.Persistence.RavenDB/CustomChecks/CheckDirtyMemory.cs b/src/ServiceControl.Audit.Persistence.RavenDB/CustomChecks/CheckDirtyMemory.cs index 2074069d83..b43965e721 100644 --- a/src/ServiceControl.Audit.Persistence.RavenDB/CustomChecks/CheckDirtyMemory.cs +++ b/src/ServiceControl.Audit.Persistence.RavenDB/CustomChecks/CheckDirtyMemory.cs @@ -6,12 +6,12 @@ namespace ServiceControl.Audit.Persistence.RavenDB.CustomChecks; using System.Threading.Tasks; using NServiceBus.CustomChecks; -class CheckDirtyMemory(IRavenDocumentStoreProvider documentStoreProvider) : CustomCheck("ServiceControl.Audit database", "Dirty memory trends", TimeSpan.FromMinutes(5)) +class CheckDirtyMemory(DatabaseConfiguration databaseConfiguration) : CustomCheck("ServiceControl.Audit database", "Dirty memory trends", TimeSpan.FromMinutes(5)) { readonly List lastDirtyMemoryReads = []; public override async Task PerformCheck(CancellationToken cancellationToken = default) { - var retriever = await GetMemoryRetriever(cancellationToken); + var retriever = await GetMemoryRetriever(); var memoryInfo = await 
retriever.GetMemoryInformation(cancellationToken); if (memoryInfo.IsHighDirty) @@ -37,15 +37,9 @@ public override async Task PerformCheck(CancellationToken cancellat } MemoryInformationRetriever _retriever; - async Task GetMemoryRetriever(CancellationToken cancellationToken = default) + async Task GetMemoryRetriever() { - if (_retriever == null) - { - var documentStore = await documentStoreProvider.GetDocumentStore(cancellationToken); - var serverUrl = documentStore.Urls[0]; //TODO is there a better way to get the RavenDB server URL? - _retriever = new MemoryInformationRetriever(serverUrl); - } - return _retriever; + return _retriever ??= new MemoryInformationRetriever(databaseConfiguration.ServerConfiguration.ServerUrl); } static TrendDirection AnalyzeTrendUsingRegression(List values) From 6a246ea5030162e6c604506f7a1493e8d27870c9 Mon Sep 17 00:00:00 2001 From: Mauro Servienti Date: Mon, 10 Mar 2025 21:19:46 +0100 Subject: [PATCH 04/21] Add more logging --- .../CustomChecks/CheckDirtyMemory.cs | 35 ++++++++++++------- 1 file changed, 22 insertions(+), 13 deletions(-) diff --git a/src/ServiceControl.Audit.Persistence.RavenDB/CustomChecks/CheckDirtyMemory.cs b/src/ServiceControl.Audit.Persistence.RavenDB/CustomChecks/CheckDirtyMemory.cs index b43965e721..630a98061c 100644 --- a/src/ServiceControl.Audit.Persistence.RavenDB/CustomChecks/CheckDirtyMemory.cs +++ b/src/ServiceControl.Audit.Persistence.RavenDB/CustomChecks/CheckDirtyMemory.cs @@ -5,6 +5,7 @@ namespace ServiceControl.Audit.Persistence.RavenDB.CustomChecks; using System.Threading; using System.Threading.Tasks; using NServiceBus.CustomChecks; +using NServiceBus.Logging; class CheckDirtyMemory(DatabaseConfiguration databaseConfiguration) : CustomCheck("ServiceControl.Audit database", "Dirty memory trends", TimeSpan.FromMinutes(5)) { @@ -16,31 +17,38 @@ public override async Task PerformCheck(CancellationToken cancellat if (memoryInfo.IsHighDirty) { - //log warning - return CheckResult.Failed("There is a high 
level of dirty memory. Check the ServiceControl " + - "troubleshooting guide for guidance on how to mitigate the issue."); + var message = $"There is a high level of dirty memory ({memoryInfo.DirtyMemory}kb). Check the ServiceControl " + + "troubleshooting guide for guidance on how to mitigate the issue."; + Log.Warn(message); + return CheckResult.Failed(message); } lastDirtyMemoryReads.Add(memoryInfo.DirtyMemory); if (lastDirtyMemoryReads.Count > 20) { - //cap the list at 20 - lastDirtyMemoryReads.RemoveAt(lastDirtyMemoryReads.Count - 1); + //cap the list at 20 which means we're keeping about 1 hour and 40 minutes of data + lastDirtyMemoryReads.RemoveAt(0); } - if (lastDirtyMemoryReads.Count > 3 && AnalyzeTrendUsingRegression(lastDirtyMemoryReads) == TrendDirection.Increasing) // Three means we'll be observing for 15 minutes before calculating the trend + if (lastDirtyMemoryReads.Count < 3) { - // log a warning and fail the check + Log.Debug("Not enough dirty memory data in the series to calculate a trend."); + } + + // Three means we'll be observing for 15 minutes before calculating the trend + if (lastDirtyMemoryReads.Count >= 3 && AnalyzeTrendUsingRegression(lastDirtyMemoryReads) == TrendDirection.Increasing) + { + var message = $"Dirty memory is increasing. Last available value is {memoryInfo.DirtyMemory}kb. 
" + + $"Check the ServiceControl troubleshooting guide for guidance on how to mitigate the issue."; + Log.Warn(message); + return CheckResult.Failed(message); } return CheckResult.Pass; } MemoryInformationRetriever _retriever; - async Task GetMemoryRetriever() - { - return _retriever ??= new MemoryInformationRetriever(databaseConfiguration.ServerConfiguration.ServerUrl); - } + async Task GetMemoryRetriever() => _retriever ??= new MemoryInformationRetriever(databaseConfiguration.ServerConfiguration.ServerUrl); static TrendDirection AnalyzeTrendUsingRegression(List values) { @@ -87,7 +95,8 @@ enum TrendDirection { Increasing, Decreasing, - Flat, - Mixed + Flat } + + static readonly ILog Log = LogManager.GetLogger(); } \ No newline at end of file From 99cb66680ea0bd32937598150cb081b418cdb833 Mon Sep 17 00:00:00 2001 From: Mauro Servienti Date: Mon, 10 Mar 2025 21:20:43 +0100 Subject: [PATCH 05/21] Add a to-do --- .../CustomChecks/CheckDirtyMemory.cs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/ServiceControl.Audit.Persistence.RavenDB/CustomChecks/CheckDirtyMemory.cs b/src/ServiceControl.Audit.Persistence.RavenDB/CustomChecks/CheckDirtyMemory.cs index 630a98061c..c1994b39b0 100644 --- a/src/ServiceControl.Audit.Persistence.RavenDB/CustomChecks/CheckDirtyMemory.cs +++ b/src/ServiceControl.Audit.Persistence.RavenDB/CustomChecks/CheckDirtyMemory.cs @@ -35,6 +35,7 @@ public override async Task PerformCheck(CancellationToken cancellat Log.Debug("Not enough dirty memory data in the series to calculate a trend."); } + // TODO do we need a threshold below which the check never fails? 
// Three means we'll be observing for 15 minutes before calculating the trend if (lastDirtyMemoryReads.Count >= 3 && AnalyzeTrendUsingRegression(lastDirtyMemoryReads) == TrendDirection.Increasing) { From 7dc68134051fe4ec544a10e07a9a1c46e1f6a955 Mon Sep 17 00:00:00 2001 From: Mauro Servienti Date: Tue, 11 Mar 2025 10:15:29 +0100 Subject: [PATCH 06/21] minor tweaks --- .../CustomChecks/CheckDirtyMemory.cs | 42 +++++++++---------- 1 file changed, 19 insertions(+), 23 deletions(-) diff --git a/src/ServiceControl.Audit.Persistence.RavenDB/CustomChecks/CheckDirtyMemory.cs b/src/ServiceControl.Audit.Persistence.RavenDB/CustomChecks/CheckDirtyMemory.cs index c1994b39b0..ab233e01ca 100644 --- a/src/ServiceControl.Audit.Persistence.RavenDB/CustomChecks/CheckDirtyMemory.cs +++ b/src/ServiceControl.Audit.Persistence.RavenDB/CustomChecks/CheckDirtyMemory.cs @@ -13,36 +13,37 @@ class CheckDirtyMemory(DatabaseConfiguration databaseConfiguration) : CustomChec public override async Task PerformCheck(CancellationToken cancellationToken = default) { var retriever = await GetMemoryRetriever(); - var memoryInfo = await retriever.GetMemoryInformation(cancellationToken); + var (isHighDirty, dirtyMemory) = await retriever.GetMemoryInformation(cancellationToken); - if (memoryInfo.IsHighDirty) + if (isHighDirty) { - var message = $"There is a high level of dirty memory ({memoryInfo.DirtyMemory}kb). Check the ServiceControl " + + var message = $"There is a high level of dirty memory ({dirtyMemory}kb). 
Check the ServiceControl " + "troubleshooting guide for guidance on how to mitigate the issue."; Log.Warn(message); return CheckResult.Failed(message); } - lastDirtyMemoryReads.Add(memoryInfo.DirtyMemory); + lastDirtyMemoryReads.Add(dirtyMemory); if (lastDirtyMemoryReads.Count > 20) { //cap the list at 20 which means we're keeping about 1 hour and 40 minutes of data lastDirtyMemoryReads.RemoveAt(0); } - if (lastDirtyMemoryReads.Count < 3) + switch (lastDirtyMemoryReads.Count) { - Log.Debug("Not enough dirty memory data in the series to calculate a trend."); - } - - // TODO do we need a threshold below which the check never fails? - // Three means we'll be observing for 15 minutes before calculating the trend - if (lastDirtyMemoryReads.Count >= 3 && AnalyzeTrendUsingRegression(lastDirtyMemoryReads) == TrendDirection.Increasing) - { - var message = $"Dirty memory is increasing. Last available value is {memoryInfo.DirtyMemory}kb. " + - $"Check the ServiceControl troubleshooting guide for guidance on how to mitigate the issue."; - Log.Warn(message); - return CheckResult.Failed(message); + case < 3: + Log.Debug("Not enough dirty memory data in the series to calculate a trend."); + break; + // TODO do we need a threshold below which the check never fails? + // Three means we'll be observing for 15 minutes before calculating the trend + case >= 3 when AnalyzeTrendUsingRegression(lastDirtyMemoryReads) == TrendDirection.Increasing: + { + var message = $"Dirty memory is increasing. Last available value is {dirtyMemory}kb. 
" + + $"Check the ServiceControl troubleshooting guide for guidance on how to mitigate the issue."; + Log.Warn(message); + return CheckResult.Failed(message); + } } return CheckResult.Pass; @@ -53,7 +54,7 @@ public override async Task PerformCheck(CancellationToken cancellat static TrendDirection AnalyzeTrendUsingRegression(List values) { - if (values == null || values.Count <= 1) + if (values is not { Count: > 1 }) { throw new ArgumentException("Need at least two values to determine a trend"); } @@ -84,12 +85,7 @@ static TrendDirection AnalyzeTrendUsingRegression(List values) return TrendDirection.Flat; } - if (slope > 0) - { - return TrendDirection.Increasing; - } - - return TrendDirection.Decreasing; + return slope > 0 ? TrendDirection.Increasing : TrendDirection.Decreasing; } enum TrendDirection From f283b73d2c3ef26e20105af53c062b6a675702db Mon Sep 17 00:00:00 2001 From: Mauro Servienti Date: Tue, 11 Mar 2025 10:19:48 +0100 Subject: [PATCH 07/21] Because editorconfig --- .../CustomChecks/CheckDirtyMemory.cs | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/ServiceControl.Audit.Persistence.RavenDB/CustomChecks/CheckDirtyMemory.cs b/src/ServiceControl.Audit.Persistence.RavenDB/CustomChecks/CheckDirtyMemory.cs index ab233e01ca..9b0de38741 100644 --- a/src/ServiceControl.Audit.Persistence.RavenDB/CustomChecks/CheckDirtyMemory.cs +++ b/src/ServiceControl.Audit.Persistence.RavenDB/CustomChecks/CheckDirtyMemory.cs @@ -12,7 +12,7 @@ class CheckDirtyMemory(DatabaseConfiguration databaseConfiguration) : CustomChec readonly List lastDirtyMemoryReads = []; public override async Task PerformCheck(CancellationToken cancellationToken = default) { - var retriever = await GetMemoryRetriever(); + var retriever = GetMemoryRetriever(); var (isHighDirty, dirtyMemory) = await retriever.GetMemoryInformation(cancellationToken); if (isHighDirty) @@ -44,13 +44,17 @@ public override async Task PerformCheck(CancellationToken cancellat Log.Warn(message); 
return CheckResult.Failed(message); } + + default: + // NOP + break; } return CheckResult.Pass; } MemoryInformationRetriever _retriever; - async Task GetMemoryRetriever() => _retriever ??= new MemoryInformationRetriever(databaseConfiguration.ServerConfiguration.ServerUrl); + MemoryInformationRetriever GetMemoryRetriever() => _retriever ??= new MemoryInformationRetriever(databaseConfiguration.ServerConfiguration.ServerUrl); static TrendDirection AnalyzeTrendUsingRegression(List values) { @@ -77,7 +81,7 @@ static TrendDirection AnalyzeTrendUsingRegression(List values) sumXX += x * x; } - double slope = (n * sumXY - sumX * sumY) / (n * sumXX - sumX * sumX); + double slope = ((n * sumXY) - (sumX * sumY)) / ((n * sumXX) - (sumX * sumX)); // Determine trend based on slope if (Math.Abs(slope) < 0.001) // Small threshold to handle floating-point precision From 87114363498ef52932431cbc68815f53eafcdcaf Mon Sep 17 00:00:00 2001 From: Mauro Servienti Date: Tue, 11 Mar 2025 15:28:17 +0100 Subject: [PATCH 08/21] Use better variable names --- .../CustomChecks/CheckDirtyMemory.cs | 30 +++++++++++-------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/src/ServiceControl.Audit.Persistence.RavenDB/CustomChecks/CheckDirtyMemory.cs b/src/ServiceControl.Audit.Persistence.RavenDB/CustomChecks/CheckDirtyMemory.cs index 9b0de38741..b509a72396 100644 --- a/src/ServiceControl.Audit.Persistence.RavenDB/CustomChecks/CheckDirtyMemory.cs +++ b/src/ServiceControl.Audit.Persistence.RavenDB/CustomChecks/CheckDirtyMemory.cs @@ -64,27 +64,31 @@ static TrendDirection AnalyzeTrendUsingRegression(List values) } // Calculate slope using linear regression - double n = values.Count; - double sumX = 0; - double sumY = 0; - double sumXY = 0; - double sumXX = 0; + double numberOfPoints = values.Count; + double sumOfIndices = 0; + double sumOfValues = 0; + double sumOfIndicesMultipliedByValues = 0; + double sumOfIndicesSquared = 0; for (int i = 0; i < values.Count; i++) { - double x = i; - 
double y = values[i]; + double index = i; + double value = values[i]; - sumX += x; - sumY += y; - sumXY += x * y; - sumXX += x * x; + sumOfIndices += index; + sumOfValues += value; + sumOfIndicesMultipliedByValues += index * value; + sumOfIndicesSquared += index * index; } - double slope = ((n * sumXY) - (sumX * sumY)) / ((n * sumXX) - (sumX * sumX)); + // Slope formula: (n*Σxy - Σx*Σy) / (n*Σx² - (Σx)²) + double slopeNumerator = (numberOfPoints * sumOfIndicesMultipliedByValues) - (sumOfIndices * sumOfValues); + double slopeDenominator = (numberOfPoints * sumOfIndicesSquared) - (sumOfIndices * sumOfIndices); + double slope = slopeNumerator / slopeDenominator; // Determine trend based on slope - if (Math.Abs(slope) < 0.001) // Small threshold to handle floating-point precision + const double slopeThreshold = 0.001; // Small threshold to handle floating-point precision + if (Math.Abs(slope) < slopeThreshold) { return TrendDirection.Flat; } From 2b2806b7bd67de44a12fff07b407b3aa02c60658 Mon Sep 17 00:00:00 2001 From: Mauro Servienti Date: Tue, 11 Mar 2025 15:32:17 +0100 Subject: [PATCH 09/21] Refactor the memory information retriever to check the content schema --- .../MemoryInformationRetriever.cs | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/ServiceControl.Audit.Persistence.RavenDB/MemoryInformationRetriever.cs b/src/ServiceControl.Audit.Persistence.RavenDB/MemoryInformationRetriever.cs index 4ff9f236c4..613d02ad10 100644 --- a/src/ServiceControl.Audit.Persistence.RavenDB/MemoryInformationRetriever.cs +++ b/src/ServiceControl.Audit.Persistence.RavenDB/MemoryInformationRetriever.cs @@ -22,11 +22,16 @@ record MemoryInformation public string DirtyMemory { get; set; } } - public async Task<(bool IsHighDirty, int DirtyMemory)> GetMemoryInformation(CancellationToken cancellationToken = default) + public async Task<(bool IsHighDirty, int DirtyMemoryKb)> GetMemoryInformation(CancellationToken cancellationToken = default) { var httpResponse = 
await client.GetAsync("/admin/debug/memory/stats", cancellationToken); var responseDto = JsonSerializer.Deserialize(await httpResponse.Content.ReadAsStringAsync(cancellationToken)); - return (responseDto.MemoryInformation.IsHighDirty, int.Parse(responseDto.MemoryInformation.DirtyMemory.Split(' ').First())); + var values = responseDto.MemoryInformation.DirtyMemory.Split(' '); + if (!string.Equals(values[1],"KBytes", StringComparison.OrdinalIgnoreCase)) + { + throw new InvalidOperationException($"Unexpected response. Was expecting memory details in KBytes, instead received: {responseDto.MemoryInformation.DirtyMemory}"); + } + return (responseDto.MemoryInformation.IsHighDirty, int.Parse(values[0])); } } \ No newline at end of file From 61b81428d0d0394e7ec85ecc65361e30280d29fa Mon Sep 17 00:00:00 2001 From: Mauro Servienti Date: Tue, 11 Mar 2025 15:36:49 +0100 Subject: [PATCH 10/21] Register the MemoryInformationRetriever in DI --- .../CustomChecks/CheckDirtyMemory.cs | 14 +++++--------- .../MemoryInformationRetriever.cs | 5 ++--- .../RavenPersistence.cs | 1 + 3 files changed, 8 insertions(+), 12 deletions(-) diff --git a/src/ServiceControl.Audit.Persistence.RavenDB/CustomChecks/CheckDirtyMemory.cs b/src/ServiceControl.Audit.Persistence.RavenDB/CustomChecks/CheckDirtyMemory.cs index b509a72396..67261f773a 100644 --- a/src/ServiceControl.Audit.Persistence.RavenDB/CustomChecks/CheckDirtyMemory.cs +++ b/src/ServiceControl.Audit.Persistence.RavenDB/CustomChecks/CheckDirtyMemory.cs @@ -7,23 +7,22 @@ namespace ServiceControl.Audit.Persistence.RavenDB.CustomChecks; using NServiceBus.CustomChecks; using NServiceBus.Logging; -class CheckDirtyMemory(DatabaseConfiguration databaseConfiguration) : CustomCheck("ServiceControl.Audit database", "Dirty memory trends", TimeSpan.FromMinutes(5)) +class CheckDirtyMemory(MemoryInformationRetriever memoryInformationRetriever) : CustomCheck("ServiceControl.Audit database", "Dirty memory trends", TimeSpan.FromMinutes(5)) { readonly List 
lastDirtyMemoryReads = []; public override async Task PerformCheck(CancellationToken cancellationToken = default) { - var retriever = GetMemoryRetriever(); - var (isHighDirty, dirtyMemory) = await retriever.GetMemoryInformation(cancellationToken); + var (isHighDirty, dirtyMemoryKb) = await memoryInformationRetriever.GetMemoryInformation(cancellationToken); if (isHighDirty) { - var message = $"There is a high level of dirty memory ({dirtyMemory}kb). Check the ServiceControl " + + var message = $"There is a high level of dirty memory ({dirtyMemoryKb}kb). Check the ServiceControl " + "troubleshooting guide for guidance on how to mitigate the issue."; Log.Warn(message); return CheckResult.Failed(message); } - lastDirtyMemoryReads.Add(dirtyMemory); + lastDirtyMemoryReads.Add(dirtyMemoryKb); if (lastDirtyMemoryReads.Count > 20) { //cap the list at 20 which means we're keeping about 1 hour and 40 minutes of data @@ -39,7 +38,7 @@ public override async Task PerformCheck(CancellationToken cancellat // Three means we'll be observing for 15 minutes before calculating the trend case >= 3 when AnalyzeTrendUsingRegression(lastDirtyMemoryReads) == TrendDirection.Increasing: { - var message = $"Dirty memory is increasing. Last available value is {dirtyMemory}kb. " + + var message = $"Dirty memory is increasing. Last available value is {dirtyMemoryKb}kb. 
" + $"Check the ServiceControl troubleshooting guide for guidance on how to mitigate the issue."; Log.Warn(message); return CheckResult.Failed(message); @@ -53,9 +52,6 @@ public override async Task PerformCheck(CancellationToken cancellat return CheckResult.Pass; } - MemoryInformationRetriever _retriever; - MemoryInformationRetriever GetMemoryRetriever() => _retriever ??= new MemoryInformationRetriever(databaseConfiguration.ServerConfiguration.ServerUrl); - static TrendDirection AnalyzeTrendUsingRegression(List values) { if (values is not { Count: > 1 }) diff --git a/src/ServiceControl.Audit.Persistence.RavenDB/MemoryInformationRetriever.cs b/src/ServiceControl.Audit.Persistence.RavenDB/MemoryInformationRetriever.cs index 613d02ad10..ea22ec5263 100644 --- a/src/ServiceControl.Audit.Persistence.RavenDB/MemoryInformationRetriever.cs +++ b/src/ServiceControl.Audit.Persistence.RavenDB/MemoryInformationRetriever.cs @@ -1,15 +1,14 @@ namespace ServiceControl.Audit.Persistence.RavenDB; using System; -using System.Linq; using System.Net.Http; using System.Text.Json; using System.Threading; using System.Threading.Tasks; -class MemoryInformationRetriever(string serverUrl) +class MemoryInformationRetriever(DatabaseConfiguration databaseConfiguration) { - readonly HttpClient client = new() { BaseAddress = new Uri(serverUrl) }; + readonly HttpClient client = new() { BaseAddress = new Uri(databaseConfiguration.ServerConfiguration.ServerUrl) }; record ResponseDto { diff --git a/src/ServiceControl.Audit.Persistence.RavenDB/RavenPersistence.cs b/src/ServiceControl.Audit.Persistence.RavenDB/RavenPersistence.cs index 75081a0547..0cd872cbe8 100644 --- a/src/ServiceControl.Audit.Persistence.RavenDB/RavenPersistence.cs +++ b/src/ServiceControl.Audit.Persistence.RavenDB/RavenPersistence.cs @@ -21,6 +21,7 @@ public void AddPersistence(IServiceCollection services) static void ConfigureLifecycle(IServiceCollection services, DatabaseConfiguration databaseConfiguration) { 
services.AddSingleton(databaseConfiguration); + services.AddSingleton(); services.AddSingleton(); services.AddHostedService(); From 723a9c337f39797ffd614364abdd70f39648d0df Mon Sep 17 00:00:00 2001 From: Mauro Servienti Date: Tue, 11 Mar 2025 15:40:01 +0100 Subject: [PATCH 11/21] Fix formatting --- .../MemoryInformationRetriever.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ServiceControl.Audit.Persistence.RavenDB/MemoryInformationRetriever.cs b/src/ServiceControl.Audit.Persistence.RavenDB/MemoryInformationRetriever.cs index ea22ec5263..3502c9244c 100644 --- a/src/ServiceControl.Audit.Persistence.RavenDB/MemoryInformationRetriever.cs +++ b/src/ServiceControl.Audit.Persistence.RavenDB/MemoryInformationRetriever.cs @@ -27,7 +27,7 @@ record MemoryInformation var responseDto = JsonSerializer.Deserialize(await httpResponse.Content.ReadAsStringAsync(cancellationToken)); var values = responseDto.MemoryInformation.DirtyMemory.Split(' '); - if (!string.Equals(values[1],"KBytes", StringComparison.OrdinalIgnoreCase)) + if (!string.Equals(values[1], "KBytes", StringComparison.OrdinalIgnoreCase)) { throw new InvalidOperationException($"Unexpected response. 
Was expecting memory details in KBytes, instead received: {responseDto.MemoryInformation.DirtyMemory}"); } From 71809445645647e121ee0ffcdac78fe9c9efdc7a Mon Sep 17 00:00:00 2001 From: Mauro Servienti Date: Tue, 11 Mar 2025 18:58:36 +0100 Subject: [PATCH 12/21] Update the HTTP GET URL to trim the response size --- .../MemoryInformationRetriever.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ServiceControl.Audit.Persistence.RavenDB/MemoryInformationRetriever.cs b/src/ServiceControl.Audit.Persistence.RavenDB/MemoryInformationRetriever.cs index 3502c9244c..5eaefacd45 100644 --- a/src/ServiceControl.Audit.Persistence.RavenDB/MemoryInformationRetriever.cs +++ b/src/ServiceControl.Audit.Persistence.RavenDB/MemoryInformationRetriever.cs @@ -23,7 +23,7 @@ record MemoryInformation public async Task<(bool IsHighDirty, int DirtyMemoryKb)> GetMemoryInformation(CancellationToken cancellationToken = default) { - var httpResponse = await client.GetAsync("/admin/debug/memory/stats", cancellationToken); + var httpResponse = await client.GetAsync("/admin/debug/memory/stats?includeThreads=false&includeMappings=false", cancellationToken); var responseDto = JsonSerializer.Deserialize(await httpResponse.Content.ReadAsStringAsync(cancellationToken)); var values = responseDto.MemoryInformation.DirtyMemory.Split(' '); From 706ac4cae8fa940dc5743a9fd7ea0980c002a19c Mon Sep 17 00:00:00 2001 From: Mauro Servienti Date: Tue, 11 Mar 2025 19:14:50 +0100 Subject: [PATCH 13/21] Make the custom check work in both embedded and external mode --- .../MemoryInformationRetriever.cs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/ServiceControl.Audit.Persistence.RavenDB/MemoryInformationRetriever.cs b/src/ServiceControl.Audit.Persistence.RavenDB/MemoryInformationRetriever.cs index 5eaefacd45..3bab2dc7b5 100644 --- a/src/ServiceControl.Audit.Persistence.RavenDB/MemoryInformationRetriever.cs +++ 
b/src/ServiceControl.Audit.Persistence.RavenDB/MemoryInformationRetriever.cs @@ -8,7 +8,10 @@ namespace ServiceControl.Audit.Persistence.RavenDB; class MemoryInformationRetriever(DatabaseConfiguration databaseConfiguration) { - readonly HttpClient client = new() { BaseAddress = new Uri(databaseConfiguration.ServerConfiguration.ServerUrl) }; + // TODO what does a connection string look like? Is it only a URI or could it contain other stuff? + // The ?? operator is needed because ServerUrl is populated when running embedded and connection string when running in external mode. + // However the tricky part is that when tests are run they behave like if it was external mode + readonly HttpClient client = new() { BaseAddress = new Uri(databaseConfiguration.ServerConfiguration.ServerUrl ?? databaseConfiguration.ServerConfiguration.ConnectionString) }; record ResponseDto { From 3d90262e128da980453366339af0e593d30a5946 Mon Sep 17 00:00:00 2001 From: Mauro Servienti Date: Tue, 11 Mar 2025 19:15:06 +0100 Subject: [PATCH 14/21] Custom checks approved list --- .../CustomCheckTests.VerifyCustomChecks.approved.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/src/ServiceControl.Audit.Persistence.Tests.RavenDB/ApprovalFiles/CustomCheckTests.VerifyCustomChecks.approved.txt b/src/ServiceControl.Audit.Persistence.Tests.RavenDB/ApprovalFiles/CustomCheckTests.VerifyCustomChecks.approved.txt index 2dd77f440a..1ee566332d 100644 --- a/src/ServiceControl.Audit.Persistence.Tests.RavenDB/ApprovalFiles/CustomCheckTests.VerifyCustomChecks.approved.txt +++ b/src/ServiceControl.Audit.Persistence.Tests.RavenDB/ApprovalFiles/CustomCheckTests.VerifyCustomChecks.approved.txt @@ -1,3 +1,4 @@ +Dirty memory trends: ServiceControl.Audit database ServiceControl.Audit Health: Audit Database Index Lag ServiceControl.Audit Health: Audit Message Ingestion Process Storage space: ServiceControl.Audit database \ No newline at end of file From 38ccf984edf74087eee98d95b116b505d085d952 Mon Sep 17 00:00:00 2001 
From: Mauro Servienti Date: Tue, 11 Mar 2025 21:10:22 +0100 Subject: [PATCH 15/21] Update log statements to mention RavenDB --- .../CustomChecks/CheckDirtyMemory.cs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/ServiceControl.Audit.Persistence.RavenDB/CustomChecks/CheckDirtyMemory.cs b/src/ServiceControl.Audit.Persistence.RavenDB/CustomChecks/CheckDirtyMemory.cs index 67261f773a..e100872589 100644 --- a/src/ServiceControl.Audit.Persistence.RavenDB/CustomChecks/CheckDirtyMemory.cs +++ b/src/ServiceControl.Audit.Persistence.RavenDB/CustomChecks/CheckDirtyMemory.cs @@ -16,7 +16,7 @@ public override async Task PerformCheck(CancellationToken cancellat if (isHighDirty) { - var message = $"There is a high level of dirty memory ({dirtyMemoryKb}kb). Check the ServiceControl " + + var message = $"There is a high level of RavenDB dirty memory ({dirtyMemoryKb}kb). Check the ServiceControl " + "troubleshooting guide for guidance on how to mitigate the issue."; Log.Warn(message); return CheckResult.Failed(message); @@ -32,13 +32,13 @@ public override async Task PerformCheck(CancellationToken cancellat switch (lastDirtyMemoryReads.Count) { case < 3: - Log.Debug("Not enough dirty memory data in the series to calculate a trend."); + Log.Debug("Not enough RavenDB dirty memory data in the series to calculate a trend."); break; // TODO do we need a threshold below which the check never fails? // Three means we'll be observing for 15 minutes before calculating the trend case >= 3 when AnalyzeTrendUsingRegression(lastDirtyMemoryReads) == TrendDirection.Increasing: { - var message = $"Dirty memory is increasing. Last available value is {dirtyMemoryKb}kb. " + + var message = $"RavenDB dirty memory is increasing. Last available value is {dirtyMemoryKb}kb. 
" + $"Check the ServiceControl troubleshooting guide for guidance on how to mitigate the issue."; Log.Warn(message); return CheckResult.Failed(message); From 74b1f41f6ef8975f36ab39d3bc5cb6502a639f37 Mon Sep 17 00:00:00 2001 From: Mauro Servienti Date: Wed, 12 Mar 2025 09:38:37 +0100 Subject: [PATCH 16/21] Properly invert custom check id and category --- .../CustomChecks/CheckDirtyMemory.cs | 2 +- .../CustomCheckTests.VerifyCustomChecks.approved.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ServiceControl.Audit.Persistence.RavenDB/CustomChecks/CheckDirtyMemory.cs b/src/ServiceControl.Audit.Persistence.RavenDB/CustomChecks/CheckDirtyMemory.cs index e100872589..faa30435ab 100644 --- a/src/ServiceControl.Audit.Persistence.RavenDB/CustomChecks/CheckDirtyMemory.cs +++ b/src/ServiceControl.Audit.Persistence.RavenDB/CustomChecks/CheckDirtyMemory.cs @@ -7,7 +7,7 @@ namespace ServiceControl.Audit.Persistence.RavenDB.CustomChecks; using NServiceBus.CustomChecks; using NServiceBus.Logging; -class CheckDirtyMemory(MemoryInformationRetriever memoryInformationRetriever) : CustomCheck("ServiceControl.Audit database", "Dirty memory trends", TimeSpan.FromMinutes(5)) +class CheckDirtyMemory(MemoryInformationRetriever memoryInformationRetriever) : CustomCheck("RavenDB dirty memory trends", "ServiceControl.Audit Health", TimeSpan.FromMinutes(5)) { readonly List lastDirtyMemoryReads = []; public override async Task PerformCheck(CancellationToken cancellationToken = default) diff --git a/src/ServiceControl.Audit.Persistence.Tests.RavenDB/ApprovalFiles/CustomCheckTests.VerifyCustomChecks.approved.txt b/src/ServiceControl.Audit.Persistence.Tests.RavenDB/ApprovalFiles/CustomCheckTests.VerifyCustomChecks.approved.txt index 1ee566332d..0e8182dded 100644 --- a/src/ServiceControl.Audit.Persistence.Tests.RavenDB/ApprovalFiles/CustomCheckTests.VerifyCustomChecks.approved.txt +++ 
b/src/ServiceControl.Audit.Persistence.Tests.RavenDB/ApprovalFiles/CustomCheckTests.VerifyCustomChecks.approved.txt @@ -1,4 +1,4 @@ -Dirty memory trends: ServiceControl.Audit database ServiceControl.Audit Health: Audit Database Index Lag ServiceControl.Audit Health: Audit Message Ingestion Process +ServiceControl.Audit Health: RavenDB dirty memory trends Storage space: ServiceControl.Audit database \ No newline at end of file From 1af52e388ab302be725d408c8d381e799bf59f4f Mon Sep 17 00:00:00 2001 From: Mauro Servienti Date: Wed, 12 Mar 2025 09:39:43 +0100 Subject: [PATCH 17/21] Add the CheckDirtyMemory custom check to the primary instance --- .../RavenPersistence.cs | 2 + .../CustomChecks/CheckDirtyMemory.cs | 103 ++++++++++++++++++ .../MemoryInformationRetriever.cs | 37 +++++++ ...IApprovals.CustomCheckDetails.approved.txt | 1 + ...CheckTests.VerifyCustomChecks.approved.txt | 1 + 5 files changed, 144 insertions(+) create mode 100644 src/ServiceControl.Persistence.RavenDb/CustomChecks/CheckDirtyMemory.cs create mode 100644 src/ServiceControl.Persistence.RavenDb/MemoryInformationRetriever.cs diff --git a/src/ServiceControl.Persistence.RavenDB/RavenPersistence.cs b/src/ServiceControl.Persistence.RavenDB/RavenPersistence.cs index 7b1e07ecbb..93a8fe63b7 100644 --- a/src/ServiceControl.Persistence.RavenDB/RavenPersistence.cs +++ b/src/ServiceControl.Persistence.RavenDB/RavenPersistence.cs @@ -49,7 +49,9 @@ public void AddPersistence(IServiceCollection services) services.AddCustomCheck(); services.AddCustomCheck(); services.AddCustomCheck(); + services.AddCustomCheck(); + services.AddSingleton(); services.AddSingleton(); services.AddSingleton(); diff --git a/src/ServiceControl.Persistence.RavenDb/CustomChecks/CheckDirtyMemory.cs b/src/ServiceControl.Persistence.RavenDb/CustomChecks/CheckDirtyMemory.cs new file mode 100644 index 0000000000..41ca21b319 --- /dev/null +++ b/src/ServiceControl.Persistence.RavenDb/CustomChecks/CheckDirtyMemory.cs @@ -0,0 +1,103 @@ +namespace 
ServiceControl.Persistence.RavenDB.CustomChecks; + +using System; +using System.Collections.Generic; +using System.Threading; +using System.Threading.Tasks; +using NServiceBus.CustomChecks; +using NServiceBus.Logging; + +class CheckDirtyMemory(MemoryInformationRetriever memoryInformationRetriever) : CustomCheck("RavenDB dirty memory trends", "ServiceControl Health", TimeSpan.FromMinutes(5)) +{ + readonly List lastDirtyMemoryReads = []; + public override async Task PerformCheck(CancellationToken cancellationToken = default) + { + var (isHighDirty, dirtyMemoryKb) = await memoryInformationRetriever.GetMemoryInformation(cancellationToken); + + if (isHighDirty) + { + var message = $"There is a high level of RavenDB dirty memory ({dirtyMemoryKb}kb). Check the ServiceControl " + + "troubleshooting guide for guidance on how to mitigate the issue."; + Log.Warn(message); + return CheckResult.Failed(message); + } + + lastDirtyMemoryReads.Add(dirtyMemoryKb); + if (lastDirtyMemoryReads.Count > 20) + { + //cap the list at 20 which means we're keeping about 1 hour and 40 minutes of data + lastDirtyMemoryReads.RemoveAt(0); + } + + switch (lastDirtyMemoryReads.Count) + { + case < 3: + Log.Debug("Not enough RavenDB dirty memory data in the series to calculate a trend."); + break; + // TODO do we need a threshold below which the check never fails? + // Three means we'll be observing for 15 minutes before calculating the trend + case >= 3 when AnalyzeTrendUsingRegression(lastDirtyMemoryReads) == TrendDirection.Increasing: + { + var message = $"RavenDB dirty memory is increasing. Last available value is {dirtyMemoryKb}kb. 
" + + $"Check the ServiceControl troubleshooting guide for guidance on how to mitigate the issue."; + Log.Warn(message); + return CheckResult.Failed(message); + } + + default: + // NOP + break; + } + + return CheckResult.Pass; + } + + static TrendDirection AnalyzeTrendUsingRegression(List values) + { + if (values is not { Count: > 1 }) + { + throw new ArgumentException("Need at least two values to determine a trend"); + } + + // Calculate slope using linear regression + double numberOfPoints = values.Count; + double sumOfIndices = 0; + double sumOfValues = 0; + double sumOfIndicesMultipliedByValues = 0; + double sumOfIndicesSquared = 0; + + for (int i = 0; i < values.Count; i++) + { + double index = i; + double value = values[i]; + + sumOfIndices += index; + sumOfValues += value; + sumOfIndicesMultipliedByValues += index * value; + sumOfIndicesSquared += index * index; + } + + // Slope formula: (n*Σxy - Σx*Σy) / (n*Σx² - (Σx)²) + double slopeNumerator = (numberOfPoints * sumOfIndicesMultipliedByValues) - (sumOfIndices * sumOfValues); + double slopeDenominator = (numberOfPoints * sumOfIndicesSquared) - (sumOfIndices * sumOfIndices); + double slope = slopeNumerator / slopeDenominator; + + // Determine trend based on slope + const double slopeThreshold = 0.001; // Small threshold to handle floating-point precision + if (Math.Abs(slope) < slopeThreshold) + { + return TrendDirection.Flat; + } + + return slope > 0 ? 
TrendDirection.Increasing : TrendDirection.Decreasing; + } + + enum TrendDirection + { + Increasing, + Decreasing, + Flat + } + + static readonly ILog Log = LogManager.GetLogger(); +} \ No newline at end of file diff --git a/src/ServiceControl.Persistence.RavenDb/MemoryInformationRetriever.cs b/src/ServiceControl.Persistence.RavenDb/MemoryInformationRetriever.cs new file mode 100644 index 0000000000..6203e18eb7 --- /dev/null +++ b/src/ServiceControl.Persistence.RavenDb/MemoryInformationRetriever.cs @@ -0,0 +1,37 @@ +namespace ServiceControl.Persistence.RavenDB; + +using System; +using System.Net.Http; +using System.Text.Json; +using System.Threading; +using System.Threading.Tasks; + +class MemoryInformationRetriever(RavenPersisterSettings persisterSettings) +{ + // TODO what does a connection string look like? Is it only a URI or could it contain other stuff? + readonly HttpClient client = new() { BaseAddress = new Uri(persisterSettings.ConnectionString) }; + + record ResponseDto + { + public MemoryInformation MemoryInformation { get; set; } + } + + record MemoryInformation + { + public bool IsHighDirty { get; set; } + public string DirtyMemory { get; set; } + } + + public async Task<(bool IsHighDirty, int DirtyMemoryKb)> GetMemoryInformation(CancellationToken cancellationToken = default) + { + var httpResponse = await client.GetAsync("/admin/debug/memory/stats?includeThreads=false&includeMappings=false", cancellationToken); + var responseDto = JsonSerializer.Deserialize(await httpResponse.Content.ReadAsStringAsync(cancellationToken)); + + var values = responseDto.MemoryInformation.DirtyMemory.Split(' '); + if (!string.Equals(values[1], "KBytes", StringComparison.OrdinalIgnoreCase)) + { + throw new InvalidOperationException($"Unexpected response. 
Was expecting memory details in KBytes, instead received: {responseDto.MemoryInformation.DirtyMemory}"); + } + return (responseDto.MemoryInformation.IsHighDirty, int.Parse(values[0])); + } +} \ No newline at end of file diff --git a/src/ServiceControl.Persistence.Tests.RavenDB/ApprovalFiles/APIApprovals.CustomCheckDetails.approved.txt b/src/ServiceControl.Persistence.Tests.RavenDB/ApprovalFiles/APIApprovals.CustomCheckDetails.approved.txt index 8c907ba40d..1e5ca563f0 100644 --- a/src/ServiceControl.Persistence.Tests.RavenDB/ApprovalFiles/APIApprovals.CustomCheckDetails.approved.txt +++ b/src/ServiceControl.Persistence.Tests.RavenDB/ApprovalFiles/APIApprovals.CustomCheckDetails.approved.txt @@ -1,4 +1,5 @@ ServiceControl Health: Error Database Index Errors ServiceControl Health: Error Database Index Lag ServiceControl Health: Message Ingestion Process +ServiceControl Health: RavenDB dirty memory trends Storage space: ServiceControl database \ No newline at end of file diff --git a/src/ServiceControl.Persistence.Tests.RavenDB/ApprovalFiles/CustomCheckTests.VerifyCustomChecks.approved.txt b/src/ServiceControl.Persistence.Tests.RavenDB/ApprovalFiles/CustomCheckTests.VerifyCustomChecks.approved.txt index 8c907ba40d..1e5ca563f0 100644 --- a/src/ServiceControl.Persistence.Tests.RavenDB/ApprovalFiles/CustomCheckTests.VerifyCustomChecks.approved.txt +++ b/src/ServiceControl.Persistence.Tests.RavenDB/ApprovalFiles/CustomCheckTests.VerifyCustomChecks.approved.txt @@ -1,4 +1,5 @@ ServiceControl Health: Error Database Index Errors ServiceControl Health: Error Database Index Lag ServiceControl Health: Message Ingestion Process +ServiceControl Health: RavenDB dirty memory trends Storage space: ServiceControl database \ No newline at end of file From 75449271a3bf7f0cbc418659e07daa7ede8a5e07 Mon Sep 17 00:00:00 2001 From: Mauro Servienti Date: Wed, 12 Mar 2025 11:31:27 +0100 Subject: [PATCH 18/21] Link to the troubleshooting guidance page --- .../CustomChecks/CheckDirtyMemory.cs | 
6 ++++-- .../CustomChecks/CheckDirtyMemory.cs | 6 ++++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/src/ServiceControl.Audit.Persistence.RavenDB/CustomChecks/CheckDirtyMemory.cs b/src/ServiceControl.Audit.Persistence.RavenDB/CustomChecks/CheckDirtyMemory.cs index faa30435ab..86a713c6b0 100644 --- a/src/ServiceControl.Audit.Persistence.RavenDB/CustomChecks/CheckDirtyMemory.cs +++ b/src/ServiceControl.Audit.Persistence.RavenDB/CustomChecks/CheckDirtyMemory.cs @@ -17,7 +17,8 @@ public override async Task PerformCheck(CancellationToken cancellat if (isHighDirty) { var message = $"There is a high level of RavenDB dirty memory ({dirtyMemoryKb}kb). Check the ServiceControl " + - "troubleshooting guide for guidance on how to mitigate the issue."; + "troubleshooting guide for guidance on how to mitigate the issue. " + + "Visit the https://docs.particular.net/servicecontrol/troubleshooting page for more information."; Log.Warn(message); return CheckResult.Failed(message); } @@ -39,7 +40,8 @@ public override async Task PerformCheck(CancellationToken cancellat case >= 3 when AnalyzeTrendUsingRegression(lastDirtyMemoryReads) == TrendDirection.Increasing: { var message = $"RavenDB dirty memory is increasing. Last available value is {dirtyMemoryKb}kb. " + - $"Check the ServiceControl troubleshooting guide for guidance on how to mitigate the issue."; + $"Check the ServiceControl troubleshooting guide for guidance on how to mitigate the issue. 
" + + $"Visit the https://docs.particular.net/servicecontrol/troubleshooting page for more information."; Log.Warn(message); return CheckResult.Failed(message); } diff --git a/src/ServiceControl.Persistence.RavenDb/CustomChecks/CheckDirtyMemory.cs b/src/ServiceControl.Persistence.RavenDb/CustomChecks/CheckDirtyMemory.cs index 41ca21b319..bb78011af1 100644 --- a/src/ServiceControl.Persistence.RavenDb/CustomChecks/CheckDirtyMemory.cs +++ b/src/ServiceControl.Persistence.RavenDb/CustomChecks/CheckDirtyMemory.cs @@ -17,7 +17,8 @@ public override async Task PerformCheck(CancellationToken cancellat if (isHighDirty) { var message = $"There is a high level of RavenDB dirty memory ({dirtyMemoryKb}kb). Check the ServiceControl " + - "troubleshooting guide for guidance on how to mitigate the issue."; + "troubleshooting guide for guidance on how to mitigate the issue. " + + "Visit the https://docs.particular.net/servicecontrol/troubleshooting page for more information."; Log.Warn(message); return CheckResult.Failed(message); } @@ -39,7 +40,8 @@ public override async Task PerformCheck(CancellationToken cancellat case >= 3 when AnalyzeTrendUsingRegression(lastDirtyMemoryReads) == TrendDirection.Increasing: { var message = $"RavenDB dirty memory is increasing. Last available value is {dirtyMemoryKb}kb. " + - $"Check the ServiceControl troubleshooting guide for guidance on how to mitigate the issue."; + $"Check the ServiceControl troubleshooting guide for guidance on how to mitigate the issue. 
" + + $"Visit the https://docs.particular.net/servicecontrol/troubleshooting page for more information."; Log.Warn(message); return CheckResult.Failed(message); } From 6a18250bbdcd9bc2da4544f44b1273b07c5a9ac6 Mon Sep 17 00:00:00 2001 From: Brandon Ording Date: Wed, 12 Mar 2025 11:06:02 -0400 Subject: [PATCH 19/21] Add BOM --- .../CustomChecks/CheckDirtyMemory.cs | 2 +- .../MemoryInformationRetriever.cs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ServiceControl.Persistence.RavenDb/CustomChecks/CheckDirtyMemory.cs b/src/ServiceControl.Persistence.RavenDb/CustomChecks/CheckDirtyMemory.cs index bb78011af1..663822f48e 100644 --- a/src/ServiceControl.Persistence.RavenDb/CustomChecks/CheckDirtyMemory.cs +++ b/src/ServiceControl.Persistence.RavenDb/CustomChecks/CheckDirtyMemory.cs @@ -1,4 +1,4 @@ -namespace ServiceControl.Persistence.RavenDB.CustomChecks; +namespace ServiceControl.Persistence.RavenDB.CustomChecks; using System; using System.Collections.Generic; diff --git a/src/ServiceControl.Persistence.RavenDb/MemoryInformationRetriever.cs b/src/ServiceControl.Persistence.RavenDb/MemoryInformationRetriever.cs index 6203e18eb7..b724fc7dfe 100644 --- a/src/ServiceControl.Persistence.RavenDb/MemoryInformationRetriever.cs +++ b/src/ServiceControl.Persistence.RavenDb/MemoryInformationRetriever.cs @@ -1,4 +1,4 @@ -namespace ServiceControl.Persistence.RavenDB; +namespace ServiceControl.Persistence.RavenDB; using System; using System.Net.Http; From 5e626ad5668dab6e0cf69906664d3967c424ad98 Mon Sep 17 00:00:00 2001 From: Brandon Ording Date: Wed, 12 Mar 2025 11:10:34 -0400 Subject: [PATCH 20/21] Lock to older SDK --- global.json | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/global.json b/global.json index d31527941e..444e66def0 100644 --- a/global.json +++ b/global.json @@ -1,7 +1,6 @@ { "sdk": { - "version": "8.0.400", - "rollForward": "latestFeature" + "version": "8.0.406" }, "msbuild-sdks": { "Microsoft.Build.NoTargets": 
"3.7.56" From 45c3cbfa9ae41b98a69ad32002c8e34a30f38605 Mon Sep 17 00:00:00 2001 From: Brandon Ording Date: Wed, 12 Mar 2025 11:14:27 -0400 Subject: [PATCH 21/21] And CI too --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 9e02d5022a..2d766a3a3d 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -40,7 +40,7 @@ jobs: - name: Setup .NET SDK uses: actions/setup-dotnet@v4.3.0 with: - dotnet-version: 8.0.x + dotnet-version: 8.0.406 - name: Download RavenDB Server run: ./tools/download-ravendb-server.ps1 - name: Build