diff --git a/src/ServiceControl.Audit.Persistence.RavenDB/CustomChecks/CheckDirtyMemory.cs b/src/ServiceControl.Audit.Persistence.RavenDB/CustomChecks/CheckDirtyMemory.cs
new file mode 100644
index 0000000000..cedca079c4
--- /dev/null
+++ b/src/ServiceControl.Audit.Persistence.RavenDB/CustomChecks/CheckDirtyMemory.cs
@@ -0,0 +1,28 @@
+namespace ServiceControl.Audit.Persistence.RavenDB.CustomChecks;
+
+using System;
+using System.Threading;
+using System.Threading.Tasks;
+using NServiceBus.CustomChecks;
+using NServiceBus.Logging;
+
+// Custom check that fails when the RavenDB dirty memory level is flagged as high.
+class CheckDirtyMemory(MemoryInformationRetriever memoryInformationRetriever) : CustomCheck("RavenDB dirty memory", "ServiceControl.Audit Health", TimeSpan.FromMinutes(5))
+{
+    // Logs a warning and fails the check when dirty memory is flagged as high; passes otherwise.
+    public override async Task<CheckResult> PerformCheck(CancellationToken cancellationToken = default)
+    {
+        var (isHighDirty, dirtyMemoryKb) = await memoryInformationRetriever.GetMemoryInformation(cancellationToken);
+
+        if (isHighDirty)
+        {
+            var message = $"There is a high level of RavenDB dirty memory ({dirtyMemoryKb}kb). See https://docs.particular.net/servicecontrol/troubleshooting#ravendb-dirty-memory for guidance on how to mitigate the issue.";
+            Log.Warn(message);
+            return CheckResult.Failed(message);
+        }
+
+        return CheckResult.Pass;
+    }
+
+    static readonly ILog Log = LogManager.GetLogger<CheckDirtyMemory>();
+}
\ No newline at end of file
diff --git a/src/ServiceControl.Audit.Persistence.RavenDB/MemoryInformationRetriever.cs b/src/ServiceControl.Audit.Persistence.RavenDB/MemoryInformationRetriever.cs
new file mode 100644
index 0000000000..eea7d87865
--- /dev/null
+++ b/src/ServiceControl.Audit.Persistence.RavenDB/MemoryInformationRetriever.cs
@@ -0,0 +1,54 @@
+namespace ServiceControl.Audit.Persistence.RavenDB;
+
+using System;
+using System.Globalization;
+using System.Net.Http;
+using System.Text.Json;
+using System.Threading;
+using System.Threading.Tasks;
+
+// Retrieves dirty memory details from the RavenDB server's memory stats endpoint.
+class MemoryInformationRetriever(DatabaseConfiguration databaseConfiguration)
+{
+    // What does a connection string look like? Is it only a URI or could it contain other stuff?
+    // The ?? operator is needed because ServerUrl is populated when running embedded and the connection
+    // string when running in external mode. However, the tricky part is that when tests are run they
+    // behave as if it were external mode. If the connection string always contains only the server
+    // URL, this code is safe; otherwise it needs to be adjusted to extract the server URL.
+    readonly HttpClient client = new() { BaseAddress = new Uri(databaseConfiguration.ServerConfiguration.ServerUrl ?? databaseConfiguration.ServerConfiguration.ConnectionString) };
+
+    record ResponseDto
+    {
+        public MemoryInformation MemoryInformation { get; set; }
+    }
+
+    record MemoryInformation
+    {
+        public bool IsHighDirty { get; set; }
+        public string DirtyMemory { get; set; }
+    }
+
+    // Returns whether dirty memory is flagged as high, together with the reported amount in KB.
+    // Throws InvalidOperationException when the response does not contain a "<number> KBytes" value.
+    public async Task<(bool IsHighDirty, int DirtyMemoryKb)> GetMemoryInformation(CancellationToken cancellationToken = default)
+    {
+        using var httpResponse = await client.GetAsync("/admin/debug/memory/stats?includeThreads=false&includeMappings=false", cancellationToken);
+        // Fail on error status codes rather than trying to parse an error payload as memory stats.
+        httpResponse.EnsureSuccessStatusCode();
+
+        var responseDto = JsonSerializer.Deserialize<ResponseDto>(await httpResponse.Content.ReadAsStringAsync(cancellationToken));
+        var dirtyMemory = responseDto?.MemoryInformation?.DirtyMemory;
+
+        // Validate the shape up front so malformed payloads fail with a descriptive error; the number is
+        // parsed with the invariant culture so the result does not depend on the culture of this process.
+        var values = (dirtyMemory ?? string.Empty).Split(' ', StringSplitOptions.RemoveEmptyEntries);
+        if (values.Length < 2
+            || !string.Equals(values[1], "KBytes", StringComparison.OrdinalIgnoreCase)
+            || !decimal.TryParse(values[0], NumberStyles.Number, CultureInfo.InvariantCulture, out var dirtyMemoryKb))
+        {
+            throw new InvalidOperationException($"Unexpected response. Was expecting memory details in KBytes, instead received: {dirtyMemory}");
+        }
+
+        return (responseDto.MemoryInformation.IsHighDirty, (int)Math.Round(dirtyMemoryKb));
+    }
+}
\ No newline at end of file
diff --git a/src/ServiceControl.Audit.Persistence.RavenDB/RavenPersistence.cs b/src/ServiceControl.Audit.Persistence.RavenDB/RavenPersistence.cs
index 75081a0547..0cd872cbe8 100644
--- a/src/ServiceControl.Audit.Persistence.RavenDB/RavenPersistence.cs
+++ b/src/ServiceControl.Audit.Persistence.RavenDB/RavenPersistence.cs
@@ -21,6 +21,7 @@ public void AddPersistence(IServiceCollection services)
         static void ConfigureLifecycle(IServiceCollection services, DatabaseConfiguration databaseConfiguration)
         {
             services.AddSingleton(databaseConfiguration);
+            services.AddSingleton<MemoryInformationRetriever>();
 
             services.AddSingleton();
             services.AddHostedService();
diff --git a/src/ServiceControl.Audit.Persistence.Tests.RavenDB/ApprovalFiles/CustomCheckTests.VerifyCustomChecks.approved.txt b/src/ServiceControl.Audit.Persistence.Tests.RavenDB/ApprovalFiles/CustomCheckTests.VerifyCustomChecks.approved.txt
index 2dd77f440a..f510666c86 100644
--- a/src/ServiceControl.Audit.Persistence.Tests.RavenDB/ApprovalFiles/CustomCheckTests.VerifyCustomChecks.approved.txt
+++ b/src/ServiceControl.Audit.Persistence.Tests.RavenDB/ApprovalFiles/CustomCheckTests.VerifyCustomChecks.approved.txt
@@ -1,3 +1,4 @@
 ServiceControl.Audit Health: Audit Database Index Lag
 ServiceControl.Audit Health: Audit Message Ingestion Process
+ServiceControl.Audit Health: RavenDB dirty memory
 Storage space: ServiceControl.Audit database
\ No newline at end of file
diff --git a/src/ServiceControl.Persistence.RavenDB/CustomChecks/CheckDirtyMemory.cs b/src/ServiceControl.Persistence.RavenDB/CustomChecks/CheckDirtyMemory.cs
new file mode 100644
index 0000000000..fb0ec20c96
--- /dev/null
+++ b/src/ServiceControl.Persistence.RavenDB/CustomChecks/CheckDirtyMemory.cs
@@ -0,0 +1,28 @@
+namespace ServiceControl.Persistence.RavenDB.CustomChecks;
+
+using System;
+using System.Threading;
+using System.Threading.Tasks;
+using NServiceBus.CustomChecks;
+using NServiceBus.Logging;
+
+// Custom check that fails when the RavenDB dirty memory level is flagged as high.
+class CheckDirtyMemory(MemoryInformationRetriever memoryInformationRetriever) : CustomCheck("RavenDB dirty memory", "ServiceControl Health", TimeSpan.FromMinutes(5))
+{
+    // Logs a warning and fails the check when dirty memory is flagged as high; passes otherwise.
+    public override async Task<CheckResult> PerformCheck(CancellationToken cancellationToken = default)
+    {
+        var (isHighDirty, dirtyMemoryKb) = await memoryInformationRetriever.GetMemoryInformation(cancellationToken);
+
+        if (isHighDirty)
+        {
+            var message = $"There is a high level of RavenDB dirty memory ({dirtyMemoryKb}kb). See https://docs.particular.net/servicecontrol/troubleshooting#ravendb-dirty-memory for guidance on how to mitigate the issue.";
+            Log.Warn(message);
+            return CheckResult.Failed(message);
+        }
+
+        return CheckResult.Pass;
+    }
+
+    static readonly ILog Log = LogManager.GetLogger<CheckDirtyMemory>();
+}
\ No newline at end of file
diff --git a/src/ServiceControl.Persistence.RavenDB/MemoryInformationRetriever.cs b/src/ServiceControl.Persistence.RavenDB/MemoryInformationRetriever.cs
new file mode 100644
index 0000000000..485c6b6640
--- /dev/null
+++ b/src/ServiceControl.Persistence.RavenDB/MemoryInformationRetriever.cs
@@ -0,0 +1,53 @@
+namespace ServiceControl.Persistence.RavenDB;
+
+using System;
+using System.Globalization;
+using System.Net.Http;
+using System.Text.Json;
+using System.Threading;
+using System.Threading.Tasks;
+
+// Retrieves dirty memory details from the RavenDB server's memory stats endpoint.
+class MemoryInformationRetriever(RavenPersisterSettings persisterSettings)
+{
+    // What does a connection string look like? Is it only a URI or could it contain other stuff?
+    // The primary instance has only the concept of a connection string (vs the Audit instance having
+    // both a ServerUrl and a ConnectionString). If the connection string always contains only the
+    // server URL, this code is safe; otherwise it needs to be adjusted to extract the server URL.
+    readonly HttpClient client = new() { BaseAddress = new Uri(persisterSettings.ConnectionString) };
+
+    record ResponseDto
+    {
+        public MemoryInformation MemoryInformation { get; set; }
+    }
+
+    record MemoryInformation
+    {
+        public bool IsHighDirty { get; set; }
+        public string DirtyMemory { get; set; }
+    }
+
+    // Returns whether dirty memory is flagged as high, together with the reported amount in KB.
+    // Throws InvalidOperationException when the response does not contain a "<number> KBytes" value.
+    public async Task<(bool IsHighDirty, int DirtyMemoryKb)> GetMemoryInformation(CancellationToken cancellationToken = default)
+    {
+        using var httpResponse = await client.GetAsync("/admin/debug/memory/stats?includeThreads=false&includeMappings=false", cancellationToken);
+        // Fail on error status codes rather than trying to parse an error payload as memory stats.
+        httpResponse.EnsureSuccessStatusCode();
+
+        var responseDto = JsonSerializer.Deserialize<ResponseDto>(await httpResponse.Content.ReadAsStringAsync(cancellationToken));
+        var dirtyMemory = responseDto?.MemoryInformation?.DirtyMemory;
+
+        // Validate the shape up front so malformed payloads fail with a descriptive error; the number is
+        // parsed with the invariant culture so the result does not depend on the culture of this process.
+        var values = (dirtyMemory ?? string.Empty).Split(' ', StringSplitOptions.RemoveEmptyEntries);
+        if (values.Length < 2
+            || !string.Equals(values[1], "KBytes", StringComparison.OrdinalIgnoreCase)
+            || !decimal.TryParse(values[0], NumberStyles.Number, CultureInfo.InvariantCulture, out var dirtyMemoryKb))
+        {
+            throw new InvalidOperationException($"Unexpected response. Was expecting memory details in KBytes, instead received: {dirtyMemory}");
+        }
+
+        return (responseDto.MemoryInformation.IsHighDirty, (int)Math.Round(dirtyMemoryKb));
+    }
+}
\ No newline at end of file
diff --git a/src/ServiceControl.Persistence.RavenDB/RavenPersistence.cs b/src/ServiceControl.Persistence.RavenDB/RavenPersistence.cs
index 7b1e07ecbb..93a8fe63b7 100644
--- a/src/ServiceControl.Persistence.RavenDB/RavenPersistence.cs
+++ b/src/ServiceControl.Persistence.RavenDB/RavenPersistence.cs
@@ -49,7 +49,9 @@ public void AddPersistence(IServiceCollection services)
             services.AddCustomCheck();
             services.AddCustomCheck();
             services.AddCustomCheck();
+            services.AddCustomCheck<CheckDirtyMemory>();
 
+            services.AddSingleton<MemoryInformationRetriever>();
             services.AddSingleton();
             services.AddSingleton();
 
diff --git a/src/ServiceControl.Persistence.Tests.RavenDB/ApprovalFiles/APIApprovals.CustomCheckDetails.approved.txt b/src/ServiceControl.Persistence.Tests.RavenDB/ApprovalFiles/APIApprovals.CustomCheckDetails.approved.txt
index 8c907ba40d..ecb6bed984 100644
--- a/src/ServiceControl.Persistence.Tests.RavenDB/ApprovalFiles/APIApprovals.CustomCheckDetails.approved.txt
+++ b/src/ServiceControl.Persistence.Tests.RavenDB/ApprovalFiles/APIApprovals.CustomCheckDetails.approved.txt
@@ -1,4 +1,5 @@
 ServiceControl Health: Error Database Index Errors
 ServiceControl Health: Error Database Index Lag
 ServiceControl Health: Message Ingestion Process
+ServiceControl Health: RavenDB dirty memory
 Storage space: ServiceControl database
\ No newline at end of file
diff --git a/src/ServiceControl.Persistence.Tests.RavenDB/ApprovalFiles/CustomCheckTests.VerifyCustomChecks.approved.txt b/src/ServiceControl.Persistence.Tests.RavenDB/ApprovalFiles/CustomCheckTests.VerifyCustomChecks.approved.txt
index 8c907ba40d..ecb6bed984 100644
--- a/src/ServiceControl.Persistence.Tests.RavenDB/ApprovalFiles/CustomCheckTests.VerifyCustomChecks.approved.txt
+++ b/src/ServiceControl.Persistence.Tests.RavenDB/ApprovalFiles/CustomCheckTests.VerifyCustomChecks.approved.txt
@@ -1,4 +1,5 @@
 ServiceControl Health: Error Database Index Errors
 ServiceControl Health: Error Database Index Lag
 ServiceControl Health: Message Ingestion Process
+ServiceControl Health: RavenDB dirty memory
 Storage space: ServiceControl database
\ No newline at end of file