Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
9c4f812
Add an initial draft of a dirty memory custom check
mauroservienti Mar 10, 2025
8b1cbfe
Add some memory analysis
mauroservienti Mar 10, 2025
bb93f38
Use the database configuration to get the server URL
mauroservienti Mar 10, 2025
6793f77
Add more logging
mauroservienti Mar 10, 2025
5afa4c0
Add a to-do
mauroservienti Mar 10, 2025
838d967
minor tweaks
mauroservienti Mar 11, 2025
2364de7
Because editorconfig
mauroservienti Mar 11, 2025
652aaf4
Use better variable names
mauroservienti Mar 11, 2025
f08d19b
Refactor the memory information retriever to check the content schema
mauroservienti Mar 11, 2025
44db42e
Register the MemoryInformationRetriever in DI
mauroservienti Mar 11, 2025
8fa5593
Fix formatting
mauroservienti Mar 11, 2025
37e1d6f
Update the HTTP GET URL to trim the response size
mauroservienti Mar 11, 2025
4396f41
Make the custom check work in both embedded and external mode
mauroservienti Mar 11, 2025
fd681ea
Custom checks approved list
mauroservienti Mar 11, 2025
b1cb4cc
Update log statements to mention RavenDB
mauroservienti Mar 11, 2025
0d5325f
Properly invert custom check id and category
mauroservienti Mar 12, 2025
86d1612
Add the CheckDirtyMemory custom check to the primary instance
mauroservienti Mar 12, 2025
151e29d
Link to the troubleshooting guidance page
mauroservienti Mar 12, 2025
f83d6f7
Fix casing
bording Mar 12, 2025
daddc0c
Remove the trends evaluation from the dirty memory custom check
mauroservienti Mar 13, 2025
a5bdd85
Deep link to guidance
mauroservienti Mar 13, 2025
d13d412
Reword custom check and log warning message
mauroservienti Mar 14, 2025
631e7f9
Rename the dirty memory custom check ID
mauroservienti Mar 14, 2025
9783a38
reword comments
mauroservienti Mar 14, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
namespace ServiceControl.Audit.Persistence.RavenDB.CustomChecks;

using System;
using System.Threading;
using System.Threading.Tasks;
using NServiceBus.CustomChecks;
using NServiceBus.Logging;

/// <summary>
/// Custom check for the Audit instance that reports a failure when the RavenDB server
/// flags its dirty memory as high.
/// </summary>
/// <remarks>
/// The check is registered with the ID "RavenDB dirty memory" in the "ServiceControl.Audit Health"
/// category, and passes a five-minute <see cref="TimeSpan"/> to the base class
/// (presumably the repeat interval - confirm against the CustomCheck API).
/// </remarks>
class CheckDirtyMemory(MemoryInformationRetriever memoryInformationRetriever) : CustomCheck("RavenDB dirty memory", "ServiceControl.Audit Health", TimeSpan.FromMinutes(5))
{
    static readonly ILog Log = LogManager.GetLogger<CheckDirtyMemory>();

    /// <summary>
    /// Retrieves the current memory information and turns it into a check result.
    /// </summary>
    /// <param name="cancellationToken">Token forwarded to the memory information retriever.</param>
    /// <returns>
    /// <see cref="CheckResult.Pass"/> when dirty memory is not flagged as high; otherwise a failed
    /// result whose message includes the dirty memory amount and a link to the guidance page.
    /// </returns>
    public override async Task<CheckResult> PerformCheck(CancellationToken cancellationToken = default)
    {
        var memoryInfo = await memoryInformationRetriever.GetMemoryInformation(cancellationToken);

        // Nothing to report while the server does not flag dirty memory as high.
        if (!memoryInfo.IsHighDirty)
        {
            return CheckResult.Pass;
        }

        // The same text is written to the log and surfaced as the check failure reason.
        var failureReason = $"There is a high level of RavenDB dirty memory ({memoryInfo.DirtyMemoryKb}kb). See https://docs.particular.net/servicecontrol/troubleshooting#ravendb-dirty-memory for guidance on how to mitigate the issue.";
        Log.Warn(failureReason);

        return CheckResult.Failed(failureReason);
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
namespace ServiceControl.Audit.Persistence.RavenDB;

using System;
using System.Net.Http;
using System.Text.Json;
using System.Threading;
using System.Threading.Tasks;

/// <summary>
/// Retrieves dirty memory information from the RavenDB server by calling its
/// <c>/admin/debug/memory/stats</c> HTTP endpoint.
/// </summary>
class MemoryInformationRetriever(DatabaseConfiguration databaseConfiguration)
{
    // Open question: what does a connection string look like? Is it only a URI, or could it contain other settings?
    // The ?? operator is needed because ServerUrl is populated when running embedded, and the connection
    // string is populated when running in external mode. The tricky part is that tests behave as if
    // they were running in external mode. If the connection string always contains only the server
    // URL, this code is safe; otherwise it needs to be adjusted to extract the server URL.
    readonly HttpClient client = new() { BaseAddress = new Uri(databaseConfiguration.ServerConfiguration.ServerUrl ?? databaseConfiguration.ServerConfiguration.ConnectionString) };

    // Shape of the subset of the memory stats response this class reads.
    record ResponseDto
    {
        public MemoryInformation MemoryInformation { get; set; }
    }

    record MemoryInformation
    {
        public bool IsHighDirty { get; set; }
        public string DirtyMemory { get; set; }
    }

    /// <summary>
    /// Requests the memory statistics from the server and extracts the dirty memory details.
    /// </summary>
    /// <param name="cancellationToken">Token used to cancel the HTTP request and the content read.</param>
    /// <returns>
    /// The server-reported high-dirty flag and the dirty memory amount in kilobytes
    /// (rounded to the nearest integer).
    /// </returns>
    /// <exception cref="HttpRequestException">The server replied with a non-success status code.</exception>
    /// <exception cref="InvalidOperationException">
    /// The response does not contain the expected memory information, or the dirty memory value
    /// is not expressed as a number followed by the KBytes unit.
    /// </exception>
    public async Task<(bool IsHighDirty, int DirtyMemoryKb)> GetMemoryInformation(CancellationToken cancellationToken = default)
    {
        // Dispose the response so the underlying connection resources are released promptly.
        using var httpResponse = await client.GetAsync("/admin/debug/memory/stats?includeThreads=false&includeMappings=false", cancellationToken);

        // Fail with a clear HTTP error instead of trying to deserialize an error payload.
        httpResponse.EnsureSuccessStatusCode();

        var content = await httpResponse.Content.ReadAsStringAsync(cancellationToken);
        var memoryInformation = JsonSerializer.Deserialize<ResponseDto>(content)?.MemoryInformation;

        if (memoryInformation?.DirtyMemory is null)
        {
            throw new InvalidOperationException("Unexpected response. The memory stats response does not contain the MemoryInformation.DirtyMemory value.");
        }

        return (memoryInformation.IsHighDirty, ParseDirtyMemoryKb(memoryInformation.DirtyMemory));
    }

    // Parses a "<number> KBytes" value into a whole number of kilobytes.
    static int ParseDirtyMemoryKb(string dirtyMemory)
    {
        var values = dirtyMemory.Split(' ', StringSplitOptions.RemoveEmptyEntries);

        // Guard the length first so a value without a unit reports a meaningful error
        // rather than an IndexOutOfRangeException.
        if (values.Length < 2 || !string.Equals(values[1], "KBytes", StringComparison.OrdinalIgnoreCase))
        {
            throw new InvalidOperationException($"Unexpected response. Was expecting memory details in KBytes, instead received: {dirtyMemory}");
        }

        // Machine-generated data: parse with the invariant culture so the host's regional settings
        // do not affect the result, and tolerate thousands separators and a fractional part.
        if (!decimal.TryParse(values[0], System.Globalization.NumberStyles.Number, System.Globalization.CultureInfo.InvariantCulture, out var kilobytes))
        {
            throw new InvalidOperationException($"Unexpected response. Was expecting a numeric amount of KBytes, instead received: {dirtyMemory}");
        }

        var rounded = Math.Round(kilobytes, MidpointRounding.AwayFromZero);
        if (rounded < int.MinValue || rounded > int.MaxValue)
        {
            throw new InvalidOperationException($"Unexpected response. The amount of KBytes is out of the supported range: {dirtyMemory}");
        }

        return (int)rounded;
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ public void AddPersistence(IServiceCollection services)
static void ConfigureLifecycle(IServiceCollection services, DatabaseConfiguration databaseConfiguration)
{
services.AddSingleton(databaseConfiguration);
services.AddSingleton<MemoryInformationRetriever>();

services.AddSingleton<IRavenSessionProvider, RavenSessionProvider>();
services.AddHostedService<RavenPersistenceLifecycleHostedService>();
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
ServiceControl.Audit Health: Audit Database Index Lag
ServiceControl.Audit Health: Audit Message Ingestion Process
ServiceControl.Audit Health: RavenDB dirty memory
Storage space: ServiceControl.Audit database
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
namespace ServiceControl.Persistence.RavenDB.CustomChecks;

using System;
using System.Threading;
using System.Threading.Tasks;
using NServiceBus.CustomChecks;
using NServiceBus.Logging;

/// <summary>
/// Custom check for the primary instance that reports a failure when the RavenDB server
/// flags its dirty memory as high.
/// </summary>
/// <remarks>
/// The check is registered with the ID "RavenDB dirty memory" in the "ServiceControl Health"
/// category, and passes a five-minute <see cref="TimeSpan"/> to the base class
/// (presumably the repeat interval - confirm against the CustomCheck API).
/// </remarks>
class CheckDirtyMemory(MemoryInformationRetriever memoryInformationRetriever) : CustomCheck("RavenDB dirty memory", "ServiceControl Health", TimeSpan.FromMinutes(5))
{
    static readonly ILog Log = LogManager.GetLogger<CheckDirtyMemory>();

    /// <summary>
    /// Retrieves the current memory information and turns it into a check result.
    /// </summary>
    /// <param name="cancellationToken">Token forwarded to the memory information retriever.</param>
    /// <returns>
    /// <see cref="CheckResult.Pass"/> when dirty memory is not flagged as high; otherwise a failed
    /// result whose message includes the dirty memory amount and a link to the guidance page.
    /// </returns>
    public override async Task<CheckResult> PerformCheck(CancellationToken cancellationToken = default)
    {
        var memoryInfo = await memoryInformationRetriever.GetMemoryInformation(cancellationToken);

        // Nothing to report while the server does not flag dirty memory as high.
        if (!memoryInfo.IsHighDirty)
        {
            return CheckResult.Pass;
        }

        // The same text is written to the log and surfaced as the check failure reason.
        var failureReason = $"There is a high level of RavenDB dirty memory ({memoryInfo.DirtyMemoryKb}kb). See https://docs.particular.net/servicecontrol/troubleshooting#ravendb-dirty-memory for guidance on how to mitigate the issue.";
        Log.Warn(failureReason);

        return CheckResult.Failed(failureReason);
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
namespace ServiceControl.Persistence.RavenDB;

using System;
using System.Net.Http;
using System.Text.Json;
using System.Threading;
using System.Threading.Tasks;

/// <summary>
/// Retrieves dirty memory information from the RavenDB server by calling its
/// <c>/admin/debug/memory/stats</c> HTTP endpoint.
/// </summary>
class MemoryInformationRetriever(RavenPersisterSettings persisterSettings)
{
    // Open question: what does a connection string look like? Is it only a URI, or could it contain other settings?
    // The primary instance has only the concept of a connection string (vs the Audit instance having
    // both a ServerUrl and a ConnectionString). If the connection string always contains only the
    // server URL, this code is safe; otherwise it needs to be adjusted to extract the server URL.
    readonly HttpClient client = new() { BaseAddress = new Uri(persisterSettings.ConnectionString) };

    // Shape of the subset of the memory stats response this class reads.
    record ResponseDto
    {
        public MemoryInformation MemoryInformation { get; set; }
    }

    record MemoryInformation
    {
        public bool IsHighDirty { get; set; }
        public string DirtyMemory { get; set; }
    }

    /// <summary>
    /// Requests the memory statistics from the server and extracts the dirty memory details.
    /// </summary>
    /// <param name="cancellationToken">Token used to cancel the HTTP request and the content read.</param>
    /// <returns>
    /// The server-reported high-dirty flag and the dirty memory amount in kilobytes
    /// (rounded to the nearest integer).
    /// </returns>
    /// <exception cref="HttpRequestException">The server replied with a non-success status code.</exception>
    /// <exception cref="InvalidOperationException">
    /// The response does not contain the expected memory information, or the dirty memory value
    /// is not expressed as a number followed by the KBytes unit.
    /// </exception>
    public async Task<(bool IsHighDirty, int DirtyMemoryKb)> GetMemoryInformation(CancellationToken cancellationToken = default)
    {
        // Dispose the response so the underlying connection resources are released promptly.
        using var httpResponse = await client.GetAsync("/admin/debug/memory/stats?includeThreads=false&includeMappings=false", cancellationToken);

        // Fail with a clear HTTP error instead of trying to deserialize an error payload.
        httpResponse.EnsureSuccessStatusCode();

        var content = await httpResponse.Content.ReadAsStringAsync(cancellationToken);
        var memoryInformation = JsonSerializer.Deserialize<ResponseDto>(content)?.MemoryInformation;

        if (memoryInformation?.DirtyMemory is null)
        {
            throw new InvalidOperationException("Unexpected response. The memory stats response does not contain the MemoryInformation.DirtyMemory value.");
        }

        return (memoryInformation.IsHighDirty, ParseDirtyMemoryKb(memoryInformation.DirtyMemory));
    }

    // Parses a "<number> KBytes" value into a whole number of kilobytes.
    static int ParseDirtyMemoryKb(string dirtyMemory)
    {
        var values = dirtyMemory.Split(' ', StringSplitOptions.RemoveEmptyEntries);

        // Guard the length first so a value without a unit reports a meaningful error
        // rather than an IndexOutOfRangeException.
        if (values.Length < 2 || !string.Equals(values[1], "KBytes", StringComparison.OrdinalIgnoreCase))
        {
            throw new InvalidOperationException($"Unexpected response. Was expecting memory details in KBytes, instead received: {dirtyMemory}");
        }

        // Machine-generated data: parse with the invariant culture so the host's regional settings
        // do not affect the result, and tolerate thousands separators and a fractional part.
        if (!decimal.TryParse(values[0], System.Globalization.NumberStyles.Number, System.Globalization.CultureInfo.InvariantCulture, out var kilobytes))
        {
            throw new InvalidOperationException($"Unexpected response. Was expecting a numeric amount of KBytes, instead received: {dirtyMemory}");
        }

        var rounded = Math.Round(kilobytes, MidpointRounding.AwayFromZero);
        if (rounded < int.MinValue || rounded > int.MaxValue)
        {
            throw new InvalidOperationException($"Unexpected response. The amount of KBytes is out of the supported range: {dirtyMemory}");
        }

        return (int)rounded;
    }
}
2 changes: 2 additions & 0 deletions src/ServiceControl.Persistence.RavenDB/RavenPersistence.cs
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,9 @@ public void AddPersistence(IServiceCollection services)
services.AddCustomCheck<CheckRavenDBIndexLag>();
services.AddCustomCheck<CheckFreeDiskSpace>();
services.AddCustomCheck<CheckMinimumStorageRequiredForIngestion>();
services.AddCustomCheck<CheckDirtyMemory>();

services.AddSingleton<MemoryInformationRetriever>();
services.AddSingleton<OperationsManager>();

services.AddSingleton<IArchiveMessages, MessageArchiver>();
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
ServiceControl Health: Error Database Index Errors
ServiceControl Health: Error Database Index Lag
ServiceControl Health: Message Ingestion Process
ServiceControl Health: RavenDB dirty memory
Storage space: ServiceControl database
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
ServiceControl Health: Error Database Index Errors
ServiceControl Health: Error Database Index Lag
ServiceControl Health: Message Ingestion Process
ServiceControl Health: RavenDB dirty memory
Storage space: ServiceControl database