From ad1e7cbfe0072c7dbb06adb941c0afd0a178ab6d Mon Sep 17 00:00:00 2001 From: Jack Berg <34418638+jack-berg@users.noreply.github.com> Date: Wed, 21 Jan 2026 08:39:32 -0600 Subject: [PATCH 1/2] Improve histogram, summary performance under contention by striping observationCount Signed-off-by: Jack Berg <34418638+jack-berg@users.noreply.github.com> --- .../metrics/core/metrics/Buffer.java | 31 ++++++++++++++----- 1 file changed, 24 insertions(+), 7 deletions(-) diff --git a/prometheus-metrics-core/src/main/java/io/prometheus/metrics/core/metrics/Buffer.java b/prometheus-metrics-core/src/main/java/io/prometheus/metrics/core/metrics/Buffer.java index d4ff33a37..f5d85aa10 100644 --- a/prometheus-metrics-core/src/main/java/io/prometheus/metrics/core/metrics/Buffer.java +++ b/prometheus-metrics-core/src/main/java/io/prometheus/metrics/core/metrics/Buffer.java @@ -18,7 +18,7 @@ class Buffer { private static final long bufferActiveBit = 1L << 63; - private final AtomicLong observationCount = new AtomicLong(0); + private final AtomicLong[] stripedObservationCounts; private double[] observationBuffer = new double[0]; private int bufferPos = 0; private boolean reset = false; @@ -27,8 +27,18 @@ class Buffer { ReentrantLock runLock = new ReentrantLock(); Condition bufferFilled = appendLock.newCondition(); + Buffer() { + stripedObservationCounts = new AtomicLong[Runtime.getRuntime().availableProcessors()]; + for (int i = 0; i < stripedObservationCounts.length; i++) { + stripedObservationCounts[i] = new AtomicLong(0); + } + } + boolean append(double value) { - long count = observationCount.incrementAndGet(); + AtomicLong observationCountForThread = + stripedObservationCounts[ + ((int) Thread.currentThread().getId()) % stripedObservationCounts.length]; + long count = observationCountForThread.incrementAndGet(); if ((count & bufferActiveBit) == 0) { return false; // sign bit not set -> buffer not active. } else { @@ -69,7 +79,10 @@ T run( runLock.lock(); try { // Signal that the buffer is active. - Long expectedCount = observationCount.getAndAdd(bufferActiveBit); + Long expectedCount = 0L; + for (AtomicLong observationCount : stripedObservationCounts) { + expectedCount += observationCount.getAndAdd(bufferActiveBit); + } while (!complete.apply(expectedCount)) { // Wait until all in-flight threads have added their observations to the histogram / @@ -81,14 +94,18 @@ T run( result = createResult.get(); // Signal that the buffer is inactive. - int expectedBufferSize; + long expectedBufferSize = 0; if (reset) { - expectedBufferSize = - (int) ((observationCount.getAndSet(0) & ~bufferActiveBit) - expectedCount); + for (AtomicLong observationCount : stripedObservationCounts) { + expectedBufferSize += (int) (observationCount.getAndSet(0) & ~bufferActiveBit); + } reset = false; } else { - expectedBufferSize = (int) (observationCount.addAndGet(bufferActiveBit) - expectedCount); + for (AtomicLong observationCount : stripedObservationCounts) { + expectedBufferSize += (int) observationCount.addAndGet(bufferActiveBit); + } } + expectedBufferSize -= expectedCount; appendLock.lock(); try { From de15a970d4e499996ef644639363a53c1d683e46 Mon Sep 17 00:00:00 2001 From: Jack Berg <34418638+jack-berg@users.noreply.github.com> Date: Thu, 5 Feb 2026 12:31:20 -0600 Subject: [PATCH 2/2] PR feedback Signed-off-by: Jack Berg <34418638+jack-berg@users.noreply.github.com> --- .../metrics/core/metrics/Buffer.java | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/prometheus-metrics-core/src/main/java/io/prometheus/metrics/core/metrics/Buffer.java b/prometheus-metrics-core/src/main/java/io/prometheus/metrics/core/metrics/Buffer.java index f5d85aa10..1c47f867c 100644 --- a/prometheus-metrics-core/src/main/java/io/prometheus/metrics/core/metrics/Buffer.java +++ b/prometheus-metrics-core/src/main/java/io/prometheus/metrics/core/metrics/Buffer.java @@ -18,6 +18,14 @@ class Buffer { private static final long bufferActiveBit = 1L << 63; + // Tracking observation counts requires an AtomicLong for coordination between recording and + // collecting. AtomicLong does much worse under contention than the LongAdder instances used + // elsewhere to hold aggregated state. To improve, we stripe the AtomicLong into N instances, + // where N is the number of available processors. Each record operation chooses the appropriate + // instance to use based on the modulo of its thread id and N. This is a more naive / simple + // implementation compared to the striping used under the hood in java.util.concurrent classes + // like LongAdder - contention and hot spots can still occur if recording thread ids happen to + // resolve to the same index. Further improvement is possible. private final AtomicLong[] stripedObservationCounts; private double[] observationBuffer = new double[0]; private int bufferPos = 0; @@ -35,9 +43,8 @@ class Buffer { } boolean append(double value) { - AtomicLong observationCountForThread = - stripedObservationCounts[ - ((int) Thread.currentThread().getId()) % stripedObservationCounts.length]; + int index = Math.abs((int) Thread.currentThread().getId()) % stripedObservationCounts.length; + AtomicLong observationCountForThread = stripedObservationCounts[index]; long count = observationCountForThread.incrementAndGet(); if ((count & bufferActiveBit) == 0) { return false; // sign bit not set -> buffer not active. @@ -79,7 +86,7 @@ T run( runLock.lock(); try { // Signal that the buffer is active. - Long expectedCount = 0L; + long expectedCount = 0L; for (AtomicLong observationCount : stripedObservationCounts) { expectedCount += observationCount.getAndAdd(bufferActiveBit); } @@ -97,12 +104,12 @@ T run( long expectedBufferSize = 0; if (reset) { for (AtomicLong observationCount : stripedObservationCounts) { - expectedBufferSize += (int) (observationCount.getAndSet(0) & ~bufferActiveBit); + expectedBufferSize += observationCount.getAndSet(0) & ~bufferActiveBit; } reset = false; } else { for (AtomicLong observationCount : stripedObservationCounts) { - expectedBufferSize += (int) observationCount.addAndGet(bufferActiveBit); + expectedBufferSize += observationCount.addAndGet(bufferActiveBit); } } expectedBufferSize -= expectedCount;