From dc3c3f23466e57adf54f771d3f55c9e2030abf3e Mon Sep 17 00:00:00 2001 From: "Harper, Jason M" Date: Tue, 25 Nov 2025 09:56:42 -0800 Subject: [PATCH] fix: exclude final metric sample when running with workload When collecting metrics with a workload (e.g., `perfspect metrics -- stress-ng`), the final sample may be collected after or during workload completion, capturing system transition from loaded to idle state. With short collections, this single anomalous sample can significantly skew summary statistics (mean, min, max, stddev). This change automatically excludes the final timestamp's samples from summary statistics when metrics are collected with a workload. The full CSV with all samples is still preserved for advanced analysis. Implementation: - Added `WithWorkload` field to `Metadata` struct to track workload context - Set `WithWorkload = true` when workload application arguments are provided - Added `excludeFinalSample()` method to remove final timestamp rows before computing summary statistics - Optimized to check first group only and log once per collection Fixes #569 Signed-off-by: Harper, Jason M --- cmd/metrics/metadata.go | 1 + cmd/metrics/metrics.go | 1 + cmd/metrics/summary.go | 38 ++++++++++ cmd/metrics/summary_test.go | 143 ++++++++++++++++++++++++++++++++++++ 4 files changed, 183 insertions(+) create mode 100644 cmd/metrics/summary_test.go diff --git a/cmd/metrics/metadata.go b/cmd/metrics/metadata.go index b0480b1c..806ebf15 100644 --- a/cmd/metrics/metadata.go +++ b/cmd/metrics/metadata.go @@ -72,6 +72,7 @@ type Metadata struct { // below are not loaded by LoadMetadata, but are set by the caller (should these be here at all?) CollectionStartTime time.Time PerfSpectVersion string + WithWorkload bool // true if metrics were collected with a user-provided workload application } // LoadMetadata - populates and returns a Metadata structure containing state of the diff --git a/cmd/metrics/metrics.go b/cmd/metrics/metrics.go index 57c4ac9d..f1ef1c6e 100644 --- a/cmd/metrics/metrics.go +++ b/cmd/metrics/metrics.go @@ -1320,6 +1320,7 @@ func collectOnTarget(targetContext *targetContext, localTempDir string, localOut printCompleteChannel := make(chan []string) // get current time for use in setting timestamps on output targetContext.metadata.CollectionStartTime = time.Now() // save the start time in the metadata for use when using the --input option to process raw data + targetContext.metadata.WithWorkload = len(argsApplication) > 0 go printMetricsAsync(targetContext, localOutputDir, frameChannel, printCompleteChannel) var err error for !signalMgr.shouldStop() { diff --git a/cmd/metrics/summary.go b/cmd/metrics/summary.go index 4573f59d..20a9dd84 100644 --- a/cmd/metrics/summary.go +++ b/cmd/metrics/summary.go @@ -33,6 +33,10 @@ func summarizeMetrics(localOutputDir string, targetName string, metadata Metadat if err != nil { return filesCreated, fmt.Errorf("failed to read metrics from %s: %w", csvMetricsFile, err) } + // exclude the final sample if metrics were collected with a workload + if metadata.WithWorkload { + metrics.excludeFinalSample() + } // csv summary out, err := metrics.getCSV() if err != nil { @@ -214,6 +218,40 @@ func newMetricCollection(csvPath string) (metrics MetricCollection, err error) { return } +// excludeFinalSample removes the final timestamp's rows from all metric groups. +// This is used when collecting metrics with a workload to avoid including +// post-workload data that can skew the summary statistics. +func (mc MetricCollection) excludeFinalSample() { + if len(mc) == 0 { + return + } + // All metric groups should have the same number of rows since they come from the same CSV + // Check the first group to avoid redundant checking + if len(mc[0].rows) <= 1 { + // Don't exclude if there's only one sample or no samples + slog.Warn("metric collection has only one sample, not excluding final sample") + return + } + for i := range mc { + // Find the maximum timestamp in this group + maxTimestamp := mc[i].rows[0].timestamp + for _, row := range mc[i].rows { + if row.timestamp > maxTimestamp { + maxTimestamp = row.timestamp + } + } + // Remove all rows with the maximum timestamp + var filteredRows []row + for _, row := range mc[i].rows { + if row.timestamp != maxTimestamp { + filteredRows = append(filteredRows, row) + } + } + mc[i].rows = filteredRows + } + slog.Debug("excluded final sample from metric collection", slog.Int("num_groups", len(mc))) +} + // getStats - calculate summary stats (min, max, mean, stddev) for each metric func (mg *MetricGroup) getStats() (stats map[string]metricStats, err error) { stats = make(map[string]metricStats) diff --git a/cmd/metrics/summary_test.go b/cmd/metrics/summary_test.go new file mode 100644 index 00000000..0f15fd0f --- /dev/null +++ b/cmd/metrics/summary_test.go @@ -0,0 +1,143 @@ +package metrics + +// Copyright (C) 2021-2025 Intel Corporation +// SPDX-License-Identifier: BSD-3-Clause + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestExcludeFinalSample(t *testing.T) { + tests := []struct { + name string + inputRows []row + expectedCount int + expectedMaxTS float64 + }{ + { + name: "exclude single final timestamp", + inputRows: []row{ + {timestamp: 5.0, metrics: map[string]float64{"metric1": 100.0}}, + {timestamp: 10.0, metrics: map[string]float64{"metric1": 200.0}}, + {timestamp: 15.0, metrics: map[string]float64{"metric1": 150.0}}, + {timestamp: 20.0, metrics: map[string]float64{"metric1": 50.0}}, // this should be excluded + }, + expectedCount: 3, + expectedMaxTS: 15.0, + }, + { + name: "exclude multiple rows with same final timestamp", + inputRows: []row{ + {timestamp: 5.0, socket: "0", metrics: map[string]float64{"metric1": 100.0}}, + {timestamp: 10.0, socket: "0", metrics: map[string]float64{"metric1": 200.0}}, + {timestamp: 15.0, socket: "0", metrics: map[string]float64{"metric1": 150.0}}, + {timestamp: 15.0, socket: "1", metrics: map[string]float64{"metric1": 160.0}}, // same timestamp, different socket + }, + expectedCount: 2, + expectedMaxTS: 10.0, + }, + { + name: "single sample - should not exclude", + inputRows: []row{ + {timestamp: 5.0, metrics: map[string]float64{"metric1": 100.0}}, + }, + expectedCount: 1, + expectedMaxTS: 5.0, + }, + { + name: "two samples - exclude last one", + inputRows: []row{ + {timestamp: 5.0, metrics: map[string]float64{"metric1": 100.0}}, + {timestamp: 10.0, metrics: map[string]float64{"metric1": 50.0}}, + }, + expectedCount: 1, + expectedMaxTS: 5.0, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // Create a MetricCollection with a single MetricGroup + mc := MetricCollection{ + MetricGroup{ + names: []string{"metric1"}, + rows: tt.inputRows, + groupByField: "", + groupByValue: "", + }, + } + + // Call excludeFinalSample + mc.excludeFinalSample() + + // Verify the number of remaining rows + assert.Equal(t, tt.expectedCount, len(mc[0].rows), "unexpected number of rows after exclusion") + + // Verify that no row has a timestamp greater than expectedMaxTS + if len(mc[0].rows) > 0 { + for _, row := range mc[0].rows { + assert.LessOrEqual(t, row.timestamp, tt.expectedMaxTS, "found row with timestamp greater than expected maximum") + } + } + }) + } +} + +func TestExcludeFinalSampleMultipleGroups(t *testing.T) { + // Test with multiple metric groups (e.g., multiple sockets) + mc := MetricCollection{ + MetricGroup{ + names: []string{"metric1"}, + groupByField: "SKT", + groupByValue: "0", + rows: []row{ + {timestamp: 5.0, socket: "0", metrics: map[string]float64{"metric1": 100.0}}, + {timestamp: 10.0, socket: "0", metrics: map[string]float64{"metric1": 200.0}}, + {timestamp: 15.0, socket: "0", metrics: map[string]float64{"metric1": 50.0}}, // should be excluded + }, + }, + MetricGroup{ + names: []string{"metric1"}, + groupByField: "SKT", + groupByValue: "1", + rows: []row{ + {timestamp: 5.0, socket: "1", metrics: map[string]float64{"metric1": 110.0}}, + {timestamp: 10.0, socket: "1", metrics: map[string]float64{"metric1": 210.0}}, + {timestamp: 15.0, socket: "1", metrics: map[string]float64{"metric1": 60.0}}, // should be excluded + }, + }, + } + + mc.excludeFinalSample() + + // Both groups should have 2 rows remaining + assert.Equal(t, 2, len(mc[0].rows), "socket 0 should have 2 rows") + assert.Equal(t, 2, len(mc[1].rows), "socket 1 should have 2 rows") + + // Verify max timestamps + assert.Equal(t, 10.0, mc[0].rows[1].timestamp, "socket 0 max timestamp should be 10.0") + assert.Equal(t, 10.0, mc[1].rows[1].timestamp, "socket 1 max timestamp should be 10.0") +} + +func TestExcludeFinalSampleEmptyCollection(t *testing.T) { + // Test with empty MetricCollection + mc := MetricCollection{} + mc.excludeFinalSample() // should not panic + assert.Equal(t, 0, len(mc), "empty collection should remain empty") +} + +func TestExcludeFinalSampleEmptyRows(t *testing.T) { + // Test with MetricGroup that has no rows + mc := MetricCollection{ + MetricGroup{ + names: []string{"metric1"}, + groupByField: "", + groupByValue: "", + rows: []row{}, + }, + } + mc.excludeFinalSample() // should not panic + assert.Equal(t, 0, len(mc[0].rows), "empty rows should remain empty") +}