From 4c92e2565cb9aafe3fac9a4099556ba93c30b23b Mon Sep 17 00:00:00 2001 From: "Harper, Jason M" Date: Wed, 26 Nov 2025 09:59:14 -0800 Subject: [PATCH 1/5] add metrics trim command Signed-off-by: Harper, Jason M --- README.md | 25 ++ cmd/metrics/metadata.go | 7 + cmd/metrics/metrics.go | 4 + cmd/metrics/resources/base.html | 4 +- cmd/metrics/summary.go | 132 ++++++++ cmd/metrics/trim.go | 358 +++++++++++++++++++++ cmd/metrics/trim_test.go | 532 ++++++++++++++++++++++++++++++++ 7 files changed, 1060 insertions(+), 2 deletions(-) create mode 100644 cmd/metrics/trim.go create mode 100644 cmd/metrics/trim_test.go diff --git a/README.md b/README.md index 9c0df982..d556f921 100644 --- a/README.md +++ b/README.md @@ -52,6 +52,31 @@ If neither sudo nor root access is available, an administrator must apply the fo Once the configuration changes are applied, use the `--noroot` flag on the command line, for example, `perfspect metrics --noroot`. +##### Refining Metrics to a Specific Time Range +After collecting metrics, you can generate new summary reports for a specific time interval using the `metrics trim` subcommand. This is useful when you've collected metrics for an entire workload but want to analyze only a specific portion, excluding setup, teardown, or other unwanted phases. + +The time range can be specified using either absolute timestamps (seconds since epoch) or relative offsets from the beginning/end of the data. At least one time parameter must be specified. + +**Examples:** +
+```
+# Skip the first 10 seconds and last 5 seconds
+$ ./perfspect metrics trim --input hostname_metrics.csv --start-offset 10 --end-offset 5
+
+# Use absolute timestamps (seconds since epoch)
+$ ./perfspect metrics trim --input hostname_metrics.csv --start-time 1764174327 --end-time 1764174351
+
+# Custom output suffix
+$ ./perfspect metrics trim --input hostname_metrics.csv --start-offset 10 --suffix steady_state
+```
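+
+For example, if the collected samples span timestamps 1000 through 1100 (hypothetical values), `--start-offset 10 --end-offset 5` resolves to the absolute range 1010 through 1095; only rows whose timestamps fall inside that range (inclusive) are kept:
+
+```
+data spans:            1000 .. 1100    (first .. last sample timestamp)
+--start-offset 10  =>  start = 1000 + 10 = 1010
+--end-offset 5     =>  end   = 1100 - 5  = 1095
+rows kept:             1010 <= timestamp <= 1095
+```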
+ +The trim command creates new files in the same directory as the input file (or in a specified output directory): +- `hostname_metrics_trimmed.csv` - Filtered raw metrics +- `hostname_metrics_trimmed_summary.csv` - Summary statistics +- `hostname_metrics_trimmed_summary.html` - Interactive HTML report + +> [!NOTE] +> If a metadata JSON file exists alongside the input CSV (from the original collection), it will be used to generate a complete HTML report with system summary. Otherwise, a simplified HTML report without system summary will be generated. + ##### Prometheus Endpoint The `metrics` command can expose metrics via a Prometheus compatible `metrics` endpoint. This allows integration with Prometheus monitoring systems. To enable the Prometheus endpoint, use the `--prometheus-server` flag. By default, the endpoint listens on port 9090. The port can be changed using the `--prometheus-server-addr` flag. Run `perfspect metrics --prometheus-server`. See the [example daemonset](docs/perfspect-daemonset.md) for deploying in Kubernetes. diff --git a/cmd/metrics/metadata.go b/cmd/metrics/metadata.go index 806ebf15..c7e94901 100644 --- a/cmd/metrics/metadata.go +++ b/cmd/metrics/metadata.go @@ -539,12 +539,19 @@ func (md Metadata) String() string { return string(jsonData) } +func (md Metadata) Initialized() bool { + return md.SocketCount != 0 && md.CoresPerSocket != 0 +} + // JSON converts the Metadata struct to a JSON-encoded byte slice. // // Returns: // - out: JSON-encoded byte slice representation of the Metadata. // - err: error encountered during the marshaling process, if any. func (md Metadata) JSON() (out []byte, err error) { + if !md.Initialized() { + return []byte("null"), nil + } if out, err = json.Marshal(md); err != nil { slog.Error("failed to marshal metadata structure", slog.String("error", err.Error())) return diff --git a/cmd/metrics/metrics.go b/cmd/metrics/metrics.go index 1cdcbc74..f17f1d63 100644 --- a/cmd/metrics/metrics.go +++ b/cmd/metrics/metrics.go @@ -265,6 +265,10 @@ func usageFunc(cmd *cobra.Command) error { cmd.Printf(" --%-20s %s%s\n", flag.Name, flag.Help, flagDefault) } } + cmd.Printf("\nSubcommands:\n") + for _, subCmd := range cmd.Commands() { + cmd.Printf(" %s: %s\n", subCmd.Name(), subCmd.Short) + } cmd.Println("\nGlobal Flags:") cmd.Parent().PersistentFlags().VisitAll(func(pf *pflag.Flag) { flagDefault := "" diff --git a/cmd/metrics/resources/base.html b/cmd/metrics/resources/base.html index e5cafb3f..879646c2 100644 --- a/cmd/metrics/resources/base.html +++ b/cmd/metrics/resources/base.html @@ -967,7 +967,7 @@ - {system_info.map(([key, value]) => ( + {system_info && system_info.map(([key, value]) => ( {JSON.stringify(key)} @@ -994,7 +994,7 @@ - {Object.entries(metadata).sort(([key1], [key2]) => key1.localeCompare(key2)).map(([key, value]) => ( + {metadata && Object.entries(metadata).sort(([key1], [key2]) => key1.localeCompare(key2)).map(([key, value]) => ( {JSON.stringify(key)} diff --git a/cmd/metrics/summary.go b/cmd/metrics/summary.go index 20a9dd84..ffc7132c 100644 --- a/cmd/metrics/summary.go +++ b/cmd/metrics/summary.go @@ -19,6 +19,7 @@ import ( "regexp" "slices" "strconv" + "strings" texttemplate "text/template" // nosemgrep "time" @@ -816,3 +817,134 @@ func (mc MetricCollection) getCSV() (out string, err error) { } return } + +// filterByTimeRange filters all metric groups to only include rows within the specified time range +func (mc MetricCollection) filterByTimeRange(startTime, endTime float64) { + for i := range mc { + 
mc[i].filterByTimeRange(startTime, endTime) + } +} + +// filterByTimeRange filters the metric group to only include rows within the specified time range +func (mg *MetricGroup) filterByTimeRange(startTime, endTime float64) { + var filteredRows []row + for _, row := range mg.rows { + if row.timestamp >= startTime && row.timestamp <= endTime { + filteredRows = append(filteredRows, row) + } + } + mg.rows = filteredRows +} + +// writeCSV writes the metric collection to a CSV file +func (mc MetricCollection) writeCSV(path string) error { + if len(mc) == 0 { + return fmt.Errorf("no metrics to write") + } + + file, err := os.Create(path) // #nosec G304 + if err != nil { + return fmt.Errorf("failed to create CSV file: %w", err) + } + defer file.Close() + + // Write header + header := "timestamp,socket,cpu,cgroup" + for _, name := range mc[0].names { + header += "," + name + } + if _, err := file.WriteString(header + "\n"); err != nil { + return fmt.Errorf("failed to write header: %w", err) + } + + // Write rows from all metric groups + for _, mg := range mc { + for _, row := range mg.rows { + line := fmt.Sprintf("%d,%s,%s,%s", + int64(row.timestamp), + row.socket, + row.cpu, + row.cgroup) + for _, name := range mg.names { + val := row.metrics[name] + if math.IsNaN(val) { + line += "," + } else { + line += fmt.Sprintf(",%f", val) + } + } + if _, err := file.WriteString(line + "\n"); err != nil { + return fmt.Errorf("failed to write row: %w", err) + } + } + } + + return nil +} + +// loadMetadataIfExists attempts to load metadata from a companion JSON file +// Returns the metadata, whether it was found, and any error +func loadMetadataIfExists(csvPath string) (Metadata, bool, error) { + // Determine the expected metadata file path + // Input: hostname_metrics.csv -> hostname_metadata.json + dir := filepath.Dir(csvPath) + base := filepath.Base(csvPath) + baseName := strings.TrimSuffix(base, filepath.Ext(base)) + // Replace _metrics with _metadata + metadataName := strings.Replace(baseName, "_metrics", "_metadata", 1) + ".json" + metadataPath := filepath.Join(dir, metadataName) + + // Check if the metadata file exists + if _, err := os.Stat(metadataPath); os.IsNotExist(err) { + return Metadata{}, false, nil + } + + // Read and parse the metadata file + data, err := os.ReadFile(metadataPath) // #nosec G304 + if err != nil { + return Metadata{}, false, fmt.Errorf("failed to read metadata file: %w", err) + } + + var metadata Metadata + if err := json.Unmarshal(data, &metadata); err != nil { + return Metadata{}, false, fmt.Errorf("failed to parse metadata JSON: %w", err) + } + + slog.Info("loaded metadata from file", slog.String("file", metadataPath)) + return metadata, true, nil +} + +// generateTrimmedSummaries creates CSV and HTML summary reports for trimmed metrics +func generateTrimmedSummaries(csvPath, outputDir, targetName string, metadata Metadata, metricDefinitions []MetricDefinition) ([]string, error) { + filesCreated := []string{} + + // Read the trimmed metrics from CSV + metrics, err := newMetricCollection(csvPath) + if err != nil { + return filesCreated, fmt.Errorf("failed to read trimmed metrics: %w", err) + } + + // Generate CSV summary + out, err := metrics.getCSV() + if err != nil { + return filesCreated, fmt.Errorf("failed to generate CSV summary: %w", err) + } + csvSummaryFile := filepath.Join(outputDir, targetName+"_summary.csv") + if err := os.WriteFile(csvSummaryFile, []byte(out), 0644); err != nil { // #nosec G306 + return filesCreated, fmt.Errorf("failed to write CSV summary: %w", 
err) + } + filesCreated = append(filesCreated, csvSummaryFile) + + // Generate HTML summary + out, err = metrics.getHTML(metadata, metricDefinitions) + if err != nil { + return filesCreated, fmt.Errorf("failed to generate HTML summary: %w", err) + } + htmlSummaryFile := filepath.Join(outputDir, targetName+"_summary.html") + if err := os.WriteFile(htmlSummaryFile, []byte(out), 0644); err != nil { // #nosec G306 + return filesCreated, fmt.Errorf("failed to write HTML summary: %w", err) + } + filesCreated = append(filesCreated, htmlSummaryFile) + + return filesCreated, nil +} diff --git a/cmd/metrics/trim.go b/cmd/metrics/trim.go new file mode 100644 index 00000000..b0f2e598 --- /dev/null +++ b/cmd/metrics/trim.go @@ -0,0 +1,358 @@ +package metrics + +// Copyright (C) 2021-2025 Intel Corporation +// SPDX-License-Identifier: BSD-3-Clause + +import ( + "fmt" + "log/slog" + "os" + "path/filepath" + "strings" + + "perfspect/internal/common" + + "github.com/spf13/cobra" +) + +const trimCmdName = "trim" + +// trim command flags +var ( + flagTrimInput string + flagTrimStartTime int64 + flagTrimEndTime int64 + flagTrimStartOffset int64 + flagTrimEndOffset int64 + flagTrimOutputDir string + flagTrimSuffix string +) + +const ( + flagTrimInputName = "input" + flagTrimStartTimeName = "start-time" + flagTrimEndTimeName = "end-time" + flagTrimStartOffsetName = "start-offset" + flagTrimEndOffsetName = "end-offset" + flagTrimOutputDirName = "output-dir" + flagTrimSuffixName = "suffix" +) + +var trimExamples = []string{ + " Skip first 10 seconds and last 5 seconds: $ perfspect metrics trim --input host_metrics.csv --start-offset 10 --end-offset 5", + " Use absolute timestamps: $ perfspect metrics trim --input host_metrics.csv --start-time 1764174327 --end-time 1764174351", + " Custom output suffix: $ perfspect metrics trim --input host_metrics.csv --start-offset 10 --suffix steady_state", + " Specify output directory: $ perfspect metrics trim --input host_metrics.csv --start-offset 5 --output-dir ./trimmed", +} + +var trimCmd = &cobra.Command{ + Use: trimCmdName, + Short: "Refine metrics data to a specific time range", + Long: `Generate new summary reports from existing metrics CSV data by filtering to a specific time range. + +This is useful when you've collected metrics for an entire workload but want to analyze +only a specific portion, excluding setup, teardown, or other phases. The command reads an +existing metrics CSV file, filters rows to the specified time range, and generates new +summary reports (CSV and HTML). + +Time range can be specified using either: + - Absolute timestamps (--start-time and --end-time) + - Relative offsets from beginning/end (--start-offset and --end-offset) + +If a metadata JSON file exists alongside the input CSV, it will be used to generate +a complete HTML report with system summary. 
Otherwise, a simplified HTML report +without system summary will be generated.`, + Example: strings.Join(trimExamples, "\n"), + RunE: runTrimCmd, + PreRunE: validateTrimFlags, + SilenceErrors: true, +} + +func init() { + Cmd.AddCommand(trimCmd) + + trimCmd.Flags().StringVar(&flagTrimInput, flagTrimInputName, "", "path to the metrics CSV file to trim (required)") + trimCmd.Flags().Int64Var(&flagTrimStartTime, flagTrimStartTimeName, 0, "absolute start timestamp (seconds since epoch)") + trimCmd.Flags().Int64Var(&flagTrimEndTime, flagTrimEndTimeName, 0, "absolute end timestamp (seconds since epoch)") + trimCmd.Flags().Int64Var(&flagTrimStartOffset, flagTrimStartOffsetName, 0, "seconds to skip from the beginning of the data") + trimCmd.Flags().Int64Var(&flagTrimEndOffset, flagTrimEndOffsetName, 0, "seconds to exclude from the end of the data") + trimCmd.Flags().StringVar(&flagTrimOutputDir, flagTrimOutputDirName, "", "output directory (default: same directory as input file)") + trimCmd.Flags().StringVar(&flagTrimSuffix, flagTrimSuffixName, "trimmed", "suffix for output filenames") + + _ = trimCmd.MarkFlagRequired(flagTrimInputName) // error only occurs if flag doesn't exist + + // Set custom usage function to avoid parent's usage function issues + trimCmd.SetUsageFunc(func(cmd *cobra.Command) error { + fmt.Fprintf(cmd.OutOrStdout(), "%s\n\n", cmd.Long) + fmt.Fprintf(cmd.OutOrStdout(), "Usage:\n %s\n\n", cmd.UseLine()) + if cmd.HasExample() { + fmt.Fprintf(cmd.OutOrStdout(), "Examples:\n%s\n\n", cmd.Example) + } + if cmd.HasAvailableLocalFlags() { + fmt.Fprintf(cmd.OutOrStdout(), "Flags:\n%s\n", cmd.LocalFlags().FlagUsages()) + } + if cmd.HasAvailableInheritedFlags() { + fmt.Fprintf(cmd.OutOrStdout(), "Global Flags:\n%s\n", cmd.InheritedFlags().FlagUsages()) + } + return nil + }) +} + +// validateTrimFlags checks that the trim command flags are valid and consistent +func validateTrimFlags(cmd *cobra.Command, args []string) error { + // Check input file exists + if _, err := os.Stat(flagTrimInput); err != nil { + if os.IsNotExist(err) { + return common.FlagValidationError(cmd, fmt.Sprintf("input file does not exist: %s", flagTrimInput)) + } + return common.FlagValidationError(cmd, fmt.Sprintf("failed to access input file: %v", err)) + } + + // Check that input is a CSV file + if !strings.HasSuffix(strings.ToLower(flagTrimInput), ".csv") { + return common.FlagValidationError(cmd, fmt.Sprintf("input file must be a CSV file: %s", flagTrimInput)) + } + + // Check that at least one time parameter is provided + if flagTrimStartTime == 0 && flagTrimEndTime == 0 && flagTrimStartOffset == 0 && flagTrimEndOffset == 0 { + return common.FlagValidationError(cmd, "at least one time parameter must be specified (--start-time, --end-time, --start-offset, or --end-offset)") + } + + // Check that both absolute time and offset are not specified for start + if flagTrimStartTime != 0 && flagTrimStartOffset != 0 { + return common.FlagValidationError(cmd, "cannot specify both --start-time and --start-offset") + } + + // Check that both absolute time and offset are not specified for end + if flagTrimEndTime != 0 && flagTrimEndOffset != 0 { + return common.FlagValidationError(cmd, "cannot specify both --end-time and --end-offset") + } + + // Check for negative values + if flagTrimStartTime < 0 { + return common.FlagValidationError(cmd, "--start-time cannot be negative") + } + if flagTrimEndTime < 0 { + return common.FlagValidationError(cmd, "--end-time cannot be negative") + } + if flagTrimStartOffset < 0 { + return 
common.FlagValidationError(cmd, "--start-offset cannot be negative") + } + if flagTrimEndOffset < 0 { + return common.FlagValidationError(cmd, "--end-offset cannot be negative") + } + + // Check that absolute times are in order if both specified + if flagTrimStartTime != 0 && flagTrimEndTime != 0 && flagTrimStartTime >= flagTrimEndTime { + return common.FlagValidationError(cmd, "--start-time must be less than --end-time") + } + + // Validate output directory if specified + if flagTrimOutputDir != "" { + if info, err := os.Stat(flagTrimOutputDir); err != nil { + if os.IsNotExist(err) { + return common.FlagValidationError(cmd, fmt.Sprintf("output directory does not exist: %s", flagTrimOutputDir)) + } + return common.FlagValidationError(cmd, fmt.Sprintf("failed to access output directory: %v", err)) + } else if !info.IsDir() { + return common.FlagValidationError(cmd, fmt.Sprintf("output-dir must be a directory: %s", flagTrimOutputDir)) + } + } + + // Validate suffix is not empty and doesn't contain path separators + if flagTrimSuffix == "" { + return common.FlagValidationError(cmd, "--suffix cannot be empty") + } + if strings.ContainsAny(flagTrimSuffix, "/\\") { + return common.FlagValidationError(cmd, "--suffix cannot contain path separators") + } + + return nil +} + +// runTrimCmd executes the trim command +func runTrimCmd(cmd *cobra.Command, args []string) error { + slog.Info("trimming metrics data", + slog.String("input", flagTrimInput), + slog.Int64("start-time", flagTrimStartTime), + slog.Int64("end-time", flagTrimEndTime), + slog.Int64("start-offset", flagTrimStartOffset), + slog.Int64("end-offset", flagTrimEndOffset), + slog.String("suffix", flagTrimSuffix)) + + // Determine output directory + outputDir := flagTrimOutputDir + if outputDir == "" { + outputDir = filepath.Dir(flagTrimInput) + } + + // Load the original metrics CSV + slog.Info("loading metrics from CSV", slog.String("file", flagTrimInput)) + metrics, err := newMetricCollection(flagTrimInput) + if err != nil { + return fmt.Errorf("failed to load metrics from CSV: %w", err) + } + + if len(metrics) == 0 { + return fmt.Errorf("no metrics found in CSV file") + } + + // Calculate the time range + startTime, endTime, err := calculateTimeRange(metrics, flagTrimStartTime, flagTrimEndTime, + flagTrimStartOffset, flagTrimEndOffset) + if err != nil { + return fmt.Errorf("failed to calculate time range: %w", err) + } + + slog.Info("calculated time range", + slog.Int64("start", int64(startTime)), + slog.Int64("end", int64(endTime)), + slog.Float64("duration", endTime-startTime)) + + // Filter metrics by time range + originalRowCount := 0 + for i := range metrics { + originalRowCount += len(metrics[i].rows) + } + + metrics.filterByTimeRange(startTime, endTime) + + filteredRowCount := 0 + for i := range metrics { + filteredRowCount += len(metrics[i].rows) + } + + if filteredRowCount == 0 { + return fmt.Errorf("no data remains after filtering to time range [%.2f, %.2f]", startTime, endTime) + } + + slog.Info("filtered metrics", + slog.Int("original_rows", originalRowCount), + slog.Int("filtered_rows", filteredRowCount), + slog.Int("removed_rows", originalRowCount-filteredRowCount)) + + // Generate output filenames + inputBase := filepath.Base(flagTrimInput) + inputName := strings.TrimSuffix(inputBase, filepath.Ext(inputBase)) + + // Determine target name from input filename + // Input is typically "hostname_metrics.csv", target name is "hostname" + targetName := strings.TrimSuffix(inputName, "_metrics") + + // Write trimmed metrics CSV + 
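+	// Files land next to the input (or in --output-dir) using the
+	// <target>_metrics_<suffix>.csv naming scheme, e.g.
+	// hostname_metrics_trimmed.csv with the default suffix.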
trimmedCSVPath := filepath.Join(outputDir, targetName+"_metrics_"+flagTrimSuffix+".csv") + if err := metrics.writeCSV(trimmedCSVPath); err != nil { + return fmt.Errorf("failed to write trimmed CSV: %w", err) + } + slog.Info("wrote trimmed metrics CSV", slog.String("file", trimmedCSVPath)) + + // Try to load metadata if it exists + metadata, metadataFound, err := loadMetadataIfExists(flagTrimInput) + if err != nil { + slog.Warn("failed to load metadata, continuing without it", slog.String("error", err.Error())) + metadataFound = false + } + + if !metadataFound { + slog.Warn("metadata file not found, HTML report will not include system summary") + // Create minimal metadata for summary generation + metadata = Metadata{} + } + + // Load metric definitions for summary generation if we have a valid microarchitecture + var metricDefinitions []MetricDefinition + if metadataFound && metadata.Microarchitecture != "" { + loader, err := NewLoader(metadata.Microarchitecture) + if err != nil { + return fmt.Errorf("failed to create loader: %w", err) + } + loaderConfig := LoaderConfig{ + Metadata: metadata, + } + metricDefinitions, _, err = loader.Load(loaderConfig) + if err != nil { + return fmt.Errorf("failed to load metric definitions: %w", err) + } + } else { + // Use empty metric definitions if no metadata + metricDefinitions = []MetricDefinition{} + } + + // Generate summary files + // Pass the base name (including _metrics) and suffix to generate consistent filenames + trimmedBaseName := targetName + "_metrics_" + flagTrimSuffix + filesCreated, err := generateTrimmedSummaries(trimmedCSVPath, outputDir, trimmedBaseName, metadata, metricDefinitions) + if err != nil { + return fmt.Errorf("failed to generate summary files: %w", err) + } + + // Report success + fmt.Println("\nTrimmed metrics successfully created:") + fmt.Printf(" Trimmed CSV: %s\n", trimmedCSVPath) + for _, file := range filesCreated { + fileType := "Summary" + if strings.HasSuffix(file, ".html") { + fileType = "HTML Summary" + } else if strings.HasSuffix(file, ".csv") { + fileType = "CSV Summary" + } + fmt.Printf(" %s: %s\n", fileType, file) + } + fmt.Printf("\nTime range: %d - %d seconds (%.0f second duration)\n", int64(startTime), int64(endTime), endTime-startTime) + fmt.Printf("Rows: %d original, %d after trimming\n", originalRowCount, filteredRowCount) + + return nil +} + +// calculateTimeRange determines the actual start and end times based on the flags and data +func calculateTimeRange(metrics MetricCollection, startTime, endTime, startOffset, endOffset int64) (float64, float64, error) { + if len(metrics) == 0 || len(metrics[0].rows) == 0 { + return 0, 0, fmt.Errorf("no data available to calculate time range") + } + + // Find min and max timestamps in the data + minTimestamp := metrics[0].rows[0].timestamp + maxTimestamp := metrics[0].rows[0].timestamp + + for _, mg := range metrics { + for _, row := range mg.rows { + if row.timestamp < minTimestamp { + minTimestamp = row.timestamp + } + if row.timestamp > maxTimestamp { + maxTimestamp = row.timestamp + } + } + } + + // Calculate start time + calcStartTime := minTimestamp + if startTime != 0 { + calcStartTime = float64(startTime) + } else if startOffset != 0 { + calcStartTime = minTimestamp + float64(startOffset) + } + + // Calculate end time + calcEndTime := maxTimestamp + if endTime != 0 { + calcEndTime = float64(endTime) + } else if endOffset != 0 { + calcEndTime = maxTimestamp - float64(endOffset) + } + + // Validate the calculated range + if calcStartTime >= calcEndTime { + 
return 0, 0, fmt.Errorf("invalid time range: start (%d) >= end (%d)", int64(calcStartTime), int64(calcEndTime)) + } + + if calcStartTime > maxTimestamp { + return 0, 0, fmt.Errorf("start time (%d) is beyond the end of available data (%d)", int64(calcStartTime), int64(maxTimestamp)) + } + + if calcEndTime < minTimestamp { + return 0, 0, fmt.Errorf("end time (%d) is before the beginning of available data (%d)", int64(calcEndTime), int64(minTimestamp)) + } + + return calcStartTime, calcEndTime, nil +} diff --git a/cmd/metrics/trim_test.go b/cmd/metrics/trim_test.go new file mode 100644 index 00000000..13cfb57a --- /dev/null +++ b/cmd/metrics/trim_test.go @@ -0,0 +1,532 @@ +package metrics + +// Copyright (C) 2021-2025 Intel Corporation +// SPDX-License-Identifier: BSD-3-Clause + +import ( + "encoding/csv" + "fmt" + "os" + "path/filepath" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// createTestMetricsCSV creates a test CSV file with sample metrics data +func createTestMetricsCSV(t *testing.T, dir string, filename string) string { + path := filepath.Join(dir, filename) + file, err := os.Create(path) + require.NoError(t, err) + defer file.Close() + + writer := csv.NewWriter(file) + defer writer.Flush() + + // Write header + err = writer.Write([]string{"timestamp", "socket", "cpu", "cgroup", "metric_cpu_utilization", "metric_instructions"}) + require.NoError(t, err) + + // Write sample data with timestamps from 0 to 100 seconds + for i := 0; i <= 20; i++ { + timestamp := float64(i * 5) // 0, 5, 10, ..., 100 + err = writer.Write([]string{ + fmt.Sprintf("%.6f", timestamp), + "", + "", + "", + fmt.Sprintf("%.2f", 50.0+float64(i)), + fmt.Sprintf("%.0f", 1000000.0*float64(i+1)), + }) + require.NoError(t, err) + } + + return path +} + +func TestFilterByTimeRange(t *testing.T) { + // Create test data + metrics := MetricCollection{ + { + names: []string{"metric1", "metric2"}, + rows: []row{ + {timestamp: 10.0, metrics: map[string]float64{"metric1": 1.0, "metric2": 2.0}}, + {timestamp: 20.0, metrics: map[string]float64{"metric1": 3.0, "metric2": 4.0}}, + {timestamp: 30.0, metrics: map[string]float64{"metric1": 5.0, "metric2": 6.0}}, + {timestamp: 40.0, metrics: map[string]float64{"metric1": 7.0, "metric2": 8.0}}, + {timestamp: 50.0, metrics: map[string]float64{"metric1": 9.0, "metric2": 10.0}}, + }, + }, + } + + tests := []struct { + name string + startTime float64 + endTime float64 + expectedCount int + }{ + { + name: "filter middle range", + startTime: 20.0, + endTime: 40.0, + expectedCount: 3, // timestamps 20, 30, 40 + }, + { + name: "filter all", + startTime: 10.0, + endTime: 50.0, + expectedCount: 5, + }, + { + name: "filter to single point", + startTime: 30.0, + endTime: 30.0, + expectedCount: 1, + }, + { + name: "filter to none (range before data)", + startTime: 1.0, + endTime: 5.0, + expectedCount: 0, + }, + { + name: "filter to none (range after data)", + startTime: 60.0, + endTime: 70.0, + expectedCount: 0, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // Make a copy to avoid modifying the original + testMetrics := make(MetricCollection, len(metrics)) + for i := range metrics { + testMetrics[i] = MetricGroup{ + names: metrics[i].names, + rows: make([]row, len(metrics[i].rows)), + } + copy(testMetrics[i].rows, metrics[i].rows) + } + + // Apply filter + testMetrics.filterByTimeRange(tt.startTime, tt.endTime) + + // Check result + assert.Equal(t, tt.expectedCount, len(testMetrics[0].rows)) + + // Verify all 
remaining rows are in range + for _, row := range testMetrics[0].rows { + assert.GreaterOrEqual(t, row.timestamp, tt.startTime) + assert.LessOrEqual(t, row.timestamp, tt.endTime) + } + }) + } +} + +func TestCalculateTimeRange(t *testing.T) { + // Create test data spanning from 10.0 to 100.0 + metrics := MetricCollection{ + { + rows: []row{ + {timestamp: 10.0}, + {timestamp: 30.0}, + {timestamp: 50.0}, + {timestamp: 70.0}, + {timestamp: 100.0}, + }, + }, + } + + tests := []struct { + name string + startTime int64 + endTime int64 + startOffset int64 + endOffset int64 + wantStart float64 + wantEnd float64 + wantErr bool + }{ + { + name: "use absolute times", + startTime: 20, + endTime: 80, + wantStart: 20.0, + wantEnd: 80.0, + wantErr: false, + }, + { + name: "use offsets from beginning and end", + startOffset: 10, + endOffset: 5, + wantStart: 20.0, // 10.0 + 10.0 + wantEnd: 95.0, // 100.0 - 5.0 + wantErr: false, + }, + { + name: "use defaults (entire range)", + wantStart: 10.0, + wantEnd: 100.0, + wantErr: false, + }, + { + name: "use start offset only", + startOffset: 15, + wantStart: 25.0, + wantEnd: 100.0, + wantErr: false, + }, + { + name: "use end time only", + endTime: 60, + wantStart: 10.0, + wantEnd: 60.0, + wantErr: false, + }, + { + name: "invalid range (start >= end)", + startTime: 80, + endTime: 20, + wantErr: true, + }, + { + name: "invalid range (offset results in start >= end)", + startOffset: 50, + endOffset: 50, + wantErr: true, + }, + { + name: "start time beyond data", + startTime: 150, + endTime: 200, + wantErr: true, + }, + { + name: "end time before data", + startTime: 1, + endTime: 5, + wantErr: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + gotStart, gotEnd, err := calculateTimeRange(metrics, tt.startTime, tt.endTime, tt.startOffset, tt.endOffset) + + if tt.wantErr { + assert.Error(t, err) + } else { + require.NoError(t, err) + assert.Equal(t, tt.wantStart, gotStart) + assert.Equal(t, tt.wantEnd, gotEnd) + } + }) + } +} + +func TestWriteCSV(t *testing.T) { + tempDir := t.TempDir() + + tests := []struct { + name string + metrics MetricCollection + wantErr bool + }{ + { + name: "write simple metrics", + metrics: MetricCollection{ + { + names: []string{"metric1", "metric2"}, + rows: []row{ + { + timestamp: 10.5, + socket: "0", + cpu: "", + cgroup: "", + metrics: map[string]float64{"metric1": 1.5, "metric2": 2.5}, + }, + { + timestamp: 20.5, + socket: "0", + cpu: "", + cgroup: "", + metrics: map[string]float64{"metric1": 3.5, "metric2": 4.5}, + }, + }, + }, + }, + wantErr: false, + }, + { + name: "write empty collection", + metrics: MetricCollection{}, + wantErr: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + path := filepath.Join(tempDir, tt.name+".csv") + err := tt.metrics.writeCSV(path) + + if tt.wantErr { + assert.Error(t, err) + return + } + + require.NoError(t, err) + + // Verify file was created and has content + info, err := os.Stat(path) + require.NoError(t, err) + assert.Greater(t, info.Size(), int64(0)) + + // Read back and verify basic structure + file, err := os.Open(path) + require.NoError(t, err) + defer file.Close() + + reader := csv.NewReader(file) + records, err := reader.ReadAll() + require.NoError(t, err) + + // Should have header + data rows + expectedRows := 1 + len(tt.metrics[0].rows) + assert.Equal(t, expectedRows, len(records)) + + // Verify header + assert.Equal(t, "timestamp", records[0][0]) + assert.Equal(t, "socket", records[0][1]) + assert.Equal(t, "cpu", 
records[0][2]) + assert.Equal(t, "cgroup", records[0][3]) + }) + } +} + +func TestLoadMetadataIfExists(t *testing.T) { + tempDir := t.TempDir() + + t.Run("metadata exists", func(t *testing.T) { + // Create a metrics CSV file + metricsPath := filepath.Join(tempDir, "test_metrics.csv") + _, err := os.Create(metricsPath) + require.NoError(t, err) + + // Create a corresponding metadata JSON file + metadataPath := filepath.Join(tempDir, "test_metadata.json") + metadataContent := `{"Hostname":"testhost","Microarchitecture":"SPR"}` + err = os.WriteFile(metadataPath, []byte(metadataContent), 0644) + require.NoError(t, err) + + // Load metadata + metadata, found, err := loadMetadataIfExists(metricsPath) + require.NoError(t, err) + assert.True(t, found) + assert.Equal(t, "testhost", metadata.Hostname) + assert.Equal(t, "SPR", metadata.Microarchitecture) + }) + + t.Run("metadata does not exist", func(t *testing.T) { + // Create a metrics CSV file without metadata + metricsPath := filepath.Join(tempDir, "nometa_metrics.csv") + _, err := os.Create(metricsPath) + require.NoError(t, err) + + // Try to load metadata + _, found, err := loadMetadataIfExists(metricsPath) + require.NoError(t, err) + assert.False(t, found) + }) + + t.Run("metadata file is malformed", func(t *testing.T) { + // Create a metrics CSV file + metricsPath := filepath.Join(tempDir, "badmeta_metrics.csv") + _, err := os.Create(metricsPath) + require.NoError(t, err) + + // Create a malformed metadata JSON file + metadataPath := filepath.Join(tempDir, "badmeta_metadata.json") + err = os.WriteFile(metadataPath, []byte("not valid json{"), 0644) + require.NoError(t, err) + + // Try to load metadata + _, found, err := loadMetadataIfExists(metricsPath) + assert.Error(t, err) + assert.False(t, found) + }) +} + +func TestTrimValidateFlags(t *testing.T) { + tempDir := t.TempDir() + + // Create a test CSV file + testCSV := createTestMetricsCSV(t, tempDir, "test_metrics.csv") + + tests := []struct { + name string + setup func() + wantErr bool + errMsg string + }{ + { + name: "valid input file", + setup: func() { + flagTrimInput = testCSV + flagTrimStartOffset = 10 + flagTrimSuffix = "trimmed" + }, + wantErr: false, + }, + { + name: "no time parameters specified", + setup: func() { + flagTrimInput = testCSV + flagTrimSuffix = "trimmed" + }, + wantErr: true, + errMsg: "at least one time parameter must be specified", + }, + { + name: "input file does not exist", + setup: func() { + flagTrimInput = filepath.Join(tempDir, "nonexistent.csv") + flagTrimStartOffset = 10 + flagTrimSuffix = "trimmed" + }, + wantErr: true, + errMsg: "does not exist", + }, + { + name: "input is not a CSV file", + setup: func() { + txtFile := filepath.Join(tempDir, "test.txt") + _ = os.WriteFile(txtFile, []byte("test"), 0644) // #nosec G306 + flagTrimInput = txtFile + flagTrimStartOffset = 10 + flagTrimSuffix = "trimmed" + }, + wantErr: true, + errMsg: "must be a CSV file", + }, + { + name: "both start-time and start-offset specified", + setup: func() { + flagTrimInput = testCSV + flagTrimStartTime = 10 + flagTrimStartOffset = 5 + flagTrimSuffix = "trimmed" + }, + wantErr: true, + errMsg: "cannot specify both", + }, + { + name: "both end-time and end-offset specified", + setup: func() { + flagTrimInput = testCSV + flagTrimStartTime = 0 + flagTrimStartOffset = 0 + flagTrimEndTime = 50 + flagTrimEndOffset = 10 + flagTrimSuffix = "trimmed" + }, + wantErr: true, + errMsg: "cannot specify both", + }, + { + name: "negative start-time", + setup: func() { + flagTrimInput = testCSV + 
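+				// -10.0 is an untyped constant, so it still assigns to the
+				// int64 flag; validation must reject the negative value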
flagTrimStartTime = -10.0 + flagTrimEndTime = 0 + flagTrimStartOffset = 0 + flagTrimEndOffset = 0 + flagTrimSuffix = "trimmed" + }, + wantErr: true, + errMsg: "cannot be negative", + }, + { + name: "start-time >= end-time", + setup: func() { + flagTrimInput = testCSV + flagTrimStartTime = 50 + flagTrimEndTime = 40 + flagTrimStartOffset = 0 + flagTrimEndOffset = 0 + flagTrimSuffix = "trimmed" + }, + wantErr: true, + errMsg: "must be less than", + }, + { + name: "empty suffix", + setup: func() { + flagTrimInput = testCSV + flagTrimStartTime = 0 + flagTrimEndTime = 0 + flagTrimStartOffset = 10 + flagTrimEndOffset = 0 + flagTrimSuffix = "" + }, + wantErr: true, + errMsg: "cannot be empty", + }, + { + name: "suffix with path separator", + setup: func() { + flagTrimInput = testCSV + flagTrimSuffix = "trim/med" + flagTrimStartTime = 0 + flagTrimEndTime = 0 + flagTrimStartOffset = 10 + flagTrimEndOffset = 0 + }, + wantErr: true, + errMsg: "cannot contain path separators", + }, + { + name: "output directory does not exist", + setup: func() { + flagTrimInput = testCSV + flagTrimOutputDir = filepath.Join(tempDir, "nonexistent") + flagTrimSuffix = "trimmed" + flagTrimStartTime = 0 + flagTrimEndTime = 0 + flagTrimStartOffset = 10 + flagTrimEndOffset = 0 + }, + wantErr: true, + errMsg: "does not exist", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // Reset flags to defaults + flagTrimInput = "" + flagTrimStartTime = 0 + flagTrimEndTime = 0 + flagTrimStartOffset = 0 + flagTrimEndOffset = 0 + flagTrimOutputDir = "" + flagTrimSuffix = "trimmed" + + // Setup test-specific flags + tt.setup() + + // Validate + err := validateTrimFlags(trimCmd, nil) + + if tt.wantErr { + assert.Error(t, err) + if tt.errMsg != "" { + assert.Contains(t, err.Error(), tt.errMsg) + } + } else { + assert.NoError(t, err) + } + }) + } +} From 357c2522079072cb2c4916a2d31d3dccd81e0d8c Mon Sep 17 00:00:00 2001 From: "Harper, Jason M" Date: Fri, 28 Nov 2025 10:47:02 -0800 Subject: [PATCH 2/5] simplify Signed-off-by: Harper, Jason M --- cmd/metrics/summary.go | 146 ++-------- cmd/metrics/summary_test.go | 48 ++-- cmd/metrics/trim.go | 377 +++++++++++++------------ cmd/metrics/trim_test.go | 532 ------------------------------------ 4 files changed, 249 insertions(+), 854 deletions(-) delete mode 100644 cmd/metrics/trim_test.go diff --git a/cmd/metrics/summary.go b/cmd/metrics/summary.go index ffc7132c..7894673a 100644 --- a/cmd/metrics/summary.go +++ b/cmd/metrics/summary.go @@ -19,14 +19,19 @@ import ( "regexp" "slices" "strconv" - "strings" texttemplate "text/template" // nosemgrep "time" "github.com/casbin/govaluate" ) +// summarizeMetrics reads the metrics CSV from localOutputDir for targetName, +// generates summary files (CSV and HTML) using the provided metadata and metric definitions, +// and returns a list of created summary file paths. 
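+// It is a thin wrapper over summarizeMetricsWithTrim with no time filtering applied.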
func summarizeMetrics(localOutputDir string, targetName string, metadata Metadata, metricDefinitions []MetricDefinition) ([]string, error) { + return summarizeMetricsWithTrim(localOutputDir, targetName, metadata, metricDefinitions, 0, 0) +} +func summarizeMetricsWithTrim(localOutputDir string, targetName string, metadata Metadata, metricDefinitions []MetricDefinition, startTimestamp, endTimestamp int) ([]string, error) { filesCreated := []string{} // read the metrics from CSV csvMetricsFile := filepath.Join(localOutputDir, targetName+"_metrics.csv") @@ -34,9 +39,15 @@ func summarizeMetrics(localOutputDir string, targetName string, metadata Metadat if err != nil { return filesCreated, fmt.Errorf("failed to read metrics from %s: %w", csvMetricsFile, err) } - // exclude the final sample if metrics were collected with a workload - if metadata.WithWorkload { - metrics.excludeFinalSample() + if startTimestamp != 0 || endTimestamp != 0 { + // trim the metrics to the specified time range + metrics.filterByTimeRange(startTimestamp, endTimestamp) + } else { + // trim time range not specified, + // exclude the final sample if metrics were collected with a workload + if metadata.WithWorkload { + metrics.excludeFinalSample() + } } // csv summary out, err := metrics.getCSV() @@ -75,7 +86,7 @@ type metricStats struct { } type row struct { - timestamp float64 + timestamp int socket string cpu string cgroup string @@ -88,8 +99,8 @@ func newRow(fields []string, names []string) (r row, err error) { for fIdx, field := range fields { switch fIdx { case idxTimestamp: - var ts float64 - if ts, err = strconv.ParseFloat(field, 64); err != nil { + var ts int + if ts, err = strconv.Atoi(field); err != nil { return } r.timestamp = ts @@ -337,8 +348,8 @@ func (mc MetricCollection) aggregate() (m *MetricGroup, err error) { groupByValue: "", } // aggregate the rows by timestamp - timestampMap := make(map[float64][]map[string]float64) // map of timestamp to list of metric maps - var timestamps []float64 // list of timestamps in order + timestampMap := make(map[int][]map[string]float64) // map of timestamp to list of metric maps + var timestamps []int // list of timestamps in order for _, metrics := range mc { for _, row := range metrics.rows { if _, ok := timestampMap[row.timestamp]; !ok { @@ -819,14 +830,14 @@ func (mc MetricCollection) getCSV() (out string, err error) { } // filterByTimeRange filters all metric groups to only include rows within the specified time range -func (mc MetricCollection) filterByTimeRange(startTime, endTime float64) { +func (mc MetricCollection) filterByTimeRange(startTime, endTime int) { for i := range mc { mc[i].filterByTimeRange(startTime, endTime) } } // filterByTimeRange filters the metric group to only include rows within the specified time range -func (mg *MetricGroup) filterByTimeRange(startTime, endTime float64) { +func (mg *MetricGroup) filterByTimeRange(startTime, endTime int) { var filteredRows []row for _, row := range mg.rows { if row.timestamp >= startTime && row.timestamp <= endTime { @@ -835,116 +846,3 @@ func (mg *MetricGroup) filterByTimeRange(startTime, endTime float64) { } mg.rows = filteredRows } - -// writeCSV writes the metric collection to a CSV file -func (mc MetricCollection) writeCSV(path string) error { - if len(mc) == 0 { - return fmt.Errorf("no metrics to write") - } - - file, err := os.Create(path) // #nosec G304 - if err != nil { - return fmt.Errorf("failed to create CSV file: %w", err) - } - defer file.Close() - - // Write header - header := 
"timestamp,socket,cpu,cgroup" - for _, name := range mc[0].names { - header += "," + name - } - if _, err := file.WriteString(header + "\n"); err != nil { - return fmt.Errorf("failed to write header: %w", err) - } - - // Write rows from all metric groups - for _, mg := range mc { - for _, row := range mg.rows { - line := fmt.Sprintf("%d,%s,%s,%s", - int64(row.timestamp), - row.socket, - row.cpu, - row.cgroup) - for _, name := range mg.names { - val := row.metrics[name] - if math.IsNaN(val) { - line += "," - } else { - line += fmt.Sprintf(",%f", val) - } - } - if _, err := file.WriteString(line + "\n"); err != nil { - return fmt.Errorf("failed to write row: %w", err) - } - } - } - - return nil -} - -// loadMetadataIfExists attempts to load metadata from a companion JSON file -// Returns the metadata, whether it was found, and any error -func loadMetadataIfExists(csvPath string) (Metadata, bool, error) { - // Determine the expected metadata file path - // Input: hostname_metrics.csv -> hostname_metadata.json - dir := filepath.Dir(csvPath) - base := filepath.Base(csvPath) - baseName := strings.TrimSuffix(base, filepath.Ext(base)) - // Replace _metrics with _metadata - metadataName := strings.Replace(baseName, "_metrics", "_metadata", 1) + ".json" - metadataPath := filepath.Join(dir, metadataName) - - // Check if the metadata file exists - if _, err := os.Stat(metadataPath); os.IsNotExist(err) { - return Metadata{}, false, nil - } - - // Read and parse the metadata file - data, err := os.ReadFile(metadataPath) // #nosec G304 - if err != nil { - return Metadata{}, false, fmt.Errorf("failed to read metadata file: %w", err) - } - - var metadata Metadata - if err := json.Unmarshal(data, &metadata); err != nil { - return Metadata{}, false, fmt.Errorf("failed to parse metadata JSON: %w", err) - } - - slog.Info("loaded metadata from file", slog.String("file", metadataPath)) - return metadata, true, nil -} - -// generateTrimmedSummaries creates CSV and HTML summary reports for trimmed metrics -func generateTrimmedSummaries(csvPath, outputDir, targetName string, metadata Metadata, metricDefinitions []MetricDefinition) ([]string, error) { - filesCreated := []string{} - - // Read the trimmed metrics from CSV - metrics, err := newMetricCollection(csvPath) - if err != nil { - return filesCreated, fmt.Errorf("failed to read trimmed metrics: %w", err) - } - - // Generate CSV summary - out, err := metrics.getCSV() - if err != nil { - return filesCreated, fmt.Errorf("failed to generate CSV summary: %w", err) - } - csvSummaryFile := filepath.Join(outputDir, targetName+"_summary.csv") - if err := os.WriteFile(csvSummaryFile, []byte(out), 0644); err != nil { // #nosec G306 - return filesCreated, fmt.Errorf("failed to write CSV summary: %w", err) - } - filesCreated = append(filesCreated, csvSummaryFile) - - // Generate HTML summary - out, err = metrics.getHTML(metadata, metricDefinitions) - if err != nil { - return filesCreated, fmt.Errorf("failed to generate HTML summary: %w", err) - } - htmlSummaryFile := filepath.Join(outputDir, targetName+"_summary.html") - if err := os.WriteFile(htmlSummaryFile, []byte(out), 0644); err != nil { // #nosec G306 - return filesCreated, fmt.Errorf("failed to write HTML summary: %w", err) - } - filesCreated = append(filesCreated, htmlSummaryFile) - - return filesCreated, nil -} diff --git a/cmd/metrics/summary_test.go b/cmd/metrics/summary_test.go index 0f15fd0f..c4aaa218 100644 --- a/cmd/metrics/summary_test.go +++ b/cmd/metrics/summary_test.go @@ -14,46 +14,46 @@ func 
TestExcludeFinalSample(t *testing.T) { name string inputRows []row expectedCount int - expectedMaxTS float64 + expectedMaxTS int }{ { name: "exclude single final timestamp", inputRows: []row{ - {timestamp: 5.0, metrics: map[string]float64{"metric1": 100.0}}, - {timestamp: 10.0, metrics: map[string]float64{"metric1": 200.0}}, - {timestamp: 15.0, metrics: map[string]float64{"metric1": 150.0}}, - {timestamp: 20.0, metrics: map[string]float64{"metric1": 50.0}}, // this should be excluded + {timestamp: 5, metrics: map[string]float64{"metric1": 100.0}}, + {timestamp: 10, metrics: map[string]float64{"metric1": 200.0}}, + {timestamp: 15, metrics: map[string]float64{"metric1": 150.0}}, + {timestamp: 20, metrics: map[string]float64{"metric1": 50.0}}, // this should be excluded }, expectedCount: 3, - expectedMaxTS: 15.0, + expectedMaxTS: 15, }, { name: "exclude multiple rows with same final timestamp", inputRows: []row{ - {timestamp: 5.0, socket: "0", metrics: map[string]float64{"metric1": 100.0}}, - {timestamp: 10.0, socket: "0", metrics: map[string]float64{"metric1": 200.0}}, - {timestamp: 15.0, socket: "0", metrics: map[string]float64{"metric1": 150.0}}, - {timestamp: 15.0, socket: "1", metrics: map[string]float64{"metric1": 160.0}}, // same timestamp, different socket + {timestamp: 5, socket: "0", metrics: map[string]float64{"metric1": 100.0}}, + {timestamp: 10, socket: "0", metrics: map[string]float64{"metric1": 200.0}}, + {timestamp: 15, socket: "0", metrics: map[string]float64{"metric1": 150.0}}, + {timestamp: 15, socket: "1", metrics: map[string]float64{"metric1": 160.0}}, // same timestamp, different socket }, expectedCount: 2, - expectedMaxTS: 10.0, + expectedMaxTS: 10, }, { name: "single sample - should not exclude", inputRows: []row{ - {timestamp: 5.0, metrics: map[string]float64{"metric1": 100.0}}, + {timestamp: 5, metrics: map[string]float64{"metric1": 100.0}}, }, expectedCount: 1, - expectedMaxTS: 5.0, + expectedMaxTS: 5, }, { name: "two samples - exclude last one", inputRows: []row{ - {timestamp: 5.0, metrics: map[string]float64{"metric1": 100.0}}, - {timestamp: 10.0, metrics: map[string]float64{"metric1": 50.0}}, + {timestamp: 5, metrics: map[string]float64{"metric1": 100.0}}, + {timestamp: 10, metrics: map[string]float64{"metric1": 50.0}}, }, expectedCount: 1, - expectedMaxTS: 5.0, + expectedMaxTS: 5, }, } @@ -93,9 +93,9 @@ func TestExcludeFinalSampleMultipleGroups(t *testing.T) { groupByField: "SKT", groupByValue: "0", rows: []row{ - {timestamp: 5.0, socket: "0", metrics: map[string]float64{"metric1": 100.0}}, - {timestamp: 10.0, socket: "0", metrics: map[string]float64{"metric1": 200.0}}, - {timestamp: 15.0, socket: "0", metrics: map[string]float64{"metric1": 50.0}}, // should be excluded + {timestamp: 5, socket: "0", metrics: map[string]float64{"metric1": 100.0}}, + {timestamp: 10, socket: "0", metrics: map[string]float64{"metric1": 200.0}}, + {timestamp: 15, socket: "0", metrics: map[string]float64{"metric1": 50.0}}, // should be excluded }, }, MetricGroup{ @@ -103,9 +103,9 @@ func TestExcludeFinalSampleMultipleGroups(t *testing.T) { groupByField: "SKT", groupByValue: "1", rows: []row{ - {timestamp: 5.0, socket: "1", metrics: map[string]float64{"metric1": 110.0}}, - {timestamp: 10.0, socket: "1", metrics: map[string]float64{"metric1": 210.0}}, - {timestamp: 15.0, socket: "1", metrics: map[string]float64{"metric1": 60.0}}, // should be excluded + {timestamp: 5, socket: "1", metrics: map[string]float64{"metric1": 110.0}}, + {timestamp: 10, socket: "1", metrics: 
map[string]float64{"metric1": 210.0}}, + {timestamp: 15, socket: "1", metrics: map[string]float64{"metric1": 60.0}}, // should be excluded }, }, } @@ -117,8 +117,8 @@ func TestExcludeFinalSampleMultipleGroups(t *testing.T) { assert.Equal(t, 2, len(mc[1].rows), "socket 1 should have 2 rows") // Verify max timestamps - assert.Equal(t, 10.0, mc[0].rows[1].timestamp, "socket 0 max timestamp should be 10.0") - assert.Equal(t, 10.0, mc[1].rows[1].timestamp, "socket 1 max timestamp should be 10.0") + assert.Equal(t, 10, mc[0].rows[1].timestamp, "socket 0 max timestamp should be 10") + assert.Equal(t, 10, mc[1].rows[1].timestamp, "socket 1 max timestamp should be 10") } func TestExcludeFinalSampleEmptyCollection(t *testing.T) { diff --git a/cmd/metrics/trim.go b/cmd/metrics/trim.go index b0f2e598..b6c3de14 100644 --- a/cmd/metrics/trim.go +++ b/cmd/metrics/trim.go @@ -4,6 +4,7 @@ package metrics // SPDX-License-Identifier: BSD-3-Clause import ( + "encoding/json" "fmt" "log/slog" "os" @@ -20,12 +21,10 @@ const trimCmdName = "trim" // trim command flags var ( flagTrimInput string - flagTrimStartTime int64 - flagTrimEndTime int64 - flagTrimStartOffset int64 - flagTrimEndOffset int64 - flagTrimOutputDir string - flagTrimSuffix string + flagTrimStartTime int + flagTrimEndTime int + flagTrimStartOffset int + flagTrimEndOffset int ) const ( @@ -34,34 +33,18 @@ const ( flagTrimEndTimeName = "end-time" flagTrimStartOffsetName = "start-offset" flagTrimEndOffsetName = "end-offset" - flagTrimOutputDirName = "output-dir" - flagTrimSuffixName = "suffix" ) var trimExamples = []string{ - " Skip first 10 seconds and last 5 seconds: $ perfspect metrics trim --input host_metrics.csv --start-offset 10 --end-offset 5", - " Use absolute timestamps: $ perfspect metrics trim --input host_metrics.csv --start-time 1764174327 --end-time 1764174351", - " Custom output suffix: $ perfspect metrics trim --input host_metrics.csv --start-offset 10 --suffix steady_state", - " Specify output directory: $ perfspect metrics trim --input host_metrics.csv --start-offset 5 --output-dir ./trimmed", + " Skip first 30 seconds: $ perfspect metrics trim --input perfspect_2025-11-28_09-21-56 --start-offset 30", + " Skip first 10 seconds and last 5 seconds: $ perfspect metrics trim --input perfspect_2025-11-28_09-21-56 --start-offset 10 --end-offset 5", + " Use absolute timestamps and specific CSV: $ perfspect metrics trim --input perfspect_2025-11-28_09-21-56/myhost_metrics.csv --start-time 1764174327 --end-time 1764174351", } var trimCmd = &cobra.Command{ - Use: trimCmdName, - Short: "Refine metrics data to a specific time range", - Long: `Generate new summary reports from existing metrics CSV data by filtering to a specific time range. - -This is useful when you've collected metrics for an entire workload but want to analyze -only a specific portion, excluding setup, teardown, or other phases. The command reads an -existing metrics CSV file, filters rows to the specified time range, and generates new -summary reports (CSV and HTML). - -Time range can be specified using either: - - Absolute timestamps (--start-time and --end-time) - - Relative offsets from beginning/end (--start-offset and --end-offset) - -If a metadata JSON file exists alongside the input CSV, it will be used to generate -a complete HTML report with system summary. 
Otherwise, a simplified HTML report -without system summary will be generated.`, + Use: trimCmdName, + Short: "Generate new summary reports from existing metrics collection by filtering to a specific time range", + Long: "", Example: strings.Join(trimExamples, "\n"), RunE: runTrimCmd, PreRunE: validateTrimFlags, @@ -71,19 +54,16 @@ without system summary will be generated.`, func init() { Cmd.AddCommand(trimCmd) - trimCmd.Flags().StringVar(&flagTrimInput, flagTrimInputName, "", "path to the metrics CSV file to trim (required)") - trimCmd.Flags().Int64Var(&flagTrimStartTime, flagTrimStartTimeName, 0, "absolute start timestamp (seconds since epoch)") - trimCmd.Flags().Int64Var(&flagTrimEndTime, flagTrimEndTimeName, 0, "absolute end timestamp (seconds since epoch)") - trimCmd.Flags().Int64Var(&flagTrimStartOffset, flagTrimStartOffsetName, 0, "seconds to skip from the beginning of the data") - trimCmd.Flags().Int64Var(&flagTrimEndOffset, flagTrimEndOffsetName, 0, "seconds to exclude from the end of the data") - trimCmd.Flags().StringVar(&flagTrimOutputDir, flagTrimOutputDirName, "", "output directory (default: same directory as input file)") - trimCmd.Flags().StringVar(&flagTrimSuffix, flagTrimSuffixName, "trimmed", "suffix for output filenames") + trimCmd.Flags().StringVar(&flagTrimInput, flagTrimInputName, "", "path to the directory or specific metrics CSV file to trim (required)") + trimCmd.Flags().IntVar(&flagTrimStartTime, flagTrimStartTimeName, 0, "absolute start timestamp (seconds since epoch)") + trimCmd.Flags().IntVar(&flagTrimEndTime, flagTrimEndTimeName, 0, "absolute end timestamp (seconds since epoch)") + trimCmd.Flags().IntVar(&flagTrimStartOffset, flagTrimStartOffsetName, 0, "seconds to skip from the beginning of the data") + trimCmd.Flags().IntVar(&flagTrimEndOffset, flagTrimEndOffsetName, 0, "seconds to exclude from the end of the data") _ = trimCmd.MarkFlagRequired(flagTrimInputName) // error only occurs if flag doesn't exist // Set custom usage function to avoid parent's usage function issues trimCmd.SetUsageFunc(func(cmd *cobra.Command) error { - fmt.Fprintf(cmd.OutOrStdout(), "%s\n\n", cmd.Long) fmt.Fprintf(cmd.OutOrStdout(), "Usage:\n %s\n\n", cmd.UseLine()) if cmd.HasExample() { fmt.Fprintf(cmd.OutOrStdout(), "Examples:\n%s\n\n", cmd.Example) @@ -100,17 +80,12 @@ func init() { // validateTrimFlags checks that the trim command flags are valid and consistent func validateTrimFlags(cmd *cobra.Command, args []string) error { - // Check input file exists + // Check input file or directory exists if _, err := os.Stat(flagTrimInput); err != nil { if os.IsNotExist(err) { - return common.FlagValidationError(cmd, fmt.Sprintf("input file does not exist: %s", flagTrimInput)) + return common.FlagValidationError(cmd, fmt.Sprintf("input file or directory does not exist: %s", flagTrimInput)) } - return common.FlagValidationError(cmd, fmt.Sprintf("failed to access input file: %v", err)) - } - - // Check that input is a CSV file - if !strings.HasSuffix(strings.ToLower(flagTrimInput), ".csv") { - return common.FlagValidationError(cmd, fmt.Sprintf("input file must be a CSV file: %s", flagTrimInput)) + return common.FlagValidationError(cmd, fmt.Sprintf("failed to access input file or directory: %v", err)) } // Check that at least one time parameter is provided @@ -147,165 +122,219 @@ func validateTrimFlags(cmd *cobra.Command, args []string) error { return common.FlagValidationError(cmd, "--start-time must be less than --end-time") } - // Validate output directory if specified - if 
flagTrimOutputDir != "" { - if info, err := os.Stat(flagTrimOutputDir); err != nil { - if os.IsNotExist(err) { - return common.FlagValidationError(cmd, fmt.Sprintf("output directory does not exist: %s", flagTrimOutputDir)) - } - return common.FlagValidationError(cmd, fmt.Sprintf("failed to access output directory: %v", err)) - } else if !info.IsDir() { - return common.FlagValidationError(cmd, fmt.Sprintf("output-dir must be a directory: %s", flagTrimOutputDir)) - } - } - - // Validate suffix is not empty and doesn't contain path separators - if flagTrimSuffix == "" { - return common.FlagValidationError(cmd, "--suffix cannot be empty") - } - if strings.ContainsAny(flagTrimSuffix, "/\\") { - return common.FlagValidationError(cmd, "--suffix cannot contain path separators") - } - return nil } // runTrimCmd executes the trim command func runTrimCmd(cmd *cobra.Command, args []string) error { - slog.Info("trimming metrics data", - slog.String("input", flagTrimInput), - slog.Int64("start-time", flagTrimStartTime), - slog.Int64("end-time", flagTrimEndTime), - slog.Int64("start-offset", flagTrimStartOffset), - slog.Int64("end-offset", flagTrimEndOffset), - slog.String("suffix", flagTrimSuffix)) - - // Determine output directory - outputDir := flagTrimOutputDir - if outputDir == "" { - outputDir = filepath.Dir(flagTrimInput) - } + // appContext is the application context that holds common data and resources. + // appContext := cmd.Parent().Parent().Context().Value(common.AppContext{}).(common.AppContext) - // Load the original metrics CSV - slog.Info("loading metrics from CSV", slog.String("file", flagTrimInput)) - metrics, err := newMetricCollection(flagTrimInput) + // flagTrimInput can be a file or directory + // get the directory and use it as output dir + var sourceDir string + fileInfo, err := os.Stat(flagTrimInput) if err != nil { - return fmt.Errorf("failed to load metrics from CSV: %w", err) - } - - if len(metrics) == 0 { - return fmt.Errorf("no metrics found in CSV file") + err = fmt.Errorf("failed to access input path: %w", err) + fmt.Fprintf(os.Stderr, "Error: %v\n", err) + slog.Error(err.Error()) + cmd.SilenceUsage = true + return err + } + if fileInfo.IsDir() { + sourceDir = flagTrimInput + } else { + sourceDir = filepath.Dir(flagTrimInput) } + outputDir := sourceDir - // Calculate the time range - startTime, endTime, err := calculateTimeRange(metrics, flagTrimStartTime, flagTrimEndTime, - flagTrimStartOffset, flagTrimEndOffset) + sourceInfos, err := getTrimmedSourceInfos(flagTrimInput) if err != nil { - return fmt.Errorf("failed to calculate time range: %w", err) - } - - slog.Info("calculated time range", - slog.Int64("start", int64(startTime)), - slog.Int64("end", int64(endTime)), - slog.Float64("duration", endTime-startTime)) - - // Filter metrics by time range - originalRowCount := 0 - for i := range metrics { - originalRowCount += len(metrics[i].rows) - } - - metrics.filterByTimeRange(startTime, endTime) - - filteredRowCount := 0 - for i := range metrics { - filteredRowCount += len(metrics[i].rows) + err = fmt.Errorf("failed to determine source files: %w", err) + fmt.Fprintf(os.Stderr, "Error: %v\n", err) + slog.Error(err.Error()) + cmd.SilenceUsage = true + return err + } + if len(sourceInfos) == 0 { + err = fmt.Errorf("no valid metrics CSV files found to trim in: %s", sourceDir) + fmt.Fprintf(os.Stderr, "Error: %v\n", err) + slog.Error(err.Error()) + cmd.SilenceUsage = true + return err + } + + var filesCreated []string + for _, sourceInfo := range sourceInfos { + filesCreated, 
err = summarizeMetricsWithTrim(outputDir, sourceInfo.targetName, sourceInfo.metadata, sourceInfo.metricDefinitions, sourceInfo.startTime, sourceInfo.endTime) + if err != nil { + err = fmt.Errorf("failed to generate trimmed summaries for %s: %w", sourceInfo.allCSVPath, err) + fmt.Fprintf(os.Stderr, "Error: %v\n", err) + slog.Error(err.Error()) + cmd.SilenceUsage = true + return err + } } - if filteredRowCount == 0 { - return fmt.Errorf("no data remains after filtering to time range [%.2f, %.2f]", startTime, endTime) + // Report success + fmt.Println("\nTrimmed metrics successfully created:") + for _, filePath := range filesCreated { + fmt.Printf(" %s\n", filePath) } - slog.Info("filtered metrics", - slog.Int("original_rows", originalRowCount), - slog.Int("filtered_rows", filteredRowCount), - slog.Int("removed_rows", originalRowCount-filteredRowCount)) - - // Generate output filenames - inputBase := filepath.Base(flagTrimInput) - inputName := strings.TrimSuffix(inputBase, filepath.Ext(inputBase)) + return nil +} - // Determine target name from input filename - // Input is typically "hostname_metrics.csv", target name is "hostname" - targetName := strings.TrimSuffix(inputName, "_metrics") +type trimSourceInfo struct { + allCSVPath string + summaryCSVPath string + summaryHTMLPath string + targetName string + metadata Metadata + metricDefinitions []MetricDefinition + startTime int + endTime int +} - // Write trimmed metrics CSV - trimmedCSVPath := filepath.Join(outputDir, targetName+"_metrics_"+flagTrimSuffix+".csv") - if err := metrics.writeCSV(trimmedCSVPath); err != nil { - return fmt.Errorf("failed to write trimmed CSV: %w", err) - } - slog.Info("wrote trimmed metrics CSV", slog.String("file", trimmedCSVPath)) +func getTrimmedSourceInfos(sourceDirOrFilename string) ([]trimSourceInfo, error) { + var sourceInfos []trimSourceInfo + + // If a specific file is provided, use that + if sourceDirOrFilename != "" && strings.HasSuffix(strings.ToLower(sourceDirOrFilename), ".csv") { + baseName := strings.TrimSuffix(filepath.Base(sourceDirOrFilename), filepath.Ext(sourceDirOrFilename)) + summaryCSV := filepath.Join(filepath.Dir(sourceDirOrFilename), baseName+"_summary.csv") + summaryHTML := filepath.Join(filepath.Dir(sourceDirOrFilename), baseName+"_summary.html") + sourceInfos = append(sourceInfos, trimSourceInfo{ + allCSVPath: sourceDirOrFilename, + summaryCSVPath: summaryCSV, + summaryHTMLPath: summaryHTML, + }) + } else { - // Try to load metadata if it exists - metadata, metadataFound, err := loadMetadataIfExists(flagTrimInput) - if err != nil { - slog.Warn("failed to load metadata, continuing without it", slog.String("error", err.Error())) - metadataFound = false - } + // Otherwise, scan the directory for all *_metrics.csv files + files, err := os.ReadDir(sourceDirOrFilename) + if err != nil { + return nil, fmt.Errorf("failed to read directory: %w", err) + } - if !metadataFound { - slog.Warn("metadata file not found, HTML report will not include system summary") - // Create minimal metadata for summary generation - metadata = Metadata{} + for _, file := range files { + if file.IsDir() { + continue + } + if strings.HasSuffix(strings.ToLower(file.Name()), "_metrics.csv") { + baseName := strings.TrimSuffix(file.Name(), filepath.Ext(file.Name())) + allCSVPath := filepath.Join(sourceDirOrFilename, file.Name()) + summaryCSV := filepath.Join(sourceDirOrFilename, baseName+"_summary.csv") + summaryHTML := filepath.Join(sourceDirOrFilename, baseName+"_summary.html") + sourceInfos = append(sourceInfos, 
trimSourceInfo{ + allCSVPath: allCSVPath, + summaryCSVPath: summaryCSV, + summaryHTMLPath: summaryHTML, + }) + } + } } - // Load metric definitions for summary generation if we have a valid microarchitecture - var metricDefinitions []MetricDefinition - if metadataFound && metadata.Microarchitecture != "" { - loader, err := NewLoader(metadata.Microarchitecture) + for i, sourceInfo := range sourceInfos { + // Determine target name from filename + inputBase := filepath.Base(sourceInfo.allCSVPath) + inputName := strings.TrimSuffix(inputBase, filepath.Ext(inputBase)) + targetName := strings.TrimSuffix(inputName, "_metrics") + sourceInfos[i].targetName = targetName + // Load all metrics to determine time range + metrics, err := newMetricCollection(sourceInfo.allCSVPath) if err != nil { - return fmt.Errorf("failed to create loader: %w", err) + return nil, fmt.Errorf("failed to load metrics from CSV: %w", err) } - loaderConfig := LoaderConfig{ - Metadata: metadata, + if len(metrics) == 0 { + return nil, fmt.Errorf("no metrics found in CSV file") } - metricDefinitions, _, err = loader.Load(loaderConfig) + // Calculate the time range + startTime, endTime, err := calculateTimeRange(metrics, flagTrimStartTime, flagTrimEndTime, flagTrimStartOffset, flagTrimEndOffset) if err != nil { - return fmt.Errorf("failed to load metric definitions: %w", err) + return nil, fmt.Errorf("failed to calculate time range: %w", err) } - } else { - // Use empty metric definitions if no metadata - metricDefinitions = []MetricDefinition{} + sourceInfos[i].startTime = startTime + sourceInfos[i].endTime = endTime + // Retrieve the metadata from the HTML summary + metadata, err := loadMetadataFromHTMLSummary(sourceInfo.summaryHTMLPath) + if err != nil { + return nil, fmt.Errorf("failed to load metadata from HTML summary: %w", err) + } + sourceInfos[i].metadata = metadata + // Load metric definitions using the metadata + metricDefinitions, err := loadMetricDefinitions(metadata) + if err != nil { + return nil, fmt.Errorf("failed to get metric definitions: %w", err) + } + sourceInfos[i].metricDefinitions = metricDefinitions } - // Generate summary files - // Pass the base name (including _metrics) and suffix to generate consistent filenames - trimmedBaseName := targetName + "_metrics_" + flagTrimSuffix - filesCreated, err := generateTrimmedSummaries(trimmedCSVPath, outputDir, trimmedBaseName, metadata, metricDefinitions) + return sourceInfos, nil +} + +func loadMetricDefinitions(metadata Metadata) ([]MetricDefinition, error) { + loader, err := NewLoader(metadata.Microarchitecture) + if err != nil { + return nil, fmt.Errorf("failed to create metric definition loader: %w", err) + } + metricDefinitions, _, err := loader.Load(getLoaderConfig(loader, []string{}, metadata, "", "")) if err != nil { - return fmt.Errorf("failed to generate summary files: %w", err) + return nil, fmt.Errorf("failed to load metric definitions: %w", err) } + return metricDefinitions, nil +} - // Report success - fmt.Println("\nTrimmed metrics successfully created:") - fmt.Printf(" Trimmed CSV: %s\n", trimmedCSVPath) - for _, file := range filesCreated { - fileType := "Summary" - if strings.HasSuffix(file, ".html") { - fileType = "HTML Summary" - } else if strings.HasSuffix(file, ".csv") { - fileType = "CSV Summary" +func loadMetadataFromHTMLSummary(summaryHTMLPath string) (Metadata, error) { + var metadata Metadata + // Check if the summary HTML file exists + _, err := os.Stat(summaryHTMLPath) + if err != nil { + return metadata, fmt.Errorf("summary HTML file 
does not exist: %s", summaryHTMLPath) + } + + // find "const metadata = " and "const system_info = " in HTML file. + // The JSON string follows the equals sign. + // e.g., const metadata = {"NumGeneralPurposeCounters":8,"SocketCount":2, ... } + content, err := os.ReadFile(summaryHTMLPath) + if err != nil { + return metadata, fmt.Errorf("failed to read summary HTML file: %w", err) + } + + // assumes system_info comes after metadata in the file + const metadataPrefix = "const metadata = " + const systemInfoPrefix = "const system_info = " + for line := range strings.SplitSeq(string(content), "\n") { + line = strings.TrimSpace(line) + if strings.HasPrefix(line, metadataPrefix) { + jsonStart := len(metadataPrefix) + // to end of line + jsonString := strings.TrimSpace(line[jsonStart:]) + // parse JSON string into Metadata struct + err = json.Unmarshal([]byte(jsonString), &metadata) + if err != nil { + return metadata, fmt.Errorf("failed to parse metadata JSON: %w", err) + } + } else if strings.HasPrefix(line, systemInfoPrefix) { + // system info + var systemInfo [][]string + jsonStart := len(systemInfoPrefix) + jsonString := strings.TrimSpace(line[jsonStart:]) + err = json.Unmarshal([]byte(jsonString), &systemInfo) + if err != nil { + return metadata, fmt.Errorf("failed to parse system info JSON: %w", err) + } + metadata.SystemSummaryFields = systemInfo + return metadata, nil } - fmt.Printf(" %s: %s\n", fileType, file) } - fmt.Printf("\nTime range: %d - %d seconds (%.0f second duration)\n", int64(startTime), int64(endTime), endTime-startTime) - fmt.Printf("Rows: %d original, %d after trimming\n", originalRowCount, filteredRowCount) - return nil + return metadata, fmt.Errorf("metadata not found in summary HTML file: %s", summaryHTMLPath) } // calculateTimeRange determines the actual start and end times based on the flags and data -func calculateTimeRange(metrics MetricCollection, startTime, endTime, startOffset, endOffset int64) (float64, float64, error) { +// Returns startTime, endTime, error +func calculateTimeRange(metrics MetricCollection, startTime, endTime, startOffset, endOffset int) (int, int, error) { if len(metrics) == 0 || len(metrics[0].rows) == 0 { return 0, 0, fmt.Errorf("no data available to calculate time range") } @@ -328,30 +357,30 @@ func calculateTimeRange(metrics MetricCollection, startTime, endTime, startOffse // Calculate start time calcStartTime := minTimestamp if startTime != 0 { - calcStartTime = float64(startTime) + calcStartTime = startTime } else if startOffset != 0 { - calcStartTime = minTimestamp + float64(startOffset) + calcStartTime = minTimestamp + startOffset } // Calculate end time calcEndTime := maxTimestamp if endTime != 0 { - calcEndTime = float64(endTime) + calcEndTime = endTime } else if endOffset != 0 { - calcEndTime = maxTimestamp - float64(endOffset) + calcEndTime = maxTimestamp - endOffset } // Validate the calculated range if calcStartTime >= calcEndTime { - return 0, 0, fmt.Errorf("invalid time range: start (%d) >= end (%d)", int64(calcStartTime), int64(calcEndTime)) + return 0, 0, fmt.Errorf("invalid time range: start (%d) >= end (%d)", calcStartTime, calcEndTime) } if calcStartTime > maxTimestamp { - return 0, 0, fmt.Errorf("start time (%d) is beyond the end of available data (%d)", int64(calcStartTime), int64(maxTimestamp)) + return 0, 0, fmt.Errorf("start time (%d) is beyond the end of available data (%d)", calcStartTime, maxTimestamp) } if calcEndTime < minTimestamp { - return 0, 0, fmt.Errorf("end time (%d) is before the beginning of available 
data (%d)", int64(calcEndTime), int64(minTimestamp)) + return 0, 0, fmt.Errorf("end time (%d) is before the beginning of available data (%d)", calcEndTime, minTimestamp) } return calcStartTime, calcEndTime, nil diff --git a/cmd/metrics/trim_test.go b/cmd/metrics/trim_test.go deleted file mode 100644 index 13cfb57a..00000000 --- a/cmd/metrics/trim_test.go +++ /dev/null @@ -1,532 +0,0 @@ -package metrics - -// Copyright (C) 2021-2025 Intel Corporation -// SPDX-License-Identifier: BSD-3-Clause - -import ( - "encoding/csv" - "fmt" - "os" - "path/filepath" - "testing" - - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" -) - -// createTestMetricsCSV creates a test CSV file with sample metrics data -func createTestMetricsCSV(t *testing.T, dir string, filename string) string { - path := filepath.Join(dir, filename) - file, err := os.Create(path) - require.NoError(t, err) - defer file.Close() - - writer := csv.NewWriter(file) - defer writer.Flush() - - // Write header - err = writer.Write([]string{"timestamp", "socket", "cpu", "cgroup", "metric_cpu_utilization", "metric_instructions"}) - require.NoError(t, err) - - // Write sample data with timestamps from 0 to 100 seconds - for i := 0; i <= 20; i++ { - timestamp := float64(i * 5) // 0, 5, 10, ..., 100 - err = writer.Write([]string{ - fmt.Sprintf("%.6f", timestamp), - "", - "", - "", - fmt.Sprintf("%.2f", 50.0+float64(i)), - fmt.Sprintf("%.0f", 1000000.0*float64(i+1)), - }) - require.NoError(t, err) - } - - return path -} - -func TestFilterByTimeRange(t *testing.T) { - // Create test data - metrics := MetricCollection{ - { - names: []string{"metric1", "metric2"}, - rows: []row{ - {timestamp: 10.0, metrics: map[string]float64{"metric1": 1.0, "metric2": 2.0}}, - {timestamp: 20.0, metrics: map[string]float64{"metric1": 3.0, "metric2": 4.0}}, - {timestamp: 30.0, metrics: map[string]float64{"metric1": 5.0, "metric2": 6.0}}, - {timestamp: 40.0, metrics: map[string]float64{"metric1": 7.0, "metric2": 8.0}}, - {timestamp: 50.0, metrics: map[string]float64{"metric1": 9.0, "metric2": 10.0}}, - }, - }, - } - - tests := []struct { - name string - startTime float64 - endTime float64 - expectedCount int - }{ - { - name: "filter middle range", - startTime: 20.0, - endTime: 40.0, - expectedCount: 3, // timestamps 20, 30, 40 - }, - { - name: "filter all", - startTime: 10.0, - endTime: 50.0, - expectedCount: 5, - }, - { - name: "filter to single point", - startTime: 30.0, - endTime: 30.0, - expectedCount: 1, - }, - { - name: "filter to none (range before data)", - startTime: 1.0, - endTime: 5.0, - expectedCount: 0, - }, - { - name: "filter to none (range after data)", - startTime: 60.0, - endTime: 70.0, - expectedCount: 0, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - // Make a copy to avoid modifying the original - testMetrics := make(MetricCollection, len(metrics)) - for i := range metrics { - testMetrics[i] = MetricGroup{ - names: metrics[i].names, - rows: make([]row, len(metrics[i].rows)), - } - copy(testMetrics[i].rows, metrics[i].rows) - } - - // Apply filter - testMetrics.filterByTimeRange(tt.startTime, tt.endTime) - - // Check result - assert.Equal(t, tt.expectedCount, len(testMetrics[0].rows)) - - // Verify all remaining rows are in range - for _, row := range testMetrics[0].rows { - assert.GreaterOrEqual(t, row.timestamp, tt.startTime) - assert.LessOrEqual(t, row.timestamp, tt.endTime) - } - }) - } -} - -func TestCalculateTimeRange(t *testing.T) { - // Create test data spanning from 10.0 to 
100.0 - metrics := MetricCollection{ - { - rows: []row{ - {timestamp: 10.0}, - {timestamp: 30.0}, - {timestamp: 50.0}, - {timestamp: 70.0}, - {timestamp: 100.0}, - }, - }, - } - - tests := []struct { - name string - startTime int64 - endTime int64 - startOffset int64 - endOffset int64 - wantStart float64 - wantEnd float64 - wantErr bool - }{ - { - name: "use absolute times", - startTime: 20, - endTime: 80, - wantStart: 20.0, - wantEnd: 80.0, - wantErr: false, - }, - { - name: "use offsets from beginning and end", - startOffset: 10, - endOffset: 5, - wantStart: 20.0, // 10.0 + 10.0 - wantEnd: 95.0, // 100.0 - 5.0 - wantErr: false, - }, - { - name: "use defaults (entire range)", - wantStart: 10.0, - wantEnd: 100.0, - wantErr: false, - }, - { - name: "use start offset only", - startOffset: 15, - wantStart: 25.0, - wantEnd: 100.0, - wantErr: false, - }, - { - name: "use end time only", - endTime: 60, - wantStart: 10.0, - wantEnd: 60.0, - wantErr: false, - }, - { - name: "invalid range (start >= end)", - startTime: 80, - endTime: 20, - wantErr: true, - }, - { - name: "invalid range (offset results in start >= end)", - startOffset: 50, - endOffset: 50, - wantErr: true, - }, - { - name: "start time beyond data", - startTime: 150, - endTime: 200, - wantErr: true, - }, - { - name: "end time before data", - startTime: 1, - endTime: 5, - wantErr: true, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - gotStart, gotEnd, err := calculateTimeRange(metrics, tt.startTime, tt.endTime, tt.startOffset, tt.endOffset) - - if tt.wantErr { - assert.Error(t, err) - } else { - require.NoError(t, err) - assert.Equal(t, tt.wantStart, gotStart) - assert.Equal(t, tt.wantEnd, gotEnd) - } - }) - } -} - -func TestWriteCSV(t *testing.T) { - tempDir := t.TempDir() - - tests := []struct { - name string - metrics MetricCollection - wantErr bool - }{ - { - name: "write simple metrics", - metrics: MetricCollection{ - { - names: []string{"metric1", "metric2"}, - rows: []row{ - { - timestamp: 10.5, - socket: "0", - cpu: "", - cgroup: "", - metrics: map[string]float64{"metric1": 1.5, "metric2": 2.5}, - }, - { - timestamp: 20.5, - socket: "0", - cpu: "", - cgroup: "", - metrics: map[string]float64{"metric1": 3.5, "metric2": 4.5}, - }, - }, - }, - }, - wantErr: false, - }, - { - name: "write empty collection", - metrics: MetricCollection{}, - wantErr: true, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - path := filepath.Join(tempDir, tt.name+".csv") - err := tt.metrics.writeCSV(path) - - if tt.wantErr { - assert.Error(t, err) - return - } - - require.NoError(t, err) - - // Verify file was created and has content - info, err := os.Stat(path) - require.NoError(t, err) - assert.Greater(t, info.Size(), int64(0)) - - // Read back and verify basic structure - file, err := os.Open(path) - require.NoError(t, err) - defer file.Close() - - reader := csv.NewReader(file) - records, err := reader.ReadAll() - require.NoError(t, err) - - // Should have header + data rows - expectedRows := 1 + len(tt.metrics[0].rows) - assert.Equal(t, expectedRows, len(records)) - - // Verify header - assert.Equal(t, "timestamp", records[0][0]) - assert.Equal(t, "socket", records[0][1]) - assert.Equal(t, "cpu", records[0][2]) - assert.Equal(t, "cgroup", records[0][3]) - }) - } -} - -func TestLoadMetadataIfExists(t *testing.T) { - tempDir := t.TempDir() - - t.Run("metadata exists", func(t *testing.T) { - // Create a metrics CSV file - metricsPath := filepath.Join(tempDir, "test_metrics.csv") - _, err := 
os.Create(metricsPath) - require.NoError(t, err) - - // Create a corresponding metadata JSON file - metadataPath := filepath.Join(tempDir, "test_metadata.json") - metadataContent := `{"Hostname":"testhost","Microarchitecture":"SPR"}` - err = os.WriteFile(metadataPath, []byte(metadataContent), 0644) - require.NoError(t, err) - - // Load metadata - metadata, found, err := loadMetadataIfExists(metricsPath) - require.NoError(t, err) - assert.True(t, found) - assert.Equal(t, "testhost", metadata.Hostname) - assert.Equal(t, "SPR", metadata.Microarchitecture) - }) - - t.Run("metadata does not exist", func(t *testing.T) { - // Create a metrics CSV file without metadata - metricsPath := filepath.Join(tempDir, "nometa_metrics.csv") - _, err := os.Create(metricsPath) - require.NoError(t, err) - - // Try to load metadata - _, found, err := loadMetadataIfExists(metricsPath) - require.NoError(t, err) - assert.False(t, found) - }) - - t.Run("metadata file is malformed", func(t *testing.T) { - // Create a metrics CSV file - metricsPath := filepath.Join(tempDir, "badmeta_metrics.csv") - _, err := os.Create(metricsPath) - require.NoError(t, err) - - // Create a malformed metadata JSON file - metadataPath := filepath.Join(tempDir, "badmeta_metadata.json") - err = os.WriteFile(metadataPath, []byte("not valid json{"), 0644) - require.NoError(t, err) - - // Try to load metadata - _, found, err := loadMetadataIfExists(metricsPath) - assert.Error(t, err) - assert.False(t, found) - }) -} - -func TestTrimValidateFlags(t *testing.T) { - tempDir := t.TempDir() - - // Create a test CSV file - testCSV := createTestMetricsCSV(t, tempDir, "test_metrics.csv") - - tests := []struct { - name string - setup func() - wantErr bool - errMsg string - }{ - { - name: "valid input file", - setup: func() { - flagTrimInput = testCSV - flagTrimStartOffset = 10 - flagTrimSuffix = "trimmed" - }, - wantErr: false, - }, - { - name: "no time parameters specified", - setup: func() { - flagTrimInput = testCSV - flagTrimSuffix = "trimmed" - }, - wantErr: true, - errMsg: "at least one time parameter must be specified", - }, - { - name: "input file does not exist", - setup: func() { - flagTrimInput = filepath.Join(tempDir, "nonexistent.csv") - flagTrimStartOffset = 10 - flagTrimSuffix = "trimmed" - }, - wantErr: true, - errMsg: "does not exist", - }, - { - name: "input is not a CSV file", - setup: func() { - txtFile := filepath.Join(tempDir, "test.txt") - _ = os.WriteFile(txtFile, []byte("test"), 0644) // #nosec G306 - flagTrimInput = txtFile - flagTrimStartOffset = 10 - flagTrimSuffix = "trimmed" - }, - wantErr: true, - errMsg: "must be a CSV file", - }, - { - name: "both start-time and start-offset specified", - setup: func() { - flagTrimInput = testCSV - flagTrimStartTime = 10 - flagTrimStartOffset = 5 - flagTrimSuffix = "trimmed" - }, - wantErr: true, - errMsg: "cannot specify both", - }, - { - name: "both end-time and end-offset specified", - setup: func() { - flagTrimInput = testCSV - flagTrimStartTime = 0 - flagTrimStartOffset = 0 - flagTrimEndTime = 50 - flagTrimEndOffset = 10 - flagTrimSuffix = "trimmed" - }, - wantErr: true, - errMsg: "cannot specify both", - }, - { - name: "negative start-time", - setup: func() { - flagTrimInput = testCSV - flagTrimStartTime = -10.0 - flagTrimEndTime = 0 - flagTrimStartOffset = 0 - flagTrimEndOffset = 0 - flagTrimSuffix = "trimmed" - }, - wantErr: true, - errMsg: "cannot be negative", - }, - { - name: "start-time >= end-time", - setup: func() { - flagTrimInput = testCSV - flagTrimStartTime = 50 - 
flagTrimEndTime = 40 - flagTrimStartOffset = 0 - flagTrimEndOffset = 0 - flagTrimSuffix = "trimmed" - }, - wantErr: true, - errMsg: "must be less than", - }, - { - name: "empty suffix", - setup: func() { - flagTrimInput = testCSV - flagTrimStartTime = 0 - flagTrimEndTime = 0 - flagTrimStartOffset = 10 - flagTrimEndOffset = 0 - flagTrimSuffix = "" - }, - wantErr: true, - errMsg: "cannot be empty", - }, - { - name: "suffix with path separator", - setup: func() { - flagTrimInput = testCSV - flagTrimSuffix = "trim/med" - flagTrimStartTime = 0 - flagTrimEndTime = 0 - flagTrimStartOffset = 10 - flagTrimEndOffset = 0 - }, - wantErr: true, - errMsg: "cannot contain path separators", - }, - { - name: "output directory does not exist", - setup: func() { - flagTrimInput = testCSV - flagTrimOutputDir = filepath.Join(tempDir, "nonexistent") - flagTrimSuffix = "trimmed" - flagTrimStartTime = 0 - flagTrimEndTime = 0 - flagTrimStartOffset = 10 - flagTrimEndOffset = 0 - }, - wantErr: true, - errMsg: "does not exist", - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - // Reset flags to defaults - flagTrimInput = "" - flagTrimStartTime = 0 - flagTrimEndTime = 0 - flagTrimStartOffset = 0 - flagTrimEndOffset = 0 - flagTrimOutputDir = "" - flagTrimSuffix = "trimmed" - - // Setup test-specific flags - tt.setup() - - // Validate - err := validateTrimFlags(trimCmd, nil) - - if tt.wantErr { - assert.Error(t, err) - if tt.errMsg != "" { - assert.Contains(t, err.Error(), tt.errMsg) - } - } else { - assert.NoError(t, err) - } - }) - } -} From ca9bcfbd72a5835c04baff0e5f0a94684bbbc370 Mon Sep 17 00:00:00 2001 From: "Harper, Jason M" Date: Fri, 28 Nov 2025 11:30:10 -0800 Subject: [PATCH 3/5] remove commented code Signed-off-by: Harper, Jason M --- cmd/metrics/trim.go | 3 --- 1 file changed, 3 deletions(-) diff --git a/cmd/metrics/trim.go b/cmd/metrics/trim.go index b6c3de14..83a96c55 100644 --- a/cmd/metrics/trim.go +++ b/cmd/metrics/trim.go @@ -127,9 +127,6 @@ func validateTrimFlags(cmd *cobra.Command, args []string) error { // runTrimCmd executes the trim command func runTrimCmd(cmd *cobra.Command, args []string) error { - // appContext is the application context that holds common data and resources. - // appContext := cmd.Parent().Parent().Context().Value(common.AppContext{}).(common.AppContext) - // flagTrimInput can be a file or directory // get the directory and use it as output dir var sourceDir string From 352db20c56cacda42eb0f44147f4f814593497b3 Mon Sep 17 00:00:00 2001 From: "Harper, Jason M" Date: Fri, 28 Nov 2025 11:30:29 -0800 Subject: [PATCH 4/5] align README to trim functionality Signed-off-by: Harper, Jason M --- README.md | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index d556f921..faecbae4 100644 --- a/README.md +++ b/README.md @@ -57,26 +57,17 @@ After collecting metrics, you can generate new summary reports for a specific ti The time range can be specified using either absolute timestamps (seconds since epoch) or relative offsets from the beginning/end of the data. At least one time parameter must be specified. +The trim command overwrites the CSV and HTML summary files in the input directory with new files containing the filtered data. + **Examples:**
 # Skip the first 10 seconds and last 5 seconds
-$ ./perfspect metrics trim --input hostname_metrics.csv --start-offset 10 --end-offset 5
+$ ./perfspect metrics trim --input perfspect_2025-11-28_09-21-56 --start-offset 10 --end-offset 5
 
 # Use absolute timestamps (seconds since epoch)
-$ ./perfspect metrics trim --input hostname_metrics.csv --start-time 1764174327 --end-time 1764174351
-
-# Custom output suffix
-$ ./perfspect metrics trim --input hostname_metrics.csv --start-offset 10 --suffix steady_state
+$ ./perfspect metrics trim --input perfspect_2025-11-28_09-21-56 --start-time 1764174327 --end-time 1764174351
 
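To make the offset arithmetic in the examples above concrete, the sketch below mirrors the resolution rule that `calculateTimeRange` in `cmd/metrics/trim.go` applies: absolute `--start-time`/`--end-time` win, otherwise the offsets are applied to the first and last timestamps found in the data, and a zero value means "not specified". The helper name and the sample timestamps are illustrative only, not code from this series.

<pre>
package main

// Illustrative sketch only; simplified from calculateTimeRange in this patch series.

import (
	"errors"
	"fmt"
)

// resolveTimeRange picks the effective window: absolute times take
// precedence, offsets are relative to the data's min/max timestamps,
// and zero means the flag was not specified.
func resolveTimeRange(minTS, maxTS, startTime, endTime, startOffset, endOffset int) (int, int, error) {
	start := minTS
	if startTime != 0 {
		start = startTime
	} else if startOffset != 0 {
		start = minTS + startOffset
	}
	end := maxTS
	if endTime != 0 {
		end = endTime
	} else if endOffset != 0 {
		end = maxTS - endOffset
	}
	if start >= end {
		return 0, 0, errors.New("invalid time range: start >= end")
	}
	return start, end, nil
}

func main() {
	// Hypothetical data spanning 1764174317..1764174417; skip 10s at the start, 5s at the end.
	start, end, err := resolveTimeRange(1764174317, 1764174417, 0, 0, 10, 5)
	if err != nil {
		panic(err)
	}
	fmt.Println(start, end) // 1764174327 1764174412
}
</pre>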
-The trim command creates new files in the same directory as the input file (or in a specified output directory): -- `hostname_metrics_trimmed.csv` - Filtered raw metrics -- `hostname_metrics_trimmed_summary.csv` - Summary statistics -- `hostname_metrics_trimmed_summary.html` - Interactive HTML report - -> [!NOTE] -> If a metadata JSON file exists alongside the input CSV (from the original collection), it will be used to generate a complete HTML report with system summary. Otherwise, a simplified HTML report without system summary will be generated. - ##### Prometheus Endpoint The `metrics` command can expose metrics via a Prometheus compatible `metrics` endpoint. This allows integration with Prometheus monitoring systems. To enable the Prometheus endpoint, use the `--prometheus-server` flag. By default, the endpoint listens on port 9090. The port can be changed using the `--prometheus-server-addr` flag. Run `perfspect metrics --prometheus-server`. See the [example daemonset](docs/perfspect-daemonset.md) for deploying in Kubernetes. From 84985b2669846282252f015f782fc5569894ff6b Mon Sep 17 00:00:00 2001 From: "Harper, Jason M" Date: Fri, 28 Nov 2025 12:49:04 -0800 Subject: [PATCH 5/5] put new files in new output dir Signed-off-by: Harper, Jason M --- README.md | 4 ++-- cmd/metrics/summary.go | 6 +++--- cmd/metrics/trim.go | 23 +++++++++++++++++++---- 3 files changed, 24 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index faecbae4..4b891459 100644 --- a/README.md +++ b/README.md @@ -57,7 +57,7 @@ After collecting metrics, you can generate new summary reports for a specific ti The time range can be specified using either absolute timestamps (seconds since epoch) or relative offsets from the beginning/end of the data. At least one time parameter must be specified. -The trim command overwrites the CSV and HTML summary files in the input directory with new files containing the filtered data. +The trimmed CSV and HTML summary files will be placed in a new output directory. The output directory can be specified using the `--output` flag. **Examples:**
@@ -165,7 +165,7 @@ $ ./perfspect metrics --syslog
 
##### Report Files -By default, PerfSpect creates a unique directory in the user's current working directory to store output files. Users can specify a custom output directory, but the directory provided must exist; PerfSpect will not create it. +By default, PerfSpect creates a unique directory in the user's current working directory to store output files. Users can specify a custom output directory with the --output flag.
 $./perfspect telemetry --output /home/elaine/perfspect/telemetry
 
diff --git a/cmd/metrics/summary.go b/cmd/metrics/summary.go index 7894673a..2b27b867 100644 --- a/cmd/metrics/summary.go +++ b/cmd/metrics/summary.go @@ -29,12 +29,12 @@ import ( // generates summary files (CSV and HTML) using the provided metadata and metric definitions, // and returns a list of created summary file paths. func summarizeMetrics(localOutputDir string, targetName string, metadata Metadata, metricDefinitions []MetricDefinition) ([]string, error) { - return summarizeMetricsWithTrim(localOutputDir, targetName, metadata, metricDefinitions, 0, 0) + return summarizeMetricsWithTrim(localOutputDir, localOutputDir, targetName, metadata, metricDefinitions, 0, 0) } -func summarizeMetricsWithTrim(localOutputDir string, targetName string, metadata Metadata, metricDefinitions []MetricDefinition, startTimestamp, endTimestamp int) ([]string, error) { +func summarizeMetricsWithTrim(localInputDir, localOutputDir, targetName string, metadata Metadata, metricDefinitions []MetricDefinition, startTimestamp, endTimestamp int) ([]string, error) { filesCreated := []string{} // read the metrics from CSV - csvMetricsFile := filepath.Join(localOutputDir, targetName+"_metrics.csv") + csvMetricsFile := filepath.Join(localInputDir, targetName+"_metrics.csv") metrics, err := newMetricCollection(csvMetricsFile) if err != nil { return filesCreated, fmt.Errorf("failed to read metrics from %s: %w", csvMetricsFile, err) diff --git a/cmd/metrics/trim.go b/cmd/metrics/trim.go index 83a96c55..df3251fa 100644 --- a/cmd/metrics/trim.go +++ b/cmd/metrics/trim.go @@ -12,6 +12,7 @@ import ( "strings" "perfspect/internal/common" + "perfspect/internal/util" "github.com/spf13/cobra" ) @@ -127,8 +128,11 @@ func validateTrimFlags(cmd *cobra.Command, args []string) error { // runTrimCmd executes the trim command func runTrimCmd(cmd *cobra.Command, args []string) error { + // appContext is the application context that holds common data and resources. 
+ appContext := cmd.Parent().Context().Value(common.AppContext{}).(common.AppContext) + outputDir := appContext.OutputDir + // flagTrimInput can be a file or directory - // get the directory and use it as output dir var sourceDir string fileInfo, err := os.Stat(flagTrimInput) if err != nil { @@ -143,8 +147,8 @@ func runTrimCmd(cmd *cobra.Command, args []string) error { } else { sourceDir = filepath.Dir(flagTrimInput) } - outputDir := sourceDir + // Determine source files to process sourceInfos, err := getTrimmedSourceInfos(flagTrimInput) if err != nil { err = fmt.Errorf("failed to determine source files: %w", err) @@ -154,16 +158,27 @@ func runTrimCmd(cmd *cobra.Command, args []string) error { return err } if len(sourceInfos) == 0 { - err = fmt.Errorf("no valid metrics CSV files found to trim in: %s", sourceDir) + err = fmt.Errorf("no valid metrics CSV files found to trim") + fmt.Fprintf(os.Stderr, "Error: %v\n", err) + slog.Error(err.Error()) + cmd.SilenceUsage = true + return err + } + + // create output directory if it doesn't exist + err = util.CreateDirectoryIfNotExists(outputDir, 0755) // #nosec G301 + if err != nil { + err = fmt.Errorf("failed to create output directory: %w", err) fmt.Fprintf(os.Stderr, "Error: %v\n", err) slog.Error(err.Error()) cmd.SilenceUsage = true return err } + // Process each source file var filesCreated []string for _, sourceInfo := range sourceInfos { - filesCreated, err = summarizeMetricsWithTrim(outputDir, sourceInfo.targetName, sourceInfo.metadata, sourceInfo.metricDefinitions, sourceInfo.startTime, sourceInfo.endTime) + filesCreated, err = summarizeMetricsWithTrim(sourceDir, outputDir, sourceInfo.targetName, sourceInfo.metadata, sourceInfo.metricDefinitions, sourceInfo.startTime, sourceInfo.endTime) if err != nil { err = fmt.Errorf("failed to generate trimmed summaries for %s: %w", sourceInfo.allCSVPath, err) fmt.Fprintf(os.Stderr, "Error: %v\n", err)
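Two pieces of this series are worth illustrating outside the diff context. First, because `--input` may now name either a directory or a single CSV file, `getTrimmedSourceInfos` discovers every `*_metrics.csv` in the directory and derives the companion summary paths from the shared base name. A standalone sketch of that discovery step; the `sourcePaths` helper and its return shape are illustrative, not the patch's code.

<pre>
package main

// Illustrative sketch only; condensed from getTrimmedSourceInfos in this patch series.

import (
	"fmt"
	"os"
	"path/filepath"
	"strings"
)

// sourcePaths finds each *_metrics.csv in dir and derives its companion
// summary CSV and summary HTML paths from the shared base name.
func sourcePaths(dir string) ([][3]string, error) {
	entries, err := os.ReadDir(dir)
	if err != nil {
		return nil, fmt.Errorf("failed to read directory: %w", err)
	}
	var out [][3]string
	for _, e := range entries {
		name := e.Name()
		if e.IsDir() || !strings.HasSuffix(strings.ToLower(name), "_metrics.csv") {
			continue
		}
		base := strings.TrimSuffix(name, filepath.Ext(name)) // e.g. "hostname_metrics"
		out = append(out, [3]string{
			filepath.Join(dir, name),                 // raw metrics to trim
			filepath.Join(dir, base+"_summary.csv"),  // summary CSV to regenerate
			filepath.Join(dir, base+"_summary.html"), // summary HTML (also the metadata source)
		})
	}
	return out, nil
}

func main() {
	paths, err := sourcePaths("perfspect_2025-11-28_09-21-56") // a collection output dir
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		return
	}
	for _, p := range paths {
		fmt.Println(p[0], "->", p[1], p[2])
	}
}
</pre>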
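Second, rather than relying on a separate metadata JSON file, the trim command recovers collection metadata from the previously generated HTML report by locating the embedded `const metadata = {...}` line and unmarshaling the JSON that follows it. A condensed sketch of that approach, assuming a stripped-down `Metadata` struct; the real `loadMetadataFromHTMLSummary` also extracts the `const system_info = ` line and returns richer errors.

<pre>
package main

// Illustrative sketch only; condensed from loadMetadataFromHTMLSummary in this patch series.

import (
	"encoding/json"
	"fmt"
	"strings"
)

// Metadata is a trimmed-down stand-in for the real struct in cmd/metrics.
type Metadata struct {
	SocketCount       int
	Microarchitecture string
}

// metadataFromHTML scans a rendered summary report for the line that embeds
// the collection metadata as a JSON literal, e.g.
//   const metadata = {"SocketCount":2,"Microarchitecture":"SPR"}
// and unmarshals the JSON that follows the prefix.
func metadataFromHTML(content string) (Metadata, error) {
	const prefix = "const metadata = "
	var md Metadata
	for _, line := range strings.Split(content, "\n") {
		line = strings.TrimSpace(line)
		if after, ok := strings.CutPrefix(line, prefix); ok {
			if err := json.Unmarshal([]byte(after), &md); err != nil {
				return md, fmt.Errorf("failed to parse metadata JSON: %w", err)
			}
			return md, nil
		}
	}
	return md, fmt.Errorf("metadata not found in summary HTML")
}

func main() {
	html := `<script>
const metadata = {"SocketCount":2,"Microarchitecture":"SPR"}
</script>`
	md, err := metadataFromHTML(html)
	if err != nil {
		panic(err)
	}
	fmt.Printf("%+v\n", md) // {SocketCount:2 Microarchitecture:SPR}
}
</pre>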