Skip to content
4 changes: 4 additions & 0 deletions .github/workflows/framework-codegen.yml
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,10 @@ jobs:
go-modules-${{ runner.os }}
- name: Install dependencies
run: go mod download
- name: Install Just
uses: extractions/setup-just@e33e0265a09d6d736e2ee1e0eb685ef1de4669ff # v3
with:
just-version: "1.40.0"
- name: Run Codegen Tests
run: |
go test -timeout ${{ matrix.test.timeout }} -v -count ${{ matrix.test.count }} -run ${{ matrix.test.name }}
Expand Down
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -79,4 +79,6 @@ tag.py
parrot/*.json
parrot/*.log
# Executable
parrot/parrot
parrot/parrot
# Devenv (generated manually)
devenv/
2 changes: 2 additions & 0 deletions framework/.changeset/v0.13.4.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
- Multi-product env template
- Split product and infra configuration code and files
18 changes: 8 additions & 10 deletions framework/cmd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,12 +64,6 @@ Usage:
Aliases: []string{"r"},
Usage: "Your product name",
},
&cli.StringFlag{
Name: "product-configuration-type",
Aliases: []string{"p"},
Value: "evm-single",
Usage: "Product configuration type/layout (single network, multi-network, etc)",
},
&cli.IntFlag{
Name: "nodes",
Aliases: []string{"n"},
Expand All @@ -79,7 +73,6 @@ Usage:
},
Action: func(c *cli.Context) error {
outputDir := c.String("output-dir")
productConfType := c.String("product-configuration-type")
nodes := c.Int("nodes")
cliName := c.String("cli")
if cliName == "" {
Expand All @@ -93,17 +86,22 @@ Usage:
Str("OutputDir", outputDir).
Str("Name", cliName).
Int("CLNodes", nodes).
Str("ProductConfigurationType", productConfType).
Msg("Generating developer environment")

cg, err := framework.NewEnvBuilder(cliName, nodes, productConfType, productName).
cg, err := framework.NewEnvBuilder(cliName, nodes, productName).
OutputDir(outputDir).
Build()
if err != nil {
return fmt.Errorf("failed to create codegen: %w", err)
}
if err := cg.Write(); err != nil {
return fmt.Errorf("failed to generate module: %w", err)
return fmt.Errorf("failed to generate environment: %w", err)
}
if err := cg.WriteFakes(); err != nil {
return fmt.Errorf("failed to generate fakes: %w", err)
}
if err := cg.WriteProducts(); err != nil {
return fmt.Errorf("failed to generate products: %w", err)
}

fmt.Println()
Expand Down
3 changes: 1 addition & 2 deletions framework/leak/cmd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ import (
)

const (
StepTick = 3 * time.Minute
StepTick = 20 * time.Minute
)

func main() {
Expand All @@ -20,7 +20,6 @@ func main() {
workersSchedule := os.Getenv("WORKERS")
memorySchedule := os.Getenv("MEMORY")
repeatStr := os.Getenv("REPEAT")

leaks := make([][]byte, 0)
workerCounter := 0

Expand Down
61 changes: 55 additions & 6 deletions framework/leak/detector_cl_node.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package leak
import (
"errors"
"fmt"
"strconv"
"time"

"github.com/smartcontractkit/chainlink-testing-framework/framework"
Expand All @@ -23,9 +24,9 @@ type CLNodesCheck struct {
// CLNodesLeakDetector is Chainlink node specific resource leak detector
// can be used with both local and remote Chainlink node sets (DONs)
type CLNodesLeakDetector struct {
Mode string
CPUQuery, MemoryQuery string
c *ResourceLeakChecker
Mode string
CPUQuery, MemoryQuery, ContainerAliveQuery string
c *ResourceLeakChecker
}

// WithCPUQuery allows to override CPU leak query (Prometheus)
Expand Down Expand Up @@ -55,9 +56,10 @@ func NewCLNodesLeakDetector(c *ResourceLeakChecker, opts ...func(*CLNodesLeakDet
}
switch cd.Mode {
case "devenv":
// aggregate it on 5m interval with 2m step for mitigating spikes
cd.CPUQuery = `avg_over_time((sum(rate(container_cpu_usage_seconds_total{name="don-node%d"}[5m])) * 100)[5m:2m])`
cd.MemoryQuery = `avg_over_time(container_memory_rss{name="don-node%d"}[5m]) / 1024 / 1024`
cd.ContainerAliveQuery = `time() - container_start_time_seconds{name=~"don-node%d"}`
// avg from intervals of 1h with 30m step to mitigate spikes
cd.CPUQuery = `avg_over_time((sum(rate(container_cpu_usage_seconds_total{name="don-node%d"}[1h])))[1h:30m]) * 100`
cd.MemoryQuery = `avg_over_time(container_memory_rss{name="don-node%d"}[1h:30m]) / 1024 / 1024`
case "griddle":
return nil, fmt.Errorf("not implemented yet")
default:
Expand All @@ -66,13 +68,39 @@ func NewCLNodesLeakDetector(c *ResourceLeakChecker, opts ...func(*CLNodesLeakDet
return cd, nil
}

// checkContainerUptime queries Prometheus (via ContainerAliveQuery) for how many
// seconds the container of node nodeIdx has been running, evaluated at t.End.
//
// It returns the observed uptime in seconds and a non-nil error when the query
// fails, returns no samples, the sample value cannot be parsed, or the uptime is
// shorter than (or equal to) the checked interval [t.Start, t.End] — meaning the
// container was restarted while the test was running.
func (cd *CLNodesLeakDetector) checkContainerUptime(t *CLNodesCheck, nodeIdx int) (float64, error) {
	uptimeResp, err := cd.c.c.Query(fmt.Sprintf(cd.ContainerAliveQuery, nodeIdx), t.End)
	if err != nil {
		return 0, fmt.Errorf("failed to execute container alive query: %w", err)
	}
	uptimeResult := uptimeResp.Data.Result
	if len(uptimeResult) == 0 {
		return 0, fmt.Errorf("no results for end timestamp: %s", t.End)
	}

	// Prometheus instant-vector samples arrive as [timestamp, "value-string"];
	// the value is always string-encoded in the HTTP API response.
	uptimeResultValue, resOk := uptimeResult[0].Value[1].(string)
	if !resOk {
		return 0, fmt.Errorf("invalid Prometheus response value for timestamp: %s, value: %v", t.End, uptimeResult[0].Value[1])
	}

	uptimeResultValueFloat, err := strconv.ParseFloat(uptimeResultValue, 64)
	if err != nil {
		return 0, fmt.Errorf("uptime can't be parsed from string: %w", err)
	}
	// Compare against the exact test-interval length. time.Time.Sub keeps
	// sub-second precision, unlike subtracting the two Unix() values, which
	// truncates each timestamp to whole seconds before differencing.
	if uptimeResultValueFloat <= t.End.Sub(t.Start).Seconds() {
		return uptimeResultValueFloat, fmt.Errorf("container hasn't lived long enough and was killed while the test was running")
	}
	return uptimeResultValueFloat, nil
}

// Check runs all resource leak checks and returns errors if threshold reached for any of them
func (cd *CLNodesLeakDetector) Check(t *CLNodesCheck) error {
if t.NumNodes == 0 {
return fmt.Errorf("cl nodes num must be > 0")
}
memoryDiffs := make([]float64, 0)
cpuDiffs := make([]float64, 0)
uptimes := make([]float64, 0)
errs := make([]error, 0)
for i := range t.NumNodes {
memoryDiff, err := cd.c.MeasureDelta(&CheckConfig{
Expand Down Expand Up @@ -108,10 +136,31 @@ func (cd *CLNodesLeakDetector) Check(t *CLNodesCheck) error {
i, t.Start, t.End, cpuDiff,
))
}
uptime, err := cd.checkContainerUptime(t, i)
if err != nil {
errs = append(errs, fmt.Errorf(
"Container uptime issue for node %d and interval: [%s -> %s], uptime: %.f, err: %w",
i, t.Start, t.End, uptime, err,
))
}
uptimes = append(uptimes, uptime)
}
framework.L.Info().
Any("MemoryDiffs", memoryDiffs).
Any("CPUDiffs", cpuDiffs).
Any("Uptimes", uptimes).
Str("TestDuration", t.End.Sub(t.Start).String()).
Float64("TestDurationSec", t.End.Sub(t.Start).Seconds()).
Msg("Leaks info")
framework.L.Info().Msg("Downloading pprof profile..")
dumper := NewProfileDumper(framework.LocalPyroscopeBaseURL)
profilePath, err := dumper.MemoryProfile(&ProfileDumperConfig{
ServiceName: "chainlink-node",
})
if err != nil {
errs = append(errs, fmt.Errorf("failed to download Pyroscopt profile: %w", err))
return errors.Join(errs...)
}
framework.L.Info().Str("Path", profilePath).Msg("Saved pprof profile")
return errors.Join(errs...)
}
23 changes: 13 additions & 10 deletions framework/leak/detector_hog_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,13 @@ func TestCyclicHog(t *testing.T) {
ctx,
"resource-hog:latest",
map[string]string{
"WORKERS": "1,2,3,2,1",
"MEMORY": "1,2,3,2,1",
"WORKERS": "1,2,3,4,5,5,4,3,2,1",
"MEMORY": "1,2,3,4,5,5,4,3,2,1",
"REPEAT": "1",
},
)
require.NoError(t, err)
time.Sleep(15 * time.Minute)
time.Sleep(2 * time.Hour)
t.Cleanup(func() {
if err := hog.Terminate(ctx); err != nil {
log.Printf("Failed to terminate container: %v", err)
Expand All @@ -41,19 +41,22 @@ func TestVerifyCyclicHog(t *testing.T) {
lc := leak.NewResourceLeakChecker()
// cpu
diff, err := lc.MeasureDelta(&leak.CheckConfig{
Query: `avg_over_time((sum(rate(container_cpu_usage_seconds_total{name="resource-hog"}[5m])) * 100)[5m:2m])`,
Start: mustTime("2026-01-16T13:20:30Z"),
End: mustTime("2026-01-16T13:32:40Z"),
WarmUpDuration: 2 * time.Minute,
Query: `avg_over_time((sum(rate(container_cpu_usage_seconds_total{name="resource-hog"}[30m])))[30m:5m]) * 100`,
// set timestamps for the run you are analyzing
Start: mustTime("2026-01-19T10:30:00Z"),
End: mustTime("2026-01-19T12:29:15Z"),
WarmUpDuration: 10 * time.Minute,
})
fmt.Println(diff)
require.NoError(t, err)

// mem
diff, err = lc.MeasureDelta(&leak.CheckConfig{
Query: `avg_over_time(container_memory_rss{name="resource-hog"}[5m]) / 1024 / 1024`,
Start: mustTime("2026-01-16T13:20:30Z"),
End: mustTime("2026-01-16T13:38:25Z"),
Query: `avg_over_time(container_memory_rss{name="resource-hog"}[30m]) / 1024 / 1024`,
// set timestamps for the run you are analyzing
Start: mustTime("2026-01-19T10:30:00Z"),
End: mustTime("2026-01-19T12:29:15Z"),
WarmUpDuration: 10 * time.Minute,
})
fmt.Println(diff)
require.NoError(t, err)
Expand Down
10 changes: 6 additions & 4 deletions framework/leak/detector_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ func mustTime(start string) time.Time {
return s
}

func TestMeasure(t *testing.T) {
func TestSmokeMeasure(t *testing.T) {
qc := leak.NewFakeQueryClient()
lc := leak.NewResourceLeakChecker(leak.WithQueryClient(qc))
testCases := []struct {
Expand Down Expand Up @@ -130,9 +130,10 @@ func TestRealCLNodesLeakDetectionLocalDevenv(t *testing.T) {
require.NoError(t, err)
errs := cnd.Check(&leak.CLNodesCheck{
NumNodes: 4,
Start: mustTime("2026-01-15T01:14:00Z"),
End: mustTime("2026-01-15T02:04:00Z"),
CPUThreshold: 20.0,
// set timestamps for the run you are analyzing
Start: mustTime("2026-01-19T17:23:14Z"),
End: mustTime("2026-01-19T18:00:51Z"),
CPUThreshold: 100.0,
MemoryThreshold: 20.0,
})
require.NoError(t, errs)
Expand All @@ -150,6 +151,7 @@ func TestRealPrometheusLowLevelAPI(t *testing.T) {
for i := range donNodes {
diff, err := lc.MeasureDelta(&leak.CheckConfig{
Query: fmt.Sprintf(`quantile_over_time(0.5, container_memory_rss{name="don-node%d"}[1h]) / 1024 / 1024`, i),
// set timestamps for the run you are analyzing
Start: mustTime("2026-01-12T21:53:00Z"),
End: mustTime("2026-01-13T10:11:00Z"),
WarmUpDuration: 1 * time.Hour,
Expand Down
Loading
Loading