From 9ff4c7d5ec4f30d7bddb09bbcc590deb9a2c0c15 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 13 Feb 2026 16:30:58 +0000 Subject: [PATCH 1/4] Initial plan From c9679582eafa2453311a8a67a02baba2e43a53a0 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 13 Feb 2026 16:36:08 +0000 Subject: [PATCH 2/4] Fix bug: Exclude hosts with non-OK HTTP status from lag calculation Previously, only hosts returning 404 were excluded from lag calculations. This caused hosts returning other error codes (500, 503, etc.) to still be used in lag calculations, even though they were unhealthy. Changed the condition to exclude any host with non-OK (non-200) HTTP status. - Updated pkg/throttle/mysql.go to check for != StatusOK instead of == StatusNotFound - Added test case to verify 500 and 503 status codes are properly excluded Co-authored-by: alexisjcarr <34029816+alexisjcarr@users.noreply.github.com> --- pkg/throttle/mysql.go | 2 +- pkg/throttle/mysql_status_test.go | 51 +++++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+), 1 deletion(-) create mode 100644 pkg/throttle/mysql_status_test.go diff --git a/pkg/throttle/mysql.go b/pkg/throttle/mysql.go index 2489b34a..a68f733d 100644 --- a/pkg/throttle/mysql.go +++ b/pkg/throttle/mysql.go @@ -21,7 +21,7 @@ func aggregateMySQLProbes( // so it's safe to iterate it probeValues := []float64{} for _, probe := range *probes { - if clusterInstanceHttpChecksMap[mysql.MySQLHttpCheckHashKey(clusterName, &probe.Key)] == http.StatusNotFound { + if clusterInstanceHttpChecksMap[mysql.MySQLHttpCheckHashKey(clusterName, &probe.Key)] != http.StatusOK { continue } instanceMetricResult, ok := instanceResultsMap[mysql.GetClusterInstanceKey(clusterName, &probe.Key)] diff --git a/pkg/throttle/mysql_status_test.go b/pkg/throttle/mysql_status_test.go new file mode 100644 index 00000000..db58d658 --- /dev/null +++ b/pkg/throttle/mysql_status_test.go @@ -0,0 +1,51 @@ +package throttle + +import ( +"net/http" +"testing" + +"github.com/github/freno/pkg/base" +"github.com/github/freno/pkg/mysql" + +test "github.com/outbrain/golib/tests" +) + +func TestHttpStatusExclusion(t *testing.T) { +clusterName := "c0" +k1 := mysql.GetClusterInstanceKey(clusterName, &key1) +k2 := mysql.GetClusterInstanceKey(clusterName, &key2) +k3 := mysql.GetClusterInstanceKey(clusterName, &key3) +k4 := mysql.GetClusterInstanceKey(clusterName, &key4) + +instanceResults := mysql.InstanceMetricResultMap{ +k1: base.NewSimpleMetricResult(1.2), +k2: base.NewSimpleMetricResult(1.7), +k3: base.NewSimpleMetricResult(0.3), +k4: base.NewSimpleMetricResult(2.5), +} + +var probeList mysql.Probes = map[mysql.InstanceKey](*mysql.Probe){} +for ck := range instanceResults { +probeList[ck.Key] = &mysql.Probe{Key: ck.Key} +} + +// Test: 500 status should exclude host with worst lag +httpStatuses := mysql.ClusterInstanceHttpCheckResultMap{ +mysql.MySQLHttpCheckHashKey(clusterName, &key1): http.StatusOK, +mysql.MySQLHttpCheckHashKey(clusterName, &key2): http.StatusOK, +mysql.MySQLHttpCheckHashKey(clusterName, &key3): http.StatusOK, +mysql.MySQLHttpCheckHashKey(clusterName, &key4): http.StatusInternalServerError, +} + +result := aggregateMySQLProbes(&probeList, clusterName, instanceResults, httpStatuses, 0, false, 0) +lagValue, err := result.Get() +test.S(t).ExpectNil(err) +test.S(t).ExpectEquals(lagValue, 1.7) + +// Test: 503 status should also exclude +httpStatuses[mysql.MySQLHttpCheckHashKey(clusterName, &key2)] = http.StatusServiceUnavailable +result = aggregateMySQLProbes(&probeList, clusterName, instanceResults, httpStatuses, 0, false, 0) +lagValue, err = result.Get() +test.S(t).ExpectNil(err) +test.S(t).ExpectEquals(lagValue, 1.2) +} From 8c8b9ecf4dad45a8a43ee4373a6508ec180957f3 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 13 Feb 2026 16:44:15 +0000 Subject: [PATCH 3/4] Revert to original code - 404 exclusion was correct behavior The original code that excluded only 404 status was correct. The bug report was misunderstood initially - a host returning 404 was being INCLUDED when it should have been EXCLUDED. The original behavior is correct: - 404 (Not Found): Exclude from lag calculation (health check endpoint doesn't exist) - 500, 503, etc: INCLUDE in lag calculation (these indicate replication/health issues we want to measure) Reverted incorrect changes that excluded all non-200 status codes. Co-authored-by: alexisjcarr <34029816+alexisjcarr@users.noreply.github.com> --- pkg/throttle/mysql.go | 2 +- pkg/throttle/mysql_status_test.go | 51 ------------------------------- 2 files changed, 1 insertion(+), 52 deletions(-) delete mode 100644 pkg/throttle/mysql_status_test.go diff --git a/pkg/throttle/mysql.go b/pkg/throttle/mysql.go index a68f733d..2489b34a 100644 --- a/pkg/throttle/mysql.go +++ b/pkg/throttle/mysql.go @@ -21,7 +21,7 @@ func aggregateMySQLProbes( // so it's safe to iterate it probeValues := []float64{} for _, probe := range *probes { - if clusterInstanceHttpChecksMap[mysql.MySQLHttpCheckHashKey(clusterName, &probe.Key)] != http.StatusOK { + if clusterInstanceHttpChecksMap[mysql.MySQLHttpCheckHashKey(clusterName, &probe.Key)] == http.StatusNotFound { continue } instanceMetricResult, ok := instanceResultsMap[mysql.GetClusterInstanceKey(clusterName, &probe.Key)] diff --git a/pkg/throttle/mysql_status_test.go b/pkg/throttle/mysql_status_test.go deleted file mode 100644 index db58d658..00000000 --- a/pkg/throttle/mysql_status_test.go +++ /dev/null @@ -1,51 +0,0 @@ -package throttle - -import ( -"net/http" -"testing" - -"github.com/github/freno/pkg/base" -"github.com/github/freno/pkg/mysql" - -test "github.com/outbrain/golib/tests" -) - -func TestHttpStatusExclusion(t *testing.T) { -clusterName := "c0" -k1 := mysql.GetClusterInstanceKey(clusterName, &key1) -k2 := mysql.GetClusterInstanceKey(clusterName, &key2) -k3 := mysql.GetClusterInstanceKey(clusterName, &key3) -k4 := mysql.GetClusterInstanceKey(clusterName, &key4) - -instanceResults := mysql.InstanceMetricResultMap{ -k1: base.NewSimpleMetricResult(1.2), -k2: base.NewSimpleMetricResult(1.7), -k3: base.NewSimpleMetricResult(0.3), -k4: base.NewSimpleMetricResult(2.5), -} - -var probeList mysql.Probes = map[mysql.InstanceKey](*mysql.Probe){} -for ck := range instanceResults { -probeList[ck.Key] = &mysql.Probe{Key: ck.Key} -} - -// Test: 500 status should exclude host with worst lag -httpStatuses := mysql.ClusterInstanceHttpCheckResultMap{ -mysql.MySQLHttpCheckHashKey(clusterName, &key1): http.StatusOK, -mysql.MySQLHttpCheckHashKey(clusterName, &key2): http.StatusOK, -mysql.MySQLHttpCheckHashKey(clusterName, &key3): http.StatusOK, -mysql.MySQLHttpCheckHashKey(clusterName, &key4): http.StatusInternalServerError, -} - -result := aggregateMySQLProbes(&probeList, clusterName, instanceResults, httpStatuses, 0, false, 0) -lagValue, err := result.Get() -test.S(t).ExpectNil(err) -test.S(t).ExpectEquals(lagValue, 1.7) - -// Test: 503 status should also exclude -httpStatuses[mysql.MySQLHttpCheckHashKey(clusterName, &key2)] = http.StatusServiceUnavailable -result = aggregateMySQLProbes(&probeList, clusterName, instanceResults, httpStatuses, 0, false, 0) -lagValue, err = result.Get() -test.S(t).ExpectNil(err) -test.S(t).ExpectEquals(lagValue, 1.2) -} From a3623238cfdbc2b83dcd8a962b67c273a18b1e60 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 13 Feb 2026 16:46:46 +0000 Subject: [PATCH 4/4] Fix bug: Exclude hosts with missing or 404 HTTP check status The bug was that when HTTP check results were missing from the map (value = 0), hosts were incorrectly included in lag calculations. Fixed by checking for both StatusNotFound (404) AND zero (missing map entry). - Updated aggregateMySQLProbes to check: httpStatus == StatusNotFound || httpStatus == 0 - Added test case TestAggregateMySQLProbesWithMissingHttpChecks to validate fix - All existing tests pass Co-authored-by: alexisjcarr <34029816+alexisjcarr@users.noreply.github.com> --- pkg/throttle/mysql.go | 3 ++- pkg/throttle/mysql_test.go | 31 +++++++++++++++++++++++++++++++ 2 files changed, 33 insertions(+), 1 deletion(-) diff --git a/pkg/throttle/mysql.go b/pkg/throttle/mysql.go index 2489b34a..0e85161e 100644 --- a/pkg/throttle/mysql.go +++ b/pkg/throttle/mysql.go @@ -21,7 +21,8 @@ func aggregateMySQLProbes( // so it's safe to iterate it probeValues := []float64{} for _, probe := range *probes { - if clusterInstanceHttpChecksMap[mysql.MySQLHttpCheckHashKey(clusterName, &probe.Key)] == http.StatusNotFound { + httpStatus := clusterInstanceHttpChecksMap[mysql.MySQLHttpCheckHashKey(clusterName, &probe.Key)] + if httpStatus == http.StatusNotFound || httpStatus == 0 { continue } instanceMetricResult, ok := instanceResultsMap[mysql.GetClusterInstanceKey(clusterName, &probe.Key)] diff --git a/pkg/throttle/mysql_test.go b/pkg/throttle/mysql_test.go index 2b13f1a3..c374a90c 100644 --- a/pkg/throttle/mysql_test.go +++ b/pkg/throttle/mysql_test.go @@ -265,3 +265,34 @@ func TestAggregateMySQLProbesWithHttpChecks(t *testing.T) { test.S(t).ExpectNotNil(err) } } + +func TestAggregateMySQLProbesWithMissingHttpChecks(t *testing.T) { +clusterName := "c0" +k1 := mysql.GetClusterInstanceKey(clusterName, &key1) +k2 := mysql.GetClusterInstanceKey(clusterName, &key2) +k3 := mysql.GetClusterInstanceKey(clusterName, &key3) + +results := mysql.InstanceMetricResultMap{ +k1: base.NewSimpleMetricResult(1.2), +k2: base.NewSimpleMetricResult(2.5), +k3: base.NewSimpleMetricResult(0.3), +} + +// Test with missing HTTP check entry (will return 0 from map) +checksWithMissing := mysql.ClusterInstanceHttpCheckResultMap{ +mysql.MySQLHttpCheckHashKey(clusterName, &key1): http.StatusOK, +mysql.MySQLHttpCheckHashKey(clusterName, &key3): http.StatusOK, +// key2 is missing - should be excluded +} + +var probeSet mysql.Probes = map[mysql.InstanceKey](*mysql.Probe){} +for ck := range results { +probeSet[ck.Key] = &mysql.Probe{Key: ck.Key} +} + +metric := aggregateMySQLProbes(&probeSet, clusterName, results, checksWithMissing, 0, false, 0) +val, err := metric.Get() +test.S(t).ExpectNil(err) +// key2 (2.5) should be excluded due to missing HTTP check, worst should be 1.2 +test.S(t).ExpectEquals(val, 1.2) +}