From 08dda7bd935adf4de74cdda866be0722d3a7b006 Mon Sep 17 00:00:00 2001 From: Sze Ching Date: Thu, 29 Jan 2026 10:11:51 +0000 Subject: [PATCH 01/13] feat(graph-proxy): request duration as metrics --- backend/graph-proxy/src/graphql/mod.rs | 31 +++++++++++++++++++++----- backend/graph-proxy/src/metrics.rs | 15 +++++++++++-- 2 files changed, 39 insertions(+), 7 deletions(-) diff --git a/backend/graph-proxy/src/graphql/mod.rs b/backend/graph-proxy/src/graphql/mod.rs index 76f97543a..aa8c068cd 100644 --- a/backend/graph-proxy/src/graphql/mod.rs +++ b/backend/graph-proxy/src/graphql/mod.rs @@ -116,7 +116,9 @@ pub async fn graphql_handler( auth_token_header: Option>>, request: GraphQLRequest, ) -> GraphQLResponse { + let start = std::time::Instant::now(); let query = request.into_inner(); + let mut request_type = "unparseable"; if let Ok(query) = parse_query(&query.query) { let operation = query.operations; @@ -130,19 +132,24 @@ pub async fn graphql_handler( .map(|operation| operation.1.node.ty) .collect(), }; + let mut has_mutation = false; for operation in operations { match operation { async_graphql::parser::types::OperationType::Query => state .metrics_state .total_requests .add(1, &[KeyValue::new("request_type", "query")]), - async_graphql::parser::types::OperationType::Mutation => state - .metrics_state - .total_requests - .add(1, &[KeyValue::new("request_type", "mutation")]), + async_graphql::parser::types::OperationType::Mutation => { + has_mutation = true; + state + .metrics_state + .total_requests + .add(1, &[KeyValue::new("request_type", "mutation")]) + } async_graphql::parser::types::OperationType::Subscription => {} }; } + request_type = if has_mutation { "mutation" } else { "query" }; } else { state .metrics_state @@ -151,7 +158,21 @@ pub async fn graphql_handler( }; let auth_token = auth_token_header.map(|header| header.0); - state.schema.execute(query.data(auth_token)).await.into() + let response = state.schema.execute(query.data(auth_token)).await; + let 
elapsed_ms = start.elapsed().as_secs_f64() * 1000.0; + let status = if response.errors.is_empty() { + "ok" + } else { + "error" + }; + state.metrics_state.request_duration_ms.record( + elapsed_ms, + &[ + KeyValue::new("request_type", request_type), + KeyValue::new("status", status), + ], + ); + response.into() } lazy_static! { diff --git a/backend/graph-proxy/src/metrics.rs b/backend/graph-proxy/src/metrics.rs index e61ed555f..458fc38d7 100644 --- a/backend/graph-proxy/src/metrics.rs +++ b/backend/graph-proxy/src/metrics.rs @@ -1,6 +1,6 @@ use std::sync::Arc; -use opentelemetry::metrics::{Counter, MeterProvider}; +use opentelemetry::metrics::{Counter, Histogram, MeterProvider}; use opentelemetry_sdk::metrics::SdkMeterProvider; /// Thread-safe wrapper for OTEL metrics @@ -11,6 +11,8 @@ pub type MetricsState = Arc; pub struct Metrics { /// Total requests on all routes pub total_requests: Counter, + /// Request duration in miliseconds on every request + pub request_duration_ms: Histogram, } impl Metrics { @@ -23,6 +25,15 @@ impl Metrics { .with_description("The total requests on all routes made since the last restart.") .build(); - Metrics { total_requests } + let request_duration_ms = meter + .f64_histogram("graph_proxy_request_duration_ms") + .with_description("GraphQL request duration") + .with_unit("ms") + .build(); + + Metrics { + total_requests, + request_duration_ms, + } } } From 216767ff481e6d9aa92e61e67debc889cb8e5019 Mon Sep 17 00:00:00 2001 From: Sze Ching Date: Mon, 2 Feb 2026 13:44:31 +0000 Subject: [PATCH 02/13] feat(graph-proxy): error count in graph proxy --- backend/graph-proxy/src/graphql/mod.rs | 5 ++++- backend/graph-proxy/src/metrics.rs | 6 ++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/backend/graph-proxy/src/graphql/mod.rs b/backend/graph-proxy/src/graphql/mod.rs index aa8c068cd..9d17572bf 100644 --- a/backend/graph-proxy/src/graphql/mod.rs +++ b/backend/graph-proxy/src/graphql/mod.rs @@ -163,7 +163,10 @@ pub async fn 
graphql_handler( let status = if response.errors.is_empty() { "ok" } else { - "error" + state + .metrics_state + .total-errors + .add(1, &[KeyValue::new("status", "error")]); }; state.metrics_state.request_duration_ms.record( elapsed_ms, diff --git a/backend/graph-proxy/src/metrics.rs b/backend/graph-proxy/src/metrics.rs index 458fc38d7..7e213e03a 100644 --- a/backend/graph-proxy/src/metrics.rs +++ b/backend/graph-proxy/src/metrics.rs @@ -13,6 +13,7 @@ pub struct Metrics { pub total_requests: Counter, /// Request duration in miliseconds on every request pub request_duration_ms: Histogram, + pub total_errors: Counter, } impl Metrics { @@ -31,6 +32,11 @@ impl Metrics { .with_unit("ms") .build(); + let total_errors = meter + .u64_counter("graph_proxy_total_errors") + .with_description("The total number of errors since the last restart.") + .build(); + Metrics { total_requests, request_duration_ms, From f5ac6cc5f3bbfdb23b887846014fa0e85e0467f1 Mon Sep 17 00:00:00 2001 From: Sze Ching Date: Mon, 2 Feb 2026 13:46:53 +0000 Subject: [PATCH 03/13] chore(graph-proxy): linting --- backend/graph-proxy/src/graphql/mod.rs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/backend/graph-proxy/src/graphql/mod.rs b/backend/graph-proxy/src/graphql/mod.rs index 9d17572bf..e9b320fad 100644 --- a/backend/graph-proxy/src/graphql/mod.rs +++ b/backend/graph-proxy/src/graphql/mod.rs @@ -163,10 +163,7 @@ pub async fn graphql_handler( let status = if response.errors.is_empty() { "ok" } else { - state - .metrics_state - .total-errors - .add(1, &[KeyValue::new("status", "error")]); + state.metrics_state.total - errors.add(1, &[KeyValue::new("status", "error")]); }; state.metrics_state.request_duration_ms.record( elapsed_ms, From b073fd76c00cbb060cffea3ab2264d6fe011fdb3 Mon Sep 17 00:00:00 2001 From: Sze Ching Date: Mon, 2 Feb 2026 13:51:00 +0000 Subject: [PATCH 04/13] fix(graph-proxy): typo --- backend/graph-proxy/src/graphql/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/backend/graph-proxy/src/graphql/mod.rs b/backend/graph-proxy/src/graphql/mod.rs index e9b320fad..5e01c371e 100644 --- a/backend/graph-proxy/src/graphql/mod.rs +++ b/backend/graph-proxy/src/graphql/mod.rs @@ -163,7 +163,7 @@ pub async fn graphql_handler( let status = if response.errors.is_empty() { "ok" } else { - state.metrics_state.total - errors.add(1, &[KeyValue::new("status", "error")]); + state.metrics_state.total_errors.add(1, &[KeyValue::new("status", "error")]); }; state.metrics_state.request_duration_ms.record( elapsed_ms, From e3a4080e9114cd90536d688fafb490c7d0a96713 Mon Sep 17 00:00:00 2001 From: Sze Ching Date: Mon, 2 Feb 2026 13:52:07 +0000 Subject: [PATCH 05/13] fix(graph-proxy): typo --- backend/graph-proxy/src/metrics.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/backend/graph-proxy/src/metrics.rs b/backend/graph-proxy/src/metrics.rs index 7e213e03a..756413b80 100644 --- a/backend/graph-proxy/src/metrics.rs +++ b/backend/graph-proxy/src/metrics.rs @@ -40,6 +40,7 @@ impl Metrics { Metrics { total_requests, request_duration_ms, + total_errors, } } } From 0dc88296503fd965884c634584e20b63b45fa19c Mon Sep 17 00:00:00 2001 From: Sze Ching Date: Mon, 2 Feb 2026 13:54:01 +0000 Subject: [PATCH 06/13] fix(graph-proxy): typo --- backend/graph-proxy/src/graphql/mod.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/backend/graph-proxy/src/graphql/mod.rs b/backend/graph-proxy/src/graphql/mod.rs index 5e01c371e..fe0755127 100644 --- a/backend/graph-proxy/src/graphql/mod.rs +++ b/backend/graph-proxy/src/graphql/mod.rs @@ -163,6 +163,9 @@ pub async fn graphql_handler( let status = if response.errors.is_empty() { "ok" } else { + "error" + }; + if status == "error" { state.metrics_state.total_errors.add(1, &[KeyValue::new("status", "error")]); }; state.metrics_state.request_duration_ms.record( From 5ff5a4bb94828e6c7d10a00cb7442cd892421400 Mon Sep 17 00:00:00 2001 From: Sze Ching Date: Mon, 2 Feb 2026 13:55:26 +0000 Subject: [PATCH 
07/13] fix(graph-proxy): typo --- backend/graph-proxy/src/graphql/mod.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/backend/graph-proxy/src/graphql/mod.rs b/backend/graph-proxy/src/graphql/mod.rs index fe0755127..e996eeb61 100644 --- a/backend/graph-proxy/src/graphql/mod.rs +++ b/backend/graph-proxy/src/graphql/mod.rs @@ -166,7 +166,10 @@ pub async fn graphql_handler( "error" }; if status == "error" { - state.metrics_state.total_errors.add(1, &[KeyValue::new("status", "error")]); + state + .metrics_state + .total_errors + .add(1, &[KeyValue::new("status", "error")]); }; state.metrics_state.request_duration_ms.record( elapsed_ms, From cd450aec22f4eb1792b9c5dc29d02c0b0005b8ec Mon Sep 17 00:00:00 2001 From: Sze Ching Date: Mon, 2 Feb 2026 13:58:57 +0000 Subject: [PATCH 08/13] fix(graph-proxy): add docstring --- backend/graph-proxy/src/metrics.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/backend/graph-proxy/src/metrics.rs b/backend/graph-proxy/src/metrics.rs index 756413b80..258ba207a 100644 --- a/backend/graph-proxy/src/metrics.rs +++ b/backend/graph-proxy/src/metrics.rs @@ -13,6 +13,7 @@ pub struct Metrics { pub total_requests: Counter, /// Request duration in miliseconds on every request pub request_duration_ms: Histogram, + /// Total errors on query and mutation pub total_errors: Counter, } From 0479084d182ea1fc506a8ed3ae69ac1c9fec04bb Mon Sep 17 00:00:00 2001 From: Sze Ching Date: Wed, 28 Jan 2026 10:27:15 +0000 Subject: [PATCH 09/13] chore(charts): bump version --- charts/workflows/Chart.yaml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/charts/workflows/Chart.yaml b/charts/workflows/Chart.yaml index 8ee84f3d1..a872c22da 100644 --- a/charts/workflows/Chart.yaml +++ b/charts/workflows/Chart.yaml @@ -2,9 +2,7 @@ apiVersion: v2 name: workflows description: Data Analysis workflow orchestration type: application - -version: 0.13.35 - +version: 0.13.36 dependencies: - name: argo-workflows repository: 
https://argoproj.github.io/argo-helm From e2dc57fe56d110c428996ee05b955e6aae414fea Mon Sep 17 00:00:00 2001 From: Sze Ching Date: Wed, 28 Jan 2026 10:27:47 +0000 Subject: [PATCH 10/13] feat(charts): enable otel-collector to scrape workflows controller --- charts/workflows/values.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/charts/workflows/values.yaml b/charts/workflows/values.yaml index 7aab7f7f0..74ac5e97d 100644 --- a/charts/workflows/values.yaml +++ b/charts/workflows/values.yaml @@ -13,6 +13,10 @@ argo-workflows: bucket: k8s-workflows-test region: unsupported controller: + metricsConfig: + enabled: true + secure: false + scheme: http replicas: 2 podAnnotations: prometheus.io/scrape: "true" From 8b20a9f143c1494c5097ecc1208d0cc88343ca79 Mon Sep 17 00:00:00 2001 From: Sze Ching Date: Wed, 28 Jan 2026 17:30:30 +0000 Subject: [PATCH 11/13] feat(graph-proxy): make a container for branch when push --- .github/workflows/_graph_proxy_container.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/_graph_proxy_container.yaml b/.github/workflows/_graph_proxy_container.yaml index 2b9e0b85e..2e024d900 100644 --- a/.github/workflows/_graph_proxy_container.yaml +++ b/.github/workflows/_graph_proxy_container.yaml @@ -35,6 +35,7 @@ jobs: images: ${{ env.IMAGE_REPOSITORY }} tags: | type=raw,value=${{ steps.tags.outputs.version }} + type=raw,value=${{ github.ref_name }} type=raw,value=latest - name: Set up Docker Buildx @@ -47,7 +48,7 @@ jobs: context: backend file: backend/Dockerfile.graph-proxy target: deploy - push: ${{ github.event_name == 'push' && startsWith(github.ref, 'refs/tags/graph-proxy@') }} + push: ${{ github.event_name == 'push' && (github.ref_type == 'branch' || startsWith(github.ref, 'ref/tags/graph-proxy@')) }} load: ${{ !(github.event_name == 'push' && startsWith(github.ref, 'refs/tags/graph-proxy@')) }} tags: ${{ steps.meta.outputs.tags }} labels: ${{ steps.meta.outputs.labels }} From 
0379da742a308659a9bfa73e321935cea8602316 Mon Sep 17 00:00:00 2001 From: Sze Ching Date: Fri, 30 Jan 2026 11:03:59 +0000 Subject: [PATCH 12/13] fix(graph-proxy): typo --- .github/workflows/_graph_proxy_container.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/_graph_proxy_container.yaml b/.github/workflows/_graph_proxy_container.yaml index 2e024d900..bda085515 100644 --- a/.github/workflows/_graph_proxy_container.yaml +++ b/.github/workflows/_graph_proxy_container.yaml @@ -48,7 +48,7 @@ jobs: context: backend file: backend/Dockerfile.graph-proxy target: deploy - push: ${{ github.event_name == 'push' && (github.ref_type == 'branch' || startsWith(github.ref, 'ref/tags/graph-proxy@')) }} + push: ${{ github.event_name == 'push' && (github.ref_type == 'branch' || startsWith(github.ref, 'refs/tags/graph-proxy@')) }} load: ${{ !(github.event_name == 'push' && startsWith(github.ref, 'refs/tags/graph-proxy@')) }} tags: ${{ steps.meta.outputs.tags }} labels: ${{ steps.meta.outputs.labels }} From b0e089e3c69919bd190c449d278c5a645c16fb28 Mon Sep 17 00:00:00 2001 From: Sze Ching Date: Fri, 30 Jan 2026 11:24:29 +0000 Subject: [PATCH 13/13] fix(graph-proxy): remove latest tag from branch push --- .github/workflows/_graph_proxy_container.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/_graph_proxy_container.yaml b/.github/workflows/_graph_proxy_container.yaml index bda085515..77a1c0c5b 100644 --- a/.github/workflows/_graph_proxy_container.yaml +++ b/.github/workflows/_graph_proxy_container.yaml @@ -34,9 +34,9 @@ jobs: with: images: ${{ env.IMAGE_REPOSITORY }} tags: | - type=raw,value=${{ steps.tags.outputs.version }} + type=raw,value=${{ steps.tags.outputs.version }},enable=${{ startsWith(github.ref, 'refs/tags/graph-proxy@') }} + type=raw,value=latest,enable=${{ startsWith(github.ref, 'refs/tags/graph-proxy@') }} type=raw,value=${{ github.ref_name }} - type=raw,value=latest - name: Set up 
Docker Buildx uses: docker/setup-buildx-action@v3.11.1 @@ -48,7 +48,7 @@ jobs: context: backend file: backend/Dockerfile.graph-proxy target: deploy - push: ${{ github.event_name == 'push' && (github.ref_type == 'branch' || startsWith(github.ref, 'refs/tags/graph-proxy@')) }} + push: ${{ github.event_name == 'push' }} load: ${{ !(github.event_name == 'push' && startsWith(github.ref, 'refs/tags/graph-proxy@')) }} tags: ${{ steps.meta.outputs.tags }} labels: ${{ steps.meta.outputs.labels }}