diff --git a/.github/workflows/_graph_proxy_container.yaml b/.github/workflows/_graph_proxy_container.yaml index 2b9e0b85e..77a1c0c5b 100644 --- a/.github/workflows/_graph_proxy_container.yaml +++ b/.github/workflows/_graph_proxy_container.yaml @@ -34,8 +34,9 @@ jobs: with: images: ${{ env.IMAGE_REPOSITORY }} tags: | - type=raw,value=${{ steps.tags.outputs.version }} - type=raw,value=latest + type=raw,value=${{ steps.tags.outputs.version }},enable=${{ startsWith(github.ref, 'refs/tags/graph-proxy@') }} + type=raw,value=latest,enable=${{ startsWith(github.ref, 'refs/tags/graph-proxy@') }} + type=raw,value=${{ github.ref_name }} - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3.11.1 @@ -47,7 +48,7 @@ jobs: context: backend file: backend/Dockerfile.graph-proxy target: deploy - push: ${{ github.event_name == 'push' && startsWith(github.ref, 'refs/tags/graph-proxy@') }} + push: ${{ github.event_name == 'push' }} load: ${{ !(github.event_name == 'push' && startsWith(github.ref, 'refs/tags/graph-proxy@')) }} tags: ${{ steps.meta.outputs.tags }} labels: ${{ steps.meta.outputs.labels }} diff --git a/backend/graph-proxy/src/graphql/mod.rs b/backend/graph-proxy/src/graphql/mod.rs index 76f97543a..e996eeb61 100644 --- a/backend/graph-proxy/src/graphql/mod.rs +++ b/backend/graph-proxy/src/graphql/mod.rs @@ -116,7 +116,9 @@ pub async fn graphql_handler( auth_token_header: Option>>, request: GraphQLRequest, ) -> GraphQLResponse { + let start = std::time::Instant::now(); let query = request.into_inner(); + let mut request_type = "unparseable"; if let Ok(query) = parse_query(&query.query) { let operation = query.operations; @@ -130,19 +132,24 @@ pub async fn graphql_handler( .map(|operation| operation.1.node.ty) .collect(), }; + let mut has_mutation = false; for operation in operations { match operation { async_graphql::parser::types::OperationType::Query => state .metrics_state .total_requests .add(1, &[KeyValue::new("request_type", "query")]), - 
async_graphql::parser::types::OperationType::Mutation => state - .metrics_state - .total_requests - .add(1, &[KeyValue::new("request_type", "mutation")]), + async_graphql::parser::types::OperationType::Mutation => { + has_mutation = true; + state + .metrics_state + .total_requests + .add(1, &[KeyValue::new("request_type", "mutation")]) + } async_graphql::parser::types::OperationType::Subscription => {} }; } + request_type = if has_mutation { "mutation" } else { "query" }; } else { state .metrics_state @@ -151,7 +158,27 @@ pub async fn graphql_handler( }; let auth_token = auth_token_header.map(|header| header.0); - state.schema.execute(query.data(auth_token)).await.into() + let response = state.schema.execute(query.data(auth_token)).await; + let elapsed_ms = start.elapsed().as_secs_f64() * 1000.0; + let status = if response.errors.is_empty() { + "ok" + } else { + "error" + }; + if status == "error" { + state + .metrics_state + .total_errors + .add(1, &[KeyValue::new("status", "error")]); + }; + state.metrics_state.request_duration_ms.record( + elapsed_ms, + &[ + KeyValue::new("request_type", request_type), + KeyValue::new("status", status), + ], + ); + response.into() } lazy_static! 
{ diff --git a/backend/graph-proxy/src/metrics.rs b/backend/graph-proxy/src/metrics.rs index e61ed555f..258ba207a 100644 --- a/backend/graph-proxy/src/metrics.rs +++ b/backend/graph-proxy/src/metrics.rs @@ -1,6 +1,6 @@ use std::sync::Arc; -use opentelemetry::metrics::{Counter, MeterProvider}; +use opentelemetry::metrics::{Counter, Histogram, MeterProvider}; use opentelemetry_sdk::metrics::SdkMeterProvider; /// Thread-safe wrapper for OTEL metrics @@ -11,6 +11,10 @@ pub type MetricsState = Arc; pub struct Metrics { /// Total requests on all routes pub total_requests: Counter, + /// Request duration in milliseconds on every request + pub request_duration_ms: Histogram, + /// Total errors on query and mutation + pub total_errors: Counter, } impl Metrics { @@ -23,6 +27,21 @@ impl Metrics { .with_description("The total requests on all routes made since the last restart.") .build(); - Metrics { total_requests } + let request_duration_ms = meter + .f64_histogram("graph_proxy_request_duration_ms") + .with_description("GraphQL request duration") + .with_unit("ms") + .build(); + + let total_errors = meter + .u64_counter("graph_proxy_total_errors") + .with_description("The total number of errors since the last restart.") + .build(); + + Metrics { + total_requests, + request_duration_ms, + total_errors, + } } } diff --git a/charts/workflows/Chart.yaml b/charts/workflows/Chart.yaml index 8ee84f3d1..a872c22da 100644 --- a/charts/workflows/Chart.yaml +++ b/charts/workflows/Chart.yaml @@ -2,9 +2,7 @@ apiVersion: v2 name: workflows description: Data Analysis workflow orchestration type: application - -version: 0.13.35 - +version: 0.13.36 dependencies: - name: argo-workflows repository: https://argoproj.github.io/argo-helm diff --git a/charts/workflows/values.yaml b/charts/workflows/values.yaml index 7aab7f7f0..74ac5e97d 100644 --- a/charts/workflows/values.yaml +++ b/charts/workflows/values.yaml @@ -13,6 +13,10 @@ argo-workflows: bucket: k8s-workflows-test region: unsupported 
controller: + metricsConfig: + enabled: true + secure: false + scheme: http replicas: 2 podAnnotations: prometheus.io/scrape: "true"