From 8899ef29f2a6f8b3475285e20e47a42a9dc83a69 Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 2 Feb 2026 23:34:32 +0000 Subject: [PATCH 1/2] Add Prometheus Query API and Holmes Chat API documentation Add documentation for two new API endpoints: - Prometheus Query API: Run PromQL queries against connected clusters - Holmes Chat API: Send questions to Holmes AI for root cause analysis https://claude.ai/code/session_01MLXAVw9RVcPfGyUhy1XeDQ --- .../exporting/prometheus-query-api.rst | 205 ++++++++++++++++++ .../exporting/robusta-pro-features.rst | 1 + .../holmesgpt/holmes-chat-api.rst | 171 +++++++++++++++ docs/index.rst | 2 + 4 files changed, 379 insertions(+) create mode 100644 docs/configuration/exporting/prometheus-query-api.rst create mode 100644 docs/configuration/holmesgpt/holmes-chat-api.rst diff --git a/docs/configuration/exporting/prometheus-query-api.rst b/docs/configuration/exporting/prometheus-query-api.rst new file mode 100644 index 000000000..c7698f0d7 --- /dev/null +++ b/docs/configuration/exporting/prometheus-query-api.rst @@ -0,0 +1,205 @@ +Prometheus Query API +==================== + +.. note:: + + This feature is available with the Robusta SaaS platform and self-hosted commercial plans. + It is not available in the open-source version. + +The Prometheus Query API allows you to run PromQL queries against the Prometheus instance +in your connected clusters. + +Endpoint +-------- + +.. code-block:: + + POST https://api.robusta.dev/api/accounts/{account_id}/clusters/{cluster_name}/prometheus/query + +Path Parameters +--------------- + +.. list-table:: + :header-rows: 1 + :widths: 20 10 70 + + * - Parameter + - Required + - Description + * - account_id + - Yes + - Your Robusta account ID (found in generated_values.yaml as ``globalConfig.account_id``) + * - cluster_name + - Yes + - The name of the cluster to query (as shown in the Robusta UI) + +Request Body +------------ + +.. 
list-table:: + :header-rows: 1 + :widths: 20 10 15 55 + + * - Field + - Required + - Default + - Description + * - promql_query + - Yes + - + - The PromQL query to execute + * - duration_minutes + - No + - 60 + - Time range in minutes (looking back from now) + * - starts_at + - No + - + - Start time for date range queries (format: ``YYYY-MM-DD HH:MM:SS UTC``) + * - ends_at + - No + - + - End time for date range queries (format: ``YYYY-MM-DD HH:MM:SS UTC``) + * - step + - No + - 1m + - Query resolution step (e.g., ``30s``, ``1m``, ``5m``) + +.. note:: + + Specify either ``duration_minutes`` or the pair ``starts_at`` and ``ends_at``; the two forms cannot be combined. + If neither is specified, ``duration_minutes`` defaults to 60. + +Authentication +-------------- + +Include an API key in the Authorization header: + +.. code-block:: + + Authorization: Bearer + +The API key must have **Read Metrics** permission. + +Example Request +--------------- + +Using duration (last 5 minutes): + +.. code-block:: bash + + curl -X POST "https://api.robusta.dev/api/accounts/ACCOUNT_ID/clusters/CLUSTER_NAME/prometheus/query" \ + -H "Authorization: Bearer YOUR_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{ + "promql_query": "up", + "duration_minutes": 5, + "step": "1m" + }' + +Using date range: + +.. code-block:: bash + + curl -X POST "https://api.robusta.dev/api/accounts/ACCOUNT_ID/clusters/CLUSTER_NAME/prometheus/query" \ + -H "Authorization: Bearer YOUR_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{ + "promql_query": "kube_resourcequota{type=\"hard\"}", + "starts_at": "2024-01-27 10:00:00 UTC", + "ends_at": "2024-01-27 11:00:00 UTC", + "step": "1m" + }' + +Replace ``ACCOUNT_ID``, ``CLUSTER_NAME``, and ``YOUR_API_KEY`` with your actual values. + +Response Format +--------------- + +Success response: + +.. 
code-block:: json + + { + "status": "success", + "query": "up", + "data": [ + { + "metric": { + "__name__": "up", + "job": "kubelet", + "namespace": "kube-system", + "node": "worker-node-1" + }, + "timestamps": [1706356800.0, 1706356860.0, 1706356920.0], + "values": ["1", "1", "1"] + }, + { + "metric": { + "__name__": "up", + "job": "coredns", + "namespace": "kube-system", + "pod": "coredns-7c68b4b998-abc12" + }, + "timestamps": [1706356800.0, 1706356860.0, 1706356920.0], + "values": ["1", "1", "1"] + } + ] + } + +Response Fields +--------------- + +.. list-table:: + :header-rows: 1 + :widths: 20 80 + + * - Field + - Description + * - status + - ``success`` or ``error`` + * - query + - The PromQL query that was executed + * - data + - Array of time series results + * - data[].metric + - Labels identifying the time series + * - data[].timestamps + - Array of Unix timestamps for each data point + * - data[].values + - Array of values corresponding to each timestamp + +Error Response +-------------- + +.. code-block:: json + + { + "status": "error", + "query": "invalid_query", + "error": "Error message describing what went wrong" + } + +Common Queries +-------------- + +Resource quotas (OpenShift/Kubernetes): + +.. code-block:: json + + {"promql_query": "kube_resourcequota{type=\"hard\"}"} + {"promql_query": "kube_resourcequota{type=\"used\"}"} + +CPU and memory: + +.. code-block:: json + + {"promql_query": "sum(rate(container_cpu_usage_seconds_total[5m])) by (pod, namespace)"} + {"promql_query": "container_memory_usage_bytes{namespace=\"default\"}"} + +Pod status: + +.. 
code-block:: json + + {"promql_query": "kube_pod_status_phase{phase=\"Running\"}"} + {"promql_query": "kube_pod_container_status_restarts_total"} diff --git a/docs/configuration/exporting/robusta-pro-features.rst b/docs/configuration/exporting/robusta-pro-features.rst index 4eae61798..0a3ca30a8 100644 --- a/docs/configuration/exporting/robusta-pro-features.rst +++ b/docs/configuration/exporting/robusta-pro-features.rst @@ -49,6 +49,7 @@ Features include: * :doc:`Send Alerts API `: Send alerts programmatically from external systems or via integrations * :doc:`Configuration Changes API `: Track configuration changes in your environment * :doc:`RBAC Configuration API `: Programmatically manage role-based access control configurations +* :doc:`Prometheus Query API <prometheus-query-api>`: Run PromQL queries against Prometheus in your connected clusters Additional Pro Features ----------------------- diff --git a/docs/configuration/holmesgpt/holmes-chat-api.rst b/docs/configuration/holmesgpt/holmes-chat-api.rst new file mode 100644 index 000000000..6aa9ffa3f --- /dev/null +++ b/docs/configuration/holmesgpt/holmes-chat-api.rst @@ -0,0 +1,171 @@ +Holmes Chat API +=============== + +.. note:: + This feature is available with the Robusta SaaS platform and self-hosted commercial plans. It is not available in the open-source version. + +Use this endpoint to send questions to Holmes AI for root cause analysis. You can provide alert context and Holmes will investigate and return a detailed markdown analysis. + +.. _holmes-chat-api: + +POST https://api.robusta.dev/api/holmes/{account_id}/chat +--------------------------------------------------------- + +URL Parameters +^^^^^^^^^^^^^^ + +.. list-table:: + :widths: 20 10 70 10 + :header-rows: 1 + + * - Parameter + - Type + - Description + - Required + * - ``account_id`` + - string + - The unique account identifier (found in your ``generated_values.yaml`` file). + - Yes + +Request Body +^^^^^^^^^^^^ + +.. 
list-table:: + :widths: 20 10 60 10 + :header-rows: 1 + + * - Parameter + - Type + - Description + - Required + * - ``cluster_id`` + - string + - The cluster to query. If not provided, Holmes will try to extract it from the payload or use the single connected cluster. + - No + * - ``ask`` + - string + - The question to ask Holmes. Default: ``"Why is this alert firing?"`` + - No + * - ``payload`` + - object + - Alert payload or context data for Holmes to analyze (e.g., Prometheus alert labels). + - No + * - ``additional_system_prompt`` + - string + - Additional instructions to include in the system prompt. + - No + * - ``model`` + - string + - The AI model to use. If not provided, uses the account default. + - No + +Example Request +^^^^^^^^^^^^^^^ + +The following ``curl`` command demonstrates how to ask Holmes about a failing pod: + +.. code-block:: bash + + curl -X POST 'https://api.robusta.dev/api/holmes/ACCOUNT_ID/chat' \ + --header 'Content-Type: application/json' \ + --header 'Authorization: Bearer API-KEY' \ + --data '{ + "cluster_id": "my-cluster", + "ask": "Which pods are failing in the cluster?", + "payload": { + "alertname": "KubeContainerWaiting", + "container": "frontend", + "namespace": "production", + "pod": "frontend-59fbcd7965-drx6w", + "reason": "ContainerCreating", + "severity": "warning" + } + }' + +In the command, make sure to replace the following placeholders: + +- ``ACCOUNT_ID``: Your account ID, which can be found in your ``generated_values.yaml`` file. +- ``API-KEY``: Your API Key for authentication. You can generate this token in the platform by navigating to **Settings** -> **API Keys** -> **New API Key**, and creating a key with the "Robusta AI" resource and "Write" permission. + +Request Headers +^^^^^^^^^^^^^^^ + +.. list-table:: + :widths: 30 70 + :header-rows: 1 + + * - Header + - Description + * - ``Content-Type`` + - Must be ``application/json``. 
+ * - ``Authorization`` + - Bearer token for authentication (e.g., ``Bearer TOKEN_HERE``). The token must have "Robusta AI" resource with "Write" permission. + +Response Format +^^^^^^^^^^^^^^^ + +The API returns a JSON object containing the cluster ID and the analysis in markdown format. + +Example Response +^^^^^^^^^^^^^^^^ + +.. code-block:: json + + { + "cluster_id": "my-cluster", + "analysis": "## Investigation Summary\n\n### Failing Pods\n\n| Pod | Namespace | Status | Reason |\n|-----|-----------|--------|--------|\n| `frontend-59fbcd7965-drx6w` | production | Pending | ContainerCreating |\n\n**Root Cause**: Missing Secret - The pod is stuck because the secret `frontend-api-keys` does not exist.\n\n### Fix\n\n```bash\nkubectl create secret generic frontend-api-keys --from-literal=API_KEY= -n production\n```" + } + +Response Fields +^^^^^^^^^^^^^^^ + +.. list-table:: + :widths: 25 10 70 + :header-rows: 1 + + * - Field + - Type + - Description + * - ``cluster_id`` + - string + - The cluster that was queried. + * - ``analysis`` + - string + - Holmes AI analysis in markdown format. Contains investigation findings, root cause analysis, and remediation steps. + +Error Responses +^^^^^^^^^^^^^^^ + +.. list-table:: + :widths: 15 30 55 + :header-rows: 1 + + * - Status + - Error + - Description + * - 400 + - Bad Request + - Invalid request body or could not determine cluster. + * - 401 + - Unauthorized + - Invalid or missing API key. + * - 403 + - Forbidden + - API key lacks required permissions. + * - 404 + - Not Found + - Specified cluster is not connected to this account. + * - 500 + - Internal Server Error + - Holmes request failed. + +Cluster Resolution +^^^^^^^^^^^^^^^^^^ + +If ``cluster_id`` is not provided in the request, Holmes will attempt to determine the cluster using the following logic: + +1. Extract from ``payload`` fields (``cluster_name``, ``cluster``, ``cluster_id``, ``source``, or from ``labels``/``annotations``). +2. 
Use AI to extract the cluster name from the payload content. +3. If only one cluster is connected to the account, use that cluster. + +If none of these methods succeed and multiple clusters are connected, the API will return an error listing the available clusters. diff --git a/docs/index.rst b/docs/index.rst index b802b856f..d5034fb0e 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -25,6 +25,7 @@ configuration/holmesgpt/main-features configuration/holmesgpt/getting-started HolmesGPT Docs + Holmes Chat API <configuration/holmesgpt/holmes-chat-api> .. toctree:: :maxdepth: 4 @@ -139,6 +140,7 @@ configuration/exporting/alert-statistics-api configuration/exporting/namespace-resources-api configuration/exporting/rbac-api + configuration/exporting/prometheus-query-api .. toctree:: :maxdepth: 4 From dca01628ca26ab93fee0bc91376ed9c45af480e2 Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 3 Feb 2026 00:09:17 +0000 Subject: [PATCH 2/2] Improve API documentation with additional details - Fix ask parameter to be mandatory in Holmes Chat API - Add API key generation instructions to Prometheus Query API - Add Holmes Chat API reference to Robusta Pro Features - Add markdown response handling examples for Holmes Chat API https://claude.ai/code/session_01MLXAVw9RVcPfGyUhy1XeDQ --- .../exporting/prometheus-query-api.rst | 2 +- .../exporting/robusta-pro-features.rst | 3 ++ .../holmesgpt/holmes-chat-api.rst | 33 +++++++++++++++++-- 3 files changed, 35 insertions(+), 3 deletions(-) diff --git a/docs/configuration/exporting/prometheus-query-api.rst b/docs/configuration/exporting/prometheus-query-api.rst index c7698f0d7..d02a94f38 100644 --- a/docs/configuration/exporting/prometheus-query-api.rst +++ b/docs/configuration/exporting/prometheus-query-api.rst @@ -79,7 +79,7 @@ Include an API key in the Authorization header: Authorization: Bearer -The API key must have **Read Metrics** permission. +The API key must have **Read Metrics** permission. 
You can generate an API key in the platform by navigating to **Settings** -> **API Keys** -> **New API Key** and creating a key with the **Read Metrics** permission. Example Request --------------- diff --git a/docs/configuration/exporting/robusta-pro-features.rst b/docs/configuration/exporting/robusta-pro-features.rst index 0a3ca30a8..e8ca75c89 100644 --- a/docs/configuration/exporting/robusta-pro-features.rst +++ b/docs/configuration/exporting/robusta-pro-features.rst @@ -14,6 +14,9 @@ Automatically investigate and resolve issues with AI-powered analysis. :doc:`AI Analysis (HolmesGPT) <../holmesgpt/main-features>` Automatically analyze Kubernetes alerts, logs, and metrics. Get potential root causes and remediation suggestions. +:doc:`Holmes Chat API <../holmesgpt/holmes-chat-api>` + Programmatically send questions to Holmes AI for root cause analysis via REST API. + Custom Alert Ingestion ----------------------- diff --git a/docs/configuration/holmesgpt/holmes-chat-api.rst b/docs/configuration/holmesgpt/holmes-chat-api.rst index 6aa9ffa3f..9032ffd32 100644 --- a/docs/configuration/holmesgpt/holmes-chat-api.rst +++ b/docs/configuration/holmesgpt/holmes-chat-api.rst @@ -44,8 +44,8 @@ Request Body - No * - ``ask`` - string - - The question to ask Holmes. Default: ``"Why is this alert firing?"`` - - No + - The question to ask Holmes. + - Yes * - ``payload`` - object - Alert payload or context data for Holmes to analyze (e.g., Prometheus alert labels). @@ -133,6 +133,35 @@ Response Fields - string - Holmes AI analysis in markdown format. Contains investigation findings, root cause analysis, and remediation steps. +Handling the Markdown Response +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The ``analysis`` field is a markdown string. In the raw JSON body its line breaks appear as ``\n`` escapes; any JSON parser decodes them into real newlines, after which the text can be passed to a markdown renderer: + +**Python:** + +.. 
code-block:: python + + import json + + response_data = json.loads(response.text) + markdown_text = response_data["analysis"] # Already unescaped by json.loads + print(markdown_text) # Prints the markdown source with real line breaks + +**JavaScript:** + +.. code-block:: javascript + + const data = JSON.parse(responseText); + const markdownText = data.analysis; // Already unescaped by JSON.parse + // Use a markdown renderer like marked.js or react-markdown + +**Bash (using jq):** + +.. code-block:: bash + + curl ... | jq -r '.analysis' # -r outputs raw string with actual newlines + Error Responses ^^^^^^^^^^^^^^^