Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions helm/robusta/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -91,11 +91,9 @@ lightActions:
- prometheus_all_available_metrics
- prometheus_get_series
- prometheus_get_label_names
- holmes_workload_health
- holmes_conversation
- holmes_issue_chat
- holmes_chat
- holmes_workload_chat
- list_pods
- kubectl_describe
- fetch_resource_yaml
Expand Down
38 changes: 0 additions & 38 deletions src/robusta/core/model/base_params.py
Original file line number Diff line number Diff line change
Expand Up @@ -232,44 +232,6 @@ class HolmesConversationParams(HolmesParams):
include_tool_call_results: bool = True


class HolmesWorkloadHealthParams(HolmesParams):
    """
    Parameters for the Holmes workload health check action.

    :var ask: Override question to ask holmes
    :var resource: The resource related to this investigation. A resource has a `name` and `kind`, and may have `namespace` and `node`
    :var alert_history: fetch historical alert data on the resource
    :var alert_history_since_hours: Timespan of historic data to use in hours. 24 by default.
    :var stored_instrucitons: Use remote instructions specified for the workload.
    :var instructions: List of extra instructions to supply.
    :var silent_healthy: Does not create findings in the case of healthy workload.

    :example ask: What are all the issues in my cluster right now?
    """

    ask: Optional[str]
    resource: Optional[ResourceInfo] = ResourceInfo()
    alert_history: bool = True
    alert_history_since_hours: float = 24
    # NOTE: the field name is misspelled ("instrucitons"), but this model is
    # serialized with ``params.json()`` and posted to Holmes as-is, so renaming
    # it would change the request payload.
    stored_instrucitons: bool = True
    instructions: List[str] = []
    # NOTE(review): the two flags below are not described in the docstring;
    # presumably they control whether Holmes tool calls and their outputs are
    # included in the result -- confirm against the Holmes API.
    include_tool_calls: bool = True
    include_tool_call_results: bool = True
    silent_healthy: bool = False


class HolmesWorkloadHealthChatParams(HolmesParams):
    """
    Parameters for a follow-up chat about a workload health check.

    :var ask: User's prompt for holmes
    :var workload_health_result: Result from the workload health check
    :var resource: The resource related to the initial investigation
    :var conversation_history: List of previous user prompts and responses.
    """

    ask: str
    workload_health_result: HolmesInvestigationResult
    resource: ResourceInfo
    # Optional; defaults to None when no history is supplied.
    conversation_history: Optional[list[dict]] = None


class NamespacedResourcesParams(ActionParams):
"""
:var name: Resource name
Expand Down
166 changes: 0 additions & 166 deletions src/robusta/core/playbooks/internal/ai_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,6 @@
HolmesChatParams,
HolmesConversationParams,
HolmesIssueChatParams,
HolmesWorkloadHealthChatParams,
HolmesWorkloadHealthParams,
ResourceInfo,
)
from robusta.core.model.events import ExecutionBaseEvent
Expand All @@ -35,7 +33,6 @@
HolmesRequest,
HolmesResult,
HolmesResultsBlock,
HolmesWorkloadHealthRequest,
)
from robusta.core.reporting.utils import convert_svg_to_png
from robusta.core.stream.utils import (
Expand All @@ -44,10 +41,6 @@
parse_sse_data,
StreamEvents,
)
from robusta.core.schedule.model import FixedDelayRepeat
from robusta.integrations.kubernetes.autogenerated.events import (
KubernetesAnyChangeEvent,
)
from robusta.integrations.prometheus.utils import HolmesDiscovery
from robusta.utils.error_codes import ActionException, ErrorCodes

Expand Down Expand Up @@ -173,71 +166,6 @@ def ask_holmes(event: ExecutionBaseEvent, params: AIInvestigateParams):
handle_holmes_error(e)


@action
def holmes_workload_health(
    event: ExecutionBaseEvent, params: HolmesWorkloadHealthParams
):
    """
    Ask Holmes for a workload health check and report the answer as a finding.

    The params are serialized to JSON and posted to
    ``{holmes_url}/api/workload_health_check``. The response is parsed into a
    ``HolmesResult`` whose ``analysis`` is expected to be a JSON document with
    a ``workload_healthy`` key. When ``params.silent_healthy`` is set and the
    workload is reported healthy, no finding is created.

    :param event: the triggering event; supplies the cluster name and receives
        the resulting finding.
    :param params: health check parameters (question, resource, flags).
    :raises ActionException: with ``HOLMES_DISCOVERY_FAILED`` when no Holmes
        URL can be discovered. Any failure while calling Holmes or building
        the finding is logged and passed to ``handle_holmes_error``.
    """
    holmes_url = HolmesDiscovery.find_holmes_url(params.holmes_url)
    if not holmes_url:
        raise ActionException(
            ErrorCodes.HOLMES_DISCOVERY_FAILED,
            "Robusta couldn't connect to the Holmes client.",
        )

    # ``resource`` is Optional on the params model and every use below is
    # guarded, so guard this assignment too instead of raising AttributeError.
    if params.resource:
        params.resource.cluster = event.get_context().cluster_name

    try:
        result = requests.post(
            f"{holmes_url}/api/workload_health_check", data=params.json()
        )
        result.raise_for_status()

        holmes_result = HolmesResult(**json.loads(result.text))

        # Best effort: if the analysis is not the expected JSON document, the
        # workload is treated as healthy and the problem is only logged.
        healthy = True
        try:
            analysis = json.loads(holmes_result.analysis)
            healthy = analysis.get("workload_healthy")
        except Exception:
            logging.exception(
                "Error in holmes response format, analysis did not return the expected json format."
            )

        if params.silent_healthy and healthy:
            return

        finding = Finding(
            title=f"AI Health check of {params.resource}",
            aggregation_key="HolmesHealthCheck",
            subject=FindingSubject(
                name=params.resource.name if params.resource else "",
                namespace=params.resource.namespace if params.resource else "",
                subject_type=(
                    FindingSubjectType.from_kind(params.resource.kind)
                    if params.resource
                    else FindingSubjectType.TYPE_NONE
                ),
                node=params.resource.node if params.resource else "",
                container=params.resource.container if params.resource else "",
            ),
            finding_type=FindingType.AI_ANALYSIS,
            failure=False,
        )
        finding.add_enrichment(
            [HolmesResultsBlock(holmes_result=holmes_result)],
            enrichment_type=EnrichmentType.ai_analysis,
        )

        event.add_finding(finding)
    except Exception as e:
        # logging.exception already records the active traceback.
        logging.exception(
            f"Failed to get holmes analysis for {params.resource}, {params.ask}"
        )
        handle_holmes_error(e)


def build_conversation_title(params: HolmesConversationParams) -> str:
return (
f"{params.resource}, {params.ask} for issue '{params.context.robusta_issue_id}'"
Expand Down Expand Up @@ -315,42 +243,6 @@ def holmes_conversation(event: ExecutionBaseEvent, params: HolmesConversationPar
handle_holmes_error(e)


class DelayedHealthCheckParams(HolmesWorkloadHealthParams):
    """
    Workload health check parameters plus a scheduling delay.

    :var delay_seconds: Seconds to wait before the health check runs. 120 by default.
    """

    delay_seconds: int = 120


@action
def delayed_health_check(
    event: KubernetesAnyChangeEvent, action_params: DelayedHealthCheckParams
):
    """
    runs a holmes workload health action with a delay

    The changed object's name, namespace and kind are used to fill in
    ``action_params.resource`` and, when no ``ask`` was supplied, a default
    question. ``holmes_workload_health`` is then scheduled to run once after
    ``action_params.delay_seconds``; scheduling again for the same
    name/namespace replaces the pending task.

    :param event: Kubernetes change event carrying the affected object.
    :param action_params: health check parameters plus the delay in seconds.
    """
    obj = event.obj
    metadata = obj.metadata if obj else None
    if not metadata:
        # Without the object's metadata there is nothing to identify the
        # workload by, so skip scheduling instead of raising AttributeError.
        logging.warning(
            "Skipping delayed health check: event has no object metadata."
        )
        return

    if not action_params.ask:
        action_params.ask = f"help me diagnose an issue with a workload {metadata.namespace}/{obj.kind}/{metadata.name} running in my Kubernetes cluster. Can you assist with identifying potential issues and pinpoint the root cause."

    action_params.resource = ResourceInfo(
        name=metadata.name, namespace=metadata.namespace, kind=obj.kind
    )

    logging.info(
        f"Scheduling health check. {metadata.name} delays: {action_params.delay_seconds}"
    )
    event.get_scheduler().schedule_action(
        action_func=holmes_workload_health,
        task_id=f"health_check_{metadata.name}_{metadata.namespace}",
        scheduling_params=FixedDelayRepeat(
            repeat=1, seconds_delay=action_params.delay_seconds
        ),
        named_sinks=event.named_sinks,
        action_params=action_params,
        replace_existing=True,
        standalone_task=True,
    )


@action
def holmes_issue_chat(event: ExecutionBaseEvent, params: HolmesIssueChatParams):
holmes_url = HolmesDiscovery.find_holmes_url(params.holmes_url)
Expand Down Expand Up @@ -493,64 +385,6 @@ def holmes_chat(event: ExecutionBaseEvent, params: HolmesChatParams):
handle_holmes_error(e)


@action
def holmes_workload_chat(
    event: ExecutionBaseEvent, params: HolmesWorkloadHealthChatParams
):
    """
    Send a follow-up chat message about a workload health check to Holmes.

    Builds a ``HolmesWorkloadHealthRequest`` from the params, posts it to
    ``{holmes_url}/api/workload_health_chat`` and attaches the parsed
    ``HolmesChatResult`` to a new AI-analysis finding on the event.

    :param event: the triggering event; receives the resulting finding.
    :param params: chat prompt, prior health result, resource and history.
    :raises ActionException: with ``HOLMES_DISCOVERY_FAILED`` when no Holmes
        URL can be discovered. Any failure while calling Holmes or building
        the finding is logged and passed to ``handle_holmes_error``.
    """
    holmes_url = HolmesDiscovery.find_holmes_url(params.holmes_url)
    if not holmes_url:
        raise ActionException(
            ErrorCodes.HOLMES_DISCOVERY_FAILED,
            "Robusta couldn't connect to the Holmes client.",
        )

    try:
        resource = params.resource

        payload = HolmesWorkloadHealthRequest(
            ask=params.ask,
            conversation_history=params.conversation_history,
            workload_health_result=params.workload_health_result,
            resource=resource,
            model=params.model,
        ).json()
        response = requests.post(
            f"{holmes_url}/api/workload_health_chat", data=payload
        )
        response.raise_for_status()
        chat_result = HolmesChatResult(**json.loads(response.text))

        title = f"AI Chat for Health Check of {resource}"

        # Describe the finding's subject from the resource when there is one,
        # otherwise fall back to an empty, type-less subject.
        if resource:
            subject = FindingSubject(
                name=resource.name,
                namespace=resource.namespace,
                subject_type=FindingSubjectType.from_kind(resource.kind),
                node=resource.node,
                container=resource.container,
            )
        else:
            subject = FindingSubject(
                name="",
                namespace="",
                subject_type=FindingSubjectType.TYPE_NONE,
                node="",
                container="",
            )

        finding = Finding(
            title=title,
            aggregation_key="HolmesWorkloadConversationResult",
            subject=subject,
            finding_type=FindingType.AI_ANALYSIS,
            failure=False,
        )
        finding.add_enrichment(
            [HolmesChatResultsBlock(holmes_result=chat_result)],
            enrichment_type=EnrichmentType.ai_analysis,
        )
        event.add_finding(finding)

    except Exception as err:
        logging.exception(
            f"Failed to get holmes chat for health check of {params.resource}",
            exc_info=True,
        )
        handle_holmes_error(err)


def stream_and_render_graphs(url, holmes_req, event):
with requests.post(
url,
Expand Down
5 changes: 0 additions & 5 deletions src/robusta/core/reporting/holmes.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,8 +93,3 @@ class HolmesChatResult(BaseModel):

class HolmesChatResultsBlock(BaseBlock):
    # Reporting block that carries a Holmes chat answer.
    # The result is optional, so a block may hold no answer at all.
    holmes_result: Optional[HolmesChatResult]


class HolmesWorkloadHealthRequest(HolmesChatRequest):
    # Chat request extended with the context of an earlier workload health check.
    # Result of the health check that the chat is following up on.
    workload_health_result: HolmesInvestigationResult
    # The resource the health check was run against.
    resource: ResourceInfo
Loading