diff --git a/helm/robusta/values.yaml b/helm/robusta/values.yaml index d6dc82bb1..16a48f1b5 100644 --- a/helm/robusta/values.yaml +++ b/helm/robusta/values.yaml @@ -91,11 +91,9 @@ lightActions: - prometheus_all_available_metrics - prometheus_get_series - prometheus_get_label_names -- holmes_workload_health - holmes_conversation - holmes_issue_chat - holmes_chat -- holmes_workload_chat - list_pods - kubectl_describe - fetch_resource_yaml diff --git a/src/robusta/core/model/base_params.py b/src/robusta/core/model/base_params.py index ee82198de..63f2eda75 100644 --- a/src/robusta/core/model/base_params.py +++ b/src/robusta/core/model/base_params.py @@ -232,44 +232,6 @@ class HolmesConversationParams(HolmesParams): include_tool_call_results: bool = True -class HolmesWorkloadHealthParams(HolmesParams): - """ - :var ask: Override question to ask holmes - :var resource: The resource related to this investigation. A resource has a `name` and `kind`, and may have `namespace` and `node` - :var alert_history: fetch historical alert data on the resource - :var alert_history_since_hours: Timespan of historic data to use in hours. 24 by default. - :var stored_instrucitons: Use remote instructions specified for the workload. - :var instructions: List of extra instructions to supply. - :var silent_healthy: Does not create findings in the case of healthy workload. - - :example ask: What are all the issues in my cluster right now? - """ - - ask: Optional[str] - resource: Optional[ResourceInfo] = ResourceInfo() - alert_history: bool = True - alert_history_since_hours: float = 24 - stored_instrucitons: bool = True - instructions: List[str] = [] - include_tool_calls: bool = True - include_tool_call_results: bool = True - silent_healthy: bool = False - - -class HolmesWorkloadHealthChatParams(HolmesParams): - """ - :var ask: User's prompt for holmes - :var workload_health_result: Result from the workload health check - :var resource: The resource related to the initial investigation - :var conversation_history: List of previous user prompts and responses. - """ - - ask: str - workload_health_result: HolmesInvestigationResult - resource: ResourceInfo - conversation_history: Optional[list[dict]] = None - - class NamespacedResourcesParams(ActionParams): """ :var name: Resource name diff --git a/src/robusta/core/playbooks/internal/ai_integration.py b/src/robusta/core/playbooks/internal/ai_integration.py index 78a6ae153..1c6e5d519 100644 --- a/src/robusta/core/playbooks/internal/ai_integration.py +++ b/src/robusta/core/playbooks/internal/ai_integration.py @@ -12,8 +12,6 @@ HolmesChatParams, HolmesConversationParams, HolmesIssueChatParams, - HolmesWorkloadHealthChatParams, - HolmesWorkloadHealthParams, ResourceInfo, ) from robusta.core.model.events import ExecutionBaseEvent @@ -35,7 +33,6 @@ HolmesRequest, HolmesResult, HolmesResultsBlock, - HolmesWorkloadHealthRequest, ) from robusta.core.reporting.utils import convert_svg_to_png from robusta.core.stream.utils import ( @@ -44,10 +41,6 @@ parse_sse_data, StreamEvents, ) -from robusta.core.schedule.model import FixedDelayRepeat -from robusta.integrations.kubernetes.autogenerated.events import ( - KubernetesAnyChangeEvent, -) from robusta.integrations.prometheus.utils import HolmesDiscovery from robusta.utils.error_codes import ActionException, ErrorCodes @@ -173,71 +166,6 @@ def ask_holmes(event: ExecutionBaseEvent, params: AIInvestigateParams): handle_holmes_error(e) -@action -def holmes_workload_health( - event: ExecutionBaseEvent, params: HolmesWorkloadHealthParams -): - holmes_url = HolmesDiscovery.find_holmes_url(params.holmes_url) - if not holmes_url: - raise ActionException( - ErrorCodes.HOLMES_DISCOVERY_FAILED, - "Robusta couldn't connect to the Holmes client.", - ) - - params.resource.cluster = event.get_context().cluster_name - - try: - result = requests.post( - f"{holmes_url}/api/workload_health_check", data=params.json() - ) - result.raise_for_status() - - holmes_result = HolmesResult(**json.loads(result.text)) - - healthy = True - try: - analysis = json.loads(holmes_result.analysis) - healthy = analysis.get("workload_healthy") - except Exception: - logging.exception( - "Error in holmes response format, analysis did not return the expected json format." - ) - pass - - if params.silent_healthy and healthy: - return - - finding = Finding( - title=f"AI Health check of {params.resource}", - aggregation_key="HolmesHealthCheck", - subject=FindingSubject( - name=params.resource.name if params.resource else "", - namespace=params.resource.namespace if params.resource else "", - subject_type=( - FindingSubjectType.from_kind(params.resource.kind) - if params.resource - else FindingSubjectType.TYPE_NONE - ), - node=params.resource.node if params.resource else "", - container=params.resource.container if params.resource else "", - ), - finding_type=FindingType.AI_ANALYSIS, - failure=False, - ) - finding.add_enrichment( - [HolmesResultsBlock(holmes_result=holmes_result)], - enrichment_type=EnrichmentType.ai_analysis, - ) - - event.add_finding(finding) - except Exception as e: - logging.exception( - f"Failed to get holmes analysis for {params.resource}, {params.ask}", - exc_info=True, - ) - handle_holmes_error(e) - - def build_conversation_title(params: HolmesConversationParams) -> str: return ( f"{params.resource}, {params.ask} for issue '{params.context.robusta_issue_id}'" @@ -315,42 +243,6 @@ def holmes_conversation(event: ExecutionBaseEvent, params: HolmesConversationPar handle_holmes_error(e) -class DelayedHealthCheckParams(HolmesWorkloadHealthParams): - delay_seconds: int = 120 - - -@action -def delayed_health_check( - event: KubernetesAnyChangeEvent, action_params: DelayedHealthCheckParams -): - """ - runs a holmes workload health action with a delay - """ - metadata = event.obj and event.obj.metadata - - if not action_params.ask: - action_params.ask = f"help me diagnose an issue with a workload {metadata.namespace}/{event.obj.kind}/{metadata.name} running in my Kubernetes cluster. Can you assist with identifying potential issues and pinpoint the root cause." - - action_params.resource = ResourceInfo( - name=metadata.name, namespace=metadata.namespace, kind=event.obj.kind - ) - - logging.info( - f"Scheduling health check. {metadata.name} delays: {action_params.delay_seconds}" - ) - event.get_scheduler().schedule_action( - action_func=holmes_workload_health, - task_id=f"health_check_{metadata.name}_{metadata.namespace}", - scheduling_params=FixedDelayRepeat( - repeat=1, seconds_delay=action_params.delay_seconds - ), - named_sinks=event.named_sinks, - action_params=action_params, - replace_existing=True, - standalone_task=True, - ) - - @action def holmes_issue_chat(event: ExecutionBaseEvent, params: HolmesIssueChatParams): holmes_url = HolmesDiscovery.find_holmes_url(params.holmes_url) @@ -493,64 +385,6 @@ def holmes_chat(event: ExecutionBaseEvent, params: HolmesChatParams): handle_holmes_error(e) -@action -def holmes_workload_chat( - event: ExecutionBaseEvent, params: HolmesWorkloadHealthChatParams -): - holmes_url = HolmesDiscovery.find_holmes_url(params.holmes_url) - if not holmes_url: - raise ActionException( - ErrorCodes.HOLMES_DISCOVERY_FAILED, - "Robusta couldn't connect to the Holmes client.", - ) - - try: - holmes_req = HolmesWorkloadHealthRequest( - ask=params.ask, - conversation_history=params.conversation_history, - workload_health_result=params.workload_health_result, - resource=params.resource, - model=params.model, - ) - result = requests.post( - f"{holmes_url}/api/workload_health_chat", data=holmes_req.json() - ) - result.raise_for_status() - - holmes_result = HolmesChatResult(**json.loads(result.text)) - - finding = Finding( - title=f"AI Chat for Health Check of {params.resource}", - aggregation_key="HolmesWorkloadConversationResult", - subject=FindingSubject( - name=params.resource.name if params.resource else "", - namespace=params.resource.namespace if params.resource else "", - subject_type=( - FindingSubjectType.from_kind(params.resource.kind) - if params.resource - else FindingSubjectType.TYPE_NONE - ), - node=params.resource.node if params.resource else "", - container=params.resource.container if params.resource else "", - ), - finding_type=FindingType.AI_ANALYSIS, - failure=False, - ) - finding.add_enrichment( - [HolmesChatResultsBlock(holmes_result=holmes_result)], - enrichment_type=EnrichmentType.ai_analysis, - ) - - event.add_finding(finding) - - except Exception as e: - logging.exception( - f"Failed to get holmes chat for health check of {params.resource}", - exc_info=True, - ) - handle_holmes_error(e) - - def stream_and_render_graphs(url, holmes_req, event): with requests.post( url, diff --git a/src/robusta/core/reporting/holmes.py b/src/robusta/core/reporting/holmes.py index b374ee8e4..385fa89a5 100644 --- a/src/robusta/core/reporting/holmes.py +++ b/src/robusta/core/reporting/holmes.py @@ -93,8 +93,3 @@ class HolmesChatResult(BaseModel): class HolmesChatResultsBlock(BaseBlock): holmes_result: Optional[HolmesChatResult] - - -class HolmesWorkloadHealthRequest(HolmesChatRequest): - workload_health_result: HolmesInvestigationResult - resource: ResourceInfo