
Commit 752729d

Update agent continuous evaluation sample (#44463)
* Update agent continuous evaluation sample
* Fix typo
1 parent 1068818 commit 752729d

File tree

1 file changed: +77 -6 lines changed

sdk/ai/azure-ai-projects/samples/evaluations/sample_continuous_evaluation_rule.py

Lines changed: 77 additions & 6 deletions
@@ -12,6 +12,14 @@
     the OpenAI client from the `openai` package. See https://platform.openai.com/docs/api-reference
     for more information.
 
+    PREREQUISITE:
+    To enable continuous evaluation, assign the Azure AI User role to the project's managed identity with the following steps:
+    1) Open https://portal.azure.com
+    2) Search for the AI Foundry project in the search bar
+    3) Choose "Access control (IAM)" -> "Add"
+    4) In "Add role assignment", search for "Azure AI User"
+    5) Choose "User, group, or service principal" or "Managed Identity", then add your AI Foundry project's managed identity
+
     USAGE:
         python sample_continuous_evaluation_rule.py
 
@@ -28,6 +36,7 @@
 """
 
 import os
+import time
 from dotenv import load_dotenv
 from azure.identity import DefaultAzureCredential
 from azure.ai.projects import AIProjectClient
@@ -49,6 +58,8 @@
     project_client.get_openai_client() as openai_client,
 ):
 
+    # Create agent
+
     agent = project_client.agents.create_version(
         agent_name=os.environ["AZURE_AI_AGENT_NAME"],
         definition=PromptAgentDefinition(
@@ -58,6 +69,8 @@
     )
     print(f"Agent created (id: {agent.id}, name: {agent.name}, version: {agent.version})")
 
+    # Setup agent continuous evaluation
+
     data_source_config = {"type": "azure_ai_source", "scenario": "responses"}
     testing_criteria = [
         {"type": "azure_ai_evaluator", "name": "violence_detection", "evaluator_name": "builtin.violence"}
@@ -84,11 +97,69 @@
         f"Continuous Evaluation Rule created (id: {continuous_eval_rule.id}, name: {continuous_eval_rule.display_name})"
     )
 
-    continuous_eval_rule = project_client.evaluation_rules.delete(id=continuous_eval_rule.id)
-    print("Continuous Evaluation Rule deleted")
+    # Run agent
 
-    openai_client.evals.delete(eval_id=eval_object.id)
-    print("Evaluation deleted")
+    conversation = openai_client.conversations.create(
+        items=[{"type": "message", "role": "user", "content": "What is the size of France in square miles?"}],
+    )
+    print(f"Created conversation with initial user message (id: {conversation.id})")
 
-    project_client.agents.delete(agent_name=agent.name)
-    print("Agent deleted")
+    response = openai_client.responses.create(
+        conversation=conversation.id,
+        extra_body={"agent": {"name": agent.name, "type": "agent_reference"}},
+        input="",
+    )
+    print(f"Response output: {response.output_text}")
+
+    # Ask MAX_QUESTIONS follow-up questions in the same conversation
+
+    MAX_QUESTIONS = 10
+    for i in range(0, MAX_QUESTIONS):
+        openai_client.conversations.items.create(
+            conversation_id=conversation.id,
+            items=[{"type": "message", "role": "user", "content": f"Question {i}: What is the capital city?"}],
+        )
+        print(f"Added a user message to the conversation")
+
+        response = openai_client.responses.create(
+            conversation=conversation.id,
+            extra_body={"agent": {"name": agent.name, "type": "agent_reference"}},
+            input="",
+        )
+        print(f"Response output: {response.output_text}")
+
+        # Wait 10 seconds for evaluation, then retrieve eval results
+
+        time.sleep(10)
+        eval_run_list = openai_client.evals.runs.list(
+            eval_id=eval_object.id,
+            order="desc",
+            limit=10,
+        )
+
+        if len(eval_run_list.data) > 0:
+            eval_run_ids = [eval_run.id for eval_run in eval_run_list.data]
+            print(f"Finished evals: {' '.join(eval_run_ids)}")
+
+    # Get the report_url
+
+    print("Agent runs finished")
+
+    MAX_LOOP = 20
+    for _ in range(0, MAX_LOOP):
+        print(f"Waiting for eval run to complete...")
+
+        eval_run_list = openai_client.evals.runs.list(
+            eval_id=eval_object.id,
+            order="desc",
+            limit=10,
+        )
+
+        if len(eval_run_list.data) > 0 and eval_run_list.data[0].report_url:
+            run_report_url = eval_run_list.data[0].report_url
+            # Remove the last 2 URL path segments (run/continuousevalrun_xxx)
+            report_url = '/'.join(run_report_url.split('/')[:-2])
+            print(f"To check evaluation runs, please open {report_url} in the browser")
+            break
+
+        time.sleep(10)
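The report_url handling added at the end of the sample only trims the last two path segments off a run's report URL, so the printed link points at the evaluation's run list rather than a single continuous-eval run. A minimal illustration of that string slicing, using a made-up URL (only the slicing mirrors the sample; the path shape is hypothetical):

    # Hypothetical URL shape, for illustration only; the slicing matches the sample above.
    run_report_url = "https://ai.azure.com/project/my-project/evaluation/eval_123/run/continuousevalrun_456"
    report_url = "/".join(run_report_url.split("/")[:-2])
    print(report_url)  # -> https://ai.azure.com/project/my-project/evaluation/eval_123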

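Note that this commit also removes the teardown calls from the end of the sample, so the agent, the eval object, and the continuous evaluation rule remain in the project after the script finishes. If you want to remove them after experimenting, the deleted lines can be reused as an optional cleanup step at the end of the same with block (a sketch reusing exactly the calls removed above):

    # Optional cleanup, reusing the calls this commit removed from the sample.
    continuous_eval_rule = project_client.evaluation_rules.delete(id=continuous_eval_rule.id)
    print("Continuous Evaluation Rule deleted")

    openai_client.evals.delete(eval_id=eval_object.id)
    print("Evaluation deleted")

    project_client.agents.delete(agent_name=agent.name)
    print("Agent deleted")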