
Commit 752729d

Update agent continuous evaluation sample (#44463)
* Update agent continuous evaluation sample
* Fix typo
1 parent 1068818 commit 752729d

File tree

1 file changed: +77 -6 lines changed

sdk/ai/azure-ai-projects/samples/evaluations/sample_continuous_evaluation_rule.py

Lines changed: 77 additions & 6 deletions
@@ -12,6 +12,14 @@
     the OpenAI client from the `openai` package. See https://platform.openai.com/docs/api-reference
     for more information.
 
+    PREREQUISITE:
+    To enable continuous evaluation, assign the Azure AI User role to the project's managed identity with the following steps:
+    1) Open https://portal.azure.com
+    2) Search for the AI Foundry project in the search bar
+    3) Choose "Access control (IAM)" -> "Add"
+    4) In "Add role assignment", search for "Azure AI User"
+    5) Choose "User, group, or service principal" or "Managed Identity", then add your AI Foundry project's managed identity
+
     USAGE:
         python sample_continuous_evaluation_rule.py
 
@@ -28,6 +36,7 @@
 """
 
 import os
+import time
 from dotenv import load_dotenv
 from azure.identity import DefaultAzureCredential
 from azure.ai.projects import AIProjectClient
@@ -49,6 +58,8 @@
     project_client.get_openai_client() as openai_client,
 ):
 
+    # Create agent
+
     agent = project_client.agents.create_version(
         agent_name=os.environ["AZURE_AI_AGENT_NAME"],
         definition=PromptAgentDefinition(
@@ -58,6 +69,8 @@
     )
     print(f"Agent created (id: {agent.id}, name: {agent.name}, version: {agent.version})")
 
+    # Setup agent continuous evaluation
+
     data_source_config = {"type": "azure_ai_source", "scenario": "responses"}
     testing_criteria = [
         {"type": "azure_ai_evaluator", "name": "violence_detection", "evaluator_name": "builtin.violence"}
@@ -84,11 +97,69 @@
         f"Continuous Evaluation Rule created (id: {continuous_eval_rule.id}, name: {continuous_eval_rule.display_name})"
     )
 
-    continuous_eval_rule = project_client.evaluation_rules.delete(id=continuous_eval_rule.id)
-    print("Continuous Evaluation Rule deleted")
+    # Run agent
 
-    openai_client.evals.delete(eval_id=eval_object.id)
-    print("Evaluation deleted")
+    conversation = openai_client.conversations.create(
+        items=[{"type": "message", "role": "user", "content": "What is the size of France in square miles?"}],
+    )
+    print(f"Created conversation with initial user message (id: {conversation.id})")
 
-    project_client.agents.delete(agent_name=agent.name)
-    print("Agent deleted")
+    response = openai_client.responses.create(
+        conversation=conversation.id,
+        extra_body={"agent": {"name": agent.name, "type": "agent_reference"}},
+        input="",
+    )
+    print(f"Response output: {response.output_text}")
+
+    # Ask MAX_QUESTIONS follow-up questions in the same conversation
+
+    MAX_QUESTIONS = 10
+    for i in range(0, MAX_QUESTIONS):
+        openai_client.conversations.items.create(
+            conversation_id=conversation.id,
+            items=[{"type": "message", "role": "user", "content": f"Question {i}: What is the capital city?"}],
+        )
+        print(f"Added a user message to the conversation")
+
+        response = openai_client.responses.create(
+            conversation=conversation.id,
+            extra_body={"agent": {"name": agent.name, "type": "agent_reference"}},
+            input="",
+        )
+        print(f"Response output: {response.output_text}")
+
+        # Wait 10 seconds for evaluation, then retrieve eval results
+
+        time.sleep(10)
+        eval_run_list = openai_client.evals.runs.list(
+            eval_id=eval_object.id,
+            order="desc",
+            limit=10,
+        )
+
+        if len(eval_run_list.data) > 0:
+            eval_run_ids = [eval_run.id for eval_run in eval_run_list.data]
+            print(f"Finished evals: {' '.join(eval_run_ids)}")
+
+    # Get the report_url
+
+    print("Agent runs finished")
+
+    MAX_LOOP = 20
+    for _ in range(0, MAX_LOOP):
+        print(f"Waiting for eval run to complete...")
+
+        eval_run_list = openai_client.evals.runs.list(
+            eval_id=eval_object.id,
+            order="desc",
+            limit=10,
+        )
+
+        if len(eval_run_list.data) > 0 and eval_run_list.data[0].report_url:
+            run_report_url = eval_run_list.data[0].report_url
+            # Remove the last 2 URL path segments (run/continuousevalrun_xxx)
+            report_url = '/'.join(run_report_url.split('/')[:-2])
+            print(f"To check evaluation runs, please open {report_url} in the browser")
+            break
+
+        time.sleep(10)
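The report_url handling added at the end of the sample only trims the last two path segments off a run's report URL, so the printed link points at the evaluation's run list rather than a single continuous-eval run. A minimal illustration of that string slicing, using a made-up URL (only the slicing mirrors the sample; the path shape is hypothetical):

    # Hypothetical URL shape, for illustration only; the slicing matches the sample above.
    run_report_url = "https://ai.azure.com/project/my-project/evaluation/eval_123/run/continuousevalrun_456"
    report_url = "/".join(run_report_url.split("/")[:-2])
    print(report_url)  # -> https://ai.azure.com/project/my-project/evaluation/eval_123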

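Note that this commit also removes the teardown calls from the end of the sample, so the agent, the eval object, and the continuous evaluation rule remain in the project after the script finishes. If you want to remove them after experimenting, the deleted lines can be reused as an optional cleanup step at the end of the same with block (a sketch reusing exactly the calls removed above):

    # Optional cleanup, reusing the calls this commit removed from the sample.
    continuous_eval_rule = project_client.evaluation_rules.delete(id=continuous_eval_rule.id)
    print("Continuous Evaluation Rule deleted")

    openai_client.evals.delete(eval_id=eval_object.id)
    print("Evaluation deleted")

    project_client.agents.delete(agent_name=agent.name)
    print("Agent deleted")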