+ Try It Now
+
+
Learn How
Get Started
diff --git a/pages/demo.js b/pages/demo.js
new file mode 100644
index 0000000..64f37ab
--- /dev/null
+++ b/pages/demo.js
@@ -0,0 +1,355 @@
+import { useEffect, useState, useRef } from 'react'
+import Head from 'next/head'
+import Link from 'next/link'
+import Footer from '@components/Footer'
+import styles from '@components/MastHead.module.css'
+
+export default function Demo() {
+ // Loaded task records; filled once by the data-loading effect below.
+ const [tasks, setTasks] = useState([])
+ // Index into `tasks` of the task currently shown in the viewer.
+ const [currentTaskIndex, setCurrentTaskIndex] = useState(0)
+ // Index into the current task's `execution.steps` (and its `screenshots`).
+ const [currentStepIndex, setCurrentStepIndex] = useState(0)
+ // True while the playback effect is auto-advancing steps.
+ const [isPlaying, setIsPlaying] = useState(false)
+ // Delay passed to the playback timer, i.e. milliseconds between steps.
+ const [playSpeed, setPlaySpeed] = useState(1500)
+ // Parsed contents of /benchmark-data/summary.json; null until it has loaded.
+ const [summary, setSummary] = useState(null)
+ // Holds the id of the active playback timer so it can be cleared.
+ const playIntervalRef = useRef(null)
+
+ useEffect(() => {
+ // Load benchmark data
+ async function loadData() {
+ try {
+ const summaryRes = await fetch('/benchmark-data/summary.json')
+ const summaryData = await summaryRes.json()
+ setSummary(summaryData)
+
+ // Load task data
+ const taskPromises = summaryData.tasks.map(async (taskInfo) => {
+ const taskRes = await fetch(`/benchmark-data/tasks/${taskInfo.task_id}/task.json`)
+ const execRes = await fetch(`/benchmark-data/tasks/${taskInfo.task_id}/execution.json`)
+
+ const task = await taskRes.json()
+ const execution = await execRes.json()
+
+ return {
+ ...taskInfo,
+ definition: task,
+ execution: execution,
+ screenshots: execution.steps.map(step =>
+ `/benchmark-data/tasks/${taskInfo.task_id}/${step.screenshot_path}`
+ )
+ }
+ })
+
+ const loadedTasks = await Promise.all(taskPromises)
+ setTasks(loadedTasks)
+ } catch (error) {
+ console.error('Failed to load benchmark data:', error)
+ }
+ }
+
+ loadData()
+ }, [])
+
+ useEffect(() => {
+ // Handle play/pause
+ if (isPlaying) {
+ playIntervalRef.current = setInterval(() => {
+ setCurrentStepIndex(prev => {
+ const task = tasks[currentTaskIndex]
+ if (!task || !task.execution) return prev
+
+ const maxSteps = task.execution.steps.length - 1
+ if (prev >= maxSteps) {
+ setIsPlaying(false)
+ return prev
+ }
+ return prev + 1
+ })
+ }, playSpeed)
+ } else {
+ if (playIntervalRef.current) {
+ clearInterval(playIntervalRef.current)
+ }
+ }
+
+ return () => {
+ if (playIntervalRef.current) {
+ clearInterval(playIntervalRef.current)
+ }
+ }
+ }, [isPlaying, playSpeed, currentTaskIndex, tasks])
+
+ // Values derived from the selected indices. Each is undefined until the task
+ // data has loaded or when an index is out of range, hence the optional chaining.
+ const currentTask = tasks[currentTaskIndex]
+ const currentStep = currentTask?.execution?.steps?.[currentStepIndex]
+ const currentScreenshot = currentTask?.screenshots?.[currentStepIndex]
+
+ const handlePrevStep = () => {
+ setCurrentStepIndex(prev => Math.max(0, prev - 1))
+ setIsPlaying(false)
+ }
+
+ const handleNextStep = () => {
+ const maxSteps = currentTask?.execution?.steps?.length - 1 || 0
+ setCurrentStepIndex(prev => Math.min(maxSteps, prev + 1))
+ setIsPlaying(false)
+ }
+
+ const handlePlayPause = () => {
+ setIsPlaying(!isPlaying)
+ }
+
+ const formatAction = (action) => {
+ if (!action) return 'No action'
+
+ switch (action.type) {
+ case 'click':
+ return `CLICK at (${(action.x * 100).toFixed(1)}%, ${(action.y * 100).toFixed(1)}%)`
+ case 'type':
+ return `TYPE "${action.text}"`
+ case 'key':
+ return `KEY ${action.key}`
+ case 'scroll':
+ return `SCROLL ${action.scroll_direction}`
+ default:
+ return action.raw_action?.code || action.type
+ }
+ }
+
+ return (
+ <>
+
+
Interactive Demo - OpenAdapt.AI
+
+
+
+
+ {/* Header */}
+
+
+ OpenAdapt.AI
+
+
+ Interactive Demo
+
+
+ Watch AI agents perform real tasks on Windows.
+ These are actual benchmark evaluations showing screenshots, actions, and execution logs.
+
+
+ ← Back to Home
+
+
+
+ {/* Main Demo Viewer */}
+
+ {!summary ? (
+
+
+
Loading benchmark data...
+
+ ) : (
+ <>
+ {/* Summary Stats */}
+
+
+
Tasks
+
{summary.num_tasks}
+
+
+
Success Rate
+
{(summary.success_rate * 100).toFixed(0)}%
+
+
+
Avg Steps
+
{summary.avg_steps.toFixed(1)}
+
+
+
Avg Time
+
{summary.avg_time_seconds.toFixed(0)}s
+
+
+
+ {/* Viewer */}
+ {currentTask && (
+
+ {/* Task Header */}
+
+
+
+
+ {currentTask.definition.task_id}
+
+
+ Domain: {currentTask.definition.domain}
+
+
+
+ {currentTask.success ? 'Success' : 'Failed'}
+
+
+
+
+ {/* Screenshot Viewer */}
+
+ {currentScreenshot ? (
+

+ ) : (
+
+
No screenshot available
+
+ )}
+
+ {/* Step indicator overlay */}
+
+
+ Step {currentStepIndex + 1} / {currentTask.execution.steps.length}
+
+
+
+ {/* Click indicator overlay */}
+ {currentStep?.action?.type === 'click' && (
+
+ )}
+
+
+ {/* Controls */}
+
+
+
+
+
+
+ {/* Progress bar */}
+
+ setCurrentStepIndex(parseInt(e.target.value))}
+ className="range range-xs range-primary"
+ />
+
+
+ {/* Speed control */}
+
+
+
+ {/* Action details */}
+
+
+
Action:
+
+ {formatAction(currentStep?.action)}
+
+ {currentStep?.reasoning && (
+ <>
+
Reasoning:
+
{currentStep.reasoning}
+ >
+ )}
+
+
+
+
+ {/* Task navigation */}
+ {tasks.length > 1 && (
+
+
+ {tasks.map((task, idx) => (
+
+ ))}
+
+
+ )}
+
+ )}
+
+ {/* Call to Action */}
+
+
+ Ready to Build Your Own AI Agents?
+
+
+ OpenAdapt.AI makes it easy to record demonstrations, train models, and deploy agents that can use any software.
+
+
+
+ >
+ )}
+
+
+
+
+
+
+ >
+ )
+}
diff --git a/public/benchmark-data/metadata.json b/public/benchmark-data/metadata.json
new file mode 100644
index 0000000..017f47b
--- /dev/null
+++ b/public/benchmark-data/metadata.json
@@ -0,0 +1,6 @@
+{
+ "benchmark_name": "waa-live",
+ "run_name": "waa-live_eval_20260116_200004",
+ "model_id": "unknown",
+ "created_at": "2026-01-16T20:00:04.964702"
+}
\ No newline at end of file
diff --git a/public/benchmark-data/summary.json b/public/benchmark-data/summary.json
new file mode 100644
index 0000000..9159e68
--- /dev/null
+++ b/public/benchmark-data/summary.json
@@ -0,0 +1,20 @@
+{
+ "benchmark_name": "waa-live",
+ "run_name": "waa-live_eval_20260116_200004",
+ "model_id": "unknown",
+ "num_tasks": 1,
+ "num_success": 0,
+ "success_rate": 0.0,
+ "avg_score": 0.0,
+ "avg_steps": 5.0,
+ "avg_time_seconds": 71.88987074999022,
+ "tasks": [
+ {
+ "task_id": "notepad_1",
+ "success": false,
+ "score": 0.0,
+ "num_steps": 5,
+ "error": null
+ }
+ ]
+}
\ No newline at end of file
diff --git a/public/benchmark-data/tasks/notepad_1/execution.json b/public/benchmark-data/tasks/notepad_1/execution.json
new file mode 100644
index 0000000..c2fd7f2
--- /dev/null
+++ b/public/benchmark-data/tasks/notepad_1/execution.json
@@ -0,0 +1,142 @@
+{
+ "task_id": "notepad_1",
+ "model_id": "unknown",
+ "success": false,
+ "score": 0.0,
+ "num_steps": 5,
+ "total_time_seconds": 71.88987074999022,
+ "error": null,
+ "reason": "Evaluation requires WAA evaluators (not yet implemented)",
+ "steps": [
+ {
+ "step_idx": 0,
+ "screenshot_path": "screenshots/step_000.png",
+ "action": {
+ "type": "click",
+ "x": 0.6442708333333333,
+ "y": 0.32666666666666666,
+ "target_node_id": null,
+ "target_bbox": null,
+ "target_role": null,
+ "target_name": null,
+ "text": null,
+ "key": null,
+ "modifiers": null,
+ "scroll_direction": null,
+ "scroll_amount": null,
+ "end_x": null,
+ "end_y": null,
+ "answer": null,
+ "raw_action": {
+ "code": "computer.click(1237, 392)"
+ }
+ },
+ "reasoning": null,
+ "timestamp": 1768611618.802666
+ },
+ {
+ "step_idx": 1,
+ "screenshot_path": "screenshots/step_001.png",
+ "action": {
+ "type": "click",
+ "x": 0.6072916666666667,
+ "y": 0.5216666666666666,
+ "target_node_id": null,
+ "target_bbox": null,
+ "target_role": null,
+ "target_name": null,
+ "text": null,
+ "key": null,
+ "modifiers": null,
+ "scroll_direction": null,
+ "scroll_amount": null,
+ "end_x": null,
+ "end_y": null,
+ "answer": null,
+ "raw_action": {
+ "code": "computer.click(1166, 626)"
+ }
+ },
+ "reasoning": null,
+ "timestamp": 1768611631.025159
+ },
+ {
+ "step_idx": 2,
+ "screenshot_path": "screenshots/step_002.png",
+ "action": {
+ "type": "click",
+ "x": 0.6072916666666667,
+ "y": 0.5216666666666666,
+ "target_node_id": null,
+ "target_bbox": null,
+ "target_role": null,
+ "target_name": null,
+ "text": null,
+ "key": null,
+ "modifiers": null,
+ "scroll_direction": null,
+ "scroll_amount": null,
+ "end_x": null,
+ "end_y": null,
+ "answer": null,
+ "raw_action": {
+ "code": "computer.click(1166, 626)"
+ }
+ },
+ "reasoning": null,
+ "timestamp": 1768611644.028655
+ },
+ {
+ "step_idx": 3,
+ "screenshot_path": "screenshots/step_003.png",
+ "action": {
+ "type": "click",
+ "x": 0.6072916666666667,
+ "y": 0.5216666666666666,
+ "target_node_id": null,
+ "target_bbox": null,
+ "target_role": null,
+ "target_name": null,
+ "text": null,
+ "key": null,
+ "modifiers": null,
+ "scroll_direction": null,
+ "scroll_amount": null,
+ "end_x": null,
+ "end_y": null,
+ "answer": null,
+ "raw_action": {
+ "code": "computer.click(1166, 626)"
+ }
+ },
+ "reasoning": null,
+ "timestamp": 1768611657.31775
+ },
+ {
+ "step_idx": 4,
+ "screenshot_path": "screenshots/step_004.png",
+ "action": {
+ "type": "click",
+ "x": 0.6442708333333333,
+ "y": 0.32666666666666666,
+ "target_node_id": null,
+ "target_bbox": null,
+ "target_role": null,
+ "target_name": null,
+ "text": null,
+ "key": null,
+ "modifiers": null,
+ "scroll_direction": null,
+ "scroll_amount": null,
+ "end_x": null,
+ "end_y": null,
+ "answer": null,
+ "raw_action": {
+ "code": "computer.click(1237, 392)"
+ }
+ },
+ "reasoning": null,
+ "timestamp": 1768611670.178712
+ }
+ ]
+}
\ No newline at end of file
diff --git a/public/benchmark-data/tasks/notepad_1/screenshots/step_000.png b/public/benchmark-data/tasks/notepad_1/screenshots/step_000.png
new file mode 100644
index 0000000..7e9cbf3
Binary files /dev/null and b/public/benchmark-data/tasks/notepad_1/screenshots/step_000.png differ
diff --git a/public/benchmark-data/tasks/notepad_1/screenshots/step_001.png b/public/benchmark-data/tasks/notepad_1/screenshots/step_001.png
new file mode 100644
index 0000000..7e9cbf3
Binary files /dev/null and b/public/benchmark-data/tasks/notepad_1/screenshots/step_001.png differ
diff --git a/public/benchmark-data/tasks/notepad_1/screenshots/step_002.png b/public/benchmark-data/tasks/notepad_1/screenshots/step_002.png
new file mode 100644
index 0000000..7e9cbf3
Binary files /dev/null and b/public/benchmark-data/tasks/notepad_1/screenshots/step_002.png differ
diff --git a/public/benchmark-data/tasks/notepad_1/screenshots/step_003.png b/public/benchmark-data/tasks/notepad_1/screenshots/step_003.png
new file mode 100644
index 0000000..7e9cbf3
Binary files /dev/null and b/public/benchmark-data/tasks/notepad_1/screenshots/step_003.png differ
diff --git a/public/benchmark-data/tasks/notepad_1/screenshots/step_004.png b/public/benchmark-data/tasks/notepad_1/screenshots/step_004.png
new file mode 100644
index 0000000..7e9cbf3
Binary files /dev/null and b/public/benchmark-data/tasks/notepad_1/screenshots/step_004.png differ
diff --git a/public/benchmark-data/tasks/notepad_1/task.json b/public/benchmark-data/tasks/notepad_1/task.json
new file mode 100644
index 0000000..8eb822d
--- /dev/null
+++ b/public/benchmark-data/tasks/notepad_1/task.json
@@ -0,0 +1,9 @@
+{
+ "task_id": "notepad_1",
+ "instruction": "Task notepad_1",
+ "domain": "notepad",
+ "initial_state_ref": null,
+ "time_limit_steps": 5,
+ "raw_config": {},
+ "evaluation_spec": null
+}
\ No newline at end of file