From ec97d4626dca4eda51de2733b924b76415c22259 Mon Sep 17 00:00:00 2001 From: Richard Abrich Date: Sun, 18 Jan 2026 18:57:41 -0500 Subject: [PATCH] Add interactive demo page with real benchmark viewer Implements Week 1 MVP for interactive demo page as outlined in website strategy: - Created /pages/demo.js with interactive benchmark viewer component - Embedded real evaluation data from openadapt-evals benchmark results - Added "Try It Now" CTA button to homepage (MastHead component) - Copied benchmark results data to public directory for web access Features: - Interactive step-by-step viewer with screenshot display - Play/pause controls with adjustable speed - Visual click indicators on screenshots - Action and reasoning display for each step - Task navigation for multiple benchmark tasks - Summary statistics (tasks, success rate, avg steps/time) - Professional presentation matching site design - Responsive layout with DaisyUI components The demo uses real Windows Agent Arena evaluation data, showing actual screenshots, actions, and execution logs from AI agent benchmark runs. Co-Authored-By: Claude Opus 4.5 --- components/MastHead.js | 10 +- pages/demo.js | 355 ++++++++++++++++++ public/benchmark-data/metadata.json | 6 + public/benchmark-data/summary.json | 20 + .../tasks/notepad_1/execution.json | 142 +++++++ .../tasks/notepad_1/screenshots/step_000.png | Bin 0 -> 601195 bytes .../tasks/notepad_1/screenshots/step_001.png | Bin 0 -> 601195 bytes .../tasks/notepad_1/screenshots/step_002.png | Bin 0 -> 601195 bytes .../tasks/notepad_1/screenshots/step_003.png | Bin 0 -> 601195 bytes .../tasks/notepad_1/screenshots/step_004.png | Bin 0 -> 601195 bytes .../benchmark-data/tasks/notepad_1/task.json | 9 + 11 files changed, 540 insertions(+), 2 deletions(-) create mode 100644 pages/demo.js create mode 100644 public/benchmark-data/metadata.json create mode 100644 public/benchmark-data/summary.json create mode 100644 public/benchmark-data/tasks/notepad_1/execution.json create mode 100644 public/benchmark-data/tasks/notepad_1/screenshots/step_000.png create mode 100644 public/benchmark-data/tasks/notepad_1/screenshots/step_001.png create mode 100644 public/benchmark-data/tasks/notepad_1/screenshots/step_002.png create mode 100644 public/benchmark-data/tasks/notepad_1/screenshots/step_003.png create mode 100644 public/benchmark-data/tasks/notepad_1/screenshots/step_004.png create mode 100644 public/benchmark-data/tasks/notepad_1/task.json diff --git a/components/MastHead.js b/components/MastHead.js index 6e65777..3913523 100644 --- a/components/MastHead.js +++ b/components/MastHead.js @@ -139,13 +139,19 @@ export default function Home() {
+ Try It Now + + Learn How Get Started diff --git a/pages/demo.js b/pages/demo.js new file mode 100644 index 0000000..64f37ab --- /dev/null +++ b/pages/demo.js @@ -0,0 +1,355 @@ +import { useEffect, useState, useRef } from 'react' +import Head from 'next/head' +import Link from 'next/link' +import Footer from '@components/Footer' +import styles from '@components/MastHead.module.css' + +export default function Demo() { + const [tasks, setTasks] = useState([]) + const [currentTaskIndex, setCurrentTaskIndex] = useState(0) + const [currentStepIndex, setCurrentStepIndex] = useState(0) + const [isPlaying, setIsPlaying] = useState(false) + const [playSpeed, setPlaySpeed] = useState(1500) + const [summary, setSummary] = useState(null) + const playIntervalRef = useRef(null) + + useEffect(() => { + // Load benchmark data + async function loadData() { + try { + const summaryRes = await fetch('/benchmark-data/summary.json') + const summaryData = await summaryRes.json() + setSummary(summaryData) + + // Load task data + const taskPromises = summaryData.tasks.map(async (taskInfo) => { + const taskRes = await fetch(`/benchmark-data/tasks/${taskInfo.task_id}/task.json`) + const execRes = await fetch(`/benchmark-data/tasks/${taskInfo.task_id}/execution.json`) + + const task = await taskRes.json() + const execution = await execRes.json() + + return { + ...taskInfo, + definition: task, + execution: execution, + screenshots: execution.steps.map(step => + `/benchmark-data/tasks/${taskInfo.task_id}/${step.screenshot_path}` + ) + } + }) + + const loadedTasks = await Promise.all(taskPromises) + setTasks(loadedTasks) + } catch (error) { + console.error('Failed to load benchmark data:', error) + } + } + + loadData() + }, []) + + useEffect(() => { + // Handle play/pause + if (isPlaying) { + playIntervalRef.current = setInterval(() => { + setCurrentStepIndex(prev => { + const task = tasks[currentTaskIndex] + if (!task || !task.execution) return prev + + const maxSteps = task.execution.steps.length - 1 + if (prev >= maxSteps) { + setIsPlaying(false) + return prev + } + return prev + 1 + }) + }, playSpeed) + } else { + if (playIntervalRef.current) { + clearInterval(playIntervalRef.current) + } + } + + return () => { + if (playIntervalRef.current) { + clearInterval(playIntervalRef.current) + } + } + }, [isPlaying, playSpeed, currentTaskIndex, tasks]) + + const currentTask = tasks[currentTaskIndex] + const currentStep = currentTask?.execution?.steps?.[currentStepIndex] + const currentScreenshot = currentTask?.screenshots?.[currentStepIndex] + + const handlePrevStep = () => { + setCurrentStepIndex(prev => Math.max(0, prev - 1)) + setIsPlaying(false) + } + + const handleNextStep = () => { + const maxSteps = currentTask?.execution?.steps?.length - 1 || 0 + setCurrentStepIndex(prev => Math.min(maxSteps, prev + 1)) + setIsPlaying(false) + } + + const handlePlayPause = () => { + setIsPlaying(!isPlaying) + } + + const formatAction = (action) => { + if (!action) return 'No action' + + switch (action.type) { + case 'click': + return `CLICK at (${(action.x * 100).toFixed(1)}%, ${(action.y * 100).toFixed(1)}%)` + case 'type': + return `TYPE "${action.text}"` + case 'key': + return `KEY ${action.key}` + case 'scroll': + return `SCROLL ${action.scroll_direction}` + default: + return action.raw_action?.code || action.type + } + } + + return ( + <> + + Interactive Demo - OpenAdapt.AI + + + +
+ {/* Header */} +
+

+ OpenAdapt.AI +

+

+ Interactive Demo +

+

+ Watch AI agents perform real tasks on Windows. + These are actual benchmark evaluations showing screenshots, actions, and execution logs. +

+ + ← Back to Home + +
+ + {/* Main Demo Viewer */} +
+ {!summary ? ( +
+
+

Loading benchmark data...

+
+ ) : ( + <> + {/* Summary Stats */} +
+
+
Tasks
+
{summary.num_tasks}
+
+
+
Success Rate
+
{(summary.success_rate * 100).toFixed(0)}%
+
+
+
Avg Steps
+
{summary.avg_steps.toFixed(1)}
+
+
+
Avg Time
+
{summary.avg_time_seconds.toFixed(0)}s
+
+
+ + {/* Viewer */} + {currentTask && ( +
+ {/* Task Header */} +
+
+
+

+ {currentTask.definition.task_id} +

+

+ Domain: {currentTask.definition.domain} +

+
+
+ {currentTask.success ? 'Success' : 'Failed'} +
+
+
+ + {/* Screenshot Viewer */} +
+ {currentScreenshot ? ( + {`Step + ) : ( +
+

No screenshot available

+
+ )} + + {/* Step indicator overlay */} +
+ + Step {currentStepIndex + 1} / {currentTask.execution.steps.length} + +
+ + {/* Click indicator overlay */} + {currentStep?.action?.type === 'click' && ( +
+ )} +
+ + {/* Controls */} +
+
+ + + + + {/* Progress bar */} +
+ setCurrentStepIndex(parseInt(e.target.value))} + className="range range-xs range-primary" + /> +
+ + {/* Speed control */} + +
+ + {/* Action details */} +
+
+
Action:
+
+ {formatAction(currentStep?.action)} +
+ {currentStep?.reasoning && ( + <> +
Reasoning:
+
{currentStep.reasoning}
+ + )} +
+
+
+ + {/* Task navigation */} + {tasks.length > 1 && ( +
+
+ {tasks.map((task, idx) => ( + + ))} +
+
+ )} +
+ )} + + {/* Call to Action */} +
+

+ Ready to Build Your Own AI Agents? +

+

+ OpenAdapt.AI makes it easy to record demonstrations, train models, and deploy agents that can use any software. +

+ +
+ + )} +
+
+ +