Merged
18 commits
9c89d47  🤖 feat: add voice input mode using OpenAI Whisper (ammar-agent, Dec 2, 2025)
e54dbf9  feat: improve voice recording UI states (ammar-agent, Dec 2, 2025)
5bcfbdd  feat: space to send during recording, thinner border (ammar-agent, Dec 2, 2025)
5914ed1  fix: auto-focus recording button for spacebar, add margin (ammar-agent, Dec 2, 2025)
ffb6e94  fix: properly implement space-to-send with hook callback (ammar-agent, Dec 2, 2025)
53cefb3  style: reduce vertical space in chat controls (ammar-agent, Dec 2, 2025)
bc400fb  style: make toggle group match send button size (ammar-agent, Dec 2, 2025)
4d9552d  refactor: voice input cleanup and user education (ammar-agent, Dec 2, 2025)
adcec7c  refactor: clean up useVoiceInput with enum state (ammar-agent, Dec 2, 2025)
944e341  feat: space on empty input starts voice, escape cancels (ammar-agent, Dec 2, 2025)
f82b49f  fix: remove ugly focus ring, improve voice tooltip (ammar-agent, Dec 2, 2025)
91c65e7  fix: use gpt-4o-transcribe model instead of whisper-1 (ammar-agent, Dec 2, 2025)
92e31f3  fix: global keybinds during recording work regardless of focus (ammar-agent, Dec 2, 2025)
5d71600  test: add Storybook story for voice input without API key (ammar-agent, Dec 2, 2025)
fe15e4d  fix: add defense-in-depth mobile check in voice start() (ammar-agent, Dec 2, 2025)
8c56c9c  refactor: consolidate voice input useEffects (ammar-agent, Dec 2, 2025)
4ffcec9  refactor: improve useVoiceInput clarity and touch detection (ammar-agent, Dec 2, 2025)
7e1294b  fix: make E2E test more specific for OpenAI provider button (ammar-agent, Dec 2, 2025)

3 changes: 3 additions & 0 deletions bun.lock
@@ -42,6 +42,7 @@
"minimist": "^1.2.8",
"motion": "^12.23.24",
"ollama-ai-provider-v2": "^1.5.4",
"openai": "^6.9.1",
"rehype-harden": "^1.1.5",
"shescape": "^2.1.6",
"source-map-support": "^0.5.21",
@@ -2688,6 +2689,8 @@

"oniguruma-to-es": ["oniguruma-to-es@4.3.3", "", { "dependencies": { "oniguruma-parser": "^0.12.1", "regex": "^6.0.1", "regex-recursion": "^6.0.2" } }, "sha512-rPiZhzC3wXwE59YQMRDodUwwT9FZ9nNBwQQfsd1wfdtlKEyCdRV0avrTcSZ5xlIvGRVPd/cx6ZN45ECmS39xvg=="],

"openai": ["openai@6.9.1", "", { "peerDependencies": { "ws": "^8.18.0", "zod": "^3.25 || ^4.0" }, "optionalPeers": ["ws", "zod"], "bin": { "openai": "bin/cli" } }, "sha512-vQ5Rlt0ZgB3/BNmTa7bIijYFhz3YBceAA3Z4JuoMSBftBF9YqFHIEhZakSs+O/Ad7EaoEimZvHxD5ylRjN11Lg=="],

"optionator": ["optionator@0.9.4", "", { "dependencies": { "deep-is": "^0.1.3", "fast-levenshtein": "^2.0.6", "levn": "^0.4.1", "prelude-ls": "^1.2.1", "type-check": "^0.4.0", "word-wrap": "^1.2.5" } }, "sha512-6IpQ7mKUxRcZNLIObR0hz7lxsapSSIYNZJwXPGeF0mTVqGKFIXj1DQcMoT22S3ROcLyY/rz0PWaWZ9ayWmad9g=="],

"ora": ["ora@5.4.1", "", { "dependencies": { "bl": "^4.1.0", "chalk": "^4.1.0", "cli-cursor": "^3.1.0", "cli-spinners": "^2.5.0", "is-interactive": "^1.0.0", "is-unicode-supported": "^0.1.0", "log-symbols": "^4.1.0", "strip-ansi": "^6.0.0", "wcwidth": "^1.0.1" } }, "sha512-5b6Y85tPxZZ7QytO+BQzysW31HJku27cRIlkbAXaNx+BdcVi+LlRFmVXzeF6a7JCwJpyw5c4b+YSVImQIrBpuQ=="],
1 change: 1 addition & 0 deletions package.json
@@ -83,6 +83,7 @@
"minimist": "^1.2.8",
"motion": "^12.23.24",
"ollama-ai-provider-v2": "^1.5.4",
"openai": "^6.9.1",
"rehype-harden": "^1.1.5",
"shescape": "^2.1.6",
"source-map-support": "^0.5.21",
3 changes: 3 additions & 0 deletions src/browser/api.ts
@@ -361,6 +361,9 @@ const webApi: IPCApi = {
},
closeWindow: (workspaceId) => invokeIPC(IPC_CHANNELS.TERMINAL_WINDOW_CLOSE, workspaceId),
},
voice: {
transcribe: (audioBase64) => invokeIPC(IPC_CHANNELS.VOICE_TRANSCRIBE, audioBase64),
},
update: {
check: () => invokeIPC(IPC_CHANNELS.UPDATE_CHECK),
download: () => invokeIPC(IPC_CHANNELS.UPDATE_DOWNLOAD),
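Note: this diff only shows the renderer-side `voice.transcribe` wrapper; the main-process handler behind `IPC_CHANNELS.VOICE_TRANSCRIBE` is not visible here. Below is a minimal sketch of what such a handler could look like, using the `openai` dependency added in this PR and the `gpt-4o-transcribe` model from commit 91c65e7; the handler name, import paths, and return shape are assumptions, not the PR's actual code.

```ts
// Hypothetical main-process handler sketch (not part of this PR's visible diff).
import { ipcMain } from "electron";
import OpenAI, { toFile } from "openai";
import { IPC_CHANNELS } from "@/common/ipc"; // assumed import path for the channel constants

export function registerVoiceHandlers(getOpenAIKey: () => string | undefined): void {
  ipcMain.handle(IPC_CHANNELS.VOICE_TRANSCRIBE, async (_event, audioBase64: string) => {
    const apiKey = getOpenAIKey();
    if (!apiKey) {
      return { success: false, error: "OpenAI API key not configured" };
    }

    // The renderer records audio (e.g. MediaRecorder webm/opus) and ships it as base64.
    const audio = Buffer.from(audioBase64, "base64");
    const client = new OpenAI({ apiKey });

    const result = await client.audio.transcriptions.create({
      file: await toFile(audio, "recording.webm"),
      model: "gpt-4o-transcribe", // switched from whisper-1 in commit 91c65e7
    });

    return { success: true, text: result.text };
  });
}
```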
74 changes: 74 additions & 0 deletions src/browser/components/ChatInput/VoiceInputButton.tsx
@@ -0,0 +1,74 @@
/**
 * Voice input button - floats inside the chat input textarea.
 * Minimal footprint: just an icon that changes color based on state.
 */

import React from "react";
import { Mic, Loader2 } from "lucide-react";
import { TooltipWrapper, Tooltip } from "../Tooltip";
import { formatKeybind, KEYBINDS } from "@/browser/utils/ui/keybinds";
import { cn } from "@/common/lib/utils";
import type { VoiceInputState } from "@/browser/hooks/useVoiceInput";

interface VoiceInputButtonProps {
  state: VoiceInputState;
  isApiKeySet: boolean;
  shouldShowUI: boolean;
  onToggle: () => void;
  disabled?: boolean;
}

const STATE_CONFIG: Record<VoiceInputState, { label: string; colorClass: string }> = {
  idle: { label: "Voice input", colorClass: "text-muted/50 hover:text-muted" },
  recording: { label: "Stop recording", colorClass: "text-blue-500 animate-pulse" },
  transcribing: { label: "Transcribing...", colorClass: "text-amber-500" },
};

export const VoiceInputButton: React.FC<VoiceInputButtonProps> = (props) => {
  if (!props.shouldShowUI) return null;

  const needsApiKey = !props.isApiKeySet;
  const { label, colorClass } = needsApiKey
    ? { label: "Voice input (requires OpenAI API key)", colorClass: "text-muted/50" }
    : STATE_CONFIG[props.state];

  const Icon = props.state === "transcribing" ? Loader2 : Mic;
  const isTranscribing = props.state === "transcribing";

  return (
    <TooltipWrapper inline>
      <button
        type="button"
        onClick={props.onToggle}
        disabled={(props.disabled ?? false) || isTranscribing || needsApiKey}
        aria-label={label}
        aria-pressed={props.state === "recording"}
        className={cn(
          "inline-flex items-center justify-center rounded p-0.5 transition-colors duration-150",
          "disabled:cursor-not-allowed disabled:opacity-40",
          colorClass
        )}
      >
        <Icon className={cn("h-4 w-4", isTranscribing && "animate-spin")} strokeWidth={1.5} />
      </button>
      <Tooltip className="tooltip" align="right">
        {needsApiKey ? (
          <>
            Voice input requires OpenAI API key.
            <br />
            Configure in Settings → Providers.
          </>
        ) : (
          <>
            <strong>Voice input</strong> — press space on empty input
            <br />
            or {formatKeybind(KEYBINDS.TOGGLE_VOICE_INPUT)} anytime
            <br />
            <br />
            While recording: space sends, esc cancels
          </>
        )}
      </Tooltip>
    </TooltipWrapper>
  );
};
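For orientation, here is roughly how the button could be wired up from the chat input. The `useVoiceInput` hook does exist in this PR (its `VoiceInputState` type is imported above), but its options and return shape are not shown in this diff, so the fields used below are assumptions.

```tsx
// Illustrative wiring only; the real useVoiceInput signature may differ.
import React from "react";
import { useVoiceInput } from "@/browser/hooks/useVoiceInput";
import { VoiceInputButton } from "./VoiceInputButton";

export const VoiceInputExample: React.FC<{ onTranscript: (text: string) => void }> = (props) => {
  // Assumed hook shape: a state machine ("idle" | "recording" | "transcribing"),
  // a toggle action, and flags for API-key presence and desktop-only UI.
  const voice = useVoiceInput({ onTranscript: props.onTranscript });

  return (
    <VoiceInputButton
      state={voice.state}
      isApiKeySet={voice.isApiKeySet}
      shouldShowUI={voice.shouldShowUI}
      onToggle={voice.toggle}
    />
  );
};
```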
32 changes: 32 additions & 0 deletions src/browser/components/ChatInput/WaveformBars.tsx
@@ -0,0 +1,32 @@
/**
 * Animated waveform bars for voice recording UI.
 * Shows 5 bars with staggered pulse animation.
 */

import { cn } from "@/common/lib/utils";

interface WaveformBarsProps {
  /** Color class for the bars (e.g., "bg-blue-500") */
  colorClass: string;
  /** Whether to mirror the animation (for right-side waveform) */
  mirrored?: boolean;
}

export const WaveformBars: React.FC<WaveformBarsProps> = (props) => {
  const indices = props.mirrored ? [4, 3, 2, 1, 0] : [0, 1, 2, 3, 4];

  return (
    <div className="flex items-center gap-1">
      {indices.map((i, displayIndex) => (
        <div
          key={displayIndex}
          className={cn("w-1 rounded-full", props.colorClass)}
          style={{
            height: `${12 + Math.sin(i * 0.8) * 8}px`,
            animation: `pulse 0.8s ease-in-out ${i * 0.1}s infinite alternate`,
          }}
        />
      ))}
    </div>
  );
};
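A short usage sketch for the waveform, assuming the recording state renders a mirrored pair of bars around a status label; the wrapper component, classes, and copy below are illustrative, not taken from the PR.

```tsx
// Illustrative usage of WaveformBars; wrapper markup and classes are assumptions.
import React from "react";
import { WaveformBars } from "./WaveformBars";

export const RecordingIndicatorExample: React.FC = () => (
  <div className="flex items-center gap-2 text-blue-500">
    <WaveformBars colorClass="bg-blue-500" />
    <span className="text-xs">Recording... space sends, esc cancels</span>
    <WaveformBars colorClass="bg-blue-500" mirrored />
  </div>
);
```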