Skip to content

Commit 93dc9c0

Browse files
committed
feat(cli): add image support for sending and displaying images
- Add image-handler.ts for auto-detecting and processing image paths
- Add terminal-images.ts for iTerm2/Kitty inline image rendering
- Add ImageContentBlock type and ImageBlock component
- Update SDK RunOptions to accept multimodal content (text + images)
- Auto-detect image paths in user input (e.g. @image.png, ./path/to/image.jpg)
- Display images inline in supported terminals; fall back to metadata otherwise
1 parent 2233ff1 commit 93dc9c0

File tree

8 files changed

+832
-7
lines changed

8 files changed

+832
-7
lines changed
Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,129 @@
1+
import { TextAttributes } from '@opentui/core'
2+
import { memo, useMemo } from 'react'
3+
4+
import { useTheme } from '../../hooks/use-theme'
5+
import {
6+
renderInlineImage,
7+
supportsInlineImages,
8+
getImageSupportDescription,
9+
} from '../../utils/terminal-images'
10+
11+
import type { ImageContentBlock } from '../../types/chat'
12+
13+
/** Props accepted by the `ImageBlock` component. */
interface ImageBlockProps {
  /** The image content block to display (payload plus optional metadata). */
  block: ImageContentBlock
  /** Width available to the block; used to size the inline image. */
  availableWidth: number
}
17+
18+
/** Upper bound on the inline image width, in terminal cells. */
const MAX_INLINE_IMAGE_WIDTH = 40

/** Cells subtracted from the available width before sizing the inline image. */
const INLINE_IMAGE_WIDTH_INSET = 4

/**
 * Formats a byte count as a short human-readable string (B / KB / MB).
 * Returns null when the size is missing or zero so callers can omit it.
 */
const formatImageSize = (size: number | undefined): string | null => {
  if (!size) return null
  if (size < 1024) return `${size}B`
  if (size < 1024 * 1024) return `${(size / 1024).toFixed(1)}KB`
  return `${(size / (1024 * 1024)).toFixed(1)}MB`
}

/**
 * Derives an upper-cased type label (e.g. "PNG") for display.
 * Prefers the filename's extension; when the filename is absent or has no
 * extension, falls back to the subtype of an `image/*` media type.
 * Returns null when neither source yields a label.
 */
const getImageTypeLabel = (
  filename: string | undefined,
  mediaType: string,
): string | null => {
  if (filename) {
    const dotIndex = filename.lastIndexOf('.')
    const extension = dotIndex >= 0 ? filename.slice(dotIndex + 1) : ''
    if (extension) return extension.toUpperCase()
  }
  const match = mediaType.match(/image\/(\w+)/)
  return match ? match[1].toUpperCase() : null
}

/**
 * Renders an image content block.
 *
 * When `supportsInlineImages()` reports support, the image is emitted via the
 * sequence returned by `renderInlineImage`, preceded by a one-line caption.
 * Otherwise (or when no sequence is produced) a bordered metadata card with
 * name, type and size is shown instead.
 */
export const ImageBlock = memo(({ block, availableWidth }: ImageBlockProps) => {
  const theme = useTheme()

  const { image, mediaType, filename, size } = block

  // Try to render inline if supported
  const inlineSequence = useMemo(() => {
    if (!supportsInlineImages()) {
      return null
    }

    // Clamp to at least one cell so a very narrow layout never yields a zero
    // or negative dimension.
    const displayWidth = Math.max(
      1,
      Math.min(availableWidth - INLINE_IMAGE_WIDTH_INSET, MAX_INLINE_IMAGE_WIDTH),
    )
    // Terminal cells are roughly 2:1 (height:width), so halve the row count
    // to keep a rough aspect ratio.
    const displayHeight = Math.max(1, Math.floor(displayWidth / 2))

    return renderInlineImage(image, {
      width: displayWidth,
      height: displayHeight,
      filename,
    })
  }, [image, filename, availableWidth])

  const formattedSize = useMemo(() => formatImageSize(size), [size])

  const fileExtension = useMemo(
    () => getImageTypeLabel(filename, mediaType),
    [filename, mediaType],
  )

  if (inlineSequence) {
    // Render inline image using terminal escape sequence
    return (
      <box style={{ flexDirection: 'column', gap: 0, marginTop: 1, marginBottom: 1 }}>
        {/* Image caption/metadata */}
        <text style={{ wrapMode: 'none', fg: theme.muted }}>
          <span attributes={TextAttributes.DIM}>📷 </span>
          <span>{filename || 'Image'}</span>
          {formattedSize && (
            <span attributes={TextAttributes.DIM}> ({formattedSize})</span>
          )}
        </text>

        {/* The actual inline image - rendered via escape sequence */}
        <text style={{ wrapMode: 'none' }}>{inlineSequence}</text>
      </box>
    )
  }

  // Fallback: Display image metadata when inline rendering not supported
  return (
    <box
      style={{
        flexDirection: 'column',
        gap: 0,
        marginTop: 1,
        marginBottom: 1,
        paddingLeft: 1,
        borderStyle: 'single',
        borderColor: theme.border,
      }}
    >
      {/* Header */}
      <text style={{ wrapMode: 'none', fg: theme.foreground }}>
        <span attributes={TextAttributes.BOLD}>📷 Image Attachment</span>
      </text>

      {/* Filename */}
      {filename && (
        <text style={{ wrapMode: 'none', fg: theme.foreground }}>
          <span attributes={TextAttributes.DIM}>Name: </span>
          <span>{filename}</span>
        </text>
      )}

      {/* Type */}
      <text style={{ wrapMode: 'none', fg: theme.muted }}>
        <span attributes={TextAttributes.DIM}>Type: </span>
        <span>{fileExtension || mediaType}</span>
      </text>

      {/* Size */}
      {formattedSize && (
        <text style={{ wrapMode: 'none', fg: theme.muted }}>
          <span attributes={TextAttributes.DIM}>Size: </span>
          <span>{formattedSize}</span>
        </text>
      )}

      {/* Hint about terminal support */}
      <text
        style={{ wrapMode: 'word', fg: theme.muted, marginTop: 1 }}
        attributes={TextAttributes.DIM}
      >
        {`(${getImageSupportDescription()} - use iTerm2 or Kitty for inline display)`}
      </text>
    </box>
  )
})

cli/src/components/message-block.tsx

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ import { ContentWithMarkdown } from './blocks/content-with-markdown'
2222
import { ThinkingBlock } from './blocks/thinking-block'
2323
import { ToolBranch } from './blocks/tool-branch'
2424
import { AskUserBranch } from './blocks/ask-user-branch'
25+
import { ImageBlock } from './blocks/image-block'
2526
import { PlanBox } from './renderers/plan-box'
2627

2728
import type {
@@ -30,8 +31,9 @@ import type {
3031
HtmlContentBlock,
3132
AgentContentBlock,
3233
ImageAttachment,
34+
ImageContentBlock,
3335
} from '../types/chat'
34-
import { isAskUserBlock } from '../types/chat'
36+
import { isAskUserBlock, isImageBlock } from '../types/chat'
3537
import type { ThemeColor } from '../types/theme-system'
3638

3739
interface MessageBlockProps {
@@ -317,6 +319,7 @@ const isRenderableTimelineBlock = (
317319
case 'plan':
318320
case 'mode-divider':
319321
case 'ask-user':
322+
case 'image':
320323
return true
321324
default:
322325
return false
@@ -943,6 +946,16 @@ const SingleBlock = memo(
943946
)
944947
}
945948

949+
case 'image': {
950+
return (
951+
<ImageBlock
952+
key={`${messageId}-image-${idx}`}
953+
block={block as ImageContentBlock}
954+
availableWidth={availableWidth}
955+
/>
956+
)
957+
}
958+
946959
case 'agent': {
947960
return (
948961
<AgentBranchWrapper
@@ -1036,6 +1049,19 @@ const BlocksRenderer = memo(
10361049
)
10371050
continue
10381051
}
1052+
// Handle image blocks
1053+
if (isImageBlock(block)) {
1054+
nodes.push(
1055+
<ImageBlock
1056+
key={`${messageId}-image-${i}`}
1057+
block={block}
1058+
availableWidth={availableWidth}
1059+
/>,
1060+
)
1061+
i++
1062+
continue
1063+
}
1064+
10391065
if (block.type === 'tool') {
10401066
const start = i
10411067
const group: Extract<ContentBlock, { type: 'tool' }>[] = []

cli/src/hooks/use-send-message.ts

Lines changed: 60 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -21,11 +21,9 @@ import { formatTimestamp } from '../utils/helpers'
2121
import { loadAgentDefinitions } from '../utils/load-agent-definitions'
2222

2323
import { logger } from '../utils/logger'
24+
import { extractImagePaths, processImageFile } from '../utils/image-handler'
2425
import { getUserMessage } from '../utils/message-history'
25-
import {
26-
extractImagePaths,
27-
processImageFile,
28-
} from '../utils/image-handler'
26+
import { getProjectRoot } from '../project-files'
2927
import { NETWORK_ERROR_ID } from '../utils/validation-error-helpers'
3028
import {
3129
loadMostRecentChatState,
@@ -39,7 +37,7 @@ import type { SendMessageFn } from '../types/contracts/send-message'
3937
import type { ParamsOf } from '../types/function-params'
4038
import type { SetElement } from '../types/utils'
4139
import type { AgentMode } from '../utils/constants'
42-
import type { AgentDefinition, RunState, ToolName } from '@codebuff/sdk'
40+
import type { AgentDefinition, RunState, ToolName, MessageContent } from '@codebuff/sdk'
4341
import type { SetStateAction } from 'react'
4442
const hiddenToolNames = new Set<ToolName | 'spawn_agent_inline'>([
4543
'spawn_agent_inline',
@@ -1011,6 +1009,63 @@ export const useSendMessage = ({
10111009
? 'base2-max'
10121010
: 'base2-plan'
10131011

1012+
// Auto-detect and process image paths in the content
1013+
const imagePaths = extractImagePaths(content)
1014+
const imagePartsPromises = imagePaths.map(async (imagePath) => {
1015+
const cwd = getProjectRoot()
1016+
const result = await processImageFile(imagePath, cwd)
1017+
if (result.success && result.imagePart) {
1018+
return {
1019+
type: 'image' as const,
1020+
image: result.imagePart.image,
1021+
mediaType: result.imagePart.mediaType,
1022+
filename: result.imagePart.filename,
1023+
size: result.imagePart.size,
1024+
}
1025+
}
1026+
// Log failed image processing
1027+
if (!result.success) {
1028+
logger.warn(
1029+
{ imagePath, error: result.error },
1030+
'Failed to process image',
1031+
)
1032+
}
1033+
return null
1034+
})
1035+
1036+
const imagePartsResults = await Promise.all(imagePartsPromises)
1037+
const validImageParts = imagePartsResults.filter(
1038+
(part): part is NonNullable<typeof part> => part !== null,
1039+
)
1040+
1041+
// Build message content array
1042+
let messageContent: MessageContent[] | undefined
1043+
if (validImageParts.length > 0) {
1044+
messageContent = [
1045+
{ type: 'text' as const, text: content },
1046+
...validImageParts.map((img) => ({
1047+
type: 'image' as const,
1048+
image: img.image,
1049+
mediaType: img.mediaType,
1050+
})),
1051+
]
1052+
1053+
// Calculate total size for logging
1054+
const totalSize = validImageParts.reduce(
1055+
(sum, part) => sum + (part.size || 0),
1056+
0,
1057+
)
1058+
1059+
logger.info(
1060+
{
1061+
imageCount: validImageParts.length,
1062+
totalSize,
1063+
totalSizeKB: (totalSize / 1024).toFixed(1),
1064+
},
1065+
`📎 ${validImageParts.length} image(s) attached`,
1066+
)
1067+
}
1068+
10141069
let runState: RunState
10151070
try {
10161071
runState = await client.run({

cli/src/types/chat.ts

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,18 @@ export type PlanContentBlock = {
6565
content: string
6666
}
6767

68+
/**
 * A chat content block carrying a single image, identified by the `'image'`
 * discriminant.
 */
export type ImageContentBlock = {
  type: 'image'
  /** Base64-encoded image data. */
  image: string
  /** Media type of the image, e.g. `image/png`. */
  mediaType: string
  /** Filename to show alongside the image, when known. */
  filename?: string
  /** Image size; presumably in bytes (the UI formats it as B/KB/MB) — confirm at the producer. */
  size?: number
  /** Image width; units are not established here (NOTE(review): pixels? confirm). */
  width?: number
  /** Image height; units are not established here (NOTE(review): pixels? confirm). */
  height?: number
  /** NOTE(review): presumably whether the block is shown collapsed — confirm usage. */
  isCollapsed?: boolean
  /** NOTE(review): presumably set when the user expanded the block manually — confirm usage. */
  userOpened?: boolean
}
79+
6880
export type AskUserContentBlock = {
6981
type: 'ask-user'
7082
toolCallId: string

0 commit comments

Comments
 (0)