Skip to content

Commit 604f460

Browse files
committed
images in chat
1 parent 4c658d7 commit 604f460

File tree

9 files changed

+18683
-23153
lines changed

9 files changed

+18683
-23153
lines changed

client/src/Chat.tsx

+66-10
Original file line numberDiff line numberDiff line change
@@ -1,55 +1,75 @@
1-
import { useState } from "react";
1+
import { useRef, useState } from "react";
22
import { useParams } from "react-router-dom";
33
import { useMutation } from "@tanstack/react-query";
44
import { Input } from "@/components/ui/input";
55
import { Button } from "@/components/ui/button";
6+
import { ImageIcon } from "lucide-react";
67
import "./App.css";
8+
import path from "path";
79

810
type TextResponse = {
911
text: string;
1012
user: string;
13+
attachments?: { url: string; contentType: string; title: string }[];
1114
};
1215

1316
export default function Chat() {
1417
const { agentId } = useParams();
1518
const [input, setInput] = useState("");
1619
const [messages, setMessages] = useState<TextResponse[]>([]);
20+
const [selectedFile, setSelectedFile] = useState<File | null>(null);
21+
const fileInputRef = useRef<HTMLInputElement>(null);
1722

1823
// Sends the typed text (and the currently selected image, if any) to the
// agent's message endpoint and appends the agent's replies to the chat log.
const mutation = useMutation({
    mutationFn: async (text: string) => {
        // The request is sent as multipart form data so the image can travel
        // with the text. No Content-Type header is set by hand: the browser
        // adds it together with the multipart boundary.
        const formData = new FormData();
        formData.append("text", text);
        formData.append("userId", "user");
        formData.append("roomId", `default-room-${agentId}`);

        if (selectedFile) {
            formData.append("file", selectedFile);
        }

        const res = await fetch(`/api/${agentId}/message`, {
            method: "POST",
            body: formData,
        });

        // fetch() only rejects on network failure. Surface HTTP errors here so
        // the mutation enters its error state instead of handing a non-array
        // error body to onSuccess.
        if (!res.ok) {
            throw new Error(
                `Message request failed with status ${res.status}`
            );
        }

        return res.json() as Promise<TextResponse[]>;
    },
    onSuccess: (data) => {
        setMessages((prev) => [...prev, ...data]);
        // The image has been delivered; drop it so it is not re-sent with the
        // next message.
        setSelectedFile(null);
    },
});
3745

3846
// Form submit handler: echoes the user's message (with a local preview of the
// attached image) into the chat log, then hands the text to the mutation.
const handleSubmit = async (e: React.FormEvent) => {
    e.preventDefault();

    // Nothing to send unless there is some text or an image.
    const hasText = input.trim().length > 0;
    if (!hasText && !selectedFile) return;

    // A blob URL lets the browser render the picked image right away.
    let previewAttachments: TextResponse["attachments"] = undefined;
    if (selectedFile) {
        previewAttachments = [
            {
                url: URL.createObjectURL(selectedFile),
                contentType: selectedFile.type,
                title: selectedFile.name,
            },
        ];
    }

    // Show the user's own message immediately, before the agent answers.
    const userMessage: TextResponse = {
        text: input,
        user: "user",
        attachments: previewAttachments,
    };
    setMessages((prev) => [...prev, userMessage]);

    mutation.mutate(input);
    setInput("");
};
5261

62+
// Opens the native file picker by forwarding the click to the hidden
// <input type="file"> element, if it is mounted.
const handleFileSelect = () => {
    const picker = fileInputRef.current;
    if (picker) {
        picker.click();
    }
};
65+
66+
// Change handler for the hidden file input: remembers the first picked file
// when it is an image; any other file type is ignored.
const handleFileChange = (e: React.ChangeEvent<HTMLInputElement>) => {
    const file = e.target.files?.[0];
    if (file && file.type.startsWith('image/')) {
        setSelectedFile(file);
    }
    // Clear the input so that picking the same file again (for example after
    // the previous message was sent) still fires a change event. The File
    // object already captured above is unaffected by this reset.
    e.target.value = "";
};
72+
5373
return (
5474
<div className="flex flex-col h-screen max-h-screen w-full">
5575
<div className="flex-1 min-h-0 overflow-y-auto p-4">
@@ -72,6 +92,21 @@ export default function Chat() {
7292
}`}
7393
>
7494
{message.text}
95+
{message.attachments?.map((attachment, i) => (
96+
attachment.contentType.startsWith('image/') && (
97+
<img
98+
key={i}
99+
src={message.user === "user"
100+
? attachment.url
101+
: attachment.url.startsWith('http')
102+
? attachment.url
103+
: `http://localhost:3000/media/generated/${attachment.url.split('/').pop()}`
104+
}
105+
alt={attachment.title || "Attached image"}
106+
className="mt-2 max-w-full rounded-lg"
107+
/>
108+
)
109+
))}
75110
</div>
76111
</div>
77112
))
@@ -86,17 +121,38 @@ export default function Chat() {
86121
<div className="border-t p-4 bg-background">
87122
<div className="max-w-3xl mx-auto">
88123
<form onSubmit={handleSubmit} className="flex gap-2">
124+
<input
125+
type="file"
126+
ref={fileInputRef}
127+
onChange={handleFileChange}
128+
accept="image/*"
129+
className="hidden"
130+
/>
89131
<Input
90132
value={input}
91133
onChange={(e) => setInput(e.target.value)}
92134
placeholder="Type a message..."
93135
className="flex-1"
94136
disabled={mutation.isPending}
95137
/>
138+
<Button
139+
type="button"
140+
variant="outline"
141+
size="icon"
142+
onClick={handleFileSelect}
143+
disabled={mutation.isPending}
144+
>
145+
<ImageIcon className="h-4 w-4" />
146+
</Button>
96147
<Button type="submit" disabled={mutation.isPending}>
97148
{mutation.isPending ? "..." : "Send"}
98149
</Button>
99150
</form>
151+
{selectedFile && (
152+
<div className="mt-2 text-sm text-muted-foreground">
153+
Selected file: {selectedFile.name}
154+
</div>
155+
)}
100156
</div>
101157
</div>
102158
</div>

packages/client-direct/package.json

+1
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
"multer": "1.4.5-lts.1"
1818
},
1919
"devDependencies": {
20+
"@types/multer": "^1.4.12",
2021
"tsup": "8.3.5"
2122
},
2223
"scripts": {

packages/client-direct/src/index.ts

+57-5
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,13 @@
11
import bodyParser from "body-parser";
22
import cors from "cors";
33
import express, { Request as ExpressRequest } from "express";
4-
import multer, { File } from "multer";
5-
import { elizaLogger, generateCaption, generateImage } from "@elizaos/core";
4+
import multer from "multer";
5+
import {
6+
elizaLogger,
7+
generateCaption,
8+
generateImage,
9+
Media,
10+
} from "@elizaos/core";
611
import { composeContext } from "@elizaos/core";
712
import { generateMessageResponse } from "@elizaos/core";
813
import { messageCompletionFooter } from "@elizaos/core";
@@ -19,7 +24,23 @@ import { settings } from "@elizaos/core";
1924
import { createApiRouter } from "./api.ts";
2025
import * as fs from "fs";
2126
import * as path from "path";
22-
const upload = multer({ storage: multer.memoryStorage() });
27+
28+
// Disk storage for multipart uploads: files are written to
// <cwd>/data/uploads under a collision-resistant name.
const storage = multer.diskStorage({
    destination: (req, file, cb) => {
        const uploadDir = path.join(process.cwd(), "data", "uploads");
        try {
            // With `recursive: true` this is a no-op when the directory
            // already exists, so no separate existence check is needed.
            fs.mkdirSync(uploadDir, { recursive: true });
        } catch (error) {
            // Report the failure through multer's callback instead of
            // throwing out of the storage engine.
            cb(
                error instanceof Error ? error : new Error(String(error)),
                uploadDir
            );
            return;
        }
        cb(null, uploadDir);
    },
    filename: (req, file, cb) => {
        const uniqueSuffix = `${Date.now()}-${Math.round(Math.random() * 1e9)}`;
        // `originalname` is supplied by the client. Keep only its final path
        // segment and replace anything outside [A-Za-z0-9_.-] so the stored
        // name cannot contain separators or other surprising characters.
        const safeName = path
            .basename(file.originalname)
            .replace(/[^\w.-]/g, "_");
        cb(null, `${uniqueSuffix}-${safeName}`);
    },
});

// Shared multer instance used by the upload-accepting routes.
const upload = multer({ storage });
2344

2445
export const messageHandlerTemplate =
2546
// {{goals}}
@@ -66,12 +87,22 @@ export class DirectClient {
6687
this.app.use(bodyParser.json());
6788
this.app.use(bodyParser.urlencoded({ extended: true }));
6889

90+
// Serve both uploads and generated images
91+
this.app.use(
92+
"/media/uploads",
93+
express.static(path.join(process.cwd(), "/data/uploads"))
94+
);
95+
this.app.use(
96+
"/media/generated",
97+
express.static(path.join(process.cwd(), "/generatedImages"))
98+
);
99+
69100
const apiRouter = createApiRouter(this.agents, this);
70101
this.app.use(apiRouter);
71102

72103
// Define an interface that extends the Express Request interface
73104
interface CustomRequest extends ExpressRequest {
74-
file: File;
105+
file?: Express.Multer.File;
75106
}
76107

77108
// Update the route handler to use CustomRequest instead of express.Request
@@ -128,6 +159,7 @@ export class DirectClient {
128159

129160
this.app.post(
130161
"/:agentId/message",
162+
upload.single("file"),
131163
async (req: express.Request, res: express.Response) => {
132164
const agentId = req.params.agentId;
133165
const roomId = stringToUuid(
@@ -162,9 +194,29 @@ export class DirectClient {
162194
const text = req.body.text;
163195
const messageId = stringToUuid(Date.now().toString());
164196

197+
// Describe the uploaded file (when multer stored one for this request) as a
// Media attachment on the incoming message.
const attachments: Media[] = [];
if (req.file) {
    attachments.push({
        id: Date.now().toString(),
        // Use the location multer actually wrote the file to. Rebuilding the
        // path by hand (previously <cwd>/agent/data/uploads/...) did not match
        // the storage destination (<cwd>/data/uploads) and pointed at a file
        // that does not exist.
        url: req.file.path,
        title: req.file.originalname,
        source: "direct",
        description: `Uploaded file: ${req.file.originalname}`,
        text: "",
        contentType: req.file.mimetype,
    });
}
216+
165217
const content: Content = {
166218
text,
167-
attachments: [],
219+
attachments,
168220
source: "direct",
169221
inReplyTo: undefined,
170222
};
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,137 @@
1+
import {
2+
Action,
3+
IAgentRuntime,
4+
Memory,
5+
State,
6+
HandlerCallback,
7+
composeContext,
8+
generateObject,
9+
ActionExample,
10+
ModelClass,
11+
elizaLogger,
12+
ServiceType,
13+
IImageDescriptionService,
14+
} from "@elizaos/core";
15+
import { getFileLocationTemplate } from "../templates";
16+
import { FileLocationResultSchema, isFileLocationResult } from "../types";
17+
18+
/**
 * DESCRIBE_IMAGE action.
 *
 * The handler composes a context from the current state and
 * `getFileLocationTemplate`, asks the small model class for a structured
 * result containing a `fileLocation`, passes that location to the runtime's
 * image description service, stores the resulting description as a memory
 * authored by the agent, and forwards it to the optional callback.
 */
export const describeImage: Action = {
    name: "DESCRIBE_IMAGE",
    similes: ["DESCRIBE_PICTURE", "EXPLAIN_PICTURE", "EXPLAIN_IMAGE"],
    // Always eligible; no per-message precondition is checked.
    validate: async (_runtime: IAgentRuntime, _message: Memory) => {
        return true;
    },
    description: "Describe an image",
    /**
     * @param runtime - Agent runtime providing model access, services and the message manager.
     * @param message - Triggering message; its agentId and roomId are reused for the stored memory.
     * @param state - Conversation state used to compose the file-location prompt.
     * @param _options - Unused.
     * @param callback - Optional sink that receives the description text.
     * @returns `true` when a description was produced and stored; `false` when
     *          no file location could be generated or the image description
     *          service is not available.
     */
    handler: async (
        runtime: IAgentRuntime,
        message: Memory,
        state: State,
        _options: { [key: string]: unknown },
        callback?: HandlerCallback
    ): Promise<boolean> => {
        // Create context with attachments and URL
        const getFileLocationContext = composeContext({
            state,
            template: getFileLocationTemplate,
        });

        const fileLocationResultObject = await generateObject({
            runtime,
            context: getFileLocationContext,
            modelClass: ModelClass.SMALL,
            schema: FileLocationResultSchema,
            stop: ["\n"],
        });

        if (!isFileLocationResult(fileLocationResultObject?.object)) {
            elizaLogger.error("Failed to generate file location");
            return false;
        }

        const { fileLocation } = fileLocationResultObject.object;

        // Guard the lookup: dereferencing a missing service would otherwise
        // surface as an opaque TypeError.
        const imageService = runtime.getService<IImageDescriptionService>(
            ServiceType.IMAGE_DESCRIPTION
        );
        if (!imageService) {
            elizaLogger.error("Image description service is not available");
            return false;
        }

        const { description } = await imageService.describeImage(fileLocation);

        // Await the write so a storage failure rejects this handler instead
        // of becoming an unhandled promise rejection.
        await runtime.messageManager.createMemory({
            userId: message.agentId,
            agentId: message.agentId,
            roomId: message.roomId,
            content: {
                text: description,
            },
        });

        // The callback is optional in the signature; only invoke it when given.
        if (callback) {
            await callback({
                text: description,
            });
        }

        return true;
    },
    examples: [
        [
            {
                user: "{{user1}}",
                content: {
                    text: "Can you describe this image for me?",
                },
            },
            {
                user: "{{user2}}",
                content: {
                    text: "Let me analyze this image for you...",
                    action: "DESCRIBE_IMAGE",
                },
            },
            {
                user: "{{user2}}",
                content: {
                    text: "I see an orange tabby cat sitting on a windowsill. The cat appears to be relaxed and looking out the window at birds flying by. The lighting suggests it's a sunny afternoon.",
                },
            },
        ],
        [
            {
                user: "{{user1}}",
                content: {
                    text: "What's in this picture?",
                },
            },
            {
                user: "{{user2}}",
                content: {
                    text: "I'll take a look at that image...",
                    action: "DESCRIBE_IMAGE",
                },
            },
            {
                user: "{{user2}}",
                content: {
                    text: "The image shows a modern kitchen with stainless steel appliances. There's a large island counter in the center with marble countertops. The cabinets are white with sleek handles, and there's pendant lighting hanging above the island.",
                },
            },
        ],
        [
            {
                user: "{{user1}}",
                content: {
                    text: "Could you tell me what this image depicts?",
                },
            },
            {
                user: "{{user2}}",
                content: {
                    text: "I'll describe this image for you...",
                    action: "DESCRIBE_IMAGE",
                },
            },
            {
                user: "{{user2}}",
                content: {
                    text: "This is a scenic mountain landscape at sunset. The peaks are snow-capped and reflected in a calm lake below. The sky is painted in vibrant oranges and purples, with a few wispy clouds catching the last rays of sunlight.",
                },
            },
        ],
    ] as ActionExample[][],
} as Action;

0 commit comments

Comments
 (0)