From f74d38d741fa895c465cda35d4021357d3ad8256 Mon Sep 17 00:00:00 2001 From: odilitime Date: Mon, 6 Jan 2025 04:58:45 +0000 Subject: [PATCH] hyperfi endpoint, clean up multer/imports/console logs, notes --- packages/client-direct/src/index.ts | 297 +++++++++++++++++++++++++--- 1 file changed, 273 insertions(+), 24 deletions(-) diff --git a/packages/client-direct/src/index.ts b/packages/client-direct/src/index.ts index 3fda905a7a3..03d4f92babc 100644 --- a/packages/client-direct/src/index.ts +++ b/packages/client-direct/src/index.ts @@ -2,26 +2,26 @@ import bodyParser from "body-parser"; import cors from "cors"; import express, { Request as ExpressRequest } from "express"; import multer from "multer"; +import { z } from "zod"; import { + AgentRuntime, elizaLogger, + messageCompletionFooter, generateCaption, generateImage, Media, getEmbeddingZeroVector, -} from "@elizaos/core"; -import { composeContext } from "@elizaos/core"; -import { generateMessageResponse } from "@elizaos/core"; -import { messageCompletionFooter } from "@elizaos/core"; -import { AgentRuntime } from "@elizaos/core"; -import { + composeContext, + generateMessageResponse, + generateObject, Content, Memory, ModelClass, Client, + stringToUuid, + settings, IAgentRuntime, } from "@elizaos/core"; -import { stringToUuid } from "@elizaos/core"; -import { settings } from "@elizaos/core"; import { createApiRouter } from "./api.ts"; import * as fs from "fs"; import * as path from "path"; @@ -41,12 +41,13 @@ const storage = multer.diskStorage({ }, }); -const upload = multer({ storage }); +// some people have more memory than disk.io +const upload = multer({ storage /*: multer.memoryStorage() */ }); export const messageHandlerTemplate = // {{goals}} - `# Action Examples -{{actionExamples}} + // "# Action Examples" is already included + `{{actionExamples}} (Action examples are for reference only. Do not use the information from them in your response.) # Knowledge @@ -73,6 +74,39 @@ Note that {{agentName}} is capable of reading/seeing/hearing various forms of me # Instructions: Write the next message for {{agentName}}. ` + messageCompletionFooter; +export const hyperfiHandlerTemplate = + `{{actionExamples}} +(Action examples are for reference only. Do not use the information from them in your response.) + +# Knowledge +{{knowledge}} + +# Task: Generate dialog and actions for the character {{agentName}}. +About {{agentName}}: +{{bio}} +{{lore}} + +{{providers}} + +{{attachments}} + +# Capabilities +Note that {{agentName}} is capable of reading/seeing/hearing various forms of media, including images, videos, audio, plaintext and PDFs. Recent attachments have been included above under the "Attachments" section. + +{{messageDirections}} + +{{recentMessages}} + +{{actions}} + +# Instructions: Write the next message for {{agentName}}. + +Response format should be formatted in a JSON block like this: +\`\`\`json +{ "lookAt": "{{nearby}}" or null, "emote": "{{emotes}}" or null, "say": "string" or null, "actions": (array of strings) or null } +\`\`\` +`; + export class DirectClient { public app: express.Application; private agents: Map; // container management @@ -251,7 +285,6 @@ export class DirectClient { }); const response = await generateMessageResponse({ - // @ts-expect-error todo runtime: runtime, context, modelClass: ModelClass.LARGE, @@ -315,6 +348,223 @@ export class DirectClient { } ); + this.app.post("/agents/:agentIdOrName/hyperfi/v1", + async (req: express.Request, res: express.Response) => { + // get runtime + const agentId = req.params.agentIdOrName; + let runtime = this.agents.get(agentId); + // if runtime is null, look for runtime with the same name + if (!runtime) { + runtime = Array.from(this.agents.values()).find( + (a) => + a.character.name.toLowerCase() === + agentId.toLowerCase() + ); + } + if (!runtime) { + res.status(404).send("Agent not found"); + return; + } + + // can we be in more than one hyperfi world at once + // but you may want the same context is multiple worlds + // this is more like an instanceId + const roomId = stringToUuid( + req.body.roomId ?? "hyperfi" + ); + + const body = req.body; + + // hyperfi specific parameters + let nearby = [] + let messages = [] + let availableEmotes = [] + + if (body.nearby) { + nearby = body.nearby; + } + if (body.messages) { + messages = body.messages; + // loop on the messages and record the memories + // might want to do this in parallel + for(const msg of body.messages) { + const parts = msg.split(/:\s*/); + const mUserId = stringToUuid(parts[0]) + await runtime.ensureConnection( + mUserId, + roomId, // where + parts[0], // username + parts[0], // userScreeName? + "hyperfi" + ); + const content: Content = { + text: parts[1] || "", + attachments: [], + source: "hyperfi", + inReplyTo: undefined, + }; + const memory: Memory = { + id: stringToUuid(msg), + agentId: runtime.agentId, + userId: mUserId, + roomId, + content, + }; + await runtime.messageManager.createMemory(memory); + } + } + if (body.availableEmotes) { + availableEmotes = body.availableEmotes; + } + + const content: Content = { + // we need to compose who's near and what emotes are available + text: JSON.stringify(req.body), + attachments: [], + source: "hyperfi", + inReplyTo: undefined, + }; + + const userId = stringToUuid("hyperfi") + const userMessage = { + content, + userId, + roomId, + agentId: runtime.agentId, + }; + + const state = await runtime.composeState(userMessage, { + agentName: runtime.character.name, + }); + + let template = hyperfiHandlerTemplate + template = template.replace('{{emotes}}', availableEmotes.join('|')) + template = template.replace('{{nearby}}', nearby.join('|')) + const context = composeContext({ + state, + template, + }); + + function createHyperfiOutSchema(nearby: string[], availableEmotes: string[]) { + const lookAtSchema = + nearby.length > 1 + ? z.union(nearby.map((item) => z.literal(item)) as [z.ZodLiteral, z.ZodLiteral, ...z.ZodLiteral[]]).nullable() + : nearby.length === 1 + ? z.literal(nearby[0]).nullable() + : z.null(); // Fallback for empty array + + const emoteSchema = + availableEmotes.length > 1 + ? z.union(availableEmotes.map((item) => z.literal(item)) as [z.ZodLiteral, z.ZodLiteral, ...z.ZodLiteral[]]).nullable() + : availableEmotes.length === 1 + ? z.literal(availableEmotes[0]).nullable() + : z.null(); // Fallback for empty array + + return z.object({ + lookAt: lookAtSchema, + emote: emoteSchema, + say: z.string().nullable(), + actions: z.array(z.string()).nullable(), + }); + } + + // Define the schema for the expected output + const hyperfiOutSchema = createHyperfiOutSchema(nearby, availableEmotes) + + // Call LLM + const response = await generateObject({ + runtime, + context, + modelClass: ModelClass.SMALL, // 1s processing time on openai small + schema: hyperfiOutSchema, + }); + + let hfOut + try { + hfOut = hyperfiOutSchema.parse(response.object) + } catch (e) { + elizaLogger.error('cant serialize response', response.object) + res.status(500).send( + "Error in LLM response, try again" + ); + return; + } + + // do this in the background + const rememberThis = new Promise(async resolve => { + const contentObj:Content = { + text: hfOut.say, + } + + if (hfOut.lookAt !== null || hfOut.emote !== null) { + contentObj.text += ". Then I "; + if (hfOut.lookAt !== null) { + contentObj.text += "looked at " + hfOut.lookAt; + if (hfOut.emote !== null) { + contentObj.text += " and "; + } + } + if (hfOut.emote !== null) { + contentObj.text = "emoted " + hfOut.emote; + } + } + + if (hfOut.actions !== null) { + // content can only do one action + contentObj.action = hfOut.actions[0]; + } + + + // save response to memory + const responseMessage = { + ...userMessage, + userId: runtime.agentId, + content: contentObj, + }; + + await runtime.messageManager.createMemory(responseMessage); // 18.2ms + + if (!response) { + res.status(500).send( + "No response from generateMessageResponse" + ); + return; + } + + let message = null as Content | null; + + const messageId = stringToUuid(Date.now().toString()); + const memory: Memory = { + id: messageId, + agentId: runtime.agentId, + userId, + roomId, + content, + createdAt: Date.now(), + }; + + // run evaluators (generally can be done in parallel with processActions) + // can an evaluator modify memory? it could but currently doesn't + await runtime.evaluate(memory, state); // 0.5s + + // only need to call if responseMessage.content.action is set + if (contentObj.action) { + // pass memory (query) to any actions to call + const _result = await runtime.processActions( + memory, + [responseMessage], + state, + async (newMessages) => { + message = newMessages; + return [memory]; + } + ); // 0.674s + } + resolve(true); + }) + res.json({ response: hfOut }); + }); + this.app.post( "/:agentId/image", async (req: express.Request, res: express.Response) => { @@ -324,14 +574,12 @@ export class DirectClient { res.status(404).send("Agent not found"); return; } - // @ts-expect-error todo const images = await generateImage({ ...req.body }, agent); const imagesRes: { image: string; caption: string }[] = []; if (images.data && images.data.length > 0) { for (let i = 0; i < images.data.length; i++) { const caption = await generateCaption( { imageUrl: images.data[i] }, - // @ts-expect-error todo agent ); imagesRes.push({ @@ -380,13 +628,13 @@ export class DirectClient { assetId ); - console.log("Download directory:", downloadDir); + elizaLogger.log("Download directory:", downloadDir); try { - console.log("Creating directory..."); + elizaLogger.log("Creating directory..."); await fs.promises.mkdir(downloadDir, { recursive: true }); - console.log("Fetching file..."); + elizaLogger.log("Fetching file..."); const fileResponse = await fetch( `https://api.bageldb.ai/api/v1/asset/${assetId}/download`, { @@ -402,7 +650,7 @@ export class DirectClient { ); } - console.log("Response headers:", fileResponse.headers); + elizaLogger.log("Response headers:", fileResponse.headers); const fileName = fileResponse.headers @@ -410,19 +658,19 @@ export class DirectClient { ?.split("filename=")[1] ?.replace(/"/g, /* " */ "") || "default_name.txt"; - console.log("Saving as:", fileName); + elizaLogger.log("Saving as:", fileName); const arrayBuffer = await fileResponse.arrayBuffer(); const buffer = Buffer.from(arrayBuffer); const filePath = path.join(downloadDir, fileName); - console.log("Full file path:", filePath); + elizaLogger.log("Full file path:", filePath); await fs.promises.writeFile(filePath, buffer); // Verify file was written const stats = await fs.promises.stat(filePath); - console.log( + elizaLogger.log( "File written successfully. Size:", stats.size, "bytes" @@ -437,7 +685,7 @@ export class DirectClient { fileSize: stats.size, }); } catch (error) { - console.error("Detailed error:", error); + elizaLogger.error("Detailed error:", error); res.status(500).json({ error: "Failed to download files from BagelDB", details: error.message, @@ -522,7 +770,6 @@ export class DirectClient { }); const response = await generateMessageResponse({ - // @ts-expect-error todo runtime: runtime, context, modelClass: ModelClass.LARGE, @@ -612,7 +859,7 @@ export class DirectClient { res.send(Buffer.from(audioBuffer)); } catch (error) { - console.error( + elizaLogger.error( "Error processing message or generating speech:", error ); @@ -626,6 +873,8 @@ export class DirectClient { // agent/src/index.ts:startAgent calls this public registerAgent(runtime: AgentRuntime) { + // register any plugin endpoints? + // but once and only once this.agents.set(runtime.agentId, runtime); }