
Commit 4a3213b

Merge branch 'develop' into feat/live-monorepo-types

2 parents 2c830bb + bf6ef96, commit 4a3213b

13 files changed, +656 -510 lines

.env.example (+4)

@@ -368,3 +368,7 @@ CRONOSZKEVM_PRIVATE_KEY=

 # Fuel Ecosystem (FuelVM)
 FUEL_WALLET_PRIVATE_KEY=
+
+# Tokenizer Settings
+TOKENIZER_MODEL= # Specify the tokenizer model to be used.
+TOKENIZER_TYPE= # Options: tiktoken (for OpenAI models) or auto (AutoTokenizer from Hugging Face for non-OpenAI models). Default: tiktoken.
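
Both settings are optional. As the packages/core/src/generation.ts change below shows, trimming falls back to tiktoken with the "gpt-4o" encoding when either value is missing; illustrative (not shipped) values would be TOKENIZER_TYPE=tiktoken with TOKENIZER_MODEL=gpt-4o for OpenAI-style models, or TOKENIZER_TYPE=auto with TOKENIZER_MODEL set to a Hugging Face repo id whose tokenizer AutoTokenizer can load.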

packages/client-discord/src/actions/chat_with_attachments.ts (+6, -7)

@@ -191,17 +191,16 @@ const summarizeAction = {

     state.attachmentsWithText = attachmentsWithText;
     state.objective = objective;
-
+    const template = await trimTokens(
+        summarizationTemplate,
+        chunkSize + 500,
+        runtime
+    );
     const context = composeContext({
         state,
         // make sure it fits, we can pad the tokens a bit
         // Get the model's tokenizer based on the current model being used
-        template: trimTokens(
-            summarizationTemplate,
-            chunkSize + 500,
-            (model.model[ModelClass.SMALL] ||
-                "gpt-4o-mini") as TiktokenModel // Use the same model as generation; Fallback if no SMALL model configured
-        ),
+        template,
     });

     const summary = await generateText({
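
The same change is applied at every call site below: trimTokens is now asynchronous and takes the runtime instead of a hard-coded TiktokenModel, so the tokenizer is resolved from the TOKENIZER_MODEL / TOKENIZER_TYPE settings. A minimal sketch of the new call shape, using the names from the hunk above:

    const template = await trimTokens(
        summarizationTemplate,
        chunkSize + 500, // pad the token budget a bit so the filled-in template still fits
        runtime // tokenizer model and type come from the runtime settings
    );
    const context = composeContext({ state, template });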

packages/client-discord/src/actions/summarize_conversation.ts (+6, -5)

@@ -261,14 +261,15 @@ const summarizeAction = {
     const chunk = chunks[i];
     state.currentSummary = currentSummary;
     state.currentChunk = chunk;
+    const template = await trimTokens(
+        summarizationTemplate,
+        chunkSize + 500,
+        runtime
+    );
     const context = composeContext({
         state,
         // make sure it fits, we can pad the tokens a bit
-        template: trimTokens(
-            summarizationTemplate,
-            chunkSize + 500,
-            "gpt-4o-mini"
-        ),
+        template,
     });

     const summary = await generateText({

packages/client-discord/src/attachments.ts (+1, -1)

@@ -19,7 +19,7 @@ async function generateSummary(
     text: string
 ): Promise<{ title: string; description: string }> {
     // make sure text is under 128k characters
-    text = trimTokens(text, 100000, "gpt-4o-mini"); // TODO: clean this up
+    text = await trimTokens(text, 100000, runtime);

     const prompt = `Please generate a concise summary for the following text:
packages/client-discord/src/utils.ts (+1, -1)

@@ -47,7 +47,7 @@ export async function generateSummary(
     text: string
 ): Promise<{ title: string; description: string }> {
     // make sure text is under 128k characters
-    text = trimTokens(text, 100000, "gpt-4o-mini"); // TODO: clean this up
+    text = await trimTokens(text, 100000, runtime);

     const prompt = `Please generate a concise summary for the following text:

packages/client-slack/src/actions/chat_with_attachments.ts (+6, -5)

@@ -200,13 +200,14 @@ const summarizeAction: Action = {
     currentState.attachmentsWithText = attachmentsWithText;
     currentState.objective = objective;

+    const template = await trimTokens(
+        summarizationTemplate,
+        chunkSize + 500,
+        runtime
+    );
     const context = composeContext({
         state: currentState,
-        template: trimTokens(
-            summarizationTemplate,
-            chunkSize + 500,
-            "gpt-4o-mini"
-        ),
+        template,
     });

     const summary = await generateText({

packages/client-slack/src/actions/summarize_conversation.ts (+7, -5)

@@ -279,13 +279,15 @@ const summarizeAction: Action = {
     currentState.currentSummary = currentSummary;
     currentState.currentChunk = chunk;

+    const template = await trimTokens(
+        summarizationTemplate,
+        chunkSize + 500,
+        runtime
+    );
+
     const context = composeContext({
         state: currentState,
-        template: trimTokens(
-            summarizationTemplate,
-            chunkSize + 500,
-            "gpt-4o-mini"
-        ),
+        template,
     });

     const summary = await generateText({

packages/client-slack/src/attachments.ts (+1, -1)

@@ -21,7 +21,7 @@ async function generateSummary(
     runtime: IAgentRuntime,
     text: string
 ): Promise<{ title: string; description: string }> {
-    text = trimTokens(text, 100000, "gpt-4o-mini");
+    text = await trimTokens(text, 100000, runtime);

     const prompt = `Please generate a concise summary for the following text:

packages/core/src/generation.ts (+120, -49)

@@ -14,6 +14,7 @@ import { Buffer } from "buffer";
 import { createOllama } from "ollama-ai-provider";
 import OpenAI from "openai";
 import { encodingForModel, TiktokenModel } from "js-tiktoken";
+import { AutoTokenizer } from "@huggingface/transformers";
 import Together from "together-ai";
 import { ZodSchema } from "zod";
 import { elizaLogger } from "./index.ts";
@@ -37,13 +38,122 @@ import {
     SearchResponse,
     ActionResponse,
     TelemetrySettings,
+    TokenizerType,
 } from "./types.ts";
 import { fal } from "@fal-ai/client";
 import { tavily } from "@tavily/core";

 type Tool = CoreTool<any, any>;
 type StepResult = AIStepResult<any>;

+/**
+ * Trims the provided text context to a specified token limit using a tokenizer model and type.
+ *
+ * The function dynamically determines the truncation method based on the tokenizer settings
+ * provided by the runtime. If no tokenizer settings are defined, it defaults to using the
+ * TikToken truncation method with the "gpt-4o" model.
+ *
+ * @async
+ * @function trimTokens
+ * @param {string} context - The text to be tokenized and trimmed.
+ * @param {number} maxTokens - The maximum number of tokens allowed after truncation.
+ * @param {IAgentRuntime} runtime - The runtime interface providing tokenizer settings.
+ *
+ * @returns {Promise<string>} A promise that resolves to the trimmed text.
+ *
+ * @throws {Error} Throws an error if the runtime settings are invalid or missing required fields.
+ *
+ * @example
+ * const trimmedText = await trimTokens("This is an example text", 50, runtime);
+ * console.log(trimmedText); // Output will be a truncated version of the input text.
+ */
+export async function trimTokens(
+    context: string,
+    maxTokens: number,
+    runtime: IAgentRuntime
+) {
+    if (!context) return "";
+    if (maxTokens <= 0) throw new Error("maxTokens must be positive");
+
+    const tokenizerModel = runtime.getSetting("TOKENIZER_MODEL");
+    const tokenizerType = runtime.getSetting("TOKENIZER_TYPE");
+
+    if (!tokenizerModel || !tokenizerType) {
+        // Default to TikToken truncation using the "gpt-4o" model if tokenizer settings are not defined
+        return truncateTiktoken("gpt-4o", context, maxTokens);
+    }
+
+    // Choose the truncation method based on tokenizer type
+    if (tokenizerType === TokenizerType.Auto) {
+        return truncateAuto(tokenizerModel, context, maxTokens);
+    }
+
+    if (tokenizerType === TokenizerType.TikToken) {
+        return truncateTiktoken(
+            tokenizerModel as TiktokenModel,
+            context,
+            maxTokens
+        );
+    }
+
+    elizaLogger.warn(`Unsupported tokenizer type: ${tokenizerType}`);
+    return truncateTiktoken("gpt-4o", context, maxTokens);
+}
+
+async function truncateAuto(
+    modelPath: string,
+    context: string,
+    maxTokens: number
+) {
+    try {
+        const tokenizer = await AutoTokenizer.from_pretrained(modelPath);
+        const tokens = tokenizer.encode(context);
+
+        // If already within limits, return unchanged
+        if (tokens.length <= maxTokens) {
+            return context;
+        }
+
+        // Keep the most recent tokens by slicing from the end
+        const truncatedTokens = tokens.slice(-maxTokens);
+
+        // Decode back to text using the Hugging Face tokenizer
+        return tokenizer.decode(truncatedTokens);
+    } catch (error) {
+        elizaLogger.error("Error in trimTokens:", error);
+        // Return truncated string if tokenization fails
+        return context.slice(-maxTokens * 4); // Rough estimate of 4 chars per token
+    }
+}
+
+async function truncateTiktoken(
+    model: TiktokenModel,
+    context: string,
+    maxTokens: number
+) {
+    try {
+        const encoding = encodingForModel(model);
+
+        // Encode the text into tokens
+        const tokens = encoding.encode(context);
+
+        // If already within limits, return unchanged
+        if (tokens.length <= maxTokens) {
+            return context;
+        }
+
+        // Keep the most recent tokens by slicing from the end
+        const truncatedTokens = tokens.slice(-maxTokens);
+
+        // Decode back to text - js-tiktoken decode() returns a string directly
+        return encoding.decode(truncatedTokens);
+    } catch (error) {
+        elizaLogger.error("Error in trimTokens:", error);
+        // Return truncated string if tokenization fails
+        return context.slice(-maxTokens * 4); // Rough estimate of 4 chars per token
+    }
+}
+
 /**
  * Send a message to the model for a text generateText - receive a string back and parse how you'd like
  * @param opts - The options for the generateText request.
@@ -187,7 +297,8 @@ export async function generateText({
     elizaLogger.debug(
         `Trimming context to max length of ${max_context_length} tokens.`
     );
-    context = trimTokens(context, max_context_length, "gpt-4o");
+
+    context = await trimTokens(context, max_context_length, runtime);

     let response: string;

@@ -653,45 +764,6 @@ export async function generateText({
     }
 }

-/**
- * Truncate the context to the maximum length allowed by the model.
- * @param context The text to truncate
- * @param maxTokens Maximum number of tokens to keep
- * @param model The tokenizer model to use
- * @returns The truncated text
- */
-export function trimTokens(
-    context: string,
-    maxTokens: number,
-    model: TiktokenModel
-): string {
-    if (!context) return "";
-    if (maxTokens <= 0) throw new Error("maxTokens must be positive");
-
-    // Get the tokenizer for the model
-    const encoding = encodingForModel(model);
-
-    try {
-        // Encode the text into tokens
-        const tokens = encoding.encode(context);
-
-        // If already within limits, return unchanged
-        if (tokens.length <= maxTokens) {
-            return context;
-        }
-
-        // Keep the most recent tokens by slicing from the end
-        const truncatedTokens = tokens.slice(-maxTokens);
-
-        // Decode back to text - js-tiktoken decode() returns a string directly
-        return encoding.decode(truncatedTokens);
-    } catch (error) {
-        console.error("Error in trimTokens:", error);
-        // Return truncated string if tokenization fails
-        return context.slice(-maxTokens * 4); // Rough estimate of 4 chars per token
-    }
-}
-
 /**
  * Sends a message to the model to determine if it should respond to the given context.
  * @param opts - The options for the generateText request
@@ -973,9 +1045,10 @@ export async function generateMessageResponse({
     context: string;
     modelClass: string;
 }): Promise<Content> {
-    const max_context_length =
-        models[runtime.modelProvider].settings.maxInputTokens;
-    context = trimTokens(context, max_context_length, "gpt-4o");
+    const provider = runtime.modelProvider;
+    const max_context_length = models[provider].settings.maxInputTokens;
+
+    context = await trimTokens(context, max_context_length, runtime);
     let retryLength = 1000; // exponential backoff
     while (true) {
         try {
@@ -1443,20 +1516,18 @@ export const generateObject = async ({
     }

     const provider = runtime.modelProvider;
-    const model = models[provider].model[modelClass] as TiktokenModel;
-    if (!model) {
-        throw new Error(`Unsupported model class: ${modelClass}`);
-    }
+    const model = models[provider].model[modelClass];
     const temperature = models[provider].settings.temperature;
     const frequency_penalty = models[provider].settings.frequency_penalty;
     const presence_penalty = models[provider].settings.presence_penalty;
     const max_context_length = models[provider].settings.maxInputTokens;
     const max_response_length = models[provider].settings.maxOutputTokens;
-    const experimental_telemetry = models[provider].settings.experimental_telemetry;
+    const experimental_telemetry =
+        models[provider].settings.experimental_telemetry;
     const apiKey = runtime.token;

     try {
-        context = trimTokens(context, max_context_length, model);
+        context = await trimTokens(context, max_context_length, runtime);

         const modelOptions: ModelSettings = {
             prompt: context,
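
Taken together, trimTokens is now model-agnostic: it reads TOKENIZER_MODEL and TOKENIZER_TYPE from the runtime and dispatches to js-tiktoken or to a Hugging Face AutoTokenizer, falling back to the "gpt-4o" encoding when the settings are absent. A minimal sketch of how a caller might exercise the auto path (the helper name and setting values are illustrative, not part of this commit):

    // Assumes the agent's settings contain, for example:
    //   TOKENIZER_TYPE=auto
    //   TOKENIZER_MODEL=gpt2   // any Hugging Face repo id that ships a tokenizer
    async function buildPrompt(runtime: IAgentRuntime, history: string): Promise<string> {
        // trimTokens must now be awaited; with the settings above it loads the tokenizer
        // via AutoTokenizer.from_pretrained and keeps only the most recent 8000 tokens.
        const trimmed = await trimTokens(history, 8000, runtime);
        return `Recent conversation:\n${trimmed}`;
    }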

packages/core/src/types.ts (+5, -1)

@@ -687,7 +687,6 @@ export type Character = {
     /** Image model provider to use, if different from modelProvider */
     imageModelProvider?: ModelProviderName;

-
     /** Image Vision model provider to use, if different from modelProvider */
     imageVisionModelProvider?: ModelProviderName;

@@ -1319,6 +1318,11 @@ export interface ISlackService extends Service {
     client: any;
 }

+export enum TokenizerType {
+    Auto = "auto",
+    TikToken = "tiktoken",
+}
+
 export enum TranscriptionProvider {
     OpenAI = "openai",
     Deepgram = "deepgram",

packages/core/tsup.config.ts (+2)

@@ -19,5 +19,7 @@ export default defineConfig({
         "https",
         // Add other modules you want to externalize
         "@tavily/core",
+        "onnxruntime-node",
+        "sharp",
     ],
 });
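
The new externals are most likely needed because @huggingface/transformers (imported in generation.ts above) pulls in the native onnxruntime-node and sharp modules, which should remain ordinary runtime dependencies rather than being bundled by tsup.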

packages/plugin-node/src/services/browser.ts (+1, -1)

@@ -13,7 +13,7 @@ async function generateSummary(
     text: string
 ): Promise<{ title: string; description: string }> {
     // make sure text is under 128k characters
-    text = trimTokens(text, 100000, "gpt-4o-mini"); // TODO: clean this up
+    text = await trimTokens(text, 100000, runtime);

     const prompt = `Please generate a concise summary for the following text:
