gguf: run generate-llm #1224

Merged 2 commits on Feb 25, 2025. The diff below shows the changes from 1 commit.
35 changes: 30 additions & 5 deletions packages/gguf/scripts/generate-llm.ts
@@ -5,7 +5,10 @@

import { writeFileSync } from "node:fs";

-const SOURCE_CPP_URL = "https://raw.githubusercontent.com/ggerganov/llama.cpp/master/src/llama.cpp";
+const SOURCE_CPP_URLS = [
+	"https://raw.githubusercontent.com/ggerganov/llama.cpp/master/src/llama-arch.cpp",
+	"https://raw.githubusercontent.com/ggerganov/llama.cpp/master/src/llama-model.cpp",
+];
const DEST_FILE_PATH = "./src/transformer-llm.ts";
const DEST_COMMON_SOURCE = `
/** This file is auto-generated by generate-llm.ts */
@@ -90,10 +93,26 @@ const KV_TYPE = {
LLM_KV_EXPERT_SHARED_COUNT: "number",
LLM_KV_EXPERT_WEIGHTS_SCALE: "number",
LLM_KV_ROPE_SCALING_YARN_LOG_MUL: "number",
LLM_KV_ROPE_DIMENSION_COUNT: "number",
LLM_KV_ROPE_DIMENSION_SECTIONS: "number[]",
LLM_KV_ATTENTION_Q_LORA_RANK: "number",
LLM_KV_ATTENTION_RELATIVE_BUCKETS_COUNT: "number",
LLM_KV_DECODER_START_TOKEN_ID: "number",
LLM_KV_USE_PARALLEL_RESIDUAL: "boolean",
LLM_KV_WKV_HEAD_SIZE: "number",
LLM_KV_TIME_MIX_EXTRA_DIM: "number",
LLM_KV_TIME_DECAY_EXTRA_DIM: "number",
LLM_KV_RESCALE_EVERY_N_LAYERS: "boolean",
LLM_KV_TOKEN_SHIFT_COUNT: "boolean",
LLM_KV_SWIN_NORM: "boolean",
LLM_KV_ATTENTION_GROUPNORM_EPS: "number",
LLM_KV_ATTENTION_GROUPNORM_GROUPS: "number",
LLM_KV_ATTENTION_SCALE: "number",
LLM_KV_EMBEDDING_SCALE: "number",
LLM_KV_RESIDUAL_SCALE: "number",
LLM_KV_SSM_DT_B_C_RMS: "boolean",
LLM_KV_EXPERT_WEIGHTS_NORM: "boolean",
LLM_KV_EXPERT_GATING_FUNC: "boolean",
};
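Each KV_TYPE entry maps a llama.cpp KV constant to the TypeScript type string emitted for the matching GGUF metadata key. A rough sketch of how such a map can drive the codegen (emitField is a hypothetical helper, not the script's actual emit code):

```ts
// Hypothetical sketch: render one generated TypeScript field from a
// llama.cpp KV constant and its resolved metadata key name.
// Only KV_TYPE's shape is taken from this PR; the emit logic is assumed.
function emitField(cppConst: keyof typeof KV_TYPE, keyName: string): string {
	return `\t"${keyName}": ${KV_TYPE[cppConst]};`;
}

// emitField("LLM_KV_ROPE_DIMENSION_SECTIONS", "qwen2vl.rope.dimension_sections")
// => '\t"qwen2vl.rope.dimension_sections": number[];'
```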

interface Arch {
@@ -105,8 +124,13 @@ interface Arch {
}

async function main() {
-const res = await fetch(SOURCE_CPP_URL);
-const cppSource = await res.text();
+const cppSources = await Promise.all(
+	SOURCE_CPP_URLS.map(async (url) => {
+		const res = await fetch(url);
+		return await res.text();
+	})
+);
+const cppSource = cppSources.join("\n");
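This change fetches both llama.cpp sources in parallel and concatenates them, so the rest of the script can keep treating the input as a single file. A standalone sketch of the same pattern, with a res.ok guard added for illustration (the PR's version fetches and joins only):

```ts
// Fetch several source files in parallel and concatenate them.
// The error check is an illustrative addition, not part of this PR.
async function fetchConcatenated(urls: string[]): Promise<string> {
	const texts = await Promise.all(
		urls.map(async (url) => {
			const res = await fetch(url);
			if (!res.ok) {
				throw new Error(`Failed to fetch ${url}: ${res.status}`);
			}
			return await res.text();
		})
	);
	// Join with "\n" so later line-based parsing works across file boundaries.
	return texts.join("\n");
}
```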

/////////////////////////////////////
// extract list of all architectures
Expand Down Expand Up @@ -143,6 +167,7 @@ async function main() {
constToKVName[matched.groups.cppConst] = matched.groups.name;
}
}
console.log("constToKVName", constToKVName);

/////////////////////////////////////
// extract list of tensor names based on architecture
@@ -172,8 +197,8 @@ async function main() {
let insideLoadHParamsFn = false;
currCppConst = "";
for (const line of cppSource.split("\n")) {
-// check if current line is function llm_load_hparams()
-if (line.startsWith("static void llm_load_hparams")) {
+// check if current line is function llama_model::load_hparams()
+if (line.startsWith("void llama_model::load_hparams")) {
insideLoadHParamsFn = true;
}
if (!insideLoadHParamsFn) {
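The hunk above tracks the llama.cpp refactor that moved llm_load_hparams() into the llama_model class as load_hparams(). A simplified sketch of the line-scanning approach (illustrative only; the closing-brace heuristic is an assumption, and the actual script keeps more state while inside the function):

```ts
// Simplified sketch: collect the lines of llama_model::load_hparams()
// from the concatenated C++ source by plain line scanning.
function extractLoadHParams(cppSource: string): string[] {
	const collected: string[] = [];
	let inside = false;
	for (const line of cppSource.split("\n")) {
		if (line.startsWith("void llama_model::load_hparams")) {
			inside = true;
		}
		if (!inside) {
			continue;
		}
		collected.push(line);
		// In llama.cpp, a "}" at column 0 closes a top-level function body.
		if (line.startsWith("}")) {
			break;
		}
	}
	return collected;
}
```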
109 changes: 105 additions & 4 deletions packages/gguf/src/transformer-llm.ts
@@ -54,6 +54,7 @@ export enum TransformerLLMPoolingType {

export const LLM_ARCHITECTURES = [
"llama",
"deci",
"falcon",
"grok",
"gpt2",
@@ -71,34 +72,52 @@ export const LLM_ARCHITECTURES = [
"qwen",
"qwen2",
"qwen2moe",
"qwen2vl",
"phi2",
"phi3",
"phimoe",
"plamo",
"codeshell",
"orion",
"internlm2",
"minicpm",
"minicpm3",
"gemma",
"gemma2",
"starcoder2",
"mamba",
"xverse",
"command-r",
"cohere2",
"dbrx",
"olmo",
"olmo2",
"olmoe",
"openelm",
"arctic",
"deepseek",
"deepseek2",
"chatglm",
"bitnet",
"t5",
"t5encoder",
"jais",
"nemotron",
"exaone",
"rwkv6",
"rwkv6qwen2",
"granite",
"granitemoe",
"chameleon",
"wavtokenizer-dec",
] as const;
type LLMArchitecture = (typeof LLM_ARCHITECTURES)[number];
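The as const assertion lets (typeof LLM_ARCHITECTURES)[number] resolve to a union of string literals rather than plain string, so the names added in this PR become valid values and anything else is rejected at compile time:

```ts
// Illustrative only:
const ok: LLMArchitecture = "deci"; // accepted: added by this PR
// const bad: LLMArchitecture = "not-an-arch"; // compile error: not in LLM_ARCHITECTURES
```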
export type ArchLlama = TransformerLLMBase<"llama"> & {
"llama.attention.layer_norm_rms_epsilon": number;
};
export type ArchDeci = TransformerLLMBase<"deci"> & {
"deci.attention.layer_norm_rms_epsilon": number;
};
export type ArchFalcon = TransformerLLMBase<"falcon"> & {
"falcon.attention.layer_norm_epsilon": number;
};
@@ -130,19 +149,16 @@ export type ArchRefact = TransformerLLMBase<"refact"> & {
export type ArchBert = TransformerLLMBase<"bert"> & {
"bert.attention.layer_norm_epsilon": number;
"bert.attention.causal": boolean;
"tokenizer.ggml.token_type_count": number;
"bert.pooling_type": TransformerLLMPoolingType;
};
export type ArchNomicBert = TransformerLLMBase<"nomic-bert"> & {
"nomic-bert.attention.layer_norm_epsilon": number;
"nomic-bert.attention.causal": boolean;
"tokenizer.ggml.token_type_count": number;
"nomic-bert.pooling_type": TransformerLLMPoolingType;
};
export type ArchJinaBertV2 = TransformerLLMBase<"jina-bert-v2"> & {
"jina-bert-v2.attention.layer_norm_epsilon": number;
"jina-bert-v2.attention.causal": boolean;
"tokenizer.ggml.token_type_count": number;
"jina-bert-v2.pooling_type": TransformerLLMPoolingType;
};
export type ArchBloom = TransformerLLMBase<"bloom"> & {
@@ -162,13 +178,19 @@ export type ArchQwen2moe = TransformerLLMBase<"qwen2moe"> & {
"qwen2moe.expert_shared_feed_forward_length": number;
"qwen2moe.attention.layer_norm_rms_epsilon": number;
};
export type ArchQwen2vl = TransformerLLMBase<"qwen2vl"> & {
"qwen2vl.rope.dimension_sections": number[];
};
export type ArchPhi2 = TransformerLLMBase<"phi2"> & {
"phi2.attention.layer_norm_epsilon": number;
};
export type ArchPhi3 = TransformerLLMBase<"phi3"> & {
"phi3.attention.layer_norm_rms_epsilon": number;
"phi3.attention.sliding_window": number;
};
export type ArchPhimoe = TransformerLLMBase<"phimoe"> & {
"phimoe.attention.layer_norm_rms_epsilon": number;
};
export type ArchPlamo = TransformerLLMBase<"plamo"> & {
"plamo.attention.layer_norm_rms_epsilon": number;
};
@@ -183,6 +205,14 @@ export type ArchInternlm2 = TransformerLLMBase<"internlm2"> & {
};
export type ArchMinicpm = TransformerLLMBase<"minicpm"> & {
"minicpm.attention.layer_norm_rms_epsilon": number;
"minicpm.embedding_scale": number;
"minicpm.residual_scale": number;
"minicpm.logit_scale": number;
};
export type ArchMinicpm3 = TransformerLLMBase<"minicpm3"> & {
"minicpm3.attention.layer_norm_rms_epsilon": number;
"minicpm3.attention.q_lora_rank": number;
"minicpm3.attention.kv_lora_rank": number;
};
export type ArchGemma = TransformerLLMBase<"gemma"> & {
"gemma.attention.layer_norm_rms_epsilon": number;
@@ -201,6 +231,7 @@ export type ArchMamba = TransformerLLMBase<"mamba"> & {
"mamba.ssm.inner_size": number;
"mamba.ssm.state_size": number;
"mamba.ssm.time_step_rank": number;
"mamba.ssm.dt_b_c_rms": boolean;
"mamba.attention.layer_norm_rms_epsilon": number;
};
export type ArchXverse = TransformerLLMBase<"xverse"> & {
@@ -210,6 +241,11 @@ export type ArchCommandR = TransformerLLMBase<"command-r"> & {
"command-r.logit_scale": number;
"command-r.attention.layer_norm_epsilon": number;
};
export type ArchCohere2 = TransformerLLMBase<"cohere2"> & {
"cohere2.attention.sliding_window": number;
"cohere2.logit_scale": number;
"cohere2.attention.layer_norm_epsilon": number;
};
export type ArchDbrx = TransformerLLMBase<"dbrx"> & {
"dbrx.attention.layer_norm_epsilon": number;
"dbrx.attention.clamp_kqv": number;
@@ -218,12 +254,25 @@ export type ArchOlmo = TransformerLLMBase<"olmo"> & {
"olmo.attention.layer_norm_epsilon": number;
"olmo.attention.clamp_kqv": number;
};
export type ArchOlmo2 = TransformerLLMBase<"olmo2"> & {
"olmo2.attention.layer_norm_rms_epsilon": number;
};
export type ArchOlmoe = TransformerLLMBase<"olmoe"> & {
"olmoe.attention.layer_norm_rms_epsilon": number;
};
export type ArchOpenelm = TransformerLLMBase<"openelm"> & {
"openelm.attention.layer_norm_rms_epsilon": number;
};
export type ArchArctic = TransformerLLMBase<"arctic"> & {
"arctic.attention.layer_norm_rms_epsilon": number;
};
export type ArchDeepseek = TransformerLLMBase<"deepseek"> & {
"deepseek.attention.layer_norm_rms_epsilon": number;
"deepseek.leading_dense_block_count": number;
"deepseek.expert_feed_forward_length": number;
"deepseek.expert_shared_count": number;
"deepseek.expert_weights_scale": number;
};
export type ArchDeepseek2 = TransformerLLMBase<"deepseek2"> & {
"deepseek2.attention.layer_norm_rms_epsilon": number;
"deepseek2.leading_dense_block_count": number;
@@ -232,6 +281,8 @@ export type ArchDeepseek2 = TransformerLLMBase<"deepseek2"> & {
"deepseek2.expert_feed_forward_length": number;
"deepseek2.expert_shared_count": number;
"deepseek2.expert_weights_scale": number;
"deepseek2.expert_weights_norm": boolean;
"deepseek2.expert_gating_func": boolean;
"deepseek2.rope.scaling.yarn_log_multiplier": number;
};
export type ArchChatglm = TransformerLLMBase<"chatglm"> & {
@@ -253,9 +304,44 @@ export type ArchJais = TransformerLLMBase<"jais"> & {
"jais.attention.layer_norm_epsilon": number;
"jais.attention.max_alibi_bias": number;
};
export type ArchNemotron = TransformerLLMBase<"nemotron"> & {
"nemotron.attention.layer_norm_epsilon": number;
};
export type ArchExaone = TransformerLLMBase<"exaone"> & {
"exaone.attention.layer_norm_rms_epsilon": number;
};
export type ArchRwkv6 = TransformerLLMBase<"rwkv6">;
export type ArchRwkv6qwen2 = TransformerLLMBase<"rwkv6qwen2"> & {
"rwkv6qwen2.attention.layer_norm_epsilon": number;
"rwkv6qwen2.attention.layer_norm_rms_epsilon": number;
"rwkv6qwen2.wkv.head_size": number;
"rwkv6qwen2.time_mix_extra_dim": number;
"rwkv6qwen2.time_decay_extra_dim": number;
"rwkv6qwen2.rescale_every_n_layers": boolean;
"rwkv6qwen2.token_shift_count": boolean;
};
export type ArchGranite = TransformerLLMBase<"granite">;
export type ArchGraniteMoe = TransformerLLMBase<"granitemoe"> & {
"granitemoe.attention.layer_norm_rms_epsilon": number;
"granitemoe.logit_scale": number;
"granitemoe.residual_scale": number;
"granitemoe.embedding_scale": number;
"granitemoe.attention.scale": number;
};
export type ArchChameleon = TransformerLLMBase<"chameleon"> & {
"chameleon.attention.layer_norm_rms_epsilon": number;
"chameleon.swin_norm": boolean;
};
export type ArchWavtokenizerDec = TransformerLLMBase<"wavtokenizer-dec"> & {
"wavtokenizer-dec.attention.layer_norm_epsilon": number;
"wavtokenizer-dec.attention.group_norm_epsilon": number;
"wavtokenizer-dec.attention.group_norm_groups": number;
"wavtokenizer-dec.attention.causal": boolean;
};

export type TransformerLLM =
| ArchLlama
| ArchDeci
| ArchFalcon
| ArchGrok
| ArchGpt2
@@ -273,26 +359,41 @@ export type TransformerLLM =
| ArchQwen
| ArchQwen2
| ArchQwen2moe
| ArchQwen2vl
| ArchPhi2
| ArchPhi3
| ArchPhimoe
| ArchPlamo
| ArchCodeshell
| ArchOrion
| ArchInternlm2
| ArchMinicpm
| ArchMinicpm3
| ArchGemma
| ArchGemma2
| ArchStarcoder2
| ArchMamba
| ArchXverse
| ArchCommandR
| ArchCohere2
| ArchDbrx
| ArchOlmo
| ArchOlmo2
| ArchOlmoe
| ArchOpenelm
| ArchArctic
| ArchDeepseek
| ArchDeepseek2
| ArchChatglm
| ArchBitnet
| ArchT5
| ArchT5encoder
| ArchJais;
| ArchJais
| ArchNemotron
| ArchExaone
| ArchRwkv6
| ArchRwkv6qwen2
| ArchGranite
| ArchGraniteMoe
| ArchChameleon
| ArchWavtokenizerDec;
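Since every Arch type extends TransformerLLMBase with architecture-prefixed keys, the TransformerLLM union can be narrowed on the architecture name. A usage sketch, assuming TransformerLLMBase carries the "general.architecture" key as its discriminant (as elsewhere in this package):

```ts
// Sketch: narrowing the union to the newly added ArchDeci shape.
// Assumes "general.architecture" is present on TransformerLLMBase.
function getRmsEpsilon(model: TransformerLLM): number | undefined {
	if (model["general.architecture"] === "deci") {
		// model is ArchDeci here, so the prefixed key is well-typed.
		return model["deci.attention.layer_norm_rms_epsilon"];
	}
	return undefined;
}
```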