diff --git a/packages/gguf/README.md b/packages/gguf/README.md index 763b49470..a6817c381 100644 --- a/packages/gguf/README.md +++ b/packages/gguf/README.md @@ -96,6 +96,52 @@ In case you want to use your own GGUF metadata structure, you can disable strict const { metadata, tensorInfos }: GGUFParseOutput<{ strict: false }> = await gguf(URL_LLAMA); ``` +## Command line interface + +This package provides a CLI equivalent to the [`gguf_dump.py`](https://github.com/ggml-org/llama.cpp/blob/7a2c913e66353362d7f28d612fd3c9d51a831eda/gguf-py/gguf/scripts/gguf_dump.py) script. You can dump the GGUF metadata and the list of tensors using this command: + +```bash +npx @huggingface/gguf my_model.gguf + +# or, with a remote GGUF file: +# npx @huggingface/gguf https://huggingface.co/bartowski/Llama-3.2-1B-Instruct-GGUF/resolve/main/Llama-3.2-1B-Instruct-Q4_K_M.gguf +``` + +Example output: + +``` +* Dumping 36 key/value pair(s) + Idx | Count | Value + ----|--------|---------------------------------------------------------------------------------- + 1 | 1 | version = 3 + 2 | 1 | tensor_count = 292 + 3 | 1 | kv_count = 33 + 4 | 1 | general.architecture = "llama" + 5 | 1 | general.type = "model" + 6 | 1 | general.name = "Meta Llama 3.1 8B Instruct" + 7 | 1 | general.finetune = "Instruct" + 8 | 1 | general.basename = "Meta-Llama-3.1" + +[truncated] + +* Dumping 292 tensor(s) + Idx | Num Elements | Shape | Data Type | Name + ----|--------------|--------------------------------|-----------|-------------------------- + 1 | 64 | 64, 1, 1, 1 | F32 | rope_freqs.weight + 2 | 525336576 | 4096, 128256, 1, 1 | Q4_K | token_embd.weight + 3 | 4096 | 4096, 1, 1, 1 | F32 | blk.0.attn_norm.weight + 4 | 58720256 | 14336, 4096, 1, 1 | Q6_K | blk.0.ffn_down.weight + +[truncated] +``` + +Alternatively, you can install this package globally, which will provide the `gguf-view` command: + +```bash +npm i -g @huggingface/gguf +gguf-view my_model.gguf +``` + ## Hugging Face Hub The Hub supports all file formats 
#!/usr/bin/env node

import { GGMLQuantizationType, gguf } from ".";

/** Describes one column of a table rendered by `printTable`. */
interface PrintColumnHeader {
  /** Text shown in the header row. */
  name: string;
  /** Optional upper bound for the column width; longer cells are clipped to it. */
  maxWidth?: number;
  /** Right-align the data cells of this column (the header text stays left-aligned). */
  alignRight?: boolean;
}

/** Longest rendering of a metadata value before it is cut and suffixed with "...". */
const MAX_VALUE_LENGTH = 50;

/** Tensor shapes with fewer dimensions than this are padded with 1s for display. */
const MIN_SHAPE_RANK = 4;

/** Reverse lookup from a quantization type value to its enum member name. */
const mapDtypeToName = Object.fromEntries(Object.entries(GGMLQuantizationType).map(([name, value]) => [value, name]));

/**
 * Renders a metadata value as text without going through `JSON.stringify` for
 * arrays, because `JSON.stringify` throws a `TypeError` on `bigint` elements.
 *
 * Strings are JSON-quoted, arrays are rendered as `[a,b,c]` (recursively), and
 * everything else goes through `String()`, so `null`/`undefined` do not throw.
 *
 * @param value  The metadata value to render.
 * @param budget Once an array rendering is longer than this, the remaining
 *               elements are skipped: the caller truncates anyway, so there is
 *               no point serialising e.g. a whole tokenizer vocabulary.
 * @returns The rendering; it may be longer than `budget`.
 */
function stringifyMetadataValue(value: unknown, budget: number): string {
  if (typeof value === "string" || value instanceof String) {
    return JSON.stringify(value);
  }
  if (Array.isArray(value)) {
    let out = "[";
    for (let i = 0; i < value.length && out.length <= budget; i++) {
      out += (i > 0 ? "," : "") + stringifyMetadataValue(value[i], budget);
    }
    // If the loop stopped early, `out` is already over budget, so the closing
    // bracket never survives truncation and cannot mislead the reader.
    return out + "]";
  }
  return String(value);
}

/**
 * Produces the display text for a metadata value, cut to `maxLen` characters
 * and suffixed with "..." when the full rendering is longer.
 */
function formatMetadataValue(value: unknown, maxLen = MAX_VALUE_LENGTH): string {
  const full = stringifyMetadataValue(value, maxLen);
  return full.length > maxLen ? full.slice(0, maxLen) + "..." : full;
}

/** Returns a copy of `shape` padded with trailing 1s up to `MIN_SHAPE_RANK` dimensions. */
function padShape(shape: readonly bigint[]): bigint[] {
  const padded = [...shape];
  while (padded.length < MIN_SHAPE_RANK) {
    padded.push(1n);
  }
  return padded;
}

/**
 * CLI entry point: parses the GGUF file named by the first command-line
 * argument (a local path or a URL) and prints its metadata key/value pairs
 * followed by its tensor list.
 *
 * Prints a usage message and sets a non-zero exit code when no argument is given.
 */
async function main(): Promise<void> {
  const ggufPath = process.argv[2];
  const wantsHelp = ggufPath === "-h" || ggufPath === "--help";
  if (!ggufPath || wantsHelp) {
    const usage = "Usage: gguf-view <path or URL to a GGUF file>";
    if (wantsHelp) {
      console.log(usage);
    } else {
      console.error(usage);
      process.exitCode = 1;
    }
    return;
  }

  const { metadata, tensorInfos } = await gguf(ggufPath, {
    allowLocalFile: true,
  });

  // TODO: print info about endianness
  const metadataEntries = Object.entries(metadata);
  console.log(`* Dumping ${metadataEntries.length} key/value pair(s)`);
  printTable(
    [
      { name: "Idx", alignRight: true },
      // { name: 'Type' }, // TODO: support this
      { name: "Count", alignRight: true },
      { name: "Value" },
    ],
    metadataEntries.map(([key, value], i) => {
      // Arrays report their element count; scalars count as a single value.
      const count = Array.isArray(value) ? value.length : 1;
      return [(i + 1).toString(), count.toString(), `${key} = ${formatMetadataValue(value)}`];
    })
  );

  console.log();
  console.log(`* Dumping ${tensorInfos.length} tensor(s)`);
  printTable(
    [
      { name: "Idx", alignRight: true },
      { name: "Num Elements", alignRight: true },
      { name: "Shape" },
      { name: "Data Type" },
      { name: "Name" },
    ],
    tensorInfos.map((tensorInfo, i) => {
      const shape = padShape(tensorInfo.shape);
      const numElements = shape.reduce((acc, dim) => acc * dim, 1n);
      // Fall back to the raw value so an unknown dtype is still visible in the table.
      const dtypeName: string = mapDtypeToName[tensorInfo.dtype] ?? String(tensorInfo.dtype);
      return [
        (i + 1).toString(),
        numElements.toString(),
        shape.map((dim) => dim.toString().padStart(6)).join(", "),
        dtypeName,
        tensorInfo.name,
      ];
    })
  );
}

/**
 * Prints `rows` to stdout as a plain-text table with a header and a separator line.
 *
 * Each column is as wide as its widest cell (or its header), capped by the
 * column's `maxWidth` when one is set; cells wider than the column are clipped
 * so that the rows stay aligned.
 *
 * @param header  Column definitions, in display order.
 * @param rows    Table body; a missing cell is rendered as an empty string.
 * @param leftPad Number of spaces printed before every line (negative values are treated as 0).
 */
function printTable(header: PrintColumnHeader[], rows: string[][], leftPad = 2): void {
  const leftPadStr = " ".repeat(Math.max(0, leftPad));

  // Resolve the final width of every column up front. A reduce is used instead
  // of Math.max(...cells) so very large tables cannot exceed the engine's
  // argument-count limit.
  const columns = header.map((h, index) => {
    const widest = rows.reduce((max, row) => Math.max(max, (row[index] ?? "").length), h.name.length);
    return { ...h, index, width: h.maxWidth ? Math.min(widest, h.maxWidth) : widest };
  });

  const renderCell = (text: string, width: number, alignRight = false): string => {
    const clipped = text.slice(0, width);
    return alignRight ? clipped.padStart(width) : clipped.padEnd(width);
  };

  // Header row
  console.log(leftPadStr + columns.map((col) => renderCell(col.name, col.width)).join(" | "));

  // Separator row
  console.log(leftPadStr + columns.map((col) => "-".repeat(col.width)).join("-|-"));

  // Data rows
  for (const row of rows) {
    const line = columns.map((col) => renderCell(row[col.index] ?? "", col.width, col.alignRight)).join(" | ");
    console.log(leftPadStr + line);
  }
}

// Report failures (missing file, network error, malformed GGUF, ...) as a short
// message and a non-zero exit code instead of an unhandled promise rejection.
main().catch((err: unknown) => {
  console.error(err instanceof Error ? `Error: ${err.message}` : err);
  process.exitCode = 1;
});