
Commit 4c36014

Merge remote-tracking branch 'upstream/main'
2 parents: b93d276 + 3fd3440

36 files changed (+520 additions, -113 deletions)

README.md

Lines changed: 1 addition & 1 deletion

````diff
@@ -97,7 +97,7 @@ You can run our packages with vanilla JS, without any bundler, by using a CDN or
 ```html
 <script type="module">
 import { HfInference } from 'https://cdn.jsdelivr.net/npm/@huggingface/[email protected]/+esm';
-import { createRepo, commit, deleteRepo, listFiles } from "https://cdn.jsdelivr.net/npm/@huggingface/[email protected].1/+esm";
+import { createRepo, commit, deleteRepo, listFiles } from "https://cdn.jsdelivr.net/npm/@huggingface/[email protected].2/+esm";
 </script>
 ```
 
````

packages/gguf/package.json

Lines changed: 1 addition & 1 deletion

```diff
@@ -1,7 +1,7 @@
 {
   "name": "@huggingface/gguf",
   "packageManager": "[email protected]",
-  "version": "0.1.13",
+  "version": "0.1.14",
   "description": "a GGUF parser that works on remotely hosted files",
   "repository": "https://github.com/huggingface/huggingface.js.git",
   "publishConfig": {
```

packages/gguf/src/cli.ts

Lines changed: 141 additions & 26 deletions

```diff
@@ -1,6 +1,7 @@
 #!/usr/bin/env node
 
-import { GGMLQuantizationType, gguf } from ".";
+import { GGMLQuantizationType, gguf, ggufAllShards, GGUFParseOutput } from ".";
+import { GGML_QUANT_SIZES } from "./quant-descriptions";
 
 interface PrintColumnHeader {
   name: string;
@@ -10,11 +11,44 @@ interface PrintColumnHeader {
 
 const mapDtypeToName = Object.fromEntries(Object.entries(GGMLQuantizationType).map(([name, value]) => [value, name]));
 
+function showHelp(exitCode: number) {
+  console.error("Usage: gguf-view [--help|-h] [--show-tensor] [--context|-c N] <path/to/gguf>");
+  console.error(" --help, -h Show this help message");
+  console.error(" --show-tensor Show tensor information");
+  console.error(" --context, -c N Number of tokens in context (default: 4096)");
+  process.exit(exitCode);
+}
+
 async function main() {
-  const ggufPath = process.argv[2];
-  const { metadata, tensorInfos } = await gguf(ggufPath, {
+  let ggufPath = "";
+  let showTensors = false;
+  let nCtx = 4096;
+  for (let i = 2; i < process.argv.length; i++) {
+    if (process.argv[i] === "--help" || process.argv[i] === "-h") {
+      showHelp(0);
+    } else if (process.argv[i] === "--show-tensor") {
+      showTensors = true;
+    } else if (process.argv[i] === "--context" || process.argv[i] === "-c") {
+      nCtx = Number(process.argv[++i]);
+    } else {
+      ggufPath = process.argv[i];
+    }
+  }
+
+  if (!ggufPath.length) {
+    console.error("Error: Missing path to gguf file");
+    showHelp(1);
+  }
+
+  const { shards } = await ggufAllShards(ggufPath, {
     allowLocalFile: true,
   });
+  const { metadata, tensorInfos } = shards[0];
+
+  // merge all metadata
+  for (let i = 1; i < shards.length; i++) {
+    tensorInfos.push(...shards[i].tensorInfos);
+  }
 
   // TODO: print info about endianess
   console.log(`* Dumping ${Object.keys(metadata).length} key/value pair(s)`);
@@ -43,29 +77,110 @@ async function main() {
   );
 
   console.log();
-  console.log(`* Dumping ${tensorInfos.length} tensor(s)`);
-  printTable(
-    [
-      { name: "Idx", alignRight: true },
-      { name: "Num Elements", alignRight: true },
-      { name: "Shape" },
-      { name: "Data Type" },
-      { name: "Name" },
-    ],
-    tensorInfos.map((tensorInfo, i) => {
-      const shape = [1n, 1n, 1n, 1n];
-      tensorInfo.shape.forEach((dim, i) => {
-        shape[i] = dim;
-      });
-      return [
-        (i + 1).toString(),
-        shape.reduce((acc, n) => acc * n, 1n).toString(),
-        shape.map((n) => n.toString().padStart(6)).join(", "),
-        mapDtypeToName[tensorInfo.dtype],
-        tensorInfo.name,
-      ];
-    })
-  );
+  console.log(`* Memory usage estimation (with context length of ${nCtx} tokens)`);
+  try {
+    const kvUsage = calcMemoryUsage(metadata as GGUFParseOutput<{ strict: false }>["metadata"], nCtx);
+    let modelWeightInBytes = 0;
+    for (const tensorInfo of tensorInfos) {
+      const nElem = Number(tensorInfo.shape.reduce((a, b) => a * b, 1n));
+      const tensorSizeInBytes = nElem * (GGML_QUANT_SIZES[tensorInfo.dtype] / 8);
+      modelWeightInBytes += tensorSizeInBytes;
+    }
+    const overhead =
+      calcMemoryUsage(metadata as GGUFParseOutput<{ strict: false }>["metadata"], 256).totalBytes +
+      modelWeightInBytes * 0.05;
+    const totalMemoryUsage = kvUsage.totalBytes + overhead + modelWeightInBytes;
+    printTable(
+      [{ name: "Item" }, { name: "Memory usage", alignRight: true }],
+      [
+        ["K cache", (kvUsage.totalBytesK / 1e9).toFixed(2) + " GB"],
+        ["V cache", (kvUsage.totalBytesV / 1e9).toFixed(2) + " GB"],
+        ["Weight", (modelWeightInBytes / 1e9).toFixed(2) + " GB"],
+        ["Overhead", (overhead / 1e9).toFixed(2) + " GB"],
+        ["", "---"],
+        ["TOTAL", (totalMemoryUsage / 1e9).toFixed(2) + " GB"],
+      ]
+    );
+  } catch (e) {
+    console.error(`Error: ${(e as Error).message}`);
+  }
+
+  if (showTensors) {
+    console.log();
+    console.log(`* Dumping ${tensorInfos.length} tensor(s)`);
+    printTable(
+      [
+        { name: "Idx", alignRight: true },
+        { name: "Num Elements", alignRight: true },
+        { name: "Shape" },
+        { name: "Data Type" },
+        { name: "Name" },
+      ],
+      tensorInfos.map((tensorInfo, i) => {
+        const shape = [1n, 1n, 1n, 1n];
+        tensorInfo.shape.forEach((dim, i) => {
+          shape[i] = dim;
+        });
+        return [
+          (i + 1).toString(),
+          shape.reduce((acc, n) => acc * n, 1n).toString(),
+          shape.map((n) => n.toString().padStart(6)).join(", "),
+          mapDtypeToName[tensorInfo.dtype],
+          tensorInfo.name,
+        ];
+      })
+    );
+  } else {
+    console.log();
+    console.log(`* Use --show-tensor to display tensor information`);
+  }
+}
+
+function calcMemoryUsage(
+  metadata: GGUFParseOutput<{ strict: false }>["metadata"],
+  kvSize: number,
+  kvTypeK: GGMLQuantizationType = GGMLQuantizationType.F16,
+  kvTypeV: GGMLQuantizationType = GGMLQuantizationType.F16
+) {
+  const arch = metadata["general.architecture"] ?? "unknown";
+  const n_embd = (metadata[`${arch}.embedding_length`] as number) ?? 0;
+  const n_head = (metadata[`${arch}.attention.head_count`] as number) ?? 0;
+  const n_embd_head_k = (metadata[`${arch}.attention.key_length`] as number) ?? n_embd / n_head;
+  const n_embd_head_v = (metadata[`${arch}.attention.value_length`] as number) ?? n_embd / n_head;
+  const n_head_kv = (metadata[`${arch}.attention.head_count_kv`] as number[] | number) ?? [];
+  const n_layer = (metadata[`${arch}.block_count`] as number) ?? 0;
+
+  if (arch.startsWith("mamba") || arch.startsWith("rwkv")) {
+    throw new Error(`Memory usage estimation for arch "${arch}" is not supported`);
+  }
+
+  const n_head_kv_arr = Array(n_layer).fill(n_head);
+  if (Array.isArray(n_head_kv)) {
+    for (let i = 0; i < n_layer; i++) {
+      if (n_head_kv[i]) {
+        n_head_kv_arr[i] = n_head_kv[i];
+      }
+    }
+  } else {
+    for (let i = 0; i < n_layer; i++) {
+      n_head_kv_arr[i] = n_head_kv;
+    }
+  }
+
+  let totalElemsK = 0;
+  let totalElemsV = 0;
+  for (let i = 0; i < n_layer; i++) {
+    const n_embd_k_gqa = n_embd_head_k * n_head_kv_arr[i];
+    const n_embd_v_gqa = n_embd_head_v * n_head_kv_arr[i];
+    totalElemsK += n_embd_k_gqa * kvSize;
+    totalElemsV += n_embd_v_gqa * kvSize;
+  }
+
+  return {
+    totalBytesK: totalElemsK * (GGML_QUANT_SIZES[kvTypeK] / 8),
+    totalBytesV: totalElemsV * (GGML_QUANT_SIZES[kvTypeV] / 8),
+    totalBytes: (totalElemsK + totalElemsV) * (GGML_QUANT_SIZES[kvTypeV] / 8),
+  };
 }
 
 function printTable(header: PrintColumnHeader[], rows: string[][], leftPad = 2) {
```
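
For a sense of what the new memory table reports, here is a minimal sketch of the per-layer K/V-cache formula that calcMemoryUsage() applies, plugged with Llama-2-7B-like dimensions. The dimensions below are assumptions for illustration, not values read from a real GGUF file:

```ts
// Illustration only: all dimensions below are assumed, not parsed from a model.
const nCtx = 4096;      // --context value (tokens)
const nLayer = 32;      // <arch>.block_count
const nEmbdHeadK = 128; // <arch>.attention.key_length (head dimension)
const nHeadKv = 32;     // <arch>.attention.head_count_kv (no GQA in this example)
const bytesPerElem = 2; // F16 KV cache, the default in calcMemoryUsage()

// K elements = head_dim * kv_heads * context length, summed over layers
const totalElemsK = nEmbdHeadK * nHeadKv * nCtx * nLayer;
console.log(`K cache ≈ ${((totalElemsK * bytesPerElem) / 1e9).toFixed(2)} GB`); // ≈ 1.07 GB; the V cache adds the same again
```

With grouped-query attention the per-layer KV head count drops, which is what the n_head_kv_arr loop above accounts for, and the K/V rows shrink proportionally.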

packages/gguf/src/gguf.ts

Lines changed: 1 addition & 0 deletions

```diff
@@ -410,6 +410,7 @@ export async function ggufAllShards(
     fetch?: typeof fetch;
     additionalFetchHeaders?: Record<string, string>;
     parallelDownloads?: number;
+    allowLocalFile?: boolean;
   }
 ): Promise<{ shards: GGUFParseOutput[]; parameterCount: number }> {
   const parallelDownloads = params?.parallelDownloads ?? PARALLEL_DOWNLOADS;
```
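
The new allowLocalFile option mirrors the call the CLI above already makes. A minimal usage sketch, assuming a local (possibly sharded) GGUF file at a placeholder path:

```ts
import { ggufAllShards } from "@huggingface/gguf";

// "./model-00001-of-00002.gguf" is a placeholder; any local GGUF path works
// once allowLocalFile is set, matching how cli.ts calls it.
const { shards, parameterCount } = await ggufAllShards("./model-00001-of-00002.gguf", {
  allowLocalFile: true,
});
console.log(`${shards.length} shard(s), ~${parameterCount} parameters`);
```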

packages/gguf/src/quant-descriptions.ts

Lines changed: 41 additions & 0 deletions

```diff
@@ -125,3 +125,44 @@ export const GGUF_QUANT_DESCRIPTIONS: Record<GGMLQuantizationType, { txt: string
     src_url: "https://en.wikipedia.org/wiki/Bfloat16_floating-point_format",
   },
 };
+
+const QK_K = 256;
+const calcBPW = (blockSize: number, typeSize: number) => {
+  return (typeSize * 8) / blockSize;
+};
+
+// copied from https://github.com/ggml-org/llama.cpp/tree/master/gguf-py/gguf/constants.py
+// map quantization type to element size in bits per weight (example: Q4_K -> 4.5 bpw)
+export const GGML_QUANT_SIZES = {
+  [GGMLQuantizationType.F32]: calcBPW(1, 4),
+  [GGMLQuantizationType.F16]: calcBPW(1, 2),
+  [GGMLQuantizationType.Q4_0]: calcBPW(32, 2 + 16),
+  [GGMLQuantizationType.Q4_1]: calcBPW(32, 2 + 2 + 16),
+  [GGMLQuantizationType.Q5_0]: calcBPW(32, 2 + 4 + 16),
+  [GGMLQuantizationType.Q5_1]: calcBPW(32, 2 + 2 + 4 + 16),
+  [GGMLQuantizationType.Q8_0]: calcBPW(32, 2 + 32),
+  [GGMLQuantizationType.Q8_1]: calcBPW(32, 4 + 4 + 32),
+  [GGMLQuantizationType.Q2_K]: calcBPW(256, 2 + 2 + QK_K / 16 + QK_K / 4),
+  [GGMLQuantizationType.Q3_K]: calcBPW(256, 2 + QK_K / 4 + QK_K / 8 + 12),
+  [GGMLQuantizationType.Q4_K]: calcBPW(256, 2 + 2 + QK_K / 2 + 12),
+  [GGMLQuantizationType.Q5_K]: calcBPW(256, 2 + 2 + QK_K / 2 + QK_K / 8 + 12),
+  [GGMLQuantizationType.Q6_K]: calcBPW(256, 2 + QK_K / 2 + QK_K / 4 + QK_K / 16),
+  [GGMLQuantizationType.Q8_K]: calcBPW(256, 4 + QK_K + QK_K / 8),
+  [GGMLQuantizationType.IQ2_XXS]: calcBPW(256, 2 + QK_K / 4),
+  [GGMLQuantizationType.IQ2_XS]: calcBPW(256, 2 + QK_K / 4 + QK_K / 32),
+  [GGMLQuantizationType.IQ3_XXS]: calcBPW(256, 2 + QK_K / 4 + QK_K / 8),
+  [GGMLQuantizationType.IQ1_S]: calcBPW(256, 2 + QK_K / 8 + QK_K / 16),
+  [GGMLQuantizationType.IQ4_NL]: calcBPW(32, 2 + 16),
+  [GGMLQuantizationType.IQ3_S]: calcBPW(256, 2 + QK_K / 4 + QK_K / 8 + QK_K / 32 + 4),
+  [GGMLQuantizationType.IQ2_S]: calcBPW(256, 2 + QK_K / 4 + QK_K / 16),
+  [GGMLQuantizationType.IQ4_XS]: calcBPW(256, 2 + 2 + QK_K / 2 + QK_K / 64),
+  [GGMLQuantizationType.I8]: calcBPW(1, 1),
+  [GGMLQuantizationType.I16]: calcBPW(1, 2),
+  [GGMLQuantizationType.I32]: calcBPW(1, 4),
+  [GGMLQuantizationType.I64]: calcBPW(1, 8),
+  [GGMLQuantizationType.F64]: calcBPW(1, 8),
+  [GGMLQuantizationType.IQ1_M]: calcBPW(256, QK_K / 8 + QK_K / 16 + QK_K / 32),
+  [GGMLQuantizationType.BF16]: calcBPW(1, 2),
+  // [GGMLQuantizationType.TQ1_0]: calcBPW(256, 2 + 4 * 13),
+  // [GGMLQuantizationType.TQ2_0]: calcBPW(256, 2 + 64),
+};
```
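
As a quick sanity check on the "Q4_K -> 4.5 bpw" example in the comment above, the Q4_K entry works out as follows:

```ts
// A Q4_K super-block covers QK_K = 256 weights and stores
// 2 + 2 + 256 / 2 + 12 = 144 bytes, so 144 * 8 / 256 = 4.5 bits per weight.
const QK_K = 256;
const q4kBlockBytes = 2 + 2 + QK_K / 2 + 12; // 144
const q4kBitsPerWeight = (q4kBlockBytes * 8) / QK_K; // 4.5
console.log(q4kBitsPerWeight);
```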

packages/hub/package.json

Lines changed: 1 addition & 1 deletion

```diff
@@ -1,7 +1,7 @@
 {
   "name": "@huggingface/hub",
   "packageManager": "[email protected]",
-  "version": "1.0.1",
+  "version": "1.0.2",
   "description": "Utilities to interact with the Hugging Face hub",
   "repository": "https://github.com/huggingface/huggingface.js.git",
   "publishConfig": {
```

packages/hub/src/lib/download-file-to-cache-dir.spec.ts

Lines changed: 41 additions & 1 deletion

```diff
@@ -114,6 +114,47 @@ describe("downloadFileToCacheDir", () => {
     expect(output).toBe(expectPointer);
   });
 
+  test("existing symlinked and blob with default revision should not re-download it", async () => {
+    // <cache>/<repo>/<revision>/snapshots/README.md
+    const expectPointer = _getSnapshotFile({
+      repo: DUMMY_REPO,
+      path: "/README.md",
+      revision: "main",
+    });
+    // stat ensure a symlink and the pointed file exists
+    vi.mocked(stat).mockResolvedValue({} as Stats); // prevent default mocked reject
+    vi.mocked(lstat).mockResolvedValue({} as Stats);
+    vi.mocked(pathsInfo).mockResolvedValue([
+      {
+        oid: DUMMY_ETAG,
+        size: 55,
+        path: "README.md",
+        type: "file",
+        lastCommit: {
+          date: new Date(),
+          id: "main",
+          title: "Commit msg",
+        },
+      },
+    ]);
+
+    const output = await downloadFileToCacheDir({
+      repo: DUMMY_REPO,
+      path: "/README.md",
+      fetch: fetchMock,
+    });
+
+    expect(stat).toHaveBeenCalledOnce();
+    expect(symlink).not.toHaveBeenCalledOnce();
+    // Get call argument for stat
+    const starArg = vi.mocked(stat).mock.calls[0][0];
+
+    expect(starArg).toBe(expectPointer);
+    expect(fetchMock).not.toHaveBeenCalledWith();
+
+    expect(output).toBe(expectPointer);
+  });
+
   test("existing blob should only create the symlink", async () => {
     // <cache>/<repo>/<revision>/snapshots/README.md
     const expectPointer = _getSnapshotFile({
@@ -150,7 +191,6 @@ describe("downloadFileToCacheDir", () => {
       fetch: fetchMock,
     });
 
-    expect(stat).not.toHaveBeenCalled();
     // should have check for the blob
     expect(lstat).toHaveBeenCalled();
     expect(vi.mocked(lstat).mock.calls[0][0]).toBe(expectedBlob);
```

packages/hub/src/lib/download-file-to-cache-dir.ts

Lines changed: 3 additions & 0 deletions

```diff
@@ -96,6 +96,9 @@ export async function downloadFileToCacheDir(
   const pointerPath = getFilePointer(storageFolder, commitHash ?? pathsInformation[0].lastCommit.id, params.path);
   const blobPath = join(storageFolder, "blobs", etag);
 
+  // if we have the pointer file, we can shortcut the download
+  if (await exists(pointerPath, true)) return pointerPath;
+
   // mkdir blob and pointer path parent directory
   await mkdir(dirname(blobPath), { recursive: true });
   await mkdir(dirname(pointerPath), { recursive: true });
```
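
A hedged usage sketch of the shortcut's effect, assuming downloadFileToCacheDir is imported from the package index and using placeholder repo/path values: the second call finds the existing pointer file and returns it without any network request.

```ts
import { downloadFileToCacheDir } from "@huggingface/hub";

// First call downloads the blob and creates the pointer; the second call
// hits the new early return and resolves straight from the cache.
const first = await downloadFileToCacheDir({ repo: "openai-community/gpt2", path: "config.json" });
const second = await downloadFileToCacheDir({ repo: "openai-community/gpt2", path: "config.json" });
console.log(first === second); // same pointer path, no re-download
```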
