Skip to content

Commit 00710be

Browse files
authored
Merge branch 'main' into patch-1
2 parents 6f4effe + d66f7e4 commit 00710be

File tree

3 files changed

+29
-3
lines changed

3 files changed

+29
-3
lines changed

packages/gguf/package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
{
22
"name": "@huggingface/gguf",
33
"packageManager": "pnpm@8.10.5",
4-
"version": "0.1.10",
4+
"version": "0.1.11",
55
"description": "a GGUF parser that works on remotely hosted files",
66
"repository": "https://github.com/huggingface/huggingface.js.git",
77
"publishConfig": {

packages/gguf/src/gguf.spec.ts

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,13 @@
11
import { beforeAll, describe, expect, it } from "vitest";
22
import type { GGUFParseOutput } from "./gguf";
3-
import { GGMLFileQuantizationType, GGMLQuantizationType, gguf, ggufAllShards, parseGgufShardFilename } from "./gguf";
3+
import {
4+
GGMLFileQuantizationType,
5+
GGMLQuantizationType,
6+
gguf,
7+
ggufAllShards,
8+
parseGgufShardFilename,
9+
parseGGUFQuantLabel,
10+
} from "./gguf";
411
import fs from "node:fs";
512

613
const URL_LLAMA = "https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/191239b/llama-2-7b-chat.Q2_K.gguf";
@@ -266,4 +273,14 @@ describe("gguf", () => {
266273
const { parameterCount } = await ggufAllShards(URL_SHARDED_GROK);
267274
expect(parameterCount).toEqual(316_490_127_360); // 316B
268275
});
276+
277+
// Checks parseGGUFQuantLabel against file names with one, several, or no recognisable quant labels.
it("parse quant label", async () => {
278+
expect(parseGGUFQuantLabel("Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf")).toEqual("Q4_K_M");
279+
expect(parseGGUFQuantLabel("subdir/Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf")).toEqual("Q4_K_M");
280+
expect(parseGGUFQuantLabel("Codestral-22B-v0.1-Q2_K.gguf")).toEqual("Q2_K");
281+
expect(parseGGUFQuantLabel("Codestral-22B-v0.1.gguf")).toEqual(undefined);
282+
expect(parseGGUFQuantLabel("Codestral-22B-v0.1-F32-Q2_K.gguf")).toEqual("Q2_K"); // GGUF name with two quant labels [F32, Q2_K]: the last one is expected
283+
expect(parseGGUFQuantLabel("Codestral-22B-v0.1-IQ3_XS.gguf")).toEqual(undefined); // TODO: investigate IQ3_XS (this test currently expects no label for it)
284+
expect(parseGGUFQuantLabel("Codestral-22B-v0.1-Q4_0_4_4.gguf")).toEqual("Q4_0"); // TODO: investigate Q4_0_4_4 (this test currently expects only the "Q4_0" prefix)
285+
});
269286
});

packages/gguf/src/gguf.ts

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import type { MetadataValue, Version, GGUFMetadata, GGUFTensorInfo, GGUFParseOutput } from "./types";
2-
import { GGUFValueType } from "./types";
2+
import { GGMLQuantizationType, GGUFValueType } from "./types";
33
import { isBackend } from "./utils/isBackend";
44
import { promisesQueue } from "./utils/promisesQueue";
55

@@ -29,6 +29,15 @@ export function parseGgufShardFilename(filename: string): GgufShardFileInfo | nu
2929
return null;
3030
}
3131

32+
// String-valued entries of GGMLQuantizationType; any non-string values are filtered out.
// NOTE(review): presumably GGMLQuantizationType is a numeric enum, so the strings kept here are its member names — confirm in ./types.
const ggufQuants = Object.values(GGMLQuantizationType).filter((v): v is string => typeof v === "string");
33+
// Matches one of the names above (captured as `quant`), optionally followed by "_" plus a run of
// uppercase letters (captured as `sizeVariation`, e.g. "_M"). The pattern is not anchored, so it can
// match anywhere inside a string. Names are interpolated without escaping.
// NOTE(review): assumes no name contains regex metacharacters — confirm.
export const GGUF_QUANT_RE = new RegExp(`(?<quant>${ggufQuants.join("|")})` + "(_(?<sizeVariation>[A-Z]+))?");
34+
// Same pattern with the global flag, so that String.prototype.match returns every occurrence rather than only the first.
export const GGUF_QUANT_RE_GLOBAL = new RegExp(GGUF_QUANT_RE, "g");
35+
36+
/**
 * Extracts a quantization label from a GGUF file name.
 *
 * The whole input string is upper-cased and then searched with GGUF_QUANT_RE_GLOBAL,
 * so matching is case-insensitive with respect to the input and the returned label is
 * always upper-case.
 *
 * @param fname - file name (or any string) to search for a quantization label
 * @returns the last matching substring, or `undefined` when nothing matches
 */
export function parseGGUFQuantLabel(fname: string): string | undefined {
37+
const quantLabel = fname.toUpperCase().match(GGUF_QUANT_RE_GLOBAL)?.at(-1); // if there are multiple quant substrings in a name, we prefer the last one
38+
return quantLabel;
39+
}
40+
3241
const isVersion = (version: number): version is Version => version === 1 || version === 2 || version === 3;
3342

3443
/**

0 commit comments

Comments
 (0)