Skip to content

Commit ab84639

Browse files
authored
gguf: Add ability to load local file (#656)
Being able to load a local gguf file can be useful when we want to debug a gguf file. **Without this PR**, this could be done by using [file-fetch](https://www.npmjs.com/package/file-fetch). However, that won't work with big models, since the whole file is loaded into RAM. This PR adds a new `RangeViewLocalFile` internal class that extends `RangeView`. It redirects calls to `fetchChunk()` to `fs.createReadStream` with the appropriate byte range. This allows the library to read specific chunks from a local file. For security reasons, this ability is locked behind the `allowLocalFile: boolean` param. By default, it is disabled (i.e. when this library is run on the hub backend, local file access stays disabled if the param is unspecified). - [x] Add test case to `gguf.spec.ts` - [x] Being able to build with target=browser (only build, but will throw an error in the browser if used)
1 parent 34c8ec1 commit ab84639

File tree

6 files changed

+205
-9
lines changed

6 files changed

+205
-9
lines changed

packages/gguf/package.json

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
}
1919
},
2020
"browser": {
21+
"./src/utils/FileBlob.ts": false,
2122
"./dist/index.js": "./dist/browser/index.js",
2223
"./dist/index.mjs": "./dist/browser/index.mjs"
2324
},
@@ -47,5 +48,8 @@
4748
"gguf"
4849
],
4950
"author": "Hugging Face",
50-
"license": "MIT"
51+
"license": "MIT",
52+
"devDependencies": {
53+
"@types/node": "^20.12.8"
54+
}
5155
}

packages/gguf/pnpm-lock.yaml

Lines changed: 17 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

packages/gguf/src/gguf.spec.ts

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import { describe, expect, it } from "vitest";
22
import { GGMLQuantizationType, gguf, ggufAllShards, parseGgufShardFilename } from "./gguf";
3+
import fs from "node:fs";
34

45
const URL_LLAMA = "https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/191239b/llama-2-7b-chat.Q2_K.gguf";
56
const URL_MISTRAL_7B =
@@ -226,6 +227,19 @@ describe("gguf", () => {
226227
});
227228
});
228229

230+
it("should parse a local file", async () => {
	// Download the fixture and save it to the .cache folder.
	// `recursive: true` makes mkdir a no-op when the folder already exists.
	fs.mkdirSync(".cache", { recursive: true });
	const res = await fetch(URL_V1);
	// Fail early on a bad download instead of writing an error page to disk
	// and getting a confusing parse failure from gguf() afterwards.
	if (!res.ok) {
		throw new Error(`Failed to download ${URL_V1}: ${res.status} ${res.statusText}`);
	}
	const arrayBuf = await res.arrayBuffer();
	fs.writeFileSync(".cache/model.gguf", Buffer.from(arrayBuf));

	// Local paths are only accepted when allowLocalFile is explicitly enabled.
	const { metadata } = await gguf(".cache/model.gguf", { allowLocalFile: true });
	expect(metadata["general.name"]).toEqual("tinyllamas-stories-260k");
});
242+
229243
it("should detect sharded gguf filename", async () => {
230244
const ggufPath = "grok-1/grok-1-q4_0-00003-of-00009.gguf"; // https://huggingface.co/ggml-org/models/blob/fcf344adb9686474c70e74dd5e55465e9e6176ef/grok-1/grok-1-q4_0-00003-of-00009.gguf
231245
const ggufShardFileInfo = parseGgufShardFilename(ggufPath);

packages/gguf/src/gguf.ts

Lines changed: 45 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import type { MetadataValue, Version, GGUFMetadata, GGUFTensorInfo, GGUFParseOutput } from "./types";
22
import { GGUFValueType } from "./types";
3+
import { isBackend } from "./utils/isBackend";
34
import { promisesQueue } from "./utils/promisesQueue";
45

56
export type { MetadataBaseValue, MetadataValue, Version, GGUFMetadata, GGUFTensorInfo, GGUFParseOutput } from "./types";
@@ -49,7 +50,7 @@ const HTTP_TOTAL_MAX_SIZE = 50 * 10 ** 6; /// 50MB
4950
* Internal stateful instance to fetch ranges of HTTP data when needed
5051
*/
5152
class RangeView {
52-
private chunk: number;
53+
protected chunk: number;
5354
private buffer: ArrayBuffer;
5455
private dataView: DataView;
5556

@@ -58,7 +59,7 @@ class RangeView {
5859
}
5960

6061
constructor(
61-
public url: string,
62+
public uri: string,
6263
private params?: {
6364
/**
6465
* Custom fetch function to use instead of the default one, for example to use a proxy or edit headers.
@@ -81,7 +82,7 @@ class RangeView {
8182
const range = [this.chunk * HTTP_CHUNK_SIZE, (this.chunk + 1) * HTTP_CHUNK_SIZE - 1];
8283
const buf = new Uint8Array(
8384
await (
84-
await (this.params?.fetch ?? fetch)(this.url, {
85+
await (this.params?.fetch ?? fetch)(this.uri, {
8586
headers: {
8687
...(this.params?.additionalFetchHeaders ?? {}),
8788
Range: `bytes=${range[0]}-${range[1]}`,
@@ -128,6 +129,23 @@ class RangeView {
128129
}
129130
}
130131

132+
/**
 * Internal stateful instance to read ranges of a local file when needed.
 * Only usable with the Node.js FS API.
 */
class RangeViewLocalFile extends RangeView {
	/**
	 * Read a new chunk from the local file system and append it to the view.
	 *
	 * The chunk covers bytes `[chunk * HTTP_CHUNK_SIZE, (chunk + 1) * HTTP_CHUNK_SIZE)`.
	 */
	override async fetchChunk(): Promise<void> {
		// Loaded lazily so that browser builds never pull in the node-only module.
		const { FileBlob } = await import("./utils/FileBlob");
		const blob = await FileBlob.create(this.uri);
		// Unlike the HTTP `Range` header (inclusive end), `slice` takes an
		// EXCLUSIVE end offset, so the end must not be decremented by one;
		// otherwise every chunk would be one byte short.
		const start = this.chunk * HTTP_CHUNK_SIZE;
		const end = start + HTTP_CHUNK_SIZE;
		const buffer = await blob.slice(start, end).arrayBuffer();
		this.appendBuffer(new Uint8Array(buffer));
	}
}
148+
131149
interface Slice<T> {
132150
value: T;
133151
length: number;
@@ -205,38 +223,57 @@ function readMetadataValue(
205223
}
206224

207225
export async function gguf(
208-
url: string,
226+
uri: string,
209227
params: {
210228
/**
211229
* Custom fetch function to use instead of the default one, for example to use a proxy or edit headers.
212230
*/
213231
fetch?: typeof fetch;
214232
additionalFetchHeaders?: Record<string, string>;
215233
computeParametersCount: true;
234+
allowLocalFile?: boolean;
216235
}
217236
): Promise<GGUFParseOutput & { parameterCount: number }>;
218237
export async function gguf(
219-
url: string,
238+
uri: string,
220239
params?: {
221240
/**
222241
* Custom fetch function to use instead of the default one, for example to use a proxy or edit headers.
223242
*/
224243
fetch?: typeof fetch;
225244
additionalFetchHeaders?: Record<string, string>;
245+
allowLocalFile?: boolean;
226246
}
227247
): Promise<GGUFParseOutput>;
228248
export async function gguf(
229-
url: string,
249+
uri: string,
230250
params?: {
231251
/**
232252
* Custom fetch function to use instead of the default one, for example to use a proxy or edit headers.
233253
*/
234254
fetch?: typeof fetch;
235255
additionalFetchHeaders?: Record<string, string>;
236256
computeParametersCount?: boolean;
257+
allowLocalFile?: boolean;
237258
}
238259
): Promise<GGUFParseOutput & { parameterCount?: number }> {
239-
const r = new RangeView(url, params);
260+
let r: RangeView;
261+
if (isBackend) {
262+
/// On backend, we switch between remote/local file based on protocol
263+
if (uri.match(/^https?:\/\//)) {
264+
r = new RangeView(uri, params);
265+
} else if (params?.allowLocalFile) {
266+
r = new RangeViewLocalFile(uri, params);
267+
} else {
268+
throw new Error("Access to local file is not enabled, please set allowLocalFile to true");
269+
}
270+
} else {
271+
/// On frontend, we only allow using remote file
272+
if (params?.allowLocalFile) {
273+
throw new Error("allowLocalFile cannot be used on browser");
274+
}
275+
r = new RangeView(uri, params);
276+
}
240277
await r.fetchChunk();
241278

242279
const checkBuffer = (buffer: Uint8Array, header: Uint8Array) => {
@@ -377,7 +414,7 @@ export async function ggufAllShards(
377414

378415
const PARALLEL_DOWNLOADS = 20;
379416
const shards = await promisesQueue(
380-
urls.map((shardUrl) => () => gguf(shardUrl, { computeParametersCount: true })),
417+
urls.map((shardUrl) => () => gguf(shardUrl, { ...params, computeParametersCount: true })),
381418
PARALLEL_DOWNLOADS
382419
);
383420
return {

packages/gguf/src/utils/FileBlob.ts

Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
import { createReadStream } from "node:fs";
2+
import { open, stat } from "node:fs/promises";
3+
import { Readable } from "node:stream";
4+
import type { FileHandle } from "node:fs/promises";
5+
import { fileURLToPath } from "node:url";
6+
7+
/**
 * @internal
 *
 * A FileBlob is a replacement for the Blob class that reads files lazily
 * in order to preserve memory.
 *
 * It can be used wherever a Blob is expected; the main difference is the
 * instantiation, which is done asynchronously using the `FileBlob.create` method.
 * Note that, unlike `Blob.slice`, negative offsets are not supported.
 *
 * @example
 * const fileBlob = await FileBlob.create("path/to/package.json");
 *
 * await fetch("https://aschen.tech", { method: "POST", body: fileBlob });
 */
export class FileBlob extends Blob {
	/**
	 * Creates a new FileBlob on the provided file.
	 *
	 * @param path Path (or `file:` URL) of the file to be read lazily
	 * @returns a FileBlob spanning the whole file
	 * @throws if the file cannot be stat-ed (e.g. it does not exist)
	 */
	static async create(path: string | URL): Promise<FileBlob> {
		const filePath = path instanceof URL ? fileURLToPath(path) : path;

		// Only the size is read here; the content is read on demand.
		const { size } = await stat(filePath);

		return new FileBlob(filePath, 0, size);
	}

	private path: string;
	/** Absolute offset (inclusive) of the first byte of this blob in the file. */
	private start: number;
	/** Absolute offset (exclusive) of the end of this blob in the file. */
	private end: number;

	private constructor(path: string, start: number, end: number) {
		// The underlying Blob stays empty; every accessor is overridden below.
		super();

		this.path = path;
		this.start = start;
		this.end = end;
	}

	/**
	 * Returns the size of the blob.
	 */
	override get size(): number {
		return this.end - this.start;
	}

	/**
	 * Returns a new instance of FileBlob that is a slice of the current one.
	 *
	 * The slice is inclusive of the start and exclusive of the end.
	 * Offsets past the end of the blob are clamped, so the result is never
	 * larger than the current blob and never has a negative size.
	 *
	 * @param start beginning of the slice, relative to this blob
	 * @param end end of the slice (exclusive), relative to this blob
	 * @throws TypeError when `start` or `end` is negative (not supported)
	 */
	override slice(start = 0, end = this.size): FileBlob {
		if (start < 0 || end < 0) {
			throw new TypeError("Unsupported negative start/end on FileBlob.slice");
		}

		const sliceStart = Math.min(this.start + start, this.end);
		const sliceEnd = Math.max(sliceStart, Math.min(this.start + end, this.end));

		return new FileBlob(this.path, sliceStart, sliceEnd);
	}

	/**
	 * Read the part of the file delimited by the FileBlob and returns it as an ArrayBuffer.
	 */
	override async arrayBuffer(): Promise<ArrayBuffer> {
		const length = this.size;

		if (length === 0) {
			// Nothing to read: avoid opening the file at all.
			return new ArrayBuffer(0);
		}

		const { buffer, bytesRead } = await this.execute((file) =>
			file.read(Buffer.alloc(length), 0, length, this.start)
		);

		// Hand back a real ArrayBuffer (not the Node Buffer view). The backing
		// store is returned as-is when it matches exactly what was read, and
		// copied only when the read was short or the view is offset.
		const backing = buffer.buffer;
		if (buffer.byteOffset === 0 && bytesRead === backing.byteLength) {
			return backing as ArrayBuffer;
		}
		return backing.slice(buffer.byteOffset, buffer.byteOffset + bytesRead) as ArrayBuffer;
	}

	/**
	 * Read the part of the file delimited by the FileBlob and returns it as a string.
	 */
	override async text(): Promise<string> {
		// Buffer.from(ArrayBuffer) creates a view over the same memory (no copy).
		return Buffer.from(await this.arrayBuffer()).toString("utf8");
	}

	/**
	 * Returns a stream around the part of the file delimited by the FileBlob.
	 */
	override stream(): ReturnType<Blob["stream"]> {
		if (this.size === 0) {
			// createReadStream takes an inclusive `end`; an empty range would
			// yield `end < start`, so return an empty stream instead.
			return new Blob([]).stream();
		}

		return Readable.toWeb(createReadStream(this.path, { start: this.start, end: this.end - 1 })) as ReturnType<
			Blob["stream"]
		>;
	}

	/**
	 * We are opening and closing the file for each action to prevent file descriptor leaks.
	 *
	 * It is an intended choice of developer experience over performances.
	 */
	private async execute<T>(action: (file: FileHandle) => Promise<T>) {
		const file = await open(this.path, "r");

		try {
			return await action(file);
		} finally {
			await file.close();
		}
	}
}

packages/gguf/src/utils/isBackend.ts

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
/** True on a browser main thread, i.e. when a `window` with a `document` exists. */
const isBrowser = typeof window !== "undefined" && typeof window.document !== "undefined";

/**
 * True inside any kind of web worker. The global scope of dedicated, shared and
 * service workers is an instance of a `*WorkerGlobalScope` class, so matching on
 * the suffix covers all of them rather than dedicated workers only.
 */
const isWebWorker =
	typeof self === "object" &&
	!!self.constructor &&
	typeof self.constructor.name === "string" &&
	self.constructor.name.endsWith("WorkerGlobalScope");

/** True when the code runs in neither a browser page nor a web worker. */
export const isBackend = !isBrowser && !isWebWorker;

0 commit comments

Comments
 (0)