Skip to content

gguf: Add ability to load local file #656

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
May 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion packages/gguf/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
}
},
"browser": {
"./src/utils/FileBlob.ts": false,
"./dist/index.js": "./dist/browser/index.js",
"./dist/index.mjs": "./dist/browser/index.mjs"
},
Expand Down Expand Up @@ -47,5 +48,8 @@
"gguf"
],
"author": "Hugging Face",
"license": "MIT"
"license": "MIT",
"devDependencies": {
"@types/node": "^20.12.8"
}
}
17 changes: 17 additions & 0 deletions packages/gguf/pnpm-lock.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

14 changes: 14 additions & 0 deletions packages/gguf/src/gguf.spec.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import { describe, expect, it } from "vitest";
import { GGMLQuantizationType, gguf, ggufAllShards, parseGgufShardFilename } from "./gguf";
import fs from "node:fs";

const URL_LLAMA = "https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/191239b/llama-2-7b-chat.Q2_K.gguf";
const URL_MISTRAL_7B =
Expand Down Expand Up @@ -223,6 +224,19 @@ describe("gguf", () => {
});
});

it("should parse a local file", async () => {
	// Download the file and save it to the .cache folder so it can be read back from disk.
	// `recursive: true` makes mkdirSync succeed when the folder already exists,
	// which removes the separate existsSync check (and its check-then-act race).
	fs.mkdirSync(".cache", { recursive: true });
	const res = await fetch(URL_V1);
	// Fail here on a bad download instead of writing an error body to disk
	// and getting a confusing parse failure later.
	expect(res.ok).toBe(true);
	const arrayBuf = await res.arrayBuffer();
	fs.writeFileSync(".cache/model.gguf", Buffer.from(arrayBuf));

	// A non-http(s) path is only accepted when allowLocalFile is explicitly enabled.
	const { metadata } = await gguf(".cache/model.gguf", { allowLocalFile: true });
	expect(metadata["general.name"]).toEqual("tinyllamas-stories-260k");
});

it("should detect sharded gguf filename", async () => {
const ggufPath = "grok-1/grok-1-q4_0-00003-of-00009.gguf"; // https://huggingface.co/ggml-org/models/blob/fcf344adb9686474c70e74dd5e55465e9e6176ef/grok-1/grok-1-q4_0-00003-of-00009.gguf
const ggufShardFileInfo = parseGgufShardFilename(ggufPath);
Expand Down
53 changes: 45 additions & 8 deletions packages/gguf/src/gguf.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import type { MetadataValue, Version, GGUFMetadata, GGUFTensorInfo, GGUFParseOutput } from "./types";
import { GGUFValueType } from "./types";
import { isBackend } from "./utils/isBackend";
import { promisesQueue } from "./utils/promisesQueue";

export type { MetadataBaseValue, MetadataValue, Version, GGUFMetadata, GGUFTensorInfo, GGUFParseOutput } from "./types";
Expand Down Expand Up @@ -49,7 +50,7 @@ const HTTP_TOTAL_MAX_SIZE = 50 * 10 ** 6; /// 50MB
* Internal stateful instance to fetch ranges of HTTP data when needed
*/
class RangeView {
private chunk: number;
protected chunk: number;
private buffer: ArrayBuffer;
private dataView: DataView;

Expand All @@ -58,7 +59,7 @@ class RangeView {
}

constructor(
public url: string,
public uri: string,
private params?: {
/**
* Custom fetch function to use instead of the default one, for example to use a proxy or edit headers.
Expand All @@ -81,7 +82,7 @@ class RangeView {
const range = [this.chunk * HTTP_CHUNK_SIZE, (this.chunk + 1) * HTTP_CHUNK_SIZE - 1];
const buf = new Uint8Array(
await (
await (this.params?.fetch ?? fetch)(this.url, {
await (this.params?.fetch ?? fetch)(this.uri, {
headers: {
...(this.params?.additionalFetchHeaders ?? {}),
Range: `bytes=${range[0]}-${range[1]}`,
Expand Down Expand Up @@ -128,6 +129,23 @@ class RangeView {
}
}

/**
 * Internal stateful instance to read ranges of a local file when needed.
 * Only usable with the Node.js `fs` API.
 */
class RangeViewLocalFile extends RangeView {
	/**
	 * Read the chunk designated by `this.chunk` from the local file system
	 * and append its bytes to the view's buffer.
	 */
	override async fetchChunk(): Promise<void> {
		// Loaded lazily; presumably so that browser builds (where this module is
		// stubbed out) never evaluate the Node-only FileBlob implementation.
		const { FileBlob } = await import("./utils/FileBlob");
		const blob = await FileBlob.create(this.uri);
		// FileBlob.slice() takes a half-open range [start, end): `end` is exclusive,
		// unlike the inclusive upper bound of an HTTP Range header. Passing
		// `start + HTTP_CHUNK_SIZE` reads the full chunk instead of dropping its last byte.
		const start = this.chunk * HTTP_CHUNK_SIZE;
		const end = start + HTTP_CHUNK_SIZE;
		const buffer = await blob.slice(start, end).arrayBuffer();
		this.appendBuffer(new Uint8Array(buffer));
		// NOTE(review): `this.chunk` is not advanced here. Confirm whether the base-class
		// fetchChunk() increments it and whether this override needs to do the same.
	}
}

interface Slice<T> {
value: T;
length: number;
Expand Down Expand Up @@ -205,38 +223,57 @@ function readMetadataValue(
}

/**
 * Overload used when `computeParametersCount` is `true`: the resolved value
 * additionally carries a numeric `parameterCount`.
 *
 * On backend, a `uri` starting with `http://` or `https://` is read remotely;
 * any other `uri` is read as a local file and requires `allowLocalFile: true`,
 * otherwise an error is thrown.
 * In a browser or web worker, setting `allowLocalFile` throws.
 */
export async function gguf(
url: string,
uri: string,
params: {
/**
* Custom fetch function to use instead of the default one, for example to use a proxy or edit headers.
*/
fetch?: typeof fetch;
/** Extra headers merged into the range requests sent for remote files. */
additionalFetchHeaders?: Record<string, string>;
/** Must be `true` for this overload; selects the return type that includes `parameterCount`. */
computeParametersCount: true;
/** Opt-in flag required to read a non-http(s) `uri` from the local file system (backend only). */
allowLocalFile?: boolean;
}
): Promise<GGUFParseOutput & { parameterCount: number }>;
/**
 * Overload used when `computeParametersCount` is not requested: resolves to the
 * plain `GGUFParseOutput`.
 *
 * On backend, a `uri` starting with `http://` or `https://` is read remotely;
 * any other `uri` is read as a local file and requires `allowLocalFile: true`,
 * otherwise an error is thrown.
 * In a browser or web worker, setting `allowLocalFile` throws.
 */
export async function gguf(
url: string,
uri: string,
params?: {
/**
* Custom fetch function to use instead of the default one, for example to use a proxy or edit headers.
*/
fetch?: typeof fetch;
/** Extra headers merged into the range requests sent for remote files. */
additionalFetchHeaders?: Record<string, string>;
/** Opt-in flag required to read a non-http(s) `uri` from the local file system (backend only). */
allowLocalFile?: boolean;
}
): Promise<GGUFParseOutput>;
export async function gguf(
url: string,
uri: string,
params?: {
/**
* Custom fetch function to use instead of the default one, for example to use a proxy or edit headers.
*/
fetch?: typeof fetch;
additionalFetchHeaders?: Record<string, string>;
computeParametersCount?: boolean;
allowLocalFile?: boolean;
}
): Promise<GGUFParseOutput & { parameterCount?: number }> {
const r = new RangeView(url, params);
let r: RangeView;
if (isBackend) {
/// On backend, we switch between remote/local file based on protocol
if (uri.match(/^https?:\/\//)) {
r = new RangeView(uri, params);
} else if (params?.allowLocalFile) {
r = new RangeViewLocalFile(uri, params);
} else {
throw new Error("Access to local file is not enabled, please set allowLocalFile to true");
}
} else {
/// On frontend, we only allow using remote file
if (params?.allowLocalFile) {
throw new Error("allowLocalFile cannot be used on browser");
}
r = new RangeView(uri, params);
}
await r.fetchChunk();

const checkBuffer = (buffer: Uint8Array, header: Uint8Array) => {
Expand Down Expand Up @@ -377,7 +414,7 @@ export async function ggufAllShards(

const PARALLEL_DOWNLOADS = 20;
const shards = await promisesQueue(
urls.map((shardUrl) => () => gguf(shardUrl, { computeParametersCount: true })),
urls.map((shardUrl) => () => gguf(shardUrl, { ...params, computeParametersCount: true })),
PARALLEL_DOWNLOADS
);
return {
Expand Down
118 changes: 118 additions & 0 deletions packages/gguf/src/utils/FileBlob.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
import { createReadStream } from "node:fs";
import { open, stat } from "node:fs/promises";
import { Readable } from "node:stream";
import type { FileHandle } from "node:fs/promises";
import { fileURLToPath } from "node:url";

/**
 * @internal
 *
 * A FileBlob is a replacement for the Blob class that reads files lazily
 * in order to preserve memory.
 *
 * It is a drop-in replacement for the Blob class, so you can use it as a Blob.
 *
 * The main difference is the instantiation, which is done asynchronously using the `FileBlob.create` method.
 *
 * @example
 * const fileBlob = await FileBlob.create("path/to/package.json");
 *
 * await fetch("https://aschen.tech", { method: "POST", body: fileBlob });
 */
export class FileBlob extends Blob {
	/**
	 * Creates a new FileBlob on the provided file.
	 *
	 * Only the file size is read here (via `stat`); the content is read on demand.
	 *
	 * @param path Path (or `file:` URL) to the file to be read lazily
	 * @returns A FileBlob spanning the whole file
	 */
	static async create(path: string | URL): Promise<FileBlob> {
		path = path instanceof URL ? fileURLToPath(path) : path;

		const { size } = await stat(path);

		return new FileBlob(path, 0, size);
	}

	/** Path of the underlying file. */
	private path: string;
	/** Byte offset in the file where this blob starts (inclusive). */
	private start: number;
	/** Byte offset in the file where this blob ends (exclusive). */
	private end: number;

	private constructor(path: string, start: number, end: number) {
		// The parent Blob is intentionally empty: every read goes to the file instead.
		super();

		this.path = path;
		this.start = start;
		this.end = end;
	}

	/**
	 * Returns the size of the blob.
	 */
	override get size(): number {
		return this.end - this.start;
	}

	/**
	 * Returns a new instance of FileBlob that is a slice of the current one.
	 *
	 * The slice is inclusive of the start and exclusive of the end.
	 * Both bounds are relative to this blob and are clamped to its extent, so
	 * slicing past the end yields a shorter (possibly empty) blob.
	 *
	 * The slice method does not support negative start/end.
	 *
	 * @param start beginning of the slice
	 * @param end end of the slice
	 * @throws TypeError when `start` or `end` is negative
	 */
	override slice(start = 0, end = this.size): FileBlob {
		if (start < 0 || end < 0) {
			// The error must actually be thrown; merely constructing it would let
			// negative offsets through and read the wrong part of the file.
			throw new TypeError("Unsupported negative start/end on FileBlob.slice");
		}

		// Clamp both bounds so the resulting size can never be negative.
		const sliceStart = Math.min(this.start + start, this.end);
		const sliceEnd = Math.max(sliceStart, Math.min(this.start + end, this.end));

		return new FileBlob(this.path, sliceStart, sliceEnd);
	}

	/**
	 * Read the part of the file delimited by the FileBlob and returns it as an ArrayBuffer.
	 *
	 * Note: the returned object is the Node.js `Buffer` that was filled by the read,
	 * which `text()` relies on.
	 */
	override async arrayBuffer(): Promise<ArrayBuffer> {
		const slice = await this.execute((file) => file.read(Buffer.alloc(this.size), 0, this.size, this.start));

		return slice.buffer;
	}

	/**
	 * Read the part of the file delimited by the FileBlob and returns it as a string.
	 */
	override async text(): Promise<string> {
		// arrayBuffer() hands back the Buffer used for the read, hence the cast.
		const buffer = (await this.arrayBuffer()) as Buffer;

		return buffer.toString("utf8");
	}

	/**
	 * Returns a stream around the part of the file delimited by the FileBlob.
	 */
	override stream(): ReturnType<Blob["stream"]> {
		// createReadStream treats `end` as inclusive, hence the `- 1`.
		return Readable.toWeb(createReadStream(this.path, { start: this.start, end: this.end - 1 })) as ReturnType<
			Blob["stream"]
		>;
	}

	/**
	 * We are opening and closing the file for each action to prevent file descriptor leaks.
	 *
	 * It is an intended choice of developer experience over performance.
	 *
	 * @param action Callback receiving the opened file handle
	 * @returns Whatever `action` resolves to
	 */
	private async execute<T>(action: (file: FileHandle) => Promise<T>) {
		const file = await open(this.path, "r");

		try {
			return await action(file);
		} finally {
			await file.close();
		}
	}
}
6 changes: 6 additions & 0 deletions packages/gguf/src/utils/isBackend.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
/** Whether a global `window` owning a `document` is present. */
const isBrowser = !(typeof window === "undefined" || typeof window.document === "undefined");

/** Whether the global `self` was constructed by `DedicatedWorkerGlobalScope`. */
const isWebWorker = typeof self === "object" && self.constructor?.name === "DedicatedWorkerGlobalScope";

/** True only when neither a browser window nor a dedicated web worker scope was detected. */
export const isBackend = !(isBrowser || isWebWorker);