Skip to content

Commit fc97b6a

Browse files
committed
Merge branch 'main' into fix-openai-inference-snippets
2 parents 3d904d2 + 0a0960c commit fc97b6a

26 files changed

+1691
-26
lines changed

.github/workflows/test.yml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,11 +61,13 @@ jobs:
6161
echo "SINCE=$(git merge-base origin/${{ github.event.pull_request.base.ref }} ${{ github.sha }})" >> $GITHUB_OUTPUT
6262
fi
6363
64+
- run: google-chrome --version
65+
6466
- run: npm install -g corepack@latest && corepack enable
6567

6668
- uses: actions/setup-node@v3
6769
with:
68-
node-version: "20"
70+
node-version: "22"
6971
cache: "pnpm"
7072
cache-dependency-path: "**/pnpm-lock.yaml"
7173
- run: |

CONTRIBUTING.md

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,11 @@ It's not a hard requirement, but please consider using an icon from [Gitmoji](ht
1818

1919
## Tests
2020

21-
If you want to run only specific tests, you can do `pnpm test -- -t "test name"`
21+
If you want to run only specific tests, you can do `pnpm test -- -t "test name"`.
22+
23+
You can also do `npx vitest ./packages/hub/src/utils/XetBlob.spec.ts` to run a specific test file.
24+
25+
Or `cd packages/hub && npx vitest --browser.name=chrome --browser.headless --config vitest-browser.config.mts ./src/utils/XetBlob.spec.ts` to run browser tests on a specific file.
2226

2327
## Adding a package
2428

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,7 @@ You can run our packages with vanilla JS, without any bundler, by using a CDN or
9898
```html
9999
<script type="module">
100100
import { InferenceClient } from 'https://cdn.jsdelivr.net/npm/@huggingface/[email protected]/+esm';
101-
import { createRepo, commit, deleteRepo, listFiles } from "https://cdn.jsdelivr.net/npm/@huggingface/hub@1.0.2/+esm";
101+
import { createRepo, commit, deleteRepo, listFiles } from "https://cdn.jsdelivr.net/npm/@huggingface/hub@1.1.0/+esm";
102102
</script>
103103
```
104104

packages/hub/.eslintignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,3 @@
11
dist
22
sha256.js
3+
src/vendor

packages/hub/README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -174,3 +174,4 @@ Under the hood, `@huggingface/hub` uses a lazy blob implementation to load the f
174174
## Dependencies
175175

176176
- `@huggingface/tasks` : Typings only
177+
- `@huggingface/lz4` : LZ4 compression/decompression utility

packages/hub/package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
{
22
"name": "@huggingface/hub",
33
"packageManager": "[email protected]",
4-
"version": "1.0.2",
4+
"version": "1.1.0",
55
"description": "Utilities to interact with the Hugging Face hub",
66
"repository": "https://github.com/huggingface/huggingface.js.git",
77
"publishConfig": {

packages/hub/src/index.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,3 +22,4 @@ export { HubApiError, InvalidApiResponseFormatError } from "./error";
2222
* Only exported for E2Es convenience
2323
*/
2424
export { sha256 as __internal_sha256 } from "./utils/sha256";
25+
export { XetBlob as __internal_XetBlob } from "./utils/XetBlob";

packages/hub/src/lib/download-file-to-cache-dir.spec.ts

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import type { Stats } from "node:fs";
77
import { getHFHubCachePath, getRepoFolderName } from "./cache-management";
88
import { toRepoId } from "../utils/toRepoId";
99
import { downloadFileToCacheDir } from "./download-file-to-cache-dir";
10+
import { createSymlink } from "../utils/symlink";
1011

1112
vi.mock("node:fs/promises", () => ({
1213
writeFile: vi.fn(),
@@ -21,6 +22,10 @@ vi.mock("./paths-info", () => ({
2122
pathsInfo: vi.fn(),
2223
}));
2324

25+
vi.mock("../utils/symlink", () => ({
26+
createSymlink: vi.fn(),
27+
}));
28+
2429
const DUMMY_REPO: RepoId = {
2530
name: "hello-world",
2631
type: "model",
@@ -196,7 +201,7 @@ describe("downloadFileToCacheDir", () => {
196201
expect(vi.mocked(lstat).mock.calls[0][0]).toBe(expectedBlob);
197202

198203
// symlink should have been created
199-
expect(symlink).toHaveBeenCalledOnce();
204+
expect(createSymlink).toHaveBeenCalledOnce();
200205
// no download done
201206
expect(fetchMock).not.toHaveBeenCalled();
202207

@@ -283,6 +288,6 @@ describe("downloadFileToCacheDir", () => {
283288
// 2. should rename the incomplete to the blob expected name
284289
expect(rename).toHaveBeenCalledWith(incomplete, expectedBlob);
285290
// 3. should create symlink pointing to blob
286-
expect(symlink).toHaveBeenCalledWith(expectedBlob, expectPointer);
291+
expect(createSymlink).toHaveBeenCalledWith(expectedBlob, expectPointer);
287292
});
288293
});

packages/hub/src/lib/download-file-to-cache-dir.ts

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,12 @@
11
import { getHFHubCachePath, getRepoFolderName } from "./cache-management";
22
import { dirname, join } from "node:path";
3-
import { writeFile, rename, symlink, lstat, mkdir, stat } from "node:fs/promises";
3+
import { writeFile, rename, lstat, mkdir, stat } from "node:fs/promises";
44
import type { CommitInfo, PathInfo } from "./paths-info";
55
import { pathsInfo } from "./paths-info";
66
import type { CredentialsParams, RepoDesignation } from "../types/public";
77
import { toRepoId } from "../utils/toRepoId";
88
import { downloadFile } from "./download-file";
9+
import { createSymlink } from "../utils/symlink";
910

1011
export const REGEX_COMMIT_HASH: RegExp = new RegExp("^[0-9a-f]{40}$");
1112

@@ -107,7 +108,7 @@ export async function downloadFileToCacheDir(
107108
// shortcut the download if needed
108109
if (await exists(blobPath)) {
109110
// create symlinks in snapshot folder to blob object
110-
await symlink(blobPath, pointerPath);
111+
await createSymlink(blobPath, pointerPath);
111112
return pointerPath;
112113
}
113114

@@ -127,6 +128,6 @@ export async function downloadFileToCacheDir(
127128
// rename .incomplete file to expect blob
128129
await rename(incomplete, blobPath);
129130
// create symlinks in snapshot folder to blob object
130-
await symlink(blobPath, pointerPath);
131+
await createSymlink(blobPath, pointerPath);
131132
return pointerPath;
132133
}

packages/hub/src/lib/list-files.spec.ts

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ describe("listFiles", () => {
6060
type: "file",
6161
},
6262
]);
63-
});
63+
}, 30_000);
6464

6565
it("should fetch the list of files from the repo, including last commit", async () => {
6666
const cursor = listFiles({
@@ -146,7 +146,7 @@ describe("listFiles", () => {
146146
type: "file",
147147
},
148148
]);
149-
});
149+
}, 30_000);
150150

151151
it("should fetch the list of files from the repo, including subfolders", async () => {
152152
const cursor = listFiles({
@@ -165,5 +165,5 @@ describe("listFiles", () => {
165165
}
166166

167167
assert(files.some((file) => file.path === "data/XSUM-EMNLP18-Summary-Data-Original.tar.gz"));
168-
});
168+
}, 30_000);
169169
});

packages/hub/src/utils/WebBlob.spec.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ describe("WebBlob", () => {
5858
expect(webBlob).toBeInstanceOf(WebBlob);
5959
expect(webBlob).toMatchObject({ url });
6060
expect(await webBlob.slice(10, 22).text()).toBe("__metadata__");
61-
});
61+
}, 30_000);
6262

6363
it("should lazy load a Xet file hosted on Hugging Face", async () => {
6464
const stableDiffusionUrl =
@@ -70,7 +70,7 @@ describe("WebBlob", () => {
7070
expect(webBlob).toBeInstanceOf(WebBlob);
7171
expect(webBlob).toMatchObject({ url });
7272
expect(await webBlob.slice(10, 22).text()).toBe("__metadata__");
73-
});
73+
}, 30_000);
7474

7575
it("should create a slice on the file", async () => {
7676
const expectedText = fullText.slice(10, 20);

packages/hub/src/utils/WebBlob.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ export class WebBlob extends Blob {
6060

6161
override slice(start = 0, end = this.size): WebBlob {
6262
if (start < 0 || end < 0) {
63-
new TypeError("Unsupported negative start/end on FileBlob.slice");
63+
new TypeError("Unsupported negative start/end on WebBlob.slice");
6464
}
6565

6666
const slice = new WebBlob(
Lines changed: 185 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,185 @@
1+
import { describe, expect, it } from "vitest";
2+
import { bg4_regoup_bytes, XetBlob } from "./XetBlob";
3+
4+
describe("XetBlob", () => {
5+
it("should lazy load the first 22 bytes", async () => {
6+
const blob = new XetBlob({
7+
repo: {
8+
type: "model",
9+
name: "celinah/xet-experiments",
10+
},
11+
hash: "7b3b6d07673a88cf467e67c1f7edef1a8c268cbf66e9dd9b0366322d4ab56d9b",
12+
size: 5_234_139_343,
13+
});
14+
15+
expect(await blob.slice(10, 22).text()).toBe("__metadata__");
16+
}, 30_000);
17+
18+
it("should load the first chunk correctly", async () => {
19+
let xorbCount = 0;
20+
const blob = new XetBlob({
21+
repo: {
22+
type: "model",
23+
name: "celinah/xet-experiments",
24+
},
25+
hash: "7b3b6d07673a88cf467e67c1f7edef1a8c268cbf66e9dd9b0366322d4ab56d9b",
26+
size: 5_234_139_343,
27+
fetch: async (url, opts) => {
28+
if (typeof url === "string" && url.includes("/xorbs/")) {
29+
xorbCount++;
30+
}
31+
return fetch(url, opts);
32+
},
33+
});
34+
35+
const xetDownload = await blob.slice(0, 29928).arrayBuffer();
36+
const bridgeDownload = await fetch(
37+
"https://huggingface.co/celinah/xet-experiments/resolve/main/model5GB.safetensors",
38+
{
39+
headers: {
40+
Range: "bytes=0-29927",
41+
},
42+
}
43+
).then((res) => res.arrayBuffer());
44+
45+
expect(new Uint8Array(xetDownload)).toEqual(new Uint8Array(bridgeDownload));
46+
expect(xorbCount).toBe(1);
47+
}, 30_000);
48+
49+
it("should load just past the first chunk correctly", async () => {
50+
let xorbCount = 0;
51+
const blob = new XetBlob({
52+
repo: {
53+
type: "model",
54+
name: "celinah/xet-experiments",
55+
},
56+
hash: "7b3b6d07673a88cf467e67c1f7edef1a8c268cbf66e9dd9b0366322d4ab56d9b",
57+
size: 5_234_139_343,
58+
fetch: async (url, opts) => {
59+
if (typeof url === "string" && url.includes("/xorbs/")) {
60+
xorbCount++;
61+
}
62+
return fetch(url, opts);
63+
},
64+
});
65+
66+
const xetDownload = await blob.slice(0, 29929).arrayBuffer();
67+
const bridgeDownload = await fetch(
68+
"https://huggingface.co/celinah/xet-experiments/resolve/main/model5GB.safetensors",
69+
{
70+
headers: {
71+
Range: "bytes=0-29928",
72+
},
73+
}
74+
).then((res) => res.arrayBuffer());
75+
76+
expect(new Uint8Array(xetDownload)).toEqual(new Uint8Array(bridgeDownload));
77+
expect(xetDownload.byteLength).toBe(29929);
78+
expect(xorbCount).toBe(2);
79+
});
80+
81+
// In GitHub Actions, this test doesn't work inside the browser, but it works locally
82+
// inside both Chrome and Chromium browsers
83+
// TODO: figure out why
84+
if (typeof window === "undefined") {
85+
it("should load correctly when loading far into a chunk range", async () => {
86+
const blob = new XetBlob({
87+
repo: {
88+
type: "model",
89+
name: "celinah/xet-experiments",
90+
},
91+
hash: "7b3b6d07673a88cf467e67c1f7edef1a8c268cbf66e9dd9b0366322d4ab56d9b",
92+
size: 5_234_139_343,
93+
});
94+
95+
const xetDownload = await blob.slice(10_000_000, 10_100_000).arrayBuffer();
96+
const bridgeDownload = await fetch(
97+
"https://huggingface.co/celinah/xet-experiments/resolve/main/model5GB.safetensors",
98+
{
99+
headers: {
100+
Range: "bytes=10000000-10099999",
101+
},
102+
}
103+
).then((res) => res.arrayBuffer());
104+
105+
console.log("xet", xetDownload.byteLength, "bridge", bridgeDownload.byteLength);
106+
expect(new Uint8Array(xetDownload)).toEqual(new Uint8Array(bridgeDownload));
107+
}, 30_000);
108+
}
109+
110+
it("should load text correctly when offset_into_range starts in a chunk further than the first", async () => {
111+
const blob = new XetBlob({
112+
repo: {
113+
type: "model",
114+
name: "celinah/xet-experiments",
115+
},
116+
hash: "794efea76d8cb372bbe1385d9e51c3384555f3281e629903ecb6abeff7d54eec",
117+
size: 62_914_580,
118+
});
119+
120+
// Reconstruction info
121+
// {
122+
// "offset_into_first_range": 600000,
123+
// "terms":
124+
// [
125+
// {
126+
// "hash": "be748f77930d5929cabd510a15f2c30f2f460b639804ef79dea46affa04fd8b2",
127+
// "unpacked_length": 655360,
128+
// "range": { "start": 0, "end": 5 },
129+
// },
130+
// {
131+
// "hash": "be748f77930d5929cabd510a15f2c30f2f460b639804ef79dea46affa04fd8b2",
132+
// "unpacked_length": 655360,
133+
// "range": { "start": 0, "end": 5 },
134+
// },
135+
// ],
136+
// "fetch_info":
137+
// {
138+
// "be748f77930d5929cabd510a15f2c30f2f460b639804ef79dea46affa04fd8b2":
139+
// [
140+
// {
141+
// "range": { "start": 0, "end": 5 },
142+
// "url": "...",
143+
// "url_range": { "start": 0, "end": 2839 },
144+
// },
145+
// ],
146+
// },
147+
// }
148+
149+
const text = await blob.slice(600_000, 700_000).text();
150+
const bridgeDownload = await fetch("https://huggingface.co/celinah/xet-experiments/resolve/main/large_text.txt", {
151+
headers: {
152+
Range: "bytes=600000-699999",
153+
},
154+
}).then((res) => res.text());
155+
156+
console.log("xet", text.length, "bridge", bridgeDownload.length);
157+
expect(text.length).toBe(bridgeDownload.length);
158+
}, 30_000);
159+
160+
describe("bg4_regoup_bytes", () => {
161+
it("should regroup bytes when the array is %4 length", () => {
162+
expect(bg4_regoup_bytes(new Uint8Array([1, 5, 2, 6, 3, 7, 4, 8]))).toEqual(
163+
new Uint8Array([1, 2, 3, 4, 5, 6, 7, 8])
164+
);
165+
});
166+
167+
it("should regroup bytes when the array is %4 + 1 length", () => {
168+
expect(bg4_regoup_bytes(new Uint8Array([1, 5, 9, 2, 6, 3, 7, 4, 8]))).toEqual(
169+
new Uint8Array([1, 2, 3, 4, 5, 6, 7, 8, 9])
170+
);
171+
});
172+
173+
it("should regroup bytes when the array is %4 + 2 length", () => {
174+
expect(bg4_regoup_bytes(new Uint8Array([1, 5, 9, 2, 6, 10, 3, 7, 4, 8]))).toEqual(
175+
new Uint8Array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
176+
);
177+
});
178+
179+
it("should regroup bytes when the array is %4 + 3 length", () => {
180+
expect(bg4_regoup_bytes(new Uint8Array([1, 5, 9, 2, 6, 10, 3, 7, 11, 4, 8]))).toEqual(
181+
new Uint8Array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11])
182+
);
183+
});
184+
});
185+
});

0 commit comments

Comments
 (0)