Skip to content

Commit b049c77

Browse files
axel7083 and Wauplin authored
feat(@huggingface/hub): adding scanCacheDir (#908)
## Description Took https://github.com/huggingface/huggingface_hub/blob/main/src/huggingface_hub/utils/_cache_manager.py as heavy inspiration for the implementation. This is a very basic implementation, which does not cover deletion. ## Related issue Fixes #905 ## Usage ```ts import { scanCacheDir } from "@huggingface/hub"; const result = await scanCacheDir(); console.log(result); ``` --------- Co-authored-by: Lucain <[email protected]> Co-authored-by: Lucain <[email protected]>
1 parent cee831a commit b049c77

File tree

6 files changed

+411
-1
lines changed

6 files changed

+411
-1
lines changed

packages/hub/README.md

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,19 @@ console.log(oauthResult);
110110

111111
Check out the demo: https://huggingface.co/spaces/huggingfacejs/client-side-oauth
112112

113+
## Hugging Face cache
114+
115+
The `@huggingface/hub` package provides basic capabilities for scanning the cache directory. To learn more, see [Manage huggingface_hub cache-system](https://huggingface.co/docs/huggingface_hub/en/guides/manage-cache).
116+
117+
```ts
118+
import { scanCacheDir } from "@huggingface/hub";
119+
120+
const result = await scanCacheDir();
121+
122+
console.log(result);
123+
```
124+
Note that the cache directory is created and used only by the Python and Rust libraries. Downloading files using the `@huggingface/hub` package won't use the cache directory.
125+
113126
## Performance considerations
114127

115128
When uploading large files, you may want to run the `commit` calls inside a worker, to offload the sha256 computations.

packages/hub/package.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
"browser": {
2121
"./src/utils/sha256-node.ts": false,
2222
"./src/utils/FileBlob.ts": false,
23+
"./src/lib/cache-management.ts": false,
2324
"./dist/index.js": "./dist/browser/index.js",
2425
"./dist/index.mjs": "./dist/browser/index.mjs"
2526
},
Lines changed: 137 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,137 @@
1+
import { describe, test, expect, vi, beforeEach } from "vitest";
2+
import {
3+
scanCacheDir,
4+
scanCachedRepo,
5+
scanSnapshotDir,
6+
parseRepoType,
7+
getBlobStat,
8+
type CachedFileInfo,
9+
} from "./cache-management";
10+
import { stat, readdir, realpath, lstat } from "node:fs/promises";
11+
import type { Dirent, Stats } from "node:fs";
12+
import { join } from "node:path";
13+
14+
// Mocks: replace the whole `node:fs/promises` module with a mock. The tests below
// stub `stat`, `readdir`, `realpath` and `lstat` through `vi.mocked(...)`.
15+
vi.mock("node:fs/promises");
16+
17+
// Reset and restore every mock before each test so that stubs and recorded calls
// set up by one test do not carry over into the next.
beforeEach(() => {
18+
vi.resetAllMocks();
19+
vi.restoreAllMocks();
20+
});
21+
22+
// scanCacheDir: a non-directory cacheDir must be rejected, and an empty cache
// directory must produce an empty result.
describe("scanCacheDir", () => {
23+
test("should throw an error if cacheDir is not a directory", async () => {
24+
// stat() reports that the given path is not a directory
vi.mocked(stat).mockResolvedValueOnce({
25+
isDirectory: () => false,
26+
} as Stats);
27+
28+
await expect(scanCacheDir("/fake/dir")).rejects.toThrow("Scan cache expects a directory");
29+
});
30+
31+
test("empty directory should return an empty set of repository and no warnings", async () => {
32+
// stat() reports that the given path is a directory
vi.mocked(stat).mockResolvedValueOnce({
33+
isDirectory: () => true,
34+
} as Stats);
35+
36+
// mock an empty cache folder: every readdir() call resolves to no entries
37+
vi.mocked(readdir).mockResolvedValue([]);
38+
39+
const result = await scanCacheDir("/fake/dir");
40+
41+
// cacheDir must have been read
42+
expect(readdir).toHaveBeenCalledWith("/fake/dir");
43+
44+
// nothing to scan: no warnings, no repositories, and a total size of 0
expect(result.warnings.length).toBe(0);
45+
expect(result.repos).toHaveLength(0);
46+
expect(result.size).toBe(0);
47+
});
48+
});
49+
50+
// scanCachedRepo: covers rejection of an invalid repo path, a missing snapshots
// directory, and parsing of the repo id from the cache folder name.
describe("scanCachedRepo", () => {
51+
test("should throw an error for invalid repo path", async () => {
52+
// No fs stubs are configured here. NOTE(review): presumably the folder name
// "repo_path" alone is enough to be rejected — confirm against scanCachedRepo.
await expect(() => {
53+
return scanCachedRepo("/fake/repo_path");
54+
}).rejects.toThrow("Repo path is not a valid HuggingFace cache directory");
55+
});
56+
57+
test("should throw an error if the snapshot folder does not exist", async () => {
58+
vi.mocked(readdir).mockResolvedValue([]);
59+
// every stat() call reports "not a directory"
vi.mocked(stat).mockResolvedValue({
60+
isDirectory: () => false,
61+
} as Stats);
62+
63+
await expect(() => {
64+
return scanCachedRepo("/fake/cacheDir/models--hello-world--name");
65+
}).rejects.toThrow("Snapshots dir doesn't exist in cached repo");
66+
});
67+
68+
test("should properly parse the repository name", async () => {
69+
const repoPath = "/fake/cacheDir/models--hello-world--name";
70+
// empty directory listings, and every stat() call reports a directory
vi.mocked(readdir).mockResolvedValue([]);
71+
vi.mocked(stat).mockResolvedValue({
72+
isDirectory: () => true,
73+
} as Stats);
74+
75+
const result = await scanCachedRepo(repoPath);
76+
// the "refs" sub-folder of the repo must have been listed with file types
expect(readdir).toHaveBeenCalledWith(join(repoPath, "refs"), {
77+
withFileTypes: true,
78+
});
79+
80+
// folder name "models--hello-world--name" is expected to yield this repo id
expect(result.id.name).toBe("hello-world/name");
81+
expect(result.id.type).toBe("model");
82+
});
83+
});
84+
85+
// scanSnapshotDir: scanning a snapshot that holds a single file must fill both
// accumulators passed in by the caller.
describe("scanSnapshotDir", () => {
86+
test("should scan a valid snapshot directory", async () => {
87+
// output accumulators handed to scanSnapshotDir
const cachedFiles: CachedFileInfo[] = [];
88+
const blobStats = new Map<string, Stats>();
89+
// the snapshot directory lists exactly one entry, which is not a directory
vi.mocked(readdir).mockResolvedValueOnce([{ name: "file1", isDirectory: () => false } as Dirent]);
90+
91+
// stubbed realpath() and lstat() results for that single entry
vi.mocked(realpath).mockResolvedValueOnce("/fake/realpath");
92+
vi.mocked(lstat).mockResolvedValueOnce({ size: 1024, atimeMs: Date.now(), mtimeMs: Date.now() } as Stats);
93+
94+
await scanSnapshotDir("/fake/revision", cachedFiles, blobStats);
95+
96+
// one file recorded and one blob stat stored
expect(cachedFiles).toHaveLength(1);
97+
expect(blobStats.size).toBe(1);
98+
});
99+
});
100+
101+
// getBlobStat: a blob already present in the map must be returned without
// calling lstat(); a missing blob must be fetched and added to the map.
describe("getBlobStat", () => {
102+
test("should retrieve blob stat if already cached", async () => {
103+
// the map already holds an entry for the requested blob path
const blobStats = new Map<string, Stats>([["/fake/blob", { size: 1024 } as Stats]]);
104+
const result = await getBlobStat("/fake/blob", blobStats);
105+
106+
// the stored entry is returned and the filesystem is not queried
expect(lstat).not.toHaveBeenCalled();
107+
expect(result.size).toBe(1024);
108+
});
109+
110+
test("should fetch and cache blob stat if not cached", async () => {
111+
// start from an empty map so the blob has to be looked up
const blobStats = new Map();
112+
vi.mocked(lstat).mockResolvedValueOnce({ size: 2048 } as Stats);
113+
114+
const result = await getBlobStat("/fake/blob", blobStats);
115+
116+
// the stubbed lstat() result is returned and the map now holds one entry
expect(result.size).toBe(2048);
117+
expect(blobStats.size).toBe(1);
118+
});
119+
});
120+
121+
// parseRepoType: the plural names "models", "datasets" and "spaces" map to the
// singular repo types; any other value must throw.
describe("parseRepoType", () => {
122+
test("should parse models repo type", () => {
123+
expect(parseRepoType("models")).toBe("model");
124+
});
125+
126+
test("should parse dataset repo type", () => {
127+
expect(parseRepoType("datasets")).toBe("dataset");
128+
});
129+
130+
test("should parse space repo type", () => {
131+
expect(parseRepoType("spaces")).toBe("space");
132+
});
133+
134+
test("should throw an error for invalid repo type", () => {
135+
// the error message is expected to echo the rejected value
expect(() => parseRepoType("invalid")).toThrowError("Invalid repo type: invalid");
136+
});
137+
});

0 commit comments

Comments
 (0)