Skip to content

Commit ccedf63

Browse files
committed
fix: make file download info compliant with python tool
1 parent 27b995d commit ccedf63

File tree

2 files changed

+82
-13
lines changed

2 files changed

+82
-13
lines changed

packages/hub/src/lib/file-download-info.spec.ts

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,8 @@ describe("fileDownloadInfo", () => {
1313
});
1414

1515
assert.strictEqual(info?.size, 536063208);
16-
assert.strictEqual(info?.etag, '"41a0e56472bad33498744818c8b1ef2c-64"');
16+
assert.strictEqual(info?.etag, '"a7a17d6d844b5de815ccab5f42cad6d24496db3850a2a43d8258221018ce87d2"');
17+
assert.strictEqual(info?.commitHash, 'dd4bc8b21efa05ec961e3efc4ee5e3832a3679c7');
1718
assert(info?.downloadLink);
1819
});
1920

@@ -30,6 +31,7 @@ describe("fileDownloadInfo", () => {
3031

3132
assert.strictEqual(info?.size, 134);
3233
assert.strictEqual(info?.etag, '"9eb98c817f04b051b3bcca591bcd4e03cec88018"');
34+
assert.strictEqual(info?.commitHash, 'dd4bc8b21efa05ec961e3efc4ee5e3832a3679c7');
3335
assert(!info?.downloadLink);
3436
});
3537

@@ -45,5 +47,22 @@ describe("fileDownloadInfo", () => {
4547

4648
assert.strictEqual(info?.size, 28);
4749
assert.strictEqual(info?.etag, '"a661b1a138dac6dc5590367402d100765010ffd6"');
50+
assert.strictEqual(info?.commitHash, '1a7dd4986e3dab699c24ca19b2afd0f5e1a80f37');
51+
});
52+
53+
// Requests an LFS file through the repository's full name, so no rename redirect is involved,
// then pins the expected size, etag and commit hash and checks that a download link is returned.
it("should fetch LFS file info without redirect", async () => {
54+
const info = await fileDownloadInfo({
55+
repo: {
56+
name: "google-bert/bert-base-uncased", // full name no redirect needed
57+
type: "model",
58+
},
59+
path: "tf_model.h5",
60+
revision: "dd4bc8b21efa05ec961e3efc4ee5e3832a3679c7",
61+
});
62+
63+
assert.strictEqual(info?.size, 536063208);
64+
assert.strictEqual(info?.etag, '"a7a17d6d844b5de815ccab5f42cad6d24496db3850a2a43d8258221018ce87d2"');
65+
assert.strictEqual(info?.commitHash, 'dd4bc8b21efa05ec961e3efc4ee5e3832a3679c7');
66+
assert(info?.downloadLink);
4867
});
4968
});

packages/hub/src/lib/file-download-info.ts

Lines changed: 62 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -4,14 +4,58 @@ import type { CredentialsParams, RepoDesignation } from "../types/public";
44
import { checkCredentials } from "../utils/checkCredentials";
55
import { toRepoId } from "../utils/toRepoId";
66

7+
/** Response header read to populate `commitHash` in the download info. */
const HUGGINGFACE_HEADER_X_REPO_COMMIT = "X-Repo-Commit";
/** Response header carrying the etag of the linked resource; preferred over the regular `ETag` header. */
const HUGGINGFACE_HEADER_X_LINKED_ETAG = "X-Linked-Etag";
/** Response header carrying the size of the linked resource; preferred over `Content-Length`. */
const HUGGINGFACE_HEADER_X_LINKED_SIZE = "X-Linked-Size";
10+
711
/**
 * Metadata describing a file, as reported by the response headers of the file request.
 */
export interface FileDownloadInfoOutput {
812
/** File size, parsed from the `X-Linked-Size` header or, failing that, `Content-Length`. */
size: number;
913
/** Value of the `X-Linked-Etag` header or, failing that, the regular `ETag` header. */
etag: string;
14+
/** Value of the `X-Repo-Commit` response header, or null when the header is absent. */
commitHash: string | null;
1015
/**
1116
* In case of LFS file, link to download directly from cloud provider
1217
*/
1318
downloadLink: string | null;
1419
}
20+
21+
/**
 * Performs a request and follows redirects only while they stay on the origin of the requested URL.
 *
 * Useful to follow a redirection to a renamed repository without following a redirection to a CDN:
 * - a relative `Location` (e.g. `/hello`) is resolved against the current URL and followed;
 * - an absolute `Location` is followed only when its origin matches the origin of the current URL.
 *
 * The first response that is not a same-origin redirect is returned untouched, so a cross-origin
 * redirect response (status and `Location` header included) is handed back to the caller.
 *
 * @param params.url URL to request first
 * @param params.method HTTP method used for every request in the chain
 * @param params.headers headers sent with every request in the chain
 * @param params.fetch custom fetch function, see below
 * @param params.maxRedirects maximum number of same-origin redirects to follow, defaults to 20
 * @returns the first response that is not a same-origin redirect
 * @throws Error when more than `maxRedirects` same-origin redirects are encountered (redirect loop)
 */
async function followSameOriginRedirect(params: {
	url: string;
	method: string;
	headers: Record<string, string>;
	/**
	 * Custom fetch function to use instead of the default one, for example to use a proxy or edit headers.
	 */
	fetch?: typeof fetch;
	/**
	 * Upper bound on the number of same-origin redirects followed before giving up.
	 */
	maxRedirects?: number;
}): Promise<Response> {
	const doFetch = params.fetch ?? fetch;
	const maxRedirects = params.maxRedirects ?? 20;
	let currentUrl = params.url;

	for (let followed = 0; ; followed++) {
		const resp = await doFetch(currentUrl, {
			method: params.method,
			headers: params.headers,
			// prevent automatic redirect: each hop is inspected before being followed
			redirect: "manual",
		});

		// A Location header only denotes a redirect on 3xx responses (e.g. not on a 201 Created).
		const location = resp.headers.get("Location");
		const isRedirect = resp.status >= 300 && resp.status < 400;
		if (!isRedirect || !location) {
			return resp;
		}

		// new URL('http://foo/bar', 'http://example.com/hello').href == http://foo/bar
		// new URL('/bar', 'http://example.com/hello').href == http://example.com/bar
		const nextUrl = new URL(location, currentUrl);
		// a redirect leaving the current origin (e.g. to a CDN) is left for the caller to handle
		if (nextUrl.origin !== new URL(currentUrl).origin) {
			return resp;
		}

		// guard against redirect loops, which would otherwise issue requests forever
		if (followed >= maxRedirects) {
			throw new Error(
				`Too many same-origin redirects (more than ${maxRedirects}) while requesting ${params.url}`
			);
		}

		currentUrl = nextUrl.href;
	}
}
58+
1559
/**
1660
* @returns null when the file doesn't exist
1761
*/
@@ -47,38 +91,42 @@ export async function fileDownloadInfo(
4791
}/${encodeURIComponent(params.revision ?? "main")}/${params.path}` +
4892
(params.noContentDisposition ? "?noContentDisposition=1" : "");
4993

50-
const resp = await (params.fetch ?? fetch)(url, {
51-
method: "GET",
94+
//
95+
const resp = await followSameOriginRedirect({
96+
url: url,
97+
method: "HEAD",
5298
headers: {
5399
...(params.credentials && {
54100
Authorization: `Bearer ${accessToken}`,
101+
// prevent any compression => we want to know the real size of the file
102+
'Accept-Encoding': 'identity',
55103
}),
56-
Range: "bytes=0-0",
57104
},
58105
});
59106

60107
if (resp.status === 404 && resp.headers.get("X-Error-Code") === "EntryNotFound") {
61108
return null;
62109
}
63110

64-
if (!resp.ok) {
111+
// redirect to CDN is okay not an error
112+
if (!resp.ok && !resp.headers.get('Location')) {
65113
throw await createApiError(resp);
66114
}
67115

68-
const etag = resp.headers.get("ETag");
69-
116+
// We favor a custom header indicating the etag of the linked resource, and
117+
// we fallback to the regular etag header.
118+
const etag = resp.headers.get(HUGGINGFACE_HEADER_X_LINKED_ETAG) ?? resp.headers.get("ETag");
70119
if (!etag) {
71120
throw new InvalidApiResponseFormatError("Expected ETag");
72121
}
73122

74-
const contentRangeHeader = resp.headers.get("content-range");
75-
76-
if (!contentRangeHeader) {
123+
// size is required
124+
const contentSize = resp.headers.get(HUGGINGFACE_HEADER_X_LINKED_SIZE) ?? resp.headers.get("Content-Length")
125+
if (!contentSize) {
77126
throw new InvalidApiResponseFormatError("Expected size information");
78127
}
79128

80-
const [, parsedSize] = contentRangeHeader.split("/");
81-
const size = parseInt(parsedSize);
129+
const size = parseInt(contentSize);
82130

83131
if (isNaN(size)) {
84132
throw new InvalidApiResponseFormatError("Invalid file size received");
@@ -87,6 +135,8 @@ export async function fileDownloadInfo(
87135
return {
88136
etag,
89137
size,
90-
downloadLink: new URL(resp.url).hostname !== new URL(hubUrl).hostname ? resp.url : null,
138+
// Either from response headers (if redirected) or defaults to request url
139+
downloadLink: resp.headers.get('Location') ?? new URL(resp.url).hostname !== new URL(hubUrl).hostname ? resp.url : null,
140+
commitHash: resp.headers.get(HUGGINGFACE_HEADER_X_REPO_COMMIT),
91141
};
92142
}

0 commit comments

Comments
 (0)