Skip to content

Commit d5e538d

Browse files
Mishig and coyotte508 authored
[safetensors] add named groups to RE_SAFETENSORS_SHARD_FILE regex (#622)
Add named groups to the RE_SAFETENSORS_SHARD_FILE regex (safetensors equivalent of #621).

Co-authored-by: Eliott C. <[email protected]>
1 parent 4d82150 commit d5e538d

File tree

2 files changed

+12
-2
lines changed

2 files changed

+12
-2
lines changed

packages/hub/src/lib/parse-safetensors-metadata.spec.ts

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import { assert, it, describe } from "vitest";
2-
import { parseSafetensorsMetadata } from "./parse-safetensors-metadata";
2+
import { RE_SAFETENSORS_SHARD_FILE, parseSafetensorsMetadata } from "./parse-safetensors-metadata";
33
import { sum } from "../utils/sum";
44

55
describe("parseSafetensorsMetadata", () => {
@@ -109,4 +109,14 @@ describe("parseSafetensorsMetadata", () => {
109109
assert.deepStrictEqual(parse.parameterCount, { BF16: 8_537_680_896 });
110110
assert.deepStrictEqual(sum(Object.values(parse.parameterCount)), 8_537_680_896);
111111
});
112+
113+
it("should detect sharded safetensors filename", async () => {
114+
const safetensorsFilename = "model_00005-of-00072.safetensors"; // https://huggingface.co/bigscience/bloom/blob/4d8e28c67403974b0f17a4ac5992e4ba0b0dbb6f/model_00005-of-00072.safetensors
115+
const match = safetensorsFilename.match(RE_SAFETENSORS_SHARD_FILE);
116+
117+
assert.strictEqual(RE_SAFETENSORS_SHARD_FILE.test(safetensorsFilename), true);
118+
assert.strictEqual(match?.groups?.prefix, "model");
119+
assert.strictEqual(match?.groups?.shard, "00005");
120+
assert.strictEqual(match?.groups?.total, "00072");
121+
});
112122
});

packages/hub/src/lib/parse-safetensors-metadata.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ export const SAFETENSORS_INDEX_FILE = "model.safetensors.index.json";
1414
/// but in some situations safetensors weights have different filenames.
1515
/// Matches any name that ends with ".safetensors" (not anchored at the start).
export const RE_SAFETENSORS_FILE = /\.safetensors$/;
1616
/// Matches any name that ends with ".safetensors.index.json" (not anchored at the start).
export const RE_SAFETENSORS_INDEX_FILE = /\.safetensors\.index\.json$/;
17-
export const RE_SAFETENSORS_SHARD_FILE = /\d{5}-of-\d{5}\.safetensors$/;
17+
/// Matches sharded safetensors filenames such as "model_00005-of-00072.safetensors".
/// Named groups:
///   - `prefix`: everything before the shard number, minus one optional "_" or "-" separator
///     (e.g. "model"); may be empty.
///   - `shard`: the five-digit shard number (e.g. "00005").
///   - `total`: the five-digit number following "-of-" (e.g. "00072").
/// The pattern is anchored with ^ and $, so any leading text on the same line
/// (for instance a directory path) is captured as part of `prefix`.
export const RE_SAFETENSORS_SHARD_FILE = /^(?<prefix>.*?)[_-]?(?<shard>\d{5})-of-(?<total>\d{5})\.safetensors$/;
1818
/// NOTE(review): usage is not visible in this hunk; presumably the maximum number of
/// downloads performed concurrently — confirm against the call site.
const PARALLEL_DOWNLOADS = 20;
1919
/// NOTE(review): usage is not visible in this hunk; presumably an upper bound (in bytes)
/// on the size of a safetensors header — confirm against the call site.
const MAX_HEADER_LENGTH = 25_000_000;
2020

0 commit comments

Comments
 (0)