Skip to content

Commit 7f5ce5c

Browse files
committed
sha256 emscripten
1 parent d76cbc3 commit 7f5ce5c

File tree

9 files changed

+1255
-29
lines changed

9 files changed

+1255
-29
lines changed

packages/hub/.eslintignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,2 @@
11
dist
2+
sha256.js

packages/hub/src/index.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,3 +18,5 @@ export type {
1818
SpaceStage,
1919
} from "./types/public";
2020
export { HubApiError, InvalidApiResponseFormatError } from "./error";
21+
export { sha256 } from "./utils/sha256";
22+
export { WebBlob } from "./utils/WebBlob";

packages/hub/src/utils/sha256.spec.ts

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
import { describe, it, expect } from "vitest";
2+
import { WebBlob } from "./WebBlob";
3+
import { sha256 } from "./sha256";
4+
5+
describe("sha256", () => {
	// Remote file whose digest is pinned by the expectation below.
	const resourceUrl = new URL(
		"https://huggingface.co/ngxson/tinyllama_split_test/resolve/main/stories15M-q8_0-00001-of-00003.gguf"
	);

	it("Calculate hash in nodejs", async () => {
		const blob = await WebBlob.create(resourceUrl, { cacheBelow: 0 });
		const progress = sha256(blob, { useWebWorker: true });

		// Drain the generator: intermediate results are numbers, the final
		// (done) result carries the hex digest string.
		let step: IteratorResult<number, string> = await progress.next();
		while (!step.done) {
			step = await progress.next();
		}

		expect(step.value).toBe("8a3a74042ae05dda34985dff2d49adf2ee3f9d0fd73b03fa6cc4307c924e4040");
	});

	// TODO: how to test on browser (with / without web worker)
});

packages/hub/src/utils/sha256.ts

Lines changed: 8 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -2,31 +2,10 @@ import { eventToGenerator } from "./eventToGenerator";
22
import { hexFromBytes } from "./hexFromBytes";
33
import { isFrontend } from "./isFrontend";
44

5-
const webWorkerCode = `
6-
// Would prefer no CDN, but need a clever way to not burden the main file of the bundle
7-
importScripts("https://cdn.jsdelivr.net/npm/hash-wasm@4/dist/sha256.umd.min.js");
8-
9-
const createSHA256 = hashwasm.createSHA256;
10-
11-
self.addEventListener('message', async (event) => {
12-
const { file } = event.data;
13-
const sha256 = await createSHA256();
14-
sha256.init();
15-
const reader = file.stream().getReader();
16-
const total = file.size;
17-
let bytesDone = 0;
18-
while (true) {
19-
const { done, value } = await reader.read();
20-
if (done) {
21-
break;
22-
}
23-
sha256.update(value);
24-
bytesDone += value.length;
25-
postMessage({ progress: bytesDone / total });
26-
}
27-
postMessage({ sha256: sha256.digest('hex') });
28-
});
29-
`;
5+
/**
 * Memoised promise for the worker script's object URL.
 *
 * The URL is created once and shared by every worker: creating a fresh
 * Blob + object URL per worker would keep one un-revoked copy of the worker
 * source alive for each worker ever spawned.
 */
let webWorkerUrlPromise: Promise<string> | undefined;

/**
 * Returns an object URL pointing at the SHA-256 web worker script.
 *
 * The vendored wrapper is loaded with a dynamic `import()` so it is only
 * fetched when a worker is actually needed.
 *
 * @returns a `blob:` URL suitable for `new Worker(url)`
 * @throws whatever the dynamic import or Blob/URL creation throws; the cached
 *         promise is cleared in that case so a later call can retry
 */
async function getWebWorkerCode(): Promise<string> {
	if (!webWorkerUrlPromise) {
		webWorkerUrlPromise = (async () => {
			const sha256Module = await import("../vendor/hash-wasm/sha256-wrapper");
			return URL.createObjectURL(new Blob([sha256Module.createSHA256WorkerCode()]));
		})();
	}
	try {
		return await webWorkerUrlPromise;
	} catch (err) {
		// Do not cache a failure forever.
		webWorkerUrlPromise = undefined;
		throw err;
	}
}
309

3110
const pendingWorkers: Worker[] = [];
3211
const runningWorkers: Set<Worker> = new Set();
@@ -45,7 +24,7 @@ async function getWorker(poolSize?: number): Promise<Worker> {
4524
}
4625
}
4726
if (!poolSize) {
48-
const worker = new Worker(URL.createObjectURL(new Blob([webWorkerCode])));
27+
const worker = new Worker(await getWebWorkerCode());
4928
runningWorkers.add(worker);
5029
return worker;
5130
}
@@ -58,7 +37,7 @@ async function getWorker(poolSize?: number): Promise<Worker> {
5837
await waitPromise;
5938
}
6039

61-
const worker = new Worker(URL.createObjectURL(new Blob([webWorkerCode])));
40+
const worker = new Worker(await getWebWorkerCode());
6241
runningWorkers.add(worker);
6342
return worker;
6443
}
@@ -147,7 +126,7 @@ export async function* sha256(
147126
}
148127
}
149128
if (!wasmModule) {
150-
wasmModule = await import("hash-wasm");
129+
wasmModule = await import("../vendor/hash-wasm/sha256-wrapper");
151130
}
152131

153132
const sha256 = await wasmModule.createSHA256();
@@ -184,4 +163,4 @@ export async function* sha256(
184163
// eslint-disable-next-line @typescript-eslint/consistent-type-imports
185164
let cryptoModule: typeof import("./sha256-node");
186165
// eslint-disable-next-line @typescript-eslint/consistent-type-imports
187-
let wasmModule: typeof import("hash-wasm");
166+
let wasmModule: typeof import("../vendor/hash-wasm/sha256-wrapper");
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
#!/bin/bash
#
# Compiles ./sha256.c to ./sha256.js (single-file Emscripten module) inside a
# throw-away emscripten/emsdk container, then copies the result next to this
# script.

# Abort on the first failing command so a broken compile never leaves a stale
# or missing sha256.js behind unnoticed.
set -euo pipefail

CONTAINER_NAME="hash-wasm-builder"

CURRENT_PATH="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
# Quoted: the checkout path may contain spaces.
cd "$CURRENT_PATH"

# Force-removes the builder container; succeeds even when it does not exist.
cleanup() {
	docker rm -f "$CONTAINER_NAME" > /dev/null 2>&1 || true
}

# Clean up leftovers from a previous run, and always clean up on exit
# (including when a step below fails).
cleanup
trap cleanup EXIT

# Start container
docker run -it -d --name "$CONTAINER_NAME" emscripten/emsdk:3.1.55 bash

# Copy & compile
docker exec "$CONTAINER_NAME" bash -c "mkdir -p /source"
docker cp ./sha256.c "$CONTAINER_NAME":/source
docker exec "$CONTAINER_NAME" bash -c "\
  cd /source && \
  emcc sha256.c -o sha256.js -msimd128 -sSINGLE_FILE -sMODULARIZE=1 -sENVIRONMENT=web,worker -sEXPORTED_FUNCTIONS=_Hash_Init,_Hash_Update,_Hash_Final,_GetBufferPtr -sFILESYSTEM=0 -fno-rtti -fno-exceptions -O1 \
"
docker cp "$CONTAINER_NAME":/source/sha256.js .
Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
import WasmModule from "./sha256";
2+
3+
/**
 * Instantiates the SHA-256 WebAssembly module and exposes a minimal
 * incremental hashing API on top of it.
 *
 * The source text of this function is also embedded into the web worker script
 * via `createSHA256.toString()`, so everything it needs must either be declared
 * inside the function body or be provided on the worker's `self` object.
 *
 * @param isInsideWorker when `true`, the module factory is read from
 *        `self.WasmModule` instead of the imported `WasmModule` binding
 * @returns an object with `init()`, `update(data)` and `digest("hex")`
 */
export async function createSHA256(isInsideWorker = false): Promise<{
	init(): void;
	update(data: Uint8Array): void;
	digest(method: "hex"): string;
}> {
	// Largest slice copied into the wasm buffer per `_Hash_Update` call.
	// Declared locally (not at module level) so that it still exists when this
	// function is stringified and evaluated inside the worker.
	// NOTE(review): presumably mirrors the buffer size compiled into sha256.c — confirm.
	const BUFFER_MAX_SIZE = 8 * 1024 * 1024;

	const wasm: Awaited<ReturnType<typeof WasmModule>> = isInsideWorker
		? // @ts-expect-error WasmModule will be populated inside self object
		  await self["WasmModule"]()
		: await WasmModule();
	// View over the wasm memory starting at the module's input/output buffer.
	const heap = wasm.HEAPU8.subarray(wasm._GetBufferPtr());

	return {
		init() {
			wasm._Hash_Init(256);
		},
		update(data: Uint8Array) {
			let byteUsed = 0;
			while (byteUsed < data.byteLength) {
				const length = Math.min(data.byteLength - byteUsed, BUFFER_MAX_SIZE);
				// `subarray` takes an END OFFSET, not a length: the slice must be
				// [byteUsed, byteUsed + length), otherwise every chunk after the
				// first one is empty or wrong and stale heap bytes get hashed.
				heap.set(data.subarray(byteUsed, byteUsed + length));
				wasm._Hash_Update(length);
				byteUsed += length;
			}
		},
		digest(method: "hex") {
			if (method !== "hex") {
				throw new Error("Only digest hex is supported");
			}
			wasm._Hash_Final();
			// The 32-byte digest is read back from the start of the buffer.
			const result = Array.from(heap.slice(0, 32));
			return result.map((b) => b.toString(16).padStart(2, "0")).join("");
		},
	};
}
39+
40+
/**
 * Builds the JavaScript source text of the SHA-256 web worker.
 *
 * The generated script:
 *  - declares `_scriptDir` as an empty string (presumably expected by the
 *    generated wasm glue code — confirm against sha256.js),
 *  - assigns the stringified `WasmModule` factory to `self.WasmModule`,
 *  - inlines the stringified `createSHA256` function,
 *  - registers a `message` listener that reads `file` from `event.data`,
 *    streams it through the hasher, posts `{ progress }` (bytes done / total)
 *    after each chunk and finally posts `{ sha256 }` with the hex digest.
 *
 * Only the *source text* of `WasmModule` and `createSHA256` is copied, so
 * anything those functions rely on must be self-contained or defined in this
 * template; module-level bindings of this file do not exist in the worker.
 *
 * @returns the worker script as a string
 */
export function createSHA256WorkerCode(): string {
41+
return `
42+
const _scriptDir = "";
43+
self.WasmModule = ${WasmModule.toString()};
44+
45+
${createSHA256.toString()};
46+
47+
self.addEventListener('message', async (event) => {
48+
const { file } = event.data;
49+
const sha256 = await createSHA256(true);
50+
sha256.init();
51+
const reader = file.stream().getReader();
52+
const total = file.size;
53+
let bytesDone = 0;
54+
while (true) {
55+
const { done, value } = await reader.read();
56+
if (done) {
57+
break;
58+
}
59+
sha256.update(value);
60+
bytesDone += value.length;
61+
postMessage({ progress: bytesDone / total });
62+
}
63+
postMessage({ sha256: sha256.digest('hex') });
64+
});
65+
`;
66+
}

0 commit comments

Comments
 (0)