@@ -285,7 +285,7 @@ describe("XetBlob", () => {
 											range: { start: 0, end: 2 },
 											url_range: {
 												start: 0,
-												end: mergedChunks.byteLength - 1,
+												end: mergedChunks.byteLength / 1000 - 1,
 											},
 										},
 									],
@@ -318,6 +318,115 @@ describe("XetBlob", () => {
 			debugged.length = 0;
 		}
 	});
+
+	it("should load different slices when working with different XORBS", async () => {
+		const chunk1Content = "hello";
+		const chunk2Content = "world!";
+		const debugged: Array<{ event: "read" }> = [];
+
+		const chunks = Array(1000)
+			.fill(0)
+			.flatMap(() => [makeChunk(chunk1Content), makeChunk(chunk2Content)]);
+
+		const mergedChunks = await new Blob(chunks).arrayBuffer();
+		const wholeText = (chunk1Content + chunk2Content).repeat(1000);
+
+		const totalSize = wholeText.length;
+		let fetchCount = 0;
+
+		const blob = new XetBlob({
+			hash: "test",
+			repo: {
+				name: "test",
+				type: "model",
+			},
+			size: totalSize,
+			hubUrl: "https://huggingface.co",
+			debug: (e) => debugged.push(e),
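+			// Mock fetch routing: huggingface.co issues the token, cas.co returns reconstruction info, fetch.co serves xorb bytes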
+			fetch: async function (_url, opts) {
+				const url = new URL(_url as string);
+				const headers = opts?.headers as Record<string, string> | undefined;
+
+				switch (url.hostname) {
+					case "huggingface.co": {
+						// This is a token
+						return new Response(
+							JSON.stringify({
+								casUrl: "https://cas.co",
+								accessToken: "boo",
+								exp: 1_000_000,
+							})
+						);
+					}
+					case "cas.co": {
+						// This is the reconstruction info
+						const range = headers?.["Range"]?.slice("bytes=".length).split("-").map(Number);
+
+						const start = range?.[0] ?? 0;
+						// const end = range?.[1] ?? (totalSize - 1);
+
+						return new Response(
+							JSON.stringify({
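+								// Alternate hashes "test0"/"test1" so consecutive terms resolve to two different xorbs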
+								terms: Array(1000)
+									.fill(0)
+									.map((_, i) => ({
+										hash: "test" + (i % 2),
+										range: {
+											start: 0,
+											end: 2,
+										},
+										unpacked_length: chunk1Content.length + chunk2Content.length,
+									})),
+								fetch_info: {
+									test0: [
+										{
+											url: "https://fetch.co",
+											range: { start: 0, end: 2 },
+											url_range: {
+												start: 0,
+												end: mergedChunks.byteLength - 1,
+											},
+										},
+									],
+									test1: [
+										{
+											url: "https://fetch.co",
+											range: { start: 0, end: 2 },
+											url_range: {
+												start: 0,
+												end: mergedChunks.byteLength - 1,
+											},
+										},
+									],
+								},
+								offset_into_first_range: start,
+							} satisfies ReconstructionInfo)
+						);
+					}
+					case "fetch.co": {
+						fetchCount++;
+						return new Response(mergedChunks);
+					}
+					default:
+						throw new Error("Unhandled URL");
+				}
+			},
+		});
+
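+		// Start indexes cover chunk boundaries (0, 5, 11), offsets just past them, and positions near the end of the blob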
+		const startIndexes = [0, 5, 11, 6, 12, 100, 2000, totalSize - 12, totalSize - 2];
+
+		for (const index of startIndexes) {
+			console.log("slice", index);
+			const content = await blob.slice(index).text();
+			expect(content.length).toBe(wholeText.length - index);
+			expect(content.slice(0, 1000)).toEqual(wholeText.slice(index).slice(0, 1000));
+			expect(debugged.filter((e) => e.event === "read").length).toBe(4); // 2 fetches, each: 1 read + 1 undefined
+			expect(fetchCount).toEqual(2);
+
+			fetchCount = 0;
+			debugged.length = 0;
+		}
+	});
 });

 describe("loading one chunk at a time", () => {