@@ -8,6 +8,7 @@ import { webByteUtils } from '../../src/utils/web_byte_utils';
8
8
import * as sinon from 'sinon' ;
9
9
import { loadCJSModuleBSON , loadReactNativeCJSModuleBSON , loadESModuleBSON } from '../load_bson' ;
10
10
import * as crypto from 'node:crypto' ;
11
+ import { BSONError } from '../../src/error' ;
11
12
12
13
type ByteUtilTest < K extends keyof ByteUtils > = {
13
14
name : string ;
@@ -399,6 +400,7 @@ const fromUTF8Tests: ByteUtilTest<'encodeUTF8Into'>[] = [
399
400
}
400
401
}
401
402
] ;
403
+
402
404
const toUTF8Tests : ByteUtilTest < 'toUTF8' > [ ] = [
403
405
{
404
406
name : 'should create utf8 string from buffer input' ,
@@ -417,21 +419,57 @@ const toUTF8Tests: ByteUtilTest<'toUTF8'>[] = [
417
419
}
418
420
} ,
419
421
{
420
- name : 'should throw an error if fatal is set and string is invalid' ,
422
+ name : 'should insert replacement character fatal is false and string is invalid' ,
423
+ inputs : [ Buffer . from ( '616263f09fa4' , 'hex' ) , 0 , 7 , false ] ,
424
+ expectation ( { error, output } ) {
425
+ expect ( error ) . to . not . exist ;
426
+ expect ( output ) . to . equal ( 'abc\uFFFD' ) ;
427
+ }
428
+ } ,
429
+ {
430
+ name : 'should throw an error if fatal is set and string is a sequence that decodes to an invalid code point' ,
421
431
inputs : [ Buffer . from ( '616263f09fa4' , 'hex' ) , 0 , 7 , true ] ,
422
432
expectation ( { error } ) {
423
433
expect ( error ) . to . match ( / I n v a l i d U T F - 8 s t r i n g i n B S O N d o c u m e n t / i) ;
424
434
}
425
435
} ,
426
436
{
427
- name : 'should insert replacement character fatal is false and string is invalid' ,
428
- inputs : [ Buffer . from ( '616263f09fa4' , 'hex' ) , 0 , 7 , false ] ,
429
- expectation ( { error, output } ) {
430
- expect ( error ) . to . not . exist ;
431
- expect ( output ) . to . equal ( 'abc\uFFFD' ) ;
437
+ name : 'throw an error if fatal is set and string contains overlong encoding' ,
438
+ inputs : [ Buffer . from ( '11000000025f0005000000f08282ac0000' , 'hex' ) , 0 , 18 , true ] ,
439
+ expectation ( { error } ) {
440
+ expect ( error ) . to . match ( / I n v a l i d U T F - 8 s t r i n g i n B S O N d o c u m e n t / i) ;
441
+ }
442
+ } ,
443
+ {
444
+ name : 'throw an error if fatal is set and string contains invalid bytes' ,
445
+ inputs : [ Buffer . from ( 'abcff' , 'hex' ) , 0 , 2 , true ] ,
446
+ expectation ( { error } ) {
447
+ expect ( error ) . to . match ( / I n v a l i d U T F - 8 s t r i n g i n B S O N d o c u m e n t / i) ;
448
+ }
449
+ } ,
450
+ {
451
+ name : 'throw an error if fatal is set and string contains an unexpected continuation byte' ,
452
+ inputs : [ Buffer . from ( '7F80' , 'hex' ) , 0 , 2 , true ] ,
453
+ expectation ( { error } ) {
454
+ expect ( error ) . to . match ( / I n v a l i d U T F - 8 s t r i n g i n B S O N d o c u m e n t / i) ;
455
+ }
456
+ } ,
457
+ {
458
+ name : 'throw an error if fatal is set and string contains a non-continuation byte before the end of the character' ,
459
+ inputs : [ Buffer . from ( 'c000' , 'hex' ) , 0 , 2 , true ] ,
460
+ expectation ( { error } ) {
461
+ expect ( error ) . to . match ( / I n v a l i d U T F - 8 s t r i n g i n B S O N d o c u m e n t / i) ;
462
+ }
463
+ } ,
464
+ {
465
+ name : 'throw an error if fatal is set and string ends before the end of the character' ,
466
+ inputs : [ Buffer . from ( 'c0' , 'hex' ) , 0 , 1 , true ] ,
467
+ expectation ( { error } ) {
468
+ expect ( error ) . to . match ( / I n v a l i d U T F - 8 s t r i n g i n B S O N d o c u m e n t / i) ;
432
469
}
433
470
}
434
471
] ;
472
+
435
473
const utf8ByteLengthTests : ByteUtilTest < 'utf8ByteLength' > [ ] = [
436
474
{
437
475
name : 'should return zero for empty string' ,
@@ -493,6 +531,51 @@ const randomBytesTests: ByteUtilTest<'randomBytes'>[] = [
493
531
}
494
532
] ;
495
533
534
/**
 * Extra malformed-UTF-8 inputs, copied from Web platform specs.
 *
 * Each entry is a raw byte sequence (`input`) plus a short label (`name`) that
 * is interpolated into the generated test title. Every sequence here is
 * expected to be rejected when `toUTF8` is called with `fatal` set to true.
 */
const toUTF8ErrorCaseTests = [
  // Invalid lead bytes, truncated sequences and bad continuation bytes
  { input: [0xff], name: 'invalid code' },
  { input: [0xc0], name: 'ends early' },
  { input: [0xe0], name: 'ends early 2' },
  { input: [0xc0, 0x00], name: 'invalid trail' },
  { input: [0xc0, 0xc0], name: 'invalid trail 2' },
  { input: [0xe0, 0x00], name: 'invalid trail 3' },
  { input: [0xe0, 0xc0], name: 'invalid trail 4' },
  { input: [0xe0, 0x80, 0x00], name: 'invalid trail 5' },
  { input: [0xe0, 0x80, 0xc0], name: 'invalid trail 6' },
  // NOTE: same bytes as the 'overlong U+0000 - 6 bytes' entry below; both are
  // kept so that each title still produces its own test case.
  { input: [0xfc, 0x80, 0x80, 0x80, 0x80, 0x80], name: '> 0x10ffff' },
  { input: [0xfe, 0x80, 0x80, 0x80, 0x80, 0x80], name: 'obsolete lead byte' },

  // Overlong encodings
  { input: [0xc0, 0x80], name: 'overlong U+0000 - 2 bytes' },
  { input: [0xe0, 0x80, 0x80], name: 'overlong U+0000 - 3 bytes' },
  { input: [0xf0, 0x80, 0x80, 0x80], name: 'overlong U+0000 - 4 bytes' },
  { input: [0xf8, 0x80, 0x80, 0x80, 0x80], name: 'overlong U+0000 - 5 bytes' },
  { input: [0xfc, 0x80, 0x80, 0x80, 0x80, 0x80], name: 'overlong U+0000 - 6 bytes' },

  { input: [0xc1, 0xbf], name: 'overlong U+007f - 2 bytes' },
  { input: [0xe0, 0x81, 0xbf], name: 'overlong U+007f - 3 bytes' },
  { input: [0xf0, 0x80, 0x81, 0xbf], name: 'overlong U+007f - 4 bytes' },
  { input: [0xf8, 0x80, 0x80, 0x81, 0xbf], name: 'overlong U+007f - 5 bytes' },
  { input: [0xfc, 0x80, 0x80, 0x80, 0x81, 0xbf], name: 'overlong U+007f - 6 bytes' },

  { input: [0xe0, 0x9f, 0xbf], name: 'overlong U+07ff - 3 bytes' },
  { input: [0xf0, 0x80, 0x9f, 0xbf], name: 'overlong U+07ff - 4 bytes' },
  { input: [0xf8, 0x80, 0x80, 0x9f, 0xbf], name: 'overlong U+07ff - 5 bytes' },
  { input: [0xfc, 0x80, 0x80, 0x80, 0x9f, 0xbf], name: 'overlong U+07ff - 6 bytes' },

  { input: [0xf0, 0x8f, 0xbf, 0xbf], name: 'overlong U+ffff - 4 bytes' },
  { input: [0xf8, 0x80, 0x8f, 0xbf, 0xbf], name: 'overlong U+ffff - 5 bytes' },
  { input: [0xfc, 0x80, 0x80, 0x8f, 0xbf, 0xbf], name: 'overlong U+ffff - 6 bytes' },

  { input: [0xf8, 0x84, 0x8f, 0xbf, 0xbf], name: 'overlong U+10ffff - 5 bytes' },
  { input: [0xfc, 0x80, 0x84, 0x8f, 0xbf, 0xbf], name: 'overlong U+10ffff - 6 bytes' },

  // UTF-16 surrogates encoded as code points in UTF-8
  { input: [0xed, 0xa0, 0x80], name: 'lead surrogate' },
  { input: [0xed, 0xb0, 0x80], name: 'trail surrogate' },
  { input: [0xed, 0xa0, 0x80, 0xed, 0xb0, 0x80], name: 'surrogate pair' }
];
578
+
496
579
const utils = new Map ( [
497
580
[ 'nodeJsByteUtils' , nodeJsByteUtils ] ,
498
581
[ 'webByteUtils' , webByteUtils ]
@@ -798,6 +881,14 @@ describe('ByteUtils', () => {
798
881
test . expectation ( { web : byteUtilsName === 'webByteUtils' , output, error } ) ;
799
882
} ) ;
800
883
}
884
// The shared malformed-UTF-8 table only applies to toUTF8: generate one test
// per entry, calling the utility over the whole byte sequence with the fatal
// flag set and asserting that it throws a BSONError with the expected message.
if (utility === 'toUTF8') {
  for (const test of toUTF8ErrorCaseTests) {
    it(`throws error when fatal is set and provided ${test.name} as input`, () => {
      expect(() =>
        byteUtils[utility](Uint8Array.from(test.input), 0, test.input.length, true)
      ).to.throw(BSONError, /Invalid UTF-8 string in BSON document/i);
    });
  }
}
801
892
} ) ;
802
893
}
803
894
}
0 commit comments