Skip to content

Commit e353a30

Browse files
committed
feat(NODE-5861): move UTF8 validation below basic latin parsing
1 parent 6c0ec8d commit e353a30

File tree

9 files changed

+63
-47
lines changed

9 files changed

+63
-47
lines changed

rollup.config.mjs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ const tsConfig = {
1313
module: 'esnext',
1414
moduleResolution: 'node',
1515
removeComments: true,
16-
lib: ['es2021'],
16+
lib: ['es2021', 'ES2022.Error'],
1717
importHelpers: false,
1818
noEmitHelpers: false,
1919
noEmitOnError: true,

src/binary.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -191,8 +191,8 @@ export class Binary extends BSONValue {
191191
if (encoding === 'hex') return ByteUtils.toHex(this.buffer);
192192
if (encoding === 'base64') return ByteUtils.toBase64(this.buffer);
193193
if (encoding === 'utf8' || encoding === 'utf-8')
194-
return ByteUtils.toUTF8(this.buffer, 0, this.buffer.byteLength);
195-
return ByteUtils.toUTF8(this.buffer, 0, this.buffer.byteLength);
194+
return ByteUtils.toUTF8(this.buffer, 0, this.buffer.byteLength, false);
195+
return ByteUtils.toUTF8(this.buffer, 0, this.buffer.byteLength, false);
196196
}
197197

198198
/** @internal */

src/error.ts

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ import { BSON_MAJOR_VERSION } from './constants';
44
* @public
55
* @category Error
66
*
7-
* `BSONError` objects are thrown when BSON ecounters an error.
7+
* `BSONError` objects are thrown when BSON encounters an error.
88
*
99
* This is the parent class for all the other errors thrown by this library.
1010
*/
@@ -23,8 +23,8 @@ export class BSONError extends Error {
2323
return 'BSONError';
2424
}
2525

26-
constructor(message: string) {
27-
super(message);
26+
constructor(message: string, options?: ErrorOptions) {
27+
super(message, options);
2828
}
2929

3030
/**

src/parser/deserializer.ts

Lines changed: 10 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -236,7 +236,7 @@ function deserializeObject(
236236
if (i >= buffer.byteLength) throw new BSONError('Bad BSON Document: illegal CString');
237237

238238
// Represents the key
239-
const name = isArray ? arrayIndex++ : ByteUtils.toUTF8(buffer, index, i);
239+
const name = isArray ? arrayIndex++ : ByteUtils.toUTF8(buffer, index, i, false);
240240

241241
// shouldValidateKey is true if the key should be validated, false otherwise
242242
let shouldValidateKey = true;
@@ -266,7 +266,7 @@ function deserializeObject(
266266
) {
267267
throw new BSONError('bad string length in bson');
268268
}
269-
value = getValidatedString(buffer, index, index + stringSize - 1, shouldValidateKey);
269+
value = ByteUtils.toUTF8(buffer, index, index + stringSize - 1, shouldValidateKey);
270270
index = index + stringSize;
271271
} else if (elementType === constants.BSON_DATA_OID) {
272272
const oid = ByteUtils.allocate(12);
@@ -476,7 +476,7 @@ function deserializeObject(
476476
// If are at the end of the buffer there is a problem with the document
477477
if (i >= buffer.length) throw new BSONError('Bad BSON Document: illegal CString');
478478
// Return the C string
479-
const source = ByteUtils.toUTF8(buffer, index, i);
479+
const source = ByteUtils.toUTF8(buffer, index, i, false);
480480
// Create the regexp
481481
index = i + 1;
482482

@@ -489,7 +489,7 @@ function deserializeObject(
489489
// If are at the end of the buffer there is a problem with the document
490490
if (i >= buffer.length) throw new BSONError('Bad BSON Document: illegal CString');
491491
// Return the C string
492-
const regExpOptions = ByteUtils.toUTF8(buffer, index, i);
492+
const regExpOptions = ByteUtils.toUTF8(buffer, index, i, false);
493493
index = i + 1;
494494

495495
// For each option add the corresponding one for javascript
@@ -521,7 +521,7 @@ function deserializeObject(
521521
// If are at the end of the buffer there is a problem with the document
522522
if (i >= buffer.length) throw new BSONError('Bad BSON Document: illegal CString');
523523
// Return the C string
524-
const source = ByteUtils.toUTF8(buffer, index, i);
524+
const source = ByteUtils.toUTF8(buffer, index, i, false);
525525
index = i + 1;
526526

527527
// Get the start search index
@@ -533,7 +533,7 @@ function deserializeObject(
533533
// If are at the end of the buffer there is a problem with the document
534534
if (i >= buffer.length) throw new BSONError('Bad BSON Document: illegal CString');
535535
// Return the C string
536-
const regExpOptions = ByteUtils.toUTF8(buffer, index, i);
536+
const regExpOptions = ByteUtils.toUTF8(buffer, index, i, false);
537537
index = i + 1;
538538

539539
// Set the object
@@ -551,7 +551,7 @@ function deserializeObject(
551551
) {
552552
throw new BSONError('bad string length in bson');
553553
}
554-
const symbol = getValidatedString(buffer, index, index + stringSize - 1, shouldValidateKey);
554+
const symbol = ByteUtils.toUTF8(buffer, index, index + stringSize - 1, shouldValidateKey);
555555
value = promoteValues ? symbol : new BSONSymbol(symbol);
556556
index = index + stringSize;
557557
} else if (elementType === constants.BSON_DATA_TIMESTAMP) {
@@ -587,7 +587,7 @@ function deserializeObject(
587587
) {
588588
throw new BSONError('bad string length in bson');
589589
}
590-
const functionString = getValidatedString(
590+
const functionString = ByteUtils.toUTF8(
591591
buffer,
592592
index,
593593
index + stringSize - 1,
@@ -626,7 +626,7 @@ function deserializeObject(
626626
}
627627

628628
// Javascript function
629-
const functionString = getValidatedString(
629+
const functionString = ByteUtils.toUTF8(
630630
buffer,
631631
index,
632632
index + stringSize - 1,
@@ -678,7 +678,7 @@ function deserializeObject(
678678
throw new BSONError('Invalid UTF-8 string in BSON document');
679679
}
680680
}
681-
const namespace = ByteUtils.toUTF8(buffer, index, index + stringSize - 1);
681+
const namespace = ByteUtils.toUTF8(buffer, index, index + stringSize - 1, false);
682682
// Update parse index position
683683
index = index + stringSize;
684684

@@ -728,24 +728,3 @@ function deserializeObject(
728728

729729
return object;
730730
}
731-
732-
function getValidatedString(
733-
buffer: Uint8Array,
734-
start: number,
735-
end: number,
736-
shouldValidateUtf8: boolean
737-
) {
738-
const value = ByteUtils.toUTF8(buffer, start, end);
739-
// if utf8 validation is on, do the check
740-
if (shouldValidateUtf8) {
741-
for (let i = 0; i < value.length; i++) {
742-
if (value.charCodeAt(i) === 0xfffd) {
743-
if (!validateUtf8(buffer, start, end)) {
744-
throw new BSONError('Invalid UTF-8 string in BSON document');
745-
}
746-
break;
747-
}
748-
}
749-
}
750-
return value;
751-
}

src/utils/byte_utils.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,8 @@ export type ByteUtils = {
2525
toHex: (buffer: Uint8Array) => string;
2626
/** Create a Uint8Array containing utf8 code units from a string */
2727
fromUTF8: (text: string) => Uint8Array;
28-
/** Create a string from utf8 code units */
29-
toUTF8: (buffer: Uint8Array, start: number, end: number) => string;
28+
/** Create a string from utf8 code units, fatal=true will throw an error if UTF-8 bytes are invalid, fatal=false will insert replacement characters */
29+
toUTF8: (buffer: Uint8Array, start: number, end: number, fatal: boolean) => string;
3030
/** Get the utf8 code unit count from a string if it were to be transformed to utf8 */
3131
utf8ByteLength: (input: string) => number;
3232
/** Encode UTF8 bytes generated from `source` string into `destination` at byteOffset. Returns the number of bytes encoded. */

src/utils/node_byte_utils.ts

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import { BSONError } from '../error';
2+
import { validateUtf8 } from '../validate_utf8';
23

34
type NodeJsEncoding = 'base64' | 'hex' | 'utf8' | 'binary';
45
type NodeJsBuffer = ArrayBufferView &
@@ -125,7 +126,7 @@ export const nodeJsByteUtils = {
125126
return Buffer.from(text, 'utf8');
126127
},
127128

128-
toUTF8(buffer: Uint8Array, start: number, end: number): string {
129+
toUTF8(buffer: Uint8Array, start: number, end: number, fatal: boolean): string {
129130
if (buffer.length === 0) {
130131
return '';
131132
}
@@ -153,7 +154,19 @@ export const nodeJsByteUtils = {
153154
}
154155
}
155156

156-
return nodeJsByteUtils.toLocalBufferType(buffer).toString('utf8', start, end);
157+
const string = nodeJsByteUtils.toLocalBufferType(buffer).toString('utf8', start, end);
158+
if (fatal) {
159+
// TODO(NODE-4930): Insufficiently strict BSON UTF8 validation
160+
for (let i = 0; i < string.length; i++) {
161+
if (string.charCodeAt(i) === 0xfffd) {
162+
if (!validateUtf8(buffer, start, end)) {
163+
throw new BSONError('Invalid UTF-8 string in BSON document');
164+
}
165+
break;
166+
}
167+
}
168+
}
169+
return string;
157170
},
158171

159172
utf8ByteLength(input: string): number {

src/utils/web_byte_utils.ts

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -172,7 +172,7 @@ export const webByteUtils = {
172172
return new TextEncoder().encode(text);
173173
},
174174

175-
toUTF8(uint8array: Uint8Array, start: number, end: number): string {
175+
toUTF8(uint8array: Uint8Array, start: number, end: number, fatal: boolean): string {
176176
if (uint8array.length === 0) {
177177
return '';
178178
}
@@ -200,7 +200,14 @@ export const webByteUtils = {
200200
}
201201
}
202202

203-
return new TextDecoder('utf8', { fatal: false }).decode(uint8array.slice(start, end));
203+
if (fatal) {
204+
try {
205+
return new TextDecoder('utf8', { fatal }).decode(uint8array.slice(start, end));
206+
} catch (cause) {
207+
throw new BSONError('Invalid UTF-8 string in BSON document', { cause });
208+
}
209+
}
210+
return new TextDecoder('utf8', { fatal }).decode(uint8array.slice(start, end));
204211
},
205212

206213
utf8ByteLength(input: string): number {

test/node/byte_utils.test.ts

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ import { webByteUtils } from '../../src/utils/web_byte_utils';
88
import * as sinon from 'sinon';
99
import { loadCJSModuleBSON, loadReactNativeCJSModuleBSON, loadESModuleBSON } from '../load_bson';
1010
import * as crypto from 'node:crypto';
11+
import { BSONError } from '../register-bson';
1112

1213
type ByteUtilTest<K extends keyof ByteUtils> = {
1314
name: string;
@@ -400,19 +401,34 @@ const fromUTF8Tests: ByteUtilTest<'fromUTF8'>[] = [
400401
const toUTF8Tests: ByteUtilTest<'toUTF8'>[] = [
401402
{
402403
name: 'should create utf8 string from buffer input',
403-
inputs: [Buffer.from('abc\u{1f913}', 'utf8'), 0, 7],
404+
inputs: [Buffer.from('abc\u{1f913}', 'utf8'), 0, 7, false],
404405
expectation({ output, error }) {
405406
expect(error).to.be.null;
406407
expect(output).to.deep.equal(Buffer.from('abc\u{1f913}', 'utf8').toString('utf8'));
407408
}
408409
},
409410
{
410411
name: 'should return empty string for empty buffer input',
411-
inputs: [Buffer.alloc(0), 0, 1],
412+
inputs: [Buffer.alloc(0), 0, 1, false],
412413
expectation({ output, error }) {
413414
expect(error).to.be.null;
414415
expect(output).to.be.a('string').with.lengthOf(0);
415416
}
417+
},
418+
{
419+
name: 'should throw an error if fatal is set and string is invalid',
420+
inputs: [Buffer.from('616263f09fa4', 'hex'), 0, 7, true],
421+
expectation({ error }) {
422+
expect(error).to.be.instanceOf(BSONError);
423+
}
424+
},
425+
{
426+
name: 'should insert replacement character fatal is false and string is invalid',
427+
inputs: [Buffer.from('616263f09fa4', 'hex'), 0, 7, false],
428+
expectation({ error, output }) {
429+
expect(error).to.not.exist;
430+
expect(output).to.equal('abc\uFFFD');
431+
}
416432
}
417433
];
418434
const utf8ByteLengthTests: ByteUtilTest<'utf8ByteLength'>[] = [
@@ -605,15 +621,15 @@ describe('ByteUtils', () => {
605621
it('should not invoke Buffer.toString', () => {
606622
const buffer = Buffer.from('abcdef', 'utf8');
607623
const spy = sinon.spy(buffer, 'toString');
608-
nodeJsByteUtils.toUTF8(buffer, 0, 6);
624+
nodeJsByteUtils.toUTF8(buffer, 0, 6, false);
609625
expect(spy).to.not.have.been.called;
610626
});
611627

612628
it('should not invoke TextDecoder.decode', () => {
613629
const utf8Bytes = Buffer.from('abcdef', 'utf8');
614630
const buffer = new Uint8Array(utf8Bytes.buffer, utf8Bytes.byteOffset, utf8Bytes.byteLength);
615631
const spy = sinon.spy(TextDecoder.prototype, 'decode');
616-
webByteUtils.toUTF8(buffer, 0, 6);
632+
webByteUtils.toUTF8(buffer, 0, 6, false);
617633
expect(spy).to.not.have.been.called;
618634
});
619635
});

tsconfig.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
"skipLibCheck": true,
1111
"lib": [
1212
"es2021",
13+
"ES2022.Error"
1314
],
1415
"outDir": "lib",
1516
"importHelpers": false,

0 commit comments

Comments
 (0)