Skip to content

Commit 6c0ec8d

Browse files
committed
feat(NODE-5861): optimize parsing basic latin strings
1 parent 44bec19 commit 6c0ec8d

File tree

3 files changed

+79
-2
lines changed

3 files changed

+79
-2
lines changed

src/utils/node_byte_utils.ts

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,33 @@ export const nodeJsByteUtils = {
126126
},
127127

128128
/**
 * Decodes the bytes of `buffer` in the range [start, end) into a string.
 *
 * Ranges shorter than 200 bytes that contain only basic latin bytes (0-127) are
 * converted directly with `String.fromCharCode`, without calling `Buffer.toString`.
 * Every other input is decoded with `Buffer.toString('utf8', start, end)`.
 *
 * @param buffer - the bytes to decode
 * @param start - index of the first byte to decode
 * @param end - index one past the last byte to decode
 * @returns the decoded string, or `''` when the buffer or the requested range is empty
 */
toUTF8(buffer: Uint8Array, start: number, end: number): string {
  if (buffer.length === 0) {
    return '';
  }

  const stringByteLength = end - start;
  if (stringByteLength === 0) {
    return '';
  }

  // Only take the fast path when the whole range lies inside the buffer. Indexing
  // past either edge yields `undefined`, which is not `> 127` and which
  // `String.fromCharCode` would turn into NUL characters. Out-of-range requests are
  // handled by the general decoder at the bottom instead.
  if (stringByteLength < 200 && start >= 0 && end <= buffer.length) {
    let basicLatin = true;
    const latinBytes: number[] = [];
    for (let i = start; i < end; i++) {
      const byte = buffer[i];
      if (byte > 127) {
        // A byte above 127 is outside basic latin; give up on the fast path.
        basicLatin = false;
        break;
      }
      latinBytes.push(byte);
    }

    if (basicLatin) {
      // eslint-disable-next-line prefer-spread
      return String.fromCharCode.apply(String, latinBytes);
    }
  }

  return nodeJsByteUtils.toLocalBufferType(buffer).toString('utf8', start, end);
},
131158

src/utils/web_byte_utils.ts

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -173,6 +173,33 @@ export const webByteUtils = {
173173
},
174174

175175
/**
 * Decodes the bytes of `uint8array` in the range [start, end) into a string.
 *
 * Ranges shorter than 200 bytes that contain only basic latin bytes (0-127) are
 * converted directly with `String.fromCharCode`, without creating a `TextDecoder`.
 * Every other input is decoded by a non-fatal utf8 `TextDecoder` over
 * `uint8array.slice(start, end)`.
 *
 * @param uint8array - the bytes to decode
 * @param start - index of the first byte to decode
 * @param end - index one past the last byte to decode
 * @returns the decoded string, or `''` when the array or the requested range is empty
 */
toUTF8(uint8array: Uint8Array, start: number, end: number): string {
  if (uint8array.length === 0) {
    return '';
  }

  const stringByteLength = end - start;
  if (stringByteLength === 0) {
    return '';
  }

  // Only take the fast path when the whole range lies inside the array. Indexing
  // past either edge yields `undefined`, which is not `> 127` and which
  // `String.fromCharCode` would turn into NUL characters. Out-of-range requests are
  // handled by the slice + TextDecoder path at the bottom instead.
  if (stringByteLength < 200 && start >= 0 && end <= uint8array.length) {
    let basicLatin = true;
    const latinBytes: number[] = [];
    for (let i = start; i < end; i++) {
      const byte = uint8array[i];
      if (byte > 127) {
        // A byte above 127 is outside basic latin; give up on the fast path.
        basicLatin = false;
        break;
      }
      latinBytes.push(byte);
    }

    if (basicLatin) {
      // eslint-disable-next-line prefer-spread
      return String.fromCharCode.apply(String, latinBytes);
    }
  }

  return new TextDecoder('utf8', { fatal: false }).decode(uint8array.slice(start, end));
},
178205

test/node/byte_utils.test.ts

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -400,15 +400,15 @@ const fromUTF8Tests: ByteUtilTest<'fromUTF8'>[] = [
400400
const toUTF8Tests: ByteUtilTest<'toUTF8'>[] = [
401401
{
402402
name: 'should create utf8 string from buffer input',
403-
inputs: [Buffer.from('abc\u{1f913}', 'utf8')],
403+
inputs: [Buffer.from('abc\u{1f913}', 'utf8'), 0, 7],
404404
expectation({ output, error }) {
405405
expect(error).to.be.null;
406406
expect(output).to.deep.equal(Buffer.from('abc\u{1f913}', 'utf8').toString('utf8'));
407407
}
408408
},
409409
{
410410
name: 'should return empty string for empty buffer input',
411-
inputs: [Buffer.alloc(0)],
411+
inputs: [Buffer.alloc(0), 0, 1],
412412
expectation({ output, error }) {
413413
expect(error).to.be.null;
414414
expect(output).to.be.a('string').with.lengthOf(0);
@@ -596,6 +596,29 @@ describe('ByteUtils', () => {
596596
});
597597
});
598598

599+
// Verifies that short basic latin input is decoded without calling the general-purpose
// decoders (Buffer.toString / TextDecoder.decode), and that the decoded text is correct.
describe('toUTF8 basic latin optimization', () => {
  afterEach(() => {
    // Remove the spies installed by each test so they do not leak into other tests.
    sinon.restore();
  });

  context('Given a basic latin string', () => {
    it('should not invoke Buffer.toString', () => {
      const buffer = Buffer.from('abcdef', 'utf8');
      const spy = sinon.spy(buffer, 'toString');
      const output = nodeJsByteUtils.toUTF8(buffer, 0, 6);
      expect(spy).to.not.have.been.called;
      // Skipping the decoder is only useful if the result is still right.
      expect(output).to.equal('abcdef');
    });

    it('should not invoke TextDecoder.decode', () => {
      const utf8Bytes = Buffer.from('abcdef', 'utf8');
      // View the same bytes as a plain Uint8Array, the input type of the web implementation.
      const buffer = new Uint8Array(utf8Bytes.buffer, utf8Bytes.byteOffset, utf8Bytes.byteLength);
      const spy = sinon.spy(TextDecoder.prototype, 'decode');
      const output = webByteUtils.toUTF8(buffer, 0, 6);
      expect(spy).to.not.have.been.called;
      expect(output).to.equal('abcdef');
    });
  });
});
621+
599622
describe('randomBytes fallback case when crypto is not present', () => {
600623
describe('web', function () {
601624
let bsonWithNoCryptoCtx;

0 commit comments

Comments
 (0)