Skip to content

Commit 9d5a5df

Browse files
feat(NODE-5648): add Long.fromStringStrict() (#675)
1 parent ae8bac7 commit 9d5a5df

File tree

7 files changed

+353
-17
lines changed

7 files changed

+353
-17
lines changed

src/int_32.ts

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ import { BSON_INT32_MAX, BSON_INT32_MIN } from './constants';
33
import { BSONError } from './error';
44
import type { EJSONOptions } from './extended_json';
55
import { type InspectFn, defaultInspect } from './parser/utils';
6+
import { removeLeadingZerosAndExplicitPlus } from './utils/string_utils';
67

78
/** @public */
89
export interface Int32Extended {
@@ -48,11 +49,7 @@ export class Int32 extends BSONValue {
4849
* @param value - the string we want to represent as an int32.
4950
*/
5051
static fromString(value: string): Int32 {
51-
const cleanedValue = !/[^0]+/.test(value)
52-
? value.replace(/^0+/, '0') // all zeros case
53-
: value[0] === '-'
54-
? value.replace(/^-0+/, '-') // negative number with leading zeros
55-
: value.replace(/^\+?0+/, ''); // positive number with leading zeros
52+
const cleanedValue = removeLeadingZerosAndExplicitPlus(value);
5653

5754
const coercedValue = Number(value);
5855

src/long.ts

Lines changed: 132 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ import { BSONError } from './error';
33
import type { EJSONOptions } from './extended_json';
44
import { type InspectFn, defaultInspect } from './parser/utils';
55
import type { Timestamp } from './timestamp';
6+
import * as StringUtils from './utils/string_utils';
67

78
interface LongWASMHelpers {
89
/** Gets the high bits of the last operation performed */
@@ -246,29 +247,24 @@ export class Long extends BSONValue {
246247
}
247248

248249
/**
250+
* @internal
249251
* Returns a Long representation of the given string, written using the specified radix.
252+
* Throws an error if `throwsError` is set to true and any of the following conditions are true:
253+
* - the string contains invalid characters for the given radix
254+
* - the string contains whitespace
250255
* @param str - The textual representation of the Long
251256
* @param unsigned - Whether unsigned or not, defaults to signed
252257
* @param radix - The radix in which the text is written (2-36), defaults to 10
253258
* @returns The corresponding Long value
254259
*/
255-
static fromString(str: string, unsigned?: boolean, radix?: number): Long {
260+
private static _fromString(str: string, unsigned: boolean, radix: number): Long {
256261
if (str.length === 0) throw new BSONError('empty string');
257-
if (str === 'NaN' || str === 'Infinity' || str === '+Infinity' || str === '-Infinity')
258-
return Long.ZERO;
259-
if (typeof unsigned === 'number') {
260-
// For goog.math.long compatibility
261-
(radix = unsigned), (unsigned = false);
262-
} else {
263-
unsigned = !!unsigned;
264-
}
265-
radix = radix || 10;
266262
if (radix < 2 || 36 < radix) throw new BSONError('radix');
267263

268264
let p;
269265
if ((p = str.indexOf('-')) > 0) throw new BSONError('interior hyphen');
270266
else if (p === 0) {
271-
return Long.fromString(str.substring(1), unsigned, radix).neg();
267+
return Long._fromString(str.substring(1), unsigned, radix).neg();
272268
}
273269

274270
// Do several (8) digits each time through the loop, so as to
@@ -291,6 +287,131 @@ export class Long extends BSONValue {
291287
return result;
292288
}
293289

290+
/**
 * Returns a signed Long representation of the given string, written using radix 10.
 * Will throw an error if the given text is not exactly representable as a Long.
 * Throws an error if any of the following conditions are true:
 * - the string is empty
 * - the string contains invalid characters for radix 10
 * - the string contains whitespace
 * - the value the string represents is too large or too small to be a Long
 * Unlike Long.fromString, this method does not coerce '+/-Infinity' and 'NaN' to Long.Zero
 * @param str - The textual representation of the Long
 * @returns The corresponding Long value
 */
static fromStringStrict(str: string): Long;
/**
 * Returns a Long representation of the given string, written using radix 10.
 * Will throw an error if the given parameters are not exactly representable as a Long.
 * Throws an error if any of the following conditions are true:
 * - the string is empty
 * - the string contains invalid characters for radix 10
 * - the string contains whitespace
 * - the value the string represents is too large or too small to be a Long
 * Unlike Long.fromString, this method does not coerce '+/-Infinity' and 'NaN' to Long.Zero
 * @param str - The textual representation of the Long
 * @param unsigned - Whether unsigned or not, defaults to signed
 * @returns The corresponding Long value
 */
static fromStringStrict(str: string, unsigned?: boolean): Long;
/**
 * Returns a signed Long representation of the given string, written using the specified radix.
 * Will throw an error if the given parameters are not exactly representable as a Long.
 * Throws an error if any of the following conditions are true:
 * - the string is empty
 * - the radix is outside the range 2-36
 * - the string contains invalid characters for the given radix
 * - the string contains whitespace
 * - the value the string represents is too large or too small to be a Long
 * Unlike Long.fromString, this method does not coerce '+/-Infinity' and 'NaN' to Long.Zero
 * @param str - The textual representation of the Long
 * @param radix - The radix in which the text is written (2-36), defaults to 10
 * @returns The corresponding Long value
 */
static fromStringStrict(str: string, radix?: number): Long;
/**
 * Returns a Long representation of the given string, written using the specified radix.
 * Will throw an error if the given parameters are not exactly representable as a Long.
 * Throws an error if any of the following conditions are true:
 * - the string is empty
 * - the radix is outside the range 2-36
 * - the string contains invalid characters for the given radix
 * - the string contains whitespace
 * - the value the string represents is too large or too small to be a Long
 * Unlike Long.fromString, this method does not coerce '+/-Infinity' and 'NaN' to Long.Zero
 * @param str - The textual representation of the Long
 * @param unsigned - Whether unsigned or not, defaults to signed
 * @param radix - The radix in which the text is written (2-36), defaults to 10
 * @returns The corresponding Long value
 */
static fromStringStrict(str: string, unsigned?: boolean, radix?: number): Long;
static fromStringStrict(str: string, unsignedOrRadix?: boolean | number, radix?: number): Long {
  let unsigned = false;
  if (typeof unsignedOrRadix === 'number') {
    // For goog.math.long compatibility: a numeric second argument is the radix
    radix = unsignedOrRadix;
  } else {
    unsigned = !!unsignedOrRadix;
  }
  radix ??= 10;

  // Leading/trailing whitespace gets its own message; interior whitespace is
  // rejected by the character validation below.
  if (str.trim() !== str) {
    throw new BSONError(`Input: '${str}' contains leading and/or trailing whitespace`);
  }
  // validateStringCharacters returns the (possibly empty, hence falsy) input on success,
  // so an empty string is also rejected here.
  if (!StringUtils.validateStringCharacters(str, radix)) {
    throw new BSONError(`Input: '${str}' contains invalid characters for radix: ${radix}`);
  }

  // remove leading zeros (for later string comparison and to make math faster)
  const cleanedStr = StringUtils.removeLeadingZerosAndExplicitPlus(str);

  // check roundtrip result: a value that overflowed or was otherwise altered while parsing
  // will not print back to the normalized input
  const result = Long._fromString(cleanedStr, unsigned, radix);
  if (result.toString(radix).toLowerCase() !== cleanedStr.toLowerCase()) {
    // radix has always been defaulted by this point, so it is reported unconditionally
    throw new BSONError(
      `Input: ${str} is not representable as ${result.unsigned ? 'an unsigned' : 'a signed'} 64-bit Long with radix: ${radix}`
    );
  }
  return result;
}
371+
372+
/**
 * Returns a signed Long representation of the given string, written using radix 10.
 * The strings 'NaN', 'Infinity', '+Infinity' and '-Infinity' are coerced to Long.ZERO.
 * @param str - The textual representation of the Long
 * @returns The corresponding Long value
 */
static fromString(str: string): Long;
/**
 * Returns a signed Long representation of the given string, written using the specified radix.
 * The strings 'NaN', 'Infinity', '+Infinity' and '-Infinity' are coerced to Long.ZERO.
 * @param str - The textual representation of the Long
 * @param radix - The radix in which the text is written (2-36), defaults to 10
 * @returns The corresponding Long value
 */
static fromString(str: string, radix?: number): Long;
/**
 * Returns a Long representation of the given string, written using radix 10.
 * The strings 'NaN', 'Infinity', '+Infinity' and '-Infinity' are coerced to Long.ZERO.
 * @param str - The textual representation of the Long
 * @param unsigned - Whether unsigned or not, defaults to signed
 * @returns The corresponding Long value
 */
static fromString(str: string, unsigned?: boolean): Long;
/**
 * Returns a Long representation of the given string, written using the specified radix.
 * The strings 'NaN', 'Infinity', '+Infinity' and '-Infinity' are coerced to Long.ZERO.
 * @param str - The textual representation of the Long
 * @param unsigned - Whether unsigned or not, defaults to signed
 * @param radix - The radix in which the text is written (2-36), defaults to 10
 * @returns The corresponding Long value
 */
static fromString(str: string, unsigned?: boolean, radix?: number): Long;
static fromString(str: string, unsignedOrRadix?: boolean | number, radix?: number): Long {
  let unsigned = false;
  if (typeof unsignedOrRadix === 'number') {
    // For goog.math.long compatibility: a numeric second argument is the radix
    radix = unsignedOrRadix;
  } else {
    unsigned = !!unsignedOrRadix;
  }
  // Deliberately `||` rather than `??`: this lenient parser has historically treated every
  // falsy radix (undefined, null, 0, NaN) as "use the default". With `??=` a radix of 0
  // would reach the range check and throw, and NaN would slip past that check.
  radix = radix || 10;
  // These literals are mapped to zero before any parsing, whatever the radix.
  if (str === 'NaN' || str === 'Infinity' || str === '+Infinity' || str === '-Infinity') {
    return Long.ZERO;
  }
  return Long._fromString(str, unsigned, radix);
}
414+
294415
/**
295416
* Creates a Long from its byte representation.
296417
* @param bytes - Byte representation

src/utils/string_utils.ts

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
/**
 * @internal
 * Removes leading zeros and explicit plus from textual representation of a number.
 *
 * - A leading `-` is preserved, a leading `+` is dropped.
 * - A run of zeros directly after the optional sign is removed; if nothing else
 *   follows, a single `0` is kept (so `'-000'` becomes `'-0'`).
 * - Input whose sign is immediately followed by another sign (for example `'+-1'`)
 *   is not a number; it is returned unchanged so that stripping the plus cannot turn
 *   it into a well-formed numeral that later validation would accept.
 * - No other validation is performed; the rest of the string is copied as is.
 *
 * @param str - The textual representation of the number
 * @returns The normalized text, or `str` itself when there is nothing to remove
 */
export function removeLeadingZerosAndExplicitPlus(str: string): string {
  if (str === '') {
    return str;
  }

  const firstChar = str.charAt(0);
  const isNegative = firstChar === '-';
  const isExplicitlyPositive = firstChar === '+';
  const digitsStart = isNegative || isExplicitlyPositive ? 1 : 0;

  if (digitsStart === 1) {
    // Doubled sign: leave the malformed input untouched for the caller to reject.
    const afterSign = str.charAt(1);
    if (afterSign === '+' || afterSign === '-') {
      return str;
    }
  }

  // Walk past the insignificant zeros that directly follow the optional sign.
  let firstSignificant = digitsStart;
  while (firstSignificant < str.length && str.charAt(firstSignificant) === '0') {
    firstSignificant += 1;
  }

  if (firstSignificant === digitsStart) {
    // No leading zeros: only an explicit plus (if any) has to go.
    return isExplicitlyPositive ? str.slice(1) : str;
  }

  // The text consisted solely of zeros when the scan consumed the whole string.
  const magnitude = firstSignificant === str.length ? '0' : str.slice(firstSignificant);
  return isNegative ? `-${magnitude}` : magnitude;
}
31+
32+
/**
 * @internal
 * Returns false for a string that contains invalid characters for its radix, else returns the original string.
 *
 * A string is valid when it consists of at most one leading `+` or `-` followed only by
 * digits of the given radix (letters are matched case-insensitively). A sign anywhere
 * other than the first position, or a repeated sign, makes the string invalid.
 *
 * Note that an empty string is returned as `''`, which is falsy: callers that test the
 * result for truthiness will treat empty input as invalid.
 *
 * @param str - The textual representation of the Long
 * @param radix - The radix in which the text is written (2-36), defaults to 10
 * @returns `str` when every character is valid for the radix, otherwise `false`
 */
export function validateStringCharacters(str: string, radix?: number): false | string {
  radix = radix ?? 10;
  // The first `radix` characters of this alphabet are the digits of that radix.
  const validCharacters = '0123456789abcdefghijklmnopqrstuvwxyz'.slice(0, radix);
  // Case-insensitive and anchored at both ends: optional single sign, then digits only.
  const regex = new RegExp(`^[-+]?[${validCharacters}]*$`, 'i');
  return regex.test(str) ? str : false;
}

test/node/int_32_tests.js

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,9 @@ describe('Int32', function () {
108108
['a string with zero with leading zeros', '000000', 0],
109109
['a string with positive leading zeros', '000000867', 867],
110110
['a string with explicity positive leading zeros', '+000000867', 867],
111-
['a string with negative leading zeros', '-00007', -7]
111+
['a string with negative leading zeros', '-00007', -7],
112+
['a string with explicit positive zeros', '+000000', 0],
113+
['a string explicit positive no leading zeros', '+32', 32]
112114
];
113115
const errorInputs = [
114116
['Int32.max + 1', '2147483648', 'larger than the maximum value for Int32'],

test/node/long.test.ts

Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -163,4 +163,120 @@ describe('Long', function () {
163163
});
164164
});
165165
});
166+
167+
describe('static fromString()', function () {
168+
const successInputs: [
169+
name: string,
170+
input: string,
171+
unsigned: boolean | undefined,
172+
radix: number | undefined,
173+
expectedStr?: string
174+
][] = [
175+
['Infinity', 'Infinity', false, 34, '0'],
176+
['-Infinity', '-Infinity', false, 23, '0'],
177+
['+Infinity', '+Infinity', false, 12, '0'],
178+
['NaN', 'NaN', false, 16, '0']
179+
];
180+
181+
for (const [testName, str, unsigned, radix, expectedStr] of successInputs) {
182+
context(`when the input is ${testName}`, () => {
183+
it(`should return a Long representation of the input`, () => {
184+
expect(Long.fromString(str, unsigned, radix).toString(radix)).to.equal(
185+
expectedStr ?? str.toLowerCase()
186+
);
187+
});
188+
});
189+
}
190+
});
191+
192+
describe('static fromStringStrict()', function () {
193+
const successInputs: [
194+
name: string,
195+
input: string,
196+
unsigned: boolean | undefined,
197+
radix: number | undefined,
198+
expectedStr?: string
199+
][] = [
200+
['basic no alphabet low radix', '1236', true, 8],
201+
['negative basic no alphabet low radix', '-1236', false, 8],
202+
['valid upper and lower case letters in string with radix > 10', 'eEe', true, 15],
203+
['hexadecimal letters', '126073efbcdADEF', true, 16],
204+
['negative hexadecimal letters', '-1267efbcdDEF', false, 16],
205+
['negative leading zeros', '-00000032', false, 15, '-32'],
206+
['leading zeros', '00000032', false, 15, '32'],
207+
['explicit positive leading zeros', '+00000032', false, 15, '32'],
208+
['max unsigned binary input', Long.MAX_UNSIGNED_VALUE.toString(2), true, 2],
209+
['max unsigned decimal input', Long.MAX_UNSIGNED_VALUE.toString(10), true, 10],
210+
['max unsigned hex input', Long.MAX_UNSIGNED_VALUE.toString(16), true, 16],
211+
['max signed binary input', Long.MAX_VALUE.toString(2), false, 2],
212+
['max signed decimal input', Long.MAX_VALUE.toString(10), false, 10],
213+
['max signed hex input', Long.MAX_VALUE.toString(16), false, 16],
214+
['min signed binary input', Long.MIN_VALUE.toString(2), false, 2],
215+
['min signed decimal input', Long.MIN_VALUE.toString(10), false, 10],
216+
['min signed hex input', Long.MIN_VALUE.toString(16), false, 16],
217+
['signed zeros', '+000000', false, 10, '0'],
218+
['unsigned zero', '0', true, 10],
219+
['explicit positive no leading zeros', '+32', true, 10, '32'],
220+
// the following inputs are valid radix 36 inputs, but will not represent NaN or +/- Infinity
221+
['radix 36 Infinity', 'Infinity', false, 36],
222+
['radix 36 -Infinity', '-Infinity', false, 36],
223+
['radix 36 +Infinity', '+Infinity', false, 36, 'infinity'],
224+
['radix 36 NaN', 'NaN', false, 36],
225+
['overload no unsigned and no radix parameter', '-32', undefined, undefined],
226+
['overload no unsigned parameter', '-32', undefined, 12],
227+
['overload no radix parameter', '32', true, undefined]
228+
];
229+
230+
const failureInputs: [
231+
name: string,
232+
input: string,
233+
unsigned: boolean | undefined,
234+
radix: number | undefined
235+
][] = [
236+
['empty string', '', true, 2],
237+
['invalid numbers in binary string', '234', true, 2],
238+
['non a-z or numeric string', '~~', true, 36],
239+
['alphabet in radix < 10', 'a', true, 9],
240+
['radix does not allow all alphabet letters', 'eee', false, 14],
241+
['over max unsigned binary input', Long.MAX_UNSIGNED_VALUE.toString(2) + '1', true, 2],
242+
['over max unsigned decimal input', Long.MAX_UNSIGNED_VALUE.toString(10) + '1', true, 10],
243+
['over max unsigned hex input', Long.MAX_UNSIGNED_VALUE.toString(16) + '1', true, 16],
244+
['over max signed binary input', Long.MAX_VALUE.toString(2) + '1', false, 2],
245+
['over max signed decimal input', Long.MAX_VALUE.toString(10) + '1', false, 10],
246+
['over max signed hex input', Long.MAX_VALUE.toString(16) + '1', false, 16],
247+
['under min signed binary input', Long.MIN_VALUE.toString(2) + '1', false, 2],
248+
['under min signed decimal input', Long.MIN_VALUE.toString(10) + '1', false, 10],
249+
['under min signed hex input', Long.MIN_VALUE.toString(16) + '1', false, 16],
250+
['string with whitespace', ' 3503a ', false, 11],
251+
['negative zero unsigned', '-0', true, 9],
252+
['negative zero signed', '-0', false, 13],
253+
['radix 1', '12', false, 1],
254+
['negative radix', '12', false, -4],
255+
['radix over 36', '12', false, 37],
256+
// the following inputs are invalid radix 10 inputs
257+
// this is because of the characters, not because of the values they commonly represent
258+
['radix 10 Infinity', 'Infinity', false, 10],
259+
['radix 10 -Infinity', '-Infinity', false, 10],
260+
['radix 10 +Infinity', '+Infinity', false, 10],
261+
['radix 10 NaN', 'NaN', false, 10],
262+
['overload no radix parameter and invalid sign', '-32', true, undefined]
263+
];
264+
265+
for (const [testName, str, unsigned, radix, expectedStr] of successInputs) {
266+
context(`when the input is ${testName}`, () => {
267+
it(`should return a Long representation of the input`, () => {
268+
expect(Long.fromStringStrict(str, unsigned, radix).toString(radix)).to.equal(
269+
expectedStr ?? str.toLowerCase()
270+
);
271+
});
272+
});
273+
}
274+
for (const [testName, str, unsigned, radix] of failureInputs) {
275+
context(`when the input is ${testName}`, () => {
276+
it(`should throw BSONError`, () => {
277+
expect(() => Long.fromStringStrict(str, unsigned, radix)).to.throw(BSONError);
278+
});
279+
});
280+
}
281+
});
166282
});

test/node/release.test.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ const REQUIRED_FILES = [
4848
'src/utils/byte_utils.ts',
4949
'src/utils/node_byte_utils.ts',
5050
'src/utils/number_utils.ts',
51+
'src/utils/string_utils.ts',
5152
'src/utils/web_byte_utils.ts',
5253
'src/utils/latin.ts',
5354
'src/parse_utf8.ts',

0 commit comments

Comments
 (0)