Skip to content

Commit bf7adce

Browse files
committed
fix unit test to not include invalid surrogate
1 parent 9653bdc commit bf7adce

File tree

2 files changed

+18
-36
lines changed

2 files changed

+18
-36
lines changed

packages/firestore/src/util/misc.ts

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,8 @@ export interface Equatable<T> {
7777

7878
/** Compare strings in UTF-8 encoded byte order */
7979
export function compareUtf8Strings(left: string, right: string): number {
80-
for (let i = 0; i < left.length && i < right.length; i++) {
80+
let i = 0;
81+
while (i < left.length && i < right.length) {
8182
const leftCodePoint = left.codePointAt(i)!;
8283
const rightCodePoint = right.codePointAt(i)!;
8384

@@ -89,7 +90,7 @@ export function compareUtf8Strings(left: string, right: string): number {
8990
// Lazy instantiate TextEncoder
9091
const encoder = newTextEncoder();
9192

92-
// Substring and do UTF-8 encoded byte comparison
93+
// UTF-8 encode the character at index i for byte comparison.
9394
const leftBytes = encoder.encode(getUtf8SafeSubstring(left, i));
9495
const rightBytes = encoder.encode(getUtf8SafeSubstring(right, i));
9596
for (
@@ -102,8 +103,17 @@ export function compareUtf8Strings(left: string, right: string): number {
102103
return comp;
103104
}
104105
}
106+
// EXTREMELY RARE CASE: Code points differ, but their UTF-8 byte
107+
// representations are identical. This can happen with malformed input
108+
// (unpaired surrogates, which all encode to the same replacement bytes).
109+
// The backend rejects invalid surrogates with INVALID_ARGUMENT errors,
110+
// so we almost never receive invalid strings from the backend.
111+
// Fall back to code point comparison for graceful handling.
112+
return primitiveComparator(leftCodePoint, rightCodePoint);
105113
}
106114
}
115+
// Advance i by 2 UTF-16 code units for a surrogate pair (code point > 0xffff), 1 otherwise
116+
i += leftCodePoint > 0xffff ? 2 : 1;
107117
}
108118

109119
// Compare lengths if all characters are equal

packages/firestore/test/unit/util/misc.test.ts

Lines changed: 6 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -73,22 +73,6 @@ class StringGenerator {
7373
private static readonly DEFAULT_SURROGATE_PAIR_PROBABILITY = 0.33;
7474
private static readonly DEFAULT_MAX_LENGTH = 20;
7575

76-
// The first Unicode code point that is in the basic multilingual plane ("BMP") and,
77-
// therefore requires 1 UTF-16 code unit to be represented in UTF-16.
78-
private static readonly MIN_BMP_CODE_POINT = 0x00000000;
79-
80-
// The last Unicode code point that is in the basic multilingual plane ("BMP") and,
81-
// therefore requires 1 UTF-16 code unit to be represented in UTF-16.
82-
private static readonly MAX_BMP_CODE_POINT = 0x0000ffff;
83-
84-
// The first Unicode code point that is outside of the basic multilingual plane ("BMP") and,
85-
// therefore requires 2 UTF-16 code units, a surrogate pair, to be represented in UTF-16.
86-
private static readonly MIN_SUPPLEMENTARY_CODE_POINT = 0x00010000;
87-
88-
// The last Unicode code point that is outside of the basic multilingual plane ("BMP") and,
89-
// therefore requires 2 UTF-16 code units, a surrogate pair, to be represented in UTF-16.
90-
private static readonly MAX_SUPPLEMENTARY_CODE_POINT = 0x0010ffff;
91-
9276
private readonly rnd: Random;
9377
private readonly surrogatePairProbability: number;
9478
private readonly maxLength: number;
@@ -198,31 +182,19 @@ class StringGenerator {
198182
}
199183

200184
private nextNonSurrogateCodePoint(): number {
201-
return this.nextCodePointRange(
202-
StringGenerator.MIN_BMP_CODE_POINT,
203-
StringGenerator.MAX_BMP_CODE_POINT
204-
);
185+
let codePoint;
186+
do {
187+
codePoint = this.nextCodePointRange(0, 0xffff); // BMP range
188+
} while (codePoint >= 0xd800 && codePoint <= 0xdfff); // Exclude surrogate range
189+
190+
return codePoint;
205191
}
206192

207193
private nextCodePointRange(min: number, max: number): number {
208194
const rangeSize = max - min + 1;
209195
const offset = this.rnd.nextInt(rangeSize);
210196
return min + offset;
211197
}
212-
213-
// private nextCodePointRange(min: number, max: number, expectedCharCount: number): number {
214-
// const rangeSize = max - min;
215-
// const offset = this.rnd.nextInt(rangeSize);
216-
// const codePoint = min + offset;
217-
// if (String.fromCharCode(codePoint).length !== expectedCharCount) {
218-
// throw new Error(
219-
// `internal error vqgqnxcy97: Character.charCount(${codePoint}) returned ${
220-
// String.fromCharCode(codePoint).length
221-
// }, but expected ${expectedCharCount}`,
222-
// );
223-
// }
224-
// return codePoint;
225-
// }
226198
}
227199

228200
class Random {

0 commit comments

Comments
 (0)