Skip to content

Commit 4f3662b

Browse files
Add UTF-8 Encoding to OrderedCode (#5817)
1 parent 1ef1341 commit 4f3662b

File tree

2 files changed

+170
-19
lines changed

2 files changed

+170
-19
lines changed

packages/firestore/src/index/ordered_code_writer.ts

Lines changed: 105 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,17 @@
1717
import { debugAssert, fail } from '../util/assert';
1818
import { ByteString } from '../util/byte_string';
1919

20+
/** These constants are taken from the backend. */
21+
// First UTF-16 high (leading) surrogate code unit.
const MIN_SURROGATE = '\uD800';
22+
// Last UTF-16 high (leading) surrogate code unit. Note that this is NOT the
// end of the whole surrogate range: low (trailing) surrogates occupy
// U+DC00..U+DFFF and therefore sort above this value.
const MAX_SURROGATE = '\uDBFF';
23+
24+
// First escape byte. It is followed by NULL_BYTE to encode a literal 0x00
// byte, or by SEPARATOR to emit the separator written after a UTF-8 string.
const ESCAPE1 = 0x00;
25+
const NULL_BYTE = 0xff; // Combined with ESCAPE1
26+
const SEPARATOR = 0x01; // Combined with ESCAPE1
27+
28+
// Second escape byte. It is followed by FF_BYTE to encode a literal 0xff byte.
const ESCAPE2 = 0xff;
29+
const FF_BYTE = 0x00; // Combined with ESCAPE2
30+
2031
const LONG_SIZE = 64;
2132
const BYTE_SIZE = 8;
2233

@@ -100,6 +111,54 @@ export class OrderedCodeWriter {
100111
buffer = new Uint8Array(DEFAULT_BUFFER_SIZE);
101112
position = 0;
102113

114+
/**
 * Writes the UTF-8 encoding of `sequence` into this byte sequence in
 * ascending order, then appends the ascending separator.
 *
 * The string is iterated by code point, so a well-formed surrogate pair
 * arrives as one two-unit string and is emitted as a single four-byte UTF-8
 * sequence. Every byte goes through `writeByteAscending`.
 *
 * @param sequence - The string to encode.
 */
writeUtf8Ascending(sequence: string): void {
  // The surrogate test must look at the leading code unit only. Comparing the
  // whole code-point string `c` against MAX_SURROGATE is a lexicographic
  // comparison, and '\uDBFF' is a proper prefix of (hence less than) every
  // pair that starts with U+DBFF. That sent U+10FC00..U+10FFFF down the
  // three-byte branch and dropped their low surrogate.
  const minSurrogate = MIN_SURROGATE.charCodeAt(0);
  const maxSurrogate = MAX_SURROGATE.charCodeAt(0);

  for (const c of sequence) {
    const charCode = c.charCodeAt(0);
    if (charCode < 0x80) {
      // One byte: 0xxxxxxx
      this.writeByteAscending(charCode);
    } else if (charCode < 0x800) {
      // Two bytes: 110xxxxx 10xxxxxx
      this.writeByteAscending(0xc0 | (charCode >>> 6));
      this.writeByteAscending(0x80 | (0x3f & charCode));
    } else if (charCode < minSurrogate || maxSurrogate < charCode) {
      // Three bytes: 1110xxxx 10xxxxxx 10xxxxxx
      this.writeByteAscending(0xe0 | (charCode >>> 12));
      this.writeByteAscending(0x80 | (0x3f & (charCode >>> 6)));
      this.writeByteAscending(0x80 | (0x3f & charCode));
    } else {
      // Leading unit is a high surrogate. `c` is never empty inside for...of,
      // so codePointAt(0) always yields a number here.
      const codePoint = c.codePointAt(0)!;
      // Four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
      this.writeByteAscending(0xf0 | (codePoint >>> 18));
      this.writeByteAscending(0x80 | (0x3f & (codePoint >>> 12)));
      this.writeByteAscending(0x80 | (0x3f & (codePoint >>> 6)));
      this.writeByteAscending(0x80 | (0x3f & codePoint));
    }
  }
  this.writeSeparatorAscending();
}
137+
138+
/**
 * Writes the UTF-8 encoding of `sequence` into this byte sequence in
 * descending order, then appends the descending separator.
 *
 * The string is iterated by code point, so a well-formed surrogate pair
 * arrives as one two-unit string and is emitted as a single four-byte UTF-8
 * sequence. Every byte goes through `writeByteDescending`.
 *
 * @param sequence - The string to encode.
 */
writeUtf8Descending(sequence: string): void {
  // The surrogate test must look at the leading code unit only. Comparing the
  // whole code-point string `c` against MAX_SURROGATE is a lexicographic
  // comparison, and '\uDBFF' is a proper prefix of (hence less than) every
  // pair that starts with U+DBFF. That sent U+10FC00..U+10FFFF down the
  // three-byte branch and dropped their low surrogate.
  const minSurrogate = MIN_SURROGATE.charCodeAt(0);
  const maxSurrogate = MAX_SURROGATE.charCodeAt(0);

  for (const c of sequence) {
    const charCode = c.charCodeAt(0);
    if (charCode < 0x80) {
      // One byte: 0xxxxxxx
      this.writeByteDescending(charCode);
    } else if (charCode < 0x800) {
      // Two bytes: 110xxxxx 10xxxxxx
      this.writeByteDescending(0xc0 | (charCode >>> 6));
      this.writeByteDescending(0x80 | (0x3f & charCode));
    } else if (charCode < minSurrogate || maxSurrogate < charCode) {
      // Three bytes: 1110xxxx 10xxxxxx 10xxxxxx
      this.writeByteDescending(0xe0 | (charCode >>> 12));
      this.writeByteDescending(0x80 | (0x3f & (charCode >>> 6)));
      this.writeByteDescending(0x80 | (0x3f & charCode));
    } else {
      // Leading unit is a high surrogate. `c` is never empty inside for...of,
      // so codePointAt(0) always yields a number here.
      const codePoint = c.codePointAt(0)!;
      // Four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
      this.writeByteDescending(0xf0 | (codePoint >>> 18));
      this.writeByteDescending(0x80 | (0x3f & (codePoint >>> 12)));
      this.writeByteDescending(0x80 | (0x3f & (codePoint >>> 6)));
      this.writeByteDescending(0x80 | (0x3f & codePoint));
    }
  }
  this.writeSeparatorDescending();
}
161+
103162
writeNumberAscending(val: number): void {
104163
// Values are encoded with a single byte length prefix, followed by the
105164
// actual value in big-endian format with leading 0 bytes dropped.
@@ -155,19 +214,59 @@ export class OrderedCodeWriter {
155214
return this.buffer.slice(0, this.position);
156215
}
157216

158-
writeBytesAscending(value: ByteString): void {
159-
fail('Not implemented');
217+
/**
 * Writes one byte in ascending order, escaping the two reserved values.
 *
 * Only the low eight bits of `b` are considered. 0x00 is emitted as the pair
 * ESCAPE1, NULL_BYTE and 0xff as the pair ESCAPE2, FF_BYTE; every other value
 * is emitted unchanged.
 *
 * @param b - The byte to write; bits above the lowest eight are discarded.
 */
private writeByteAscending(b: number): void {
  const value = b & 0xff;
  switch (value) {
    case ESCAPE1:
      this.writeEscapedByteAscending(ESCAPE1);
      this.writeEscapedByteAscending(NULL_BYTE);
      break;
    case ESCAPE2:
      this.writeEscapedByteAscending(ESCAPE2);
      this.writeEscapedByteAscending(FF_BYTE);
      break;
    default:
      this.writeEscapedByteAscending(value);
  }
}
161230

162-
writeBytesDescending(value: ByteString): void {
163-
fail('Not implemented');
231+
/**
 * Writes one byte in descending order, escaping the two reserved values.
 *
 * Only the low eight bits of `b` are considered. 0x00 is emitted as the pair
 * ESCAPE1, NULL_BYTE and 0xff as the pair ESCAPE2, FF_BYTE; every other value
 * is emitted as a single byte. All output goes through
 * `writeEscapedByteDescending`.
 *
 * @param b - The byte to write; bits above the lowest eight are discarded.
 */
private writeByteDescending(b: number): void {
  const masked = b & 0xff;
  if (masked === ESCAPE1) {
    this.writeEscapedByteDescending(ESCAPE1);
    this.writeEscapedByteDescending(NULL_BYTE);
  } else if (masked === ESCAPE2) {
    this.writeEscapedByteDescending(ESCAPE2);
    this.writeEscapedByteDescending(FF_BYTE);
  } else {
    // Forward the masked value, matching writeByteAscending, rather than
    // relying on the byte-array store to truncate an out-of-range `b`.
    this.writeEscapedByteDescending(masked);
  }
}
165244

166-
writeUtf8Ascending(sequence: string): void {
245+
/**
 * Writes the two-byte pair ESCAPE1, SEPARATOR in ascending order. The bytes
 * are written directly, without going through the escaping step.
 */
private writeSeparatorAscending(): void {
  for (const marker of [ESCAPE1, SEPARATOR]) {
    this.writeEscapedByteAscending(marker);
  }
}
249+
250+
/**
 * Writes the two-byte pair ESCAPE1, SEPARATOR in descending order. The bytes
 * are handed straight to `writeEscapedByteDescending`, without going through
 * the escaping step.
 */
private writeSeparatorDescending(): void {
  for (const marker of [ESCAPE1, SEPARATOR]) {
    this.writeEscapedByteDescending(marker);
  }
}
254+
255+
/**
 * Stores `b` at the current write position and advances the position by one.
 * No escaping is applied here.
 *
 * @param b - The value to store in the buffer.
 */
private writeEscapedByteAscending(b: number): void {
  // NOTE(review): ensureAvailable is defined outside this view; presumably it
  // guarantees room for one more byte. The buffer is read only after the call
  // in case it gets replaced.
  this.ensureAvailable(1);
  const offset = this.position;
  this.buffer[offset] = b;
  this.position = offset + 1;
}
259+
260+
/**
 * Stores the bitwise complement of `b` at the current write position and
 * advances the position by one. No escaping is applied here.
 *
 * @param b - The value whose complement is stored in the buffer.
 */
private writeEscapedByteDescending(b: number): void {
  // NOTE(review): ensureAvailable is defined outside this view; presumably it
  // guarantees room for one more byte. The buffer is read only after the call
  // in case it gets replaced.
  this.ensureAvailable(1);
  const offset = this.position;
  this.buffer[offset] = ~b;
  this.position = offset + 1;
}
264+
265+
/**
 * Placeholder for writing a ByteString in ascending order. The body only
 * calls `fail('Not implemented')` and never reads `value`.
 * NOTE(review): `fail` comes from '../util/assert' and presumably throws;
 * confirm against its definition.
 */
writeBytesAscending(value: ByteString): void {
167266
fail('Not implemented');
168267
}
169268

170-
writeUtf8Descending(sequence: string): void {
269+
/**
 * Placeholder for writing a ByteString in descending order. The body only
 * calls `fail('Not implemented')` and never reads `value`.
 * NOTE(review): `fail` comes from '../util/assert' and presumably throws;
 * confirm against its definition.
 */
writeBytesDescending(value: ByteString): void {
171270
fail('Not implemented');
172271
}
173272

packages/firestore/test/unit/index/ordered_code_writer.test.ts

Lines changed: 65 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,38 @@ const NUMBER_TEST_CASES: Array<ValueTestCase<number>> = [
7676
new ValueTestCase(Number.NaN, '08fff8000000000000', 'f70007ffffffffffff')
7777
];
7878

79+
// String fixtures for the UTF-8 encoders. Each case holds the input string,
// the expected ascending encoding as hex, and the expected descending encoding
// as hex. The ordering test compares every case with the cases listed after
// it and expects the earlier one's ascending bytes to compare lower, so new
// entries must be inserted in ascending encoded order.
const STRING_TEST_CASES: Array<ValueTestCase<string>> = [
80+
new ValueTestCase('', '0001', 'fffe'),
81+
new ValueTestCase('\u0000', '00ff0001', 'ff00fffe'),
82+
new ValueTestCase('\u0000\u0000', '00ff00ff0001', 'ff00ff00fffe'),
83+
new ValueTestCase('abc', '6162630001', '9e9d9cfffe'),
84+
new ValueTestCase(
85+
'xy¢z𠜎€𠜱あ𠝹',
86+
'7879c2a27af0a09c8ee282acf0a09cb1e38182f0a09db90001',
87+
'87863d5d850f5f63711d7d530f5f634e1c7e7d0f5f6246fffe'
88+
),
89+
new ValueTestCase(
90+
'¬˚ß∂∆ç',
91+
'c2accb9ac39fe28882e28886c3a70001',
92+
'3d5334653c601d777d1d77793c58fffe'
93+
),
94+
new ValueTestCase(
95+
'œ∑´´ß™£',
96+
'c593e28891c2b4c2b4c39fe284a2c2a30001',
97+
'3a6c1d776e3d4b3d4b3c601d7b5d3d5cfffe'
98+
),
99+
new ValueTestCase(
100+
'πåçasdl߬µœ∑âsldalskdåßµ∂π',
101+
'cf80c3a5c3a76173646cc39fc2acc2b5c593e28891c3a2736c64616c736b64c3a5c39fc2b5e28882cf800001',
102+
'307f3c5a3c589e8c9b933c603d533d4a3a6c1d776e3c5d8c939b9e938c949b3c5a3c603d4a1d777d307ffffe'
103+
),
104+
new ValueTestCase(
105+
'†¥¬´´`',
106+
'e280a0c2a5c2acc2b4c2b4600001',
107+
'1d7f5f3d5a3d533d4b3d4b9ffffe'
108+
)
109+
];
110+
79111
describe('Ordered Code Writer', () => {
80112
it('computes number of leading zeros', () => {
81113
for (let i = 0; i < 0xff; ++i) {
@@ -92,25 +124,42 @@ describe('Ordered Code Writer', () => {
92124
});
93125

94126
it('converts numbers to bits', () => {
95-
for (let i = 0; i < NUMBER_TEST_CASES.length; ++i) {
96-
const bytes = getBytes(NUMBER_TEST_CASES[i].val);
127+
verifyEncoding(NUMBER_TEST_CASES);
128+
});
129+
130+
it('orders numbers correctly', () => {
131+
verifyOrdering(NUMBER_TEST_CASES);
132+
});
133+
134+
it('converts strings to bits', () => {
135+
verifyEncoding(STRING_TEST_CASES);
136+
});
137+
138+
it('orders strings correctly', () => {
139+
verifyOrdering(STRING_TEST_CASES);
140+
});
141+
142+
// Shared encoding check: for every test case, encodes `val` with getBytes and
// expects the ascending and descending byte arrays to deep-equal the bytes
// that fromHex decodes from the case's `ascString` and `descString`.
function verifyEncoding(testCases: Array<ValueTestCase<unknown>>): void {
143+
for (let i = 0; i < testCases.length; ++i) {
144+
const bytes = getBytes(testCases[i].val);
97145
expect(bytes.asc).to.deep.equal(
98-
fromHex(NUMBER_TEST_CASES[i].ascString),
99-
'Ascending for ' + NUMBER_TEST_CASES[i].val
146+
fromHex(testCases[i].ascString),
147+
'Ascending for ' + testCases[i].val
100148
);
101149
expect(bytes.desc).to.deep.equal(
102-
fromHex(NUMBER_TEST_CASES[i].descString),
103-
'Descending for ' + NUMBER_TEST_CASES[i].val
150+
fromHex(testCases[i].descString),
151+
'Descending for ' + testCases[i].val
104152
);
105153
}
106-
});
154+
}
107155

108-
it('orders numbers correctly', () => {
109-
for (let i = 0; i < NUMBER_TEST_CASES.length; ++i) {
110-
for (let j = i; j < NUMBER_TEST_CASES.length; ++j) {
111-
const left = NUMBER_TEST_CASES[i].val;
156+
function verifyOrdering(testCases: Array<ValueTestCase<unknown>>): void {
157+
for (let i = 0; i < testCases.length; ++i) {
158+
for (let j = i; j < testCases.length; ++j) {
159+
const left = testCases[i].val;
112160
const leftBytes = getBytes(left);
113-
const right = NUMBER_TEST_CASES[j].val;
161+
const right = testCases[j].val;
162+
114163
const rightBytes = getBytes(right);
115164
expect(compare(leftBytes.asc, rightBytes.asc)).to.equal(
116165
i === j ? 0 : -1,
@@ -122,7 +171,7 @@ describe('Ordered Code Writer', () => {
122171
);
123172
}
124173
}
125-
});
174+
}
126175
});
127176

128177
function fromHex(hexString: string): Uint8Array {
@@ -151,6 +200,9 @@ function getBytes(val: unknown): { asc: Uint8Array; desc: Uint8Array } {
151200
if (typeof val === 'number') {
152201
ascWriter.writeNumberAscending(val);
153202
descWriter.writeNumberDescending(val);
203+
} else if (typeof val === 'string') {
204+
ascWriter.writeUtf8Ascending(val);
205+
descWriter.writeUtf8Descending(val);
154206
} else {
155207
throw new Error('Encoding not yet supported for ' + val);
156208
}

0 commit comments

Comments
 (0)