Skip to content

Commit 7688714

Browse files
committed
fix base64 encoding and decoding methods, add tests
1 parent eaae7b0 commit 7688714

File tree

2 files changed

+111
-72
lines changed

2 files changed

+111
-72
lines changed

packages/utils/src/string.ts

Lines changed: 56 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -108,109 +108,94 @@ export function isMatchingPattern(value: string, pattern: RegExp | string): bool
108108
return false;
109109
}
110110

111-
// TODO: Base64 crossed with different character encodings turns out to be a ridiculous can of worms. Base64 expects
112-
// 8-bit data. JS only uses UTF-16. We need a way to be sure that every SDK is speaking the same language and can decode
113-
// values base64-encoded by other SDKs. The current proposal is to use UTF-8 as the common standard, and then
114-
// base64-encode that (meaning in JS we need to get there first). Doing it that way makes a whole lot of sense but is a
115-
// work in progress which isn't yet actually working. Leaving the current solution for now and will come back to it.
116-
117-
/**
118-
* Convert a Unicode string to a string in which each 16-bit unit occupies only one byte, which makes it safe to use as
119-
* input to `btoa`.
120-
*
121-
* Copied from https://developer.mozilla.org/en-US/docs/Web/API/WindowOrWorkerGlobalScope/btoa#Unicode_strings.
122-
*
123-
* @param unicodeString The string to convert
124-
* @returns A btoa-compatible encoding of the string
125-
*/
126-
function unicodeToBinary(unicodeString: string): string {
127-
const codeUnits = new Uint16Array(unicodeString.length);
128-
for (let i = 0; i < codeUnits.length; i++) {
129-
codeUnits[i] = unicodeString.charCodeAt(i);
130-
}
131-
return String.fromCharCode(...new Uint8Array(codeUnits.buffer));
132-
}
133-
134-
/**
135-
* Convert a binary string (such as one would get from `atob`) into a Unicode string.
136-
*
137-
* Copied from https://developer.mozilla.org/en-US/docs/Web/API/WindowOrWorkerGlobalScope/btoa#Unicode_strings.
138-
*
139-
* @param binaryString The string to convert
140-
* @returns The Unicode string represented by the binary string
141-
*/
142-
function binaryToUnicode(binaryString: string): string {
143-
const bytes = new Uint8Array(binaryString.length);
144-
for (let i = 0; i < bytes.length; i++) {
145-
bytes[i] = binaryString.charCodeAt(i);
146-
}
147-
return String.fromCharCode(...new Uint16Array(bytes.buffer));
148-
}
149-
150111
/**
151-
* Convert a base64 string to a Unicode (UTF-16) string.
112+
* Convert a Unicode string to a base64 string.
152113
*
153-
* @param base64String The string to decode.
114+
* @param unicodeString The string to base64-encode
154115
* @throws SentryError (because using the logger creates a circular dependency)
155-
* @returns A Unicode string
116+
* @returns A base64-encoded version of the string
156117
*/
157-
export function base64ToUnicode(base64String: string): string {
158-
if (typeof base64String !== 'string' || !BASE64_REGEX.test(base64String)) {
159-
throw new SentryError(`Unable to convert from base64. Input either isn't a string or isn't valid base64.`);
160-
}
118+
export function unicodeToBase64(unicodeString: string): string {
119+
const globalObject = getGlobalObject();
161120

162-
const errMsg = `Unable to convert string from base64: ${
163-
base64String.length > 256 ? `${base64String.slice(0, 256)}...` : base64String
121+
// Cast to a string just in case we're given something else
122+
const stringifiedInput = String(unicodeString);
123+
const errMsg = `Unable to convert to base64: ${
124+
stringifiedInput.length > 256 ? `${stringifiedInput.slice(0, 256)}...` : stringifiedInput
164125
}`;
165126

127+
// To account for the fact that different platforms use different character encodings natively, our `tracestate`
128+
// spec calls for all jsonified data to be encoded in UTF-8 bytes before being passed to the base64 encoder.
166129
try {
167-
// browsers have atob built in
168-
if ('atob' in getGlobalObject()) {
169-
// atob takes base64 (written in (a)scii) to (b)inary
170-
return binaryToUnicode(atob(base64String));
130+
// browser
131+
if ('btoa' in globalObject) {
132+
// encode using UTF-8
133+
const bytes = new TextEncoder().encode(unicodeString);
134+
135+
// decode using UTF-16 (JS's native encoding) since `btoa` requires string input
136+
const bytesAsString = String.fromCharCode(...bytes);
137+
138+
return btoa(bytesAsString);
171139
}
172140

173-
// Buffer only exists in node
174-
if ('Buffer' in getGlobalObject()) {
175-
return Buffer.from(base64String, 'base64').toString('utf16le');
141+
// Node
142+
if ('Buffer' in globalObject) {
143+
// encode using UTF-8
144+
const bytes = Buffer.from(unicodeString, 'utf-8');
145+
146+
// unlike the browser, Node can go straight from bytes to base64
147+
return bytes.toString('base64');
176148
}
177149
} catch (err) {
178150
throw new SentryError(`${errMsg} Got error: ${err}`);
179151
}
180152

153+
// we shouldn't ever get here, because one of `btoa` and `Buffer` should exist, but just in case...
181154
throw new SentryError(errMsg);
182155
}
183156

184157
/**
185-
* Convert a Unicode (UTF-16) string to a base64 string.
158+
* Convert a base64 string to a Unicode string.
186159
*
187-
* @param unicodeString The string to encode
160+
* @param base64String The string to decode
188161
* @throws SentryError (because using the logger creates a circular dependency)
189-
* @returns A base64-encoded version of the string
162+
* @returns A Unicode string
190163
*/
191-
export function unicodeToBase64(unicodeString: string): string {
192-
if (typeof unicodeString !== 'string') {
193-
throw new SentryError(`Unable to convert to base64. Input isn't a string.`);
194-
}
164+
export function base64ToUnicode(base64String: string): string {
165+
const globalObject = getGlobalObject();
195166

196-
const errMsg = `Unable to convert string to base64: ${
197-
unicodeString.length > 256 ? `${unicodeString.slice(0, 256)}...` : unicodeString
167+
// we cast to a string just in case we're given something else
168+
const stringifiedInput = String(base64String);
169+
const errMsg = `Unable to convert from base64: ${
170+
stringifiedInput.length > 256 ? `${stringifiedInput.slice(0, 256)}...` : stringifiedInput
198171
}`;
199172

173+
// To account for the fact that different platforms use different character encodings natively, our `tracestate` spec
174+
// calls for all jsonified data to be encoded in UTF-8 bytes before being passed to the base64 encoder. So to reverse
175+
// the process, decode from base64 to bytes, then feed those bytes to a UTF-8 decoder.
200176
try {
201-
// browsers have btoa built in
202-
if ('btoa' in getGlobalObject()) {
203-
// btoa takes (b)inary to base64 (written in (a)scii)
204-
return btoa(unicodeToBinary(unicodeString));
177+
// browser
178+
if ('atob' in globalObject) {
179+
// `atob` returns a string rather than bytes, so we first need to encode using the native encoding (UTF-16)
180+
const bytesAsString = atob(base64String);
181+
const bytes = [...bytesAsString].map(char => char.charCodeAt(0));
182+
183+
// decode using UTF-8 (cast the `bytes` array to a Uint8Array just because that's the format `decode()` expects)
184+
return new TextDecoder().decode(Uint8Array.from(bytes));
205185
}
206186

207-
// Buffer only exists in node
208-
if ('Buffer' in getGlobalObject()) {
209-
return Buffer.from(unicodeString, 'utf16le').toString('base64');
187+
// Node
188+
if ('Buffer' in globalObject) {
189+
// unlike the browser, Node can go straight from base64 to bytes
190+
const utf8Bytes = Buffer.from(base64String, 'base64');
191+
192+
// decode using UTF-8
193+
return utf8Bytes.toString('utf-8');
210194
}
211195
} catch (err) {
212196
throw new SentryError(`${errMsg} Got error: ${err}`);
213197
}
214198

199+
// we shouldn't ever get here, because one of `atob` and `Buffer` should exist, but just in case...
215200
throw new SentryError(errMsg);
216201
}

packages/utils/test/string.test.ts

Lines changed: 55 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
import { isMatchingPattern, truncate } from '../src/string';
1+
import { BASE64_REGEX, base64ToUnicode, isMatchingPattern, truncate, unicodeToBase64 } from '../src/string';
22

33
describe('truncate()', () => {
44
test('it works as expected', () => {
@@ -39,3 +39,57 @@ describe('isMatchingPattern()', () => {
3939
expect(isMatchingPattern([], 'foo')).toEqual(false);
4040
});
4141
});
42+
43+
describe('base64ToUnicode/unicodeToBase64', () => {
44+
const unicodeString = 'Dogs are great!';
45+
const base64String = 'RG9ncyBhcmUgZ3JlYXQh';
46+
47+
test('converts to valid base64', () => {
48+
expect(BASE64_REGEX.test(unicodeToBase64(unicodeString))).toBe(true);
49+
});
50+
51+
test('works as expected', () => {
52+
expect(unicodeToBase64(unicodeString)).toEqual(base64String);
53+
expect(base64ToUnicode(base64String)).toEqual(unicodeString);
54+
});
55+
56+
test('conversion functions are inverses', () => {
57+
expect(base64ToUnicode(unicodeToBase64(unicodeString))).toEqual(unicodeString);
58+
expect(unicodeToBase64(base64ToUnicode(base64String))).toEqual(base64String);
59+
});
60+
61+
test('can handle and preserve multi-byte characters in original string', () => {
62+
['🐶', 'כלבים נהדרים!', 'Of margir hundar! Ég geri ráð fyrir að ég þurfi stærra rúm.'].forEach(orig => {
63+
expect(() => {
64+
unicodeToBase64(orig);
65+
}).not.toThrowError();
66+
expect(base64ToUnicode(unicodeToBase64(orig))).toEqual(orig);
67+
});
68+
});
69+
70+
test('throws an error when given invalid input', () => {
71+
expect(() => {
72+
unicodeToBase64(null as any);
73+
}).toThrowError('Unable to convert to base64');
74+
expect(() => {
75+
unicodeToBase64(undefined as any);
76+
}).toThrowError('Unable to convert to base64');
77+
expect(() => {
78+
unicodeToBase64({} as any);
79+
}).toThrowError('Unable to convert to base64');
80+
81+
expect(() => {
82+
base64ToUnicode(null as any);
83+
}).toThrowError('Unable to convert from base64');
84+
expect(() => {
85+
base64ToUnicode(undefined as any);
86+
}).toThrowError('Unable to convert from base64');
87+
expect(() => {
88+
base64ToUnicode({} as any);
89+
}).toThrowError('Unable to convert from base64');
90+
91+
// Note that by design, in node base64 encoding and decoding will accept any string, whether or not it's valid
92+
// base64, by ignoring all invalid characters, including whitespace. Therefore, no wacky strings have been included
93+
// here because they don't actually error.
94+
});
95+
});

0 commit comments

Comments
 (0)