@@ -108,109 +108,94 @@ export function isMatchingPattern(value: string, pattern: RegExp | string): bool
108
108
return false ;
109
109
}
110
110
111
- // TODO: Base64 crossed with different character encodings turns out to be a ridiculous can of worms. Base64 expects
112
- // 8-bit data. JS only uses UTF-16. We need a way to be sure that every SDK is speaking the same language and can decode
113
- // values base64-encoded by other SDKs. The current proposal is to use UTF-8 as the common standard, and then
114
- // base64-encode that (meaning in JS we need to get there first). Doing it that way makes a whole lot of sense but is a
115
- // work in progress which isn't yet actually working. Leaving the current solution for now and will come back to it.
116
-
117
- /**
118
- * Convert a Unicode string to a string in which each 16-bit unit occupies only one byte, which makes it safe to use as
119
- * input to `btoa`.
120
- *
121
- * Copied from https://developer.mozilla.org/en-US/docs/Web/API/WindowOrWorkerGlobalScope/btoa#Unicode_strings.
122
- *
123
- * @param unicodeString The string to convert
124
- * @returns A btoa-compatible encoding of the string
125
- */
126
- function unicodeToBinary ( unicodeString : string ) : string {
127
- const codeUnits = new Uint16Array ( unicodeString . length ) ;
128
- for ( let i = 0 ; i < codeUnits . length ; i ++ ) {
129
- codeUnits [ i ] = unicodeString . charCodeAt ( i ) ;
130
- }
131
- return String . fromCharCode ( ...new Uint8Array ( codeUnits . buffer ) ) ;
132
- }
133
-
134
- /**
135
- * Convert a binary string (such as one would get from `atob`) into a Unicode string.
136
- *
137
- * Copied from https://developer.mozilla.org/en-US/docs/Web/API/WindowOrWorkerGlobalScope/btoa#Unicode_strings.
138
- *
139
- * @param binaryString The string to convert
140
- * @returns A btoa-compatible encoding of the string
141
- */
142
- function binaryToUnicode ( binaryString : string ) : string {
143
- const bytes = new Uint8Array ( binaryString . length ) ;
144
- for ( let i = 0 ; i < bytes . length ; i ++ ) {
145
- bytes [ i ] = binaryString . charCodeAt ( i ) ;
146
- }
147
- return String . fromCharCode ( ...new Uint16Array ( bytes . buffer ) ) ;
148
- }
149
-
150
111
/**
 * Convert a Unicode string to a base64 string.
 *
 * To account for the fact that different platforms use different character encodings natively, our `tracestate` spec
 * calls for all jsonified data to be encoded in UTF-8 bytes before being passed to the base64 encoder. This function
 * therefore always goes string -> UTF-8 bytes -> base64, regardless of the platform's native string encoding.
 *
 * @param unicodeString The string to base64-encode. Non-string values are stringified with `String()` first.
 * @throws SentryError (because using the logger creates a circular dependency) if encoding fails, or if neither
 * `btoa` nor `Buffer` is available on the global object
 * @returns A base64-encoded version of the string
 */
export function unicodeToBase64(unicodeString: string): string {
  const globalObject = getGlobalObject();

  // Cast to a string just in case we're given something else. The stringified value is used both for the error
  // message and for the conversion itself, so the browser and Node branches treat non-string input identically.
  const stringifiedInput = String(unicodeString);
  const errMsg = `Unable to convert to base64: ${
    stringifiedInput.length > 256 ? `${stringifiedInput.slice(0, 256)}...` : stringifiedInput
  }`;

  try {
    // browser
    if ('btoa' in globalObject) {
      // encode using UTF-8
      const bytes = new TextEncoder().encode(stringifiedInput);

      // `btoa` requires string input, so turn each byte into the character with the same code. This is done in
      // fixed-size chunks because spreading a very large array into a single `String.fromCharCode` call can exceed
      // the engine's limit on the number of function arguments and throw a `RangeError`.
      const CHUNK_SIZE = 0x8000;
      let bytesAsString = '';
      for (let offset = 0; offset < bytes.length; offset += CHUNK_SIZE) {
        bytesAsString += String.fromCharCode(...bytes.subarray(offset, offset + CHUNK_SIZE));
      }

      return btoa(bytesAsString);
    }

    // Node
    if ('Buffer' in globalObject) {
      // encode using UTF-8
      const bytes = Buffer.from(stringifiedInput, 'utf-8');

      // unlike the browser, Node can go straight from bytes to base64
      return bytes.toString('base64');
    }
  } catch (err) {
    throw new SentryError(`${errMsg} Got error: ${err}`);
  }

  // we shouldn't ever get here, because one of `btoa` and `Buffer` should exist, but just in case...
  throw new SentryError(errMsg);
}
183
156
184
157
/**
 * Convert a base64 string to a Unicode string.
 *
 * To account for the fact that different platforms use different character encodings natively, our `tracestate` spec
 * calls for all jsonified data to be encoded in UTF-8 bytes before being passed to the base64 encoder. So to reverse
 * the process, this function decodes from base64 to bytes, then feeds those bytes to a UTF-8 decoder.
 *
 * @param base64String The string to decode. Non-string values are stringified with `String()` first.
 * @throws SentryError (because using the logger creates a circular dependency) if decoding fails, or if neither
 * `atob` nor `Buffer` is available on the global object
 * @returns A Unicode string
 */
export function base64ToUnicode(base64String: string): string {
  const globalObject = getGlobalObject();

  // Cast to a string just in case we're given something else. The stringified value is used both for the error
  // message and for the conversion itself, so the browser and Node branches treat non-string input identically.
  const stringifiedInput = String(base64String);
  const errMsg = `Unable to convert from base64: ${
    stringifiedInput.length > 256 ? `${stringifiedInput.slice(0, 256)}...` : stringifiedInput
  }`;

  try {
    // browser
    if ('atob' in globalObject) {
      // `atob` returns a string rather than bytes: each character's code is the value of one decoded byte
      const bytesAsString = atob(stringifiedInput);

      // copy those values straight into a byte array (the format `decode()` expects), without building any
      // intermediate arrays
      const bytes = new Uint8Array(bytesAsString.length);
      for (let i = 0; i < bytesAsString.length; i++) {
        bytes[i] = bytesAsString.charCodeAt(i);
      }

      // decode using UTF-8 (the `TextDecoder` default)
      return new TextDecoder().decode(bytes);
    }

    // Node
    if ('Buffer' in globalObject) {
      // unlike the browser, Node can go straight from base64 to bytes
      const utf8Bytes = Buffer.from(stringifiedInput, 'base64');

      // decode using UTF-8
      return utf8Bytes.toString('utf-8');
    }
  } catch (err) {
    throw new SentryError(`${errMsg} Got error: ${err}`);
  }

  // we shouldn't ever get here, because one of `atob` and `Buffer` should exist, but just in case...
  throw new SentryError(errMsg);
}
0 commit comments