Skip to content

Commit 5375de8

Browse files
authored
ref(utils): Simplify normalization code and be more specific when erroring (#4761)
Currently, if an error occurs anywhere in the normalization process, the entire value is clobbered, in favor of the string `"**non-serializable**"`. This has a few drawbacks: - All data is lost, even data that wasn't malformed - It's harder to tell where the problem is, because there's no way to know which part of the data caused the problem - It's possible to pass `normalize` an object and get back a string, which can prove problematic if data is later added to the normalized property as if it were still an object (see the issue linked below). The goal of this change, then, was to identify all of the places where errors could occur and catch them right there, rather than relying on the `try-catch` in the main `normalize` function. In order to make this easier to reason about, the normalization code was first streamlined a bit. Key changes: - `serializeValue` and `makeSerializable` have been combined into a single function, `stringifyValue`, and any code not resulting in a string has been moved into the calling function. - All code in `stringifyValue` has been wrapped in a `try-catch`, so that errors which originate there will only affect the value at hand, rather than then entire object. - When `"**non-serializable**"` has to be used, it's now accompanied by the message belonging to the error which broke the serialization. - When the outer `normalize` function needs to return `"**non-serializable**"`, it now wraps it in an object, so that other data can still be added. - The `walk` function has been renamed `visit`, to better conform to the language of the Visitor Pattern. For the moment, it's still aliased to `walk` when exported, to preserve backwards compatibility. - The `getWalkSource` function has been renamed `convertToPlainObject`, to better reflect what it does, and some of its repetitive code has been extracted into the `serializeEventTarget` and `getOwnProperties` functions. Fixes #4719
1 parent 66ce092 commit 5375de8

File tree

3 files changed

+188
-181
lines changed

3 files changed

+188
-181
lines changed

packages/utils/src/normalize.ts

Lines changed: 128 additions & 120 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,16 @@
1-
import { isPrimitive, isSyntheticEvent } from './is';
1+
import { Primitive } from '@sentry/types';
2+
3+
import { isError, isEvent, isNaN, isSyntheticEvent } from './is';
24
import { memoBuilder, MemoFunc } from './memo';
3-
import { getWalkSource } from './object';
5+
import { convertToPlainObject } from './object';
46
import { getFunctionName } from './stacktrace';
57

6-
type UnknownMaybeWithToJson = unknown & { toJSON?: () => string };
8+
type Prototype = { constructor: (...args: unknown[]) => unknown };
9+
// This is a hack to placate TS, relying on the fact that technically, arrays are objects with integer keys. Normally we
10+
// think of those keys as actual numbers, but `arr['0']` turns out to work just as well as `arr[0]`, and doing it this
11+
// way lets us use a single type in the places where behave as if we are only dealing with objects, even if some of them
12+
// might be arrays.
13+
type ObjOrArray<T> = { [key: string]: T };
714

815
/**
916
* Recursively normalizes the given object.
@@ -27,9 +34,9 @@ type UnknownMaybeWithToJson = unknown & { toJSON?: () => string };
2734
export function normalize(input: unknown, depth: number = +Infinity, maxProperties: number = +Infinity): any {
2835
try {
2936
// since we're at the outermost level, there is no key
30-
return walk('', input as UnknownMaybeWithToJson, depth, maxProperties);
31-
} catch (_oO) {
32-
return '**non-serializable**';
37+
return visit('', input, depth, maxProperties);
38+
} catch (err) {
39+
return { ERROR: `**non-serializable** (${err})` };
3340
}
3441
}
3542

@@ -41,180 +48,181 @@ export function normalizeToSize<T>(
4148
// 100kB, as 200kB is max payload size, so half sounds reasonable
4249
maxSize: number = 100 * 1024,
4350
): T {
44-
const serialized = normalize(object, depth);
51+
const normalized = normalize(object, depth);
4552

46-
if (jsonSize(serialized) > maxSize) {
53+
if (jsonSize(normalized) > maxSize) {
4754
return normalizeToSize(object, depth - 1, maxSize);
4855
}
4956

50-
return serialized as T;
57+
return normalized as T;
5158
}
5259

5360
/**
54-
* Walks an object to perform a normalization on it
61+
* Visits a node to perform normalization on it
5562
*
56-
* @param key of object that's walked in current iteration
57-
* @param value object to be walked
58-
* @param depth Optional number indicating how deep should walking be performed
59-
* @param maxProperties Optional maximum number of properties/elements included in any single object/array
63+
* @param key The key corresponding to the given node
64+
* @param value The node to be visited
65+
* @param depth Optional number indicating the maximum recursion depth
66+
* @param maxProperties Optional maximum number of properties/elements included in any single object/array
6067
* @param memo Optional Memo class handling decycling
6168
*/
62-
export function walk(
69+
function visit(
6370
key: string,
64-
value: UnknownMaybeWithToJson,
71+
value: unknown,
6572
depth: number = +Infinity,
6673
maxProperties: number = +Infinity,
6774
memo: MemoFunc = memoBuilder(),
68-
): unknown {
75+
): Primitive | ObjOrArray<unknown> {
6976
const [memoize, unmemoize] = memo;
7077

71-
// If we reach the maximum depth, serialize whatever is left
72-
if (depth === 0) {
73-
return serializeValue(value);
78+
// If the value has a `toJSON` method, see if we can bail and let it do the work
79+
const valueWithToJSON = value as unknown & { toJSON?: () => Primitive | ObjOrArray<unknown> };
80+
if (valueWithToJSON && typeof valueWithToJSON.toJSON === 'function') {
81+
try {
82+
return valueWithToJSON.toJSON();
83+
} catch (err) {
84+
// pass (The built-in `toJSON` failed, but we can still try to do it ourselves)
85+
}
7486
}
7587

76-
// If value implements `toJSON` method, call it and return early
77-
if (value !== null && value !== undefined && typeof value.toJSON === 'function') {
78-
return value.toJSON();
88+
// Get the simple cases out of the way first
89+
if (value === null || (['number', 'boolean', 'string'].includes(typeof value) && !isNaN(value))) {
90+
return value as Primitive;
7991
}
8092

81-
// `makeSerializable` provides a string representation of certain non-serializable values. For all others, it's a
82-
// pass-through. If what comes back is a primitive (either because it's been stringified or because it was primitive
83-
// all along), we're done.
84-
const serializable = makeSerializable(value, key);
85-
if (isPrimitive(serializable)) {
86-
return serializable;
87-
}
93+
const stringified = stringifyValue(key, value);
8894

89-
// Create source that we will use for the next iteration. It will either be an objectified error object (`Error` type
90-
// with extracted key:value pairs) or the input itself.
91-
const source = getWalkSource(value);
95+
// Anything we could potentially dig into more (objects or arrays) will have come back as `"[object XXXX]"`.
96+
// Everything else will have already been serialized, so if we don't see that pattern, we're done.
97+
if (!stringified.startsWith('[object ')) {
98+
return stringified;
99+
}
92100

93-
// Create an accumulator that will act as a parent for all future itterations of that branch
94-
const acc: { [key: string]: any } = Array.isArray(value) ? [] : {};
101+
// We're also done if we've reached the max depth
102+
if (depth === 0) {
103+
// At this point we know `serialized` is a string of the form `"[object XXXX]"`. Clean it up so it's just `"[XXXX]"`.
104+
return stringified.replace('object ', '');
105+
}
95106

96-
// If we already walked that branch, bail out, as it's circular reference
107+
// If we've already visited this branch, bail out, as it's circular reference. If not, note that we're seeing it now.
97108
if (memoize(value)) {
98109
return '[Circular ~]';
99110
}
100111

101-
let propertyCount = 0;
102-
// Walk all keys of the source
103-
for (const innerKey in source) {
112+
// At this point we know we either have an object or an array, we haven't seen it before, and we're going to recurse
113+
// because we haven't yet reached the max depth. Create an accumulator to hold the results of visiting each
114+
// property/entry, and keep track of the number of items we add to it.
115+
const normalized = (Array.isArray(value) ? [] : {}) as ObjOrArray<unknown>;
116+
let numAdded = 0;
117+
118+
// Before we begin, convert`Error` and`Event` instances into plain objects, since some of each of their relevant
119+
// properties are non-enumerable and otherwise would get missed.
120+
const visitable = (isError(value) || isEvent(value) ? convertToPlainObject(value) : value) as ObjOrArray<unknown>;
121+
122+
for (const visitKey in visitable) {
104123
// Avoid iterating over fields in the prototype if they've somehow been exposed to enumeration.
105-
if (!Object.prototype.hasOwnProperty.call(source, innerKey)) {
124+
if (!Object.prototype.hasOwnProperty.call(visitable, visitKey)) {
106125
continue;
107126
}
108127

109-
if (propertyCount >= maxProperties) {
110-
acc[innerKey] = '[MaxProperties ~]';
128+
if (numAdded >= maxProperties) {
129+
normalized[visitKey] = '[MaxProperties ~]';
111130
break;
112131
}
113132

114-
propertyCount += 1;
133+
// Recursively visit all the child nodes
134+
const visitValue = visitable[visitKey];
135+
normalized[visitKey] = visit(visitKey, visitValue, depth - 1, maxProperties, memo);
115136

116-
// Recursively walk through all the child nodes
117-
const innerValue: UnknownMaybeWithToJson = source[innerKey];
118-
acc[innerKey] = walk(innerKey, innerValue, depth - 1, maxProperties, memo);
137+
numAdded += 1;
119138
}
120139

121-
// Once walked through all the branches, remove the parent from memo storage
140+
// Once we've visited all the branches, remove the parent from memo storage
122141
unmemoize(value);
123142

124143
// Return accumulated values
125-
return acc;
144+
return normalized;
126145
}
127146

128-
/**
129-
* Transform any non-primitive, BigInt, or Symbol-type value into a string. Acts as a no-op on strings, numbers,
130-
* booleans, null, and undefined.
131-
*
132-
* @param value The value to stringify
133-
* @returns For non-primitive, BigInt, and Symbol-type values, a string denoting the value's type, type and value, or
134-
* type and `description` property, respectively. For non-BigInt, non-Symbol primitives, returns the original value,
135-
* unchanged.
136-
*/
137-
function serializeValue(value: any): any {
138-
// Node.js REPL notation
139-
if (typeof value === 'string') {
140-
return value;
141-
}
142-
143-
const type = Object.prototype.toString.call(value);
144-
if (type === '[object Object]') {
145-
return '[Object]';
146-
}
147-
if (type === '[object Array]') {
148-
return '[Array]';
149-
}
150-
151-
// `makeSerializable` provides a string representation of certain non-serializable values. For all others, it's a
152-
// pass-through.
153-
const serializable = makeSerializable(value);
154-
return isPrimitive(serializable) ? serializable : type;
155-
}
147+
// TODO remove this in v7 (this means the method will no longer be exported, under any name)
148+
export { visit as walk };
156149

157150
/**
158-
* makeSerializable()
151+
* Stringify the given value. Handles various known special values and types.
159152
*
160-
* Takes unserializable input and make it serializer-friendly.
153+
* Not meant to be used on simple primitives which already have a string representation, as it will, for example, turn
154+
* the number 1231 into "[Object Number]", nor on `null`, as it will throw.
161155
*
162-
* Handles globals, functions, `undefined`, `NaN`, and other non-serializable values.
156+
* @param value The value to stringify
157+
* @returns A stringified representation of the given value
163158
*/
164-
function makeSerializable<T>(value: T, key?: any): T | string {
165-
if (key === 'domain' && value && typeof value === 'object' && (value as unknown as { _events: any })._events) {
166-
return '[Domain]';
167-
}
159+
function stringifyValue(
160+
key: unknown,
161+
// this type is a tiny bit of a cheat, since this function does handle NaN (which is technically a number), but for
162+
// our internal use, it'll do
163+
value: Exclude<unknown, string | number | boolean | null>,
164+
): string {
165+
try {
166+
if (key === 'domain' && value && typeof value === 'object' && (value as { _events: unknown })._events) {
167+
return '[Domain]';
168+
}
168169

169-
if (key === 'domainEmitter') {
170-
return '[DomainEmitter]';
171-
}
170+
if (key === 'domainEmitter') {
171+
return '[DomainEmitter]';
172+
}
172173

173-
if (typeof (global as any) !== 'undefined' && (value as unknown) === global) {
174-
return '[Global]';
175-
}
174+
// It's safe to use `global`, `window`, and `document` here in this manner, as we are asserting using `typeof` first
175+
// which won't throw if they are not present.
176176

177-
// It's safe to use `window` and `document` here in this manner, as we are asserting using `typeof` first
178-
// which won't throw if they are not present.
177+
if (typeof global !== 'undefined' && value === global) {
178+
return '[Global]';
179+
}
179180

180-
// eslint-disable-next-line no-restricted-globals
181-
if (typeof (window as any) !== 'undefined' && (value as unknown) === window) {
182-
return '[Window]';
183-
}
181+
// eslint-disable-next-line no-restricted-globals
182+
if (typeof window !== 'undefined' && value === window) {
183+
return '[Window]';
184+
}
184185

185-
// eslint-disable-next-line no-restricted-globals
186-
if (typeof (document as any) !== 'undefined' && (value as unknown) === document) {
187-
return '[Document]';
188-
}
186+
// eslint-disable-next-line no-restricted-globals
187+
if (typeof document !== 'undefined' && value === document) {
188+
return '[Document]';
189+
}
189190

190-
// React's SyntheticEvent thingy
191-
if (isSyntheticEvent(value)) {
192-
return '[SyntheticEvent]';
193-
}
191+
// React's SyntheticEvent thingy
192+
if (isSyntheticEvent(value)) {
193+
return '[SyntheticEvent]';
194+
}
194195

195-
if (typeof value === 'number' && value !== value) {
196-
return '[NaN]';
197-
}
196+
if (typeof value === 'number' && value !== value) {
197+
return '[NaN]';
198+
}
198199

199-
if (value === void 0) {
200-
return '[undefined]';
201-
}
200+
// this catches `undefined` (but not `null`, which is a primitive and can be serialized on its own)
201+
if (value === void 0) {
202+
return '[undefined]';
203+
}
202204

203-
if (typeof value === 'function') {
204-
return `[Function: ${getFunctionName(value)}]`;
205-
}
205+
if (typeof value === 'function') {
206+
return `[Function: ${getFunctionName(value)}]`;
207+
}
206208

207-
// symbols and bigints are considered primitives by TS, but aren't natively JSON-serilaizable
209+
if (typeof value === 'symbol') {
210+
return `[${String(value)}]`;
211+
}
208212

209-
if (typeof value === 'symbol') {
210-
return `[${String(value)}]`;
211-
}
213+
// stringified BigInts are indistinguishable from regular numbers, so we need to label them to avoid confusion
214+
if (typeof value === 'bigint') {
215+
return `[BigInt: ${String(value)}]`;
216+
}
212217

213-
if (typeof value === 'bigint') {
214-
return `[BigInt: ${String(value)}]`;
218+
// Now that we've knocked out all the special cases and the primitives, all we have left are objects. Simply casting
219+
// them to strings means that instances of classes which haven't defined their `toStringTag` will just come out as
220+
// `"[object Object]"`. If we instead look at the constructor's name (which is the same as the name of the class),
221+
// we can make sure that only plain objects come out that way.
222+
return `[object ${(Object.getPrototypeOf(value) as Prototype).constructor.name}]`;
223+
} catch (err) {
224+
return `**non-serializable** (${err})`;
215225
}
216-
217-
return value;
218226
}
219227

220228
/** Calculates bytes size of input string */

0 commit comments

Comments
 (0)