Skip to content

ref(utils): Simplify normalization code and be more specific when erroring #4761

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Mar 25, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
248 changes: 128 additions & 120 deletions packages/utils/src/normalize.ts
Original file line number Diff line number Diff line change
@@ -1,9 +1,16 @@
import { isPrimitive, isSyntheticEvent } from './is';
import { Primitive } from '@sentry/types';

import { isError, isEvent, isNaN, isSyntheticEvent } from './is';
import { memoBuilder, MemoFunc } from './memo';
import { getWalkSource } from './object';
import { convertToPlainObject } from './object';
import { getFunctionName } from './stacktrace';

type UnknownMaybeWithToJson = unknown & { toJSON?: () => string };
type Prototype = { constructor: (...args: unknown[]) => unknown };
// This is a hack to placate TS, relying on the fact that technically, arrays are objects with integer keys. Normally we
// think of those keys as actual numbers, but `arr['0']` turns out to work just as well as `arr[0]`, and doing it this
// way lets us use a single type in the places where we behave as if we are only dealing with objects, even if some of them
// might be arrays.
type ObjOrArray<T> = { [key: string]: T };

/**
* Recursively normalizes the given object.
Expand All @@ -27,9 +34,9 @@ type UnknownMaybeWithToJson = unknown & { toJSON?: () => string };
export function normalize(input: unknown, depth: number = +Infinity, maxProperties: number = +Infinity): any {
try {
// since we're at the outermost level, there is no key
return walk('', input as UnknownMaybeWithToJson, depth, maxProperties);
} catch (_oO) {
return '**non-serializable**';
return visit('', input, depth, maxProperties);
} catch (err) {
return { ERROR: `**non-serializable** (${err})` };
}
}

Expand All @@ -41,180 +48,181 @@ export function normalizeToSize<T>(
// 100kB, as 200kB is max payload size, so half sounds reasonable
maxSize: number = 100 * 1024,
): T {
const serialized = normalize(object, depth);
const normalized = normalize(object, depth);

if (jsonSize(serialized) > maxSize) {
if (jsonSize(normalized) > maxSize) {
return normalizeToSize(object, depth - 1, maxSize);
}

return serialized as T;
return normalized as T;
}

/**
* Walks an object to perform a normalization on it
* Visits a node to perform normalization on it
*
* @param key of object that's walked in current iteration
* @param value object to be walked
* @param depth Optional number indicating how deep should walking be performed
* @param maxProperties Optional maximum number of properties/elements included in any single object/array
* @param key The key corresponding to the given node
* @param value The node to be visited
* @param depth Optional number indicating the maximum recursion depth
* @param maxProperties Optional maximum number of properties/elements included in any single object/array
* @param memo Optional Memo class handling decycling
*/
export function walk(
function visit(
key: string,
value: UnknownMaybeWithToJson,
value: unknown,
depth: number = +Infinity,
maxProperties: number = +Infinity,
memo: MemoFunc = memoBuilder(),
): unknown {
): Primitive | ObjOrArray<unknown> {
const [memoize, unmemoize] = memo;

// If we reach the maximum depth, serialize whatever is left
if (depth === 0) {
return serializeValue(value);
// If the value has a `toJSON` method, see if we can bail and let it do the work
const valueWithToJSON = value as unknown & { toJSON?: () => Primitive | ObjOrArray<unknown> };
if (valueWithToJSON && typeof valueWithToJSON.toJSON === 'function') {
try {
return valueWithToJSON.toJSON();
} catch (err) {
// pass (The built-in `toJSON` failed, but we can still try to do it ourselves)
}
}

// If value implements `toJSON` method, call it and return early
if (value !== null && value !== undefined && typeof value.toJSON === 'function') {
return value.toJSON();
// Get the simple cases out of the way first
if (value === null || (['number', 'boolean', 'string'].includes(typeof value) && !isNaN(value))) {
return value as Primitive;
}

// `makeSerializable` provides a string representation of certain non-serializable values. For all others, it's a
// pass-through. If what comes back is a primitive (either because it's been stringified or because it was primitive
// all along), we're done.
const serializable = makeSerializable(value, key);
if (isPrimitive(serializable)) {
return serializable;
}
const stringified = stringifyValue(key, value);

// Create source that we will use for the next iteration. It will either be an objectified error object (`Error` type
// with extracted key:value pairs) or the input itself.
const source = getWalkSource(value);
// Anything we could potentially dig into more (objects or arrays) will have come back as `"[object XXXX]"`.
// Everything else will have already been serialized, so if we don't see that pattern, we're done.
if (!stringified.startsWith('[object ')) {
return stringified;
}

// Create an accumulator that will act as a parent for all future iterations of that branch
const acc: { [key: string]: any } = Array.isArray(value) ? [] : {};
// We're also done if we've reached the max depth
if (depth === 0) {
// At this point we know `stringified` is a string of the form `"[object XXXX]"`. Clean it up so it's just `"[XXXX]"`.
return stringified.replace('object ', '');
}

// If we already walked that branch, bail out, as it's a circular reference
// If we've already visited this branch, bail out, as it's a circular reference. If not, note that we're seeing it now.
if (memoize(value)) {
return '[Circular ~]';
}

let propertyCount = 0;
// Walk all keys of the source
for (const innerKey in source) {
// At this point we know we either have an object or an array, we haven't seen it before, and we're going to recurse
// because we haven't yet reached the max depth. Create an accumulator to hold the results of visiting each
// property/entry, and keep track of the number of items we add to it.
const normalized = (Array.isArray(value) ? [] : {}) as ObjOrArray<unknown>;
let numAdded = 0;

// Before we begin, convert `Error` and `Event` instances into plain objects, since some of each of their relevant
// properties are non-enumerable and otherwise would get missed.
const visitable = (isError(value) || isEvent(value) ? convertToPlainObject(value) : value) as ObjOrArray<unknown>;

for (const visitKey in visitable) {
// Avoid iterating over fields in the prototype if they've somehow been exposed to enumeration.
if (!Object.prototype.hasOwnProperty.call(source, innerKey)) {
if (!Object.prototype.hasOwnProperty.call(visitable, visitKey)) {
continue;
}

if (propertyCount >= maxProperties) {
acc[innerKey] = '[MaxProperties ~]';
if (numAdded >= maxProperties) {
normalized[visitKey] = '[MaxProperties ~]';
break;
}

propertyCount += 1;
// Recursively visit all the child nodes
const visitValue = visitable[visitKey];
normalized[visitKey] = visit(visitKey, visitValue, depth - 1, maxProperties, memo);

// Recursively walk through all the child nodes
const innerValue: UnknownMaybeWithToJson = source[innerKey];
acc[innerKey] = walk(innerKey, innerValue, depth - 1, maxProperties, memo);
numAdded += 1;
}

// Once walked through all the branches, remove the parent from memo storage
// Once we've visited all the branches, remove the parent from memo storage
unmemoize(value);

// Return accumulated values
return acc;
return normalized;
}

/**
* Transform any non-primitive, BigInt, or Symbol-type value into a string. Acts as a no-op on strings, numbers,
* booleans, null, and undefined.
*
* @param value The value to stringify
* @returns For non-primitive, BigInt, and Symbol-type values, a string denoting the value's type, type and value, or
* type and `description` property, respectively. For non-BigInt, non-Symbol primitives, returns the original value,
* unchanged.
*/
function serializeValue(value: any): any {
// Node.js REPL notation
if (typeof value === 'string') {
return value;
}

const type = Object.prototype.toString.call(value);
if (type === '[object Object]') {
return '[Object]';
}
if (type === '[object Array]') {
return '[Array]';
}

// `makeSerializable` provides a string representation of certain non-serializable values. For all others, it's a
// pass-through.
const serializable = makeSerializable(value);
return isPrimitive(serializable) ? serializable : type;
}
// TODO remove this in v7 (this means the method will no longer be exported, under any name)
export { visit as walk };

/**
* makeSerializable()
* Stringify the given value. Handles various known special values and types.
*
* Takes unserializable input and make it serializer-friendly.
* Not meant to be used on simple primitives which already have a string representation, as it will, for example, turn
* the number 1231 into "[object Number]", nor on `null`, as it will throw.
*
* Handles globals, functions, `undefined`, `NaN`, and other non-serializable values.
* @param value The value to stringify
* @returns A stringified representation of the given value
*/
function makeSerializable<T>(value: T, key?: any): T | string {
if (key === 'domain' && value && typeof value === 'object' && (value as unknown as { _events: any })._events) {
return '[Domain]';
}
function stringifyValue(
key: unknown,
// this type is a tiny bit of a cheat, since this function does handle NaN (which is technically a number), but for
// our internal use, it'll do
value: Exclude<unknown, string | number | boolean | null>,
): string {
try {
if (key === 'domain' && value && typeof value === 'object' && (value as { _events: unknown })._events) {
return '[Domain]';
}

if (key === 'domainEmitter') {
return '[DomainEmitter]';
}
if (key === 'domainEmitter') {
return '[DomainEmitter]';
}

if (typeof (global as any) !== 'undefined' && (value as unknown) === global) {
return '[Global]';
}
// It's safe to use `global`, `window`, and `document` here in this manner, as we are asserting using `typeof` first
// which won't throw if they are not present.

// It's safe to use `window` and `document` here in this manner, as we are asserting using `typeof` first
// which won't throw if they are not present.
if (typeof global !== 'undefined' && value === global) {
return '[Global]';
}

// eslint-disable-next-line no-restricted-globals
if (typeof (window as any) !== 'undefined' && (value as unknown) === window) {
return '[Window]';
}
// eslint-disable-next-line no-restricted-globals
if (typeof window !== 'undefined' && value === window) {
return '[Window]';
}

// eslint-disable-next-line no-restricted-globals
if (typeof (document as any) !== 'undefined' && (value as unknown) === document) {
return '[Document]';
}
// eslint-disable-next-line no-restricted-globals
if (typeof document !== 'undefined' && value === document) {
return '[Document]';
}

// React's SyntheticEvent thingy
if (isSyntheticEvent(value)) {
return '[SyntheticEvent]';
}
// React's SyntheticEvent thingy
if (isSyntheticEvent(value)) {
return '[SyntheticEvent]';
}

if (typeof value === 'number' && value !== value) {
return '[NaN]';
}
if (typeof value === 'number' && value !== value) {
return '[NaN]';
}

if (value === void 0) {
return '[undefined]';
}
// this catches `undefined` (but not `null`, which is a primitive and can be serialized on its own)
if (value === void 0) {
return '[undefined]';
}

if (typeof value === 'function') {
return `[Function: ${getFunctionName(value)}]`;
}
if (typeof value === 'function') {
return `[Function: ${getFunctionName(value)}]`;
}

// symbols and bigints are considered primitives by TS, but aren't natively JSON-serializable
if (typeof value === 'symbol') {
return `[${String(value)}]`;
}

if (typeof value === 'symbol') {
return `[${String(value)}]`;
}
// stringified BigInts are indistinguishable from regular numbers, so we need to label them to avoid confusion
if (typeof value === 'bigint') {
return `[BigInt: ${String(value)}]`;
}

if (typeof value === 'bigint') {
return `[BigInt: ${String(value)}]`;
// Now that we've knocked out all the special cases and the primitives, all we have left are objects. Simply casting
// them to strings means that instances of classes which haven't defined their `toStringTag` will just come out as
// `"[object Object]"`. If we instead look at the constructor's name (which is the same as the name of the class),
// we can make sure that only plain objects come out that way.
return `[object ${(Object.getPrototypeOf(value) as Prototype).constructor.name}]`;
} catch (err) {
return `**non-serializable** (${err})`;
}

return value;
}

/** Calculates bytes size of input string */
Expand Down
Loading