Skip to content

ref(replay): improve dedupe logic #8836

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 7 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 2 additions & 6 deletions packages/replay/src/coreHandlers/performanceObserver.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import type { AllPerformanceEntry, ReplayContainer } from '../types';
import type { ReplayContainer } from '../types';
import { dedupePerformanceEntries } from '../util/dedupePerformanceEntries';

/**
Expand All @@ -8,11 +8,7 @@ export function setupPerformanceObserver(replay: ReplayContainer): PerformanceOb
const performanceObserverHandler = (list: PerformanceObserverEntryList): void => {
// For whatever reason the observer was returning duplicate navigation
// entries (the other entry types were not duplicated).
const newPerformanceEntries = dedupePerformanceEntries(
replay.performanceEvents,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🤔 I don't remember if we were seeing duplicate entries across calls to the observer handler

list.getEntries() as AllPerformanceEntry[],
);
replay.performanceEvents = newPerformanceEntries;
replay.performanceEvents = dedupePerformanceEntries(list.getEntries(), replay.performanceEvents);
};

const performanceObserver = new PerformanceObserver(performanceObserverHandler);
Expand Down
177 changes: 104 additions & 73 deletions packages/replay/src/util/dedupePerformanceEntries.ts
Original file line number Diff line number Diff line change
@@ -1,90 +1,121 @@
const NAVIGATION_ENTRY_KEYS: Array<keyof PerformanceNavigationTiming> = [
'name',
'type',
'startTime',
'transferSize',
'duration',
];

function isNavigationEntryEqual(a: PerformanceNavigationTiming) {
return function (b: PerformanceNavigationTiming) {
return NAVIGATION_ENTRY_KEYS.every(key => a[key] === b[key]);
};
function nbDuplicatesToSkip(queue: PerformanceEntry[], curr: PerformanceEntry, startAt: number): number {
let i = startAt;
let nextNode: PerformanceEntry = queue[i];
while (
nextNode &&
// Cheap reference check first, then compare keys. Since entries are sorted by startTime, compare that first.
(nextNode === curr ||
(nextNode.startTime === curr.startTime &&
nextNode.duration === curr.duration &&
nextNode.name === curr.name &&
nextNode.entryType === curr.entryType &&
(nextNode as PerformanceResourceTiming).transferSize === (curr as PerformanceResourceTiming).transferSize))
) {
nextNode = queue[++i];
}

return i - startAt;
}

enum Queue {
CURRENT = 0,
PREVIOUS = 1,
}

/**
* There are some difficulties diagnosing why there are duplicate navigation
* entries. We've witnessed several intermittent results:
* - duplicate entries have duration = 0
* - duplicate entries are the same object reference
* - none of the above
* Deduplicates performance entries list - assumes a sorted list as input as per
* the spec https://w3c.github.io/performance-timeline/#dom-performance-getentries.
*
*
* Compare the values of several keys to determine if the entries are duplicates or not.
* Deduplication is performed because we have observed duplicate navigation entries in real world data:
* Some of the duplicates include:
* - entries where duration = 0
* - same reference entries
* - none of the above
* @param list {PerformanceEntryList}
* @returns PerformanceEntryList
*/
// TODO (high-prio): Figure out wth is returned here
// eslint-disable-next-line @typescript-eslint/explicit-function-return-type
// eslint-disable-next-line complexity
export function dedupePerformanceEntries(
currentList: PerformanceEntryList,
newList: PerformanceEntryList,
list: PerformanceEntryList,
previousList: PerformanceEntryList,
): PerformanceEntryList {
// Partition `currentList` into 3 different lists based on entryType
const [existingNavigationEntries, existingLcpEntries, existingEntries] = currentList.reduce(
(acc: [PerformanceNavigationTiming[], PerformancePaintTiming[], PerformanceEntryList], entry) => {
if (entry.entryType === 'navigation') {
acc[0].push(entry as PerformanceNavigationTiming);
} else if (entry.entryType === 'largest-contentful-paint') {
acc[1].push(entry as PerformancePaintTiming);
} else {
acc[2].push(entry);
}
return acc;
},
[[], [], []],
);

const newEntries: PerformanceEntryList = [];
const newNavigationEntries: PerformanceNavigationTiming[] = [];
let newLcpEntry: PerformancePaintTiming | undefined = existingLcpEntries.length
? existingLcpEntries[existingLcpEntries.length - 1] // Take the last element as list is sorted
: undefined;

newList.forEach(entry => {
if (!list.length && !previousList.length) {
return [];
}

let i = 0;
let n = 0;

// Helper to advance queue
function advanceQueue(queue: Queue): void {
if (queue === Queue.CURRENT) {
i++;
} else {
n++;
}
}

// Initialize deduped list and lcp entry index.
// We keep track of the lcp entry index so that we can replace it if we find a later entry.
const deduped: PerformanceEntryList = [];
let lcpEntryIndex: number | undefined;

while (i < list.length || n < previousList.length) {
const current = list[i];
const previous = previousList[n];

// If nothing in queue A, take from bueue B,
// if nothing in queue B, take from queue A.
// If both queues have entries, take the earliest one.
const queueType = !previous
? Queue.CURRENT
: !current
? Queue.PREVIOUS
: list[i].startTime <= previousList[n].startTime
? Queue.CURRENT
: Queue.PREVIOUS;

const entry = queueType === Queue.CURRENT ? list[i] : previousList[n];

// Assign latest lcp entry if it later than current latest entry
// If we do not have one yet, add it to the list and store it's index
if (entry.entryType === 'largest-contentful-paint') {
// We want the latest LCP event only
if (!newLcpEntry || newLcpEntry.startTime < entry.startTime) {
newLcpEntry = entry;
if (lcpEntryIndex === undefined || entry.startTime > deduped[lcpEntryIndex].startTime) {
if (lcpEntryIndex === undefined) {
deduped.push(entry);
lcpEntryIndex = deduped.length - 1;
} else {
deduped[lcpEntryIndex] = entry;
}
}
return;
advanceQueue(queueType);
continue;
}

if (entry.entryType === 'navigation') {
const navigationEntry = entry as PerformanceNavigationTiming;

// Check if the navigation entry is contained in currentList or newList
if (
// Ignore any navigation entries with duration 0, as they are likely duplicates
entry.duration > 0 &&
// Ensure new entry does not already exist in existing entries
!existingNavigationEntries.find(isNavigationEntryEqual(navigationEntry)) &&
// Ensure new entry does not already exist in new list of navigation entries
!newNavigationEntries.find(isNavigationEntryEqual(navigationEntry))
) {
newNavigationEntries.push(navigationEntry);
// By default, any entry is considered new and we peek ahead to see how many duplicates there are.
// We can rely on the peek behavior as the spec states that entries are sorted by startTime. As we peek,
// we keep a count of how many duplicates we see and skip over them.
const currentDuplicates = nbDuplicatesToSkip(list, entry, i);
const previousDuplicates = nbDuplicatesToSkip(previousList, entry, n);

// Jump to next entry after the duplicates - if there are none, advance the queue
i += currentDuplicates ? currentDuplicates : 0;
n += previousDuplicates ? previousDuplicates : 0;

if (entry.duration <= 0) {
advanceQueue(queueType);
continue;
}

// Otherwise this navigation entry is considered a duplicate and is thrown away
return;
deduped.push(entry);
continue;
}

newEntries.push(entry);
});

// Re-combine and sort by startTime
return [
...(newLcpEntry ? [newLcpEntry] : []),
...existingNavigationEntries,
...existingEntries,
...newEntries,
...newNavigationEntries,
].sort((a, b) => a.startTime - b.startTime);
deduped.push(entry);
advanceQueue(queueType);
}

return deduped;
}
47 changes: 42 additions & 5 deletions packages/replay/test/unit/util/dedupePerformanceEntries.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ describe('Unit | util | dedupePerformanceEntries', () => {
});
const c = PerformanceEntryNavigation({ startTime: 2, type: 'reload' });
const d = PerformanceEntryResource({ startTime: 1.5 });
const entries = [a, a, b, d, b, c];
const entries = [a, a, b, d, b, c].sort((a, b) => a.startTime - b.startTime);
expect(dedupePerformanceEntries([], entries)).toEqual([a, b, d, c]);
});

Expand All @@ -34,7 +34,7 @@ describe('Unit | util | dedupePerformanceEntries', () => {
});
const c = PerformanceEntryNavigation({ startTime: 2, type: 'reload' });
const d = PerformanceEntryNavigation({ startTime: 1000 });
const entries = [a, a, b, b, c];
const entries = [a, a, b, b, c].sort((a, b) => a.startTime - b.startTime);
expect(dedupePerformanceEntries([a, d], entries)).toEqual([a, b, c, d]);
});

Expand All @@ -43,7 +43,7 @@ describe('Unit | util | dedupePerformanceEntries', () => {
const b = PerformanceEntryLcp({ startTime: 100, name: 'b' });
const c = PerformanceEntryLcp({ startTime: 200, name: 'c' });
const d = PerformanceEntryLcp({ startTime: 5, name: 'd' }); // don't assume they are ordered
const entries = [a, b, c, d];
const entries = [a, b, c, d].sort((a, b) => a.startTime - b.startTime);
expect(dedupePerformanceEntries([], entries)).toEqual([a, c]);
});

Expand All @@ -52,7 +52,7 @@ describe('Unit | util | dedupePerformanceEntries', () => {
const b = PerformanceEntryLcp({ startTime: 100, name: 'b' });
const c = PerformanceEntryLcp({ startTime: 200, name: 'c' });
const d = PerformanceEntryLcp({ startTime: 5, name: 'd' }); // don't assume they are ordered
const entries = [b, c, d];
const entries = [b, c, d].sort((a, b) => a.startTime - b.startTime);
expect(dedupePerformanceEntries([a, d], entries)).toEqual([a, c]);
});

Expand All @@ -61,7 +61,44 @@ describe('Unit | util | dedupePerformanceEntries', () => {
const b = PerformanceEntryLcp({ startTime: 100, name: 'b' });
const c = PerformanceEntryLcp({ startTime: 200, name: 'c' });
const d = PerformanceEntryLcp({ startTime: 5, name: 'd' }); // don't assume they are ordered
const entries = [b, d];
const entries = [b, d].sort((a, b) => a.startTime - b.startTime);
expect(dedupePerformanceEntries([a, c], entries)).toEqual([a, c]);
});

it('dedupes when the list is the same', () => {
const a = PerformanceEntryNavigation({ startTime: Number.NEGATIVE_INFINITY });
expect(dedupePerformanceEntries([a], [a])).toEqual([a]);
});

it('does not spin forever in weird edge cases', function () {
expect(dedupePerformanceEntries([], [])).toEqual([]);

const randomFrom = (arr: number[]) => arr[Math.floor(Math.random() * arr.length)];
const randomNumberBetweenInclusive = (min: number, max: number) =>
Math.floor(Math.random() * (max - min + 1)) + min;

const starts = [-100, 0, 100, Number.NEGATIVE_INFINITY, Number.POSITIVE_INFINITY, NaN, undefined];
const entries = [PerformanceEntryLcp, PerformanceEntryNavigation, PerformanceEntryResource];

for (let i = 0; i < 100; i++) {
const previousEntries: any[] = [];
const currEntries: any[] = [];
for (let j = 0; j < randomNumberBetweenInclusive(0, 10); j++) {
// @ts-expect-error
previousEntries.push(randomFrom(entries)({ startTime: randomFrom(starts) }));
}
for (let j = 0; j < randomNumberBetweenInclusive(0, 10); j++) {
// @ts-expect-error
currEntries.push(randomFrom(entries)({ startTime: randomFrom(starts) }));
}

expect(() => dedupePerformanceEntries(previousEntries, currEntries)).not.toThrow();
expect(() =>
dedupePerformanceEntries(
previousEntries.sort((a, b) => a.startTime - b.startTime),
currEntries.sort((a, b) => a.startTime - b.startTime),
),
).not.toThrow();
}
});
});