Skip to content

Commit c0261ec

Browse files
authored
Merge pull request #4018 from asgerf/asgerf/compare-fixes
Compare performance: Detect 'shadowed' cache hits and a few other fixes
2 parents fd73c06 + bfbf5a4 commit c0261ec

File tree

3 files changed

+257
-59
lines changed

3 files changed

+257
-59
lines changed

extensions/ql-vscode/src/log-insights/log-summary.ts

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,8 @@ type EvaluationStrategy =
1616
| "EXTENSIONAL"
1717
| "SENTINEL_EMPTY"
1818
| "CACHACA"
19-
| "CACHE_HIT";
19+
| "CACHE_HIT"
20+
| "NAMED_LOCAL";
2021

2122
interface SummaryEventBase {
2223
evaluationStrategy: EvaluationStrategy;
@@ -28,6 +29,8 @@ interface SummaryEventBase {
2829

2930
interface ResultEventBase extends SummaryEventBase {
3031
resultSize: number;
32+
dependencies?: { [key: string]: string };
33+
mainHash?: string;
3134
}
3235

3336
export interface ComputeSimple extends ResultEventBase {
@@ -60,6 +63,15 @@ export interface InLayer extends ResultEventBase {
6063
predicateIterationMillis: number[];
6164
}
6265

66+
interface NamedLocal extends ResultEventBase {
67+
evaluationStrategy: "NAMED_LOCAL";
68+
deltaSizes: number[];
69+
ra: Ra;
70+
pipelineRuns: PipelineRun[];
71+
queryCausingWork?: string;
72+
predicateIterationMillis: number[];
73+
}
74+
6375
interface ComputedExtensional extends ResultEventBase {
6476
evaluationStrategy: "COMPUTED_EXTENSIONAL";
6577
queryCausingWork?: string;
@@ -92,4 +104,5 @@ export type SummaryEvent =
92104
| Extensional
93105
| SentinelEmpty
94106
| Cachaca
95-
| CacheHit;
107+
| CacheHit
108+
| NamedLocal;

extensions/ql-vscode/src/log-insights/performance-comparison.ts

Lines changed: 83 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
1+
import { createHash } from "crypto";
12
import type { SummaryEvent } from "./log-summary";
23

34
export interface PipelineSummary {
45
steps: string[];
56
/** Total counts for each step in the RA array, across all iterations */
67
counts: number[];
8+
hash: string;
79
}
810

911
/**
@@ -26,6 +28,9 @@ export interface PerformanceComparisonDataFromLog {
2628
*/
2729
names: string[];
2830

31+
/** RA hash of the `i`th predicate event */
32+
raHashes: string[];
33+
2934
/** Number of milliseconds spent evaluating the `i`th predicate from the `names` array. */
3035
timeCosts: number[];
3136

@@ -52,41 +57,51 @@ export interface PerformanceComparisonDataFromLog {
5257
* All the pipeline runs seen for the `i`th predicate from the `names` array.
5358
*/
5459
pipelineSummaryList: Array<Record<string, PipelineSummary>>;
60+
61+
/** All dependencies of the `i`th predicate from the `names` array, encoded as a list of indices in `names`. */
62+
dependencyLists: number[][];
5563
}
5664

5765
export class PerformanceOverviewScanner {
58-
private readonly nameToIndex = new Map<string, number>();
5966
private readonly data: PerformanceComparisonDataFromLog = {
6067
names: [],
68+
raHashes: [],
6169
timeCosts: [],
6270
tupleCosts: [],
6371
cacheHitIndices: [],
6472
sentinelEmptyIndices: [],
6573
pipelineSummaryList: [],
6674
evaluationCounts: [],
6775
iterationCounts: [],
76+
dependencyLists: [],
6877
};
78+
private readonly raToIndex = new Map<string, number>();
79+
private readonly mainHashToRepr = new Map<string, number>();
80+
private readonly nameToIndex = new Map<string, number>();
6981

70-
private getPredicateIndex(name: string): number {
71-
const { nameToIndex } = this;
72-
let index = nameToIndex.get(name);
82+
private getPredicateIndex(name: string, ra: string): number {
83+
let index = this.raToIndex.get(ra);
7384
if (index === undefined) {
74-
index = nameToIndex.size;
75-
nameToIndex.set(name, index);
85+
index = this.raToIndex.size;
86+
this.raToIndex.set(ra, index);
7687
const {
7788
names,
89+
raHashes,
7890
timeCosts,
7991
tupleCosts,
8092
iterationCounts,
8193
evaluationCounts,
8294
pipelineSummaryList,
95+
dependencyLists,
8396
} = this.data;
8497
names.push(name);
98+
raHashes.push(ra);
8599
timeCosts.push(0);
86100
tupleCosts.push(0);
87101
iterationCounts.push(0);
88102
evaluationCounts.push(0);
89103
pipelineSummaryList.push({});
104+
dependencyLists.push([]);
90105
}
91106
return index;
92107
}
@@ -96,46 +111,63 @@ export class PerformanceOverviewScanner {
96111
}
97112

98113
onEvent(event: SummaryEvent): void {
99-
if (
100-
event.completionType !== undefined &&
101-
event.completionType !== "SUCCESS"
102-
) {
114+
const { completionType, evaluationStrategy, predicateName, raHash } = event;
115+
if (completionType !== undefined && completionType !== "SUCCESS") {
103116
return; // Skip any evaluation that wasn't successful
104117
}
105118

106-
switch (event.evaluationStrategy) {
107-
case "EXTENSIONAL":
119+
switch (evaluationStrategy) {
120+
case "EXTENSIONAL": {
121+
break;
122+
}
108123
case "COMPUTED_EXTENSIONAL": {
124+
if (predicateName.startsWith("cached_")) {
125+
// Add a dependency from a cached COMPUTED_EXTENSIONAL to the predicate with the actual contents.
126+
// The raHash of the this event may appear in a CACHE_HIT event in the other event log. The dependency
127+
// we're adding here is needed in order to associate the original predicate with such a cache hit.
128+
const originalName = predicateName.substring("cached_".length);
129+
const originalIndex = this.nameToIndex.get(originalName);
130+
if (originalIndex != null) {
131+
const index = this.getPredicateIndex(predicateName, raHash);
132+
this.data.dependencyLists[index].push(originalIndex);
133+
}
134+
}
109135
break;
110136
}
111137
case "CACHE_HIT":
112138
case "CACHACA": {
113139
// Record a cache hit, but only if the predicate has not been seen before.
114140
// We're mainly interested in the reuse of caches from an earlier query run as they can distort comparisons.
115-
if (!this.nameToIndex.has(event.predicateName)) {
141+
if (!this.raToIndex.has(raHash)) {
116142
this.data.cacheHitIndices.push(
117-
this.getPredicateIndex(event.predicateName),
143+
this.getPredicateIndex(predicateName, raHash),
118144
);
119145
}
120146
break;
121147
}
122148
case "SENTINEL_EMPTY": {
123-
this.data.sentinelEmptyIndices.push(
124-
this.getPredicateIndex(event.predicateName),
125-
);
149+
const index = this.getPredicateIndex(predicateName, raHash);
150+
this.data.sentinelEmptyIndices.push(index);
151+
const sentinelIndex = this.raToIndex.get(event.sentinelRaHash);
152+
if (sentinelIndex != null) {
153+
this.data.dependencyLists[index].push(sentinelIndex); // needed for matching up cache hits
154+
}
126155
break;
127156
}
128157
case "COMPUTE_RECURSIVE":
129158
case "COMPUTE_SIMPLE":
159+
case "NAMED_LOCAL":
130160
case "IN_LAYER": {
131-
const index = this.getPredicateIndex(event.predicateName);
161+
const index = this.getPredicateIndex(predicateName, raHash);
162+
this.nameToIndex.set(predicateName, index);
132163
let totalTime = 0;
133164
let totalTuples = 0;
134-
if (event.evaluationStrategy !== "IN_LAYER") {
165+
if (evaluationStrategy === "COMPUTE_SIMPLE") {
135166
totalTime += event.millis;
136167
} else {
137-
// IN_LAYER events do no record of their total time.
138-
// Make a best-effort estimate by adding up the positive iteration times (they can be negative).
168+
// Make a best-effort estimate of the total time by adding up the positive iteration times (they can be negative).
169+
// Note that for COMPUTE_RECURSIVE the "millis" field contain the total time of the SCC, not just that predicate,
170+
// but we don't have a good way to show that in the UI, so we rely on the accumulated iteration times.
139171
for (const millis of event.predicateIterationMillis ?? []) {
140172
if (millis > 0) {
141173
totalTime += millis;
@@ -148,13 +180,16 @@ export class PerformanceOverviewScanner {
148180
iterationCounts,
149181
evaluationCounts,
150182
pipelineSummaryList,
183+
dependencyLists,
151184
} = this.data;
152185
const pipelineSummaries = pipelineSummaryList[index];
186+
const dependencyList = dependencyLists[index];
153187
for (const { counts, raReference } of event.pipelineRuns ?? []) {
154188
// Get or create the pipeline summary for this RA
155189
const pipelineSummary = (pipelineSummaries[raReference] ??= {
156190
steps: event.ra[raReference],
157191
counts: counts.map(() => 0),
192+
hash: getPipelineHash(event.ra[raReference]),
158193
});
159194
const { counts: totalTuplesPerStep } = pipelineSummary;
160195
for (let i = 0, length = counts.length; i < length; ++i) {
@@ -169,6 +204,25 @@ export class PerformanceOverviewScanner {
169204
totalTuplesPerStep[i] += count;
170205
}
171206
}
207+
for (const dependencyHash of Object.values(event.dependencies ?? {})) {
208+
const dependencyIndex = this.raToIndex.get(dependencyHash);
209+
if (dependencyIndex != null) {
210+
dependencyList.push(dependencyIndex);
211+
}
212+
}
213+
// For predicates in the same SCC, add two-way dependencies with an arbitrary SCC member
214+
const sccHash =
215+
event.mainHash ??
216+
(evaluationStrategy === "COMPUTE_RECURSIVE" ? raHash : null);
217+
if (sccHash != null) {
218+
const mainIndex = this.mainHashToRepr.get(sccHash);
219+
if (mainIndex == null) {
220+
this.mainHashToRepr.set(sccHash, index);
221+
} else {
222+
dependencyLists[index].push(mainIndex);
223+
dependencyLists[mainIndex].push(index);
224+
}
225+
}
172226
timeCosts[index] += totalTime;
173227
tupleCosts[index] += totalTuples;
174228
iterationCounts[index] += event.pipelineRuns?.length ?? 0;
@@ -178,3 +232,11 @@ export class PerformanceOverviewScanner {
178232
}
179233
}
180234
}
235+
236+
function getPipelineHash(steps: string[]) {
237+
const md5 = createHash("md5");
238+
for (const step of steps) {
239+
md5.write(step);
240+
}
241+
return md5.digest("base64");
242+
}

0 commit comments

Comments
 (0)