Skip to content

Commit 345fd0c

Browse files
committed
[FS-AFDO] Generate pseudo-probe-based profiles with FS-discriminators.
This change enables generating pseudo-probe-based FS-AFDO profiles. The change is straightforward: building on the previous change {D147651}, it just injects FS-discriminators at the various profile generation spots. Reviewed By: wenlei Differential Revision: https://reviews.llvm.org/D147957
1 parent 9849291 commit 345fd0c

File tree

5 files changed

+228
-21
lines changed

5 files changed

+228
-21
lines changed

llvm/include/llvm/ProfileData/SampleProf.h

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -800,13 +800,6 @@ class FunctionSamples {
800800
return Count;
801801
}
802802

803-
sampleprof_error addBodySamplesForProbe(uint32_t Index, uint64_t Num,
804-
uint64_t Weight = 1) {
805-
SampleRecord S;
806-
S.addSamples(Num, Weight);
807-
return BodySamples[LineLocation(Index, 0)].merge(S, Weight);
808-
}
809-
810803
// Accumulate all call target samples to update the body samples.
811804
void updateCallsiteSamples() {
812805
for (auto &I : BodySamples) {
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
43
2+
650-66c:445
3+
650-675:409
4+
650-68b:175
5+
650-6a2:66
6+
685-68b:199
7+
685-6a2:205
8+
688-68b:453
9+
68d-6a2:782
10+
6b0-6b7:775
11+
6b0-6c0:2778
12+
6b0-6dc:856
13+
6b0-6f1:1550
14+
6b9-6c0:463
15+
6b9-6dc:122
16+
6b9-6f1:211
17+
6d4-6dc:2259
18+
6d4-6f1:1019
19+
700-71c:508
20+
720-734:194
21+
720-73c:751
22+
720-741:382
23+
720-751:1226
24+
722-734:306
25+
722-73c:2808
26+
722-751:790
27+
736-73c:315
28+
736-741:196
29+
758-76e:503
30+
770-77a:849
31+
770-798:678
32+
790-798:910
33+
79a-7ab:1478
34+
7b0-7b9:885
35+
7b0-7c3:682
36+
7bb-7c3:873
37+
800-81f:9
38+
824-852:9
39+
860-860:2542
40+
865-894:8
41+
865-8b0:2549
42+
865-8b5:302
43+
883-8b0:246
44+
89a-8b5:8
45+
27
46+
ffffffffffc00001->865:2868
47+
66c->688:458
48+
675->685:423
49+
68b->6b9:848
50+
6a2->79a:1086
51+
6b7->68d:793
52+
6c0->6d4:3343
53+
6dc->6b0:3340
54+
6f1->6b0:2873
55+
71c->736:520
56+
734->758:516
57+
73c->722:4012
58+
741->720:600
59+
751->720:2079
60+
76e->79a:524
61+
77a->7bb:879
62+
798->650:1117
63+
798->700:516
64+
7ab->770:1526
65+
7b9->790:916
66+
7c3->7b0:1597
67+
7c3->824:9
68+
81f->770:10
69+
852->89a:9
70+
894->800:9
71+
8b0->860:2831
72+
8b5->883:321
Lines changed: 139 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,139 @@
1+
; RUN: llvm-profgen --unsymbolized-profile=%S/Inputs/fs-discriminator-probe.raw.prof --binary=%S/Inputs/fs-discriminator-probe.perfbin --output=%t1
2+
; RUN: llvm-profdata show --sample --show-sec-info-only %t1 | FileCheck %s --check-prefix=CHECK-SECTION
3+
; RUN: llvm-profdata merge --sample %t1 -o %t2 --text
4+
; RUN: FileCheck %s --input-file %t2 --check-prefix=CHECK
5+
6+
; CHECK-SECTION: ProfileSummarySection - Offset: [[#]], Size: [[#]], Flags: {fs-discriminator}
7+
8+
; CHECK: partition_pivot_first:29661:2739
9+
; CHECK-NEXT: 1: 2739
10+
; CHECK-NEXT: 2.1: 2739
11+
; CHECK-NEXT: 2.11265: 0
12+
; CHECK-NEXT: 3: 6457
13+
; CHECK-NEXT: 4: 508
14+
; CHECK-NEXT: 5.1: 508
15+
; CHECK-NEXT: 6.2: 508
16+
; CHECK-NEXT: 7.3: 2780
17+
; CHECK-NEXT: 8.4: 0
18+
; CHECK-NEXT: 9.2: 0
19+
; CHECK-NEXT: 10: 6457
20+
; CHECK-NEXT: 11: swap:508
21+
; CHECK-NEXT: 1.7680: 508
22+
; CHECK-NEXT: !CFGChecksum: 4294967295
23+
; CHECK-NEXT: 12: swap:6457
24+
; CHECK-NEXT: 1.7168: 6457
25+
; CHECK-NEXT: !CFGChecksum: 4294967295
26+
; CHECK-NEXT: !CFGChecksum: 563159988274199
27+
; CHECK-NEXT: main:16724:1478
28+
; CHECK-NEXT: 1: 1478
29+
; CHECK-NEXT: 2.1: 1478
30+
; CHECK-NEXT: 2.3585: 0
31+
; CHECK-NEXT: 3: 3105
32+
; CHECK-NEXT: 5.1: 9
33+
; CHECK-NEXT: 5.1537: 0
34+
; CHECK-NEXT: 6: 0
35+
; CHECK-NEXT: 7: 2859
36+
; CHECK-NEXT: 8.1: 9
37+
; CHECK-NEXT: 10.3: 0
38+
; CHECK-NEXT: 11.2: 0
39+
; CHECK-NEXT: 12: 0
40+
; CHECK-NEXT: 13.2: 0
41+
; CHECK-NEXT: 14: 3105
42+
; CHECK-NEXT: 15: 1567
43+
; CHECK-NEXT: 16: 9
44+
; CHECK-NEXT: 17: 0
45+
; CHECK-NEXT: 18: 3105
46+
; CHECK-NEXT: !CFGChecksum: 1126178599120658
47+
; CHECK-NEXT: partition_pivot_last:10497:0
48+
; CHECK-NEXT: 1: 0
49+
; CHECK-NEXT: 2.1: 0
50+
; CHECK-NEXT: 2.6145: 1095
51+
; CHECK-NEXT: 2.7681: 241
52+
; CHECK-NEXT: 2.8193: 0
53+
; CHECK-NEXT: 3: 1095
54+
; CHECK-NEXT: 4: 0
55+
; CHECK-NEXT: 4.3072: 1098
56+
; CHECK-NEXT: 4.12800: 0
57+
; CHECK-NEXT: 5: 0
58+
; CHECK-NEXT: 5.5632: 0
59+
; CHECK-NEXT: 5.6144: 1053
60+
; CHECK-NEXT: 6.1: 1095
61+
; CHECK-NEXT: 6.12801: 0
62+
; CHECK-NEXT: 6.13825: 241
63+
; CHECK-NEXT: 7.2: 1095
64+
; CHECK-NEXT: 7.7170: 241
65+
; CHECK-NEXT: 7.7682: 0
66+
; CHECK-NEXT: 8: 1095
67+
; CHECK-NEXT: 9: swap:1053
68+
; CHECK-NEXT: 1.1024: 1053
69+
; CHECK-NEXT: 1.4608: 0
70+
; CHECK-NEXT: 1.15360: 0
71+
; CHECK-NEXT: !CFGChecksum: 4294967295
72+
; CHECK-NEXT: 10: swap:1095
73+
; CHECK-NEXT: 1.14848: 1095
74+
; CHECK-NEXT: !CFGChecksum: 4294967295
75+
; CHECK-NEXT: !CFGChecksum: 563108639284859
76+
; CHECK-NEXT: quick_sort:4881:2519
77+
; CHECK-NEXT: 1: 2016
78+
; CHECK-NEXT: 1.15360: 503
79+
; CHECK-NEXT: 2: 503
80+
; CHECK-NEXT: 3: 678
81+
; CHECK-NEXT: 4: 503
82+
; CHECK-NEXT: 5: 678
83+
; CHECK-NEXT: !CFGChecksum: 844480566202114
84+
85+
86+
87+
; original code:
88+
; clang -O3 -g -mllvm --enable-fs-discriminator -fdebug-info-for-profiling -fpseudo-probe-for-profiling qsort.c -o a.out
89+
#include <stdio.h>
90+
#include <stdlib.h>
91+
92+
void swap(int *a, int *b) {
93+
int t = *a;
94+
*a = *b;
95+
*b = t;
96+
}
97+
98+
int partition_pivot_last(int* array, int low, int high) {
99+
int pivot = array[high];
100+
int i = low - 1;
101+
for (int j = low; j < high; j++)
102+
if (array[j] < pivot)
103+
swap(&array[++i], &array[j]);
104+
swap(&array[i + 1], &array[high]);
105+
return (i + 1);
106+
}
107+
108+
int partition_pivot_first(int* array, int low, int high) {
109+
int pivot = array[low];
110+
int i = low + 1;
111+
for (int j = low + 1; j <= high; j++)
112+
if (array[j] < pivot) { if (j != i) swap(&array[i], &array[j]); i++;}
113+
swap(&array[i - 1], &array[low]);
114+
return i - 1;
115+
}
116+
117+
void quick_sort(int* array, int low, int high, int (*partition_func)(int *, int, int)) {
118+
if (low < high) {
119+
int pi = (*partition_func)(array, low, high);
120+
quick_sort(array, low, pi - 1, partition_func);
121+
quick_sort(array, pi + 1, high, partition_func);
122+
}
123+
}
124+
125+
int main() {
126+
const int size = 200;
127+
int sum = 0;
128+
int *array = malloc(size * sizeof(int));
129+
for(int i = 0; i < 100 * 1000; i++) {
130+
for(int j = 0; j < size; j++)
131+
array[j] = j % 10 ? rand() % size: j;
132+
int (*fptr)(int *, int, int) = i % 3 ? partition_pivot_last : partition_pivot_first;
133+
quick_sort(array, 0, size - 1, fptr);
134+
sum += array[i % size];
135+
}
136+
printf("sum=%d\n", sum);
137+
138+
return 0;
139+
}

llvm/tools/llvm-profgen/PerfReader.cpp

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -480,8 +480,6 @@ static std::string getContextKeyStr(ContextKey *K,
480480
}
481481

482482
void HybridPerfReader::unwindSamples() {
483-
if (Binary->useFSDiscriminator())
484-
exitWithError("FS discriminator is not supported in CS profile.");
485483
VirtualUnwinder Unwinder(&SampleCounters, Binary);
486484
for (const auto &Item : AggregatedSamples) {
487485
const PerfSample *Sample = Item.first.getPtr();

llvm/tools/llvm-profgen/ProfileGenerator.cpp

Lines changed: 17 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -122,8 +122,6 @@ ProfileGeneratorBase::create(ProfiledBinary *Binary,
122122
bool ProfileIsCS) {
123123
std::unique_ptr<ProfileGeneratorBase> Generator;
124124
if (ProfileIsCS) {
125-
if (Binary->useFSDiscriminator())
126-
exitWithError("FS discriminator is not supported in CS profile.");
127125
Generator.reset(new CSProfileGenerator(Binary, SampleCounters));
128126
} else {
129127
Generator.reset(new ProfileGenerator(Binary, SampleCounters));
@@ -139,8 +137,6 @@ ProfileGeneratorBase::create(ProfiledBinary *Binary, SampleProfileMap &Profiles,
139137
bool ProfileIsCS) {
140138
std::unique_ptr<ProfileGeneratorBase> Generator;
141139
if (ProfileIsCS) {
142-
if (Binary->useFSDiscriminator())
143-
exitWithError("FS discriminator is not supported in CS profile.");
144140
Generator.reset(new CSProfileGenerator(Binary, Profiles));
145141
} else {
146142
Generator.reset(new ProfileGenerator(Binary, std::move(Profiles)));
@@ -561,7 +557,8 @@ void ProfileGenerator::populateBodySamplesWithProbesForAllFunctions(
561557
Binary->getInlineContextForProbe(Probe, FrameVec, true);
562558
FunctionSamples &FunctionProfile =
563559
getLeafProfileAndAddTotalSamples(FrameVec, Count);
564-
FunctionProfile.addBodySamplesForProbe(Probe->getIndex(), Count);
560+
FunctionProfile.addBodySamples(Probe->getIndex(), Probe->getDiscriminator(),
561+
Count);
565562
if (Probe->isEntry())
566563
FunctionProfile.addHeadSamples(Count);
567564
}
@@ -592,7 +589,9 @@ void ProfileGenerator::populateBoundarySamplesWithProbesForAllFunctions(
592589
FunctionSamples &FunctionProfile =
593590
getLeafProfileAndAddTotalSamples(FrameVec, 0);
594591
FunctionProfile.addCalledTargetSamples(
595-
FrameVec.back().Location.LineOffset, 0, CalleeName, Count);
592+
FrameVec.back().Location.LineOffset,
593+
FrameVec.back().Location.Discriminator,
594+
CalleeName, Count);
596595
}
597596
}
598597
}
@@ -1159,7 +1158,8 @@ void CSProfileGenerator::populateBodySamplesWithProbes(
11591158
// collected for non-dangling probes. This is for reporting all of the
11601159
// zero count probes of the frame later.
11611160
FrameSamples[Probe->getInlineTreeNode()].insert(&FunctionProfile);
1162-
FunctionProfile.addBodySamplesForProbe(Probe->getIndex(), Count);
1161+
FunctionProfile.addBodySamples(Probe->getIndex(), Probe->getDiscriminator(),
1162+
Count);
11631163
FunctionProfile.addTotalSamples(Count);
11641164
if (Probe->isEntry()) {
11651165
FunctionProfile.addHeadSamples(Count);
@@ -1171,14 +1171,17 @@ void CSProfileGenerator::populateBodySamplesWithProbes(
11711171
// context id to infer caller's context id to ensure they share the
11721172
// same context prefix.
11731173
uint64_t CallerIndex = ContextNode->getCallSiteLoc().LineOffset;
1174+
uint64_t CallerDiscriminator = ContextNode->getCallSiteLoc().Discriminator;
11741175
assert(CallerIndex &&
11751176
"Inferred caller's location index shouldn't be zero!");
1177+
assert(!CallerDiscriminator &&
1178+
"Callsite probe should not have a discriminator!");
11761179
FunctionSamples &CallerProfile =
11771180
*getOrCreateFunctionSamples(CallerNode);
11781181
CallerProfile.setFunctionHash(InlinerDesc->FuncHash);
1179-
CallerProfile.addBodySamples(CallerIndex, 0, Count);
1182+
CallerProfile.addBodySamples(CallerIndex, CallerDiscriminator, Count);
11801183
CallerProfile.addTotalSamples(Count);
1181-
CallerProfile.addCalledTargetSamples(CallerIndex, 0,
1184+
CallerProfile.addCalledTargetSamples(CallerIndex, CallerDiscriminator,
11821185
ContextNode->getFuncName(), Count);
11831186
}
11841187
}
@@ -1190,7 +1193,8 @@ void CSProfileGenerator::populateBodySamplesWithProbes(
11901193
for (auto &I : FrameSamples) {
11911194
for (auto *FunctionProfile : I.second) {
11921195
for (auto *Probe : I.first->getProbes()) {
1193-
FunctionProfile->addBodySamplesForProbe(Probe->getIndex(), 0);
1196+
FunctionProfile->addBodySamples(Probe->getIndex(),
1197+
Probe->getDiscriminator(), 0);
11941198
}
11951199
}
11961200
}
@@ -1213,8 +1217,9 @@ void CSProfileGenerator::populateBoundarySamplesWithProbes(
12131217
StringRef CalleeName = getCalleeNameForAddress(TargetAddress);
12141218
if (CalleeName.size() == 0)
12151219
continue;
1216-
FunctionProfile.addCalledTargetSamples(CallProbe->getIndex(), 0, CalleeName,
1217-
Count);
1220+
FunctionProfile.addCalledTargetSamples(CallProbe->getIndex(),
1221+
CallProbe->getDiscriminator(),
1222+
CalleeName, Count);
12181223
}
12191224
}
12201225

0 commit comments

Comments
 (0)