Skip to content

Commit 0f28aa6

Browse files
authored
[CSSPGO] Return error_code for missing probe profile (#102085)
We should undo https://reviews.llvm.org/D102007 after undoing https://reviews.llvm.org/D104477, in order to use a missing probe to represent an unknown count; a zero count and an unknown count are treated differently by profile inference. This only affects the post-inline (linker) pipeline (with `--overwrite-existing-weights` on) and flow-sensitive FDO.
1 parent d385485 commit 0f28aa6

File tree

3 files changed

+263
-14
lines changed

3 files changed

+263
-14
lines changed

llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h

Lines changed: 7 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -449,9 +449,6 @@ SampleProfileLoaderBaseImpl<BT>::getInstWeightImpl(const InstructionT &Inst) {
449449
return R;
450450
}
451451

452-
// Here use error_code to represent: 1) The dangling probe. 2) Ignore the weight
453-
// of non-probe instruction. So if all instructions of the BB give error_code,
454-
// tell the inference algorithm to infer the BB weight.
455452
template <typename BT>
456453
ErrorOr<uint64_t>
457454
SampleProfileLoaderBaseImpl<BT>::getProbeWeight(const InstructionT &Inst) {
@@ -464,17 +461,13 @@ SampleProfileLoaderBaseImpl<BT>::getProbeWeight(const InstructionT &Inst) {
464461
return std::error_code();
465462

466463
const FunctionSamples *FS = findFunctionSamples(Inst);
467-
// If none of the instruction has FunctionSample, we choose to return zero
468-
// value sample to indicate the BB is cold. This could happen when the
469-
// instruction is from inlinee and no profile data is found.
470-
// FIXME: This should not be affected by the source drift issue as 1) if the
471-
// newly added function is top-level inliner, it won't match the CFG checksum
472-
// in the function profile or 2) if it's the inlinee, the inlinee should have
473-
// a profile, otherwise it wouldn't be inlined. For non-probe based profile,
474-
// we can improve it by adding a switch for profile-sample-block-accurate for
475-
// block level counts in the future.
476-
if (!FS)
477-
return 0;
464+
if (!FS) {
465+
// If we can't find the function samples for a probe, it could be due to the
466+
// probe is later optimized away or the inlining context is mismatched. We
467+
// treat it as unknown, leaving it to profile inference instead of forcing a
468+
// zero count.
469+
return std::error_code();
470+
}
478471

479472
auto R = FS->findSamplesAt(Probe->Id, Probe->Discriminator);
480473
if (R) {
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
main:89650:0
2+
1: 0
3+
2: 16724
4+
3: 16724
5+
4: 14342
6+
5: 15026 bar:15026
7+
6: 1882
8+
8: 16724
9+
9: 0
10+
!CFGChecksum: 563091374530180
11+
bar:15026:15026
12+
1: 15026
13+
!CFGChecksum: 4294967295
Lines changed: 243 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,243 @@
1+
; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/pseudo-probe-missing-probe.prof -S | FileCheck %s
2+
3+
; CHECK: br i1 %tobool.not.i, label %if.end.i, label %if.then.i, !dbg ![[#]], !prof ![[#PROF:]]
4+
5+
; CHECK: [[#PROF]] = !{!"branch_weights", i32 918, i32 918}
6+
; Verify the else branch is not set to a zero count
7+
; CHECK-NOT: [[#PROF]] = !{!"branch_weights", i32 1698, i32 0}
8+
9+
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
10+
target triple = "x86_64-unknown-linux-gnu"
11+
12+
@x = dso_local global i32 0, align 4, !dbg !0
13+
14+
; Function Attrs: nofree noinline norecurse nounwind memory(readwrite, argmem: none) uwtable
15+
define dso_local void @bar(i32 %i) local_unnamed_addr #0 !dbg !18 {
16+
entry:
17+
#dbg_value(i32 poison, !22, !DIExpression(), !23)
18+
call void @llvm.pseudoprobe(i64 -2012135647395072713, i64 1, i32 0, i64 -1), !dbg !24
19+
%0 = load volatile i32, ptr @x, align 4, !dbg !24, !tbaa !25
20+
%add = add nsw i32 %0, 5, !dbg !24
21+
store volatile i32 %add, ptr @x, align 4, !dbg !24, !tbaa !25
22+
ret void, !dbg !29
23+
}
24+
25+
; Function Attrs: nofree norecurse nounwind memory(readwrite, argmem: none) uwtable
26+
define dso_local void @baz(i32 noundef %i) local_unnamed_addr #1 !dbg !30 {
27+
entry:
28+
#dbg_value(i32 %i, !32, !DIExpression(), !33)
29+
call void @llvm.pseudoprobe(i64 7546896869197086323, i64 1, i32 0, i64 -1), !dbg !34
30+
%rem = srem i32 %i, 100, !dbg !36
31+
%tobool.not = icmp eq i32 %rem, 0, !dbg !36
32+
br i1 %tobool.not, label %if.end, label %if.then, !dbg !37
33+
34+
if.then: ; preds = %entry
35+
call void @llvm.pseudoprobe(i64 7546896869197086323, i64 2, i32 0, i64 -1), !dbg !38
36+
%0 = load volatile i32, ptr @x, align 4, !dbg !38, !tbaa !25
37+
%inc = add nsw i32 %0, 1, !dbg !38
38+
store volatile i32 %inc, ptr @x, align 4, !dbg !38, !tbaa !25
39+
br label %if.end, !dbg !39
40+
41+
if.end: ; preds = %if.then, %entry
42+
call void @llvm.pseudoprobe(i64 7546896869197086323, i64 3, i32 0, i64 -1), !dbg !40
43+
%1 = load volatile i32, ptr @x, align 4, !dbg !40, !tbaa !25
44+
%add = add nsw i32 %1, 2, !dbg !40
45+
store volatile i32 %add, ptr @x, align 4, !dbg !40, !tbaa !25
46+
%2 = and i32 %i, 1, !dbg !41
47+
%tobool2.not = icmp eq i32 %2, 0, !dbg !41
48+
br i1 %tobool2.not, label %if.else, label %if.end11, !dbg !43
49+
50+
if.else: ; preds = %if.end
51+
call void @llvm.pseudoprobe(i64 7546896869197086323, i64 5, i32 0, i64 -1), !dbg !44
52+
%rem5 = srem i32 %i, 3, !dbg !46
53+
%tobool6.not = icmp eq i32 %rem5, 0, !dbg !46
54+
%spec.select = select i1 %tobool6.not, i32 -1, i32 2, !dbg !47
55+
br label %if.end11, !dbg !47
56+
57+
if.end11: ; preds = %if.else, %if.end
58+
%.sink14 = phi i32 [ 1, %if.end ], [ %spec.select, %if.else ]
59+
%3 = load volatile i32, ptr @x, align 4, !dbg !48, !tbaa !25
60+
%add8 = add nsw i32 %3, %.sink14, !dbg !48
61+
store volatile i32 %add8, ptr @x, align 4, !dbg !48, !tbaa !25
62+
call void @llvm.pseudoprobe(i64 7546896869197086323, i64 9, i32 0, i64 -1), !dbg !49
63+
ret void, !dbg !49
64+
}
65+
66+
; Function Attrs: nofree norecurse nounwind uwtable
67+
define dso_local noundef i32 @main() local_unnamed_addr #2 !dbg !50 {
68+
entry:
69+
call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 1, i32 0, i64 -1), !dbg !55
70+
#dbg_value(i32 0, !54, !DIExpression(), !56)
71+
call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 2, i32 0, i64 -1), !dbg !57
72+
br label %while.body, !dbg !58
73+
74+
while.body: ; preds = %entry, %if.end
75+
%inc7 = phi i32 [ 1, %entry ], [ %inc, %if.end ]
76+
%i.06 = phi i32 [ 0, %entry ], [ %inc7, %if.end ]
77+
#dbg_value(i32 %i.06, !54, !DIExpression(), !56)
78+
call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 3, i32 0, i64 -1), !dbg !59
79+
%rem = urem i32 %inc7, 10, !dbg !62
80+
%tobool.not = icmp eq i32 %rem, 0, !dbg !62
81+
br i1 %tobool.not, label %if.else, label %if.then, !dbg !63
82+
83+
if.then: ; preds = %while.body
84+
call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 4, i32 0, i64 -1), !dbg !64
85+
tail call void @bar(i32 poison), !dbg !65
86+
br label %if.end, !dbg !67
87+
88+
if.else: ; preds = %while.body
89+
call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 6, i32 0, i64 -1), !dbg !68
90+
#dbg_value(i32 %inc7, !32, !DIExpression(), !69)
91+
call void @llvm.pseudoprobe(i64 7546896869197086323, i64 1, i32 0, i64 -1), !dbg !72
92+
%rem.i4 = urem i32 %inc7, 100, !dbg !73
93+
%tobool.not.i = icmp eq i32 %rem.i4, 0, !dbg !73
94+
br i1 %tobool.not.i, label %if.end.i, label %if.then.i, !dbg !74
95+
96+
if.then.i: ; preds = %if.else
97+
call void @llvm.pseudoprobe(i64 7546896869197086323, i64 2, i32 0, i64 -1), !dbg !75
98+
%0 = load volatile i32, ptr @x, align 4, !dbg !75, !tbaa !25
99+
%inc.i = add nsw i32 %0, 1, !dbg !75
100+
store volatile i32 %inc.i, ptr @x, align 4, !dbg !75, !tbaa !25
101+
br label %if.end.i, !dbg !76
102+
103+
if.end.i: ; preds = %if.then.i, %if.else
104+
call void @llvm.pseudoprobe(i64 7546896869197086323, i64 3, i32 0, i64 -1), !dbg !77
105+
%1 = load volatile i32, ptr @x, align 4, !dbg !77, !tbaa !25
106+
%add.i = add nsw i32 %1, 2, !dbg !77
107+
store volatile i32 %add.i, ptr @x, align 4, !dbg !77, !tbaa !25
108+
%2 = and i32 %i.06, 1, !dbg !78
109+
%tobool2.not.i.not = icmp eq i32 %2, 0, !dbg !78
110+
br i1 %tobool2.not.i.not, label %baz.exit, label %if.else.i, !dbg !79
111+
112+
if.else.i: ; preds = %if.end.i
113+
call void @llvm.pseudoprobe(i64 7546896869197086323, i64 5, i32 0, i64 -1), !dbg !80
114+
%rem5.i5 = urem i32 %inc7, 3, !dbg !81
115+
%tobool6.not.i = icmp eq i32 %rem5.i5, 0, !dbg !81
116+
%spec.select.i = select i1 %tobool6.not.i, i32 -1, i32 2, !dbg !82
117+
br label %baz.exit, !dbg !82
118+
119+
baz.exit: ; preds = %if.end.i, %if.else.i
120+
%.sink14.i = phi i32 [ 1, %if.end.i ], [ %spec.select.i, %if.else.i ]
121+
%3 = load volatile i32, ptr @x, align 4, !dbg !83, !tbaa !25
122+
%add8.i = add nsw i32 %3, %.sink14.i, !dbg !83
123+
store volatile i32 %add8.i, ptr @x, align 4, !dbg !83, !tbaa !25
124+
call void @llvm.pseudoprobe(i64 7546896869197086323, i64 9, i32 0, i64 -1), !dbg !84
125+
br label %if.end
126+
127+
if.end: ; preds = %baz.exit, %if.then
128+
call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 8, i32 0, i64 -1), !dbg !58
129+
#dbg_value(i32 %inc7, !54, !DIExpression(), !56)
130+
call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 2, i32 0, i64 -1), !dbg !57
131+
%inc = add nuw nsw i32 %inc7, 1, !dbg !57
132+
#dbg_value(i32 %inc, !54, !DIExpression(), !56)
133+
%exitcond.not = icmp eq i32 %inc, 160000001, !dbg !85
134+
br i1 %exitcond.not, label %while.end, label %while.body, !dbg !58, !llvm.loop !86
135+
136+
while.end: ; preds = %if.end
137+
call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 9, i32 0, i64 -1), !dbg !89
138+
ret i32 0, !dbg !89
139+
}
140+
141+
; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite)
142+
declare void @llvm.pseudoprobe(i64, i64, i32, i64) #3
143+
144+
attributes #0 = { nofree noinline norecurse nounwind memory(readwrite, argmem: none) uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
145+
attributes #1 = { nofree norecurse nounwind memory(readwrite, argmem: none) uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
146+
attributes #2 = { nofree norecurse nounwind uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "use-sample-profile"}
147+
attributes #3 = { mustprogress nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) }
148+
149+
!llvm.dbg.cu = !{!2}
150+
!llvm.module.flags = !{!7, !8, !9, !10, !11, !12, !13}
151+
!llvm.ident = !{!14}
152+
!llvm.pseudo_probe_desc = !{!15, !16, !17}
153+
154+
!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression())
155+
!1 = distinct !DIGlobalVariable(name: "x", scope: !2, file: !3, line: 1, type: !5, isLocal: false, isDefinition: true)
156+
!2 = distinct !DICompileUnit(language: DW_LANG_C11, file: !3, producer: "clang version 20.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, globals: !4, splitDebugInlining: false, nameTableKind: None)
157+
!3 = !DIFile(filename: "test.c", directory: "/home", checksumkind: CSK_MD5, checksum: "b67c15e928f76c51702a59639dbebb4c")
158+
!4 = !{!0}
159+
!5 = !DIDerivedType(tag: DW_TAG_volatile_type, baseType: !6)
160+
!6 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
161+
!7 = !{i32 7, !"Dwarf Version", i32 5}
162+
!8 = !{i32 2, !"Debug Info Version", i32 3}
163+
!9 = !{i32 1, !"wchar_size", i32 4}
164+
!10 = !{i32 8, !"PIC Level", i32 2}
165+
!11 = !{i32 7, !"PIE Level", i32 2}
166+
!12 = !{i32 7, !"uwtable", i32 2}
167+
!13 = !{i32 7, !"debug-info-assignment-tracking", i1 true}
168+
!14 = !{!"clang version 20.0.0"}
169+
!15 = !{i64 -2012135647395072713, i64 4294967295, !"bar"}
170+
!16 = !{i64 7546896869197086323, i64 191430930410, !"baz"}
171+
!17 = !{i64 -2624081020897602054, i64 563091374530180, !"main"}
172+
!18 = distinct !DISubprogram(name: "bar", scope: !3, file: !3, line: 3, type: !19, scopeLine: 3, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !21)
173+
!19 = !DISubroutineType(types: !20)
174+
!20 = !{null, !6}
175+
!21 = !{!22}
176+
!22 = !DILocalVariable(name: "i", arg: 1, scope: !18, file: !3, line: 3, type: !6)
177+
!23 = !DILocation(line: 0, scope: !18)
178+
!24 = !DILocation(line: 4, column: 5, scope: !18)
179+
!25 = !{!26, !26, i64 0}
180+
!26 = !{!"int", !27, i64 0}
181+
!27 = !{!"omnipotent char", !28, i64 0}
182+
!28 = !{!"Simple C/C++ TBAA"}
183+
!29 = !DILocation(line: 8, column: 1, scope: !18)
184+
!30 = distinct !DISubprogram(name: "baz", scope: !3, file: !3, line: 10, type: !19, scopeLine: 10, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !31)
185+
!31 = !{!32}
186+
!32 = !DILocalVariable(name: "i", arg: 1, scope: !30, file: !3, line: 10, type: !6)
187+
!33 = !DILocation(line: 0, scope: !30)
188+
!34 = !DILocation(line: 11, column: 6, scope: !35)
189+
!35 = distinct !DILexicalBlock(scope: !30, file: !3, line: 11, column: 6)
190+
!36 = !DILocation(line: 11, column: 7, scope: !35)
191+
!37 = !DILocation(line: 11, column: 6, scope: !30)
192+
!38 = !DILocation(line: 12, column: 6, scope: !35)
193+
!39 = !DILocation(line: 12, column: 5, scope: !35)
194+
!40 = !DILocation(line: 14, column: 5, scope: !30)
195+
!41 = !DILocation(line: 15, column: 9, scope: !42)
196+
!42 = distinct !DILexicalBlock(scope: !30, file: !3, line: 15, column: 7)
197+
!43 = !DILocation(line: 15, column: 7, scope: !30)
198+
!44 = !DILocation(line: 17, column: 12, scope: !45)
199+
!45 = distinct !DILexicalBlock(scope: !42, file: !3, line: 17, column: 12)
200+
!46 = !DILocation(line: 17, column: 14, scope: !45)
201+
!47 = !DILocation(line: 17, column: 12, scope: !42)
202+
!48 = !DILocation(line: 0, scope: !42)
203+
!49 = !DILocation(line: 21, column: 1, scope: !30)
204+
!50 = distinct !DISubprogram(name: "main", scope: !3, file: !3, line: 23, type: !51, scopeLine: 23, flags: DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !53)
205+
!51 = !DISubroutineType(types: !52)
206+
!52 = !{!6}
207+
!53 = !{!54}
208+
!54 = !DILocalVariable(name: "i", scope: !50, file: !3, line: 24, type: !6)
209+
!55 = !DILocation(line: 24, column: 7, scope: !50)
210+
!56 = !DILocation(line: 0, scope: !50)
211+
!57 = !DILocation(line: 25, column: 11, scope: !50)
212+
!58 = !DILocation(line: 25, column: 3, scope: !50)
213+
!59 = !DILocation(line: 26, column: 8, scope: !60)
214+
!60 = distinct !DILexicalBlock(scope: !61, file: !3, line: 26, column: 8)
215+
!61 = distinct !DILexicalBlock(scope: !50, file: !3, line: 25, column: 30)
216+
!62 = !DILocation(line: 26, column: 10, scope: !60)
217+
!63 = !DILocation(line: 26, column: 8, scope: !61)
218+
!64 = !DILocation(line: 27, column: 10, scope: !60)
219+
!65 = !DILocation(line: 27, column: 6, scope: !66)
220+
!66 = !DILexicalBlockFile(scope: !60, file: !3, discriminator: 455082031)
221+
!67 = !DILocation(line: 27, column: 6, scope: !60)
222+
!68 = !DILocation(line: 29, column: 10, scope: !60)
223+
!69 = !DILocation(line: 0, scope: !30, inlinedAt: !70)
224+
!70 = distinct !DILocation(line: 29, column: 6, scope: !71)
225+
!71 = !DILexicalBlockFile(scope: !60, file: !3, discriminator: 455082047)
226+
!72 = !DILocation(line: 11, column: 6, scope: !35, inlinedAt: !70)
227+
!73 = !DILocation(line: 11, column: 7, scope: !35, inlinedAt: !70)
228+
!74 = !DILocation(line: 11, column: 6, scope: !30, inlinedAt: !70)
229+
!75 = !DILocation(line: 12, column: 6, scope: !35, inlinedAt: !70)
230+
!76 = !DILocation(line: 12, column: 5, scope: !35, inlinedAt: !70)
231+
!77 = !DILocation(line: 14, column: 5, scope: !30, inlinedAt: !70)
232+
!78 = !DILocation(line: 15, column: 9, scope: !42, inlinedAt: !70)
233+
!79 = !DILocation(line: 15, column: 7, scope: !30, inlinedAt: !70)
234+
!80 = !DILocation(line: 17, column: 12, scope: !45, inlinedAt: !70)
235+
!81 = !DILocation(line: 17, column: 14, scope: !45, inlinedAt: !70)
236+
!82 = !DILocation(line: 17, column: 12, scope: !42, inlinedAt: !70)
237+
!83 = !DILocation(line: 0, scope: !42, inlinedAt: !70)
238+
!84 = !DILocation(line: 21, column: 1, scope: !30, inlinedAt: !70)
239+
!85 = !DILocation(line: 25, column: 14, scope: !50)
240+
!86 = distinct !{!86, !58, !87, !88}
241+
!87 = !DILocation(line: 30, column: 3, scope: !50)
242+
!88 = !{!"llvm.loop.mustprogress"}
243+
!89 = !DILocation(line: 31, column: 3, scope: !50)

0 commit comments

Comments
 (0)