Skip to content

Commit 82cee24

Browse files
committed
[JumpThreading] Preserve profile metadata during select unfolding, take 2
Jump threading can replace a select and an unconditional branch with a conditional branch, but in doing so it loses the profile information. This destructive transform can eventually lead to a performance degradation due to the folding of branches in shouldFoldCondBranchesToCommonDestination, as the branch probabilities are no longer known. The first version was reverted due to an assertion failure caused by an i32 overflow, which is fixed in this version. Patch by Roman Paukner! Differential Revision: https://reviews.llvm.org/D138132 Reviewed By: mkazantsev
1 parent 45ab848 commit 82cee24

File tree

2 files changed

+47
-8
lines changed

2 files changed

+47
-8
lines changed

llvm/lib/Transforms/Scalar/JumpThreading.cpp

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2785,8 +2785,26 @@ void JumpThreadingPass::unfoldSelectInstr(BasicBlock *Pred, BasicBlock *BB,
27852785
// Create a conditional branch and update PHI nodes.
27862786
auto *BI = BranchInst::Create(NewBB, BB, SI->getCondition(), Pred);
27872787
BI->applyMergedLocation(PredTerm->getDebugLoc(), SI->getDebugLoc());
2788+
BI->copyMetadata(*SI, {LLVMContext::MD_prof});
27882789
SIUse->setIncomingValue(Idx, SI->getFalseValue());
27892790
SIUse->addIncoming(SI->getTrueValue(), NewBB);
2791+
// Set the block frequency of NewBB.
2792+
if (HasProfileData) {
2793+
uint64_t TrueWeight, FalseWeight;
2794+
if (extractBranchWeights(*SI, TrueWeight, FalseWeight) &&
2795+
(TrueWeight + FalseWeight) != 0) {
2796+
SmallVector<BranchProbability, 2> BP;
2797+
BP.emplace_back(BranchProbability::getBranchProbability(
2798+
TrueWeight, TrueWeight + FalseWeight));
2799+
BP.emplace_back(BranchProbability::getBranchProbability(
2800+
FalseWeight, TrueWeight + FalseWeight));
2801+
BPI->setEdgeProbability(Pred, BP);
2802+
}
2803+
2804+
auto NewBBFreq =
2805+
BFI->getBlockFreq(Pred) * BPI->getEdgeProbability(Pred, NewBB);
2806+
BFI->setBlockFreq(NewBB, NewBBFreq.getFrequency());
2807+
}
27902808

27912809
// The select is now dead.
27922810
SI->eraseFromParent();

llvm/test/Transforms/JumpThreading/select.ll

Lines changed: 29 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,13 @@
1-
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2-
; RUN: opt -S -passes=jump-threading < %s | FileCheck %s
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals
2+
; RUN: opt -S -passes=jump-threading -debug-only=branch-prob < %s 2>&1 | FileCheck %s
3+
; REQUIRES: asserts
4+
5+
; CHECK-LABEL: ---- Branch Probability Info : unfold1 ----
6+
; CHECK: set edge cond.false -> 0 successor probability to 0x20000000 / 0x80000000 = 25.00%
7+
; CHECK: set edge cond.false -> 1 successor probability to 0x60000000 / 0x80000000 = 75.00%
8+
; CHECK-LABEL: ---- Branch Probability Info : unfold2 ----
9+
; CHECK: set edge cond.false -> 0 successor probability to 0x20000000 / 0x80000000 = 25.00%
10+
; CHECK: set edge cond.false -> 1 successor probability to 0x60000000 / 0x80000000 = 75.00%
311

412
declare void @foo()
513
declare void @bar()
@@ -11,6 +19,9 @@ declare void @quux()
1119
; Mostly theoretical since instruction combining simplifies all selects of
1220
; booleans where at least one operand is true/false/undef.
1321

22+
;.
23+
; CHECK: @[[ANCHOR:[a-zA-Z0-9_$"\\.-]+]] = constant [3 x ptr] [ptr blockaddress(@test_indirectbr, [[L1:%.*]]), ptr inttoptr (i32 1 to ptr), ptr blockaddress(@test_indirectbr, [[L3:%.*]])]
24+
;.
1425
define void @test_br(i1 %cond, i1 %value) nounwind {
1526
; CHECK-LABEL: @test_br(
1627
; CHECK-NEXT: entry:
@@ -265,7 +276,7 @@ L4:
265276
ret void
266277
}
267278

268-
define void @unfold1(double %x, double %y) nounwind {
279+
define void @unfold1(double %x, double %y) nounwind !prof !1 {
269280
; CHECK-LABEL: @unfold1(
270281
; CHECK-NEXT: entry:
271282
; CHECK-NEXT: [[SUB:%.*]] = fsub double [[X:%.*]], [[Y:%.*]]
@@ -274,7 +285,7 @@ define void @unfold1(double %x, double %y) nounwind {
274285
; CHECK: cond.false:
275286
; CHECK-NEXT: [[ADD:%.*]] = fadd double [[X]], [[Y]]
276287
; CHECK-NEXT: [[CMP1:%.*]] = fcmp ogt double [[ADD]], 1.000000e+01
277-
; CHECK-NEXT: br i1 [[CMP1]], label [[COND_END4]], label [[IF_THEN:%.*]]
288+
; CHECK-NEXT: br i1 [[CMP1]], label [[COND_END4]], label [[IF_THEN:%.*]], !prof [[PROF1:![0-9]+]]
278289
; CHECK: cond.end4:
279290
; CHECK-NEXT: [[COND5:%.*]] = phi double [ [[SUB]], [[ENTRY:%.*]] ], [ [[ADD]], [[COND_FALSE]] ]
280291
; CHECK-NEXT: [[CMP6:%.*]] = fcmp oeq double [[COND5]], 0.000000e+00
@@ -293,7 +304,7 @@ entry:
293304
cond.false: ; preds = %entry
294305
%add = fadd double %x, %y
295306
%cmp1 = fcmp ogt double %add, 1.000000e+01
296-
%add. = select i1 %cmp1, double %add, double 0.000000e+00
307+
%add. = select i1 %cmp1, double %add, double 0.000000e+00, !prof !0
297308
br label %cond.end4
298309

299310
cond.end4: ; preds = %entry, %cond.false
@@ -311,7 +322,7 @@ if.end: ; preds = %if.then, %cond.end4
311322
}
312323

313324

314-
define void @unfold2(i32 %x, i32 %y) nounwind {
325+
define void @unfold2(i32 %x, i32 %y) nounwind !prof !1 {
315326
; CHECK-LABEL: @unfold2(
316327
; CHECK-NEXT: entry:
317328
; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 [[X:%.*]], [[Y:%.*]]
@@ -320,7 +331,7 @@ define void @unfold2(i32 %x, i32 %y) nounwind {
320331
; CHECK: cond.false:
321332
; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[X]], [[Y]]
322333
; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[ADD]], 10
323-
; CHECK-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[COND_END4:%.*]]
334+
; CHECK-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[COND_END4:%.*]], !prof [[PROF1]]
324335
; CHECK: cond.end4:
325336
; CHECK-NEXT: [[COND5:%.*]] = phi i32 [ [[ADD]], [[COND_FALSE]] ]
326337
; CHECK-NEXT: [[CMP6:%.*]] = icmp eq i32 [[COND5]], 0
@@ -339,7 +350,7 @@ entry:
339350
cond.false: ; preds = %entry
340351
%add = add nsw i32 %x, %y
341352
%cmp1 = icmp sgt i32 %add, 10
342-
%add. = select i1 %cmp1, i32 0, i32 %add
353+
%add. = select i1 %cmp1, i32 0, i32 %add, !prof !0
343354
br label %cond.end4
344355

345356
cond.end4: ; preds = %entry, %cond.false
@@ -652,3 +663,13 @@ if.end:
652663
%v1 = select i1 %v, i32 %s, i32 42
653664
ret i32 %v1
654665
}
666+
667+
; branch_weights overflowing uint32_t
668+
!0 = !{!"branch_weights", i64 1073741824, i64 3221225472}
669+
!1 = !{!"function_entry_count", i64 1984}
670+
;.
671+
; CHECK: attributes #[[ATTR0:[0-9]+]] = { nounwind }
672+
;.
673+
; CHECK: [[META0:![0-9]+]] = !{!"function_entry_count", i64 1984}
674+
; CHECK: [[PROF1]] = !{!"branch_weights", i64 1073741824, i64 3221225472}
675+
;.

0 commit comments

Comments
 (0)