Skip to content

Commit 73548cb

Browse files
committed
[SimplifyCFG] Treat umul + extract pattern as cheap single instruction
1 parent 884b79a commit 73548cb

File tree

3 files changed

+78
-35
lines changed

3 files changed

+78
-35
lines changed

llvm/lib/Transforms/Utils/SimplifyCFG.cpp

Lines changed: 15 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -421,11 +421,11 @@ static InstructionCost computeSpeculationCost(const User *I,
421421
/// After this function returns, Cost is increased by the cost of
422422
/// V plus its non-dominating operands. If that cost is greater than
423423
/// Budget, false is returned and Cost is undefined.
424-
static bool dominatesMergePoint(Value *V, BasicBlock *BB, Instruction *InsertPt,
425-
SmallPtrSetImpl<Instruction *> &AggressiveInsts,
426-
InstructionCost &Cost, InstructionCost Budget,
427-
const TargetTransformInfo &TTI,
428-
AssumptionCache *AC, unsigned Depth = 0) {
424+
static bool dominatesMergePoint(
425+
Value *V, BasicBlock *BB, Instruction *InsertPt,
426+
SmallPtrSetImpl<Instruction *> &AggressiveInsts, InstructionCost &Cost,
427+
InstructionCost Budget, const TargetTransformInfo &TTI, AssumptionCache *AC,
428+
SmallPtrSetImpl<Instruction *> &ZeroCostInstructions, unsigned Depth = 0) {
429429
// It is possible to hit a zero-cost cycle (phi/gep instructions for example),
430430
// so limit the recursion depth.
431431
// TODO: While this recursion limit does prevent pathological behavior, it
@@ -463,7 +463,12 @@ static bool dominatesMergePoint(Value *V, BasicBlock *BB, Instruction *InsertPt,
463463
if (!isSafeToSpeculativelyExecute(I, InsertPt, AC))
464464
return false;
465465

466-
Cost += computeSpeculationCost(I, TTI);
466+
WithOverflowInst *OverflowInst;
467+
if (match(I, m_ExtractValue<1>(m_OneUse(m_WithOverflowInst(OverflowInst))))) {
468+
ZeroCostInstructions.insert(OverflowInst);
469+
Cost += 1;
470+
} else if (!ZeroCostInstructions.contains(I))
471+
Cost += computeSpeculationCost(I, TTI);
467472

468473
// Allow exactly one instruction to be speculated regardless of its cost
469474
// (as long as it is safe to do so).
@@ -480,7 +485,7 @@ static bool dominatesMergePoint(Value *V, BasicBlock *BB, Instruction *InsertPt,
480485
// not take us over the cost threshold.
481486
for (Use &Op : I->operands())
482487
if (!dominatesMergePoint(Op, BB, InsertPt, AggressiveInsts, Cost, Budget,
483-
TTI, AC, Depth + 1))
488+
TTI, AC, ZeroCostInstructions, Depth + 1))
484489
return false;
485490
// Okay, it's safe to do this! Remember this instruction.
486491
AggressiveInsts.insert(I);
@@ -3796,6 +3801,7 @@ static bool foldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
37963801
// instructions. While we are at it, keep track of the instructions
37973802
// that need to be moved to the dominating block.
37983803
SmallPtrSet<Instruction *, 4> AggressiveInsts;
3804+
SmallPtrSet<Instruction *, 2> ZeroCostInstructions;
37993805
InstructionCost Cost = 0;
38003806
InstructionCost Budget =
38013807
TwoEntryPHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic;
@@ -3813,9 +3819,9 @@ static bool foldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
38133819
}
38143820

38153821
if (!dominatesMergePoint(PN->getIncomingValue(0), BB, DomBI,
3816-
AggressiveInsts, Cost, Budget, TTI, AC) ||
3822+
AggressiveInsts, Cost, Budget, TTI, AC, ZeroCostInstructions) ||
38173823
!dominatesMergePoint(PN->getIncomingValue(1), BB, DomBI,
3818-
AggressiveInsts, Cost, Budget, TTI, AC))
3824+
AggressiveInsts, Cost, Budget, TTI, AC, ZeroCostInstructions))
38193825
return Changed;
38203826
}
38213827

llvm/test/Transforms/PhaseOrdering/X86/unsigned-multiply-overflow-check.ll

Lines changed: 8 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -45,26 +45,17 @@ define i1 @will_not_overflow(i64 %arg, i64 %arg1) {
4545
; INSTCOMBINESIMPLIFYCFGONLY-LABEL: @will_not_overflow(
4646
; INSTCOMBINESIMPLIFYCFGONLY-NEXT: bb:
4747
; INSTCOMBINESIMPLIFYCFGONLY-NEXT: [[T0:%.*]] = icmp eq i64 [[ARG:%.*]], 0
48-
; INSTCOMBINESIMPLIFYCFGONLY-NEXT: br i1 [[T0]], label [[BB5:%.*]], label [[BB2:%.*]]
49-
; INSTCOMBINESIMPLIFYCFGONLY: bb2:
5048
; INSTCOMBINESIMPLIFYCFGONLY-NEXT: [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[ARG]], i64 [[ARG1:%.*]])
5149
; INSTCOMBINESIMPLIFYCFGONLY-NEXT: [[MUL_OV:%.*]] = extractvalue { i64, i1 } [[MUL]], 1
52-
; INSTCOMBINESIMPLIFYCFGONLY-NEXT: br label [[BB5]]
53-
; INSTCOMBINESIMPLIFYCFGONLY: bb5:
54-
; INSTCOMBINESIMPLIFYCFGONLY-NEXT: [[T6:%.*]] = phi i1 [ false, [[BB:%.*]] ], [ [[MUL_OV]], [[BB2]] ]
50+
; INSTCOMBINESIMPLIFYCFGONLY-NEXT: [[T6:%.*]] = select i1 [[T0]], i1 false, i1 [[MUL_OV]]
5551
; INSTCOMBINESIMPLIFYCFGONLY-NEXT: ret i1 [[T6]]
5652
;
5753
; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-LABEL: @will_not_overflow(
5854
; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: bb:
59-
; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: [[T0:%.*]] = icmp eq i64 [[ARG:%.*]], 0
60-
; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: br i1 [[T0]], label [[BB5:%.*]], label [[BB2:%.*]]
61-
; INSTCOMBINESIMPLIFYCFGINSTCOMBINE: bb2:
62-
; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[ARG]], i64 [[ARG1:%.*]])
55+
; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: [[ARG1:%.*]] = freeze i64 [[ARG2:%.*]]
56+
; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[ARG:%.*]], i64 [[ARG1]])
6357
; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: [[MUL_OV:%.*]] = extractvalue { i64, i1 } [[MUL]], 1
64-
; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: br label [[BB5]]
65-
; INSTCOMBINESIMPLIFYCFGINSTCOMBINE: bb5:
66-
; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: [[T6:%.*]] = phi i1 [ false, [[BB:%.*]] ], [ [[MUL_OV]], [[BB2]] ]
67-
; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: ret i1 [[T6]]
58+
; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: ret i1 [[MUL_OV]]
6859
;
6960
bb:
7061
%t0 = icmp eq i64 %arg, 0
@@ -112,28 +103,19 @@ define i1 @will_overflow(i64 %arg, i64 %arg1) {
112103
; INSTCOMBINESIMPLIFYCFGONLY-LABEL: @will_overflow(
113104
; INSTCOMBINESIMPLIFYCFGONLY-NEXT: bb:
114105
; INSTCOMBINESIMPLIFYCFGONLY-NEXT: [[T0:%.*]] = icmp eq i64 [[ARG:%.*]], 0
115-
; INSTCOMBINESIMPLIFYCFGONLY-NEXT: br i1 [[T0]], label [[BB5:%.*]], label [[BB2:%.*]]
116-
; INSTCOMBINESIMPLIFYCFGONLY: bb2:
117106
; INSTCOMBINESIMPLIFYCFGONLY-NEXT: [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[ARG]], i64 [[ARG1:%.*]])
118107
; INSTCOMBINESIMPLIFYCFGONLY-NEXT: [[MUL_OV:%.*]] = extractvalue { i64, i1 } [[MUL]], 1
119108
; INSTCOMBINESIMPLIFYCFGONLY-NEXT: [[PHI_BO:%.*]] = xor i1 [[MUL_OV]], true
120-
; INSTCOMBINESIMPLIFYCFGONLY-NEXT: br label [[BB5]]
121-
; INSTCOMBINESIMPLIFYCFGONLY: bb5:
122-
; INSTCOMBINESIMPLIFYCFGONLY-NEXT: [[T6:%.*]] = phi i1 [ true, [[BB:%.*]] ], [ [[PHI_BO]], [[BB2]] ]
109+
; INSTCOMBINESIMPLIFYCFGONLY-NEXT: [[T6:%.*]] = select i1 [[T0]], i1 true, i1 [[PHI_BO]]
123110
; INSTCOMBINESIMPLIFYCFGONLY-NEXT: ret i1 [[T6]]
124111
;
125112
; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-LABEL: @will_overflow(
126113
; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: bb:
127-
; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: [[T0:%.*]] = icmp eq i64 [[ARG:%.*]], 0
128-
; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: br i1 [[T0]], label [[BB5:%.*]], label [[BB2:%.*]]
129-
; INSTCOMBINESIMPLIFYCFGINSTCOMBINE: bb2:
130-
; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[ARG]], i64 [[ARG1:%.*]])
114+
; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: [[ARG1:%.*]] = freeze i64 [[ARG2:%.*]]
115+
; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[ARG:%.*]], i64 [[ARG1]])
131116
; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: [[MUL_OV:%.*]] = extractvalue { i64, i1 } [[MUL]], 1
132117
; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: [[PHI_BO:%.*]] = xor i1 [[MUL_OV]], true
133-
; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: br label [[BB5]]
134-
; INSTCOMBINESIMPLIFYCFGINSTCOMBINE: bb5:
135-
; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: [[T6:%.*]] = phi i1 [ true, [[BB:%.*]] ], [ [[PHI_BO]], [[BB2]] ]
136-
; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: ret i1 [[T6]]
118+
; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: ret i1 [[PHI_BO]]
137119
;
138120
bb:
139121
%t0 = icmp eq i64 %arg, 0
Lines changed: 55 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,55 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2+
; RUN: opt -passes=simplifycfg -simplifycfg-require-and-preserve-domtree=1 -S < %s | FileCheck %s
3+
target triple = "riscv64-unknown-unknown-elf"
4+
5+
define i16 @func2(i64 %x, i64 %y) {
6+
; CHECK-LABEL: @func2(
7+
; CHECK-NEXT: entry:
8+
; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i64 [[Y:%.*]], 0
9+
; CHECK-NEXT: [[MUL:%.*]] = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[Y]], i64 [[X:%.*]])
10+
; CHECK-NEXT: [[MUL_OV:%.*]] = extractvalue { i64, i1 } [[MUL]], 1
11+
; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[CMP_NOT]], i1 false, i1 [[MUL_OV]]
12+
; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[SPEC_SELECT]] to i16
13+
; CHECK-NEXT: ret i16 [[CONV]]
14+
;
15+
entry:
16+
%cmp.not = icmp eq i64 %y, 0
17+
br i1 %cmp.not, label %land.end, label %land.rhs
18+
19+
land.rhs: ; preds = %entry
20+
%mul = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %y, i64 %x)
21+
%mul.ov = extractvalue { i64, i1 } %mul, 1
22+
br label %land.end
23+
24+
land.end: ; preds = %land.rhs, %entry
25+
%0 = phi i1 [ false, %entry ], [ %mul.ov, %land.rhs ]
26+
%conv = zext i1 %0 to i16
27+
ret i16 %conv
28+
}
29+
30+
define i16 @noHoist(i64 %x, i64 %y) {
31+
; CHECK-LABEL: @noHoist(
32+
; CHECK-NEXT: entry:
33+
; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i64 [[Y:%.*]], 0
34+
; CHECK-NEXT: [[ADD2:%.*]] = add nsw i64 [[Y]], [[X:%.*]]
35+
; CHECK-NEXT: [[MUL:%.*]] = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[ADD2]], i64 [[X]])
36+
; CHECK-NEXT: [[MUL_OV:%.*]] = extractvalue { i64, i1 } [[MUL]], 1
37+
; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[CMP_NOT]], i1 false, i1 [[MUL_OV]]
38+
; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[SPEC_SELECT]] to i16
39+
; CHECK-NEXT: ret i16 [[CONV]]
40+
;
41+
entry:
42+
%cmp.not = icmp eq i64 %y, 0
43+
br i1 %cmp.not, label %land.end, label %land.rhs
44+
45+
land.rhs: ; preds = %entry
46+
%add = add nsw i64 %y, %x
47+
%mul = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %add, i64 %x)
48+
%mul.ov = extractvalue { i64, i1 } %mul, 1
49+
br label %land.end
50+
51+
land.end: ; preds = %land.rhs, %entry
52+
%0 = phi i1 [ false, %entry ], [ %mul.ov, %land.rhs ]
53+
%conv = zext i1 %0 to i16
54+
ret i16 %conv
55+
}

0 commit comments

Comments
 (0)