Skip to content

Commit a0b175c

Browse files
authored
[SimplifyCFG] Treat extract oneuse(op.with.overflow),1 pattern as a single instruction (#128021)
Closes #115683. An overflow arithmetic instruction followed by an extract value is usually generated when a division is being replaced, but the zero check may still be there. In that case, hoist these two instructions out of the basic block and let later optimizations take care of the unnecessary zero check.
1 parent 1a68269 commit a0b175c

File tree

3 files changed

+159
-35
lines changed

3 files changed

+159
-35
lines changed

llvm/lib/Transforms/Utils/SimplifyCFG.cpp

Lines changed: 22 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -422,11 +422,11 @@ static InstructionCost computeSpeculationCost(const User *I,
422422
/// After this function returns, Cost is increased by the cost of
423423
/// V plus its non-dominating operands. If that cost is greater than
424424
/// Budget, false is returned and Cost is undefined.
425-
static bool dominatesMergePoint(Value *V, BasicBlock *BB, Instruction *InsertPt,
426-
SmallPtrSetImpl<Instruction *> &AggressiveInsts,
427-
InstructionCost &Cost, InstructionCost Budget,
428-
const TargetTransformInfo &TTI,
429-
AssumptionCache *AC, unsigned Depth = 0) {
425+
static bool dominatesMergePoint(
426+
Value *V, BasicBlock *BB, Instruction *InsertPt,
427+
SmallPtrSetImpl<Instruction *> &AggressiveInsts, InstructionCost &Cost,
428+
InstructionCost Budget, const TargetTransformInfo &TTI, AssumptionCache *AC,
429+
SmallPtrSetImpl<Instruction *> &ZeroCostInstructions, unsigned Depth = 0) {
430430
// It is possible to hit a zero-cost cycle (phi/gep instructions for example),
431431
// so limit the recursion depth.
432432
// TODO: While this recursion limit does prevent pathological behavior, it
@@ -464,7 +464,17 @@ static bool dominatesMergePoint(Value *V, BasicBlock *BB, Instruction *InsertPt,
464464
if (!isSafeToSpeculativelyExecute(I, InsertPt, AC))
465465
return false;
466466

467-
Cost += computeSpeculationCost(I, TTI);
467+
// Overflow arithmetic instruction plus extract value are usually generated
468+
// when a division is being replaced. But, in this case, the zero check may
469+
// still be kept in the code. In that case it would be worth to hoist these
470+
// two instructions out of the basic block. Let's treat this pattern as one
471+
// single cheap instruction here!
472+
WithOverflowInst *OverflowInst;
473+
if (match(I, m_ExtractValue<1>(m_OneUse(m_WithOverflowInst(OverflowInst))))) {
474+
ZeroCostInstructions.insert(OverflowInst);
475+
Cost += 1;
476+
} else if (!ZeroCostInstructions.contains(I))
477+
Cost += computeSpeculationCost(I, TTI);
468478

469479
// Allow exactly one instruction to be speculated regardless of its cost
470480
// (as long as it is safe to do so).
@@ -481,7 +491,7 @@ static bool dominatesMergePoint(Value *V, BasicBlock *BB, Instruction *InsertPt,
481491
// not take us over the cost threshold.
482492
for (Use &Op : I->operands())
483493
if (!dominatesMergePoint(Op, BB, InsertPt, AggressiveInsts, Cost, Budget,
484-
TTI, AC, Depth + 1))
494+
TTI, AC, ZeroCostInstructions, Depth + 1))
485495
return false;
486496
// Okay, it's safe to do this! Remember this instruction.
487497
AggressiveInsts.insert(I);
@@ -3725,6 +3735,7 @@ static bool foldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
37253735
// instructions. While we are at it, keep track of the instructions
37263736
// that need to be moved to the dominating block.
37273737
SmallPtrSet<Instruction *, 4> AggressiveInsts;
3738+
SmallPtrSet<Instruction *, 2> ZeroCostInstructions;
37283739
InstructionCost Cost = 0;
37293740
InstructionCost Budget =
37303741
TwoEntryPHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic;
@@ -3742,9 +3753,11 @@ static bool foldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
37423753
}
37433754

37443755
if (!dominatesMergePoint(PN->getIncomingValue(0), BB, DomBI,
3745-
AggressiveInsts, Cost, Budget, TTI, AC) ||
3756+
AggressiveInsts, Cost, Budget, TTI, AC,
3757+
ZeroCostInstructions) ||
37463758
!dominatesMergePoint(PN->getIncomingValue(1), BB, DomBI,
3747-
AggressiveInsts, Cost, Budget, TTI, AC))
3759+
AggressiveInsts, Cost, Budget, TTI, AC,
3760+
ZeroCostInstructions))
37483761
return Changed;
37493762
}
37503763

llvm/test/Transforms/PhaseOrdering/X86/unsigned-multiply-overflow-check.ll

Lines changed: 8 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -45,26 +45,17 @@ define i1 @will_not_overflow(i64 %arg, i64 %arg1) {
4545
; INSTCOMBINESIMPLIFYCFGONLY-LABEL: @will_not_overflow(
4646
; INSTCOMBINESIMPLIFYCFGONLY-NEXT: bb:
4747
; INSTCOMBINESIMPLIFYCFGONLY-NEXT: [[T0:%.*]] = icmp eq i64 [[ARG:%.*]], 0
48-
; INSTCOMBINESIMPLIFYCFGONLY-NEXT: br i1 [[T0]], label [[BB5:%.*]], label [[BB2:%.*]]
49-
; INSTCOMBINESIMPLIFYCFGONLY: bb2:
5048
; INSTCOMBINESIMPLIFYCFGONLY-NEXT: [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[ARG]], i64 [[ARG1:%.*]])
5149
; INSTCOMBINESIMPLIFYCFGONLY-NEXT: [[MUL_OV:%.*]] = extractvalue { i64, i1 } [[MUL]], 1
52-
; INSTCOMBINESIMPLIFYCFGONLY-NEXT: br label [[BB5]]
53-
; INSTCOMBINESIMPLIFYCFGONLY: bb5:
54-
; INSTCOMBINESIMPLIFYCFGONLY-NEXT: [[T6:%.*]] = phi i1 [ false, [[BB:%.*]] ], [ [[MUL_OV]], [[BB2]] ]
50+
; INSTCOMBINESIMPLIFYCFGONLY-NEXT: [[T6:%.*]] = select i1 [[T0]], i1 false, i1 [[MUL_OV]]
5551
; INSTCOMBINESIMPLIFYCFGONLY-NEXT: ret i1 [[T6]]
5652
;
5753
; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-LABEL: @will_not_overflow(
5854
; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: bb:
59-
; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: [[T0:%.*]] = icmp eq i64 [[ARG:%.*]], 0
60-
; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: br i1 [[T0]], label [[BB5:%.*]], label [[BB2:%.*]]
61-
; INSTCOMBINESIMPLIFYCFGINSTCOMBINE: bb2:
62-
; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[ARG]], i64 [[ARG1:%.*]])
55+
; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: [[ARG1:%.*]] = freeze i64 [[ARG2:%.*]]
56+
; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[ARG:%.*]], i64 [[ARG1]])
6357
; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: [[MUL_OV:%.*]] = extractvalue { i64, i1 } [[MUL]], 1
64-
; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: br label [[BB5]]
65-
; INSTCOMBINESIMPLIFYCFGINSTCOMBINE: bb5:
66-
; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: [[T6:%.*]] = phi i1 [ false, [[BB:%.*]] ], [ [[MUL_OV]], [[BB2]] ]
67-
; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: ret i1 [[T6]]
58+
; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: ret i1 [[MUL_OV]]
6859
;
6960
bb:
7061
%t0 = icmp eq i64 %arg, 0
@@ -112,28 +103,19 @@ define i1 @will_overflow(i64 %arg, i64 %arg1) {
112103
; INSTCOMBINESIMPLIFYCFGONLY-LABEL: @will_overflow(
113104
; INSTCOMBINESIMPLIFYCFGONLY-NEXT: bb:
114105
; INSTCOMBINESIMPLIFYCFGONLY-NEXT: [[T0:%.*]] = icmp eq i64 [[ARG:%.*]], 0
115-
; INSTCOMBINESIMPLIFYCFGONLY-NEXT: br i1 [[T0]], label [[BB5:%.*]], label [[BB2:%.*]]
116-
; INSTCOMBINESIMPLIFYCFGONLY: bb2:
117106
; INSTCOMBINESIMPLIFYCFGONLY-NEXT: [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[ARG]], i64 [[ARG1:%.*]])
118107
; INSTCOMBINESIMPLIFYCFGONLY-NEXT: [[MUL_OV:%.*]] = extractvalue { i64, i1 } [[MUL]], 1
119108
; INSTCOMBINESIMPLIFYCFGONLY-NEXT: [[PHI_BO:%.*]] = xor i1 [[MUL_OV]], true
120-
; INSTCOMBINESIMPLIFYCFGONLY-NEXT: br label [[BB5]]
121-
; INSTCOMBINESIMPLIFYCFGONLY: bb5:
122-
; INSTCOMBINESIMPLIFYCFGONLY-NEXT: [[T6:%.*]] = phi i1 [ true, [[BB:%.*]] ], [ [[PHI_BO]], [[BB2]] ]
109+
; INSTCOMBINESIMPLIFYCFGONLY-NEXT: [[T6:%.*]] = select i1 [[T0]], i1 true, i1 [[PHI_BO]]
123110
; INSTCOMBINESIMPLIFYCFGONLY-NEXT: ret i1 [[T6]]
124111
;
125112
; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-LABEL: @will_overflow(
126113
; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: bb:
127-
; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: [[T0:%.*]] = icmp eq i64 [[ARG:%.*]], 0
128-
; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: br i1 [[T0]], label [[BB5:%.*]], label [[BB2:%.*]]
129-
; INSTCOMBINESIMPLIFYCFGINSTCOMBINE: bb2:
130-
; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[ARG]], i64 [[ARG1:%.*]])
114+
; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: [[ARG1:%.*]] = freeze i64 [[ARG2:%.*]]
115+
; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[ARG:%.*]], i64 [[ARG1]])
131116
; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: [[MUL_OV:%.*]] = extractvalue { i64, i1 } [[MUL]], 1
132117
; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: [[PHI_BO:%.*]] = xor i1 [[MUL_OV]], true
133-
; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: br label [[BB5]]
134-
; INSTCOMBINESIMPLIFYCFGINSTCOMBINE: bb5:
135-
; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: [[T6:%.*]] = phi i1 [ true, [[BB:%.*]] ], [ [[PHI_BO]], [[BB2]] ]
136-
; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: ret i1 [[T6]]
118+
; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: ret i1 [[PHI_BO]]
137119
;
138120
bb:
139121
%t0 = icmp eq i64 %arg, 0
Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,129 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2+
; RUN: opt -passes=simplifycfg -simplifycfg-require-and-preserve-domtree=1 -S < %s | FileCheck %s
3+
target triple = "riscv64-unknown-unknown-elf"
4+
5+
define i16 @basicScenario(i64 %x, i64 %y) {
6+
; CHECK-LABEL: @basicScenario(
7+
; CHECK-NEXT: entry:
8+
; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i64 [[Y:%.*]], 0
9+
; CHECK-NEXT: [[MUL:%.*]] = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[Y]], i64 [[X:%.*]])
10+
; CHECK-NEXT: [[MUL_OV:%.*]] = extractvalue { i64, i1 } [[MUL]], 1
11+
; CHECK-NEXT: [[TMP0:%.*]] = select i1 [[CMP_NOT]], i1 false, i1 [[MUL_OV]]
12+
; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[TMP0]] to i16
13+
; CHECK-NEXT: ret i16 [[CONV]]
14+
;
15+
entry:
16+
%cmp.not = icmp eq i64 %y, 0
17+
br i1 %cmp.not, label %land.end, label %land.rhs
18+
19+
land.rhs: ; preds = %entry
20+
%mul = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %y, i64 %x)
21+
%mul.ov = extractvalue { i64, i1 } %mul, 1
22+
br label %land.end
23+
24+
land.end: ; preds = %land.rhs, %entry
25+
%result = phi i1 [ false, %entry ], [ %mul.ov, %land.rhs ]
26+
%conv = zext i1 %result to i16
27+
ret i16 %conv
28+
}
29+
30+
define i16 @samePatternTwice(i64 %x, i64 %y) {
31+
; CHECK-LABEL: @samePatternTwice(
32+
; CHECK-NEXT: entry:
33+
; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i64 [[Y:%.*]], 0
34+
; CHECK-NEXT: [[MUL:%.*]] = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[Y]], i64 [[X:%.*]])
35+
; CHECK-NEXT: [[MUL_OV:%.*]] = extractvalue { i64, i1 } [[MUL]], 1
36+
; CHECK-NEXT: [[MUL2:%.*]] = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[Y]], i64 [[X]])
37+
; CHECK-NEXT: [[MUL_OV2:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1
38+
; CHECK-NEXT: [[TMP0:%.*]] = select i1 [[CMP_NOT]], i1 false, i1 [[MUL_OV]]
39+
; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[CMP_NOT]], i1 false, i1 [[MUL_OV2]]
40+
; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[TMP0]] to i16
41+
; CHECK-NEXT: [[CONV2:%.*]] = zext i1 [[TMP1]] to i16
42+
; CHECK-NEXT: [[TORET:%.*]] = add nsw i16 [[CONV]], [[CONV2]]
43+
; CHECK-NEXT: ret i16 [[TORET]]
44+
;
45+
entry:
46+
%cmp.not = icmp eq i64 %y, 0
47+
br i1 %cmp.not, label %land.end, label %land.rhs
48+
49+
land.rhs: ; preds = %entry
50+
%mul = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %y, i64 %x)
51+
%mul.ov = extractvalue { i64, i1 } %mul, 1
52+
%mul2 = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %y, i64 %x)
53+
%mul.ov2 = extractvalue { i64, i1 } %mul2, 1
54+
br label %land.end
55+
56+
land.end: ; preds = %land.rhs, %entry
57+
%result1 = phi i1 [ false, %entry ], [ %mul.ov, %land.rhs ]
58+
%result2 = phi i1 [ false, %entry ], [ %mul.ov2, %land.rhs ]
59+
%conv1 = zext i1 %result1 to i16
60+
%conv2 = zext i1 %result2 to i16
61+
%toRet = add nsw i16 %conv1, %conv2
62+
ret i16 %toRet
63+
}
64+
65+
define i16 @stillHoistNotTooExpensive(i64 %x, i64 %y) {
66+
; CHECK-LABEL: @stillHoistNotTooExpensive(
67+
; CHECK-NEXT: entry:
68+
; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i64 [[Y:%.*]], 0
69+
; CHECK-NEXT: [[ADD:%.*]] = add nsw i64 [[Y]], [[X:%.*]]
70+
; CHECK-NEXT: [[MUL:%.*]] = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[ADD]], i64 [[X]])
71+
; CHECK-NEXT: [[MUL_OV:%.*]] = extractvalue { i64, i1 } [[MUL]], 1
72+
; CHECK-NEXT: [[TMP0:%.*]] = select i1 [[CMP_NOT]], i1 false, i1 [[MUL_OV]]
73+
; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[TMP0]] to i16
74+
; CHECK-NEXT: ret i16 [[CONV]]
75+
;
76+
entry:
77+
%cmp.not = icmp eq i64 %y, 0
78+
br i1 %cmp.not, label %land.end, label %land.rhs
79+
80+
land.rhs: ; preds = %entry
81+
%add = add nsw i64 %y, %x
82+
%mul = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %add, i64 %x)
83+
%mul.ov = extractvalue { i64, i1 } %mul, 1
84+
br label %land.end
85+
86+
land.end: ; preds = %land.rhs, %entry
87+
%result = phi i1 [ false, %entry ], [ %mul.ov, %land.rhs ]
88+
%conv = zext i1 %result to i16
89+
ret i16 %conv
90+
}
91+
92+
define i16 @noHoistTooExpensive(i64 %x, i64 %y) {
93+
; CHECK-LABEL: @noHoistTooExpensive(
94+
; CHECK-NEXT: entry:
95+
; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i64 [[Y:%.*]], 0
96+
; CHECK-NEXT: br i1 [[CMP_NOT]], label [[LAND_END:%.*]], label [[LAND_RHS:%.*]]
97+
; CHECK: land.rhs:
98+
; CHECK-NEXT: [[ADD:%.*]] = add nsw i64 [[Y]], [[X:%.*]]
99+
; CHECK-NEXT: [[ADD2:%.*]] = add nsw i64 [[Y]], [[ADD]]
100+
; CHECK-NEXT: [[ADD3:%.*]] = add nsw i64 [[ADD]], [[ADD2]]
101+
; CHECK-NEXT: [[ADD4:%.*]] = add nsw i64 [[ADD2]], [[ADD3]]
102+
; CHECK-NEXT: [[ADD5:%.*]] = add nsw i64 [[ADD3]], [[ADD4]]
103+
; CHECK-NEXT: [[MUL:%.*]] = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[ADD5]], i64 [[X]])
104+
; CHECK-NEXT: [[MUL_OV:%.*]] = extractvalue { i64, i1 } [[MUL]], 1
105+
; CHECK-NEXT: br label [[LAND_END]]
106+
; CHECK: land.end:
107+
; CHECK-NEXT: [[TMP0:%.*]] = phi i1 [ false, [[ENTRY:%.*]] ], [ [[MUL_OV]], [[LAND_RHS]] ]
108+
; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[TMP0]] to i16
109+
; CHECK-NEXT: ret i16 [[CONV]]
110+
;
111+
entry:
112+
%cmp.not = icmp eq i64 %y, 0
113+
br i1 %cmp.not, label %land.end, label %land.rhs
114+
115+
land.rhs: ; preds = %entry
116+
%add = add nsw i64 %y, %x
117+
%add2 = add nsw i64 %y, %add
118+
%add3 = add nsw i64 %add, %add2
119+
%add4 = add nsw i64 %add2, %add3
120+
%add5 = add nsw i64 %add3, %add4
121+
%mul = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %add5, i64 %x)
122+
%mul.ov = extractvalue { i64, i1 } %mul, 1
123+
br label %land.end
124+
125+
land.end: ; preds = %land.rhs, %entry
126+
%result = phi i1 [ false, %entry ], [ %mul.ov, %land.rhs ]
127+
%conv = zext i1 %result to i16
128+
ret i16 %conv
129+
}

0 commit comments

Comments
 (0)