Skip to content

Commit 838e617

Browse files
committed
[Transform] Treat umul + extract pattern as cheap single instruction.
1 parent 86e20b0 commit 838e617

File tree

3 files changed

+98
-32
lines changed

3 files changed

+98
-32
lines changed

llvm/lib/Transforms/Utils/SimplifyCFG.cpp

Lines changed: 31 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3286,7 +3286,21 @@ bool SimplifyCFGOpt::speculativelyExecuteBB(BranchInst *BI,
32863286

32873287
SmallVector<Instruction *, 4> SpeculatedDbgIntrinsics;
32883288

3289+
// The number of already examined instructions. Debug instructions don't
3290+
// count!
32893291
unsigned SpeculatedInstructions = 0;
3292+
// By default the number of instructions that may be speculatively executed is
3293+
// one. Whenever a pattern that is known to be cheap is found in the basic block,
3294+
// we increase this number to the size of the pattern (how many instructions
3295+
// are there in that pattern).
3296+
unsigned MaxSpeculatedInstructionsToHoist = 1;
3297+
// In case we have found a cheap pattern, we don't want to do cost checking
3298+
// anymore. We are sure we want to hoist the pattern. To know that we are
3299+
// hoisting only the cheap pattern and not other expensive instructions
3300+
// too, we have the `MaxSpeculatedInstructionsToHoist` variable to track that
3301+
// the basic block truly only contains that pattern.
3302+
bool PartialInst = false;
3303+
32903304
bool HoistLoadsStores = HoistLoadsStoresWithCondFaulting &&
32913305
Options.HoistLoadsStoresWithCondFaulting;
32923306
SmallVector<Instruction *, 2> SpeculatedConditionalLoadsStores;
@@ -3316,34 +3330,45 @@ bool SimplifyCFGOpt::speculativelyExecuteBB(BranchInst *BI,
33163330
if (EphTracker.track(&I))
33173331
continue;
33183332

3319-
// Only speculatively execute a single instruction (not counting the
3320-
// terminator) for now.
33213333
bool IsSafeCheapLoadStore = HoistLoadsStores &&
33223334
isSafeCheapLoadStore(&I, TTI) &&
33233335
SpeculatedConditionalLoadsStores.size() <
33243336
HoistLoadsStoresWithCondFaultingThreshold;
3337+
3338+
// Overflow arithmetic instruction plus extract value are usually generated
3339+
// when a division is being replaced, but the zero check may still be there.
3340+
// In that case hoist these two instructions out of this basic block, and
3341+
// let later optimizations take care of the unnecessary zero checks.
3342+
WithOverflowInst *OverflowI;
3343+
if (match(&I, m_ExtractValue<1>(m_OneUse(m_WithOverflowInst(OverflowI))))) {
3344+
MaxSpeculatedInstructionsToHoist = 2;
3345+
PartialInst = true;
3346+
}
33253347
// Do not count load/store in the cost if the target supports conditional faulting
33263348
// b/c it's cheap to speculate it.
33273349
if (IsSafeCheapLoadStore)
33283350
SpeculatedConditionalLoadsStores.push_back(&I);
33293351
else
33303352
++SpeculatedInstructions;
33313353

3332-
if (SpeculatedInstructions > 1)
3333-
return false;
3334-
33353354
// Don't hoist the instruction if it's unsafe or expensive.
33363355
if (!IsSafeCheapLoadStore &&
33373356
!isSafeToSpeculativelyExecute(&I, BI, Options.AC) &&
33383357
!(HoistCondStores && !SpeculatedStoreValue &&
33393358
(SpeculatedStoreValue =
33403359
isSafeToSpeculateStore(&I, BB, ThenBB, EndBB))))
33413360
return false;
3342-
if (!IsSafeCheapLoadStore && !SpeculatedStoreValue &&
3361+
3362+
if (!PartialInst && !IsSafeCheapLoadStore && !SpeculatedStoreValue &&
33433363
computeSpeculationCost(&I, TTI) >
33443364
PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic)
33453365
return false;
33463366

3367+
// The number of instructions to be speculatively executed is limited.
3368+
// This limit is dependent on the found patterns.
3369+
if (SpeculatedInstructions > MaxSpeculatedInstructionsToHoist)
3370+
return false;
3371+
33473372
// Store the store speculation candidate.
33483373
if (!SpeculatedStore && SpeculatedStoreValue)
33493374
SpeculatedStore = cast<StoreInst>(&I);

llvm/test/Transforms/PhaseOrdering/X86/unsigned-multiply-overflow-check.ll

Lines changed: 8 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -45,26 +45,17 @@ define i1 @will_not_overflow(i64 %arg, i64 %arg1) {
4545
; INSTCOMBINESIMPLIFYCFGONLY-LABEL: @will_not_overflow(
4646
; INSTCOMBINESIMPLIFYCFGONLY-NEXT: bb:
4747
; INSTCOMBINESIMPLIFYCFGONLY-NEXT: [[T0:%.*]] = icmp eq i64 [[ARG:%.*]], 0
48-
; INSTCOMBINESIMPLIFYCFGONLY-NEXT: br i1 [[T0]], label [[BB5:%.*]], label [[BB2:%.*]]
49-
; INSTCOMBINESIMPLIFYCFGONLY: bb2:
5048
; INSTCOMBINESIMPLIFYCFGONLY-NEXT: [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[ARG]], i64 [[ARG1:%.*]])
5149
; INSTCOMBINESIMPLIFYCFGONLY-NEXT: [[MUL_OV:%.*]] = extractvalue { i64, i1 } [[MUL]], 1
52-
; INSTCOMBINESIMPLIFYCFGONLY-NEXT: br label [[BB5]]
53-
; INSTCOMBINESIMPLIFYCFGONLY: bb5:
54-
; INSTCOMBINESIMPLIFYCFGONLY-NEXT: [[T6:%.*]] = phi i1 [ false, [[BB:%.*]] ], [ [[MUL_OV]], [[BB2]] ]
50+
; INSTCOMBINESIMPLIFYCFGONLY-NEXT: [[T6:%.*]] = select i1 [[T0]], i1 false, i1 [[MUL_OV]]
5551
; INSTCOMBINESIMPLIFYCFGONLY-NEXT: ret i1 [[T6]]
5652
;
5753
; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-LABEL: @will_not_overflow(
5854
; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: bb:
59-
; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: [[T0:%.*]] = icmp eq i64 [[ARG:%.*]], 0
60-
; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: br i1 [[T0]], label [[BB5:%.*]], label [[BB2:%.*]]
61-
; INSTCOMBINESIMPLIFYCFGINSTCOMBINE: bb2:
62-
; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[ARG]], i64 [[ARG1:%.*]])
55+
; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: [[ARG1:%.*]] = freeze i64 [[ARG2:%.*]]
56+
; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[ARG:%.*]], i64 [[ARG1]])
6357
; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: [[MUL_OV:%.*]] = extractvalue { i64, i1 } [[MUL]], 1
64-
; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: br label [[BB5]]
65-
; INSTCOMBINESIMPLIFYCFGINSTCOMBINE: bb5:
66-
; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: [[T6:%.*]] = phi i1 [ false, [[BB:%.*]] ], [ [[MUL_OV]], [[BB2]] ]
67-
; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: ret i1 [[T6]]
58+
; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: ret i1 [[MUL_OV]]
6859
;
6960
bb:
7061
%t0 = icmp eq i64 %arg, 0
@@ -112,28 +103,19 @@ define i1 @will_overflow(i64 %arg, i64 %arg1) {
112103
; INSTCOMBINESIMPLIFYCFGONLY-LABEL: @will_overflow(
113104
; INSTCOMBINESIMPLIFYCFGONLY-NEXT: bb:
114105
; INSTCOMBINESIMPLIFYCFGONLY-NEXT: [[T0:%.*]] = icmp eq i64 [[ARG:%.*]], 0
115-
; INSTCOMBINESIMPLIFYCFGONLY-NEXT: br i1 [[T0]], label [[BB5:%.*]], label [[BB2:%.*]]
116-
; INSTCOMBINESIMPLIFYCFGONLY: bb2:
117106
; INSTCOMBINESIMPLIFYCFGONLY-NEXT: [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[ARG]], i64 [[ARG1:%.*]])
118107
; INSTCOMBINESIMPLIFYCFGONLY-NEXT: [[MUL_OV:%.*]] = extractvalue { i64, i1 } [[MUL]], 1
119108
; INSTCOMBINESIMPLIFYCFGONLY-NEXT: [[PHI_BO:%.*]] = xor i1 [[MUL_OV]], true
120-
; INSTCOMBINESIMPLIFYCFGONLY-NEXT: br label [[BB5]]
121-
; INSTCOMBINESIMPLIFYCFGONLY: bb5:
122-
; INSTCOMBINESIMPLIFYCFGONLY-NEXT: [[T6:%.*]] = phi i1 [ true, [[BB:%.*]] ], [ [[PHI_BO]], [[BB2]] ]
109+
; INSTCOMBINESIMPLIFYCFGONLY-NEXT: [[T6:%.*]] = select i1 [[T0]], i1 true, i1 [[PHI_BO]]
123110
; INSTCOMBINESIMPLIFYCFGONLY-NEXT: ret i1 [[T6]]
124111
;
125112
; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-LABEL: @will_overflow(
126113
; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: bb:
127-
; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: [[T0:%.*]] = icmp eq i64 [[ARG:%.*]], 0
128-
; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: br i1 [[T0]], label [[BB5:%.*]], label [[BB2:%.*]]
129-
; INSTCOMBINESIMPLIFYCFGINSTCOMBINE: bb2:
130-
; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[ARG]], i64 [[ARG1:%.*]])
114+
; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: [[ARG1:%.*]] = freeze i64 [[ARG2:%.*]]
115+
; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[ARG:%.*]], i64 [[ARG1]])
131116
; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: [[MUL_OV:%.*]] = extractvalue { i64, i1 } [[MUL]], 1
132117
; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: [[PHI_BO:%.*]] = xor i1 [[MUL_OV]], true
133-
; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: br label [[BB5]]
134-
; INSTCOMBINESIMPLIFYCFGINSTCOMBINE: bb5:
135-
; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: [[T6:%.*]] = phi i1 [ true, [[BB:%.*]] ], [ [[PHI_BO]], [[BB2]] ]
136-
; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: ret i1 [[T6]]
118+
; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: ret i1 [[PHI_BO]]
137119
;
138120
bb:
139121
%t0 = icmp eq i64 %arg, 0
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2+
; RUN: opt -passes=simplifycfg -simplifycfg-require-and-preserve-domtree=1 -S < %s | FileCheck %s
3+
target triple = "riscv64-unknown-unknown-elf"
4+
5+
define i16 @func2(i64 %x, i64 %y) {
6+
; CHECK-LABEL: @func2(
7+
; CHECK-NEXT: entry:
8+
; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i64 [[Y:%.*]], 0
9+
; CHECK-NEXT: [[MUL:%.*]] = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[Y]], i64 [[X:%.*]])
10+
; CHECK-NEXT: [[MUL_OV:%.*]] = extractvalue { i64, i1 } [[MUL]], 1
11+
; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[CMP_NOT]], i1 false, i1 [[MUL_OV]]
12+
; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[SPEC_SELECT]] to i16
13+
; CHECK-NEXT: ret i16 [[CONV]]
14+
;
15+
entry:
16+
%cmp.not = icmp eq i64 %y, 0
17+
br i1 %cmp.not, label %land.end, label %land.rhs
18+
19+
land.rhs: ; preds = %entry
20+
%mul = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %y, i64 %x)
21+
%mul.ov = extractvalue { i64, i1 } %mul, 1
22+
br label %land.end
23+
24+
land.end: ; preds = %land.rhs, %entry
25+
%0 = phi i1 [ false, %entry ], [ %mul.ov, %land.rhs ]
26+
%conv = zext i1 %0 to i16
27+
ret i16 %conv
28+
}
29+
30+
define i16 @noHoist(i64 %x, i64 %y) {
31+
; CHECK-LABEL: @noHoist(
32+
; CHECK-NEXT: entry:
33+
; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i64 [[Y:%.*]], 0
34+
; CHECK-NEXT: br i1 [[CMP_NOT]], label [[LAND_END:%.*]], label [[LAND_RHS:%.*]]
35+
; CHECK: land.rhs:
36+
; CHECK-NEXT: [[ADD2:%.*]] = add nsw i64 [[Y]], [[X:%.*]]
37+
; CHECK-NEXT: [[MUL:%.*]] = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[ADD2]], i64 [[X]])
38+
; CHECK-NEXT: [[MUL_OV:%.*]] = extractvalue { i64, i1 } [[MUL]], 1
39+
; CHECK-NEXT: br label [[LAND_END]]
40+
; CHECK: land.end:
41+
; CHECK-NEXT: [[SPEC_SELECT:%.*]] = phi i1 [ false, [[ENTRY:%.*]] ], [ [[MUL_OV]], [[LAND_RHS]] ]
42+
; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[SPEC_SELECT]] to i16
43+
; CHECK-NEXT: ret i16 [[CONV]]
44+
;
45+
entry:
46+
%cmp.not = icmp eq i64 %y, 0
47+
br i1 %cmp.not, label %land.end, label %land.rhs
48+
49+
land.rhs: ; preds = %entry
50+
%add = add nsw i64 %y, %x
51+
%mul = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %add, i64 %x)
52+
%mul.ov = extractvalue { i64, i1 } %mul, 1
53+
br label %land.end
54+
55+
land.end: ; preds = %land.rhs, %entry
56+
%0 = phi i1 [ false, %entry ], [ %mul.ov, %land.rhs ]
57+
%conv = zext i1 %0 to i16
58+
ret i16 %conv
59+
}

0 commit comments

Comments
 (0)