Skip to content

Commit 3636767

Browse files
committed
[SelectOpt] Support add and sub with zext operands.
Extend the support for implicit selects in the form of OR with a ZExt operand to support ADD and SUB binops as well. These can similarly form implicit selects, which can be profitable to convert back to branches.
1 parent 7841ba1 commit 3636767

File tree

3 files changed

+146
-24
lines changed

3 files changed

+146
-24
lines changed

llvm/lib/CodeGen/SelectOptimize.cpp

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -138,8 +138,49 @@ class SelectOptimizeImpl {
138138
unsigned CondIdx;
139139

140140
public:
141+
<<<<<<< HEAD
141142
/// Construct a SelectLike wrapping instruction I. Inverted and CondIdx are
/// stored in the members of the same name and default to false and 0.
SelectLike(Instruction *I, bool Inverted = false, unsigned CondIdx = 0)
142143
: I(I), Inverted(Inverted), CondIdx(CondIdx) {}
144+
=======
145+
/// Match a select or select-like instruction, returning a SelectLike.
/// A SelectInst always matches. An Add, Or or Sub matches when either of its
/// operands is a single-use zext of an i1 value. Anything else yields a
/// SelectLike wrapping nullptr.
146+
static SelectLike match(Instruction *I) {
147+
// Select instructions are what we are usually looking for.
148+
if (isa<SelectInst>(I))
149+
return SelectLike(I);
150+
151+
// A BinOp(zext(i1 X), Y), where BinOp is Add, Or or Sub, can also be treated
152+
// like a select, with condition X and values `Y BinOp 1` and Y (e.g. Y|1 and
// Y for Or).
// NOTE(review): for Sub this also accepts the zext in operand 0
// (zext(X) - Y), which is not a choice between Y - 1 and Y. Confirm that
// form is rejected or handled elsewhere.
153+
switch (I->getOpcode()) {
154+
case Instruction::Add:
155+
case Instruction::Or:
156+
case Instruction::Sub: {
157+
// X is left uninitialized; it is only read below after one of the two
// pattern matches has succeeded, because of the && short-circuit.
Value *X;
158+
if ((PatternMatch::match(I->getOperand(0),
159+
m_OneUse(m_ZExt(m_Value(X)))) ||
160+
PatternMatch::match(I->getOperand(1),
161+
m_OneUse(m_ZExt(m_Value(X))))) &&
162+
X->getType()->isIntegerTy(1))
163+
return SelectLike(I);
164+
break;
165+
}
166+
}
167+
168+
// No select-like pattern matched.
return SelectLike(nullptr);
169+
}
170+
171+
/// Returns true when this SelectLike wraps a non-null instruction.
bool isValid() { return I; }
172+
/// Boolean conversion; equivalent to isValid().
operator bool() { return isValid(); }
173+
174+
/// Invert the select by inverting the condition and switching the operands.
/// This function only records the inversion by setting the Inverted flag.
/// It asserts that the SelectLike is not already inverted and that its
/// condition is an Xor instruction.
175+
void setInverted() {
176+
assert(!Inverted && "Trying to invert an inverted SelectLike");
177+
// The condition must be an instruction whose opcode is Xor.
assert(isa<Instruction>(getCondition()) &&
178+
cast<Instruction>(getCondition())->getOpcode() ==
179+
Instruction::Xor);
180+
Inverted = true;
181+
}
182+
/// Returns the current value of the Inverted flag.
bool isInverted() const { return Inverted; }
183+
>>>>>>> 7e5ca4eafa3c ([SelectOpt] Support add and sub with zext operands.)
143184

144185
/// Returns the wrapped instruction (nullptr if nothing was matched).
Instruction *getI() { return I; }
145186
/// Const overload: returns the wrapped instruction (nullptr if nothing was
/// matched).
const Instruction *getI() const { return I; }
@@ -195,6 +236,7 @@ class SelectOptimizeImpl {
195236
return It != InstCostMap.end() ? It->second.NonPredCost
196237
: Scaled64::getZero();
197238
}
239+
<<<<<<< HEAD
198240
return Scaled64::getZero();
199241
}
200242
// If getTrue(False)Value() return nullptr, it means we are dealing with
@@ -212,6 +254,48 @@ class SelectOptimizeImpl {
212254
TotalCost += It->second.NonPredCost;
213255
}
214256
return TotalCost;
257+
=======
258+
259+
// BinaryOp case - add the cost of an extra BinOp to the cost of the False
260+
// case.
261+
if (isa<BinaryOperator>(I)) {
262+
if (auto OpI = dyn_cast<Instruction>(getFalseValue())) {
263+
auto It = InstCostMap.find(I);
264+
if (It != InstCostMap.end()) {
265+
InstructionCost OrCost = TTI->getArithmeticInstrCost(
266+
I->getOpcode(), OpI->getType(),
267+
TargetTransformInfo::TCK_Latency,
268+
{TargetTransformInfo::OK_AnyValue,
269+
TargetTransformInfo::OP_None},
270+
{TTI::OK_UniformConstantValue, TTI::OP_PowerOf2});
271+
return It->second.NonPredCost + Scaled64::get(*OrCost.getValue());
272+
}
273+
}
274+
}
275+
276+
return Scaled64::getZero();
277+
}
278+
279+
/// Return the NonPredCost cost of the false op, given the costs in
280+
/// InstCostMap. This may need to be generated for select-like instructions.
281+
Scaled64
282+
getFalseOpCost(DenseMap<const Instruction *, CostInfo> &InstCostMap,
283+
const TargetTransformInfo *TTI) {
284+
if (isa<SelectInst>(I))
285+
if (auto *I = dyn_cast<Instruction>(getFalseValue())) {
286+
auto It = InstCostMap.find(I);
287+
return It != InstCostMap.end() ? It->second.NonPredCost
288+
: Scaled64::getZero();
289+
}
290+
291+
// BinaryOp case - return the cost of the false case
292+
if (isa<BinaryOperator>(I))
293+
if (auto I = dyn_cast<Instruction>(getFalseValue()))
294+
if (auto It = InstCostMap.find(I); It != InstCostMap.end())
295+
return It->second.NonPredCost;
296+
297+
return Scaled64::getZero();
298+
>>>>>>> 7e5ca4eafa3c ([SelectOpt] Support add and sub with zext operands.)
215299
}
216300
};
217301

@@ -488,9 +572,23 @@ static Value *getTrueOrFalseValue(
488572
return V;
489573
}
490574

575+
<<<<<<< HEAD
491576
auto *BO = cast<BinaryOperator>(SI.getI());
492577
assert(BO->getOpcode() == Instruction::Or &&
493578
"Only currently handling Or instructions.");
579+
=======
580+
if (auto *BinOp = dyn_cast<BinaryOperator>(SI.getI())) {
581+
assert((BinOp->getOpcode() == Instruction::Add ||
582+
BinOp->getOpcode() == Instruction::Or ||
583+
BinOp->getOpcode() == Instruction::Sub) &&
584+
"Only currently handling Add, Or and Sub instructions.");
585+
V = SI.getFalseValue();
586+
if (isTrue) {
587+
Constant *CI = ConstantInt::get(V->getType(), 1);
588+
V = IB.CreateBinOp(BinOp->getOpcode(), V, CI);
589+
}
590+
}
591+
>>>>>>> 7e5ca4eafa3c ([SelectOpt] Support add and sub with zext operands.)
494592

495593
auto *CBO = BO->clone();
496594
auto CondIdx = SI.getConditionOpIndex();

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4796,14 +4796,20 @@ AArch64TTIImpl::getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
47964796
}
47974797

47984798
bool AArch64TTIImpl::shouldTreatInstructionLikeSelect(const Instruction *I) {
4799-
// For the binary operators (e.g. or) we need to be more careful than
4800-
// selects, here we only transform them if they are already at a natural
4801-
// break point in the code - the end of a block with an unconditional
4802-
// terminator.
4803-
if (EnableOrLikeSelectOpt && I->getOpcode() == Instruction::Or &&
4804-
isa<BranchInst>(I->getNextNode()) &&
4805-
cast<BranchInst>(I->getNextNode())->isUnconditional())
4806-
return true;
4799+
if (EnableOrLikeSelectOpt) {
4800+
// For the binary operators (e.g. or) we need to be more careful than
4801+
// selects, here we only transform them if they are already at a natural
4802+
// break point in the code - the end of a block with an unconditional
4803+
// terminator.
4804+
if (I->getOpcode() == Instruction::Or &&
4805+
isa<BranchInst>(I->getNextNode()) &&
4806+
cast<BranchInst>(I->getNextNode())->isUnconditional())
4807+
return true;
4808+
4809+
if (I->getOpcode() == Instruction::Add ||
4810+
I->getOpcode() == Instruction::Sub)
4811+
return true;
4812+
}
48074813
return BaseT::shouldTreatInstructionLikeSelect(I);
48084814
}
48094815

llvm/test/CodeGen/AArch64/selectopt-cast.ll

Lines changed: 34 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -7,16 +7,22 @@ define void @test_add_zext(ptr %dst, ptr %src, i64 %j.start, i64 %p, i64 %i.star
77
; CHECK-NEXT: entry:
88
; CHECK-NEXT: br label [[LOOP:%.*]]
99
; CHECK: loop:
10-
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
11-
; CHECK-NEXT: [[J:%.*]] = phi i64 [ [[HIGH:%.*]], [[ENTRY]] ], [ [[J_NEXT:%.*]], [[LOOP]] ]
12-
; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_START:%.*]], [[ENTRY]] ], [ [[J_NEXT]], [[LOOP]] ]
10+
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[SELECT_END:%.*]] ]
11+
; CHECK-NEXT: [[J:%.*]] = phi i64 [ [[HIGH:%.*]], [[ENTRY]] ], [ [[J_NEXT:%.*]], [[SELECT_END]] ]
12+
; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_START:%.*]], [[ENTRY]] ], [ [[J_NEXT]], [[SELECT_END]] ]
1313
; CHECK-NEXT: [[GEP_I:%.*]] = getelementptr inbounds ptr, ptr [[SRC:%.*]], i64 [[I]]
1414
; CHECK-NEXT: [[L_I:%.*]] = load ptr, ptr [[GEP_I]], align 8
1515
; CHECK-NEXT: [[GEP_J:%.*]] = getelementptr inbounds ptr, ptr [[SRC]], i64 [[J]]
1616
; CHECK-NEXT: [[L_J:%.*]] = load ptr, ptr [[GEP_J]], align 8
1717
; CHECK-NEXT: [[CMP3:%.*]] = icmp ult ptr [[L_I]], [[L_J]]
1818
; CHECK-NEXT: [[DEC:%.*]] = zext i1 [[CMP3]] to i64
19-
; CHECK-NEXT: [[J_NEXT]] = add nsw i64 [[J]], [[DEC]]
19+
; CHECK-NEXT: [[CMP3_FROZEN:%.*]] = freeze i1 [[CMP3]]
20+
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[J]], 1
21+
; CHECK-NEXT: br i1 [[CMP3_FROZEN]], label [[SELECT_END]], label [[SELECT_FALSE:%.*]]
22+
; CHECK: select.false:
23+
; CHECK-NEXT: br label [[SELECT_END]]
24+
; CHECK: select.end:
25+
; CHECK-NEXT: [[J_NEXT]] = phi i64 [ [[TMP0]], [[LOOP]] ], [ [[J]], [[SELECT_FALSE]] ]
2026
; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds ptr, ptr [[DST:%.*]], i64 [[IV]]
2127
; CHECK-NEXT: store i64 [[J_NEXT]], ptr [[GEP_DST]], align 8
2228
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
@@ -101,17 +107,20 @@ define void @test_add_zext_not(ptr %dst, ptr %src, i64 %j.start, i64 %p, i64 %i.
101107
; CHECK-NEXT: entry:
102108
; CHECK-NEXT: br label [[LOOP:%.*]]
103109
; CHECK: loop:
104-
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
105-
; CHECK-NEXT: [[J:%.*]] = phi i64 [ [[J_START:%.*]], [[ENTRY]] ], [ [[J_NEXT:%.*]], [[LOOP]] ]
106-
; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_START:%.*]], [[ENTRY]] ], [ [[J_NEXT]], [[LOOP]] ]
107110
; CHECK-NEXT: [[GEP_I:%.*]] = getelementptr inbounds ptr, ptr [[SRC:%.*]], i64 [[I]]
108111
; CHECK-NEXT: [[L_I:%.*]] = load ptr, ptr [[GEP_I]], align 8
109112
; CHECK-NEXT: [[GEP_J:%.*]] = getelementptr inbounds ptr, ptr [[SRC]], i64 [[J]]
110113
; CHECK-NEXT: [[L_J:%.*]] = load ptr, ptr [[GEP_J]], align 8
111114
; CHECK-NEXT: [[CMP3:%.*]] = icmp ult ptr [[L_I]], [[L_J]]
112115
; CHECK-NEXT: [[NOT_CMP3:%.*]] = xor i1 [[CMP3]], true
113116
; CHECK-NEXT: [[DEC:%.*]] = zext i1 [[NOT_CMP3]] to i64
114-
; CHECK-NEXT: [[J_NEXT]] = add nsw i64 [[J]], [[DEC]]
117+
; CHECK-NEXT: [[NOT_CMP3_FROZEN:%.*]] = freeze i1 [[NOT_CMP3]]
118+
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[J]], 1
119+
; CHECK-NEXT: br i1 [[NOT_CMP3_FROZEN]], label [[SELECT_END]], label [[SELECT_FALSE:%.*]]
120+
; CHECK: select.false:
121+
; CHECK-NEXT: br label [[SELECT_END]]
122+
; CHECK: select.end:
123+
; CHECK-NEXT: [[J_NEXT]] = phi i64 [ [[TMP0]], [[LOOP]] ], [ [[J]], [[SELECT_FALSE]] ]
115124
; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds ptr, ptr [[DST:%.*]], i64 [[IV]]
116125
; CHECK-NEXT: store i64 [[J_NEXT]], ptr [[GEP_DST]], align 8
117126
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
@@ -356,16 +365,22 @@ define void @test_sub_zext(ptr %dst, ptr %src, i64 %j.start, i64 %p, i64 %i.star
356365
; CHECK-NEXT: entry:
357366
; CHECK-NEXT: br label [[LOOP:%.*]]
358367
; CHECK: loop:
359-
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
360-
; CHECK-NEXT: [[J:%.*]] = phi i64 [ [[HIGH:%.*]], [[ENTRY]] ], [ [[J_NEXT:%.*]], [[LOOP]] ]
361-
; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_START:%.*]], [[ENTRY]] ], [ [[J_NEXT]], [[LOOP]] ]
368+
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[SELECT_END:%.*]] ]
369+
; CHECK-NEXT: [[J:%.*]] = phi i64 [ [[HIGH:%.*]], [[ENTRY]] ], [ [[J_NEXT:%.*]], [[SELECT_END]] ]
370+
; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_START:%.*]], [[ENTRY]] ], [ [[J_NEXT]], [[SELECT_END]] ]
362371
; CHECK-NEXT: [[GEP_I:%.*]] = getelementptr inbounds ptr, ptr [[SRC:%.*]], i64 [[I]]
363372
; CHECK-NEXT: [[L_I:%.*]] = load ptr, ptr [[GEP_I]], align 8
364373
; CHECK-NEXT: [[GEP_J:%.*]] = getelementptr inbounds ptr, ptr [[SRC]], i64 [[J]]
365374
; CHECK-NEXT: [[L_J:%.*]] = load ptr, ptr [[GEP_J]], align 8
366375
; CHECK-NEXT: [[CMP3:%.*]] = icmp ult ptr [[L_I]], [[L_J]]
367376
; CHECK-NEXT: [[DEC:%.*]] = zext i1 [[CMP3]] to i64
368-
; CHECK-NEXT: [[J_NEXT]] = sub nsw i64 [[J]], [[DEC]]
377+
; CHECK-NEXT: [[CMP3_FROZEN:%.*]] = freeze i1 [[CMP3]]
378+
; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[J]], 1
379+
; CHECK-NEXT: br i1 [[CMP3_FROZEN]], label [[SELECT_END]], label [[SELECT_FALSE:%.*]]
380+
; CHECK: select.false:
381+
; CHECK-NEXT: br label [[SELECT_END]]
382+
; CHECK: select.end:
383+
; CHECK-NEXT: [[J_NEXT]] = phi i64 [ [[TMP0]], [[LOOP]] ], [ [[J]], [[SELECT_FALSE]] ]
369384
; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds ptr, ptr [[DST:%.*]], i64 [[IV]]
370385
; CHECK-NEXT: store i64 [[J_NEXT]], ptr [[GEP_DST]], align 8
371386
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
@@ -450,17 +465,20 @@ define void @test_sub_zext_not(ptr %dst, ptr %src, i64 %j.start, i64 %p, i64 %i.
450465
; CHECK-NEXT: entry:
451466
; CHECK-NEXT: br label [[LOOP:%.*]]
452467
; CHECK: loop:
453-
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
454-
; CHECK-NEXT: [[J:%.*]] = phi i64 [ [[J_START:%.*]], [[ENTRY]] ], [ [[J_NEXT:%.*]], [[LOOP]] ]
455-
; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_START:%.*]], [[ENTRY]] ], [ [[J_NEXT]], [[LOOP]] ]
456468
; CHECK-NEXT: [[GEP_I:%.*]] = getelementptr inbounds ptr, ptr [[SRC:%.*]], i64 [[I]]
457469
; CHECK-NEXT: [[L_I:%.*]] = load ptr, ptr [[GEP_I]], align 8
458470
; CHECK-NEXT: [[GEP_J:%.*]] = getelementptr inbounds ptr, ptr [[SRC]], i64 [[J]]
459471
; CHECK-NEXT: [[L_J:%.*]] = load ptr, ptr [[GEP_J]], align 8
460472
; CHECK-NEXT: [[CMP3:%.*]] = icmp ult ptr [[L_I]], [[L_J]]
461473
; CHECK-NEXT: [[NOT_CMP3:%.*]] = xor i1 [[CMP3]], true
462474
; CHECK-NEXT: [[DEC:%.*]] = zext i1 [[NOT_CMP3]] to i64
463-
; CHECK-NEXT: [[J_NEXT]] = sub nsw i64 [[J]], [[DEC]]
475+
; CHECK-NEXT: [[NOT_CMP3_FROZEN:%.*]] = freeze i1 [[NOT_CMP3]]
476+
; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[J]], 1
477+
; CHECK-NEXT: br i1 [[NOT_CMP3_FROZEN]], label [[SELECT_END]], label [[SELECT_FALSE:%.*]]
478+
; CHECK: select.false:
479+
; CHECK-NEXT: br label [[SELECT_END]]
480+
; CHECK: select.end:
481+
; CHECK-NEXT: [[J_NEXT]] = phi i64 [ [[TMP0]], [[LOOP]] ], [ [[J]], [[SELECT_FALSE]] ]
464482
; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds ptr, ptr [[DST:%.*]], i64 [[IV]]
465483
; CHECK-NEXT: store i64 [[J_NEXT]], ptr [[GEP_DST]], align 8
466484
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1

0 commit comments

Comments
 (0)