Skip to content

Commit 7e5ca4e

Browse files
committed
[SelectOpt] Support add and sub with zext operands.
Extend the support for implicit selects in the form of OR with a ZExt operand to support ADD and SUB binops as well. They can similarly form implicit selects, which can be profitable to convert back to branches.
1 parent 6d23ac1 commit 7e5ca4e

File tree

3 files changed

+84
-38
lines changed

3 files changed

+84
-38
lines changed

llvm/lib/CodeGen/SelectOptimize.cpp

Lines changed: 30 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -145,11 +145,20 @@ class SelectOptimizeImpl {
145145

146146
// An Or(zext(i1 X), Y) can also be treated like a select, with condition
147147
// X and values Y|1 and Y.
148-
Value *X;
149-
if (PatternMatch::match(
150-
I, m_c_Or(m_OneUse(m_ZExt(m_Value(X))), m_Value())) &&
151-
X->getType()->isIntegerTy(1))
152-
return SelectLike(I);
148+
switch (I->getOpcode()) {
149+
case Instruction::Add:
150+
case Instruction::Or:
151+
case Instruction::Sub: {
152+
Value *X;
153+
if ((PatternMatch::match(I->getOperand(0),
154+
m_OneUse(m_ZExt(m_Value(X)))) ||
155+
PatternMatch::match(I->getOperand(1),
156+
m_OneUse(m_ZExt(m_Value(X))))) &&
157+
X->getType()->isIntegerTy(1))
158+
return SelectLike(I);
159+
break;
160+
}
161+
}
153162

154163
return SelectLike(nullptr);
155164
}
@@ -250,19 +259,22 @@ class SelectOptimizeImpl {
250259
: Scaled64::getZero();
251260
}
252261

253-
// Or case - add the cost of an extra Or to the cost of the False case.
254-
if (isa<BinaryOperator>(I))
255-
if (auto I = dyn_cast<Instruction>(getFalseValue())) {
262+
// BinaryOp case - add the cost of an extra BinOp to the cost of the False
263+
// case.
264+
if (isa<BinaryOperator>(I)) {
265+
if (auto OpI = dyn_cast<Instruction>(getFalseValue())) {
256266
auto It = InstCostMap.find(I);
257267
if (It != InstCostMap.end()) {
258268
InstructionCost OrCost = TTI->getArithmeticInstrCost(
259-
Instruction::Or, I->getType(), TargetTransformInfo::TCK_Latency,
269+
I->getOpcode(), OpI->getType(),
270+
TargetTransformInfo::TCK_Latency,
260271
{TargetTransformInfo::OK_AnyValue,
261272
TargetTransformInfo::OP_None},
262273
{TTI::OK_UniformConstantValue, TTI::OP_PowerOf2});
263274
return It->second.NonPredCost + Scaled64::get(*OrCost.getValue());
264275
}
265276
}
277+
}
266278

267279
return Scaled64::getZero();
268280
}
@@ -548,12 +560,16 @@ getTrueOrFalseValue(SelectOptimizeImpl::SelectLike SI, bool isTrue,
548560
V = (!isTrue ? DefSI->getTrueValue() : DefSI->getFalseValue());
549561
}
550562

551-
if (isa<BinaryOperator>(SI.getI())) {
552-
assert(SI.getI()->getOpcode() == Instruction::Or &&
553-
"Only currently handling Or instructions.");
563+
if (auto *BinOp = dyn_cast<BinaryOperator>(SI.getI())) {
564+
assert((BinOp->getOpcode() == Instruction::Add ||
565+
BinOp->getOpcode() == Instruction::Or ||
566+
BinOp->getOpcode() == Instruction::Sub) &&
567+
"Only currently handling Add, Or and Sub instructions.");
554568
V = SI.getFalseValue();
555-
if (isTrue)
556-
V = IB.CreateOr(V, ConstantInt::get(V->getType(), 1));
569+
if (isTrue) {
570+
Constant *CI = ConstantInt::get(V->getType(), 1);
571+
V = IB.CreateBinOp(BinOp->getOpcode(), V, CI);
572+
}
557573
}
558574

559575
assert(V && "Failed to get select true/false value");

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4678,14 +4678,20 @@ AArch64TTIImpl::getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
46784678
}
46794679

46804680
bool AArch64TTIImpl::shouldTreatInstructionLikeSelect(const Instruction *I) {
4681-
// For the binary operators (e.g. or) we need to be more careful than
4682-
// selects, here we only transform them if they are already at a natural
4683-
// break point in the code - the end of a block with an unconditional
4684-
// terminator.
4685-
if (EnableOrLikeSelectOpt && I->getOpcode() == Instruction::Or &&
4686-
isa<BranchInst>(I->getNextNode()) &&
4687-
cast<BranchInst>(I->getNextNode())->isUnconditional())
4688-
return true;
4681+
if (EnableOrLikeSelectOpt) {
4682+
// For the binary operators (e.g. or) we need to be more careful than
4683+
// selects, here we only transform them if they are already at a natural
4684+
// break point in the code - the end of a block with an unconditional
4685+
// terminator.
4686+
if (I->getOpcode() == Instruction::Or &&
4687+
isa<BranchInst>(I->getNextNode()) &&
4688+
cast<BranchInst>(I->getNextNode())->isUnconditional())
4689+
return true;
4690+
4691+
if (I->getOpcode() == Instruction::Add ||
4692+
I->getOpcode() == Instruction::Sub)
4693+
return true;
4694+
}
46894695
return BaseT::shouldTreatInstructionLikeSelect(I);
46904696
}
46914697

llvm/test/CodeGen/AArch64/selectopt-cast.ll

Lines changed: 40 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -7,16 +7,22 @@ define void @test_add_zext(ptr %dst, ptr %src, i64 %j.start, i64 %p, i64 %i.star
77
; CHECK-NEXT: entry:
88
; CHECK-NEXT: br label [[LOOP:%.*]]
99
; CHECK: loop:
10-
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
11-
; CHECK-NEXT: [[J:%.*]] = phi i64 [ [[HIGH:%.*]], [[ENTRY]] ], [ [[J_NEXT:%.*]], [[LOOP]] ]
12-
; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_START:%.*]], [[ENTRY]] ], [ [[J_NEXT]], [[LOOP]] ]
10+
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[SELECT_END:%.*]] ]
11+
; CHECK-NEXT: [[J:%.*]] = phi i64 [ [[HIGH:%.*]], [[ENTRY]] ], [ [[J_NEXT:%.*]], [[SELECT_END]] ]
12+
; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_START:%.*]], [[ENTRY]] ], [ [[J_NEXT]], [[SELECT_END]] ]
1313
; CHECK-NEXT: [[GEP_I:%.*]] = getelementptr inbounds ptr, ptr [[SRC:%.*]], i64 [[I]]
1414
; CHECK-NEXT: [[L_I:%.*]] = load ptr, ptr [[GEP_I]], align 8
1515
; CHECK-NEXT: [[GEP_J:%.*]] = getelementptr inbounds ptr, ptr [[SRC]], i64 [[J]]
1616
; CHECK-NEXT: [[L_J:%.*]] = load ptr, ptr [[GEP_J]], align 8
1717
; CHECK-NEXT: [[CMP3:%.*]] = icmp ult ptr [[L_I]], [[L_J]]
1818
; CHECK-NEXT: [[DEC:%.*]] = zext i1 [[CMP3]] to i64
19-
; CHECK-NEXT: [[J_NEXT]] = add nsw i64 [[J]], [[DEC]]
19+
; CHECK-NEXT: [[CMP3_FROZEN:%.*]] = freeze i1 [[CMP3]]
20+
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[J]], 1
21+
; CHECK-NEXT: br i1 [[CMP3_FROZEN]], label [[SELECT_END]], label [[SELECT_FALSE:%.*]]
22+
; CHECK: select.false:
23+
; CHECK-NEXT: br label [[SELECT_END]]
24+
; CHECK: select.end:
25+
; CHECK-NEXT: [[J_NEXT]] = phi i64 [ [[TMP0]], [[LOOP]] ], [ [[J]], [[SELECT_FALSE]] ]
2026
; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds ptr, ptr [[DST:%.*]], i64 [[IV]]
2127
; CHECK-NEXT: store i64 [[J_NEXT]], ptr [[GEP_DST]], align 8
2228
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
@@ -54,17 +60,23 @@ define void @test_add_zext_not(ptr %dst, ptr %src, i64 %j.start, i64 %p, i64 %i.
5460
; CHECK-NEXT: entry:
5561
; CHECK-NEXT: br label [[LOOP:%.*]]
5662
; CHECK: loop:
57-
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
58-
; CHECK-NEXT: [[J:%.*]] = phi i64 [ [[HIGH:%.*]], [[ENTRY]] ], [ [[J_NEXT:%.*]], [[LOOP]] ]
59-
; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_START:%.*]], [[ENTRY]] ], [ [[J_NEXT]], [[LOOP]] ]
63+
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[SELECT_END:%.*]] ]
64+
; CHECK-NEXT: [[J:%.*]] = phi i64 [ [[HIGH:%.*]], [[ENTRY]] ], [ [[J_NEXT:%.*]], [[SELECT_END]] ]
65+
; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_START:%.*]], [[ENTRY]] ], [ [[J_NEXT]], [[SELECT_END]] ]
6066
; CHECK-NEXT: [[GEP_I:%.*]] = getelementptr inbounds ptr, ptr [[SRC:%.*]], i64 [[I]]
6167
; CHECK-NEXT: [[L_I:%.*]] = load ptr, ptr [[GEP_I]], align 8
6268
; CHECK-NEXT: [[GEP_J:%.*]] = getelementptr inbounds ptr, ptr [[SRC]], i64 [[J]]
6369
; CHECK-NEXT: [[L_J:%.*]] = load ptr, ptr [[GEP_J]], align 8
6470
; CHECK-NEXT: [[CMP3:%.*]] = icmp ult ptr [[L_I]], [[L_J]]
6571
; CHECK-NEXT: [[NOT_CMP3:%.*]] = xor i1 [[CMP3]], true
6672
; CHECK-NEXT: [[DEC:%.*]] = zext i1 [[NOT_CMP3]] to i64
67-
; CHECK-NEXT: [[J_NEXT]] = add nsw i64 [[J]], [[DEC]]
73+
; CHECK-NEXT: [[NOT_CMP3_FROZEN:%.*]] = freeze i1 [[NOT_CMP3]]
74+
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[J]], 1
75+
; CHECK-NEXT: br i1 [[NOT_CMP3_FROZEN]], label [[SELECT_END]], label [[SELECT_FALSE:%.*]]
76+
; CHECK: select.false:
77+
; CHECK-NEXT: br label [[SELECT_END]]
78+
; CHECK: select.end:
79+
; CHECK-NEXT: [[J_NEXT]] = phi i64 [ [[TMP0]], [[LOOP]] ], [ [[J]], [[SELECT_FALSE]] ]
6880
; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds ptr, ptr [[DST:%.*]], i64 [[IV]]
6981
; CHECK-NEXT: store i64 [[J_NEXT]], ptr [[GEP_DST]], align 8
7082
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
@@ -308,16 +320,22 @@ define void @test_sub_zext(ptr %dst, ptr %src, i64 %j.start, i64 %p, i64 %i.star
308320
; CHECK-NEXT: entry:
309321
; CHECK-NEXT: br label [[LOOP:%.*]]
310322
; CHECK: loop:
311-
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
312-
; CHECK-NEXT: [[J:%.*]] = phi i64 [ [[HIGH:%.*]], [[ENTRY]] ], [ [[J_NEXT:%.*]], [[LOOP]] ]
313-
; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_START:%.*]], [[ENTRY]] ], [ [[J_NEXT]], [[LOOP]] ]
323+
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[SELECT_END:%.*]] ]
324+
; CHECK-NEXT: [[J:%.*]] = phi i64 [ [[HIGH:%.*]], [[ENTRY]] ], [ [[J_NEXT:%.*]], [[SELECT_END]] ]
325+
; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_START:%.*]], [[ENTRY]] ], [ [[J_NEXT]], [[SELECT_END]] ]
314326
; CHECK-NEXT: [[GEP_I:%.*]] = getelementptr inbounds ptr, ptr [[SRC:%.*]], i64 [[I]]
315327
; CHECK-NEXT: [[L_I:%.*]] = load ptr, ptr [[GEP_I]], align 8
316328
; CHECK-NEXT: [[GEP_J:%.*]] = getelementptr inbounds ptr, ptr [[SRC]], i64 [[J]]
317329
; CHECK-NEXT: [[L_J:%.*]] = load ptr, ptr [[GEP_J]], align 8
318330
; CHECK-NEXT: [[CMP3:%.*]] = icmp ult ptr [[L_I]], [[L_J]]
319331
; CHECK-NEXT: [[DEC:%.*]] = zext i1 [[CMP3]] to i64
320-
; CHECK-NEXT: [[J_NEXT]] = sub nsw i64 [[J]], [[DEC]]
332+
; CHECK-NEXT: [[CMP3_FROZEN:%.*]] = freeze i1 [[CMP3]]
333+
; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[J]], 1
334+
; CHECK-NEXT: br i1 [[CMP3_FROZEN]], label [[SELECT_END]], label [[SELECT_FALSE:%.*]]
335+
; CHECK: select.false:
336+
; CHECK-NEXT: br label [[SELECT_END]]
337+
; CHECK: select.end:
338+
; CHECK-NEXT: [[J_NEXT]] = phi i64 [ [[TMP0]], [[LOOP]] ], [ [[J]], [[SELECT_FALSE]] ]
321339
; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds ptr, ptr [[DST:%.*]], i64 [[IV]]
322340
; CHECK-NEXT: store i64 [[J_NEXT]], ptr [[GEP_DST]], align 8
323341
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
@@ -355,17 +373,23 @@ define void @test_sub_zext_not(ptr %dst, ptr %src, i64 %j.start, i64 %p, i64 %i.
355373
; CHECK-NEXT: entry:
356374
; CHECK-NEXT: br label [[LOOP:%.*]]
357375
; CHECK: loop:
358-
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
359-
; CHECK-NEXT: [[J:%.*]] = phi i64 [ [[HIGH:%.*]], [[ENTRY]] ], [ [[J_NEXT:%.*]], [[LOOP]] ]
360-
; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_START:%.*]], [[ENTRY]] ], [ [[J_NEXT]], [[LOOP]] ]
376+
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[SELECT_END:%.*]] ]
377+
; CHECK-NEXT: [[J:%.*]] = phi i64 [ [[HIGH:%.*]], [[ENTRY]] ], [ [[J_NEXT:%.*]], [[SELECT_END]] ]
378+
; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_START:%.*]], [[ENTRY]] ], [ [[J_NEXT]], [[SELECT_END]] ]
361379
; CHECK-NEXT: [[GEP_I:%.*]] = getelementptr inbounds ptr, ptr [[SRC:%.*]], i64 [[I]]
362380
; CHECK-NEXT: [[L_I:%.*]] = load ptr, ptr [[GEP_I]], align 8
363381
; CHECK-NEXT: [[GEP_J:%.*]] = getelementptr inbounds ptr, ptr [[SRC]], i64 [[J]]
364382
; CHECK-NEXT: [[L_J:%.*]] = load ptr, ptr [[GEP_J]], align 8
365383
; CHECK-NEXT: [[CMP3:%.*]] = icmp ult ptr [[L_I]], [[L_J]]
366384
; CHECK-NEXT: [[NOT_CMP3:%.*]] = xor i1 [[CMP3]], true
367385
; CHECK-NEXT: [[DEC:%.*]] = zext i1 [[NOT_CMP3]] to i64
368-
; CHECK-NEXT: [[J_NEXT]] = sub nsw i64 [[J]], [[DEC]]
386+
; CHECK-NEXT: [[NOT_CMP3_FROZEN:%.*]] = freeze i1 [[NOT_CMP3]]
387+
; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[J]], 1
388+
; CHECK-NEXT: br i1 [[NOT_CMP3_FROZEN]], label [[SELECT_END]], label [[SELECT_FALSE:%.*]]
389+
; CHECK: select.false:
390+
; CHECK-NEXT: br label [[SELECT_END]]
391+
; CHECK: select.end:
392+
; CHECK-NEXT: [[J_NEXT]] = phi i64 [ [[TMP0]], [[LOOP]] ], [ [[J]], [[SELECT_FALSE]] ]
369393
; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds ptr, ptr [[DST:%.*]], i64 [[IV]]
370394
; CHECK-NEXT: store i64 [[J_NEXT]], ptr [[GEP_DST]], align 8
371395
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1

0 commit comments

Comments
 (0)