Skip to content

Commit a77dedc

Browse files
authored
[InstSimplify][InstCombine][ConstantFold] Move vector div/rem by zero fold to InstCombine (#114280)
Previously we fold `div/rem X, C` into `poison` if any element of the constant divisor `C` is zero or undef. However, it is incorrect when threading udiv over an vector select: https://alive2.llvm.org/ce/z/3Ninx5 ``` define <2 x i32> @vec_select_udiv_poison(<2 x i1> %x) { %sel = select <2 x i1> %x, <2 x i32> <i32 -1, i32 -1>, <2 x i32> <i32 0, i32 1> %div = udiv <2 x i32> <i32 42, i32 -7>, %sel ret <2 x i32> %div } ``` In this case, `threadBinOpOverSelect` folds `udiv <i32 42, i32 -7>, <i32 -1, i32 -1>` and `udiv <i32 42, i32 -7>, <i32 0, i32 1>` into `zeroinitializer` and `poison`, respectively. One solution is to introduce a new flag indicating that we are threading over a vector select. But it requires to modify both `InstSimplify` and `ConstantFold`. However, this optimization doesn't provide benefits to real-world programs: https://dtcxzyw.github.io/llvm-opt-benchmark/coverage/data/zyw/opt-ci/actions-runner/_work/llvm-opt-benchmark/llvm-opt-benchmark/llvm/llvm-project/llvm/lib/IR/ConstantFold.cpp.html#L908 https://dtcxzyw.github.io/llvm-opt-benchmark/coverage/data/zyw/opt-ci/actions-runner/_work/llvm-opt-benchmark/llvm-opt-benchmark/llvm/llvm-project/llvm/lib/Analysis/InstructionSimplify.cpp.html#L1107 This patch moves the fold into InstCombine to avoid breaking numerous existing tests. Fixes #114191 and #113866 (only poison-safety issue).
1 parent e577f14 commit a77dedc

File tree

9 files changed

+79
-65
lines changed

9 files changed

+79
-65
lines changed

llvm/lib/Analysis/InstructionSimplify.cpp

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1095,19 +1095,6 @@ static Value *simplifyDivRem(Instruction::BinaryOps Opcode, Value *Op0,
10951095
if (match(Op1, m_Zero()))
10961096
return PoisonValue::get(Ty);
10971097

1098-
// If any element of a constant divisor fixed width vector is zero or undef
1099-
// the behavior is undefined and we can fold the whole op to poison.
1100-
auto *Op1C = dyn_cast<Constant>(Op1);
1101-
auto *VTy = dyn_cast<FixedVectorType>(Ty);
1102-
if (Op1C && VTy) {
1103-
unsigned NumElts = VTy->getNumElements();
1104-
for (unsigned i = 0; i != NumElts; ++i) {
1105-
Constant *Elt = Op1C->getAggregateElement(i);
1106-
if (Elt && (Elt->isNullValue() || Q.isUndefValue(Elt)))
1107-
return PoisonValue::get(Ty);
1108-
}
1109-
}
1110-
11111098
// poison / X -> poison
11121099
// poison % X -> poison
11131100
if (isa<PoisonValue>(Op0))

llvm/lib/IR/ConstantFold.cpp

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -902,11 +902,6 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, Constant *C1,
902902
Constant *ExtractIdx = ConstantInt::get(Ty, i);
903903
Constant *LHS = ConstantExpr::getExtractElement(C1, ExtractIdx);
904904
Constant *RHS = ConstantExpr::getExtractElement(C2, ExtractIdx);
905-
906-
// If any element of a divisor vector is zero, the whole op is poison.
907-
if (Instruction::isIntDivRem(Opcode) && RHS->isNullValue())
908-
return PoisonValue::get(VTy);
909-
910905
Constant *Res = ConstantExpr::isDesirableBinOp(Opcode)
911906
? ConstantExpr::get(Opcode, LHS, RHS)
912907
: ConstantFoldBinaryInstruction(Opcode, LHS, RHS);

llvm/lib/Transforms/InstCombine/InstCombineInternal.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,7 @@ class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final
102102
Instruction *visitSRem(BinaryOperator &I);
103103
Instruction *visitFRem(BinaryOperator &I);
104104
bool simplifyDivRemOfSelectWithZeroOp(BinaryOperator &I);
105+
Instruction *commonIDivRemTransforms(BinaryOperator &I);
105106
Instruction *commonIRemTransforms(BinaryOperator &I);
106107
Instruction *commonIDivTransforms(BinaryOperator &I);
107108
Instruction *visitUDiv(BinaryOperator &I);

llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp

Lines changed: 39 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1158,29 +1158,39 @@ static Value *foldIDivShl(BinaryOperator &I, InstCombiner::BuilderTy &Builder) {
11581158
return nullptr;
11591159
}
11601160

1161-
/// This function implements the transforms common to both integer division
1162-
/// instructions (udiv and sdiv). It is called by the visitors to those integer
1163-
/// division instructions.
1164-
/// Common integer divide transforms
1165-
Instruction *InstCombinerImpl::commonIDivTransforms(BinaryOperator &I) {
1166-
if (Instruction *Phi = foldBinopWithPhiOperands(I))
1167-
return Phi;
1168-
1161+
/// Common integer divide/remainder transforms
1162+
Instruction *InstCombinerImpl::commonIDivRemTransforms(BinaryOperator &I) {
1163+
assert(I.isIntDivRem() && "Unexpected instruction");
11691164
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
1170-
bool IsSigned = I.getOpcode() == Instruction::SDiv;
1165+
1166+
// If any element of a constant divisor fixed width vector is zero or undef
1167+
// the behavior is undefined and we can fold the whole op to poison.
1168+
auto *Op1C = dyn_cast<Constant>(Op1);
11711169
Type *Ty = I.getType();
1170+
auto *VTy = dyn_cast<FixedVectorType>(Ty);
1171+
if (Op1C && VTy) {
1172+
unsigned NumElts = VTy->getNumElements();
1173+
for (unsigned i = 0; i != NumElts; ++i) {
1174+
Constant *Elt = Op1C->getAggregateElement(i);
1175+
if (Elt && (Elt->isNullValue() || isa<UndefValue>(Elt)))
1176+
return replaceInstUsesWith(I, PoisonValue::get(Ty));
1177+
}
1178+
}
1179+
1180+
if (Instruction *Phi = foldBinopWithPhiOperands(I))
1181+
return Phi;
11721182

11731183
// The RHS is known non-zero.
11741184
if (Value *V = simplifyValueKnownNonZero(I.getOperand(1), *this, I))
11751185
return replaceOperand(I, 1, V);
11761186

1177-
// Handle cases involving: [su]div X, (select Cond, Y, Z)
1178-
// This does not apply for fdiv.
1187+
// Handle cases involving: div/rem X, (select Cond, Y, Z)
11791188
if (simplifyDivRemOfSelectWithZeroOp(I))
11801189
return &I;
11811190

11821191
// If the divisor is a select-of-constants, try to constant fold all div ops:
1183-
// C / (select Cond, TrueC, FalseC) --> select Cond, (C / TrueC), (C / FalseC)
1192+
// C div/rem (select Cond, TrueC, FalseC) --> select Cond, (C div/rem TrueC),
1193+
// (C div/rem FalseC)
11841194
// TODO: Adapt simplifyDivRemOfSelectWithZeroOp to allow this and other folds.
11851195
if (match(Op0, m_ImmConstant()) &&
11861196
match(Op1, m_Select(m_Value(), m_ImmConstant(), m_ImmConstant()))) {
@@ -1189,6 +1199,21 @@ Instruction *InstCombinerImpl::commonIDivTransforms(BinaryOperator &I) {
11891199
return R;
11901200
}
11911201

1202+
return nullptr;
1203+
}
1204+
1205+
/// This function implements the transforms common to both integer division
1206+
/// instructions (udiv and sdiv). It is called by the visitors to those integer
1207+
/// division instructions.
1208+
/// Common integer divide transforms
1209+
Instruction *InstCombinerImpl::commonIDivTransforms(BinaryOperator &I) {
1210+
if (Instruction *Res = commonIDivRemTransforms(I))
1211+
return Res;
1212+
1213+
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
1214+
bool IsSigned = I.getOpcode() == Instruction::SDiv;
1215+
Type *Ty = I.getType();
1216+
11921217
const APInt *C2;
11931218
if (match(Op1, m_APInt(C2))) {
11941219
Value *X;
@@ -2138,29 +2163,11 @@ static Instruction *simplifyIRemMulShl(BinaryOperator &I,
21382163
/// remainder instructions.
21392164
/// Common integer remainder transforms
21402165
Instruction *InstCombinerImpl::commonIRemTransforms(BinaryOperator &I) {
2141-
if (Instruction *Phi = foldBinopWithPhiOperands(I))
2142-
return Phi;
2166+
if (Instruction *Res = commonIDivRemTransforms(I))
2167+
return Res;
21432168

21442169
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
21452170

2146-
// The RHS is known non-zero.
2147-
if (Value *V = simplifyValueKnownNonZero(I.getOperand(1), *this, I))
2148-
return replaceOperand(I, 1, V);
2149-
2150-
// Handle cases involving: rem X, (select Cond, Y, Z)
2151-
if (simplifyDivRemOfSelectWithZeroOp(I))
2152-
return &I;
2153-
2154-
// If the divisor is a select-of-constants, try to constant fold all rem ops:
2155-
// C % (select Cond, TrueC, FalseC) --> select Cond, (C % TrueC), (C % FalseC)
2156-
// TODO: Adapt simplifyDivRemOfSelectWithZeroOp to allow this and other folds.
2157-
if (match(Op0, m_ImmConstant()) &&
2158-
match(Op1, m_Select(m_Value(), m_ImmConstant(), m_ImmConstant()))) {
2159-
if (Instruction *R = FoldOpIntoSelect(I, cast<SelectInst>(Op1),
2160-
/*FoldWithMultiUse*/ true))
2161-
return R;
2162-
}
2163-
21642171
if (isa<Constant>(Op1)) {
21652172
if (Instruction *Op0I = dyn_cast<Instruction>(Op0)) {
21662173
if (SelectInst *SI = dyn_cast<SelectInst>(Op0I)) {

llvm/test/Transforms/InstCombine/div.ll

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1163,7 +1163,8 @@ define <2 x i8> @sdiv_constant_dividend_select_of_constants_divisor_vec(i1 %b) {
11631163

11641164
define <2 x i8> @sdiv_constant_dividend_select_of_constants_divisor_vec_ub1(i1 %b) {
11651165
; CHECK-LABEL: @sdiv_constant_dividend_select_of_constants_divisor_vec_ub1(
1166-
; CHECK-NEXT: ret <2 x i8> <i8 -10, i8 -10>
1166+
; CHECK-NEXT: [[R:%.*]] = select i1 [[B:%.*]], <2 x i8> <i8 poison, i8 8>, <2 x i8> <i8 -10, i8 -10>
1167+
; CHECK-NEXT: ret <2 x i8> [[R]]
11671168
;
11681169
%s = select i1 %b, <2 x i8> <i8 0, i8 -5>, <2 x i8> <i8 -4, i8 4>
11691170
%r = sdiv <2 x i8> <i8 42, i8 -42>, %s
@@ -1269,7 +1270,8 @@ define <2 x i8> @udiv_constant_dividend_select_of_constants_divisor_vec(i1 %b) {
12691270

12701271
define <2 x i8> @udiv_constant_dividend_select_of_constants_divisor_vec_ub1(i1 %b) {
12711272
; CHECK-LABEL: @udiv_constant_dividend_select_of_constants_divisor_vec_ub1(
1272-
; CHECK-NEXT: ret <2 x i8> <i8 0, i8 53>
1273+
; CHECK-NEXT: [[R:%.*]] = select i1 [[B:%.*]], <2 x i8> <i8 poison, i8 0>, <2 x i8> <i8 0, i8 53>
1274+
; CHECK-NEXT: ret <2 x i8> [[R]]
12731275
;
12741276
%s = select i1 %b, <2 x i8> <i8 0, i8 -5>, <2 x i8> <i8 -4, i8 4>
12751277
%r = udiv <2 x i8> <i8 42, i8 -42>, %s

llvm/test/Transforms/InstCombine/rem.ll

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -997,7 +997,8 @@ define <2 x i8> @urem_constant_dividend_select_of_constants_divisor_vec(i1 %b) {
997997

998998
define <2 x i8> @urem_constant_dividend_select_of_constants_divisor_vec_ub1(i1 %b) {
999999
; CHECK-LABEL: @urem_constant_dividend_select_of_constants_divisor_vec_ub1(
1000-
; CHECK-NEXT: ret <2 x i8> <i8 42, i8 2>
1000+
; CHECK-NEXT: [[R:%.*]] = select i1 [[B:%.*]], <2 x i8> <i8 poison, i8 -42>, <2 x i8> <i8 42, i8 2>
1001+
; CHECK-NEXT: ret <2 x i8> [[R]]
10011002
;
10021003
%s = select i1 %b, <2 x i8> <i8 0, i8 -5>, <2 x i8> <i8 -4, i8 4>
10031004
%r = urem <2 x i8> <i8 42, i8 -42>, %s

llvm/test/Transforms/InstCombine/vector-udiv.ll

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,3 +97,16 @@ define <4 x i32> @test_v4i32_zext_shl_const_pow2(<4 x i32> %a0, <4 x i16> %a1) {
9797
%3 = udiv <4 x i32> %a0, %2
9898
ret <4 x i32> %3
9999
}
100+
101+
; Make sure we do not simplify udiv <i32 42, i32 -7>, <i32 0, i32 1> to
102+
; poison when threading udiv over selects
103+
104+
define <2 x i32> @vec_select_udiv_poison(<2 x i1> %x) {
105+
; CHECK-LABEL: @vec_select_udiv_poison(
106+
; CHECK-NEXT: [[DIV:%.*]] = select <2 x i1> [[X:%.*]], <2 x i32> zeroinitializer, <2 x i32> <i32 poison, i32 -7>
107+
; CHECK-NEXT: ret <2 x i32> [[DIV]]
108+
;
109+
%sel = select <2 x i1> %x, <2 x i32> <i32 -1, i32 -1>, <2 x i32> <i32 0, i32 1>
110+
%div = udiv <2 x i32> <i32 42, i32 -7>, %sel
111+
ret <2 x i32> %div
112+
}

llvm/test/Transforms/InstSimplify/div.ll

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -29,47 +29,51 @@ define <2 x i32> @zero_dividend_vector_poison_elt(<2 x i32> %A) {
2929

3030
define <2 x i8> @sdiv_zero_elt_vec_constfold(<2 x i8> %x) {
3131
; CHECK-LABEL: @sdiv_zero_elt_vec_constfold(
32-
; CHECK-NEXT: ret <2 x i8> poison
32+
; CHECK-NEXT: ret <2 x i8> <i8 poison, i8 0>
3333
;
3434
%div = sdiv <2 x i8> <i8 1, i8 2>, <i8 0, i8 -42>
3535
ret <2 x i8> %div
3636
}
3737

3838
define <2 x i8> @udiv_zero_elt_vec_constfold(<2 x i8> %x) {
3939
; CHECK-LABEL: @udiv_zero_elt_vec_constfold(
40-
; CHECK-NEXT: ret <2 x i8> poison
40+
; CHECK-NEXT: ret <2 x i8> <i8 0, i8 poison>
4141
;
4242
%div = udiv <2 x i8> <i8 1, i8 2>, <i8 42, i8 0>
4343
ret <2 x i8> %div
4444
}
4545

4646
define <2 x i8> @sdiv_zero_elt_vec(<2 x i8> %x) {
4747
; CHECK-LABEL: @sdiv_zero_elt_vec(
48-
; CHECK-NEXT: ret <2 x i8> poison
48+
; CHECK-NEXT: [[DIV:%.*]] = sdiv <2 x i8> [[X:%.*]], <i8 -42, i8 0>
49+
; CHECK-NEXT: ret <2 x i8> [[DIV]]
4950
;
5051
%div = sdiv <2 x i8> %x, <i8 -42, i8 0>
5152
ret <2 x i8> %div
5253
}
5354

5455
define <2 x i8> @udiv_zero_elt_vec(<2 x i8> %x) {
5556
; CHECK-LABEL: @udiv_zero_elt_vec(
56-
; CHECK-NEXT: ret <2 x i8> poison
57+
; CHECK-NEXT: [[DIV:%.*]] = udiv <2 x i8> [[X:%.*]], <i8 0, i8 42>
58+
; CHECK-NEXT: ret <2 x i8> [[DIV]]
5759
;
5860
%div = udiv <2 x i8> %x, <i8 0, i8 42>
5961
ret <2 x i8> %div
6062
}
6163

6264
define <2 x i8> @sdiv_poison_elt_vec(<2 x i8> %x) {
6365
; CHECK-LABEL: @sdiv_poison_elt_vec(
64-
; CHECK-NEXT: ret <2 x i8> poison
66+
; CHECK-NEXT: [[DIV:%.*]] = sdiv <2 x i8> [[X:%.*]], <i8 -42, i8 poison>
67+
; CHECK-NEXT: ret <2 x i8> [[DIV]]
6568
;
6669
%div = sdiv <2 x i8> %x, <i8 -42, i8 poison>
6770
ret <2 x i8> %div
6871
}
6972

7073
define <2 x i8> @udiv_poison_elt_vec(<2 x i8> %x) {
7174
; CHECK-LABEL: @udiv_poison_elt_vec(
72-
; CHECK-NEXT: ret <2 x i8> poison
75+
; CHECK-NEXT: [[DIV:%.*]] = udiv <2 x i8> [[X:%.*]], <i8 poison, i8 42>
76+
; CHECK-NEXT: ret <2 x i8> [[DIV]]
7377
;
7478
%div = udiv <2 x i8> %x, <i8 poison, i8 42>
7579
ret <2 x i8> %div

llvm/test/Transforms/InstSimplify/rem.ll

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -29,47 +29,51 @@ define <2 x i32> @zero_dividend_vector_poison_elt(<2 x i32> %A) {
2929

3030
define <2 x i8> @srem_zero_elt_vec_constfold(<2 x i8> %x) {
3131
; CHECK-LABEL: @srem_zero_elt_vec_constfold(
32-
; CHECK-NEXT: ret <2 x i8> poison
32+
; CHECK-NEXT: ret <2 x i8> <i8 poison, i8 2>
3333
;
3434
%rem = srem <2 x i8> <i8 1, i8 2>, <i8 0, i8 -42>
3535
ret <2 x i8> %rem
3636
}
3737

3838
define <2 x i8> @urem_zero_elt_vec_constfold(<2 x i8> %x) {
3939
; CHECK-LABEL: @urem_zero_elt_vec_constfold(
40-
; CHECK-NEXT: ret <2 x i8> poison
40+
; CHECK-NEXT: ret <2 x i8> <i8 1, i8 poison>
4141
;
4242
%rem = urem <2 x i8> <i8 1, i8 2>, <i8 42, i8 0>
4343
ret <2 x i8> %rem
4444
}
4545

4646
define <2 x i8> @srem_zero_elt_vec(<2 x i8> %x) {
4747
; CHECK-LABEL: @srem_zero_elt_vec(
48-
; CHECK-NEXT: ret <2 x i8> poison
48+
; CHECK-NEXT: [[REM:%.*]] = srem <2 x i8> [[X:%.*]], <i8 -42, i8 0>
49+
; CHECK-NEXT: ret <2 x i8> [[REM]]
4950
;
5051
%rem = srem <2 x i8> %x, <i8 -42, i8 0>
5152
ret <2 x i8> %rem
5253
}
5354

5455
define <2 x i8> @urem_zero_elt_vec(<2 x i8> %x) {
5556
; CHECK-LABEL: @urem_zero_elt_vec(
56-
; CHECK-NEXT: ret <2 x i8> poison
57+
; CHECK-NEXT: [[REM:%.*]] = urem <2 x i8> [[X:%.*]], <i8 0, i8 42>
58+
; CHECK-NEXT: ret <2 x i8> [[REM]]
5759
;
5860
%rem = urem <2 x i8> %x, <i8 0, i8 42>
5961
ret <2 x i8> %rem
6062
}
6163

6264
define <2 x i8> @srem_undef_elt_vec(<2 x i8> %x) {
6365
; CHECK-LABEL: @srem_undef_elt_vec(
64-
; CHECK-NEXT: ret <2 x i8> poison
66+
; CHECK-NEXT: [[REM:%.*]] = srem <2 x i8> [[X:%.*]], <i8 -42, i8 undef>
67+
; CHECK-NEXT: ret <2 x i8> [[REM]]
6568
;
6669
%rem = srem <2 x i8> %x, <i8 -42, i8 undef>
6770
ret <2 x i8> %rem
6871
}
6972

7073
define <2 x i8> @urem_undef_elt_vec(<2 x i8> %x) {
7174
; CHECK-LABEL: @urem_undef_elt_vec(
72-
; CHECK-NEXT: ret <2 x i8> poison
75+
; CHECK-NEXT: [[REM:%.*]] = urem <2 x i8> [[X:%.*]], <i8 undef, i8 42>
76+
; CHECK-NEXT: ret <2 x i8> [[REM]]
7377
;
7478
%rem = urem <2 x i8> %x, <i8 undef, i8 42>
7579
ret <2 x i8> %rem

0 commit comments

Comments
 (0)