Skip to content

Commit d7975c9

Browse files
[SLP]Add better minbitwidth analysis for udiv/urem instructions.
Adds improved bitwidth analysis for udiv/urem instructions. The analysis is based on a similar version in InstCombiner. Reviewers: RKSimon Reviewed By: RKSimon Pull Request: #85928
1 parent ff870ae commit d7975c9

File tree

2 files changed

+24
-6
lines changed

2 files changed

+24
-6
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14190,6 +14190,28 @@ bool BoUpSLP::collectValuesToDemote(
1419014190
return false;
1419114191
break;
1419214192
}
14193+
case Instruction::UDiv:
14194+
case Instruction::URem: {
14195+
if (ITE->UserTreeIndices.size() > 1 && !IsPotentiallyTruncated(I, BitWidth))
14196+
return false;
14197+
// UDiv and URem can be truncated if all the truncated bits are zero.
14198+
if (!AttemptCheckBitwidth(
14199+
[&](unsigned BitWidth, unsigned OrigBitWidth) {
14200+
assert(BitWidth <= OrigBitWidth && "Unexpected bitwidths!");
14201+
APInt Mask = APInt::getBitsSetFrom(OrigBitWidth, BitWidth);
14202+
return MaskedValueIsZero(I->getOperand(0), Mask,
14203+
SimplifyQuery(*DL)) &&
14204+
MaskedValueIsZero(I->getOperand(1), Mask,
14205+
SimplifyQuery(*DL));
14206+
},
14207+
NeedToExit))
14208+
return false;
14209+
if (NeedToExit)
14210+
return true;
14211+
if (!ProcessOperands({I->getOperand(0), I->getOperand(1)}, NeedToExit))
14212+
return false;
14213+
break;
14214+
}
1419314215

1419414216
// We can demote selects if we can demote their true and false values.
1419514217
case Instruction::Select: {

llvm/test/Transforms/SLPVectorizer/X86/reorder-possible-strided-node.ll

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -116,9 +116,7 @@ define void @test_div() {
116116
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[ARRAYIDX22]], align 4
117117
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
118118
; CHECK-NEXT: [[TMP3:%.*]] = mul <4 x i32> [[TMP2]], [[TMP0]]
119-
; CHECK-NEXT: [[TMP4:%.*]] = zext <4 x i32> [[TMP3]] to <4 x i64>
120-
; CHECK-NEXT: [[TMP5:%.*]] = udiv <4 x i64> [[TMP4]], <i64 1, i64 2, i64 1, i64 2>
121-
; CHECK-NEXT: [[TMP6:%.*]] = trunc <4 x i64> [[TMP5]] to <4 x i32>
119+
; CHECK-NEXT: [[TMP6:%.*]] = udiv <4 x i32> [[TMP3]], <i32 1, i32 2, i32 1, i32 2>
122120
; CHECK-NEXT: store <4 x i32> [[TMP6]], ptr getelementptr inbounds ([4 x i32], ptr null, i64 8, i64 0), align 16
123121
; CHECK-NEXT: ret void
124122
;
@@ -170,9 +168,7 @@ define void @test_rem() {
170168
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[ARRAYIDX22]], align 4
171169
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
172170
; CHECK-NEXT: [[TMP3:%.*]] = mul <4 x i32> [[TMP2]], [[TMP0]]
173-
; CHECK-NEXT: [[TMP4:%.*]] = zext <4 x i32> [[TMP3]] to <4 x i64>
174-
; CHECK-NEXT: [[TMP5:%.*]] = urem <4 x i64> [[TMP4]], <i64 1, i64 2, i64 1, i64 1>
175-
; CHECK-NEXT: [[TMP6:%.*]] = trunc <4 x i64> [[TMP5]] to <4 x i32>
171+
; CHECK-NEXT: [[TMP6:%.*]] = urem <4 x i32> [[TMP3]], <i32 1, i32 2, i32 1, i32 1>
176172
; CHECK-NEXT: store <4 x i32> [[TMP6]], ptr getelementptr inbounds ([4 x i32], ptr null, i64 8, i64 0), align 16
177173
; CHECK-NEXT: ret void
178174
;

0 commit comments

Comments
 (0)