Skip to content

Commit c1b911c

Browse files
committed
[SLP]Do correct signedness analysis for clustered nodes
The signedness info should be taken from the original scalar instructions, if possible, so that the correct sext/zext instructions are generated. Also, the clustered node must be assigned a user-info entry for the gather node so that its bitwidth/sign is estimated correctly.
1 parent 7d0ca60 commit c1b911c

File tree

2 files changed

+9
-3
lines changed

2 files changed

+9
-3
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9185,12 +9185,13 @@ void BoUpSLP::transformNodes() {
91859185
for (unsigned Cnt : Slices) {
91869186
ArrayRef<Value *> Slice = VL.slice(Cnt, VF);
91879187
// If any instruction is vectorized already - do not try again.
9188-
if (const TreeEntry *SE = getTreeEntry(Slice.front());
9188+
if (TreeEntry *SE = getTreeEntry(Slice.front());
91899189
SE || getTreeEntry(Slice.back())) {
91909190
if (!SE)
91919191
continue;
91929192
if (VF != SE->getVectorFactor() || !SE->isSame(Slice))
91939193
continue;
9194+
SE->UserTreeIndices.emplace_back(&E, UINT_MAX);
91949195
AddCombinedNode(SE->Idx, Cnt);
91959196
continue;
91969197
}
@@ -13446,7 +13447,12 @@ class BoUpSLP::ShuffleInstructionBuilder final : public BaseShuffleAnalysis {
1344613447
if (CommonMask[Idx] != PoisonMaskElem)
1344713448
CommonMask[Idx] = Idx;
1344813449
for (auto [E, Idx] : SubVectors) {
13449-
Value *V = castToScalarTyElem(E->VectorizedValue);
13450+
Value *V = E->VectorizedValue;
13451+
if (V->getType()->isIntOrIntVectorTy())
13452+
V = castToScalarTyElem(V, any_of(E->Scalars, [&](Value *V) {
13453+
return !isKnownNonNegative(
13454+
V, SimplifyQuery(*R.DL));
13455+
}));
1345013456
Vec = Builder.CreateInsertVector(Vec->getType(), Vec, V,
1345113457
Builder.getInt64(Idx));
1345213458
if (!CommonMask.empty()) {

llvm/test/Transforms/SLPVectorizer/X86/subvector-minbitwidth-unsigned-value.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ define i1 @test(i64 %v1, ptr %v2, i32 %v3, i1 %v4) {
1414
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq <2 x i32> [[TMP5]], zeroinitializer
1515
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> poison, i32 [[V3]], i32 0
1616
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP7]], <4 x i32> poison, <4 x i32> <i32 poison, i32 poison, i32 0, i32 0>
17-
; CHECK-NEXT: [[TMP9:%.*]] = sext <2 x i8> [[TMP4]] to <2 x i32>
17+
; CHECK-NEXT: [[TMP9:%.*]] = zext <2 x i8> [[TMP4]] to <2 x i32>
1818
; CHECK-NEXT: [[TMP10:%.*]] = call <4 x i32> @llvm.vector.insert.v4i32.v2i32(<4 x i32> [[TMP8]], <2 x i32> [[TMP9]], i64 0)
1919
; CHECK-NEXT: [[TMP11:%.*]] = uitofp <4 x i32> [[TMP10]] to <4 x float>
2020
; CHECK-NEXT: [[TMP12:%.*]] = fdiv <4 x float> zeroinitializer, [[TMP11]]

0 commit comments

Comments
 (0)