Skip to content

Commit 691c953

Browse files
alexey-bataev authored and AlexisPerry committed
[SLP]Fix incorrect promotion of nodes before shuffling.
If the base node is signed but some of its values are unsigned, the whole node should still be considered signed. Also, an extra bitwidth analysis should be performed when estimating the minimal bitwidth.
1 parent 8b03e64 commit 691c953

File tree

2 files changed

+18
-13
lines changed

2 files changed

+18
-13
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -11803,13 +11803,13 @@ class BoUpSLP::ShuffleInstructionBuilder final : public BaseShuffleAnalysis {
1180311803
void add(const TreeEntry &E1, const TreeEntry &E2, ArrayRef<int> Mask) {
1180411804
Value *V1 = E1.VectorizedValue;
1180511805
if (V1->getType()->isIntOrIntVectorTy())
11806-
V1 = castToScalarTyElem(V1, all_of(E1.Scalars, [&](Value *V) {
11806+
V1 = castToScalarTyElem(V1, any_of(E1.Scalars, [&](Value *V) {
1180711807
return !isKnownNonNegative(
1180811808
V, SimplifyQuery(*R.DL));
1180911809
}));
1181011810
Value *V2 = E2.VectorizedValue;
1181111811
if (V2->getType()->isIntOrIntVectorTy())
11812-
V2 = castToScalarTyElem(V2, all_of(E2.Scalars, [&](Value *V) {
11812+
V2 = castToScalarTyElem(V2, any_of(E2.Scalars, [&](Value *V) {
1181311813
return !isKnownNonNegative(
1181411814
V, SimplifyQuery(*R.DL));
1181511815
}));
@@ -11820,7 +11820,7 @@ class BoUpSLP::ShuffleInstructionBuilder final : public BaseShuffleAnalysis {
1182011820
void add(const TreeEntry &E1, ArrayRef<int> Mask) {
1182111821
Value *V1 = E1.VectorizedValue;
1182211822
if (V1->getType()->isIntOrIntVectorTy())
11823-
V1 = castToScalarTyElem(V1, all_of(E1.Scalars, [&](Value *V) {
11823+
V1 = castToScalarTyElem(V1, any_of(E1.Scalars, [&](Value *V) {
1182411824
return !isKnownNonNegative(
1182511825
V, SimplifyQuery(*R.DL));
1182611826
}));
@@ -14900,24 +14900,30 @@ bool BoUpSLP::collectValuesToDemote(
1490014900
// If the value is not a vectorized instruction in the expression and not used
1490114901
// by the insertelement instruction and not used in multiple vector nodes, it
1490214902
// cannot be demoted.
14903+
bool IsSignedNode = any_of(E.Scalars, [&](Value *R) {
14904+
return !isKnownNonNegative(R, SimplifyQuery(*DL));
14905+
});
1490314906
auto IsPotentiallyTruncated = [&](Value *V, unsigned &BitWidth) -> bool {
1490414907
if (MultiNodeScalars.contains(V))
1490514908
return false;
14906-
if (OrigBitWidth > BitWidth) {
14909+
// For lat shuffle of sext/zext with many uses need to check the extra bit
14910+
// for unsigned values, otherwise may have incorrect casting for reused
14911+
// scalars.
14912+
bool IsSignedVal = !isKnownNonNegative(V, SimplifyQuery(*DL));
14913+
if ((!IsSignedNode || IsSignedVal) && OrigBitWidth > BitWidth) {
1490714914
APInt Mask = APInt::getBitsSetFrom(OrigBitWidth, BitWidth);
1490814915
if (MaskedValueIsZero(V, Mask, SimplifyQuery(*DL)))
1490914916
return true;
1491014917
}
14911-
auto NumSignBits = ComputeNumSignBits(V, *DL, 0, AC, nullptr, DT);
14918+
unsigned NumSignBits = ComputeNumSignBits(V, *DL, 0, AC, nullptr, DT);
1491214919
unsigned BitWidth1 = OrigBitWidth - NumSignBits;
14913-
bool IsSigned = !isKnownNonNegative(V, SimplifyQuery(*DL));
14914-
if (IsSigned)
14920+
if (IsSignedNode)
1491514921
++BitWidth1;
1491614922
if (auto *I = dyn_cast<Instruction>(V)) {
1491714923
APInt Mask = DB->getDemandedBits(I);
1491814924
unsigned BitWidth2 =
1491914925
std::max<unsigned>(1, Mask.getBitWidth() - Mask.countl_zero());
14920-
while (!IsSigned && BitWidth2 < OrigBitWidth) {
14926+
while (!IsSignedNode && BitWidth2 < OrigBitWidth) {
1492114927
APInt Mask = APInt::getBitsSetFrom(OrigBitWidth, BitWidth2 - 1);
1492214928
if (MaskedValueIsZero(V, Mask, SimplifyQuery(*DL)))
1492314929
break;

llvm/test/Transforms/SLPVectorizer/RISCV/shuffled-gather-casted.ll

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -59,13 +59,12 @@ define i32 @test1(ptr %p) {
5959
; CHECK-NEXT: entry:
6060
; CHECK-NEXT: [[D_0:%.*]] = load i16, ptr [[P]], align 4
6161
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i16> <i16 0, i16 poison, i16 0, i16 0>, i16 [[D_0]], i32 1
62-
; CHECK-NEXT: [[TMP1:%.*]] = or <4 x i16> [[TMP0]], zeroinitializer
63-
; CHECK-NEXT: [[TMP2:%.*]] = and <4 x i16> [[TMP1]], <i16 -16383, i16 -1, i16 -1, i16 -1>
6462
; CHECK-NEXT: [[TMP3:%.*]] = zext <4 x i16> [[TMP0]] to <4 x i32>
65-
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> <i32 49153, i32 65535, i32 65535, i32 65535>, <4 x i32> [[TMP3]], <4 x i32> <i32 0, i32 5, i32 2, i32 3>
63+
; CHECK-NEXT: [[TMP2:%.*]] = or <4 x i32> [[TMP3]], zeroinitializer
64+
; CHECK-NEXT: [[TMP6:%.*]] = and <4 x i32> [[TMP2]], <i32 -16383, i32 -1, i32 65535, i32 -1>
65+
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> <i32 -16383, i32 -1, i32 65535, i32 -1>, <4 x i32> [[TMP3]], <4 x i32> <i32 0, i32 5, i32 2, i32 3>
6666
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq <4 x i32> [[TMP4]], <i32 -16383, i32 65535, i32 65535, i32 65535>
67-
; CHECK-NEXT: [[TMP6:%.*]] = select <4 x i1> [[TMP5]], <4 x i16> [[TMP2]], <4 x i16> <i16 3, i16 4, i16 2, i16 1>
68-
; CHECK-NEXT: [[TMP7:%.*]] = zext <4 x i16> [[TMP6]] to <4 x i32>
67+
; CHECK-NEXT: [[TMP7:%.*]] = select <4 x i1> [[TMP5]], <4 x i32> [[TMP6]], <4 x i32> <i32 3, i32 4, i32 2, i32 1>
6968
; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP7]])
7069
; CHECK-NEXT: ret i32 [[TMP8]]
7170
;

0 commit comments

Comments
 (0)