Skip to content

Commit 4d7f3d9

Browse files
committed
[SLP] Fix final analysis for unsigned nodes.
Need to check that at least a single bit is cleared for unsigned nodes before reducing their size. Otherwise, they might be treated as signed in signed nodes.
1 parent 6f26867 commit 4d7f3d9

File tree

2 files changed

+20
-10
lines changed

2 files changed

+20
-10
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14442,11 +14442,19 @@ bool BoUpSLP::collectValuesToDemote(
1444214442
}
1444314443
auto NumSignBits = ComputeNumSignBits(V, *DL, 0, AC, nullptr, DT);
1444414444
unsigned BitWidth1 = OrigBitWidth - NumSignBits;
14445-
if (!isKnownNonNegative(V, SimplifyQuery(*DL)))
14445+
bool IsSigned = !isKnownNonNegative(V, SimplifyQuery(*DL));
14446+
if (IsSigned)
1444614447
++BitWidth1;
1444714448
if (auto *I = dyn_cast<Instruction>(V)) {
1444814449
APInt Mask = DB->getDemandedBits(I);
14449-
unsigned BitWidth2 = Mask.getBitWidth() - Mask.countl_zero();
14450+
unsigned BitWidth2 =
14451+
std::max<unsigned>(1, Mask.getBitWidth() - Mask.countl_zero());
14452+
while (!IsSigned && BitWidth2 < OrigBitWidth) {
14453+
APInt Mask = APInt::getBitsSetFrom(OrigBitWidth, BitWidth2 - 1);
14454+
if (MaskedValueIsZero(V, Mask, SimplifyQuery(*DL)))
14455+
break;
14456+
BitWidth2 *= 2;
14457+
}
1445014458
BitWidth1 = std::min(BitWidth1, BitWidth2);
1445114459
}
1445214460
BitWidth = std::max(BitWidth, BitWidth1);

llvm/test/Transforms/SLPVectorizer/RISCV/unsigned-node-trunc-with-signed-users.ll

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -8,18 +8,20 @@ define void @test(ptr %p, i16 %load794) {
88
; CHECK-NEXT: [[GEP799:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 16
99
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i16>, ptr [[P]], align 2
1010
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i16>, ptr [[GEP799]], align 2
11-
; CHECK-NEXT: [[TMP3:%.*]] = sub <2 x i16> [[TMP2]], [[TMP1]]
12-
; CHECK-NEXT: [[TMP4:%.*]] = add <2 x i16> [[TMP3]], <i16 3329, i16 3329>
11+
; CHECK-NEXT: [[TMP3:%.*]] = zext <2 x i16> [[TMP1]] to <2 x i32>
12+
; CHECK-NEXT: [[TMP4:%.*]] = zext <2 x i16> [[TMP2]] to <2 x i32>
13+
; CHECK-NEXT: [[TMP7:%.*]] = sub nsw <2 x i32> [[TMP4]], [[TMP3]]
14+
; CHECK-NEXT: [[TMP8:%.*]] = add nsw <2 x i32> [[TMP7]], <i32 3329, i32 3329>
1315
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> poison, i32 [[ZEXT795]], i32 0
1416
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> poison, <2 x i32> zeroinitializer
15-
; CHECK-NEXT: [[TMP7:%.*]] = trunc <2 x i32> [[TMP6]] to <2 x i16>
16-
; CHECK-NEXT: [[TMP8:%.*]] = mul <2 x i16> [[TMP4]], [[TMP7]]
17-
; CHECK-NEXT: [[TMP9:%.*]] = zext <2 x i16> [[TMP8]] to <2 x i64>
17+
; CHECK-NEXT: [[TMP12:%.*]] = mul <2 x i32> [[TMP8]], [[TMP6]]
18+
; CHECK-NEXT: [[TMP9:%.*]] = zext <2 x i32> [[TMP12]] to <2 x i64>
1819
; CHECK-NEXT: [[TMP10:%.*]] = mul nuw nsw <2 x i64> [[TMP9]], <i64 5039, i64 5039>
1920
; CHECK-NEXT: [[TMP11:%.*]] = lshr <2 x i64> [[TMP10]], <i64 24, i64 24>
20-
; CHECK-NEXT: [[TMP12:%.*]] = trunc <2 x i64> [[TMP11]] to <2 x i16>
21-
; CHECK-NEXT: [[TMP13:%.*]] = mul <2 x i16> [[TMP12]], <i16 -3329, i16 -3329>
22-
; CHECK-NEXT: [[TMP14:%.*]] = add <2 x i16> [[TMP13]], [[TMP8]]
21+
; CHECK-NEXT: [[TMP13:%.*]] = trunc <2 x i64> [[TMP11]] to <2 x i32>
22+
; CHECK-NEXT: [[TMP20:%.*]] = mul <2 x i32> [[TMP13]], <i32 62207, i32 62207>
23+
; CHECK-NEXT: [[TMP21:%.*]] = add <2 x i32> [[TMP20]], [[TMP12]]
24+
; CHECK-NEXT: [[TMP14:%.*]] = trunc <2 x i32> [[TMP21]] to <2 x i16>
2325
; CHECK-NEXT: [[TMP15:%.*]] = add <2 x i16> [[TMP14]], <i16 -3329, i16 -3329>
2426
; CHECK-NEXT: [[TMP16:%.*]] = icmp slt <2 x i16> [[TMP15]], zeroinitializer
2527
; CHECK-NEXT: [[TMP17:%.*]] = select <2 x i1> [[TMP16]], <2 x i16> [[TMP14]], <2 x i16> zeroinitializer

0 commit comments

Comments
 (0)