Skip to content

Commit 74e07ab

Browse files
committed
[SLP] Fix final analysis for unsigned nodes.
We need to check that at least a single bit is cleared for unsigned nodes before reducing their size; otherwise, they might be treated as signed when used in signed nodes.
1 parent 33786b6 commit 74e07ab

File tree

2 files changed

+18
-9
lines changed

2 files changed

+18
-9
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14442,11 +14442,18 @@ bool BoUpSLP::collectValuesToDemote(
1444214442
}
1444314443
auto NumSignBits = ComputeNumSignBits(V, *DL, 0, AC, nullptr, DT);
1444414444
unsigned BitWidth1 = OrigBitWidth - NumSignBits;
14445-
if (!isKnownNonNegative(V, SimplifyQuery(*DL)))
14445+
bool IsSigned = !isKnownNonNegative(V, SimplifyQuery(*DL));
14446+
if (IsSigned)
1444614447
++BitWidth1;
1444714448
if (auto *I = dyn_cast<Instruction>(V)) {
1444814449
APInt Mask = DB->getDemandedBits(I);
1444914450
unsigned BitWidth2 = Mask.getBitWidth() - Mask.countl_zero();
14451+
while (!IsSigned && BitWidth2 < OrigBitWidth) {
14452+
APInt Mask = APInt::getBitsSetFrom(OrigBitWidth, BitWidth2 - 1);
14453+
if (MaskedValueIsZero(V, Mask, SimplifyQuery(*DL)))
14454+
break;
14455+
BitWidth2 *= 2;
14456+
}
1445014457
BitWidth1 = std::min(BitWidth1, BitWidth2);
1445114458
}
1445214459
BitWidth = std::max(BitWidth, BitWidth1);

llvm/test/Transforms/SLPVectorizer/RISCV/unsigned-node-trunc-with-signed-users.ll

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -8,18 +8,20 @@ define void @test(ptr %p, i16 %load794) {
88
; CHECK-NEXT: [[GEP799:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 16
99
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i16>, ptr [[P]], align 2
1010
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i16>, ptr [[GEP799]], align 2
11-
; CHECK-NEXT: [[TMP3:%.*]] = sub <2 x i16> [[TMP2]], [[TMP1]]
12-
; CHECK-NEXT: [[TMP4:%.*]] = add <2 x i16> [[TMP3]], <i16 3329, i16 3329>
11+
; CHECK-NEXT: [[TMP3:%.*]] = zext <2 x i16> [[TMP1]] to <2 x i32>
12+
; CHECK-NEXT: [[TMP4:%.*]] = zext <2 x i16> [[TMP2]] to <2 x i32>
13+
; CHECK-NEXT: [[TMP7:%.*]] = sub nsw <2 x i32> [[TMP4]], [[TMP3]]
14+
; CHECK-NEXT: [[TMP8:%.*]] = add nsw <2 x i32> [[TMP7]], <i32 3329, i32 3329>
1315
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> poison, i32 [[ZEXT795]], i32 0
1416
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> poison, <2 x i32> zeroinitializer
15-
; CHECK-NEXT: [[TMP7:%.*]] = trunc <2 x i32> [[TMP6]] to <2 x i16>
16-
; CHECK-NEXT: [[TMP8:%.*]] = mul <2 x i16> [[TMP4]], [[TMP7]]
17-
; CHECK-NEXT: [[TMP9:%.*]] = zext <2 x i16> [[TMP8]] to <2 x i64>
17+
; CHECK-NEXT: [[TMP12:%.*]] = mul <2 x i32> [[TMP8]], [[TMP6]]
18+
; CHECK-NEXT: [[TMP9:%.*]] = zext <2 x i32> [[TMP12]] to <2 x i64>
1819
; CHECK-NEXT: [[TMP10:%.*]] = mul nuw nsw <2 x i64> [[TMP9]], <i64 5039, i64 5039>
1920
; CHECK-NEXT: [[TMP11:%.*]] = lshr <2 x i64> [[TMP10]], <i64 24, i64 24>
20-
; CHECK-NEXT: [[TMP12:%.*]] = trunc <2 x i64> [[TMP11]] to <2 x i16>
21-
; CHECK-NEXT: [[TMP13:%.*]] = mul <2 x i16> [[TMP12]], <i16 -3329, i16 -3329>
22-
; CHECK-NEXT: [[TMP14:%.*]] = add <2 x i16> [[TMP13]], [[TMP8]]
21+
; CHECK-NEXT: [[TMP13:%.*]] = trunc <2 x i64> [[TMP11]] to <2 x i32>
22+
; CHECK-NEXT: [[TMP20:%.*]] = mul <2 x i32> [[TMP13]], <i32 62207, i32 62207>
23+
; CHECK-NEXT: [[TMP21:%.*]] = add <2 x i32> [[TMP20]], [[TMP12]]
24+
; CHECK-NEXT: [[TMP14:%.*]] = trunc <2 x i32> [[TMP21]] to <2 x i16>
2325
; CHECK-NEXT: [[TMP15:%.*]] = add <2 x i16> [[TMP14]], <i16 -3329, i16 -3329>
2426
; CHECK-NEXT: [[TMP16:%.*]] = icmp slt <2 x i16> [[TMP15]], zeroinitializer
2527
; CHECK-NEXT: [[TMP17:%.*]] = select <2 x i1> [[TMP16]], <2 x i16> [[TMP14]], <2 x i16> zeroinitializer

0 commit comments

Comments
 (0)