Skip to content

Commit 01d9528

Browse files
[SLP]Improve final minbitwidth analysis attempt.
Added a demanded-bits analysis step to IsPotentiallyTruncated to improve the final minbitwidth analysis attempts.

Metric: size..text

| Program | size..text results | results0 | diff |
| --- | --- | --- | --- |
| test-suite :: MultiSource/Benchmarks/MiBench/telecomm-gsm/telecomm-gsm.test | 43069.00 | 42973.00 | -0.2% |
| test-suite :: MultiSource/Benchmarks/mediabench/gsm/toast/toast.test | 43066.00 | 42970.00 | -0.2% |

Extra trunc instructions are emitted to operate on <32 x i8> instead of <32 x i16>; they will be removed in the next patches.

Reviewers: RKSimon
Reviewed By: RKSimon
Pull Request: #87786
1 parent aa6a089 commit 01d9528

File tree

4 files changed

+17
-9
lines changed

4 files changed

+17
-9
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -14160,6 +14160,11 @@ bool BoUpSLP::collectValuesToDemote(
1416014160
unsigned BitWidth1 = OrigBitWidth - NumSignBits;
1416114161
if (!isKnownNonNegative(V, SimplifyQuery(*DL)))
1416214162
++BitWidth1;
14163+
if (auto *I = dyn_cast<Instruction>(V)) {
14164+
APInt Mask = DB->getDemandedBits(I);
14165+
unsigned BitWidth2 = Mask.getBitWidth() - Mask.countl_zero();
14166+
BitWidth1 = std::min(BitWidth1, BitWidth2);
14167+
}
1416314168
BitWidth = std::max(BitWidth, BitWidth1);
1416414169
return BitWidth > 0 && OrigBitWidth >= (BitWidth * 2);
1416514170
};

llvm/test/Transforms/SLPVectorizer/AArch64/extractelements-to-shuffle.ll

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -81,8 +81,9 @@ define void @dist_vec(ptr nocapture noundef readonly %pA, ptr nocapture noundef
8181
; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x i64> [[TMP4FT_0_LCSSA]], <2 x i64> [[TMP4TF_0_LCSSA]], <2 x i32> <i32 1, i32 3>
8282
; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <2 x i64> [[TMP4FF_0_LCSSA]], <2 x i64> [[TMP4TT_0_LCSSA]], <2 x i32> <i32 1, i32 3>
8383
; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <2 x i64> [[TMP13]], <2 x i64> [[TMP14]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
84-
; CHECK-NEXT: [[TMP16:%.*]] = add <4 x i64> [[TMP12]], [[TMP15]]
85-
; CHECK-NEXT: [[TMP17:%.*]] = trunc <4 x i64> [[TMP16]] to <4 x i32>
84+
; CHECK-NEXT: [[TMP16:%.*]] = trunc <4 x i64> [[TMP12]] to <4 x i32>
85+
; CHECK-NEXT: [[TMP57:%.*]] = trunc <4 x i64> [[TMP15]] to <4 x i32>
86+
; CHECK-NEXT: [[TMP17:%.*]] = add <4 x i32> [[TMP16]], [[TMP57]]
8687
; CHECK-NEXT: [[AND:%.*]] = and i32 [[NUMBEROFBOOLS]], 127
8788
; CHECK-NEXT: [[CMP86284:%.*]] = icmp ugt i32 [[AND]], 31
8889
; CHECK-NEXT: br i1 [[CMP86284]], label [[WHILE_BODY88:%.*]], label [[WHILE_END122:%.*]]

llvm/test/Transforms/SLPVectorizer/RISCV/trunc-to-large-than-bw.ll

Lines changed: 5 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -8,10 +8,11 @@ define i32 @test() {
88
; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
99
; CHECK-NEXT: entry:
1010
; CHECK-NEXT: [[TMP0:%.*]] = call <4 x i64> @llvm.experimental.vp.strided.load.v4i64.p0.i64(ptr align 8 @c, i64 24, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, i32 4)
11-
; CHECK-NEXT: [[TMP1:%.*]] = trunc <4 x i64> [[TMP0]] to <4 x i32>
12-
; CHECK-NEXT: [[TMP2:%.*]] = and <4 x i32> [[TMP1]], <i32 65535, i32 65535, i32 65535, i32 65535>
13-
; CHECK-NEXT: [[TMP3:%.*]] = xor <4 x i32> [[TMP2]], <i32 65535, i32 65535, i32 65535, i32 65535>
14-
; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> [[TMP3]])
11+
; CHECK-NEXT: [[TMP1:%.*]] = trunc <4 x i64> [[TMP0]] to <4 x i16>
12+
; CHECK-NEXT: [[TMP2:%.*]] = and <4 x i16> [[TMP1]], <i16 -1, i16 -1, i16 -1, i16 -1>
13+
; CHECK-NEXT: [[TMP3:%.*]] = xor <4 x i16> [[TMP2]], <i16 -1, i16 -1, i16 -1, i16 -1>
14+
; CHECK-NEXT: [[TMP4:%.*]] = call i16 @llvm.vector.reduce.umax.v4i16(<4 x i16> [[TMP3]])
15+
; CHECK-NEXT: [[TMP5:%.*]] = zext i16 [[TMP4]] to i32
1516
; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.umax.i32(i32 [[TMP5]], i32 1)
1617
; CHECK-NEXT: ret i32 [[TMP6]]
1718
;

llvm/test/Transforms/SLPVectorizer/X86/int-bitcast-minbitwidth.ll

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -7,9 +7,10 @@ define void @t(i64 %v) {
77
; CHECK-NEXT: entry:
88
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i64> poison, i64 [[V]], i32 0
99
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i64> [[TMP0]], <4 x i64> poison, <4 x i32> zeroinitializer
10-
; CHECK-NEXT: [[TMP2:%.*]] = trunc <4 x i64> [[TMP1]] to <4 x i32>
11-
; CHECK-NEXT: [[TMP3:%.*]] = mul <4 x i32> [[TMP2]], <i32 5, i32 6, i32 3, i32 2>
12-
; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP3]])
10+
; CHECK-NEXT: [[TMP2:%.*]] = trunc <4 x i64> [[TMP1]] to <4 x i16>
11+
; CHECK-NEXT: [[TMP3:%.*]] = mul <4 x i16> [[TMP2]], <i16 5, i16 6, i16 3, i16 2>
12+
; CHECK-NEXT: [[TMP4:%.*]] = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> [[TMP3]])
13+
; CHECK-NEXT: [[TMP5:%.*]] = sext i16 [[TMP4]] to i32
1314
; CHECK-NEXT: [[TMP6:%.*]] = and i32 [[TMP5]], 65535
1415
; CHECK-NEXT: store i32 [[TMP6]], ptr null, align 4
1516
; CHECK-NEXT: ret void

0 commit comments

Comments
 (0)