Skip to content

Commit 5adfad2

Browse files
committed
[SLP]Emit actual bitwidth for analyzed MinBitwidth nodes, NFCI.
SLP already includes an analysis of the minimum bitwidth, so the integer operations can be emitted at their actual (narrower) bitwidth. This reduces register pressure and improves performance. Currently, the analysis is used only by the cost model, and the subsequent transformation relies on InstructionCombiner. It is better to do this directly in SLP: it reduces compile time and fixes cost model issues.
1 parent 0358825 commit 5adfad2

File tree

7 files changed

+466
-304
lines changed

7 files changed

+466
-304
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 249 additions & 81 deletions
Large diffs are not rendered by default.

llvm/test/Transforms/SLPVectorizer/AArch64/gather-reduce.ll

Lines changed: 180 additions & 180 deletions
Large diffs are not rendered by default.

llvm/test/Transforms/SLPVectorizer/AArch64/getelementptr.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -235,9 +235,9 @@ define void @test_i16_extend(ptr %p.1, ptr %p.2, i32 %idx.i32) {
235235
; CHECK-NEXT: [[T53:%.*]] = getelementptr inbounds i16, ptr [[P_1:%.*]], i64 [[IDX_0]]
236236
; CHECK-NEXT: [[T56:%.*]] = getelementptr inbounds i16, ptr [[P_2:%.*]], i64 [[IDX_0]]
237237
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[T53]], align 2
238-
; CHECK-NEXT: [[TMP2:%.*]] = zext <8 x i16> [[TMP1]] to <8 x i32>
238+
; CHECK-NEXT: [[TMP2:%.*]] = sext <8 x i16> [[TMP1]] to <8 x i32>
239239
; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i16>, ptr [[T56]], align 2
240-
; CHECK-NEXT: [[TMP4:%.*]] = zext <8 x i16> [[TMP3]] to <8 x i32>
240+
; CHECK-NEXT: [[TMP4:%.*]] = sext <8 x i16> [[TMP3]] to <8 x i32>
241241
; CHECK-NEXT: [[TMP5:%.*]] = sub nsw <8 x i32> [[TMP2]], [[TMP4]]
242242
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i32> [[TMP5]], i64 0
243243
; CHECK-NEXT: [[TMP7:%.*]] = sext i32 [[TMP6]] to i64

llvm/test/Transforms/SLPVectorizer/AArch64/trunc-insertion.ll

Lines changed: 14 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -8,34 +8,31 @@ define dso_local void @l() local_unnamed_addr {
88
; CHECK-NEXT: bb:
99
; CHECK-NEXT: br label [[BB1:%.*]]
1010
; CHECK: bb1:
11-
; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x i16> [ undef, [[BB:%.*]] ], [ [[TMP11:%.*]], [[BB25:%.*]] ]
11+
; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x i16> [ undef, [[BB:%.*]] ], [ [[TMP9:%.*]], [[BB25:%.*]] ]
1212
; CHECK-NEXT: br i1 undef, label [[BB3:%.*]], label [[BB11:%.*]]
1313
; CHECK: bb3:
1414
; CHECK-NEXT: [[I4:%.*]] = zext i1 undef to i32
1515
; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i16> [[TMP0]], undef
1616
; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt <2 x i16> [[TMP1]], <i16 8, i16 8>
17-
; CHECK-NEXT: [[TMP3:%.*]] = zext <2 x i1> [[TMP2]] to <2 x i32>
1817
; CHECK-NEXT: br label [[BB25]]
1918
; CHECK: bb11:
2019
; CHECK-NEXT: [[I12:%.*]] = zext i1 undef to i32
21-
; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i16> [[TMP0]], undef
22-
; CHECK-NEXT: [[TMP5:%.*]] = sext <2 x i16> [[TMP4]] to <2 x i64>
23-
; CHECK-NEXT: [[TMP6:%.*]] = icmp ule <2 x i64> undef, [[TMP5]]
24-
; CHECK-NEXT: [[TMP7:%.*]] = zext <2 x i1> [[TMP6]] to <2 x i32>
25-
; CHECK-NEXT: [[TMP8:%.*]] = icmp ult <2 x i32> undef, [[TMP7]]
26-
; CHECK-NEXT: [[TMP9:%.*]] = zext <2 x i1> [[TMP8]] to <2 x i32>
20+
; CHECK-NEXT: [[TMP3:%.*]] = xor <2 x i16> [[TMP0]], undef
21+
; CHECK-NEXT: [[TMP4:%.*]] = sext <2 x i16> [[TMP3]] to <2 x i64>
22+
; CHECK-NEXT: [[TMP5:%.*]] = icmp ule <2 x i64> undef, [[TMP4]]
23+
; CHECK-NEXT: [[TMP6:%.*]] = zext <2 x i1> [[TMP5]] to <2 x i32>
24+
; CHECK-NEXT: [[TMP7:%.*]] = icmp ult <2 x i32> undef, [[TMP6]]
2725
; CHECK-NEXT: br label [[BB25]]
2826
; CHECK: bb25:
2927
; CHECK-NEXT: [[I28:%.*]] = phi i32 [ [[I12]], [[BB11]] ], [ [[I4]], [[BB3]] ]
30-
; CHECK-NEXT: [[TMP10:%.*]] = phi <2 x i32> [ [[TMP9]], [[BB11]] ], [ [[TMP3]], [[BB3]] ]
31-
; CHECK-NEXT: [[TMP11]] = phi <2 x i16> [ [[TMP4]], [[BB11]] ], [ [[TMP1]], [[BB3]] ]
32-
; CHECK-NEXT: [[TMP12:%.*]] = trunc <2 x i32> [[TMP10]] to <2 x i8>
33-
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i8> [[TMP12]], i32 0
34-
; CHECK-NEXT: [[TMP14:%.*]] = zext i8 [[TMP13]] to i32
35-
; CHECK-NEXT: [[I31:%.*]] = and i32 undef, [[TMP14]]
36-
; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x i8> [[TMP12]], i32 1
37-
; CHECK-NEXT: [[TMP16:%.*]] = zext i8 [[TMP15]] to i32
38-
; CHECK-NEXT: [[I32:%.*]] = and i32 [[I31]], [[TMP16]]
28+
; CHECK-NEXT: [[TMP8:%.*]] = phi <2 x i1> [ [[TMP7]], [[BB11]] ], [ [[TMP2]], [[BB3]] ]
29+
; CHECK-NEXT: [[TMP9]] = phi <2 x i16> [ [[TMP3]], [[BB11]] ], [ [[TMP1]], [[BB3]] ]
30+
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0
31+
; CHECK-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
32+
; CHECK-NEXT: [[I31:%.*]] = and i32 undef, [[TMP11]]
33+
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i1> [[TMP8]], i32 1
34+
; CHECK-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
35+
; CHECK-NEXT: [[I32:%.*]] = and i32 [[I31]], [[TMP13]]
3936
; CHECK-NEXT: [[I33:%.*]] = and i32 [[I32]], [[I28]]
4037
; CHECK-NEXT: br i1 undef, label [[BB34:%.*]], label [[BB1]]
4138
; CHECK: bb34:

llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-cast.ll

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -8,14 +8,13 @@ define ptr @test(i8 %0) {
88
; CHECK-NEXT: [[CONV12_I:%.*]] = zext i8 [[TMP0]] to i32
99
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> <i32 0, i32 poison>, i32 [[CONV12_I]], i32 1
1010
; CHECK-NEXT: [[TMP2:%.*]] = lshr <2 x i32> [[TMP1]], zeroinitializer
11-
; CHECK-NEXT: [[TMP3:%.*]] = zext <2 x i32> [[TMP2]] to <2 x i64>
12-
; CHECK-NEXT: [[TMP4:%.*]] = trunc <2 x i64> [[TMP3]] to <2 x i8>
13-
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i8> [[TMP4]], i32 0
14-
; CHECK-NEXT: [[TMP6:%.*]] = zext i8 [[TMP5]] to i64
15-
; CHECK-NEXT: [[ARRAYIDX50_I:%.*]] = getelementptr i8, ptr null, i64 [[TMP6]]
16-
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i8> [[TMP4]], i32 1
17-
; CHECK-NEXT: [[TMP8:%.*]] = zext i8 [[TMP7]] to i64
18-
; CHECK-NEXT: [[ARRAYIDX16_I:%.*]] = getelementptr i8, ptr null, i64 [[TMP8]]
11+
; CHECK-NEXT: [[TMP3:%.*]] = trunc <2 x i32> [[TMP2]] to <2 x i8>
12+
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i8> [[TMP3]], i32 0
13+
; CHECK-NEXT: [[TMP5:%.*]] = zext i8 [[TMP4]] to i64
14+
; CHECK-NEXT: [[ARRAYIDX50_I:%.*]] = getelementptr i8, ptr null, i64 [[TMP5]]
15+
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i8> [[TMP3]], i32 1
16+
; CHECK-NEXT: [[TMP7:%.*]] = zext i8 [[TMP6]] to i64
17+
; CHECK-NEXT: [[ARRAYIDX16_I:%.*]] = getelementptr i8, ptr null, i64 [[TMP7]]
1918
; CHECK-NEXT: ret ptr null
2019
;
2120
entry:

llvm/test/Transforms/SLPVectorizer/X86/partail.ll

Lines changed: 9 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -23,20 +23,18 @@ define void @get_block(i32 %y_pos) local_unnamed_addr #0 {
2323
; CHECK-NEXT: [[TMP7:%.*]] = select <4 x i1> [[TMP3]], <4 x i32> [[TMP6]], <4 x i32> zeroinitializer
2424
; CHECK-NEXT: [[TMP8:%.*]] = icmp slt <4 x i32> [[TMP7]], undef
2525
; CHECK-NEXT: [[TMP9:%.*]] = select <4 x i1> [[TMP8]], <4 x i32> [[TMP7]], <4 x i32> undef
26-
; CHECK-NEXT: [[TMP10:%.*]] = sext <4 x i32> [[TMP9]] to <4 x i64>
27-
; CHECK-NEXT: [[TMP11:%.*]] = trunc <4 x i64> [[TMP10]] to <4 x i32>
28-
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i32> [[TMP11]], i32 0
26+
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i32> [[TMP9]], i32 0
27+
; CHECK-NEXT: [[TMP11:%.*]] = sext i32 [[TMP10]] to i64
28+
; CHECK-NEXT: [[ARRAYIDX31:%.*]] = getelementptr inbounds ptr, ptr undef, i64 [[TMP11]]
29+
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i32> [[TMP9]], i32 1
2930
; CHECK-NEXT: [[TMP13:%.*]] = sext i32 [[TMP12]] to i64
30-
; CHECK-NEXT: [[ARRAYIDX31:%.*]] = getelementptr inbounds ptr, ptr undef, i64 [[TMP13]]
31-
; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i32> [[TMP11]], i32 1
31+
; CHECK-NEXT: [[ARRAYIDX31_1:%.*]] = getelementptr inbounds ptr, ptr undef, i64 [[TMP13]]
32+
; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i32> [[TMP9]], i32 2
3233
; CHECK-NEXT: [[TMP15:%.*]] = sext i32 [[TMP14]] to i64
33-
; CHECK-NEXT: [[ARRAYIDX31_1:%.*]] = getelementptr inbounds ptr, ptr undef, i64 [[TMP15]]
34-
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i32> [[TMP11]], i32 2
34+
; CHECK-NEXT: [[ARRAYIDX31_2:%.*]] = getelementptr inbounds ptr, ptr undef, i64 [[TMP15]]
35+
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i32> [[TMP9]], i32 3
3536
; CHECK-NEXT: [[TMP17:%.*]] = sext i32 [[TMP16]] to i64
36-
; CHECK-NEXT: [[ARRAYIDX31_2:%.*]] = getelementptr inbounds ptr, ptr undef, i64 [[TMP17]]
37-
; CHECK-NEXT: [[TMP18:%.*]] = extractelement <4 x i32> [[TMP11]], i32 3
38-
; CHECK-NEXT: [[TMP19:%.*]] = sext i32 [[TMP18]] to i64
39-
; CHECK-NEXT: [[ARRAYIDX31_3:%.*]] = getelementptr inbounds ptr, ptr undef, i64 [[TMP19]]
37+
; CHECK-NEXT: [[ARRAYIDX31_3:%.*]] = getelementptr inbounds ptr, ptr undef, i64 [[TMP17]]
4038
; CHECK-NEXT: unreachable
4139
;
4240
entry:

llvm/test/Transforms/SLPVectorizer/X86/root-trunc-extract-reuse.ll

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,12 @@ define i1 @test() {
88
; CHECK: then:
99
; CHECK-NEXT: br label [[ELSE]]
1010
; CHECK: else:
11-
; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x i32> [ zeroinitializer, [[THEN]] ], [ zeroinitializer, [[ENTRY:%.*]] ]
12-
; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i32> [[TMP0]] to <2 x i8>
13-
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i8> [[TMP1]], i32 0
14-
; CHECK-NEXT: [[TMP3:%.*]] = zext i8 [[TMP2]] to i32
11+
; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x i1> [ zeroinitializer, [[THEN]] ], [ zeroinitializer, [[ENTRY:%.*]] ]
12+
; CHECK-NEXT: [[TMP1:%.*]] = zext <2 x i1> [[TMP0]] to <2 x i32>
13+
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0
14+
; CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
1515
; CHECK-NEXT: [[BF_CAST162:%.*]] = and i32 [[TMP3]], 0
16-
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> zeroinitializer, <2 x i32> [[TMP0]], <2 x i32> <i32 3, i32 1>
16+
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> zeroinitializer, <2 x i32> [[TMP1]], <2 x i32> <i32 3, i32 1>
1717
; CHECK-NEXT: [[T13:%.*]] = and <2 x i32> [[TMP4]], zeroinitializer
1818
; CHECK-NEXT: br label [[ELSE1:%.*]]
1919
; CHECK: else1:

0 commit comments

Comments
 (0)