Skip to content

Commit 0e3049c

Browse files
[SLP]Support revectorization of the previously vectorized scalars
If a scalar instruction is marked for vectorization in the tree, it generally cannot be vectorized as part of another node in the same tree. This may prevent some potentially profitable vectorization opportunities, since some nodes end up being buildvector/gather nodes, which add to the total cost. This patch allows revectorization of previously vectorized scalars. Reviewers: hiraditya, RKSimon Reviewed By: RKSimon, hiraditya Pull Request: #133091
1 parent 2b06410 commit 0e3049c

File tree

8 files changed

+176
-126
lines changed

8 files changed

+176
-126
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 136 additions & 81 deletions
Large diffs are not rendered by default.

llvm/test/Transforms/SLPVectorizer/AArch64/reused-scalar-repeated-in-node.ll

Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -39,28 +39,26 @@ define void @test() {
3939
; CHECK: [[BB77]]:
4040
; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <16 x float> [[TMP11]], <16 x float> poison, <8 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 14, i32 15, i32 poison, i32 poison>
4141
; CHECK-NEXT: [[TMP17:%.*]] = insertelement <8 x float> [[TMP12]], float [[I70]], i32 0
42-
; CHECK-NEXT: [[TMP30:%.*]] = insertelement <2 x float> poison, float [[I68]], i32 0
43-
; CHECK-NEXT: [[TMP31:%.*]] = insertelement <2 x float> [[TMP30]], float [[I66]], i32 1
42+
; CHECK-NEXT: [[TMP14:%.*]] = insertelement <8 x float> poison, float [[I70]], i32 1
43+
; CHECK-NEXT: [[TMP19:%.*]] = insertelement <8 x float> [[TMP14]], float [[I68]], i32 2
44+
; CHECK-NEXT: [[TMP16:%.*]] = insertelement <8 x float> [[TMP19]], float [[I66]], i32 3
45+
; CHECK-NEXT: [[TMP20:%.*]] = insertelement <8 x float> [[TMP16]], float [[I67]], i32 6
46+
; CHECK-NEXT: [[TMP21:%.*]] = insertelement <8 x float> [[TMP20]], float [[I69]], i32 7
4447
; CHECK-NEXT: [[TMP39:%.*]] = shufflevector <16 x float> [[TMP25]], <16 x float> poison, <16 x i32> <i32 poison, i32 poison, i32 3, i32 2, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
4548
; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <16 x float> [[TMP39]], <16 x float> [[TMP25]], <16 x i32> <i32 poison, i32 poison, i32 2, i32 3, i32 18, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 19, i32 poison, i32 poison>
4649
; CHECK-NEXT: br label %[[BB78:.*]]
4750
; CHECK: [[BB78]]:
4851
; CHECK-NEXT: [[TMP15:%.*]] = phi <8 x float> [ [[TMP17]], %[[BB77]] ], [ [[TMP36:%.*]], %[[BB78]] ]
49-
; CHECK-NEXT: [[TMP16:%.*]] = phi <2 x float> [ [[TMP31]], %[[BB77]] ], [ [[TMP37:%.*]], %[[BB78]] ]
52+
; CHECK-NEXT: [[TMP22:%.*]] = phi <8 x float> [ [[TMP21]], %[[BB77]] ], [ [[TMP31:%.*]], %[[BB78]] ]
53+
; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <8 x float> [[TMP22]], <8 x float> poison, <16 x i32> <i32 0, i32 3, i32 1, i32 2, i32 3, i32 0, i32 2, i32 3, i32 2, i32 6, i32 2, i32 3, i32 0, i32 7, i32 6, i32 6>
5054
; CHECK-NEXT: [[TMP38:%.*]] = shufflevector <8 x float> [[TMP15]], <8 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 1, i32 0, i32 3, i32 1, i32 3, i32 5, i32 3, i32 1, i32 0, i32 4, i32 5, i32 5>
51-
; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <8 x float> [[TMP15]], <8 x float> poison, <16 x i32> <i32 2, i32 poison, i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
52-
; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <2 x float> [[TMP16]], <2 x float> poison, <16 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
53-
; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <16 x float> [[TMP21]], <16 x float> [[TMP20]], <16 x i32> <i32 0, i32 17, i32 2, i32 16, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
54-
; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <8 x float> [[TMP15]], <8 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
55-
; CHECK-NEXT: [[TMP40:%.*]] = shufflevector <16 x float> [[TMP23]], <16 x float> [[TMP22]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 18, i32 6, i32 7, i32 8, i32 20, i32 10, i32 11, i32 12, i32 21, i32 14, i32 15>
56-
; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <16 x float> [[TMP40]], <16 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 1, i32 5, i32 3, i32 1, i32 3, i32 9, i32 3, i32 1, i32 5, i32 13, i32 9, i32 9>
5755
; CHECK-NEXT: [[TMP18:%.*]] = fmul fast <16 x float> [[TMP24]], [[TMP13]]
5856
; CHECK-NEXT: [[TMP26:%.*]] = fmul fast <16 x float> [[TMP38]], [[TMP25]]
5957
; CHECK-NEXT: [[TMP27:%.*]] = fadd fast <16 x float> [[TMP26]], [[TMP18]]
6058
; CHECK-NEXT: [[TMP28:%.*]] = fadd fast <16 x float> [[TMP27]], poison
6159
; CHECK-NEXT: [[TMP29:%.*]] = fadd fast <16 x float> [[TMP28]], poison
6260
; CHECK-NEXT: [[TMP36]] = shufflevector <16 x float> [[TMP29]], <16 x float> poison, <8 x i32> <i32 5, i32 11, i32 12, i32 10, i32 14, i32 15, i32 poison, i32 poison>
63-
; CHECK-NEXT: [[TMP37]] = shufflevector <16 x float> [[TMP29]], <16 x float> poison, <2 x i32> <i32 6, i32 7>
61+
; CHECK-NEXT: [[TMP31]] = shufflevector <16 x float> [[TMP29]], <16 x float> poison, <8 x i32> <i32 12, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 14, i32 15>
6462
; CHECK-NEXT: br i1 poison, label %[[BB78]], label %[[BB167]]
6563
; CHECK: [[BB167]]:
6664
; CHECK-NEXT: [[TMP32:%.*]] = phi <16 x float> [ [[TMP11]], %[[BB64]] ], [ [[TMP29]], %[[BB78]] ]

llvm/test/Transforms/SLPVectorizer/AArch64/transpose-inseltpoison.ll

Lines changed: 10 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -123,18 +123,17 @@ define <4 x i32> @build_vec_v4i32_reuse_0(<2 x i32> %v0, <2 x i32> %v1) {
123123

124124
define <4 x i32> @build_vec_v4i32_reuse_1(<2 x i32> %v0, <2 x i32> %v1) {
125125
; CHECK-LABEL: @build_vec_v4i32_reuse_1(
126-
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[V0:%.*]], <2 x i32> poison, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
127-
; CHECK-NEXT: [[V0_1:%.*]] = extractelement <2 x i32> [[V0]], i64 1
128-
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> [[V1:%.*]], <2 x i32> poison, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
129-
; CHECK-NEXT: [[V1_1:%.*]] = extractelement <2 x i32> [[V1]], i64 1
130-
; CHECK-NEXT: [[TMP0_1:%.*]] = add i32 [[V0_1]], [[V1_1]]
131-
; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i32> [[TMP1]], [[TMP2]]
132-
; CHECK-NEXT: [[TMP4:%.*]] = xor <4 x i32> [[TMP1]], [[TMP2]]
126+
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i32> [[V0:%.*]], <2 x i32> poison, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
127+
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i32> [[V1:%.*]], <2 x i32> poison, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
128+
; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i32> [[TMP6]], [[TMP7]]
129+
; CHECK-NEXT: [[TMP4:%.*]] = xor <4 x i32> [[TMP6]], [[TMP7]]
133130
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
134-
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <4 x i32> <i32 poison, i32 poison, i32 3, i32 2>
135-
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[TMP0_1]], i64 0
136-
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP7]], <4 x i32> poison, <4 x i32> <i32 0, i32 0, i32 2, i32 3>
137-
; CHECK-NEXT: [[TMP9:%.*]] = sub <4 x i32> [[TMP5]], [[TMP8]]
131+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[V0]], <2 x i32> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 0>
132+
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> [[V1]], <2 x i32> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 0>
133+
; CHECK-NEXT: [[TMP8:%.*]] = add <4 x i32> [[TMP1]], [[TMP2]]
134+
; CHECK-NEXT: [[TMP11:%.*]] = xor <4 x i32> [[TMP1]], [[TMP2]]
135+
; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> [[TMP11]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
136+
; CHECK-NEXT: [[TMP9:%.*]] = sub <4 x i32> [[TMP5]], [[TMP10]]
138137
; CHECK-NEXT: ret <4 x i32> [[TMP9]]
139138
;
140139
%v0.0 = extractelement <2 x i32> %v0, i32 0

llvm/test/Transforms/SLPVectorizer/AArch64/transpose.ll

Lines changed: 10 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -123,18 +123,17 @@ define <4 x i32> @build_vec_v4i32_reuse_0(<2 x i32> %v0, <2 x i32> %v1) {
123123

124124
define <4 x i32> @build_vec_v4i32_reuse_1(<2 x i32> %v0, <2 x i32> %v1) {
125125
; CHECK-LABEL: @build_vec_v4i32_reuse_1(
126-
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[V0:%.*]], <2 x i32> poison, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
127-
; CHECK-NEXT: [[V0_1:%.*]] = extractelement <2 x i32> [[V0]], i64 1
128-
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> [[V1:%.*]], <2 x i32> poison, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
129-
; CHECK-NEXT: [[V1_1:%.*]] = extractelement <2 x i32> [[V1]], i64 1
130-
; CHECK-NEXT: [[TMP0_1:%.*]] = add i32 [[V0_1]], [[V1_1]]
131-
; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i32> [[TMP1]], [[TMP2]]
132-
; CHECK-NEXT: [[TMP4:%.*]] = xor <4 x i32> [[TMP1]], [[TMP2]]
126+
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i32> [[V0:%.*]], <2 x i32> poison, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
127+
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i32> [[V1:%.*]], <2 x i32> poison, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
128+
; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i32> [[TMP6]], [[TMP7]]
129+
; CHECK-NEXT: [[TMP4:%.*]] = xor <4 x i32> [[TMP6]], [[TMP7]]
133130
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
134-
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <4 x i32> <i32 poison, i32 poison, i32 3, i32 2>
135-
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[TMP0_1]], i64 0
136-
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP7]], <4 x i32> poison, <4 x i32> <i32 0, i32 0, i32 2, i32 3>
137-
; CHECK-NEXT: [[TMP9:%.*]] = sub <4 x i32> [[TMP5]], [[TMP8]]
131+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[V0]], <2 x i32> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 0>
132+
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> [[V1]], <2 x i32> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 0>
133+
; CHECK-NEXT: [[TMP8:%.*]] = add <4 x i32> [[TMP1]], [[TMP2]]
134+
; CHECK-NEXT: [[TMP11:%.*]] = xor <4 x i32> [[TMP1]], [[TMP2]]
135+
; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> [[TMP11]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
136+
; CHECK-NEXT: [[TMP9:%.*]] = sub <4 x i32> [[TMP5]], [[TMP10]]
138137
; CHECK-NEXT: ret <4 x i32> [[TMP9]]
139138
;
140139
%v0.0 = extractelement <2 x i32> %v0, i32 0

llvm/test/Transforms/SLPVectorizer/AArch64/vec3-reorder-reshuffle.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -206,15 +206,15 @@ define i32 @reorder_indices_1(float %0) {
206206
; POW2-ONLY-SAME: float [[TMP0:%.*]]) {
207207
; POW2-ONLY-NEXT: entry:
208208
; POW2-ONLY-NEXT: [[NOR1:%.*]] = alloca [0 x [3 x float]], i32 0, align 4
209+
; POW2-ONLY-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr float, ptr [[NOR1]], i64 1
209210
; POW2-ONLY-NEXT: [[ARRAYIDX2_I265:%.*]] = getelementptr float, ptr [[NOR1]], i64 2
210211
; POW2-ONLY-NEXT: [[TMP1:%.*]] = load float, ptr [[ARRAYIDX2_I265]], align 4
212+
; POW2-ONLY-NEXT: [[TMP7:%.*]] = load <2 x float>, ptr [[ARRAYIDX_I]], align 4
211213
; POW2-ONLY-NEXT: [[TMP2:%.*]] = load <2 x float>, ptr [[NOR1]], align 4
212214
; POW2-ONLY-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0
213215
; POW2-ONLY-NEXT: [[TMP4:%.*]] = fneg float [[TMP3]]
214216
; POW2-ONLY-NEXT: [[NEG11_I:%.*]] = fmul float [[TMP4]], [[TMP0]]
215217
; POW2-ONLY-NEXT: [[TMP5:%.*]] = call float @llvm.fmuladd.f32(float [[TMP1]], float 0.000000e+00, float [[NEG11_I]])
216-
; POW2-ONLY-NEXT: [[TMP6:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <2 x i32> <i32 1, i32 poison>
217-
; POW2-ONLY-NEXT: [[TMP7:%.*]] = insertelement <2 x float> [[TMP6]], float [[TMP1]], i32 1
218218
; POW2-ONLY-NEXT: [[TMP8:%.*]] = fneg <2 x float> [[TMP7]]
219219
; POW2-ONLY-NEXT: [[TMP9:%.*]] = insertelement <2 x float> poison, float [[TMP0]], i32 0
220220
; POW2-ONLY-NEXT: [[TMP10:%.*]] = shufflevector <2 x float> [[TMP9]], <2 x float> poison, <2 x i32> zeroinitializer

llvm/test/Transforms/SLPVectorizer/X86/cast-operand-extracted.ll

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,19 +8,19 @@ define void @test(ptr %0, i32 %add651) {
88
; CHECK-NEXT: [[PREDPEL11:%.*]] = alloca [0 x [0 x [25 x i32]]], i32 0, align 16
99
; CHECK-NEXT: [[ARRAYIDX469_6:%.*]] = getelementptr i8, ptr [[PREDPEL11]], i64 28
1010
; CHECK-NEXT: [[ARRAYIDX469_7:%.*]] = getelementptr i8, ptr [[PREDPEL11]], i64 32
11-
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[PREDPEL11]], i64 36
12-
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX469_7]], align 16
11+
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[PREDPEL11]], i64 40
12+
; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP1]], align 8
1313
; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i32>, ptr [[ARRAYIDX469_6]], align 4
14+
; CHECK-NEXT: [[TMP7:%.*]] = load <2 x i32>, ptr [[ARRAYIDX469_7]], align 16
15+
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX469_7]], align 16
1416
; CHECK-NEXT: [[CONV470_7:%.*]] = trunc i32 [[TMP2]] to i16
1517
; CHECK-NEXT: store i16 [[CONV470_7]], ptr [[TMP0]], align 2
1618
; CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP0]], align 8
1719
; CHECK-NEXT: [[ARRAYIDX660:%.*]] = getelementptr i8, ptr [[TMP4]], i64 7800
1820
; CHECK-NEXT: [[ARRAYIDX689:%.*]] = getelementptr i8, ptr [[TMP4]], i64 7816
19-
; CHECK-NEXT: [[TMP5:%.*]] = load <2 x i32>, ptr [[TMP1]], align 4
2021
; CHECK-NEXT: [[TMP6:%.*]] = add <2 x i32> [[TMP3]], splat (i32 1)
21-
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> [[TMP5]], <2 x i32> <i32 1, i32 2>
2222
; CHECK-NEXT: [[TMP8:%.*]] = add <2 x i32> [[TMP6]], [[TMP7]]
23-
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> <i32 1, i32 poison>, <2 x i32> <i32 2, i32 1>
23+
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i32> <i32 1, i32 poison>, i32 [[TMP5]], i32 1
2424
; CHECK-NEXT: [[TMP10:%.*]] = add <2 x i32> [[TMP8]], [[TMP9]]
2525
; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> poison, i32 [[ADD651]], i32 0
2626
; CHECK-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP2]], i32 1

llvm/test/Transforms/SLPVectorizer/X86/scatter-vectorize-reorder.ll

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,14 +8,13 @@ define void @test() {
88
; CHECK-NEXT: [[ARRAYIDX21_I:%.*]] = getelementptr inbounds [4 x float], ptr undef, i64 2
99
; CHECK-NEXT: br label [[BB1:%.*]]
1010
; CHECK: bb1:
11+
; CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[ARRAYIDX10_I_I86]], align 4
1112
; CHECK-NEXT: [[TMP0:%.*]] = load <2 x float>, ptr undef, align 4
13+
; CHECK-NEXT: [[TMP3:%.*]] = load <2 x float>, ptr undef, align 4
1214
; CHECK-NEXT: [[TMP1:%.*]] = fsub <2 x float> zeroinitializer, [[TMP0]]
13-
; CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[ARRAYIDX10_I_I86]], align 4
14-
; CHECK-NEXT: [[TMP3:%.*]] = load float, ptr undef, align 4
1515
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x float> <float 0.000000e+00, float poison>, float [[TMP2]], i32 1
16-
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x float> [[TMP0]], float [[TMP3]], i32 0
1716
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x float> <float poison, float 0.000000e+00>, float [[TMP2]], i32 0
18-
; CHECK-NEXT: [[TMP7:%.*]] = call <2 x float> @llvm.fmuladd.v2f32(<2 x float> [[TMP4]], <2 x float> [[TMP5]], <2 x float> [[TMP6]])
17+
; CHECK-NEXT: [[TMP7:%.*]] = call <2 x float> @llvm.fmuladd.v2f32(<2 x float> [[TMP4]], <2 x float> [[TMP3]], <2 x float> [[TMP6]])
1918
; CHECK-NEXT: br i1 false, label [[BB2:%.*]], label [[BB3:%.*]]
2019
; CHECK: bb2:
2120
; CHECK-NEXT: [[TMP8:%.*]] = fmul <2 x float> [[TMP7]], zeroinitializer

llvm/test/Transforms/SLPVectorizer/X86/vec3-reorder-reshuffle.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -205,15 +205,15 @@ define i32 @reorder_indices_1(float %0) {
205205
; POW2-ONLY-SAME: float [[TMP0:%.*]]) {
206206
; POW2-ONLY-NEXT: entry:
207207
; POW2-ONLY-NEXT: [[NOR1:%.*]] = alloca [0 x [3 x float]], i32 0, align 4
208+
; POW2-ONLY-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr float, ptr [[NOR1]], i64 1
208209
; POW2-ONLY-NEXT: [[ARRAYIDX2_I265:%.*]] = getelementptr float, ptr [[NOR1]], i64 2
209210
; POW2-ONLY-NEXT: [[TMP1:%.*]] = load float, ptr [[ARRAYIDX2_I265]], align 4
211+
; POW2-ONLY-NEXT: [[TMP7:%.*]] = load <2 x float>, ptr [[ARRAYIDX_I]], align 4
210212
; POW2-ONLY-NEXT: [[TMP2:%.*]] = load <2 x float>, ptr [[NOR1]], align 4
211213
; POW2-ONLY-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0
212214
; POW2-ONLY-NEXT: [[TMP4:%.*]] = fneg float [[TMP3]]
213215
; POW2-ONLY-NEXT: [[NEG11_I:%.*]] = fmul float [[TMP4]], [[TMP0]]
214216
; POW2-ONLY-NEXT: [[TMP5:%.*]] = call float @llvm.fmuladd.f32(float [[TMP1]], float 0.000000e+00, float [[NEG11_I]])
215-
; POW2-ONLY-NEXT: [[TMP6:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <2 x i32> <i32 1, i32 poison>
216-
; POW2-ONLY-NEXT: [[TMP7:%.*]] = insertelement <2 x float> [[TMP6]], float [[TMP1]], i32 1
217217
; POW2-ONLY-NEXT: [[TMP8:%.*]] = fneg <2 x float> [[TMP7]]
218218
; POW2-ONLY-NEXT: [[TMP9:%.*]] = insertelement <2 x float> poison, float [[TMP0]], i32 0
219219
; POW2-ONLY-NEXT: [[TMP10:%.*]] = shufflevector <2 x float> [[TMP9]], <2 x float> poison, <2 x i32> zeroinitializer

0 commit comments

Comments
 (0)