Skip to content

Commit 6edd162

Browse files
RKSimon, tomtor
authored and committed
[X86] X86FixupInstTuning - extend BLENDPD/S -> MOVSD/S handling to SSE variant (llvm#143961)
1 parent cee4852 commit 6edd162

15 files changed

+120
-212
lines changed

llvm/lib/Target/X86/X86FixupInstTuning.cpp

Lines changed: 8 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -234,10 +234,16 @@ bool X86FixupInstTuningPass::processInstruction(
234234
};
235235

236236
switch (Opc) {
237-
case X86::VBLENDPSrri:
238-
return ProcessBLENDToMOV(X86::VMOVSSrr);
237+
case X86::BLENDPDrri:
238+
return ProcessBLENDToMOV(X86::MOVSDrr);
239239
case X86::VBLENDPDrri:
240240
return ProcessBLENDToMOV(X86::VMOVSDrr);
241+
242+
case X86::BLENDPSrri:
243+
return ProcessBLENDToMOV(X86::MOVSSrr);
244+
case X86::VBLENDPSrri:
245+
return ProcessBLENDToMOV(X86::VMOVSSrr);
246+
241247
case X86::VPERMILPDri:
242248
return ProcessVPERMILPDri(X86::VSHUFPDrri);
243249
case X86::VPERMILPDYri:

llvm/test/CodeGen/X86/combine-and.ll

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -189,7 +189,7 @@ define <4 x i32> @test11(<4 x i32> %A) {
189189
; SSE-LABEL: test11:
190190
; SSE: # %bb.0:
191191
; SSE-NEXT: xorps %xmm1, %xmm1
192-
; SSE-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
192+
; SSE-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
193193
; SSE-NEXT: retq
194194
;
195195
; AVX-LABEL: test11:

llvm/test/CodeGen/X86/combine-or-shuffle.ll

Lines changed: 8 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -108,15 +108,10 @@ define <4 x i32> @test4(<4 x i32> %a, <4 x i32> %b) {
108108

109109

110110
define <4 x i32> @test5(<4 x i32> %a, <4 x i32> %b) {
111-
; SSE2-LABEL: test5:
112-
; SSE2: # %bb.0:
113-
; SSE2-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
114-
; SSE2-NEXT: retq
115-
;
116-
; SSE4-LABEL: test5:
117-
; SSE4: # %bb.0:
118-
; SSE4-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
119-
; SSE4-NEXT: retq
111+
; SSE-LABEL: test5:
112+
; SSE: # %bb.0:
113+
; SSE-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
114+
; SSE-NEXT: retq
120115
;
121116
; AVX1-LABEL: test5:
122117
; AVX1: # %bb.0:
@@ -283,15 +278,10 @@ define <4 x i32> @test11(<4 x i32> %a, <4 x i32> %b) {
283278

284279

285280
define <4 x i32> @test12(<4 x i32> %a, <4 x i32> %b) {
286-
; SSE2-LABEL: test12:
287-
; SSE2: # %bb.0:
288-
; SSE2-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
289-
; SSE2-NEXT: retq
290-
;
291-
; SSE4-LABEL: test12:
292-
; SSE4: # %bb.0:
293-
; SSE4-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
294-
; SSE4-NEXT: retq
281+
; SSE-LABEL: test12:
282+
; SSE: # %bb.0:
283+
; SSE-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
284+
; SSE-NEXT: retq
295285
;
296286
; AVX1-LABEL: test12:
297287
; AVX1: # %bb.0:

llvm/test/CodeGen/X86/insertelement-zero.ll

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -214,7 +214,7 @@ define <8 x float> @insert_v8f32_z12345z7(<8 x float> %a) {
214214
; SSE41-LABEL: insert_v8f32_z12345z7:
215215
; SSE41: # %bb.0:
216216
; SSE41-NEXT: xorps %xmm2, %xmm2
217-
; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
217+
; SSE41-NEXT: movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
218218
; SSE41-NEXT: blendps {{.*#+}} xmm1 = xmm1[0,1],xmm2[2],xmm1[3]
219219
; SSE41-NEXT: retq
220220
;
@@ -287,7 +287,7 @@ define <8 x i32> @insert_v8i32_z12345z7(<8 x i32> %a) {
287287
; SSE41-LABEL: insert_v8i32_z12345z7:
288288
; SSE41: # %bb.0:
289289
; SSE41-NEXT: xorps %xmm2, %xmm2
290-
; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
290+
; SSE41-NEXT: movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
291291
; SSE41-NEXT: blendps {{.*#+}} xmm1 = xmm1[0,1],xmm2[2],xmm1[3]
292292
; SSE41-NEXT: retq
293293
;

llvm/test/CodeGen/X86/masked_expandload.ll

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1097,7 +1097,7 @@ define <2 x float> @expandload_v2f32_v2i1(ptr %base, <2 x float> %src0, <2 x i32
10971097
; SSE42-NEXT: retq
10981098
; SSE42-NEXT: LBB4_1: ## %cond.load
10991099
; SSE42-NEXT: movss (%rdi), %xmm1 ## xmm1 = mem[0],zero,zero,zero
1100-
; SSE42-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
1100+
; SSE42-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
11011101
; SSE42-NEXT: addq $4, %rdi
11021102
; SSE42-NEXT: testb $2, %al
11031103
; SSE42-NEXT: je LBB4_4

llvm/test/CodeGen/X86/masked_load.ll

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -817,7 +817,7 @@ define <2 x float> @load_v2f32_v2i32(<2 x i32> %trigger, ptr %addr, <2 x float>
817817
; SSE42-NEXT: retq
818818
; SSE42-NEXT: LBB7_1: ## %cond.load
819819
; SSE42-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
820-
; SSE42-NEXT: blendps {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
820+
; SSE42-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
821821
; SSE42-NEXT: testb $2, %al
822822
; SSE42-NEXT: je LBB7_4
823823
; SSE42-NEXT: LBB7_3: ## %cond.load1
@@ -1220,7 +1220,7 @@ define <8 x float> @load_v8f32_v8i1_zero(<8 x i1> %mask, ptr %addr) {
12201220
; SSE42-NEXT: je LBB10_10
12211221
; SSE42-NEXT: LBB10_9: ## %cond.load10
12221222
; SSE42-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
1223-
; SSE42-NEXT: blendps {{.*#+}} xmm1 = xmm2[0],xmm1[1,2,3]
1223+
; SSE42-NEXT: movss {{.*#+}} xmm1 = xmm2[0],xmm1[1,2,3]
12241224
; SSE42-NEXT: testb $32, %al
12251225
; SSE42-NEXT: je LBB10_12
12261226
; SSE42-NEXT: LBB10_11: ## %cond.load13

llvm/test/CodeGen/X86/sse-insertelt-from-mem.ll

Lines changed: 5 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -7,17 +7,11 @@
77
; 0'th element insertion into an SSE register.
88

99
define <4 x float> @insert_f32_firstelt(<4 x float> %x, ptr %s.addr) {
10-
; SSE2-LABEL: insert_f32_firstelt:
11-
; SSE2: # %bb.0:
12-
; SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
13-
; SSE2-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
14-
; SSE2-NEXT: retq
15-
;
16-
; SSE41-LABEL: insert_f32_firstelt:
17-
; SSE41: # %bb.0:
18-
; SSE41-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
19-
; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
20-
; SSE41-NEXT: retq
10+
; SSE-LABEL: insert_f32_firstelt:
11+
; SSE: # %bb.0:
12+
; SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
13+
; SSE-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
14+
; SSE-NEXT: retq
2115
;
2216
; AVX-LABEL: insert_f32_firstelt:
2317
; AVX: # %bb.0:

llvm/test/CodeGen/X86/sse-insertelt.ll

Lines changed: 4 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -7,15 +7,10 @@
77
; 0'th element insertion into an SSE register.
88

99
define <4 x float> @insert_f32_firstelt(<4 x float> %x, float %s) {
10-
; SSE2-LABEL: insert_f32_firstelt:
11-
; SSE2: # %bb.0:
12-
; SSE2-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
13-
; SSE2-NEXT: retq
14-
;
15-
; SSE41-LABEL: insert_f32_firstelt:
16-
; SSE41: # %bb.0:
17-
; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
18-
; SSE41-NEXT: retq
10+
; SSE-LABEL: insert_f32_firstelt:
11+
; SSE: # %bb.0:
12+
; SSE-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
13+
; SSE-NEXT: retq
1914
;
2015
; AVX-LABEL: insert_f32_firstelt:
2116
; AVX: # %bb.0:

llvm/test/CodeGen/X86/sse-scalar-fp-arith.ll

Lines changed: 48 additions & 96 deletions
Original file line number | Diff line number | Diff line change
@@ -1,10 +1,10 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,X86-SSE,SSE2,X86-SSE2
3-
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,X86-SSE,SSE41,X86-SSE41
2+
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,X86-SSE,X86-SSE2
3+
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,X86-SSE,X86-SSE41
44
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,X86-AVX,X86-AVX1
55
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX,X86-AVX,X86-AVX512
6-
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,X64-SSE,SSE2,X64-SSE2
7-
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,X64-SSE,SSE41,X64-SSE41
6+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,X64-SSE,X64-SSE2
7+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,X64-SSE,X64-SSE41
88
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,X64-AVX,X64-AVX1
99
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX,X64-AVX,X64-AVX512
1010

@@ -1150,17 +1150,11 @@ define <4 x float> @insert_test5_add_ss(<4 x float> %a, <4 x float> %b) {
11501150
}
11511151

11521152
define <4 x float> @insert_test5_sub_ss(<4 x float> %a, <4 x float> %b) {
1153-
; SSE2-LABEL: insert_test5_sub_ss:
1154-
; SSE2: # %bb.0:
1155-
; SSE2-NEXT: subps %xmm0, %xmm1
1156-
; SSE2-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
1157-
; SSE2-NEXT: ret{{[l|q]}}
1158-
;
1159-
; SSE41-LABEL: insert_test5_sub_ss:
1160-
; SSE41: # %bb.0:
1161-
; SSE41-NEXT: subps %xmm0, %xmm1
1162-
; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
1163-
; SSE41-NEXT: ret{{[l|q]}}
1153+
; SSE-LABEL: insert_test5_sub_ss:
1154+
; SSE: # %bb.0:
1155+
; SSE-NEXT: subps %xmm0, %xmm1
1156+
; SSE-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
1157+
; SSE-NEXT: ret{{[l|q]}}
11641158
;
11651159
; AVX-LABEL: insert_test5_sub_ss:
11661160
; AVX: # %bb.0:
@@ -1188,17 +1182,11 @@ define <4 x float> @insert_test5_mul_ss(<4 x float> %a, <4 x float> %b) {
11881182
}
11891183

11901184
define <4 x float> @insert_test5_div_ss(<4 x float> %a, <4 x float> %b) {
1191-
; SSE2-LABEL: insert_test5_div_ss:
1192-
; SSE2: # %bb.0:
1193-
; SSE2-NEXT: divps %xmm0, %xmm1
1194-
; SSE2-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
1195-
; SSE2-NEXT: ret{{[l|q]}}
1196-
;
1197-
; SSE41-LABEL: insert_test5_div_ss:
1198-
; SSE41: # %bb.0:
1199-
; SSE41-NEXT: divps %xmm0, %xmm1
1200-
; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
1201-
; SSE41-NEXT: ret{{[l|q]}}
1185+
; SSE-LABEL: insert_test5_div_ss:
1186+
; SSE: # %bb.0:
1187+
; SSE-NEXT: divps %xmm0, %xmm1
1188+
; SSE-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
1189+
; SSE-NEXT: ret{{[l|q]}}
12021190
;
12031191
; AVX-LABEL: insert_test5_div_ss:
12041192
; AVX: # %bb.0:
@@ -1226,17 +1214,11 @@ define <2 x double> @insert_test5_add_sd(<2 x double> %a, <2 x double> %b) {
12261214
}
12271215

12281216
define <2 x double> @insert_test5_sub_sd(<2 x double> %a, <2 x double> %b) {
1229-
; SSE2-LABEL: insert_test5_sub_sd:
1230-
; SSE2: # %bb.0:
1231-
; SSE2-NEXT: subpd %xmm0, %xmm1
1232-
; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
1233-
; SSE2-NEXT: ret{{[l|q]}}
1234-
;
1235-
; SSE41-LABEL: insert_test5_sub_sd:
1236-
; SSE41: # %bb.0:
1237-
; SSE41-NEXT: subpd %xmm0, %xmm1
1238-
; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
1239-
; SSE41-NEXT: ret{{[l|q]}}
1217+
; SSE-LABEL: insert_test5_sub_sd:
1218+
; SSE: # %bb.0:
1219+
; SSE-NEXT: subpd %xmm0, %xmm1
1220+
; SSE-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
1221+
; SSE-NEXT: ret{{[l|q]}}
12401222
;
12411223
; AVX-LABEL: insert_test5_sub_sd:
12421224
; AVX: # %bb.0:
@@ -1264,17 +1246,11 @@ define <2 x double> @insert_test5_mul_sd(<2 x double> %a, <2 x double> %b) {
12641246
}
12651247

12661248
define <2 x double> @insert_test5_div_sd(<2 x double> %a, <2 x double> %b) {
1267-
; SSE2-LABEL: insert_test5_div_sd:
1268-
; SSE2: # %bb.0:
1269-
; SSE2-NEXT: divpd %xmm0, %xmm1
1270-
; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
1271-
; SSE2-NEXT: ret{{[l|q]}}
1272-
;
1273-
; SSE41-LABEL: insert_test5_div_sd:
1274-
; SSE41: # %bb.0:
1275-
; SSE41-NEXT: divpd %xmm0, %xmm1
1276-
; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
1277-
; SSE41-NEXT: ret{{[l|q]}}
1249+
; SSE-LABEL: insert_test5_div_sd:
1250+
; SSE: # %bb.0:
1251+
; SSE-NEXT: divpd %xmm0, %xmm1
1252+
; SSE-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
1253+
; SSE-NEXT: ret{{[l|q]}}
12781254
;
12791255
; AVX-LABEL: insert_test5_div_sd:
12801256
; AVX: # %bb.0:
@@ -1287,29 +1263,17 @@ define <2 x double> @insert_test5_div_sd(<2 x double> %a, <2 x double> %b) {
12871263
}
12881264

12891265
define <4 x float> @add_ss_mask(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
1290-
; X86-SSE2-LABEL: add_ss_mask:
1291-
; X86-SSE2: # %bb.0:
1292-
; X86-SSE2-NEXT: testb $1, {{[0-9]+}}(%esp)
1293-
; X86-SSE2-NEXT: jne .LBB70_1
1294-
; X86-SSE2-NEXT: # %bb.2:
1295-
; X86-SSE2-NEXT: movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
1296-
; X86-SSE2-NEXT: retl
1297-
; X86-SSE2-NEXT: .LBB70_1:
1298-
; X86-SSE2-NEXT: addss %xmm0, %xmm1
1299-
; X86-SSE2-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
1300-
; X86-SSE2-NEXT: retl
1301-
;
1302-
; X86-SSE41-LABEL: add_ss_mask:
1303-
; X86-SSE41: # %bb.0:
1304-
; X86-SSE41-NEXT: testb $1, {{[0-9]+}}(%esp)
1305-
; X86-SSE41-NEXT: jne .LBB70_1
1306-
; X86-SSE41-NEXT: # %bb.2:
1307-
; X86-SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
1308-
; X86-SSE41-NEXT: retl
1309-
; X86-SSE41-NEXT: .LBB70_1:
1310-
; X86-SSE41-NEXT: addss %xmm0, %xmm1
1311-
; X86-SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
1312-
; X86-SSE41-NEXT: retl
1266+
; X86-SSE-LABEL: add_ss_mask:
1267+
; X86-SSE: # %bb.0:
1268+
; X86-SSE-NEXT: testb $1, {{[0-9]+}}(%esp)
1269+
; X86-SSE-NEXT: jne .LBB70_1
1270+
; X86-SSE-NEXT: # %bb.2:
1271+
; X86-SSE-NEXT: movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
1272+
; X86-SSE-NEXT: retl
1273+
; X86-SSE-NEXT: .LBB70_1:
1274+
; X86-SSE-NEXT: addss %xmm0, %xmm1
1275+
; X86-SSE-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
1276+
; X86-SSE-NEXT: retl
13131277
;
13141278
; X86-AVX1-LABEL: add_ss_mask:
13151279
; X86-AVX1: # %bb.0:
@@ -1329,29 +1293,17 @@ define <4 x float> @add_ss_mask(<4 x float> %a, <4 x float> %b, <4 x float> %c,
13291293
; X86-AVX512-NEXT: vmovaps %xmm2, %xmm0
13301294
; X86-AVX512-NEXT: retl
13311295
;
1332-
; X64-SSE2-LABEL: add_ss_mask:
1333-
; X64-SSE2: # %bb.0:
1334-
; X64-SSE2-NEXT: testb $1, %dil
1335-
; X64-SSE2-NEXT: jne .LBB70_1
1336-
; X64-SSE2-NEXT: # %bb.2:
1337-
; X64-SSE2-NEXT: movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
1338-
; X64-SSE2-NEXT: retq
1339-
; X64-SSE2-NEXT: .LBB70_1:
1340-
; X64-SSE2-NEXT: addss %xmm0, %xmm1
1341-
; X64-SSE2-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
1342-
; X64-SSE2-NEXT: retq
1343-
;
1344-
; X64-SSE41-LABEL: add_ss_mask:
1345-
; X64-SSE41: # %bb.0:
1346-
; X64-SSE41-NEXT: testb $1, %dil
1347-
; X64-SSE41-NEXT: jne .LBB70_1
1348-
; X64-SSE41-NEXT: # %bb.2:
1349-
; X64-SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
1350-
; X64-SSE41-NEXT: retq
1351-
; X64-SSE41-NEXT: .LBB70_1:
1352-
; X64-SSE41-NEXT: addss %xmm0, %xmm1
1353-
; X64-SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
1354-
; X64-SSE41-NEXT: retq
1296+
; X64-SSE-LABEL: add_ss_mask:
1297+
; X64-SSE: # %bb.0:
1298+
; X64-SSE-NEXT: testb $1, %dil
1299+
; X64-SSE-NEXT: jne .LBB70_1
1300+
; X64-SSE-NEXT: # %bb.2:
1301+
; X64-SSE-NEXT: movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
1302+
; X64-SSE-NEXT: retq
1303+
; X64-SSE-NEXT: .LBB70_1:
1304+
; X64-SSE-NEXT: addss %xmm0, %xmm1
1305+
; X64-SSE-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
1306+
; X64-SSE-NEXT: retq
13551307
;
13561308
; X64-AVX1-LABEL: add_ss_mask:
13571309
; X64-AVX1: # %bb.0:
@@ -1402,7 +1354,7 @@ define <2 x double> @add_sd_mask(<2 x double> %a, <2 x double> %b, <2 x double>
14021354
; X86-SSE41-NEXT: retl
14031355
; X86-SSE41-NEXT: .LBB71_1:
14041356
; X86-SSE41-NEXT: addsd %xmm0, %xmm1
1405-
; X86-SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
1357+
; X86-SSE41-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
14061358
; X86-SSE41-NEXT: retl
14071359
;
14081360
; X86-AVX1-LABEL: add_sd_mask:
@@ -1444,7 +1396,7 @@ define <2 x double> @add_sd_mask(<2 x double> %a, <2 x double> %b, <2 x double>
14441396
; X64-SSE41-NEXT: retq
14451397
; X64-SSE41-NEXT: .LBB71_1:
14461398
; X64-SSE41-NEXT: addsd %xmm0, %xmm1
1447-
; X64-SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
1399+
; X64-SSE41-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
14481400
; X64-SSE41-NEXT: retq
14491401
;
14501402
; X64-AVX1-LABEL: add_sd_mask:

llvm/test/CodeGen/X86/sse41.ll

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -345,7 +345,7 @@ define <4 x float> @blendps_not_insertps_1(<4 x float> %t1, float %t2) nounwind
345345
; X86-SSE: ## %bb.0:
346346
; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm1 ## xmm1 = mem[0],zero,zero,zero
347347
; X86-SSE-NEXT: ## encoding: [0xf3,0x0f,0x10,0x4c,0x24,0x04]
348-
; X86-SSE-NEXT: blendps $1, %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x3a,0x0c,0xc1,0x01]
348+
; X86-SSE-NEXT: movss %xmm1, %xmm0 ## encoding: [0xf3,0x0f,0x10,0xc1]
349349
; X86-SSE-NEXT: ## xmm0 = xmm1[0],xmm0[1,2,3]
350350
; X86-SSE-NEXT: retl ## encoding: [0xc3]
351351
;
@@ -367,7 +367,7 @@ define <4 x float> @blendps_not_insertps_1(<4 x float> %t1, float %t2) nounwind
367367
;
368368
; X64-SSE-LABEL: blendps_not_insertps_1:
369369
; X64-SSE: ## %bb.0:
370-
; X64-SSE-NEXT: blendps $1, %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x3a,0x0c,0xc1,0x01]
370+
; X64-SSE-NEXT: movss %xmm1, %xmm0 ## encoding: [0xf3,0x0f,0x10,0xc1]
371371
; X64-SSE-NEXT: ## xmm0 = xmm1[0],xmm0[1,2,3]
372372
; X64-SSE-NEXT: retq ## encoding: [0xc3]
373373
;
@@ -434,7 +434,7 @@ define <4 x float> @insertps_or_blendps(<4 x float> %t1, float %t2) minsize noun
434434
define <4 x float> @blendps_not_insertps_2(<4 x float> %t1, <4 x float> %t2) nounwind {
435435
; SSE-LABEL: blendps_not_insertps_2:
436436
; SSE: ## %bb.0:
437-
; SSE-NEXT: blendps $1, %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x3a,0x0c,0xc1,0x01]
437+
; SSE-NEXT: movss %xmm1, %xmm0 ## encoding: [0xf3,0x0f,0x10,0xc1]
438438
; SSE-NEXT: ## xmm0 = xmm1[0],xmm0[1,2,3]
439439
; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
440440
;

0 commit comments

Comments
 (0)