Skip to content

Commit 31823fb

Browse files
committed
[X86] Add more load folding patterns for blend instructions as a follow up to r354363.
This avoids depending on the peephole pass to do load folding. Also adds some load folding for some insert_subvector patterns that use blend. All of this was found by temporarily adding TB_NO_FORWARD to the blend immediate entries in the load folding tables. I've added -disable-peephole to some of the affected tests from that experiment to ensure we're testing isel patterns. llvm-svn: 354511
1 parent 9ad714f commit 31823fb

File tree

7 files changed

+98
-38
lines changed

7 files changed

+98
-38
lines changed

llvm/lib/Target/X86/X86InstrSSE.td

Lines changed: 66 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6510,17 +6510,35 @@ let Predicates = [HasAVX2] in {
65106510
// Emulate vXi32/vXi64 blends with vXf32/vXf64.
65116511
// ExecutionDomainFixPass will cleanup domains later on.
65126512
let Predicates = [HasAVX] in {
6513-
def : Pat<(X86Blendi (v4i64 VR256:$src1), (v4i64 VR256:$src2), (iPTR imm:$src3)),
6513+
def : Pat<(X86Blendi (v4i64 VR256:$src1), (v4i64 VR256:$src2), imm:$src3),
65146514
(VBLENDPDYrri VR256:$src1, VR256:$src2, imm:$src3)>;
6515-
def : Pat<(X86Blendi (v2i64 VR128:$src1), (v2i64 VR128:$src2), (iPTR imm:$src3)),
6515+
def : Pat<(X86Blendi VR256:$src1, (loadv4i64 addr:$src2), imm:$src3),
6516+
(VBLENDPDYrmi VR256:$src1, addr:$src2, imm:$src3)>;
6517+
def : Pat<(X86Blendi (loadv4i64 addr:$src2), VR256:$src1, imm:$src3),
6518+
(VBLENDPDYrmi VR256:$src1, addr:$src2, (BlendCommuteImm4 imm:$src3))>;
6519+
6520+
def : Pat<(X86Blendi (v2i64 VR128:$src1), (v2i64 VR128:$src2), imm:$src3),
65166521
(VBLENDPDrri VR128:$src1, VR128:$src2, imm:$src3)>;
6522+
def : Pat<(X86Blendi VR128:$src1, (loadv2i64 addr:$src2), imm:$src3),
6523+
(VBLENDPDrmi VR128:$src1, addr:$src2, imm:$src3)>;
6524+
def : Pat<(X86Blendi (loadv2i64 addr:$src2), VR128:$src1, imm:$src3),
6525+
(VBLENDPDrmi VR128:$src1, addr:$src2, (BlendCommuteImm2 imm:$src3))>;
65176526
}
65186527

65196528
let Predicates = [HasAVX1Only] in {
6520-
def : Pat<(X86Blendi (v8i32 VR256:$src1), (v8i32 VR256:$src2), (iPTR imm:$src3)),
6529+
def : Pat<(X86Blendi (v8i32 VR256:$src1), (v8i32 VR256:$src2), imm:$src3),
65216530
(VBLENDPSYrri VR256:$src1, VR256:$src2, imm:$src3)>;
6522-
def : Pat<(X86Blendi (v4i32 VR128:$src1), (v4i32 VR128:$src2), (iPTR imm:$src3)),
6531+
def : Pat<(X86Blendi VR256:$src1, (loadv8i32 addr:$src2), imm:$src3),
6532+
(VBLENDPSYrmi VR256:$src1, addr:$src2, imm:$src3)>;
6533+
def : Pat<(X86Blendi (loadv8i32 addr:$src2), VR256:$src1, imm:$src3),
6534+
(VBLENDPSYrmi VR256:$src1, addr:$src2, (BlendCommuteImm8 imm:$src3))>;
6535+
6536+
def : Pat<(X86Blendi (v4i32 VR128:$src1), (v4i32 VR128:$src2), imm:$src3),
65236537
(VBLENDPSrri VR128:$src1, VR128:$src2, imm:$src3)>;
6538+
def : Pat<(X86Blendi VR128:$src1, (loadv4i32 addr:$src2), imm:$src3),
6539+
(VBLENDPSrmi VR128:$src1, addr:$src2, imm:$src3)>;
6540+
def : Pat<(X86Blendi (loadv4i32 addr:$src2), VR128:$src1, imm:$src3),
6541+
(VBLENDPSrmi VR128:$src1, addr:$src2, (BlendCommuteImm4 imm:$src3))>;
65246542
}
65256543

65266544
defm BLENDPS : SS41I_blend_rmi<0x0C, "blendps", X86Blendi, v4f32,
@@ -6534,10 +6552,19 @@ defm PBLENDW : SS41I_blend_rmi<0x0E, "pblendw", X86Blendi, v8i16,
65346552
SchedWriteBlend.XMM, BlendCommuteImm8>;
65356553

65366554
let Predicates = [UseSSE41] in {
6537-
def : Pat<(X86Blendi (v2i64 VR128:$src1), (v2i64 VR128:$src2), (iPTR imm:$src3)),
6555+
def : Pat<(X86Blendi (v2i64 VR128:$src1), (v2i64 VR128:$src2), imm:$src3),
65386556
(BLENDPDrri VR128:$src1, VR128:$src2, imm:$src3)>;
6539-
def : Pat<(X86Blendi (v4i32 VR128:$src1), (v4i32 VR128:$src2), (iPTR imm:$src3)),
6557+
def : Pat<(X86Blendi VR128:$src1, (memopv2i64 addr:$src2), imm:$src3),
6558+
(BLENDPDrmi VR128:$src1, addr:$src2, imm:$src3)>;
6559+
def : Pat<(X86Blendi (memopv2i64 addr:$src2), VR128:$src1, imm:$src3),
6560+
(BLENDPDrmi VR128:$src1, addr:$src2, (BlendCommuteImm2 imm:$src3))>;
6561+
6562+
def : Pat<(X86Blendi (v4i32 VR128:$src1), (v4i32 VR128:$src2), imm:$src3),
65406563
(BLENDPSrri VR128:$src1, VR128:$src2, imm:$src3)>;
6564+
def : Pat<(X86Blendi VR128:$src1, (memopv4i32 addr:$src2), imm:$src3),
6565+
(BLENDPSrmi VR128:$src1, addr:$src2, imm:$src3)>;
6566+
def : Pat<(X86Blendi (memopv4i32 addr:$src2), VR128:$src1, imm:$src3),
6567+
(BLENDPSrmi VR128:$src1, addr:$src2, (BlendCommuteImm4 imm:$src3))>;
65416568
}
65426569

65436570
// For insertion into the zero index (low half) of a 256-bit vector, it is
@@ -6551,6 +6578,13 @@ def : Pat<(insert_subvector (v8f32 VR256:$src1), (v4f32 VR128:$src2), (iPTR 0)),
65516578
(VBLENDPSYrri VR256:$src1,
65526579
(INSERT_SUBREG (v8f32 (IMPLICIT_DEF)),
65536580
VR128:$src2, sub_xmm), 0xf)>;
6581+
6582+
def : Pat<(insert_subvector (loadv4f64 addr:$src2), (v2f64 VR128:$src1), (iPTR 0)),
6583+
(VBLENDPDYrmi (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)),
6584+
VR128:$src1, sub_xmm), addr:$src2, 0xc)>;
6585+
def : Pat<(insert_subvector (loadv8f32 addr:$src2), (v4f32 VR128:$src1), (iPTR 0)),
6586+
(VBLENDPSYrmi (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)),
6587+
VR128:$src1, sub_xmm), addr:$src2, 0xf0)>;
65546588
}
65556589

65566590
/// SS41I_quaternary_vx - AVX SSE 4.1 with 4 operators
@@ -7804,6 +7838,19 @@ def : Pat<(insert_subvector (v32i8 VR256:$src1), (v16i8 VR128:$src2), (iPTR 0)),
78047838
(VPBLENDDYrri VR256:$src1,
78057839
(INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
78067840
VR128:$src2, sub_xmm), 0xf)>;
7841+
7842+
def : Pat<(insert_subvector (loadv8i32 addr:$src2), (v4i32 VR128:$src1), (iPTR 0)),
7843+
(VPBLENDDYrmi (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
7844+
VR128:$src1, sub_xmm), addr:$src2, 0xf0)>;
7845+
def : Pat<(insert_subvector (loadv4i64 addr:$src2), (v2i64 VR128:$src1), (iPTR 0)),
7846+
(VPBLENDDYrmi (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
7847+
VR128:$src1, sub_xmm), addr:$src2, 0xf0)>;
7848+
def : Pat<(insert_subvector (loadv16i16 addr:$src2), (v8i16 VR128:$src1), (iPTR 0)),
7849+
(VPBLENDDYrmi (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
7850+
VR128:$src1, sub_xmm), addr:$src2, 0xf0)>;
7851+
def : Pat<(insert_subvector (loadv32i8 addr:$src2), (v16i8 VR128:$src1), (iPTR 0)),
7852+
(VPBLENDDYrmi (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
7853+
VR128:$src1, sub_xmm), addr:$src2, 0xf0)>;
78077854
}
78087855

78097856
let Predicates = [HasAVX1Only] in {
@@ -7823,6 +7870,19 @@ def : Pat<(insert_subvector (v32i8 VR256:$src1), (v16i8 VR128:$src2), (iPTR 0)),
78237870
(VBLENDPSYrri VR256:$src1,
78247871
(INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
78257872
VR128:$src2, sub_xmm), 0xf)>;
7873+
7874+
def : Pat<(insert_subvector (loadv8i32 addr:$src2), (v4i32 VR128:$src1), (iPTR 0)),
7875+
(VBLENDPSYrmi (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
7876+
VR128:$src1, sub_xmm), addr:$src2, 0xf0)>;
7877+
def : Pat<(insert_subvector (loadv4i64 addr:$src2), (v2i64 VR128:$src1), (iPTR 0)),
7878+
(VBLENDPSYrmi (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
7879+
VR128:$src1, sub_xmm), addr:$src2, 0xf0)>;
7880+
def : Pat<(insert_subvector (loadv16i16 addr:$src2), (v8i16 VR128:$src1), (iPTR 0)),
7881+
(VBLENDPSYrmi (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
7882+
VR128:$src1, sub_xmm), addr:$src2, 0xf0)>;
7883+
def : Pat<(insert_subvector (loadv32i8 addr:$src2), (v16i8 VR128:$src1), (iPTR 0)),
7884+
(VBLENDPSYrmi (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
7885+
VR128:$src1, sub_xmm), addr:$src2, 0xf0)>;
78267886
}
78277887

78287888
//===----------------------------------------------------------------------===//

llvm/test/CodeGen/X86/avx-cvt-3.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=avx | FileCheck %s --check-prefix=X86
3-
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx | FileCheck %s --check-prefix=X64
2+
; RUN: llc < %s -disable-peephole -mtriple=i686-unknown-unknown -mattr=avx | FileCheck %s --check-prefix=X86
3+
; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=avx | FileCheck %s --check-prefix=X64
44

55
; Insertion/shuffles of all-zero/all-bits/constants into v8i32->v8f32 sitofp conversion.
66

llvm/test/CodeGen/X86/avx512-shuffles/partial_permute.ll

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1912,8 +1912,8 @@ define <2 x i64> @test_masked_z_4xi64_to_2xi64_perm_mem_mask0(<4 x i64>* %vp, <2
19121912
define <2 x i64> @test_masked_4xi64_to_2xi64_perm_mem_mask1(<4 x i64>* %vp, <2 x i64> %vec2, <2 x i64> %mask) {
19131913
; CHECK-LABEL: test_masked_4xi64_to_2xi64_perm_mem_mask1:
19141914
; CHECK: # %bb.0:
1915-
; CHECK-NEXT: vmovdqa (%rdi), %xmm2
1916-
; CHECK-NEXT: vpblendd {{.*#+}} xmm2 = mem[0,1],xmm2[2,3]
1915+
; CHECK-NEXT: vmovdqa 16(%rdi), %xmm2
1916+
; CHECK-NEXT: vpblendd {{.*#+}} xmm2 = xmm2[0,1],mem[2,3]
19171917
; CHECK-NEXT: vptestnmq %xmm1, %xmm1, %k1
19181918
; CHECK-NEXT: vmovdqa64 %xmm2, %xmm0 {%k1}
19191919
; CHECK-NEXT: retq
@@ -1927,8 +1927,8 @@ define <2 x i64> @test_masked_4xi64_to_2xi64_perm_mem_mask1(<4 x i64>* %vp, <2 x
19271927
define <2 x i64> @test_masked_z_4xi64_to_2xi64_perm_mem_mask1(<4 x i64>* %vp, <2 x i64> %mask) {
19281928
; CHECK-LABEL: test_masked_z_4xi64_to_2xi64_perm_mem_mask1:
19291929
; CHECK: # %bb.0:
1930-
; CHECK-NEXT: vmovdqa (%rdi), %xmm1
1931-
; CHECK-NEXT: vpblendd {{.*#+}} xmm1 = mem[0,1],xmm1[2,3]
1930+
; CHECK-NEXT: vmovdqa 16(%rdi), %xmm1
1931+
; CHECK-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0,1],mem[2,3]
19321932
; CHECK-NEXT: vptestnmq %xmm0, %xmm0, %k1
19331933
; CHECK-NEXT: vmovdqa64 %xmm1, %xmm0 {%k1} {z}
19341934
; CHECK-NEXT: retq
@@ -2553,8 +2553,8 @@ define <4 x i64> @test_masked_z_8xi64_to_4xi64_perm_mem_mask7(<8 x i64>* %vp, <4
25532553
define <2 x i64> @test_8xi64_to_2xi64_perm_mem_mask0(<8 x i64>* %vp) {
25542554
; CHECK-LABEL: test_8xi64_to_2xi64_perm_mem_mask0:
25552555
; CHECK: # %bb.0:
2556-
; CHECK-NEXT: vmovaps (%rdi), %xmm0
2557-
; CHECK-NEXT: vblendps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
2556+
; CHECK-NEXT: vmovaps 32(%rdi), %xmm0
2557+
; CHECK-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],mem[2,3]
25582558
; CHECK-NEXT: retq
25592559
%vec = load <8 x i64>, <8 x i64>* %vp
25602560
%res = shufflevector <8 x i64> %vec, <8 x i64> undef, <2 x i32> <i32 4, i32 1>
@@ -2563,8 +2563,8 @@ define <2 x i64> @test_8xi64_to_2xi64_perm_mem_mask0(<8 x i64>* %vp) {
25632563
define <2 x i64> @test_masked_8xi64_to_2xi64_perm_mem_mask0(<8 x i64>* %vp, <2 x i64> %vec2, <2 x i64> %mask) {
25642564
; CHECK-LABEL: test_masked_8xi64_to_2xi64_perm_mem_mask0:
25652565
; CHECK: # %bb.0:
2566-
; CHECK-NEXT: vmovdqa (%rdi), %xmm2
2567-
; CHECK-NEXT: vpblendd {{.*#+}} xmm2 = mem[0,1],xmm2[2,3]
2566+
; CHECK-NEXT: vmovdqa 32(%rdi), %xmm2
2567+
; CHECK-NEXT: vpblendd {{.*#+}} xmm2 = xmm2[0,1],mem[2,3]
25682568
; CHECK-NEXT: vptestnmq %xmm1, %xmm1, %k1
25692569
; CHECK-NEXT: vmovdqa64 %xmm2, %xmm0 {%k1}
25702570
; CHECK-NEXT: retq
@@ -2578,8 +2578,8 @@ define <2 x i64> @test_masked_8xi64_to_2xi64_perm_mem_mask0(<8 x i64>* %vp, <2 x
25782578
define <2 x i64> @test_masked_z_8xi64_to_2xi64_perm_mem_mask0(<8 x i64>* %vp, <2 x i64> %mask) {
25792579
; CHECK-LABEL: test_masked_z_8xi64_to_2xi64_perm_mem_mask0:
25802580
; CHECK: # %bb.0:
2581-
; CHECK-NEXT: vmovdqa (%rdi), %xmm1
2582-
; CHECK-NEXT: vpblendd {{.*#+}} xmm1 = mem[0,1],xmm1[2,3]
2581+
; CHECK-NEXT: vmovdqa 32(%rdi), %xmm1
2582+
; CHECK-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0,1],mem[2,3]
25832583
; CHECK-NEXT: vptestnmq %xmm0, %xmm0, %k1
25842584
; CHECK-NEXT: vmovdqa64 %xmm1, %xmm0 {%k1} {z}
25852585
; CHECK-NEXT: retq

llvm/test/CodeGen/X86/commute-blend-sse41.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse4.1 | FileCheck %s
2+
; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown -mattr=+sse4.1 | FileCheck %s
33

44
define <8 x i16> @commute_fold_pblendw(<8 x i16> %a, <8 x i16>* %b) {
55
; CHECK-LABEL: commute_fold_pblendw:

llvm/test/CodeGen/X86/insert-into-constant-vector.ll

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,14 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X32SSE --check-prefix=X32SSE2
3-
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X64SSE --check-prefix=X64SSE2
4-
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=X32SSE --check-prefix=X32SSE4
5-
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=X64SSE --check-prefix=X64SSE4
6-
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=X32AVX --check-prefix=X32AVX1
7-
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=X64AVX --check-prefix=X64AVX1
8-
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X32AVX --check-prefix=X32AVX2
9-
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X64AVX --check-prefix=X64AVX2
10-
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=X32AVX --check-prefix=X32AVX512F
11-
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=X64AVX --check-prefix=X64AVX512F
2+
; RUN: llc < %s -disable-peephole -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X32SSE --check-prefix=X32SSE2
3+
; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X64SSE --check-prefix=X64SSE2
4+
; RUN: llc < %s -disable-peephole -mtriple=i686-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=X32SSE --check-prefix=X32SSE4
5+
; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=X64SSE --check-prefix=X64SSE4
6+
; RUN: llc < %s -disable-peephole -mtriple=i686-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=X32AVX --check-prefix=X32AVX1
7+
; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=X64AVX --check-prefix=X64AVX1
8+
; RUN: llc < %s -disable-peephole -mtriple=i686-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X32AVX --check-prefix=X32AVX2
9+
; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X64AVX --check-prefix=X64AVX2
10+
; RUN: llc < %s -disable-peephole -mtriple=i686-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=X32AVX --check-prefix=X32AVX512F
11+
; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=X64AVX --check-prefix=X64AVX512F
1212

1313
define <16 x i8> @elt0_v16i8(i8 %x) {
1414
; X32SSE2-LABEL: elt0_v16i8:

llvm/test/CodeGen/X86/masked_load.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=sse2 | FileCheck %s --check-prefixes=SSE,SSE2
3-
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=sse4.2 | FileCheck %s --check-prefixes=SSE,SSE42
4-
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=avx | FileCheck %s --check-prefixes=AVX,AVX1OR2,AVX1
5-
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=avx2 | FileCheck %s --check-prefixes=AVX,AVX1OR2,AVX2
6-
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=avx512f | FileCheck %s --check-prefixes=AVX,AVX512,AVX512F
7-
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=avx512f,avx512bw,avx512vl | FileCheck %s --check-prefixes=AVX,AVX512,AVX512VLBW
2+
; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=sse2 | FileCheck %s --check-prefixes=SSE,SSE2
3+
; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=sse4.2 | FileCheck %s --check-prefixes=SSE,SSE42
4+
; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=avx | FileCheck %s --check-prefixes=AVX,AVX1OR2,AVX1
5+
; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=avx2 | FileCheck %s --check-prefixes=AVX,AVX1OR2,AVX2
6+
; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=avx512f | FileCheck %s --check-prefixes=AVX,AVX512,AVX512F
7+
; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=avx512f,avx512bw,avx512vl | FileCheck %s --check-prefixes=AVX,AVX512,AVX512VLBW
88

99
define <1 x double> @load_v1f64_v1i64(<1 x i64> %trigger, <1 x double>* %addr, <1 x double> %dst) {
1010
; SSE-LABEL: load_v1f64_v1i64:

llvm/test/CodeGen/X86/merge-consecutive-loads-256.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
3-
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
4-
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX --check-prefix=AVX512F
2+
; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
3+
; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
4+
; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX --check-prefix=AVX512F
55
;
66
; Just one 32-bit run to make sure we do reasonable things.
77
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=X32-AVX

0 commit comments

Comments
 (0)