@@ -6510,17 +6510,35 @@ let Predicates = [HasAVX2] in {
6510
6510
// Emulate vXi32/vXi64 blends with vXf32/vXf64.
6511
6511
// ExecutionDomainFixPass will clean up domains later on.
6512
6512
// X86Blendi patterns for 64-bit-element integer vectors (v4i64 / v2i64),
// selected to the VBLENDPD instructions under the HasAVX predicate.
let Predicates = [HasAVX] in {
6513
- def : Pat<(X86Blendi (v4i64 VR256:$src1), (v4i64 VR256:$src2), (iPTR imm:$src3) ),
6513
// 256-bit register/register blend.
+ def : Pat<(X86Blendi (v4i64 VR256:$src1), (v4i64 VR256:$src2), imm:$src3),
6514
6514
(VBLENDPDYrri VR256:$src1, VR256:$src2, imm:$src3)>;
6515
- def : Pat<(X86Blendi (v2i64 VR128:$src1), (v2i64 VR128:$src2), (iPTR imm:$src3)),
6515
// Second blend operand is a loadv4i64: its address is passed to VBLENDPDYrmi.
+ def : Pat<(X86Blendi VR256:$src1, (loadv4i64 addr:$src2), imm:$src3),
6516
+ (VBLENDPDYrmi VR256:$src1, addr:$src2, imm:$src3)>;
6517
// First blend operand is the load: the operands are swapped in the output and
// the immediate goes through BlendCommuteImm4 (presumably inverting the
// 4-element mask to compensate for the swap -- confirm against its definition).
+ def : Pat<(X86Blendi (loadv4i64 addr:$src2), VR256:$src1, imm:$src3),
6518
+ (VBLENDPDYrmi VR256:$src1, addr:$src2, (BlendCommuteImm4 imm:$src3))>;
6519
+
6520
// 128-bit register/register blend.
+ def : Pat<(X86Blendi (v2i64 VR128:$src1), (v2i64 VR128:$src2), imm:$src3),
6516
6521
(VBLENDPDrri VR128:$src1, VR128:$src2, imm:$src3)>;
6522
// Second blend operand is a loadv2i64: its address is passed to VBLENDPDrmi.
+ def : Pat<(X86Blendi VR128:$src1, (loadv2i64 addr:$src2), imm:$src3),
6523
+ (VBLENDPDrmi VR128:$src1, addr:$src2, imm:$src3)>;
6524
// Load as the first operand: swapped output operands, immediate rewritten by
// BlendCommuteImm2 (two-element variant).
+ def : Pat<(X86Blendi (loadv2i64 addr:$src2), VR128:$src1, imm:$src3),
6525
+ (VBLENDPDrmi VR128:$src1, addr:$src2, (BlendCommuteImm2 imm:$src3))>;
6517
6526
}
6518
6527
6519
6528
// X86Blendi patterns for 32-bit-element integer vectors (v8i32 / v4i32),
// selected to the VBLENDPS instructions under the HasAVX1Only predicate.
let Predicates = [HasAVX1Only] in {
6520
- def : Pat<(X86Blendi (v8i32 VR256:$src1), (v8i32 VR256:$src2), (iPTR imm:$src3) ),
6529
// 256-bit register/register blend.
+ def : Pat<(X86Blendi (v8i32 VR256:$src1), (v8i32 VR256:$src2), imm:$src3),
6521
6530
(VBLENDPSYrri VR256:$src1, VR256:$src2, imm:$src3)>;
6522
- def : Pat<(X86Blendi (v4i32 VR128:$src1), (v4i32 VR128:$src2), (iPTR imm:$src3)),
6531
// Second blend operand is a loadv8i32: its address is passed to VBLENDPSYrmi.
+ def : Pat<(X86Blendi VR256:$src1, (loadv8i32 addr:$src2), imm:$src3),
6532
+ (VBLENDPSYrmi VR256:$src1, addr:$src2, imm:$src3)>;
6533
// First blend operand is the load: the operands are swapped in the output and
// the immediate goes through BlendCommuteImm8 (presumably inverting the
// 8-element mask to compensate for the swap -- confirm against its definition).
+ def : Pat<(X86Blendi (loadv8i32 addr:$src2), VR256:$src1, imm:$src3),
6534
+ (VBLENDPSYrmi VR256:$src1, addr:$src2, (BlendCommuteImm8 imm:$src3))>;
6535
+
6536
// 128-bit register/register blend.
+ def : Pat<(X86Blendi (v4i32 VR128:$src1), (v4i32 VR128:$src2), imm:$src3),
6523
6537
(VBLENDPSrri VR128:$src1, VR128:$src2, imm:$src3)>;
6538
// Second blend operand is a loadv4i32. The load fragment must wrap a memory
// address (addr:$src2), matching the addr:$src2 operand given to VBLENDPSrmi;
// binding $src2 as a VR128 register here would conflict with the output.
+ def : Pat<(X86Blendi VR128:$src1, (loadv4i32 addr:$src2), imm:$src3),
6539
+ (VBLENDPSrmi VR128:$src1, addr:$src2, imm:$src3)>;
6540
// Load as the first operand: swapped output operands, immediate rewritten by
// BlendCommuteImm4 (four-element variant). The load again takes addr:$src2.
+ def : Pat<(X86Blendi (loadv4i32 addr:$src2), VR128:$src1, imm:$src3),
6541
+ (VBLENDPSrmi VR128:$src1, addr:$src2, (BlendCommuteImm4 imm:$src3))>;
6524
6542
}
6525
6543
6526
6544
defm BLENDPS : SS41I_blend_rmi<0x0C, "blendps", X86Blendi, v4f32,
@@ -6534,10 +6552,19 @@ defm PBLENDW : SS41I_blend_rmi<0x0E, "pblendw", X86Blendi, v8i16,
6534
6552
SchedWriteBlend.XMM, BlendCommuteImm8>;
6535
6553
6536
6554
// X86Blendi patterns for v2i64 / v4i32 selected to the non-VEX BLENDPD/BLENDPS
// instructions under the UseSSE41 predicate. The load patterns here use the
// memopv* fragments rather than the loadv* fragments.
let Predicates = [UseSSE41] in {
6537
- def : Pat<(X86Blendi (v2i64 VR128:$src1), (v2i64 VR128:$src2), (iPTR imm:$src3) ),
6555
// v2i64 register/register blend.
+ def : Pat<(X86Blendi (v2i64 VR128:$src1), (v2i64 VR128:$src2), imm:$src3),
6538
6556
(BLENDPDrri VR128:$src1, VR128:$src2, imm:$src3)>;
6539
- def : Pat<(X86Blendi (v4i32 VR128:$src1), (v4i32 VR128:$src2), (iPTR imm:$src3)),
6557
// Second blend operand is a memopv2i64: its address is passed to BLENDPDrmi.
+ def : Pat<(X86Blendi VR128:$src1, (memopv2i64 addr:$src2), imm:$src3),
6558
+ (BLENDPDrmi VR128:$src1, addr:$src2, imm:$src3)>;
6559
// Memory operand first: swapped output operands, immediate rewritten by
// BlendCommuteImm2 (presumably inverting the 2-element mask -- confirm).
+ def : Pat<(X86Blendi (memopv2i64 addr:$src2), VR128:$src1, imm:$src3),
6560
+ (BLENDPDrmi VR128:$src1, addr:$src2, (BlendCommuteImm2 imm:$src3))>;
6561
+
6562
// v4i32 register/register blend.
+ def : Pat<(X86Blendi (v4i32 VR128:$src1), (v4i32 VR128:$src2), imm:$src3),
6540
6563
(BLENDPSrri VR128:$src1, VR128:$src2, imm:$src3)>;
6564
// Second blend operand is a memopv4i32: its address is passed to BLENDPSrmi.
+ def : Pat<(X86Blendi VR128:$src1, (memopv4i32 addr:$src2), imm:$src3),
6565
+ (BLENDPSrmi VR128:$src1,addr:$src2, imm:$src3)>;
6566
// Memory operand first: swapped output operands, immediate rewritten by
// BlendCommuteImm4.
+ def : Pat<(X86Blendi (memopv4i32 addr:$src2), VR128:$src1, imm:$src3),
6567
+ (BLENDPSrmi VR128:$src1,addr:$src2, (BlendCommuteImm4 imm:$src3))>;
6541
6568
}
6542
6569
6543
6570
// For insertion into the zero index (low half) of a 256-bit vector, it is
@@ -6551,6 +6578,13 @@ def : Pat<(insert_subvector (v8f32 VR256:$src1), (v4f32 VR128:$src2), (iPTR 0)),
6551
6578
(VBLENDPSYrri VR256:$src1,
6552
6579
(INSERT_SUBREG (v8f32 (IMPLICIT_DEF)),
6553
6580
VR128:$src2, sub_xmm), 0xf)>;
6581
+
6582
// insert_subvector at index 0 of a 128-bit register into a loaded v4f64:
// the register is widened with INSERT_SUBREG over an IMPLICIT_DEF and blended
// with the memory operand by VBLENDPDYrmi using immediate 0xc.
// NOTE(review): 0xc presumably selects the upper two elements from memory --
// confirm against the VBLENDPD immediate semantics.
+ def : Pat<(insert_subvector (loadv4f64 addr:$src2), (v2f64 VR128:$src1), (iPTR 0)),
6583
+ (VBLENDPDYrmi (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)),
6584
+ VR128:$src1, sub_xmm), addr:$src2, 0xc)>;
6585
// Same shape for a loaded v8f32 and a v4f32 register, using VBLENDPSYrmi with
// immediate 0xf0 (presumably the upper four elements come from memory).
+ def : Pat<(insert_subvector (loadv8f32 addr:$src2), (v4f32 VR128:$src1), (iPTR 0)),
6586
+ (VBLENDPSYrmi (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)),
6587
+ VR128:$src1, sub_xmm), addr:$src2, 0xf0)>;
6554
6588
}
6555
6589
6556
6590
/// SS41I_quaternary_vx - AVX SSE 4.1 with 4 operators
@@ -7804,6 +7838,19 @@ def : Pat<(insert_subvector (v32i8 VR256:$src1), (v16i8 VR128:$src2), (iPTR 0)),
7804
7838
(VPBLENDDYrri VR256:$src1,
7805
7839
(INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
7806
7840
VR128:$src2, sub_xmm), 0xf)>;
7841
+
7842
// insert_subvector at index 0 of a 128-bit integer register into a loaded
// 256-bit integer vector. Each element type below (i32, i64, i16, i8) maps to
// the same output: the register is widened via INSERT_SUBREG over a v8i32
// IMPLICIT_DEF and blended with the memory operand by VPBLENDDYrmi using
// immediate 0xf0.
// NOTE(review): 0xf0 presumably takes the upper four dwords from memory --
// confirm against the VPBLENDD immediate semantics.
+ def : Pat<(insert_subvector (loadv8i32 addr:$src2), (v4i32 VR128:$src1), (iPTR 0)),
7843
+ (VPBLENDDYrmi (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
7844
+ VR128:$src1, sub_xmm), addr:$src2, 0xf0)>;
7845
+ def : Pat<(insert_subvector (loadv4i64 addr:$src2), (v2i64 VR128:$src1), (iPTR 0)),
7846
+ (VPBLENDDYrmi (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
7847
+ VR128:$src1, sub_xmm), addr:$src2, 0xf0)>;
7848
+ def : Pat<(insert_subvector (loadv16i16 addr:$src2), (v8i16 VR128:$src1), (iPTR 0)),
7849
+ (VPBLENDDYrmi (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
7850
+ VR128:$src1, sub_xmm), addr:$src2, 0xf0)>;
7851
+ def : Pat<(insert_subvector (loadv32i8 addr:$src2), (v16i8 VR128:$src1), (iPTR 0)),
7852
+ (VPBLENDDYrmi (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
7853
+ VR128:$src1, sub_xmm), addr:$src2, 0xf0)>;
7807
7854
}
7808
7855
7809
7856
let Predicates = [HasAVX1Only] in {
@@ -7823,6 +7870,19 @@ def : Pat<(insert_subvector (v32i8 VR256:$src1), (v16i8 VR128:$src2), (iPTR 0)),
7823
7870
(VBLENDPSYrri VR256:$src1,
7824
7871
(INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
7825
7872
VR128:$src2, sub_xmm), 0xf)>;
7873
+
7874
// insert_subvector at index 0 of a 128-bit integer register into a loaded
// 256-bit integer vector, selected to the floating-point blend VBLENDPSYrmi.
// Each element type below (i32, i64, i16, i8) maps to the same output: the
// register is widened via INSERT_SUBREG over a v8i32 IMPLICIT_DEF and blended
// with the memory operand using immediate 0xf0.
// NOTE(review): 0xf0 presumably takes the upper four 32-bit elements from
// memory -- confirm against the VBLENDPS immediate semantics.
+ def : Pat<(insert_subvector (loadv8i32 addr:$src2), (v4i32 VR128:$src1), (iPTR 0)),
7875
+ (VBLENDPSYrmi (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
7876
+ VR128:$src1, sub_xmm), addr:$src2, 0xf0)>;
7877
+ def : Pat<(insert_subvector (loadv4i64 addr:$src2), (v2i64 VR128:$src1), (iPTR 0)),
7878
+ (VBLENDPSYrmi (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
7879
+ VR128:$src1, sub_xmm), addr:$src2, 0xf0)>;
7880
+ def : Pat<(insert_subvector (loadv16i16 addr:$src2), (v8i16 VR128:$src1), (iPTR 0)),
7881
+ (VBLENDPSYrmi (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
7882
+ VR128:$src1, sub_xmm), addr:$src2, 0xf0)>;
7883
+ def : Pat<(insert_subvector (loadv32i8 addr:$src2), (v16i8 VR128:$src1), (iPTR 0)),
7884
+ (VBLENDPSYrmi (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
7885
+ VR128:$src1, sub_xmm), addr:$src2, 0xf0)>;
7826
7886
}
7827
7887
7828
7888
//===----------------------------------------------------------------------===//
0 commit comments