@@ -7048,6 +7048,50 @@ def : Pat<(v32i8 (X86SubVBroadcastld128 addr:$src)),
7048
7048
(VBROADCASTF128 addr:$src)>;
7049
7049
}
7050
7050
7051
+ //===----------------------------------------------------------------------===//
7052
+ // VPERM2F128 - Permute Floating-Point Values in 128-bit chunks
7053
+ //
7054
+
7055
+ let ExeDomain = SSEPackedSingle in { // Register (rr) and memory (rm) forms of vperm2f128; both are tagged with the SSEPackedSingle domain.
7056
+ let isCommutable = 1 in // Brace-less let: applies only to the rr def below. NOTE(review): swapping $src1/$src2 presumably also requires adjusting $src3 in the target's commute hook - confirm.
7057
+ def VPERM2F128rr : AVXAIi8<0x06, MRMSrcReg, (outs VR256:$dst), // rr form: opcode 0x06, both sources are VR256 registers.
7058
+ (ins VR256:$src1, VR256:$src2, u8imm:$src3),
7059
+ "vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", []>,
7060
+ VEX_4V, VEX_L, Sched<[WriteFShuffle256]>; // Empty pattern list ([]) above: selection is done by the separate Pat definitions in vperm2x128_lowering.
7061
+ def VPERM2F128rm : AVXAIi8<0x06, MRMSrcMem, (outs VR256:$dst), // rm form: same opcode, $src2 is an f256mem memory operand.
7062
+ (ins VR256:$src1, f256mem:$src2, u8imm:$src3),
7063
+ "vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", []>,
7064
+ VEX_4V, VEX_L, Sched<[WriteFShuffle256.Folded, WriteFShuffle256.ReadAfterFold]>; // Uses the Folded / ReadAfterFold members of WriteFShuffle256 for the load form.
7065
+ }
7066
+
7067
+ // Immediate transform to help with commuting: XORs the immediate with 0x22 (flips bits 1 and 5) and returns it as an i8 immediate.
7068
+ def Perm2XCommuteImm : SDNodeXForm<timm, [{
7069
+ return getI8Imm(N->getZExtValue() ^ 0x22, SDLoc(N));
7070
+ }]>; // NOTE(review): bits 1 and 5 presumably choose between the two sources for each 128-bit half - confirm against the ISA reference.
7071
+
7072
+ multiclass vperm2x128_lowering<string InstrStr, ValueType VT, PatFrag memop_frag> { // Selection patterns mapping X86VPerm2x128 on type VT to InstrStr#rr / InstrStr#rm; memop_frag is the load fragment matched for the memory forms.
7073
+ def : Pat<(VT (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 timm:$imm))), // Both sources in registers -> rr form, immediate passed through unchanged.
7074
+ (!cast<Instruction>(InstrStr#rr) VR256:$src1, VR256:$src2, timm:$imm)>;
7075
+ def : Pat<(VT (X86VPerm2x128 VR256:$src1, (memop_frag addr:$src2), (i8 timm:$imm))), // Second source is a load -> rm form, immediate passed through unchanged.
7076
+ (!cast<Instruction>(InstrStr#rm) VR256:$src1, addr:$src2, timm:$imm)>;
7077
+ // Pattern with load in other operand: the register and the load trade places to fit the rm form, so the immediate is rewritten with Perm2XCommuteImm.
7078
+ def : Pat<(VT (X86VPerm2x128 (memop_frag addr:$src2), VR256:$src1, (i8 timm:$imm))),
7079
+ (!cast<Instruction>(InstrStr#rm) VR256:$src1, addr:$src2,
7080
+ (Perm2XCommuteImm timm:$imm))>;
7081
+ }
7082
+
7083
+ let Predicates = [HasAVX] in { // Floating-point vector types: patterns are guarded by the HasAVX predicate.
7084
+ defm : vperm2x128_lowering<"VPERM2F128", v4f64, loadv4f64>;
7085
+ defm : vperm2x128_lowering<"VPERM2F128", v8f32, loadv8f32>;
7086
+ }
7087
+
7088
+ let Predicates = [HasAVX1Only] in { // Integer vector types: patterns are guarded by HasAVX1Only. NOTE(review): presumably a different instruction covers these types when AVX2 is available - confirm.
7088
+ defm : vperm2x128_lowering<"VPERM2F128", v4i64, loadv4i64>;
7089
+ defm : vperm2x128_lowering<"VPERM2F128", v8i32, loadv8i32>;
7090
+ defm : vperm2x128_lowering<"VPERM2F128", v16i16, loadv16i16>;
7091
+ defm : vperm2x128_lowering<"VPERM2F128", v32i8, loadv32i8>;
7092
+ }
7094
+
7051
7095
//===----------------------------------------------------------------------===//
7052
7096
// VINSERTF128 - Insert packed floating-point values
7053
7097
//
@@ -7296,50 +7340,6 @@ let ExeDomain = SSEPackedDouble in {
7296
7340
SchedWriteFVarShuffle.YMM>, VEX_L;
7297
7341
}
7298
7342
7299
- //===----------------------------------------------------------------------===//
7300
- // VPERM2F128 - Permute Floating-Point Values in 128-bit chunks
7301
- //
7302
-
7303
- let ExeDomain = SSEPackedSingle in {
7304
- let isCommutable = 1 in
7305
- def VPERM2F128rr : AVXAIi8<0x06, MRMSrcReg, (outs VR256:$dst),
7306
- (ins VR256:$src1, VR256:$src2, u8imm:$src3),
7307
- "vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", []>,
7308
- VEX_4V, VEX_L, Sched<[WriteFShuffle256]>;
7309
- def VPERM2F128rm : AVXAIi8<0x06, MRMSrcMem, (outs VR256:$dst),
7310
- (ins VR256:$src1, f256mem:$src2, u8imm:$src3),
7311
- "vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", []>,
7312
- VEX_4V, VEX_L, Sched<[WriteFShuffle256.Folded, WriteFShuffle256.ReadAfterFold]>;
7313
- }
7314
-
7315
- // Immediate transform to help with commuting.
7316
- def Perm2XCommuteImm : SDNodeXForm<timm, [{
7317
- return getI8Imm(N->getZExtValue() ^ 0x22, SDLoc(N));
7318
- }]>;
7319
-
7320
- multiclass vperm2x128_lowering<string InstrStr, ValueType VT, PatFrag memop_frag> {
7321
- def : Pat<(VT (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 timm:$imm))),
7322
- (!cast<Instruction>(InstrStr#rr) VR256:$src1, VR256:$src2, timm:$imm)>;
7323
- def : Pat<(VT (X86VPerm2x128 VR256:$src1, (memop_frag addr:$src2), (i8 timm:$imm))),
7324
- (!cast<Instruction>(InstrStr#rm) VR256:$src1, addr:$src2, timm:$imm)>;
7325
- // Pattern with load in other operand.
7326
- def : Pat<(VT (X86VPerm2x128 (memop_frag addr:$src2), VR256:$src1, (i8 timm:$imm))),
7327
- (!cast<Instruction>(InstrStr#rm) VR256:$src1, addr:$src2,
7328
- (Perm2XCommuteImm timm:$imm))>;
7329
- }
7330
-
7331
- let Predicates = [HasAVX] in {
7332
- defm : vperm2x128_lowering<"VPERM2F128", v4f64, loadv4f64>;
7333
- defm : vperm2x128_lowering<"VPERM2F128", v8f32, loadv8f32>;
7334
- }
7335
-
7336
- let Predicates = [HasAVX1Only] in {
7337
- defm : vperm2x128_lowering<"VPERM2F128", v4i64, loadv4i64>;
7338
- defm : vperm2x128_lowering<"VPERM2F128", v8i32, loadv8i32>;
7339
- defm : vperm2x128_lowering<"VPERM2F128", v16i16, loadv16i16>;
7340
- defm : vperm2x128_lowering<"VPERM2F128", v32i8, loadv32i8>;
7341
- }
7342
-
7343
7343
//===----------------------------------------------------------------------===//
7344
7344
// VZERO - Zero YMM registers
7345
7345
// Note: These instructions do not affect YMM16-YMM31.
0 commit comments