@@ -259,6 +259,23 @@ multiclass AVX512_maskable_common<bits<8> O, Format F, X86VectorVTInfo _,
259
259
MaskingConstraint, NoItinerary, IsCommutable,
260
260
IsKCommutable>;
261
261
262
// Similar to AVX512_maskable_common, but with scalar types.
// Builds the plain/merge-masking/zero-masking asm variants via
// AVX512_maskable_custom, but attaches NO selection patterns (the three
// empty [] pattern lists below) -- instruction selection for records built
// from this multiclass is expected to come from separate Pat<> definitions.
// NOTE(review): the Select and itin parameters are accepted but not
// forwarded (NoItinerary is hard-coded, matching AVX512_maskable_common
// above) -- confirm that is intended.
multiclass AVX512_maskable_fp_common<bits<8> O, Format F, X86VectorVTInfo _,
                                     dag Outs,
                                     dag Ins, dag MaskingIns, dag ZeroMaskingIns,
                                     string OpcodeStr,
                                     string AttSrcAsm, string IntelSrcAsm,
                                     SDNode Select = vselect,
                                     string MaskingConstraint = "",
                                     InstrItinClass itin = NoItinerary,
                                     bit IsCommutable = 0,
                                     bit IsKCommutable = 0> :
   AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr,
                          AttSrcAsm, IntelSrcAsm,
                          [], [], [],   // no patterns: rr/rm/zero-masked
                          MaskingConstraint, NoItinerary, IsCommutable,
                          IsKCommutable>;
278
+
262
279
// This multiclass generates the unconditional/non-masking, the masking and
263
280
// the zero-masking variant of the vector instruction. In the masking case, the
264
281
// perserved vector elements come from a new dummy input operand tied to $dst.
@@ -291,6 +308,18 @@ multiclass AVX512_maskable_scalar<bits<8> O, Format F, X86VectorVTInfo _,
291
308
(X86selects _.KRCWM:$mask, RHS, _.RC:$src0),
292
309
X86selects, "$src0 = $dst", itin, IsCommutable>;
293
310
311
// Similar to AVX512_maskable_scalar, but with scalar types.
// Constructs the masking operand lists from the caller's Ins:
//  - merge-masking form prepends a tied FRC:$src0 (the pass-through value,
//    constrained below with "$src0 = $dst") and the KRCWM mask register;
//  - zero-masking form prepends only the mask register.
// Masking is expressed with X86selects, the scalar select node.
multiclass AVX512_maskable_fp_scalar<bits<8> O, Format F, X86VectorVTInfo _,
                                     dag Outs, dag Ins, string OpcodeStr,
                                     string AttSrcAsm, string IntelSrcAsm,
                                     InstrItinClass itin = NoItinerary,
                                     bit IsCommutable = 0> :
   AVX512_maskable_fp_common<O, F, _, Outs, Ins,
                             !con((ins _.FRC:$src0, _.KRCWM:$mask), Ins),
                             !con((ins _.KRCWM:$mask), Ins),
                             OpcodeStr, AttSrcAsm, IntelSrcAsm,
                             X86selects, "$src0 = $dst", itin, IsCommutable>;
322
+
294
323
// Similar to AVX512_maskable but in this case one of the source operands
295
324
// ($src1) is already tied to $dst so we just use that for the preserved
296
325
// vector elements. NOTE that the NonTiedIns (the ins dag) should exclude
@@ -6030,27 +6059,40 @@ let Predicates = [HasAVX512] in {
6030
6059
//===----------------------------------------------------------------------===//
6031
6060
// Scalar FP conversion (e.g. cvtsd2ss/cvtss2sd) in two flavors:
//  - rr_Int/rm_Int: intrinsic forms on the full vector register class
//    (_.RC), with patterns that pass the current rounding mode
//    (FROUND_CURRENT) to OpNode. Marked isCodeGenOnly so they do not
//    participate in assembly parsing/printing alongside rr/rm.
//  - rr/rm: FR-register-class forms with NO attached patterns (via
//    AVX512_maskable_fp_scalar); selection comes from separate Pat<>
//    definitions. rm needs an explicit mayLoad = 1 because, having no
//    pattern, the load cannot be inferred.
multiclass avx512_cvt_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                                X86VectorVTInfo _Src, SDNode OpNode> {
  let isCodeGenOnly = 1 in {
    defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
                         "$src2, $src1", "$src1, $src2",
                         (_.VT (OpNode (_.VT _.RC:$src1),
                                       (_Src.VT _Src.RC:$src2),
                                       (i32 FROUND_CURRENT)))>,
                         EVEX_4V, VEX_LIG, Sched<[WriteCvtF2F]>;
    // Note: $src1 is in the DESTINATION type's register class (_.RC);
    // only the loaded $src2 operand is in the source type.
    defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, _Src.ScalarMemOp:$src2), OpcodeStr,
                      "$src2, $src1", "$src1, $src2",
                      (_.VT (OpNode (_.VT _.RC:$src1),
                               (_Src.VT (scalar_to_vector
                                         (_Src.ScalarLdFrag addr:$src2))),
                               (i32 FROUND_CURRENT)))>,
                      EVEX_4V, VEX_LIG, Sched<[WriteCvtF2FLd, ReadAfterLd]>;
  }

  defm rr : AVX512_maskable_fp_scalar<opc, MRMSrcReg, _, (outs _.FRC:$dst),
                (ins _.FRC:$src1, _Src.FRC:$src2), OpcodeStr,
                "$src2, $src1", "$src1, $src2">,
                EVEX_4V, VEX_LIG, Sched<[WriteCvtF2F]>;
  let mayLoad = 1 in
    defm rm : AVX512_maskable_fp_scalar<opc, MRMSrcMem, _, (outs _.FRC:$dst),
                  (ins _.FRC:$src1, _Src.ScalarMemOp:$src2), OpcodeStr,
                  "$src2, $src1", "$src1, $src2">,
                  EVEX_4V, VEX_LIG, Sched<[WriteCvtF2FLd, ReadAfterLd]>;
}
6049
6091
6050
6092
// Scalar Coversion with SAE - suppress all exceptions
6051
6093
multiclass avx512_cvt_fp_sae_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
6052
6094
X86VectorVTInfo _Src, SDNode OpNodeRnd> {
6053
- defm rrb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
6095
+ defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
6054
6096
(ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
6055
6097
"{sae}, $src2, $src1", "$src1, $src2, {sae}",
6056
6098
(_.VT (OpNodeRnd (_.VT _.RC:$src1),
@@ -6062,7 +6104,7 @@ multiclass avx512_cvt_fp_sae_scalar<bits<8> opc, string OpcodeStr, X86VectorVTIn
6062
6104
// Scalar Conversion with rounding control (RC)
6063
6105
multiclass avx512_cvt_fp_rc_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
6064
6106
X86VectorVTInfo _Src, SDNode OpNodeRnd> {
6065
- defm rrb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
6107
+ defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
6066
6108
(ins _.RC:$src1, _Src.RC:$src2, AVX512RC:$rc), OpcodeStr,
6067
6109
"$rc, $src2, $src1", "$src1, $src2, $rc",
6068
6110
(_.VT (OpNodeRnd (_.VT _.RC:$src1),
@@ -6095,39 +6137,36 @@ defm VCVTSS2SD : avx512_cvt_fp_scalar_ss2sd<0x5A, "vcvtss2sd",
6095
6137
X86fpextRnd,f32x_info, f64x_info >;
6096
6138
6097
6139
// Select the FR-register (pattern-less) forms for plain scalar
// extend/round. For the register-register forms, $src1 only supplies the
// destination register class, so the same source value is also fed through
// a COPY_TO_REGCLASS (or an IMPLICIT_DEF is used when the upper value is
// don't-care).
def : Pat<(f64 (fpextend FR32X:$src)),
          (VCVTSS2SDZrr (COPY_TO_REGCLASS FR32X:$src, FR64X), FR32X:$src)>,
          Requires<[HasAVX512]>;
def : Pat<(f64 (fpextend (loadf32 addr:$src))),
          (VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
          Requires<[HasAVX512]>;

// extloadf32: fold the load when optimizing for size...
def : Pat<(f64 (extloadf32 addr:$src)),
          (VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
          Requires<[HasAVX512, OptForSize]>;

// ...but when optimizing for speed, load with VMOVSSZrm first and convert
// register-to-register.
def : Pat<(f64 (extloadf32 addr:$src)),
          (VCVTSS2SDZrr (f64 (IMPLICIT_DEF)), (VMOVSSZrm addr:$src))>,
          Requires<[HasAVX512, OptForSpeed]>;

def : Pat<(f32 (fpround FR64X:$src)),
          (VCVTSD2SSZrr (COPY_TO_REGCLASS FR64X:$src, FR32X), FR64X:$src)>,
          Requires<[HasAVX512]>;
6118
6157
6119
6158
// Movss/Movsd of a converted low element: select the _Int (VR128X) forms,
// which preserve the upper elements of $dst.
def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector
                     (f32 (fpround (f64 (extractelt VR128X:$src, (iPTR 0))))))))),
          (VCVTSD2SSZrr_Int VR128X:$dst, VR128X:$src)>,
          Requires<[HasAVX512]>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector
                     (f64 (fpextend (f32 (extractelt VR128X:$src, (iPTR 0))))))))),
          (VCVTSS2SDZrr_Int VR128X:$dst, VR128X:$src)>,
          Requires<[HasAVX512]>;
6132
6171
6133
6172
//===----------------------------------------------------------------------===//
0 commit comments