@@ -1177,17 +1177,6 @@ def NegDoubleConst : SDNodeXForm<fpimm, [{
1177
1177
SDLoc(N), MVT::f64);
1178
1178
}]>;
1179
1179
1180
- // Loads FP16 constant into a register.
1181
- //
1182
- // ptxas does not have hex representation for fp16, so we can't use
1183
- // fp16 immediate values in .f16 instructions. Instead we have to load
1184
- // the constant into a register using mov.b16.
1185
- def LOAD_CONST_F16 :
1186
- NVPTXInst<(outs Int16Regs:$dst), (ins f16imm:$a),
1187
- "mov.b16 \t$dst, $a;", []>;
1188
- def LOAD_CONST_BF16 :
1189
- NVPTXInst<(outs Int16Regs:$dst), (ins bf16imm:$a),
1190
- "mov.b16 \t$dst, $a;", []>;
1191
1180
defm FADD : F3_fma_component<"add", fadd>;
1192
1181
defm FSUB : F3_fma_component<"sub", fsub>;
1193
1182
defm FMUL : F3_fma_component<"mul", fmul>;
@@ -1963,7 +1952,7 @@ let hasSideEffects = false in {
1963
1952
1964
1953
1965
1954
// copyPhysreg is hard-coded in NVPTXInstrInfo.cpp
1966
- let IsSimpleMove=1, hasSideEffects=0 in {
1955
+ let IsSimpleMove=1, hasSideEffects=0, isAsCheapAsAMove=1 in {
1967
1956
def IMOV1rr : NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$sss),
1968
1957
"mov.pred \t$dst, $sss;", []>;
1969
1958
def IMOV16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$sss),
@@ -1975,44 +1964,40 @@ let IsSimpleMove=1, hasSideEffects=0 in {
1975
1964
def IMOV128rr : NVPTXInst<(outs Int128Regs:$dst), (ins Int128Regs:$sss),
1976
1965
"mov.b128 \t$dst, $sss;", []>;
1977
1966
1978
- def IMOVB16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$sss),
1979
- "mov.b16 \t$dst, $sss;", []>;
1980
- def IMOVB32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$sss),
1981
- "mov.b32 \t$dst, $sss;", []>;
1982
- def IMOVB64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$sss),
1983
- "mov.b64 \t$dst, $sss;", []>;
1984
-
1985
- def FMOV16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src),
1986
- // We have to use .b16 here as there's no mov.f16.
1987
- "mov.b16 \t$dst, $src;", []>;
1988
1967
def FMOV32rr : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src),
1989
1968
"mov.f32 \t$dst, $src;", []>;
1990
1969
def FMOV64rr : NVPTXInst<(outs Float64Regs:$dst), (ins Float64Regs:$src),
1991
1970
"mov.f64 \t$dst, $src;", []>;
1992
- }
1993
1971
1994
- def IMOV1ri : NVPTXInst<(outs Int1Regs:$dst), (ins i1imm:$src),
1995
- "mov.pred \t$dst, $src;",
1996
- [(set i1:$dst, imm:$src)]>;
1997
- def IMOVB16ri : NVPTXInst<(outs Int16Regs:$dst), (ins i16imm:$src),
1998
- "mov.b16 \t$dst, $src;",
1999
- [(set i16:$dst, imm:$src)]>;
2000
- def IMOVB32ri : NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$src),
2001
- "mov.b32 \t$dst, $src;",
2002
- [(set i32:$dst, imm:$src)]>;
2003
- def IMOVB64ri : NVPTXInst<(outs Int64Regs:$dst), (ins i64imm:$src),
2004
- "mov.b64 \t$dst, $src;",
2005
- [(set i64:$dst, imm:$src)]>;
2006
-
2007
- def FMOV32ri : NVPTXInst<(outs Float32Regs:$dst), (ins f32imm:$src),
2008
- "mov.f32 \t$dst, $src;",
2009
- [(set f32:$dst, fpimm:$src)]>;
2010
- def FMOV64ri : NVPTXInst<(outs Float64Regs:$dst), (ins f64imm:$src),
2011
- "mov.f64 \t$dst, $src;",
2012
- [(set f64:$dst, fpimm:$src)]>;
2013
-
2014
- def : Pat<(i32 (Wrapper texternalsym:$dst)), (IMOVB32ri texternalsym:$dst)>;
2015
- def : Pat<(i64 (Wrapper texternalsym:$dst)), (IMOVB64ri texternalsym:$dst)>;
1972
+ def IMOV1ri : NVPTXInst<(outs Int1Regs:$dst), (ins i1imm:$src),
1973
+ "mov.pred \t$dst, $src;",
1974
+ [(set i1:$dst, imm:$src)]>;
1975
+ def IMOV16ri : NVPTXInst<(outs Int16Regs:$dst), (ins i16imm:$src),
1976
+ "mov.b16 \t$dst, $src;",
1977
+ [(set i16:$dst, imm:$src)]>;
1978
+ def IMOV32ri : NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$src),
1979
+ "mov.b32 \t$dst, $src;",
1980
+ [(set i32:$dst, imm:$src)]>;
1981
+ def IMOV64ri : NVPTXInst<(outs Int64Regs:$dst), (ins i64imm:$src),
1982
+ "mov.b64 \t$dst, $src;",
1983
+ [(set i64:$dst, imm:$src)]>;
1984
+
1985
+ def FMOV16ri : NVPTXInst<(outs Int16Regs:$dst), (ins f16imm:$src),
1986
+ "mov.b16 \t$dst, $src;",
1987
+ [(set f16:$dst, fpimm:$src)]>;
1988
+ def BFMOV16ri : NVPTXInst<(outs Int16Regs:$dst), (ins bf16imm:$src),
1989
+ "mov.b16 \t$dst, $src;",
1990
+ [(set bf16:$dst, fpimm:$src)]>;
1991
+ def FMOV32ri : NVPTXInst<(outs Float32Regs:$dst), (ins f32imm:$src),
1992
+ "mov.f32 \t$dst, $src;",
1993
+ [(set f32:$dst, fpimm:$src)]>;
1994
+ def FMOV64ri : NVPTXInst<(outs Float64Regs:$dst), (ins f64imm:$src),
1995
+ "mov.f64 \t$dst, $src;",
1996
+ [(set f64:$dst, fpimm:$src)]>;
1997
+ }
1998
+
1999
+ def : Pat<(i32 (Wrapper texternalsym:$dst)), (IMOV32ri texternalsym:$dst)>;
2000
+ def : Pat<(i64 (Wrapper texternalsym:$dst)), (IMOV64ri texternalsym:$dst)>;
2016
2001
2017
2002
//---- Copy Frame Index ----
2018
2003
def LEA_ADDRi : NVPTXInst<(outs Int32Regs:$dst), (ins MEMri:$addr),
@@ -2208,18 +2193,6 @@ multiclass FSET_FORMAT<PatFrag OpNode, PatLeaf Mode, PatLeaf ModeFTZ> {
2208
2193
def : Pat<(i1 (OpNode f16:$a, f16:$b)),
2209
2194
(SETP_f16rr Int16Regs:$a, Int16Regs:$b, Mode)>,
2210
2195
Requires<[useFP16Math]>;
2211
- def : Pat<(i1 (OpNode f16:$a, fpimm:$b)),
2212
- (SETP_f16rr Int16Regs:$a, (LOAD_CONST_F16 fpimm:$b), ModeFTZ)>,
2213
- Requires<[useFP16Math,doF32FTZ]>;
2214
- def : Pat<(i1 (OpNode f16:$a, fpimm:$b)),
2215
- (SETP_f16rr Int16Regs:$a, (LOAD_CONST_F16 fpimm:$b), Mode)>,
2216
- Requires<[useFP16Math]>;
2217
- def : Pat<(i1 (OpNode fpimm:$a, f16:$b)),
2218
- (SETP_f16rr (LOAD_CONST_F16 fpimm:$a), Int16Regs:$b, ModeFTZ)>,
2219
- Requires<[useFP16Math,doF32FTZ]>;
2220
- def : Pat<(i1 (OpNode fpimm:$a, f16:$b)),
2221
- (SETP_f16rr (LOAD_CONST_F16 fpimm:$a), Int16Regs:$b, Mode)>,
2222
- Requires<[useFP16Math]>;
2223
2196
2224
2197
// bf16 -> pred
2225
2198
def : Pat<(i1 (OpNode bf16:$a, bf16:$b)),
@@ -2228,18 +2201,6 @@ multiclass FSET_FORMAT<PatFrag OpNode, PatLeaf Mode, PatLeaf ModeFTZ> {
2228
2201
def : Pat<(i1 (OpNode bf16:$a, bf16:$b)),
2229
2202
(SETP_bf16rr Int16Regs:$a, Int16Regs:$b, Mode)>,
2230
2203
Requires<[hasBF16Math]>;
2231
- def : Pat<(i1 (OpNode bf16:$a, fpimm:$b)),
2232
- (SETP_bf16rr Int16Regs:$a, (LOAD_CONST_BF16 fpimm:$b), ModeFTZ)>,
2233
- Requires<[hasBF16Math,doF32FTZ]>;
2234
- def : Pat<(i1 (OpNode bf16:$a, fpimm:$b)),
2235
- (SETP_bf16rr Int16Regs:$a, (LOAD_CONST_BF16 fpimm:$b), Mode)>,
2236
- Requires<[hasBF16Math]>;
2237
- def : Pat<(i1 (OpNode fpimm:$a, bf16:$b)),
2238
- (SETP_bf16rr (LOAD_CONST_BF16 fpimm:$a), Int16Regs:$b, ModeFTZ)>,
2239
- Requires<[hasBF16Math,doF32FTZ]>;
2240
- def : Pat<(i1 (OpNode fpimm:$a, bf16:$b)),
2241
- (SETP_bf16rr (LOAD_CONST_BF16 fpimm:$a), Int16Regs:$b, Mode)>,
2242
- Requires<[hasBF16Math]>;
2243
2204
2244
2205
// f32 -> pred
2245
2206
def : Pat<(i1 (OpNode f32:$a, f32:$b)),
@@ -2273,18 +2234,6 @@ multiclass FSET_FORMAT<PatFrag OpNode, PatLeaf Mode, PatLeaf ModeFTZ> {
2273
2234
def : Pat<(i32 (OpNode f16:$a, f16:$b)),
2274
2235
(SET_f16rr Int16Regs:$a, Int16Regs:$b, Mode)>,
2275
2236
Requires<[useFP16Math]>;
2276
- def : Pat<(i32 (OpNode f16:$a, fpimm:$b)),
2277
- (SET_f16rr Int16Regs:$a, (LOAD_CONST_F16 fpimm:$b), ModeFTZ)>,
2278
- Requires<[useFP16Math, doF32FTZ]>;
2279
- def : Pat<(i32 (OpNode f16:$a, fpimm:$b)),
2280
- (SET_f16rr Int16Regs:$a, (LOAD_CONST_F16 fpimm:$b), Mode)>,
2281
- Requires<[useFP16Math]>;
2282
- def : Pat<(i32 (OpNode fpimm:$a, f16:$b)),
2283
- (SET_f16ir (LOAD_CONST_F16 fpimm:$a), Int16Regs:$b, ModeFTZ)>,
2284
- Requires<[useFP16Math, doF32FTZ]>;
2285
- def : Pat<(i32 (OpNode fpimm:$a, f16:$b)),
2286
- (SET_f16ir (LOAD_CONST_F16 fpimm:$a), Int16Regs:$b, Mode)>,
2287
- Requires<[useFP16Math]>;
2288
2237
2289
2238
// bf16 -> i32
2290
2239
def : Pat<(i32 (OpNode bf16:$a, bf16:$b)),
@@ -2293,18 +2242,6 @@ multiclass FSET_FORMAT<PatFrag OpNode, PatLeaf Mode, PatLeaf ModeFTZ> {
2293
2242
def : Pat<(i32 (OpNode bf16:$a, bf16:$b)),
2294
2243
(SET_bf16rr Int16Regs:$a, Int16Regs:$b, Mode)>,
2295
2244
Requires<[hasBF16Math]>;
2296
- def : Pat<(i32 (OpNode bf16:$a, fpimm:$b)),
2297
- (SET_bf16rr Int16Regs:$a, (LOAD_CONST_BF16 fpimm:$b), ModeFTZ)>,
2298
- Requires<[hasBF16Math, doF32FTZ]>;
2299
- def : Pat<(i32 (OpNode bf16:$a, fpimm:$b)),
2300
- (SET_bf16rr Int16Regs:$a, (LOAD_CONST_BF16 fpimm:$b), Mode)>,
2301
- Requires<[hasBF16Math]>;
2302
- def : Pat<(i32 (OpNode fpimm:$a, bf16:$b)),
2303
- (SET_bf16ir (LOAD_CONST_BF16 fpimm:$a), Int16Regs:$b, ModeFTZ)>,
2304
- Requires<[hasBF16Math, doF32FTZ]>;
2305
- def : Pat<(i32 (OpNode fpimm:$a, bf16:$b)),
2306
- (SET_bf16ir (LOAD_CONST_BF16 fpimm:$a), Int16Regs:$b, Mode)>,
2307
- Requires<[hasBF16Math]>;
2308
2245
2309
2246
// f32 -> i32
2310
2247
def : Pat<(i32 (OpNode f32:$a, f32:$b)),
0 commit comments