@@ -1177,17 +1177,6 @@ def NegDoubleConst : SDNodeXForm<fpimm, [{
1177
1177
SDLoc(N), MVT::f64);
1178
1178
}]>;
1179
1179
1180
// NOTE(review): this hunk deletes both LOAD_CONST_* helpers. Each one takes a
// half-precision immediate operand, writes an Int16Regs destination with
// mov.b16, and carries an empty selection-pattern list ([]).
- // Loads FP16 constant into a register.
1181
- //
1182
- // ptxas does not have hex representation for fp16, so we can't use
1183
- // fp16 immediate values in .f16 instructions. Instead we have to load
1184
- // the constant into a register using mov.b16.
1185
- def LOAD_CONST_F16 :
1186
- NVPTXInst<(outs Int16Regs:$dst), (ins f16imm:$a),
1187
- "mov.b16 \t$dst, $a;", []>;
1188
// NOTE(review): bf16 twin of LOAD_CONST_F16 - same asm string and destination
// class, only the operand type differs (bf16imm instead of f16imm).
- def LOAD_CONST_BF16 :
1189
- NVPTXInst<(outs Int16Regs:$dst), (ins bf16imm:$a),
1190
- "mov.b16 \t$dst, $a;", []>;
1191
1180
// Three instantiations of the F3_fma_component multiclass, one per DAG node
// (fadd, fsub, fmul). The string argument is presumably the PTX mnemonic
// stem - the multiclass body is not visible in this diff, so confirm there.
defm FADD : F3_fma_component<"add", fadd>;
1192
1181
defm FSUB : F3_fma_component<"sub", fsub>;
1193
1182
defm FMUL : F3_fma_component<"mul", fmul>;
@@ -1963,7 +1952,7 @@ let hasSideEffects = false in {
1963
1952
1964
1953
1965
1954
// copyPhysreg is hard-coded in NVPTXInstrInfo.cpp
1966
- let IsSimpleMove=1, hasSideEffects=0 in {
1955
+ let IsSimpleMove=1, hasSideEffects=0, isAsCheapAsAMove=1 in {
1967
1956
// Register-to-register copy for Int1Regs, printed as mov.pred. It has no
// selection pattern ([]). It is the first def inside the enclosing `let`
// group, so it receives IsSimpleMove=1, hasSideEffects=0 and - with this
// change - isAsCheapAsAMove=1.
def IMOV1rr : NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$sss),
1968
1957
"mov.pred \t$dst, $sss;", []>;
1969
1958
def IMOV16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$sss),
@@ -1975,48 +1964,37 @@ let IsSimpleMove=1, hasSideEffects=0 in {
1975
1964
// 128-bit register-to-register copy (mov.b128); untouched by this change.
def IMOV128rr : NVPTXInst<(outs Int128Regs:$dst), (ins Int128Regs:$sss),
1976
1965
"mov.b128 \t$dst, $sss;", []>;
1977
1966
1978
// NOTE(review): removed below - the IMOVB16rr/IMOVB32rr/IMOVB64rr copies
// (mov.b16/.b32/.b64 on Int16Regs/Int32Regs/Int64Regs) and the f16 copy
// FMOV16rr (also mov.b16 on Int16Regs). All four are pattern-less ([]).
- def IMOVB16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$sss),
1979
- "mov.b16 \t$dst, $sss;", []>;
1980
- def IMOVB32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$sss),
1981
- "mov.b32 \t$dst, $sss;", []>;
1982
- def IMOVB64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$sss),
1983
- "mov.b64 \t$dst, $sss;", []>;
1984
-
1985
- def FMOV16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src),
1986
- // We have to use .b16 here as there's no mov.f16.
1987
- "mov.b16 \t$dst, $src;", []>;
1988
1967
// The f32 and f64 register copies are kept as context lines (mov.f32 / mov.f64).
def FMOV32rr : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src),
1989
1968
"mov.f32 \t$dst, $src;", []>;
1990
1969
def FMOV64rr : NVPTXInst<(outs Float64Regs:$dst), (ins Float64Regs:$src),
1991
1970
"mov.f64 \t$dst, $src;", []>;
1992
- }
1993
1971
1994
// NOTE(review): old immediate-move definitions, all deleted by this hunk.
// They followed the removed closing brace of the `let` group, i.e. they were
// declared outside it. The patterned integer forms printed mov.u16/.u32/.u64;
// the separate IMOVB16ri/IMOVB32ri/IMOVB64ri defs printed the .b16/.b32/.b64
// spelling and had no selection pattern.
- def IMOV1ri : NVPTXInst<(outs Int1Regs:$dst), (ins i1imm:$src),
1994
- "mov.pred \t$dst, $src;",
1995
- [(set i1:$dst, imm:$src)]>;
1996
- def IMOV16ri : NVPTXInst<(outs Int16Regs:$dst), (ins i16imm:$src),
1997
- "mov.u16 \t$dst, $src;",
1998
- [(set i16:$dst, imm:$src)]>;
1999
- def IMOV32ri : NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$src),
2000
- "mov.u32 \t$dst, $src;",
2001
- [(set i32:$dst, imm:$src)]>;
2002
- def IMOV64ri : NVPTXInst<(outs Int64Regs:$dst), (ins i64imm:$src),
2003
- "mov.u64 \t$dst, $src;",
2004
- [(set i64:$dst, imm:$src)]>;
2005
-
2006
// NOTE(review): pattern-less bit-typed immediate moves. Other removed lines
// in this diff use IMOVB16ri and IMOVB32ri as the output of constant
// bitconvert patterns.
- def IMOVB16ri : NVPTXInst<(outs Int16Regs:$dst), (ins i16imm :$src),
2007
- "mov.b16 \t$dst, $src;", []>;
2008
- def IMOVB32ri : NVPTXInst<(outs Int32Regs :$dst), (ins i32imm :$src),
2009
- "mov.b32 \t $dst, $src;", []>;
2010
- def IMOVB64ri : NVPTXInst<(outs Int64Regs: $dst), (ins i64imm: $src) ,
2011
- "mov.b64 \t $dst, $src;", [ ]>;
2012
-
2013
// NOTE(review): old f32/f64 immediate moves; the same two names reappear in
// the added lines with the same mnemonics and patterns.
- def FMOV32ri : NVPTXInst<(outs Float32Regs: $dst), (ins f32imm: $src) ,
2014
- "mov.f32 \t $dst, $src;",
2015
- [(set f32 :$dst, fpimm :$src)]>;
2016
- def FMOV64ri : NVPTXInst<(outs Float64Regs: $dst), (ins f64imm: $src) ,
2017
- "mov.f64 \t $dst, $src;",
2018
- [(set f64:$dst, fpimm:$src)]>;
1972
// New immediate-move definitions. They are added ahead of the group's closing
// brace, so they now fall under the same `let` flags as the register copies.
// Integer forms: each selects `imm` of its width. IMOV16ri/32ri/64ri print
// mov.b16/.b32/.b64 here, where the deleted versions printed mov.u16/.u32/.u64.
+ def IMOV1ri : NVPTXInst<(outs Int1Regs:$dst), (ins i1imm:$src),
1973
+ "mov.pred \t$dst, $src;",
1974
+ [(set i1:$dst, imm:$src)]>;
1975
+ def IMOV16ri : NVPTXInst<(outs Int16Regs:$dst), (ins i16imm:$src),
1976
+ "mov.b16 \t$dst, $src;",
1977
+ [(set i16:$dst, imm:$src)]>;
1978
+ def IMOV32ri : NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$src),
1979
+ "mov.b32 \t$dst, $src;",
1980
+ [(set i32:$dst, imm:$src)]>;
1981
+ def IMOV64ri : NVPTXInst<(outs Int64Regs:$dst), (ins i64imm:$src),
1982
+ "mov.b64 \t$dst, $src;",
1983
+ [(set i64:$dst, imm:$src)]>;
1984
+
1985
// Floating-point forms. FMOV16ri and BFMOV16ri are new names: they give f16
// and bf16 `fpimm` a selection pattern of their own, producing an Int16Regs
// value via mov.b16.
// NOTE(review): the asm strings of BFMOV16ri, FMOV32ri and FMOV64ri contain a
// space after "\t" that the other strings in this group do not - confirm
// whether the difference in printed text is intended.
+ def FMOV16ri : NVPTXInst<(outs Int16Regs:$dst), (ins f16imm :$src),
1986
+ "mov.b16 \t$dst, $src;",
1987
+ [(set f16 :$dst, fpimm :$src)]>;
1988
+ def BFMOV16ri : NVPTXInst<(outs Int16Regs: $dst), (ins bf16imm: $src),
1989
+ "mov.b16 \t $dst, $src;" ,
1990
+ [(set bf16: $dst, fpimm: $src) ]>;
1991
+ def FMOV32ri : NVPTXInst<(outs Float32Regs:$dst), (ins f32imm:$src),
1992
+ "mov.f32 \t $dst, $src;" ,
1993
+ [(set f32: $dst, fpimm: $src)]>;
1994
+ def FMOV64ri : NVPTXInst<(outs Float64Regs :$dst), (ins f64imm :$src),
1995
+ "mov.f64 \t $dst, $src;" ,
1996
+ [(set f64: $dst, fpimm: $src)]>;
1997
+ }
2020
1998
2021
1999
// A Wrapper node around an external symbol is selected as an immediate move
// of the result width: IMOV32ri for an i32 result, IMOV64ri for i64.
def : Pat<(i32 (Wrapper texternalsym:$dst)), (IMOV32ri texternalsym:$dst)>;
2022
2000
def : Pat<(i64 (Wrapper texternalsym:$dst)), (IMOV64ri texternalsym:$dst)>;
@@ -2215,18 +2193,6 @@ multiclass FSET_FORMAT<PatFrag OpNode, PatLeaf Mode, PatLeaf ModeFTZ> {
2215
2193
// f16 compare producing a predicate (i1): both operands in Int16Regs, selected
// to SETP_f16rr with the non-FTZ Mode, gated on useFP16Math. Kept unchanged.
def : Pat<(i1 (OpNode f16:$a, f16:$b)),
2216
2194
(SETP_f16rr Int16Regs:$a, Int16Regs:$b, Mode)>,
2217
2195
Requires<[useFP16Math]>;
2218
// NOTE(review): removed below - four patterns that matched an fpimm on the
// right or left side and wrapped it in LOAD_CONST_F16 before feeding
// SETP_f16rr. Each side had a ModeFTZ variant (extra doF32FTZ predicate) and a
// plain Mode variant.
- def : Pat<(i1 (OpNode f16:$a, fpimm:$b)),
2219
- (SETP_f16rr Int16Regs:$a, (LOAD_CONST_F16 fpimm:$b), ModeFTZ)>,
2220
- Requires<[useFP16Math,doF32FTZ]>;
2221
- def : Pat<(i1 (OpNode f16:$a, fpimm:$b)),
2222
- (SETP_f16rr Int16Regs:$a, (LOAD_CONST_F16 fpimm:$b), Mode)>,
2223
- Requires<[useFP16Math]>;
2224
- def : Pat<(i1 (OpNode fpimm:$a, f16:$b)),
2225
- (SETP_f16rr (LOAD_CONST_F16 fpimm:$a), Int16Regs:$b, ModeFTZ)>,
2226
- Requires<[useFP16Math,doF32FTZ]>;
2227
- def : Pat<(i1 (OpNode fpimm:$a, f16:$b)),
2228
- (SETP_f16rr (LOAD_CONST_F16 fpimm:$a), Int16Regs:$b, Mode)>,
2229
- Requires<[useFP16Math]>;
2230
2196
2231
2197
// bf16 -> pred
2232
2198
def : Pat<(i1 (OpNode bf16:$a, bf16:$b)),
@@ -2235,18 +2201,6 @@ multiclass FSET_FORMAT<PatFrag OpNode, PatLeaf Mode, PatLeaf ModeFTZ> {
2235
2201
// bf16 compare producing a predicate (i1): both operands in Int16Regs,
// selected to SETP_bf16rr with the non-FTZ Mode, gated on hasBF16Math.
def : Pat<(i1 (OpNode bf16:$a, bf16:$b)),
2236
2202
(SETP_bf16rr Int16Regs:$a, Int16Regs:$b, Mode)>,
2237
2203
Requires<[hasBF16Math]>;
2238
// NOTE(review): removed below - the four fpimm-operand variants (immediate on
// the right, then on the left; ModeFTZ and Mode each). Each wrapped the
// constant in LOAD_CONST_BF16 and selected SETP_bf16rr.
- def : Pat<(i1 (OpNode bf16:$a, fpimm:$b)),
2239
- (SETP_bf16rr Int16Regs:$a, (LOAD_CONST_BF16 fpimm:$b), ModeFTZ)>,
2240
- Requires<[hasBF16Math,doF32FTZ]>;
2241
- def : Pat<(i1 (OpNode bf16:$a, fpimm:$b)),
2242
- (SETP_bf16rr Int16Regs:$a, (LOAD_CONST_BF16 fpimm:$b), Mode)>,
2243
- Requires<[hasBF16Math]>;
2244
- def : Pat<(i1 (OpNode fpimm:$a, bf16:$b)),
2245
- (SETP_bf16rr (LOAD_CONST_BF16 fpimm:$a), Int16Regs:$b, ModeFTZ)>,
2246
- Requires<[hasBF16Math,doF32FTZ]>;
2247
- def : Pat<(i1 (OpNode fpimm:$a, bf16:$b)),
2248
- (SETP_bf16rr (LOAD_CONST_BF16 fpimm:$a), Int16Regs:$b, Mode)>,
2249
- Requires<[hasBF16Math]>;
2250
2204
2251
2205
// f32 -> pred
2252
2206
def : Pat<(i1 (OpNode f32:$a, f32:$b)),
@@ -2280,18 +2234,6 @@ multiclass FSET_FORMAT<PatFrag OpNode, PatLeaf Mode, PatLeaf ModeFTZ> {
2280
2234
// f16 compare producing an i32 result: both operands in Int16Regs, selected to
// SET_f16rr with the non-FTZ Mode, gated on useFP16Math. Kept unchanged.
def : Pat<(i32 (OpNode f16:$a, f16:$b)),
2281
2235
(SET_f16rr Int16Regs:$a, Int16Regs:$b, Mode)>,
2282
2236
Requires<[useFP16Math]>;
2283
// NOTE(review): removed below - the fpimm-operand variants built on
// LOAD_CONST_F16. The two immediate-on-the-left patterns select SET_f16ir
// while passing the register result of LOAD_CONST_F16 as the first operand;
// the corresponding i1 patterns in this file use the rr form instead.
- def : Pat<(i32 (OpNode f16:$a, fpimm:$b)),
2284
- (SET_f16rr Int16Regs:$a, (LOAD_CONST_F16 fpimm:$b), ModeFTZ)>,
2285
- Requires<[useFP16Math, doF32FTZ]>;
2286
- def : Pat<(i32 (OpNode f16:$a, fpimm:$b)),
2287
- (SET_f16rr Int16Regs:$a, (LOAD_CONST_F16 fpimm:$b), Mode)>,
2288
- Requires<[useFP16Math]>;
2289
- def : Pat<(i32 (OpNode fpimm:$a, f16:$b)),
2290
- (SET_f16ir (LOAD_CONST_F16 fpimm:$a), Int16Regs:$b, ModeFTZ)>,
2291
- Requires<[useFP16Math, doF32FTZ]>;
2292
- def : Pat<(i32 (OpNode fpimm:$a, f16:$b)),
2293
- (SET_f16ir (LOAD_CONST_F16 fpimm:$a), Int16Regs:$b, Mode)>,
2294
- Requires<[useFP16Math]>;
2295
2237
2296
2238
// bf16 -> i32
2297
2239
def : Pat<(i32 (OpNode bf16:$a, bf16:$b)),
@@ -2300,18 +2242,6 @@ multiclass FSET_FORMAT<PatFrag OpNode, PatLeaf Mode, PatLeaf ModeFTZ> {
2300
2242
// bf16 compare producing an i32 result: both operands in Int16Regs, selected
// to SET_bf16rr with the non-FTZ Mode, gated on hasBF16Math. Kept unchanged.
def : Pat<(i32 (OpNode bf16:$a, bf16:$b)),
2301
2243
(SET_bf16rr Int16Regs:$a, Int16Regs:$b, Mode)>,
2302
2244
Requires<[hasBF16Math]>;
2303
// NOTE(review): removed below - the fpimm-operand variants built on
// LOAD_CONST_BF16. As in the f16 -> i32 group, the immediate-on-the-left
// patterns select SET_bf16ir with a register first operand.
- def : Pat<(i32 (OpNode bf16:$a, fpimm:$b)),
2304
- (SET_bf16rr Int16Regs:$a, (LOAD_CONST_BF16 fpimm:$b), ModeFTZ)>,
2305
- Requires<[hasBF16Math, doF32FTZ]>;
2306
- def : Pat<(i32 (OpNode bf16:$a, fpimm:$b)),
2307
- (SET_bf16rr Int16Regs:$a, (LOAD_CONST_BF16 fpimm:$b), Mode)>,
2308
- Requires<[hasBF16Math]>;
2309
- def : Pat<(i32 (OpNode fpimm:$a, bf16:$b)),
2310
- (SET_bf16ir (LOAD_CONST_BF16 fpimm:$a), Int16Regs:$b, ModeFTZ)>,
2311
- Requires<[hasBF16Math, doF32FTZ]>;
2312
- def : Pat<(i32 (OpNode fpimm:$a, bf16:$b)),
2313
- (SET_bf16ir (LOAD_CONST_BF16 fpimm:$a), Int16Regs:$b, Mode)>,
2314
- Requires<[hasBF16Math]>;
2315
2245
2316
2246
// f32 -> i32
2317
2247
def : Pat<(i32 (OpNode f32:$a, f32:$b)),
@@ -3104,21 +3034,17 @@ def: Pat<(f32 (bitconvert vt:$a)),
3104
3034
(BITCONVERT_32_I2F Int32Regs:$a)>;
3105
3035
}
3106
3036
// Bitcasts between i16 and the two 16-bit float types (f16, bf16), generated
// once per type by the foreach.
// Old form: a constant special case using IMOVB16ri, and ProxyRegI16 for the
// register cases in both directions.
// New form: the output dag is just the source Int16Regs operand annotated with
// the destination type; no instruction is named and the constant case is gone.
foreach vt = [f16, bf16] in {
3107
- def: Pat<(vt (bitconvert (i16 UInt16Const:$a))),
3108
- (IMOVB16ri UInt16Const:$a)>;
3109
- def: Pat<(vt (bitconvert i16:$a)),
3110
- (ProxyRegI16 Int16Regs:$a)>;
3111
- def: Pat<(i16 (bitconvert vt:$a)),
3112
- (ProxyRegI16 Int16Regs:$a)>;
3037
+ def: Pat<(vt (bitconvert i16:$a)),
3038
+ (vt Int16Regs:$a)>;
3039
+ def: Pat<(i16 (bitconvert vt:$a)),
3040
+ (i16 Int16Regs:$a)>;
3113
3041
}
3114
3042
3115
3043
// Bitcasts among the 32-bit types v2f16, v2bf16, v2i16, v4i8 and i32. The
// nested foreach visits every (ta, tb) pair and `!ne(ta, tb)` skips the
// identity pairs, so one pattern is emitted per ordered pair of distinct types.
// Old form: ProxyRegI32 for register sources, plus an IMOVB32ri pattern per
// destination type for UInt32Const sources.
// New form: the output dag is the source Int32Regs operand annotated with the
// destination type `ta`; the constant pattern is removed.
foreach ta = [v2f16, v2bf16, v2i16, v4i8, i32] in {
3116
- def: Pat<(ta (bitconvert (i32 UInt32Const:$a))),
3117
- (IMOVB32ri UInt32Const:$a)>;
3118
3044
foreach tb = [v2f16, v2bf16, v2i16, v4i8, i32] in {
3119
3045
if !ne(ta, tb) then {
3120
- def: Pat<(ta (bitconvert (tb Int32Regs :$a) )),
3121
- (ProxyRegI32 Int32Regs:$a)>;
3046
+ def: Pat<(ta (bitconvert tb :$a)),
3047
+ (ta Int32Regs:$a)>;
3122
3048
}
3123
3049
}
3124
3050
}
0 commit comments