@@ -150,9 +150,6 @@ def doRsqrtOpt : Predicate<"doRsqrtOpt()">;
150
150
151
151
def doMulWide : Predicate<"doMulWide">;
152
152
153
- def do_DIVF32_APPROX : Predicate<"getDivF32Level()==0">;
154
- def do_DIVF32_FULL : Predicate<"getDivF32Level()==1">;
155
-
156
153
def do_SQRTF32_APPROX : Predicate<"!usePrecSqrtF32()">;
157
154
def do_SQRTF32_RN : Predicate<"usePrecSqrtF32()">;
158
155
@@ -1108,26 +1105,19 @@ def INEG64 :
1108
1105
//-----------------------------------
1109
1106
1110
1107
// Constant 1.0f
1111
- def FloatConst1 : PatLeaf<(fpimm) , [{
1112
- return &N->getValueAPF() .getSemantics() == &llvm::APFloat::IEEEsingle() &&
1113
- N->getValueAPF() .convertToFloat() == 1.0f;
1108
// f32imm_1: FPImmLeaf over f32 whose predicate accepts an immediate only when
// it has IEEE single semantics and converts to exactly 1.0f.
+ def f32imm_1 : FPImmLeaf<f32 , [{
1109
+ return &Imm .getSemantics() == &llvm::APFloat::IEEEsingle() &&
1110
+ Imm .convertToFloat() == 1.0f;
1114
1111
}]>;
1115
1112
// Constant 1.0 (double)
1116
- def DoubleConst1 : PatLeaf<(fpimm) , [{
1117
- return &N->getValueAPF() .getSemantics() == &llvm::APFloat::IEEEdouble() &&
1118
- N->getValueAPF() .convertToDouble() == 1.0;
1113
// f64imm_1: FPImmLeaf over f64 whose predicate accepts an immediate only when
// it has IEEE double semantics and converts to exactly 1.0.
+ def f64imm_1 : FPImmLeaf<f64 , [{
1114
+ return &Imm .getSemantics() == &llvm::APFloat::IEEEdouble() &&
1115
+ Imm .convertToDouble() == 1.0;
1119
1116
}]>;
1120
1117
// Constant -1.0 (double)
1121
- def DoubleConstNeg1 : PatLeaf<(fpimm), [{
1122
- return &N->getValueAPF().getSemantics() == &llvm::APFloat::IEEEdouble() &&
1123
- N->getValueAPF().convertToDouble() == -1.0;
1124
- }]>;
1125
-
1126
-
1127
- // Constant -X -> X (double)
1128
- def NegDoubleConst : SDNodeXForm<fpimm, [{
1129
- return CurDAG->getTargetConstantFP(-(N->getValueAPF()),
1130
- SDLoc(N), MVT::f64);
1118
// f64imm_neg1: FPImmLeaf over f64 whose predicate accepts an immediate only
// when it has IEEE double semantics and converts to exactly -1.0.
+ def f64imm_neg1 : FPImmLeaf<f64, [{
1119
+ return &Imm.getSemantics() == &llvm::APFloat::IEEEdouble() &&
1120
+ Imm.convertToDouble() == -1.0;
1131
1121
}]>;
1132
1122
1133
1123
defm FADD : F3_fma_component<"add", fadd>;
@@ -1178,11 +1168,11 @@ def BFNEG16x2 : FNEG_BF16_F16X2<"neg.bf16x2", v2bf16, Int32Regs, True>;
1178
1168
//
1179
1169
// F64 division
1180
1170
//
1181
- def FDIV641r :
1171
// FRCP64r: f64 reciprocal. Selected for an fdiv whose dividend matches
// f64imm_1; only the divisor $b is an instruction operand, and the asm string
// is "rcp.rn.f64".
+ def FRCP64r :
1182
1172
NVPTXInst<(outs Float64Regs:$dst),
1183
- (ins f64imm:$a, Float64Regs:$b),
1173
+ (ins Float64Regs:$b),
1184
1174
"rcp.rn.f64 \t$dst, $b;",
1185
- [(set f64:$dst, (fdiv DoubleConst1:$a , f64:$b))]>;
1175
+ [(set f64:$dst, (fdiv f64imm_1 , f64:$b))]>;
1186
1176
def FDIV64rr :
1187
1177
NVPTXInst<(outs Float64Regs:$dst),
1188
1178
(ins Float64Regs:$a, Float64Regs:$b),
@@ -1196,109 +1186,114 @@ def FDIV64ri :
1196
1186
1197
1187
// fdiv will be converted to rcp
1198
1188
// fdiv -1.0, X => fneg (rcp.rn X)
1199
- def : Pat<(fdiv DoubleConstNeg1:$a , f64:$b),
1200
- (FNEGf64 (FDIV641r (NegDoubleConst node:$a), $b))>;
1189
// Rewrites an f64 fdiv whose dividend matches f64imm_neg1 into FNEGf64 applied
// to FRCP64r of the divisor $b.
+ def : Pat<(fdiv f64imm_neg1 , f64:$b),
1190
+ (FNEGf64 (FRCP64r $b))>;
1201
1191
1202
1192
//
1203
1193
// F32 Approximate reciprocal
1204
1194
//
1205
- def FDIV321r_ftz :
1195
+
1196
// fdiv_approx: pattern fragment over (fdiv $a, $b) that matches only when
// getDivF32Level(N) returns NVPTX::DivPrecisionLevel::Approx for the node.
+ def fdiv_approx : PatFrag<(ops node:$a, node:$b),
1197
+ (fdiv node:$a, node:$b), [{
1198
+ return getDivF32Level(N) == NVPTX::DivPrecisionLevel::Approx;
1199
+ }]>;
1200
+
1201
+
1202
// FRCP32_approx_r_ftz: f32 reciprocal with asm string "rcp.approx.ftz.f32".
// Selected for fdiv_approx with a dividend matching f32imm_1; gated on the
// doF32FTZ predicate. Only the divisor $b is an instruction operand.
+ def FRCP32_approx_r_ftz :
1206
1203
NVPTXInst<(outs Float32Regs:$dst),
1207
- (ins f32imm:$a, Float32Regs:$b),
1204
+ (ins Float32Regs:$b),
1208
1205
"rcp.approx.ftz.f32 \t$dst, $b;",
1209
- [(set f32:$dst, (fdiv FloatConst1:$a , f32:$b))]>,
1210
- Requires<[do_DIVF32_APPROX, doF32FTZ]>;
1211
- def FDIV321r :
1206
+ [(set f32:$dst, (fdiv_approx f32imm_1 , f32:$b))]>,
1207
+ Requires<[doF32FTZ]>;
1208
// FRCP32_approx_r: f32 reciprocal with asm string "rcp.approx.f32".
// Selected for fdiv_approx with a dividend matching f32imm_1; carries no
// Requires<> predicate list of its own.
+ def FRCP32_approx_r :
1212
1209
NVPTXInst<(outs Float32Regs:$dst),
1213
- (ins f32imm:$a, Float32Regs:$b),
1210
+ (ins Float32Regs:$b),
1214
1211
"rcp.approx.f32 \t$dst, $b;",
1215
- [(set f32:$dst, (fdiv FloatConst1:$a , f32:$b))]>,
1216
- Requires<[do_DIVF32_APPROX]>;
1212
+ [(set f32:$dst, (fdiv_approx f32imm_1 , f32:$b))]>;
1213
+
1217
1214
//
1218
1215
// F32 Approximate division
1219
1216
//
1220
1217
// FDIV32approxrr_ftz: register/register f32 division with asm string
// "div.approx.ftz.f32". Selected through the fdiv_approx fragment and gated on
// the doF32FTZ predicate.
def FDIV32approxrr_ftz :
1221
1218
NVPTXInst<(outs Float32Regs:$dst),
1222
1219
(ins Float32Regs:$a, Float32Regs:$b),
1223
1220
"div.approx.ftz.f32 \t$dst, $a, $b;",
1224
- [(set f32:$dst, (fdiv f32:$a, f32:$b))]>,
1225
- Requires<[do_DIVF32_APPROX, doF32FTZ]>;
1221
+ [(set f32:$dst, (fdiv_approx f32:$a, f32:$b))]>,
1222
+ Requires<[doF32FTZ]>;
1226
1223
// FDIV32approxri_ftz: register/immediate f32 division with asm string
// "div.approx.ftz.f32"; the divisor $b is an f32imm operand matched as fpimm.
// Selected through the fdiv_approx fragment and gated on doF32FTZ.
def FDIV32approxri_ftz :
1227
1224
NVPTXInst<(outs Float32Regs:$dst),
1228
1225
(ins Float32Regs:$a, f32imm:$b),
1229
1226
"div.approx.ftz.f32 \t$dst, $a, $b;",
1230
- [(set f32:$dst, (fdiv f32:$a, fpimm:$b))]>,
1231
- Requires<[do_DIVF32_APPROX, doF32FTZ]>;
1227
+ [(set f32:$dst, (fdiv_approx f32:$a, fpimm:$b))]>,
1228
+ Requires<[doF32FTZ]>;
1232
1229
// FDIV32approxrr: register/register f32 division with asm string
// "div.approx.f32". Selected through the fdiv_approx fragment; no Requires<>
// predicate list is attached.
def FDIV32approxrr :
1233
1230
NVPTXInst<(outs Float32Regs:$dst),
1234
1231
(ins Float32Regs:$a, Float32Regs:$b),
1235
1232
"div.approx.f32 \t$dst, $a, $b;",
1236
- [(set f32:$dst, (fdiv f32:$a, f32:$b))]>,
1237
- Requires<[do_DIVF32_APPROX]>;
1233
+ [(set f32:$dst, (fdiv_approx f32:$a, f32:$b))]>;
1238
1234
// FDIV32approxri: register/immediate f32 division with asm string
// "div.approx.f32"; the divisor $b is an f32imm operand matched as fpimm.
// Selected through the fdiv_approx fragment; no Requires<> list is attached.
def FDIV32approxri :
1239
1235
NVPTXInst<(outs Float32Regs:$dst),
1240
1236
(ins Float32Regs:$a, f32imm:$b),
1241
1237
"div.approx.f32 \t$dst, $a, $b;",
1242
- [(set f32:$dst, (fdiv f32:$a, fpimm:$b))]>,
1243
- Requires<[do_DIVF32_APPROX]>;
1238
+ [(set f32:$dst, (fdiv_approx f32:$a, fpimm:$b))]>;
1244
1239
//
1245
1240
// F32 Semi-accurate reciprocal
1246
1241
//
1247
1242
// rcp.approx gives the same result as div.full(1.0f, a) and is faster.
1248
1243
//
1249
- def FDIV321r_approx_ftz :
1250
- NVPTXInst<(outs Float32Regs:$dst),
1251
- (ins f32imm:$a, Float32Regs:$b),
1252
- "rcp.approx.ftz.f32 \t$dst, $b;",
1253
- [(set f32:$dst, (fdiv FloatConst1:$a, f32:$b))]>,
1254
- Requires<[do_DIVF32_FULL, doF32FTZ]>;
1255
- def FDIV321r_approx :
1256
- NVPTXInst<(outs Float32Regs:$dst),
1257
- (ins f32imm:$a, Float32Regs:$b),
1258
- "rcp.approx.f32 \t$dst, $b;",
1259
- [(set f32:$dst, (fdiv FloatConst1:$a, f32:$b))]>,
1260
- Requires<[do_DIVF32_FULL]>;
1244
+
1245
// fdiv_full: pattern fragment over (fdiv $a, $b) that matches only when
// getDivF32Level(N) returns NVPTX::DivPrecisionLevel::Full for the node.
+ def fdiv_full : PatFrag<(ops node:$a, node:$b),
1246
+ (fdiv node:$a, node:$b), [{
1247
+ return getDivF32Level(N) == NVPTX::DivPrecisionLevel::Full;
1248
+ }]>;
1249
+
1250
+
1251
// Maps fdiv_full with a dividend matching f32imm_1 onto FRCP32_approx_r_ftz
// of the divisor $b; applies only under the doF32FTZ predicate.
+ def : Pat<(fdiv_full f32imm_1, f32:$b),
1252
+ (FRCP32_approx_r_ftz $b)>,
1253
+ Requires<[doF32FTZ]>;
1254
+
1255
// Maps fdiv_full with a dividend matching f32imm_1 onto FRCP32_approx_r of the
// divisor $b; no Requires<> predicate list is attached.
+ def : Pat<(fdiv_full f32imm_1, f32:$b),
1256
+ (FRCP32_approx_r $b)>;
1257
+
1261
1258
//
1262
1259
// F32 Semi-accurate division
1263
1260
//
1264
1261
// FDIV32rr_ftz: register/register f32 division with asm string
// "div.full.ftz.f32". Selected through the fdiv_full fragment and gated on the
// doF32FTZ predicate.
def FDIV32rr_ftz :
1265
1262
NVPTXInst<(outs Float32Regs:$dst),
1266
1263
(ins Float32Regs:$a, Float32Regs:$b),
1267
1264
"div.full.ftz.f32 \t$dst, $a, $b;",
1268
- [(set f32:$dst, (fdiv Float32Regs :$a, f32:$b))]>,
1269
- Requires<[do_DIVF32_FULL, doF32FTZ]>;
1265
+ [(set f32:$dst, (fdiv_full f32 :$a, f32:$b))]>,
1266
+ Requires<[doF32FTZ]>;
1270
1267
// FDIV32ri_ftz: register/immediate f32 division with asm string
// "div.full.ftz.f32"; the divisor $b is an f32imm operand matched as fpimm.
// Selected through the fdiv_full fragment and gated on doF32FTZ.
def FDIV32ri_ftz :
1271
1268
NVPTXInst<(outs Float32Regs:$dst),
1272
1269
(ins Float32Regs:$a, f32imm:$b),
1273
1270
"div.full.ftz.f32 \t$dst, $a, $b;",
1274
- [(set f32:$dst, (fdiv f32:$a, fpimm:$b))]>,
1275
- Requires<[do_DIVF32_FULL, doF32FTZ]>;
1271
+ [(set f32:$dst, (fdiv_full f32:$a, fpimm:$b))]>,
1272
+ Requires<[doF32FTZ]>;
1276
1273
// FDIV32rr: register/register f32 division with asm string "div.full.f32".
// Selected through the fdiv_full fragment; no Requires<> list is attached.
def FDIV32rr :
1277
1274
NVPTXInst<(outs Float32Regs:$dst),
1278
1275
(ins Float32Regs:$a, Float32Regs:$b),
1279
1276
"div.full.f32 \t$dst, $a, $b;",
1280
- [(set f32:$dst, (fdiv f32:$a, f32:$b))]>,
1281
- Requires<[do_DIVF32_FULL]>;
1277
+ [(set f32:$dst, (fdiv_full f32:$a, f32:$b))]>;
1282
1278
// FDIV32ri: register/immediate f32 division with asm string "div.full.f32";
// the divisor $b is an f32imm operand matched as fpimm. Selected through the
// fdiv_full fragment; no Requires<> list is attached.
def FDIV32ri :
1283
1279
NVPTXInst<(outs Float32Regs:$dst),
1284
1280
(ins Float32Regs:$a, f32imm:$b),
1285
1281
"div.full.f32 \t$dst, $a, $b;",
1286
- [(set f32:$dst, (fdiv f32:$a, fpimm:$b))]>,
1287
- Requires<[do_DIVF32_FULL]>;
1282
+ [(set f32:$dst, (fdiv_full f32:$a, fpimm:$b))]>;
1288
1283
//
1289
1284
// F32 Accurate reciprocal
1290
1285
//
1291
1286
// FDIV321r_prec_ftz: f32 reciprocal with asm string "rcp.rn.ftz.f32".
// Matches a plain fdiv (no precision-level fragment) whose dividend matches
// f32imm_1; gated on the doF32FTZ predicate. Only the divisor $b is an
// instruction operand.
// NOTE(review): the non-ftz sibling is named FRCP32r_prec while this def keeps
// the FDIV321r_ prefix -- confirm whether a matching rename was intended.
def FDIV321r_prec_ftz :
1292
1287
NVPTXInst<(outs Float32Regs:$dst),
1293
- (ins f32imm:$a, Float32Regs:$b),
1288
+ (ins Float32Regs:$b),
1294
1289
"rcp.rn.ftz.f32 \t$dst, $b;",
1295
- [(set f32:$dst, (fdiv FloatConst1:$a , f32:$b))]>,
1290
+ [(set f32:$dst, (fdiv f32imm_1 , f32:$b))]>,
1296
1291
Requires<[doF32FTZ]>;
1297
- def FDIV321r_prec :
1292
// FRCP32r_prec: f32 reciprocal with asm string "rcp.rn.f32". Matches a plain
// fdiv (no precision-level fragment) whose dividend matches f32imm_1; no
// Requires<> predicate list is attached. Only the divisor $b is an operand.
+ def FRCP32r_prec :
1298
1293
NVPTXInst<(outs Float32Regs:$dst),
1299
- (ins f32imm:$a, Float32Regs:$b),
1294
+ (ins Float32Regs:$b),
1300
1295
"rcp.rn.f32 \t$dst, $b;",
1301
- [(set f32:$dst, (fdiv FloatConst1:$a , f32:$b))]>;
1296
+ [(set f32:$dst, (fdiv f32imm_1 , f32:$b))]>;
1302
1297
//
1303
1298
// F32 Accurate division
1304
1299
//
0 commit comments