@@ -1211,36 +1211,104 @@ int AMDGPUInstructionSelector::getS_CMPOpcode(CmpInst::Predicate P,
1211
1211
}
1212
1212
}
1213
1213
1214
- if (Size != 32 )
1215
- return -1 ;
1214
+ if (Size == 32 ) {
1215
+ switch (P) {
1216
+ case CmpInst::ICMP_NE:
1217
+ return AMDGPU::S_CMP_LG_U32;
1218
+ case CmpInst::ICMP_EQ:
1219
+ return AMDGPU::S_CMP_EQ_U32;
1220
+ case CmpInst::ICMP_SGT:
1221
+ return AMDGPU::S_CMP_GT_I32;
1222
+ case CmpInst::ICMP_SGE:
1223
+ return AMDGPU::S_CMP_GE_I32;
1224
+ case CmpInst::ICMP_SLT:
1225
+ return AMDGPU::S_CMP_LT_I32;
1226
+ case CmpInst::ICMP_SLE:
1227
+ return AMDGPU::S_CMP_LE_I32;
1228
+ case CmpInst::ICMP_UGT:
1229
+ return AMDGPU::S_CMP_GT_U32;
1230
+ case CmpInst::ICMP_UGE:
1231
+ return AMDGPU::S_CMP_GE_U32;
1232
+ case CmpInst::ICMP_ULT:
1233
+ return AMDGPU::S_CMP_LT_U32;
1234
+ case CmpInst::ICMP_ULE:
1235
+ return AMDGPU::S_CMP_LE_U32;
1236
+ case CmpInst::FCMP_OEQ:
1237
+ return AMDGPU::S_CMP_EQ_F32;
1238
+ case CmpInst::FCMP_OGT:
1239
+ return AMDGPU::S_CMP_GT_F32;
1240
+ case CmpInst::FCMP_OGE:
1241
+ return AMDGPU::S_CMP_GE_F32;
1242
+ case CmpInst::FCMP_OLT:
1243
+ return AMDGPU::S_CMP_LT_F32;
1244
+ case CmpInst::FCMP_OLE:
1245
+ return AMDGPU::S_CMP_LE_F32;
1246
+ case CmpInst::FCMP_ONE:
1247
+ return AMDGPU::S_CMP_LG_F32;
1248
+ case CmpInst::FCMP_ORD:
1249
+ return AMDGPU::S_CMP_O_F32;
1250
+ case CmpInst::FCMP_UNO:
1251
+ return AMDGPU::S_CMP_U_F32;
1252
+ case CmpInst::FCMP_UEQ:
1253
+ return AMDGPU::S_CMP_NLG_F32;
1254
+ case CmpInst::FCMP_UGT:
1255
+ return AMDGPU::S_CMP_NLE_F32;
1256
+ case CmpInst::FCMP_UGE:
1257
+ return AMDGPU::S_CMP_NLT_F32;
1258
+ case CmpInst::FCMP_ULT:
1259
+ return AMDGPU::S_CMP_NGE_F32;
1260
+ case CmpInst::FCMP_ULE:
1261
+ return AMDGPU::S_CMP_NGT_F32;
1262
+ case CmpInst::FCMP_UNE:
1263
+ return AMDGPU::S_CMP_NEQ_F32;
1264
+ default :
1265
+ llvm_unreachable (" Unknown condition code!" );
1266
+ }
1267
+ }
1216
1268
1217
- switch (P) {
1218
- case CmpInst::ICMP_NE:
1219
- return AMDGPU::S_CMP_LG_U32;
1220
- case CmpInst::ICMP_EQ:
1221
- return AMDGPU::S_CMP_EQ_U32;
1222
- case CmpInst::ICMP_SGT:
1223
- return AMDGPU::S_CMP_GT_I32;
1224
- case CmpInst::ICMP_SGE:
1225
- return AMDGPU::S_CMP_GE_I32;
1226
- case CmpInst::ICMP_SLT:
1227
- return AMDGPU::S_CMP_LT_I32;
1228
- case CmpInst::ICMP_SLE:
1229
- return AMDGPU::S_CMP_LE_I32;
1230
- case CmpInst::ICMP_UGT:
1231
- return AMDGPU::S_CMP_GT_U32;
1232
- case CmpInst::ICMP_UGE:
1233
- return AMDGPU::S_CMP_GE_U32;
1234
- case CmpInst::ICMP_ULT:
1235
- return AMDGPU::S_CMP_LT_U32;
1236
- case CmpInst::ICMP_ULE:
1237
- return AMDGPU::S_CMP_LE_U32;
1238
- default :
1239
- llvm_unreachable (" Unknown condition code!" );
1269
+ if (Size == 16 ) {
1270
+ if (!STI.hasSALUFloatInsts ())
1271
+ return -1 ;
1272
+
1273
+ switch (P) {
1274
+ case CmpInst::FCMP_OEQ:
1275
+ return AMDGPU::S_CMP_EQ_F16;
1276
+ case CmpInst::FCMP_OGT:
1277
+ return AMDGPU::S_CMP_GT_F16;
1278
+ case CmpInst::FCMP_OGE:
1279
+ return AMDGPU::S_CMP_GE_F16;
1280
+ case CmpInst::FCMP_OLT:
1281
+ return AMDGPU::S_CMP_LT_F16;
1282
+ case CmpInst::FCMP_OLE:
1283
+ return AMDGPU::S_CMP_LE_F16;
1284
+ case CmpInst::FCMP_ONE:
1285
+ return AMDGPU::S_CMP_LG_F16;
1286
+ case CmpInst::FCMP_ORD:
1287
+ return AMDGPU::S_CMP_O_F16;
1288
+ case CmpInst::FCMP_UNO:
1289
+ return AMDGPU::S_CMP_U_F16;
1290
+ case CmpInst::FCMP_UEQ:
1291
+ return AMDGPU::S_CMP_NLG_F16;
1292
+ case CmpInst::FCMP_UGT:
1293
+ return AMDGPU::S_CMP_NLE_F16;
1294
+ case CmpInst::FCMP_UGE:
1295
+ return AMDGPU::S_CMP_NLT_F16;
1296
+ case CmpInst::FCMP_ULT:
1297
+ return AMDGPU::S_CMP_NGE_F16;
1298
+ case CmpInst::FCMP_ULE:
1299
+ return AMDGPU::S_CMP_NGT_F16;
1300
+ case CmpInst::FCMP_UNE:
1301
+ return AMDGPU::S_CMP_NEQ_F16;
1302
+ default :
1303
+ llvm_unreachable (" Unknown condition code!" );
1304
+ }
1240
1305
}
1306
+
1307
+ return -1 ;
1241
1308
}
1242
1309
1243
- bool AMDGPUInstructionSelector::selectG_ICMP (MachineInstr &I) const {
1310
+ bool AMDGPUInstructionSelector::selectG_ICMP_or_FCMP (MachineInstr &I) const {
1311
+
1244
1312
MachineBasicBlock *BB = I.getParent ();
1245
1313
const DebugLoc &DL = I.getDebugLoc ();
1246
1314
@@ -1266,6 +1334,9 @@ bool AMDGPUInstructionSelector::selectG_ICMP(MachineInstr &I) const {
1266
1334
return Ret;
1267
1335
}
1268
1336
1337
+ if (I.getOpcode () == AMDGPU::G_FCMP)
1338
+ return false ;
1339
+
1269
1340
int Opcode = getV_CMPOpcode (Pred, Size, *Subtarget);
1270
1341
if (Opcode == -1 )
1271
1342
return false ;
@@ -2439,6 +2510,42 @@ bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const {
2439
2510
return false ;
2440
2511
}
2441
2512
2513
+ static bool isExtractHiElt (MachineRegisterInfo &MRI, Register In,
2514
+ Register &Out) {
2515
+ Register LShlSrc;
2516
+ if (mi_match (In, MRI,
2517
+ m_GTrunc (m_GLShr (m_Reg (LShlSrc), m_SpecificICst (16 ))))) {
2518
+ Out = LShlSrc;
2519
+ return true ;
2520
+ }
2521
+ return false ;
2522
+ }
2523
+
2524
+ bool AMDGPUInstructionSelector::selectG_FPEXT (MachineInstr &I) const {
2525
+ if (!Subtarget->hasSALUFloatInsts ())
2526
+ return false ;
2527
+
2528
+ Register Dst = I.getOperand (0 ).getReg ();
2529
+ const RegisterBank *DstRB = RBI.getRegBank (Dst, *MRI, TRI);
2530
+ if (DstRB->getID () != AMDGPU::SGPRRegBankID)
2531
+ return false ;
2532
+
2533
+ Register Src = I.getOperand (1 ).getReg ();
2534
+
2535
+ if (MRI->getType (Dst) == LLT::scalar (32 ) &&
2536
+ MRI->getType (Src) == LLT::scalar (16 )) {
2537
+ if (isExtractHiElt (*MRI, Src, Src)) {
2538
+ MachineBasicBlock *BB = I.getParent ();
2539
+ BuildMI (*BB, &I, I.getDebugLoc (), TII.get (AMDGPU::S_CVT_HI_F32_F16), Dst)
2540
+ .addUse (Src);
2541
+ I.eraseFromParent ();
2542
+ return RBI.constrainGenericRegister (Dst, AMDGPU::SReg_32RegClass, *MRI);
2543
+ }
2544
+ }
2545
+
2546
+ return false ;
2547
+ }
2548
+
2442
2549
bool AMDGPUInstructionSelector::selectG_CONSTANT (MachineInstr &I) const {
2443
2550
MachineBasicBlock *BB = I.getParent ();
2444
2551
MachineOperand &ImmOp = I.getOperand (1 );
@@ -3471,7 +3578,8 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I) {
3471
3578
case TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
3472
3579
return selectG_INTRINSIC_W_SIDE_EFFECTS (I);
3473
3580
case TargetOpcode::G_ICMP:
3474
- if (selectG_ICMP (I))
3581
+ case TargetOpcode::G_FCMP:
3582
+ if (selectG_ICMP_or_FCMP (I))
3475
3583
return true ;
3476
3584
return selectImpl (I, *CoverageInfo);
3477
3585
case TargetOpcode::G_LOAD:
@@ -3508,6 +3616,10 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I) {
3508
3616
selectImpl (I, *CoverageInfo))
3509
3617
return true ;
3510
3618
return selectG_SZA_EXT (I);
3619
+ case TargetOpcode::G_FPEXT:
3620
+ if (selectG_FPEXT (I))
3621
+ return true ;
3622
+ return selectImpl (I, *CoverageInfo);
3511
3623
case TargetOpcode::G_BRCOND:
3512
3624
return selectG_BRCOND (I);
3513
3625
case TargetOpcode::G_GLOBAL_VALUE:
0 commit comments