@@ -591,21 +591,6 @@ AMDGPURegisterBankInfo::getInstrAlternativeMappings(
591
591
592
592
return AltMappings;
593
593
}
594
- case TargetOpcode::G_SMIN:
595
- case TargetOpcode::G_SMAX:
596
- case TargetOpcode::G_UMIN:
597
- case TargetOpcode::G_UMAX: {
598
- static const OpRegBankEntry<3 > Table[2 ] = {
599
- { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 1 },
600
-
601
- // Scalar requires cmp+select, and extends if 16-bit.
602
- // FIXME: Should there be separate costs for 32 and 16-bit
603
- { { AMDGPU::SGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::SGPRRegBankID }, 3 }
604
- };
605
-
606
- const std::array<unsigned , 3 > RegSrcOpIdx = { { 0 , 1 , 2 } };
607
- return addMappingFromTable<3 >(MI, MRI, RegSrcOpIdx, makeArrayRef (Table));
608
- }
609
594
case TargetOpcode::G_UADDE:
610
595
case TargetOpcode::G_USUBE:
611
596
case TargetOpcode::G_SADDE:
@@ -1576,23 +1561,8 @@ bool AMDGPURegisterBankInfo::applyMappingBFEIntrinsic(
1576
1561
return true ;
1577
1562
}
1578
1563
// Review note: minMaxToCompare is deleted by this change (every line below
// carries a '-' marker). It translated an integer min/max opcode into the
// integer compare predicate used when expanding the operation as
// compare + select:
//   G_SMIN -> ICMP_SLT, G_SMAX -> ICMP_SGT,
//   G_UMIN -> ICMP_ULT, G_UMAX -> ICMP_UGT.
// Any other opcode reached llvm_unreachable. The callers visible in this
// diff (lowerScalarMinMax and the scalar G_SMIN/G_SMAX/G_UMIN/G_UMAX case in
// applyMappingImpl) are removed by the same change.
1579
- // FIXME: Duplicated from LegalizerHelper
1580
- static CmpInst::Predicate minMaxToCompare (unsigned Opc) {
1581
- switch (Opc) {
1582
- case TargetOpcode::G_SMIN:
1583
- return CmpInst::ICMP_SLT;
1584
- case TargetOpcode::G_SMAX:
1585
- return CmpInst::ICMP_SGT;
1586
- case TargetOpcode::G_UMIN:
1587
- return CmpInst::ICMP_ULT;
1588
- case TargetOpcode::G_UMAX:
1589
- return CmpInst::ICMP_UGT;
1590
- default :
1591
- llvm_unreachable (" not in integer min/max" );
1592
- }
1593
- }
1594
-
1595
- static unsigned minMaxToExtend (unsigned Opc) {
1564
+ // Return a suitable opcode for extending the operands of Opc when widening.
1565
+ static unsigned getExtendOp (unsigned Opc) {
1596
1566
switch (Opc) {
1597
1567
case TargetOpcode::G_SMIN:
1598
1568
case TargetOpcode::G_SMAX:
@@ -1601,7 +1571,7 @@ static unsigned minMaxToExtend(unsigned Opc) {
1601
1571
case TargetOpcode::G_UMAX:
1602
1572
return TargetOpcode::G_ZEXT;
1603
1573
default :
1604
- llvm_unreachable ( " not in integer min/max " ) ;
1574
+ return TargetOpcode::G_ANYEXT ;
1605
1575
}
1606
1576
}
1607
1577
@@ -1628,30 +1598,6 @@ unpackV2S16ToS32(MachineIRBuilder &B, Register Src, unsigned ExtOpcode) {
1628
1598
return std::make_pair (Bitcast.getReg (0 ), ShiftHi.getReg (0 ));
1629
1599
}
1630
1600
// Review note: buildExpandedScalarMinMax is deleted by this change. It
// emitted the two-instruction expansion of a scalar min/max through the
// supplied builder B:
//   1. an integer compare of Src0 against Src1 using Pred, with a 32-bit
//      scalar result type (LLT::scalar(32)) rather than a 1-bit one;
//   2. a select into Dst, conditioned on that compare, with Src0 and Src1
//      as the two candidate values.
// The value returned is the select built in step 2, which lets the caller
// retrieve the compare result register from the select's operands.
1631
- static MachineInstr *buildExpandedScalarMinMax (MachineIRBuilder &B,
1632
- CmpInst::Predicate Pred,
1633
- Register Dst, Register Src0,
1634
- Register Src1) {
1635
- const LLT CmpType = LLT::scalar (32 );
1636
- auto Cmp = B.buildICmp (Pred, CmpType, Src0, Src1);
1637
- return B.buildSelect (Dst, Cmp, Src0, Src1);
1638
- }
1639
-
// Review note: AMDGPURegisterBankInfo::lowerScalarMinMax is deleted by this
// change. It replaced a min/max instruction MI with a compare + select pair:
//   - operand 0 of MI is taken as the destination, operands 1 and 2 as the
//     two sources;
//   - the compare predicate is derived from MI's opcode via minMaxToCompare;
//   - buildExpandedScalarMinMax emits the compare and the select;
//   - operand 1 of the returned select (the register holding the compare
//     result, per the buildSelect call in buildExpandedScalarMinMax) is
//     explicitly assigned to the SGPR register bank;
//   - MI is erased from its parent.
// Only the compare result gets a bank assigned here; nothing in this
// function assigns banks to Dst, Src0 or Src1.
1640
- // FIXME: Duplicated from LegalizerHelper, except changing the boolean type.
1641
- void AMDGPURegisterBankInfo::lowerScalarMinMax (MachineIRBuilder &B,
1642
- MachineInstr &MI) const {
1643
- Register Dst = MI.getOperand (0 ).getReg ();
1644
- Register Src0 = MI.getOperand (1 ).getReg ();
1645
- Register Src1 = MI.getOperand (2 ).getReg ();
1646
-
1647
- const CmpInst::Predicate Pred = minMaxToCompare (MI.getOpcode ());
1648
- MachineInstr *Sel = buildExpandedScalarMinMax (B, Pred, Dst, Src0, Src1);
1649
-
1650
- Register CmpReg = Sel->getOperand (1 ).getReg ();
1651
- B.getMRI ()->setRegBank (CmpReg, AMDGPU::SGPRRegBank);
1652
- MI.eraseFromParent ();
1653
- }
1654
-
1655
1601
// For cases where only a single copy is inserted for matching register banks.
1656
1602
// Replace the register in the instruction operand
1657
1603
static bool substituteSimpleCopyRegs (
@@ -2341,7 +2287,11 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
2341
2287
case AMDGPU::G_MUL:
2342
2288
case AMDGPU::G_SHL:
2343
2289
case AMDGPU::G_LSHR:
2344
- case AMDGPU::G_ASHR: {
2290
+ case AMDGPU::G_ASHR:
2291
+ case AMDGPU::G_SMIN:
2292
+ case AMDGPU::G_SMAX:
2293
+ case AMDGPU::G_UMIN:
2294
+ case AMDGPU::G_UMAX: {
2345
2295
Register DstReg = MI.getOperand (0 ).getReg ();
2346
2296
LLT DstTy = MRI.getType (DstReg);
2347
2297
@@ -2365,10 +2315,11 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
2365
2315
Register WideSrc0Lo, WideSrc0Hi;
2366
2316
Register WideSrc1Lo, WideSrc1Hi;
2367
2317
2318
+ unsigned ExtendOp = getExtendOp (MI.getOpcode ());
2368
2319
std::tie (WideSrc0Lo, WideSrc0Hi)
2369
- = unpackV2S16ToS32 (B, MI.getOperand (1 ).getReg (), AMDGPU::G_ANYEXT );
2320
+ = unpackV2S16ToS32 (B, MI.getOperand (1 ).getReg (), ExtendOp );
2370
2321
std::tie (WideSrc1Lo, WideSrc1Hi)
2371
- = unpackV2S16ToS32 (B, MI.getOperand (2 ).getReg (), AMDGPU::G_ANYEXT );
2322
+ = unpackV2S16ToS32 (B, MI.getOperand (2 ).getReg (), ExtendOp );
2372
2323
auto Lo = B.buildInstr (MI.getOpcode (), {S32}, {WideSrc0Lo, WideSrc1Lo});
2373
2324
auto Hi = B.buildInstr (MI.getOpcode (), {S32}, {WideSrc0Hi, WideSrc1Hi});
2374
2325
B.buildBuildVectorTrunc (DstReg, {Lo.getReg (0 ), Hi.getReg (0 )});
@@ -2390,73 +2341,6 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
2390
2341
2391
2342
return ;
2392
2343
}
2393
- case AMDGPU::G_SMIN:
2394
- case AMDGPU::G_SMAX:
2395
- case AMDGPU::G_UMIN:
2396
- case AMDGPU::G_UMAX: {
2397
- Register DstReg = MI.getOperand (0 ).getReg ();
2398
- const RegisterBank *DstBank =
2399
- OpdMapper.getInstrMapping ().getOperandMapping (0 ).BreakDown [0 ].RegBank ;
2400
- if (DstBank == &AMDGPU::VGPRRegBank)
2401
- break ;
2402
-
2403
- MachineFunction *MF = MI.getParent ()->getParent ();
2404
- MachineIRBuilder B (MI);
2405
-
2406
- // Turn scalar min/max into a compare and select.
2407
- LLT Ty = MRI.getType (DstReg);
2408
- const LLT S32 = LLT::scalar (32 );
2409
- const LLT S16 = LLT::scalar (16 );
2410
- const LLT V2S16 = LLT::vector (2 , 16 );
2411
-
2412
- if (Ty == V2S16) {
2413
- ApplyRegBankMapping ApplySALU (*this , MRI, &AMDGPU::SGPRRegBank);
2414
- B.setChangeObserver (ApplySALU);
2415
-
2416
- // Need to widen to s32, and expand as cmp + select, and avoid producing
2417
- // illegal vector extends or unmerges that would need further
2418
- // legalization.
2419
- //
2420
- // TODO: Should we just readfirstlane? That should probably be handled
2421
- // with a UniformVGPR register bank that wouldn't need special
2422
- // consideration here.
2423
-
2424
- Register Dst = MI.getOperand (0 ).getReg ();
2425
- Register Src0 = MI.getOperand (1 ).getReg ();
2426
- Register Src1 = MI.getOperand (2 ).getReg ();
2427
-
2428
- Register WideSrc0Lo, WideSrc0Hi;
2429
- Register WideSrc1Lo, WideSrc1Hi;
2430
-
2431
- unsigned ExtendOp = minMaxToExtend (MI.getOpcode ());
2432
-
2433
- std::tie (WideSrc0Lo, WideSrc0Hi) = unpackV2S16ToS32 (B, Src0, ExtendOp);
2434
- std::tie (WideSrc1Lo, WideSrc1Hi) = unpackV2S16ToS32 (B, Src1, ExtendOp);
2435
-
2436
- Register Lo = MRI.createGenericVirtualRegister (S32);
2437
- Register Hi = MRI.createGenericVirtualRegister (S32);
2438
- const CmpInst::Predicate Pred = minMaxToCompare (MI.getOpcode ());
2439
- buildExpandedScalarMinMax (B, Pred, Lo, WideSrc0Lo, WideSrc1Lo);
2440
- buildExpandedScalarMinMax (B, Pred, Hi, WideSrc0Hi, WideSrc1Hi);
2441
-
2442
- B.buildBuildVectorTrunc (Dst, {Lo, Hi});
2443
- MI.eraseFromParent ();
2444
- } else if (Ty == S16) {
2445
- ApplyRegBankMapping ApplySALU (*this , MRI, &AMDGPU::SGPRRegBank);
2446
- B.setChangeObserver (ApplySALU);
2447
- LegalizerHelper Helper (*MF, ApplySALU, B);
2448
-
2449
- // Need to widen to s32, and expand as cmp + select.
2450
- if (Helper.widenScalar (MI, 0 , S32) != LegalizerHelper::Legalized)
2451
- llvm_unreachable (" widenScalar should have succeeded" );
2452
-
2453
- // FIXME: This is relying on widenScalar leaving MI in place.
2454
- lowerScalarMinMax (B, MI);
2455
- } else
2456
- lowerScalarMinMax (B, MI);
2457
-
2458
- return ;
2459
- }
2460
2344
case AMDGPU::G_SEXT_INREG: {
2461
2345
SmallVector<Register, 2 > SrcRegs (OpdMapper.getVRegs (1 ));
2462
2346
if (SrcRegs.empty ())
0 commit comments