@@ -1621,18 +1621,6 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) {
   case AMDGPU::S_CMP_GE_U32: return AMDGPU::V_CMP_GE_U32_e32;
   case AMDGPU::S_CMP_LT_U32: return AMDGPU::V_CMP_LT_U32_e32;
   case AMDGPU::S_CMP_LE_U32: return AMDGPU::V_CMP_LE_U32_e32;
-  case AMDGPU::S_LOAD_DWORD_IMM:
-  case AMDGPU::S_LOAD_DWORD_SGPR:
-  case AMDGPU::S_LOAD_DWORD_IMM_ci:
-    return AMDGPU::BUFFER_LOAD_DWORD_ADDR64;
-  case AMDGPU::S_LOAD_DWORDX2_IMM:
-  case AMDGPU::S_LOAD_DWORDX2_SGPR:
-  case AMDGPU::S_LOAD_DWORDX2_IMM_ci:
-    return AMDGPU::BUFFER_LOAD_DWORDX2_ADDR64;
-  case AMDGPU::S_LOAD_DWORDX4_IMM:
-  case AMDGPU::S_LOAD_DWORDX4_SGPR:
-  case AMDGPU::S_LOAD_DWORDX4_IMM_ci:
-    return AMDGPU::BUFFER_LOAD_DWORDX4_ADDR64;
   case AMDGPU::S_BCNT1_I32_B32: return AMDGPU::V_BCNT_U32_B32_e64;
   case AMDGPU::S_FF1_I32_B32: return AMDGPU::V_FFBL_B32_e32;
   case AMDGPU::S_FLBIT_I32_B32: return AMDGPU::V_FFBH_U32_e32;
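Dropping the S_LOAD → BUFFER_LOAD_*_ADDR64 rows means getVALUOp no longer advertises a VALU replacement for scalar loads; with uniform pointers enforced at legalization time (see the new legalizeOperandsSMRD below), moveToVALU never needs one. A hedged micro-example of the invariant callers can now rely on, assuming getVALUOp keeps its existing default of returning AMDGPU::INSTRUCTION_LIST_END (hasVALUEquivalent is a name invented here for illustration):

```cpp
// Hypothetical helper: true iff moveToVALU can rewrite MI into a VALU op.
// After this change, SMRD loads report no VALU equivalent.
static bool hasVALUEquivalent(const SIInstrInfo &TII, const MachineInstr &MI) {
  return TII.getVALUOp(MI) != AMDGPU::INSTRUCTION_LIST_END;
}
```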
@@ -1993,6 +1981,20 @@ unsigned SIInstrInfo::readlaneVGPRToSGPR(unsigned SrcReg, MachineInstr *UseMI,
   return DstReg;
 }
 
+void SIInstrInfo::legalizeOperandsSMRD(MachineRegisterInfo &MRI,
+                                       MachineInstr *MI) const {
+
+  // If the pointer is stored in VGPRs, then we need to move it to
+  // SGPRs using v_readfirstlane. This is safe because we only select
+  // loads with uniform pointers to SMRD instructions, so we know the
+  // pointer value is uniform.
+  MachineOperand *SBase = getNamedOperand(*MI, AMDGPU::OpName::sbase);
+  if (SBase && !RI.isSGPRClass(MRI.getRegClass(SBase->getReg()))) {
+    unsigned SGPR = readlaneVGPRToSGPR(SBase->getReg(), MI, MRI);
+    SBase->setReg(SGPR);
+  }
+}
+
 
 void SIInstrInfo::legalizeOperands(MachineInstr *MI) const {
   MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
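The new legalizeOperandsSMRD leans on the readlaneVGPRToSGPR helper named in the hunk header above. As a rough illustration of what that legalization amounts to, here is a minimal sketch, assuming a 64-bit pointer and a hypothetical helper name (the in-tree readlaneVGPRToSGPR handles other widths and may differ in detail): each 32-bit half of the uniform VGPR pointer is broadcast to an SGPR with V_READFIRSTLANE_B32, then the halves are reassembled with REG_SEQUENCE.

```cpp
// Sketch only: copy a uniform 64-bit pointer held in a VGPR pair into SGPRs.
// copyVGPR64ToSGPR is an invented name; the real helper is
// SIInstrInfo::readlaneVGPRToSGPR and may be structured differently.
static unsigned copyVGPR64ToSGPR(unsigned SrcReg, MachineInstr *UseMI,
                                 MachineRegisterInfo &MRI,
                                 const SIInstrInfo &TII) {
  MachineBasicBlock *MBB = UseMI->getParent();
  const DebugLoc &DL = UseMI->getDebugLoc();
  unsigned DstReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
  MachineInstrBuilder RS =
      BuildMI(*MBB, UseMI, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg);
  for (unsigned Sub : {AMDGPU::sub0, AMDGPU::sub1}) {
    unsigned Lane = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
    // v_readfirstlane_b32 moves lane 0 of the VGPR into an SGPR; this is
    // only correct because the pointer is known to be uniform across lanes.
    BuildMI(*MBB, UseMI, DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), Lane)
        .addReg(SrcReg, 0, Sub);
    RS.addReg(Lane).addImm(Sub);
  }
  return DstReg;
}
```

The broadcast is legal precisely because lane 0's value equals every other lane's value for a uniform pointer, which is the invariant instruction selection guarantees when it picks an SMRD.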
@@ -2008,6 +2010,12 @@ void SIInstrInfo::legalizeOperands(MachineInstr *MI) const {
     return;
   }
 
+  // Legalize SMRD
+  if (isSMRD(*MI)) {
+    legalizeOperandsSMRD(MRI, MI);
+    return;
+  }
+
   // Legalize REG_SEQUENCE and PHI
   // The register class of the operands must be the same type as the register
   // class of the output.
@@ -2280,219 +2288,6 @@ void SIInstrInfo::legalizeOperands(MachineInstr *MI) const {
   }
 }
 
-void SIInstrInfo::splitSMRD(MachineInstr *MI,
-                            const TargetRegisterClass *HalfRC,
-                            unsigned HalfImmOp, unsigned HalfSGPROp,
-                            MachineInstr *&Lo, MachineInstr *&Hi) const {
-
-  DebugLoc DL = MI->getDebugLoc();
-  MachineBasicBlock *MBB = MI->getParent();
-  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
-  unsigned RegLo = MRI.createVirtualRegister(HalfRC);
-  unsigned RegHi = MRI.createVirtualRegister(HalfRC);
-  unsigned HalfSize = HalfRC->getSize();
-  const MachineOperand *OffOp =
-      getNamedOperand(*MI, AMDGPU::OpName::offset);
-  const MachineOperand *SBase = getNamedOperand(*MI, AMDGPU::OpName::sbase);
-
-  // The SMRD has an 8-bit offset in dwords on SI and a 20-bit offset in bytes
-  // on VI.
-
-  bool IsKill = SBase->isKill();
-  if (OffOp) {
-    bool isVI =
-        MBB->getParent()->getSubtarget<AMDGPUSubtarget>().getGeneration() >=
-        AMDGPUSubtarget::VOLCANIC_ISLANDS;
-    unsigned OffScale = isVI ? 1 : 4;
-    // Handle the _IMM variant
-    unsigned LoOffset = OffOp->getImm() * OffScale;
-    unsigned HiOffset = LoOffset + HalfSize;
-    Lo = BuildMI(*MBB, MI, DL, get(HalfImmOp), RegLo)
-             // Use addReg instead of addOperand
-             // to make sure kill flag is cleared.
-             .addReg(SBase->getReg(), 0, SBase->getSubReg())
-             .addImm(LoOffset / OffScale);
-
-    if (!isUInt<20>(HiOffset) || (!isVI && !isUInt<8>(HiOffset / OffScale))) {
-      unsigned OffsetSGPR =
-          MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
-      BuildMI(*MBB, MI, DL, get(AMDGPU::S_MOV_B32), OffsetSGPR)
-          .addImm(HiOffset); // The offset in register is in bytes.
-      Hi = BuildMI(*MBB, MI, DL, get(HalfSGPROp), RegHi)
-               .addReg(SBase->getReg(), getKillRegState(IsKill),
-                       SBase->getSubReg())
-               .addReg(OffsetSGPR);
-    } else {
-      Hi = BuildMI(*MBB, MI, DL, get(HalfImmOp), RegHi)
-               .addReg(SBase->getReg(), getKillRegState(IsKill),
-                       SBase->getSubReg())
-               .addImm(HiOffset / OffScale);
-    }
-  } else {
-    // Handle the _SGPR variant
-    MachineOperand *SOff = getNamedOperand(*MI, AMDGPU::OpName::soff);
-    Lo = BuildMI(*MBB, MI, DL, get(HalfSGPROp), RegLo)
-             .addReg(SBase->getReg(), 0, SBase->getSubReg())
-             .addOperand(*SOff);
-    unsigned OffsetSGPR = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
-    BuildMI(*MBB, MI, DL, get(AMDGPU::S_ADD_I32), OffsetSGPR)
-        .addReg(SOff->getReg(), 0, SOff->getSubReg())
-        .addImm(HalfSize);
-    Hi = BuildMI(*MBB, MI, DL, get(HalfSGPROp), RegHi)
-             .addReg(SBase->getReg(), getKillRegState(IsKill),
-                     SBase->getSubReg())
-             .addReg(OffsetSGPR);
-  }
-
-  unsigned SubLo, SubHi;
-  const TargetRegisterClass *NewDstRC;
-  switch (HalfSize) {
-  case 4:
-    SubLo = AMDGPU::sub0;
-    SubHi = AMDGPU::sub1;
-    NewDstRC = &AMDGPU::VReg_64RegClass;
-    break;
-  case 8:
-    SubLo = AMDGPU::sub0_sub1;
-    SubHi = AMDGPU::sub2_sub3;
-    NewDstRC = &AMDGPU::VReg_128RegClass;
-    break;
-  case 16:
-    SubLo = AMDGPU::sub0_sub1_sub2_sub3;
-    SubHi = AMDGPU::sub4_sub5_sub6_sub7;
-    NewDstRC = &AMDGPU::VReg_256RegClass;
-    break;
-  case 32:
-    SubLo = AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7;
-    SubHi = AMDGPU::sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15;
-    NewDstRC = &AMDGPU::VReg_512RegClass;
-    break;
-  default:
-    llvm_unreachable("Unhandled HalfSize");
-  }
-
-  unsigned OldDst = MI->getOperand(0).getReg();
-  unsigned NewDst = MRI.createVirtualRegister(NewDstRC);
-
-  MRI.replaceRegWith(OldDst, NewDst);
-
-  BuildMI(*MBB, MI, DL, get(AMDGPU::REG_SEQUENCE), NewDst)
-      .addReg(RegLo)
-      .addImm(SubLo)
-      .addReg(RegHi)
-      .addImm(SubHi);
-}
-
-void SIInstrInfo::moveSMRDToVALU(MachineInstr *MI,
-                                 MachineRegisterInfo &MRI,
-                                 SmallVectorImpl<MachineInstr *> &Worklist) const {
-  MachineBasicBlock *MBB = MI->getParent();
-  int DstIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst);
-  assert(DstIdx != -1);
-  unsigned DstRCID = get(MI->getOpcode()).OpInfo[DstIdx].RegClass;
-  switch (RI.getRegClass(DstRCID)->getSize()) {
-  case 4:
-  case 8:
-  case 16: {
-    unsigned NewOpcode = getVALUOp(*MI);
-    unsigned RegOffset;
-    unsigned ImmOffset;
-
-    if (MI->getOperand(2).isReg()) {
-      RegOffset = MI->getOperand(2).getReg();
-      ImmOffset = 0;
-    } else {
-      assert(MI->getOperand(2).isImm());
-      // SMRD instructions take a dword offset on SI and a byte offset on VI,
-      // and MUBUF instructions always take a byte offset.
-      ImmOffset = MI->getOperand(2).getImm();
-      if (MBB->getParent()->getSubtarget<AMDGPUSubtarget>().getGeneration() <=
-          AMDGPUSubtarget::SEA_ISLANDS)
-        ImmOffset <<= 2;
-      RegOffset = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
-
-      if (isUInt<12>(ImmOffset)) {
-        BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32),
-                RegOffset)
-            .addImm(0);
-      } else {
-        BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32),
-                RegOffset)
-            .addImm(ImmOffset);
-        ImmOffset = 0;
-      }
-    }
-
-    unsigned SRsrc = MRI.createVirtualRegister(&AMDGPU::SReg_128RegClass);
-    unsigned DWord0 = RegOffset;
-    unsigned DWord1 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
-    unsigned DWord2 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
-    unsigned DWord3 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
-    uint64_t RsrcDataFormat = getDefaultRsrcDataFormat();
-
-    BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), DWord1)
-        .addImm(0);
-    BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), DWord2)
-        .addImm(RsrcDataFormat & 0xFFFFFFFF);
-    BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), DWord3)
-        .addImm(RsrcDataFormat >> 32);
-    BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE), SRsrc)
-        .addReg(DWord0)
-        .addImm(AMDGPU::sub0)
-        .addReg(DWord1)
-        .addImm(AMDGPU::sub1)
-        .addReg(DWord2)
-        .addImm(AMDGPU::sub2)
-        .addReg(DWord3)
-        .addImm(AMDGPU::sub3);
-
-    const MCInstrDesc &NewInstDesc = get(NewOpcode);
-    const TargetRegisterClass *NewDstRC
-        = RI.getRegClass(NewInstDesc.OpInfo[0].RegClass);
-    unsigned NewDstReg = MRI.createVirtualRegister(NewDstRC);
-    unsigned DstReg = MI->getOperand(0).getReg();
-    MRI.replaceRegWith(DstReg, NewDstReg);
-
-    MachineInstr *NewInst =
-        BuildMI(*MBB, MI, MI->getDebugLoc(), NewInstDesc, NewDstReg)
-            .addOperand(MI->getOperand(1)) // sbase
-            .addReg(SRsrc)
-            .addImm(0)
-            .addImm(ImmOffset)
-            .addImm(0) // glc
-            .addImm(0) // slc
-            .addImm(0) // tfe
-            .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
-    MI->eraseFromParent();
-
-    legalizeOperands(NewInst);
-    addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist);
-    break;
-  }
-  case 32: {
-    MachineInstr *Lo, *Hi;
-    addUsersToMoveToVALUWorklist(MI->getOperand(0).getReg(), MRI, Worklist);
-    splitSMRD(MI, &AMDGPU::SReg_128RegClass, AMDGPU::S_LOAD_DWORDX4_IMM,
-              AMDGPU::S_LOAD_DWORDX4_SGPR, Lo, Hi);
-    MI->eraseFromParent();
-    moveSMRDToVALU(Lo, MRI, Worklist);
-    moveSMRDToVALU(Hi, MRI, Worklist);
-    break;
-  }
-
-  case 64: {
-    MachineInstr *Lo, *Hi;
-    addUsersToMoveToVALUWorklist(MI->getOperand(0).getReg(), MRI, Worklist);
-    splitSMRD(MI, &AMDGPU::SReg_256RegClass, AMDGPU::S_LOAD_DWORDX8_IMM,
-              AMDGPU::S_LOAD_DWORDX8_SGPR, Lo, Hi);
-    MI->eraseFromParent();
-    moveSMRDToVALU(Lo, MRI, Worklist);
-    moveSMRDToVALU(Hi, MRI, Worklist);
-    break;
-  }
-  }
-}
-
 void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
   SmallVector<MachineInstr *, 128> Worklist;
   Worklist.push_back(&TopInst);
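For context on what was deleted: splitSMRD existed because the widest BUFFER_LOAD fetches four dwords, so wider SMRD results had to be split in half, and the high half needed a recomputed offset that still fit the SMRD immediate encoding — 8 bits counted in dwords on SI/CI, 20 bits counted in bytes on VI. A minimal sketch of that encoding rule, assuming a byte offset as input (fitsSMRDImmediate is a name invented here, not from the patch):

```cpp
#include "llvm/Support/MathExtras.h" // for isUInt<N>
using namespace llvm;

// Sketch of the immediate-encoding check the deleted splitSMRD performed
// before falling back to materializing the offset in an SGPR.
static bool fitsSMRDImmediate(uint64_t ByteOffset, bool IsVI) {
  if (IsVI)
    return isUInt<20>(ByteOffset);          // VI: 20-bit offset, in bytes
  // SI/CI: 8-bit offset, counted in dwords, so it must be dword-aligned.
  return ByteOffset % 4 == 0 && isUInt<8>(ByteOffset / 4);
}
```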
@@ -2508,10 +2303,6 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
     // Handle some special cases
     switch (Opcode) {
     default:
-      if (isSMRD(*Inst)) {
-        moveSMRDToVALU(Inst, MRI, Worklist);
-        continue;
-      }
       break;
     case AMDGPU::S_AND_B64:
       splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::V_AND_B32_e64);
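With this last call site gone, the MUBUF fallback is fully retired. The removed moveSMRDToVALU path also had to conjure a buffer resource descriptor for the replacement load: a 128-bit SReg_128 built from four SGPR dwords, with the scalar offset in dword 0, zero in dword 1, and getDefaultRsrcDataFormat() split across dwords 2 and 3, while the 64-bit base pointer was passed as the ADDR64 vaddr operand. A plain-C++ sketch of that descriptor layout (struct and function names are invented for illustration):

```cpp
#include <cstdint>

// Illustrative only: the four 32-bit words the deleted code moved into
// sub0..sub3 of the SReg_128 resource used by BUFFER_LOAD_*_ADDR64.
struct BufferRsrcWords {
  uint32_t Word0; // scalar offset register value (RegOffset)
  uint32_t Word1; // zero
  uint32_t Word2; // getDefaultRsrcDataFormat() & 0xFFFFFFFF
  uint32_t Word3; // getDefaultRsrcDataFormat() >> 32
};

static BufferRsrcWords makeAddr64Rsrc(uint32_t ScalarOffset,
                                      uint64_t RsrcDataFormat) {
  return {ScalarOffset, 0u,
          static_cast<uint32_t>(RsrcDataFormat & 0xFFFFFFFF),
          static_cast<uint32_t>(RsrcDataFormat >> 32)};
}
```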