@@ -422,7 +422,7 @@ void AMDGPUInstructionSelector::getAddrModeInfo(const MachineInstr &Load,
   getAddrModeInfo(*PtrMI, MRI, AddrInfo);
 }
 
-static bool isInstrUniform(const MachineInstr &MI) {
+bool AMDGPUInstructionSelector::isInstrUniform(const MachineInstr &MI) const {
   if (!MI.hasOneMemOperand())
     return false;
 
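Note on this hunk: isInstrUniform() is promoted from a file-local static helper to a const member, presumably so the predicate can be shared with the selector's new member hooks below and with the TableGen-erated selectImpl() path. The matching declarations in AMDGPUInstructionSelector.h are not part of the hunks shown; a sketch of their assumed shape, with names mirroring the definitions added later in this diff:

// Assumed header-side declarations (AMDGPUInstructionSelector.h is not
// shown in this commit excerpt); a sketch, not the verbatim header.
class AMDGPUInstructionSelector final : public InstructionSelector {
  // ...
  bool isInstrUniform(const MachineInstr &MI) const;
  InstructionSelector::ComplexRendererFns
  selectSmrdImm(MachineOperand &Root) const;
  InstructionSelector::ComplexRendererFns
  selectSmrdImm32(MachineOperand &Root) const;
  InstructionSelector::ComplexRendererFns
  selectSmrdSgpr(MachineOperand &Root) const;
  // ...
};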
@@ -444,52 +444,6 @@ static bool isInstrUniform(const MachineInstr &MI) {
   return I && I->getMetadata("amdgpu.uniform");
 }
 
-static unsigned getSmrdOpcode(unsigned BaseOpcode, unsigned LoadSize) {
-
-  if (LoadSize == 32)
-    return BaseOpcode;
-
-  switch (BaseOpcode) {
-  case AMDGPU::S_LOAD_DWORD_IMM:
-    switch (LoadSize) {
-    case 64:
-      return AMDGPU::S_LOAD_DWORDX2_IMM;
-    case 128:
-      return AMDGPU::S_LOAD_DWORDX4_IMM;
-    case 256:
-      return AMDGPU::S_LOAD_DWORDX8_IMM;
-    case 512:
-      return AMDGPU::S_LOAD_DWORDX16_IMM;
-    }
-    break;
-  case AMDGPU::S_LOAD_DWORD_IMM_ci:
-    switch (LoadSize) {
-    case 64:
-      return AMDGPU::S_LOAD_DWORDX2_IMM_ci;
-    case 128:
-      return AMDGPU::S_LOAD_DWORDX4_IMM_ci;
-    case 256:
-      return AMDGPU::S_LOAD_DWORDX8_IMM_ci;
-    case 512:
-      return AMDGPU::S_LOAD_DWORDX16_IMM_ci;
-    }
-    break;
-  case AMDGPU::S_LOAD_DWORD_SGPR:
-    switch (LoadSize) {
-    case 64:
-      return AMDGPU::S_LOAD_DWORDX2_SGPR;
-    case 128:
-      return AMDGPU::S_LOAD_DWORDX4_SGPR;
-    case 256:
-      return AMDGPU::S_LOAD_DWORDX8_SGPR;
-    case 512:
-      return AMDGPU::S_LOAD_DWORDX16_SGPR;
-    }
-    break;
-  }
-  llvm_unreachable("Invalid base smrd opcode or size");
-}
-
 bool AMDGPUInstructionSelector::hasVgprParts(ArrayRef<GEPInfo> AddrInfo) const {
   for (const GEPInfo &GEPInfo : AddrInfo) {
     if (!GEPInfo.VgprParts.empty())
@@ -498,81 +452,6 @@ bool AMDGPUInstructionSelector::hasVgprParts(ArrayRef<GEPInfo> AddrInfo) const {
   return false;
 }
 
-bool AMDGPUInstructionSelector::selectSMRD(MachineInstr &I,
-                                           ArrayRef<GEPInfo> AddrInfo) const {
-
-  if (!I.hasOneMemOperand())
-    return false;
-
-  if ((*I.memoperands_begin())->getAddrSpace() != AMDGPUAS::CONSTANT_ADDRESS &&
-      (*I.memoperands_begin())->getAddrSpace() != AMDGPUAS::CONSTANT_ADDRESS_32BIT)
-    return false;
-
-  if (!isInstrUniform(I))
-    return false;
-
-  if (hasVgprParts(AddrInfo))
-    return false;
-
-  MachineBasicBlock *BB = I.getParent();
-  MachineFunction *MF = BB->getParent();
-  const GCNSubtarget &Subtarget = MF->getSubtarget<GCNSubtarget>();
-  MachineRegisterInfo &MRI = MF->getRegInfo();
-  unsigned DstReg = I.getOperand(0).getReg();
-  const DebugLoc &DL = I.getDebugLoc();
-  unsigned Opcode;
-  unsigned LoadSize = RBI.getSizeInBits(DstReg, MRI, TRI);
-
-  if (!AddrInfo.empty() && AddrInfo[0].SgprParts.size() == 1) {
-
-    const GEPInfo &GEPInfo = AddrInfo[0];
-
-    unsigned PtrReg = GEPInfo.SgprParts[0];
-    int64_t EncodedImm = AMDGPU::getSMRDEncodedOffset(Subtarget, GEPInfo.Imm);
-    if (AMDGPU::isLegalSMRDImmOffset(Subtarget, GEPInfo.Imm)) {
-      Opcode = getSmrdOpcode(AMDGPU::S_LOAD_DWORD_IMM, LoadSize);
-
-      MachineInstr *SMRD = BuildMI(*BB, &I, DL, TII.get(Opcode), DstReg)
-                               .addReg(PtrReg)
-                               .addImm(EncodedImm)
-                               .addImm(0); // glc
-      return constrainSelectedInstRegOperands(*SMRD, TII, TRI, RBI);
-    }
-
-    if (Subtarget.getGeneration() == AMDGPUSubtarget::SEA_ISLANDS &&
-        isUInt<32>(EncodedImm)) {
-      Opcode = getSmrdOpcode(AMDGPU::S_LOAD_DWORD_IMM_ci, LoadSize);
-      MachineInstr *SMRD = BuildMI(*BB, &I, DL, TII.get(Opcode), DstReg)
-                               .addReg(PtrReg)
-                               .addImm(EncodedImm)
-                               .addImm(0); // glc
-      return constrainSelectedInstRegOperands(*SMRD, TII, TRI, RBI);
-    }
-
-    if (isUInt<32>(GEPInfo.Imm)) {
-      Opcode = getSmrdOpcode(AMDGPU::S_LOAD_DWORD_SGPR, LoadSize);
-      unsigned OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
-      BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_MOV_B32), OffsetReg)
-          .addImm(GEPInfo.Imm);
-
-      MachineInstr *SMRD = BuildMI(*BB, &I, DL, TII.get(Opcode), DstReg)
-                               .addReg(PtrReg)
-                               .addReg(OffsetReg)
-                               .addImm(0); // glc
-      return constrainSelectedInstRegOperands(*SMRD, TII, TRI, RBI);
-    }
-  }
-
-  unsigned PtrReg = I.getOperand(1).getReg();
-  Opcode = getSmrdOpcode(AMDGPU::S_LOAD_DWORD_IMM, LoadSize);
-  MachineInstr *SMRD = BuildMI(*BB, &I, DL, TII.get(Opcode), DstReg)
-                           .addReg(PtrReg)
-                           .addImm(0)
-                           .addImm(0); // glc
-  return constrainSelectedInstRegOperands(*SMRD, TII, TRI, RBI);
-}
-
-
 bool AMDGPUInstructionSelector::selectG_LOAD(MachineInstr &I) const {
   MachineBasicBlock *BB = I.getParent();
   MachineFunction *MF = BB->getParent();
@@ -587,11 +466,6 @@ bool AMDGPUInstructionSelector::selectG_LOAD(MachineInstr &I) const {
 
   getAddrModeInfo(I, MRI, AddrInfo);
 
-  if (selectSMRD(I, AddrInfo)) {
-    I.eraseFromParent();
-    return true;
-  }
-
   switch (LoadSize) {
   default:
     llvm_unreachable("Load size not supported\n");
@@ -644,6 +518,8 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I,
   case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
     return selectG_INTRINSIC_W_SIDE_EFFECTS(I, CoverageInfo);
   case TargetOpcode::G_LOAD:
+    if (selectImpl(I, CoverageInfo))
+      return true;
     return selectG_LOAD(I);
  case TargetOpcode::G_STORE:
    return selectG_STORE(I);
@@ -694,3 +570,82 @@ AMDGPUInstructionSelector::selectVOP3Mods(MachineOperand &Root) const {
     [=](MachineInstrBuilder &MIB) { MIB.addImm(0); } // src_mods
   }};
 }
+
+InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectSmrdImm(MachineOperand &Root) const {
+  MachineRegisterInfo &MRI =
+      Root.getParent()->getParent()->getParent()->getRegInfo();
+
+  SmallVector<GEPInfo, 4> AddrInfo;
+  getAddrModeInfo(*Root.getParent(), MRI, AddrInfo);
+
+  if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
+    return None;
+
+  const GEPInfo &GEPInfo = AddrInfo[0];
+
+  if (!AMDGPU::isLegalSMRDImmOffset(STI, GEPInfo.Imm))
+    return None;
+
+  unsigned PtrReg = GEPInfo.SgprParts[0];
+  int64_t EncodedImm = AMDGPU::getSMRDEncodedOffset(STI, GEPInfo.Imm);
+  return {{
+    [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
+    [=](MachineInstrBuilder &MIB) { MIB.addImm(EncodedImm); }
+  }};
+}
+
+InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectSmrdImm32(MachineOperand &Root) const {
+  MachineRegisterInfo &MRI =
+      Root.getParent()->getParent()->getParent()->getRegInfo();
+
+  SmallVector<GEPInfo, 4> AddrInfo;
+  getAddrModeInfo(*Root.getParent(), MRI, AddrInfo);
+
+  if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
+    return None;
+
+  const GEPInfo &GEPInfo = AddrInfo[0];
+  unsigned PtrReg = GEPInfo.SgprParts[0];
+  int64_t EncodedImm = AMDGPU::getSMRDEncodedOffset(STI, GEPInfo.Imm);
+  if (!isUInt<32>(EncodedImm))
+    return None;
+
+  return {{
+    [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
+    [=](MachineInstrBuilder &MIB) { MIB.addImm(EncodedImm); }
+  }};
+}
+
+InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectSmrdSgpr(MachineOperand &Root) const {
+  MachineInstr *MI = Root.getParent();
+  MachineBasicBlock *MBB = MI->getParent();
+  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+
+  SmallVector<GEPInfo, 4> AddrInfo;
+  getAddrModeInfo(*MI, MRI, AddrInfo);
+
+  // FIXME: We should shrink the GEP if the offset is known to be <= 32-bits,
+  // then we can select all ptr + 32-bit offsets, not just immediate offsets.
+  if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
+    return None;
+
+  const GEPInfo &GEPInfo = AddrInfo[0];
+  if (!GEPInfo.Imm || !isUInt<32>(GEPInfo.Imm))
+    return None;
+
+  // If we make it this far, we have a load with a 32-bit immediate offset.
+  // It is OK to select this using an SGPR offset, because we have already
+  // failed trying to select this load into one of the _IMM variants since
+  // the _IMM patterns are considered before the _SGPR patterns.
+  unsigned PtrReg = GEPInfo.SgprParts[0];
+  unsigned OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
+  BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), OffsetReg)
+      .addImm(GEPInfo.Imm);
+  return {{
+    [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
+    [=](MachineInstrBuilder &MIB) { MIB.addReg(OffsetReg); }
+  }};
+}