Commit 79b5c38

AMDGPU/GlobalISel: Move SMRD selection logic to TableGen

Reviewers: arsenm

Reviewed By: arsenm

Subscribers: volkan, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, rovka, kristof.beyls, dstuttard, tpr, t-tye, llvm-commits

Differential Revision: https://reviews.llvm.org/D52922

llvm-svn: 354516

1 parent d95da99 · commit 79b5c38

File tree: 5 files changed (+166 −141 lines changed)

llvm/lib/Target/AMDGPU/AMDGPUGISel.td

Lines changed: 24 additions & 0 deletions
```diff
@@ -12,6 +12,10 @@
 
 include "AMDGPU.td"
 
+def p0 : PtrValueType<i64, 0>;
+def p1 : PtrValueType<i64, 1>;
+def p4 : PtrValueType<i64, 4>;
+
 def sd_vsrc0 : ComplexPattern<i32, 1, "">;
 def gi_vsrc0 :
     GIComplexOperandMatcher<s32, "selectVSRC0">,
@@ -34,6 +38,18 @@ def gi_vop3omods :
     GIComplexOperandMatcher<s32, "selectVOP3OMods">,
     GIComplexPatternEquiv<VOP3OMods>;
 
+def gi_smrd_imm :
+    GIComplexOperandMatcher<s64, "selectSmrdImm">,
+    GIComplexPatternEquiv<SMRDImm>;
+
+def gi_smrd_imm32 :
+    GIComplexOperandMatcher<s64, "selectSmrdImm32">,
+    GIComplexPatternEquiv<SMRDImm32>;
+
+def gi_smrd_sgpr :
+    GIComplexOperandMatcher<s64, "selectSmrdSgpr">,
+    GIComplexPatternEquiv<SMRDSgpr>;
+
 class GISelSop2Pat <
   SDPatternOperator node,
   Instruction inst,
@@ -134,3 +150,11 @@ defm : GISelVop2IntrPat <int_maxnum, V_MAX_F32_e32, f32>;
 def : GISelVop3Pat2ModsPat <int_maxnum, V_MAX_F64, f64>;
 defm : GISelVop2IntrPat <int_minnum, V_MIN_F32_e32, f32>;
 def : GISelVop3Pat2ModsPat <int_minnum, V_MIN_F64, f64>;
+
+// Since GlobalISel is more flexible than SelectionDAG, I think we can get
+// away with adding patterns for integer types and not legalizing all
+// loads and stores to vector types. This should help simplify the load/store
+// legalization.
+foreach Ty = [i64, p0, p1, p4] in {
+  defm : SMRD_Pattern <"S_LOAD_DWORDX2", Ty>;
+}
```
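Each `GIComplexPatternEquiv` above ties a SelectionDAG `ComplexPattern` (`SMRDImm`, `SMRDImm32`, `SMRDSgpr`, defined in SMInstructions.td) to a C++ hook on `AMDGPUInstructionSelector`, which is what lets the tablegen-erated `selectImpl()` reuse the existing SMRD patterns. Roughly, the emitted matcher calls the hook on the load's pointer operand and, on success, replays the returned renderers onto the instruction it builds. The following is a hand-written sketch of that shape, not the emitter's actual output; the function name is invented, and the opcode assumes a 64-bit uniform load:

```cpp
// Sketch only (invented name, simplified control flow): how generated code
// consumes the gi_smrd_imm binding for a 64-bit uniform load.
bool AMDGPUInstructionSelector::trySmrdImmPattern(MachineInstr &I) const {
  // Run the bound C++ hook on the G_LOAD's pointer operand.
  InstructionSelector::ComplexRendererFns Fns = selectSmrdImm(I.getOperand(1));
  if (!Fns)
    return false; // Complex operand did not match; try the next pattern.

  MachineInstrBuilder MIB =
      BuildMI(*I.getParent(), I, I.getDebugLoc(),
              TII.get(AMDGPU::S_LOAD_DWORDX2_IMM))
          .add(I.getOperand(0)); // Destination register.
  for (const auto &Render : *Fns)
    Render(MIB); // Adds $sbase, then the encoded immediate offset.
  MIB.addImm(0); // glc, a constant operand in the pattern.
  I.eraseFromParent();
  return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
}
```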

llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp

Lines changed: 82 additions & 127 deletions
```diff
@@ -422,7 +422,7 @@ void AMDGPUInstructionSelector::getAddrModeInfo(const MachineInstr &Load,
   getAddrModeInfo(*PtrMI, MRI, AddrInfo);
 }
 
-static bool isInstrUniform(const MachineInstr &MI) {
+bool AMDGPUInstructionSelector::isInstrUniform(const MachineInstr &MI) const {
   if (!MI.hasOneMemOperand())
     return false;
 
@@ -444,52 +444,6 @@ static bool isInstrUniform(const MachineInstr &MI) {
   return I && I->getMetadata("amdgpu.uniform");
 }
 
-static unsigned getSmrdOpcode(unsigned BaseOpcode, unsigned LoadSize) {
-
-  if (LoadSize == 32)
-    return BaseOpcode;
-
-  switch (BaseOpcode) {
-  case AMDGPU::S_LOAD_DWORD_IMM:
-    switch (LoadSize) {
-    case 64:
-      return AMDGPU::S_LOAD_DWORDX2_IMM;
-    case 128:
-      return AMDGPU::S_LOAD_DWORDX4_IMM;
-    case 256:
-      return AMDGPU::S_LOAD_DWORDX8_IMM;
-    case 512:
-      return AMDGPU::S_LOAD_DWORDX16_IMM;
-    }
-    break;
-  case AMDGPU::S_LOAD_DWORD_IMM_ci:
-    switch (LoadSize) {
-    case 64:
-      return AMDGPU::S_LOAD_DWORDX2_IMM_ci;
-    case 128:
-      return AMDGPU::S_LOAD_DWORDX4_IMM_ci;
-    case 256:
-      return AMDGPU::S_LOAD_DWORDX8_IMM_ci;
-    case 512:
-      return AMDGPU::S_LOAD_DWORDX16_IMM_ci;
-    }
-    break;
-  case AMDGPU::S_LOAD_DWORD_SGPR:
-    switch (LoadSize) {
-    case 64:
-      return AMDGPU::S_LOAD_DWORDX2_SGPR;
-    case 128:
-      return AMDGPU::S_LOAD_DWORDX4_SGPR;
-    case 256:
-      return AMDGPU::S_LOAD_DWORDX8_SGPR;
-    case 512:
-      return AMDGPU::S_LOAD_DWORDX16_SGPR;
-    }
-    break;
-  }
-  llvm_unreachable("Invalid base smrd opcode or size");
-}
-
 bool AMDGPUInstructionSelector::hasVgprParts(ArrayRef<GEPInfo> AddrInfo) const {
   for (const GEPInfo &GEPInfo : AddrInfo) {
     if (!GEPInfo.VgprParts.empty())
@@ -498,81 +452,6 @@ bool AMDGPUInstructionSelector::hasVgprParts(ArrayRef<GEPInfo> AddrInfo) const {
   return false;
 }
 
-bool AMDGPUInstructionSelector::selectSMRD(MachineInstr &I,
-                                           ArrayRef<GEPInfo> AddrInfo) const {
-
-  if (!I.hasOneMemOperand())
-    return false;
-
-  if ((*I.memoperands_begin())->getAddrSpace() != AMDGPUAS::CONSTANT_ADDRESS &&
-      (*I.memoperands_begin())->getAddrSpace() != AMDGPUAS::CONSTANT_ADDRESS_32BIT)
-    return false;
-
-  if (!isInstrUniform(I))
-    return false;
-
-  if (hasVgprParts(AddrInfo))
-    return false;
-
-  MachineBasicBlock *BB = I.getParent();
-  MachineFunction *MF = BB->getParent();
-  const GCNSubtarget &Subtarget = MF->getSubtarget<GCNSubtarget>();
-  MachineRegisterInfo &MRI = MF->getRegInfo();
-  unsigned DstReg = I.getOperand(0).getReg();
-  const DebugLoc &DL = I.getDebugLoc();
-  unsigned Opcode;
-  unsigned LoadSize = RBI.getSizeInBits(DstReg, MRI, TRI);
-
-  if (!AddrInfo.empty() && AddrInfo[0].SgprParts.size() == 1) {
-
-    const GEPInfo &GEPInfo = AddrInfo[0];
-
-    unsigned PtrReg = GEPInfo.SgprParts[0];
-    int64_t EncodedImm = AMDGPU::getSMRDEncodedOffset(Subtarget, GEPInfo.Imm);
-    if (AMDGPU::isLegalSMRDImmOffset(Subtarget, GEPInfo.Imm)) {
-      Opcode = getSmrdOpcode(AMDGPU::S_LOAD_DWORD_IMM, LoadSize);
-
-      MachineInstr *SMRD = BuildMI(*BB, &I, DL, TII.get(Opcode), DstReg)
-                               .addReg(PtrReg)
-                               .addImm(EncodedImm)
-                               .addImm(0); // glc
-      return constrainSelectedInstRegOperands(*SMRD, TII, TRI, RBI);
-    }
-
-    if (Subtarget.getGeneration() == AMDGPUSubtarget::SEA_ISLANDS &&
-        isUInt<32>(EncodedImm)) {
-      Opcode = getSmrdOpcode(AMDGPU::S_LOAD_DWORD_IMM_ci, LoadSize);
-      MachineInstr *SMRD = BuildMI(*BB, &I, DL, TII.get(Opcode), DstReg)
-                               .addReg(PtrReg)
-                               .addImm(EncodedImm)
-                               .addImm(0); // glc
-      return constrainSelectedInstRegOperands(*SMRD, TII, TRI, RBI);
-    }
-
-    if (isUInt<32>(GEPInfo.Imm)) {
-      Opcode = getSmrdOpcode(AMDGPU::S_LOAD_DWORD_SGPR, LoadSize);
-      unsigned OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
-      BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_MOV_B32), OffsetReg)
-          .addImm(GEPInfo.Imm);
-
-      MachineInstr *SMRD = BuildMI(*BB, &I, DL, TII.get(Opcode), DstReg)
-                               .addReg(PtrReg)
-                               .addReg(OffsetReg)
-                               .addImm(0); // glc
-      return constrainSelectedInstRegOperands(*SMRD, TII, TRI, RBI);
-    }
-  }
-
-  unsigned PtrReg = I.getOperand(1).getReg();
-  Opcode = getSmrdOpcode(AMDGPU::S_LOAD_DWORD_IMM, LoadSize);
-  MachineInstr *SMRD = BuildMI(*BB, &I, DL, TII.get(Opcode), DstReg)
-                           .addReg(PtrReg)
-                           .addImm(0)
-                           .addImm(0); // glc
-  return constrainSelectedInstRegOperands(*SMRD, TII, TRI, RBI);
-}
-
-
 bool AMDGPUInstructionSelector::selectG_LOAD(MachineInstr &I) const {
   MachineBasicBlock *BB = I.getParent();
   MachineFunction *MF = BB->getParent();
@@ -587,11 +466,6 @@ bool AMDGPUInstructionSelector::selectG_LOAD(MachineInstr &I) const {
 
   getAddrModeInfo(I, MRI, AddrInfo);
 
-  if (selectSMRD(I, AddrInfo)) {
-    I.eraseFromParent();
-    return true;
-  }
-
   switch (LoadSize) {
   default:
     llvm_unreachable("Load size not supported\n");
@@ -644,6 +518,8 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I,
   case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
     return selectG_INTRINSIC_W_SIDE_EFFECTS(I, CoverageInfo);
   case TargetOpcode::G_LOAD:
+    if (selectImpl(I, CoverageInfo))
+      return true;
     return selectG_LOAD(I);
   case TargetOpcode::G_STORE:
     return selectG_STORE(I);
@@ -694,3 +570,82 @@ AMDGPUInstructionSelector::selectVOP3Mods(MachineOperand &Root) const {
       [=](MachineInstrBuilder &MIB) { MIB.addImm(0); } // src_mods
   }};
 }
+
+InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectSmrdImm(MachineOperand &Root) const {
+  MachineRegisterInfo &MRI =
+      Root.getParent()->getParent()->getParent()->getRegInfo();
+
+  SmallVector<GEPInfo, 4> AddrInfo;
+  getAddrModeInfo(*Root.getParent(), MRI, AddrInfo);
+
+  if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
+    return None;
+
+  const GEPInfo &GEPInfo = AddrInfo[0];
+
+  if (!AMDGPU::isLegalSMRDImmOffset(STI, GEPInfo.Imm))
+    return None;
+
+  unsigned PtrReg = GEPInfo.SgprParts[0];
+  int64_t EncodedImm = AMDGPU::getSMRDEncodedOffset(STI, GEPInfo.Imm);
+  return {{
+    [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
+    [=](MachineInstrBuilder &MIB) { MIB.addImm(EncodedImm); }
+  }};
+}
+
+InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectSmrdImm32(MachineOperand &Root) const {
+  MachineRegisterInfo &MRI =
+      Root.getParent()->getParent()->getParent()->getRegInfo();
+
+  SmallVector<GEPInfo, 4> AddrInfo;
+  getAddrModeInfo(*Root.getParent(), MRI, AddrInfo);
+
+  if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
+    return None;
+
+  const GEPInfo &GEPInfo = AddrInfo[0];
+  unsigned PtrReg = GEPInfo.SgprParts[0];
+  int64_t EncodedImm = AMDGPU::getSMRDEncodedOffset(STI, GEPInfo.Imm);
+  if (!isUInt<32>(EncodedImm))
+    return None;
+
+  return {{
+    [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
+    [=](MachineInstrBuilder &MIB) { MIB.addImm(EncodedImm); }
+  }};
+}
+
+InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectSmrdSgpr(MachineOperand &Root) const {
+  MachineInstr *MI = Root.getParent();
+  MachineBasicBlock *MBB = MI->getParent();
+  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+
+  SmallVector<GEPInfo, 4> AddrInfo;
+  getAddrModeInfo(*MI, MRI, AddrInfo);
+
+  // FIXME: We should shrink the GEP if the offset is known to be <= 32-bits,
+  // then we can select all ptr + 32-bit offsets, not just immediate offsets.
+  if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
+    return None;
+
+  const GEPInfo &GEPInfo = AddrInfo[0];
+  if (!GEPInfo.Imm || !isUInt<32>(GEPInfo.Imm))
+    return None;
+
+  // If we make it this far we have a load with a 32-bit immediate offset.
+  // It is OK to select this using an sgpr offset, because we have already
+  // failed trying to select this load into one of the _IMM variants since
+  // the _IMM Patterns are considered before the _SGPR patterns.
+  unsigned PtrReg = GEPInfo.SgprParts[0];
+  unsigned OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
+  BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), OffsetReg)
+      .addImm(GEPInfo.Imm);
+  return {{
+    [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
+    [=](MachineInstrBuilder &MIB) { MIB.addReg(OffsetReg); }
+  }};
+}
```
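One easy-to-miss detail in the three hooks above: every renderer lambda captures by value (`[=]`). The generated matcher invokes the renderers only after the whole pattern has matched, long after each `selectSmrd*` stack frame is gone, so capturing `PtrReg`, `EncodedImm`, or `OffsetReg` by reference would dangle. A minimal self-contained sketch of the idiom, with an invented helper name:

```cpp
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
using namespace llvm;

// Build two deferred operand renderers. Because Reg and Imm are captured by
// value, the closures remain valid when the generated code later applies
// them to the MachineInstrBuilder of the instruction being emitted.
static InstructionSelector::ComplexRendererFns
makeSmrdRenderers(unsigned Reg, int64_t Imm) {
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(Reg); }, // $sbase
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Imm); }, // $offset
  }};
}
```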

llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h

Lines changed: 8 additions & 0 deletions
```diff
@@ -57,6 +57,7 @@ class AMDGPUInstructionSelector : public InstructionSelector {
     GEPInfo(const MachineInstr &GEP) : GEP(GEP), Imm(0) { }
   };
 
+  bool isInstrUniform(const MachineInstr &MI) const;
   /// tblgen-erated 'select' implementation.
   bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;
 
@@ -89,6 +90,13 @@ class AMDGPUInstructionSelector : public InstructionSelector {
   InstructionSelector::ComplexRendererFns
   selectVOP3Mods(MachineOperand &Root) const;
 
+  InstructionSelector::ComplexRendererFns
+  selectSmrdImm(MachineOperand &Root) const;
+  InstructionSelector::ComplexRendererFns
+  selectSmrdImm32(MachineOperand &Root) const;
+  InstructionSelector::ComplexRendererFns
+  selectSmrdSgpr(MachineOperand &Root) const;
+
   const SIInstrInfo &TII;
   const SIRegisterInfo &TRI;
   const AMDGPURegisterBankInfo &RBI;
```

llvm/lib/Target/AMDGPU/SMInstructions.td

Lines changed: 22 additions & 1 deletion
```diff
@@ -682,7 +682,22 @@ def S_DCACHE_INV_VOL_ci : SMRD_Real_ci <0x1d, S_DCACHE_INV_VOL>;
 // Scalar Memory Patterns
 //===----------------------------------------------------------------------===//
 
-def smrd_load : PatFrag <(ops node:$ptr), (load node:$ptr), [{ return isUniformLoad(N);}]>;
+def smrd_load : PatFrag <(ops node:$ptr), (load node:$ptr), [{ return isUniformLoad(N);}]> {
+  let GISelPredicateCode = [{
+    if (!MI.hasOneMemOperand())
+      return false;
+    if (!isInstrUniform(MI))
+      return false;
+
+    // FIXME: We should probably be caching this.
+    SmallVector<GEPInfo, 4> AddrInfo;
+    getAddrModeInfo(MI, MRI, AddrInfo);
+
+    if (hasVgprParts(AddrInfo))
+      return false;
+    return true;
+  }];
+}
 
 def SMRDImm : ComplexPattern<i64, 2, "SelectSMRDImm">;
 def SMRDImm32 : ComplexPattern<i64, 2, "SelectSMRDImm32">;
@@ -710,6 +725,12 @@ multiclass SMRD_Pattern <string Instr, ValueType vt> {
     (smrd_load (SMRDSgpr i64:$sbase, i32:$offset)),
     (vt (!cast<SM_Pseudo>(Instr#"_SGPR") $sbase, $offset, 0))
   >;
+
+  // 4. No offset
+  def : GCNPat <
+    (vt (smrd_load (i64 SReg_64:$sbase))),
+    (vt (!cast<SM_Pseudo>(Instr#"_IMM") i64:$sbase, 0, 0))
+  >;
 }
 
 multiclass SMLoad_Pattern <string Instr, ValueType vt> {
```
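`GISelPredicateCode` embeds a C++ fragment into the tablegen-erated matcher; this is how the uniformity and VGPR checks deleted from `selectSMRD` survive the move to patterns. Inside the fragment, `MI` and `MRI` are in scope, and `isInstrUniform`, `getAddrModeInfo`, and `hasVgprParts` resolve as members of `AMDGPUInstructionSelector` (which is why `isInstrUniform` became a member function above). As a rough hand-written sketch of the predicate check the emitter produces (the real generated function and its name differ):

```cpp
// Sketch, not generated output: selectImpl() runs a test like this before
// accepting any pattern rooted in the smrd_load PatFrag.
bool AMDGPUInstructionSelector::testSmrdLoadPredicate(
    const MachineInstr &MI, const MachineRegisterInfo &MRI) const {
  if (!MI.hasOneMemOperand())
    return false;
  if (!isInstrUniform(MI))
    return false;

  // Mirrors the FIXME above: recomputed on every attempt, not cached.
  SmallVector<GEPInfo, 4> AddrInfo;
  getAddrModeInfo(MI, MRI, AddrInfo);

  return !hasVgprParts(AddrInfo);
}
```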
