Skip to content

Commit 6468e44

Browse files
author
git apple-llvm automerger
committed
Merge commit 'b70cb5020416' from llvm.org/master into apple/main
2 parents 683fa00 + b70cb50 commit 6468e44

File tree

2 files changed

+86
-55
lines changed

2 files changed

+86
-55
lines changed

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 57 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -637,6 +637,54 @@ static void indirectCopyToAGPR(const SIInstrInfo &TII,
637637
DefBuilder.addReg(ImpDefSuperReg, RegState::Define | RegState::Implicit);
638638
}
639639

640+
static void expandSGPRCopy(const SIInstrInfo &TII, MachineBasicBlock &MBB,
641+
MachineBasicBlock::iterator MI, const DebugLoc &DL,
642+
MCRegister DestReg, MCRegister SrcReg, bool KillSrc,
643+
const TargetRegisterClass *RC, bool Forward) {
644+
const SIRegisterInfo &RI = TII.getRegisterInfo();
645+
ArrayRef<int16_t> BaseIndices = RI.getRegSplitParts(RC, 4);
646+
MachineBasicBlock::iterator I = MI;
647+
MachineInstr *FirstMI = nullptr, *LastMI = nullptr;
648+
649+
for (unsigned Idx = 0; Idx < BaseIndices.size(); ++Idx) {
650+
int16_t SubIdx = BaseIndices[Idx];
651+
Register Reg = RI.getSubReg(DestReg, SubIdx);
652+
unsigned Opcode = AMDGPU::S_MOV_B32;
653+
654+
// Is SGPR aligned? If so try to combine with next.
655+
Register Src = RI.getSubReg(SrcReg, SubIdx);
656+
bool AlignedDest = ((Reg - AMDGPU::SGPR0) % 2) == 0;
657+
bool AlignedSrc = ((Src - AMDGPU::SGPR0) % 2) == 0;
658+
if (AlignedDest && AlignedSrc && (Idx + 1 < BaseIndices.size())) {
659+
// Can use SGPR64 copy
660+
unsigned Channel = RI.getChannelFromSubReg(SubIdx);
661+
SubIdx = RI.getSubRegFromChannel(Channel, 2);
662+
Opcode = AMDGPU::S_MOV_B64;
663+
Idx++;
664+
}
665+
666+
LastMI = BuildMI(MBB, I, DL, TII.get(Opcode), RI.getSubReg(DestReg, SubIdx))
667+
.addReg(RI.getSubReg(SrcReg, SubIdx))
668+
.addReg(SrcReg, RegState::Implicit);
669+
670+
if (!FirstMI)
671+
FirstMI = LastMI;
672+
673+
if (!Forward)
674+
I--;
675+
}
676+
677+
assert(FirstMI && LastMI);
678+
if (!Forward)
679+
std::swap(FirstMI, LastMI);
680+
681+
FirstMI->addOperand(
682+
MachineOperand::CreateReg(DestReg, true /*IsDef*/, true /*IsImp*/));
683+
684+
if (KillSrc)
685+
LastMI->addRegisterKilled(SrcReg, &RI);
686+
}
687+
640688
void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
641689
MachineBasicBlock::iterator MI,
642690
const DebugLoc &DL, MCRegister DestReg,
@@ -842,41 +890,34 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
842890
return;
843891
}
844892

845-
unsigned EltSize = 4;
846-
unsigned Opcode = AMDGPU::V_MOV_B32_e32;
893+
const bool Forward = RI.getHWRegIndex(DestReg) <= RI.getHWRegIndex(SrcReg);
847894
if (RI.isSGPRClass(RC)) {
848-
// TODO: Copy vec3/vec5 with s_mov_b64s then final s_mov_b32.
849-
if (!(RI.getRegSizeInBits(*RC) % 64)) {
850-
Opcode = AMDGPU::S_MOV_B64;
851-
EltSize = 8;
852-
} else {
853-
Opcode = AMDGPU::S_MOV_B32;
854-
EltSize = 4;
855-
}
856-
857895
if (!RI.isSGPRClass(RI.getPhysRegClass(SrcReg))) {
858896
reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc);
859897
return;
860898
}
861-
} else if (RI.hasAGPRs(RC)) {
899+
expandSGPRCopy(*this, MBB, MI, DL, DestReg, SrcReg, KillSrc, RC, Forward);
900+
return;
901+
}
902+
903+
unsigned Opcode = AMDGPU::V_MOV_B32_e32;
904+
if (RI.hasAGPRs(RC)) {
862905
Opcode = RI.hasVGPRs(RI.getPhysRegClass(SrcReg)) ?
863906
AMDGPU::V_ACCVGPR_WRITE_B32 : AMDGPU::INSTRUCTION_LIST_END;
864907
} else if (RI.hasVGPRs(RC) && RI.hasAGPRs(RI.getPhysRegClass(SrcReg))) {
865908
Opcode = AMDGPU::V_ACCVGPR_READ_B32;
866909
}
867910

868911
// For the cases where we need an intermediate instruction/temporary register
869-
// (the result is an SGPR, and the source is either an SGPR or AGPR), we need
870-
// a scavenger.
912+
// (destination is an AGPR), we need a scavenger.
871913
//
872914
// FIXME: The pass should maintain this for us so we don't have to re-scan the
873915
// whole block for every handled copy.
874916
std::unique_ptr<RegScavenger> RS;
875917
if (Opcode == AMDGPU::INSTRUCTION_LIST_END)
876918
RS.reset(new RegScavenger());
877919

878-
ArrayRef<int16_t> SubIndices = RI.getRegSplitParts(RC, EltSize);
879-
bool Forward = RI.getHWRegIndex(DestReg) <= RI.getHWRegIndex(SrcReg);
920+
ArrayRef<int16_t> SubIndices = RI.getRegSplitParts(RC, 4);
880921

881922
for (unsigned Idx = 0; Idx < SubIndices.size(); ++Idx) {
882923
unsigned SubIdx;
@@ -885,7 +926,6 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
885926
else
886927
SubIdx = SubIndices[SubIndices.size() - Idx - 1];
887928

888-
889929
bool UseKill = KillSrc && Idx == SubIndices.size() - 1;
890930

891931
if (Opcode == AMDGPU::INSTRUCTION_LIST_END) {

0 commit comments

Comments
 (0)