Skip to content

Commit bc1a37b

Browse files
committed
feedback
1 parent 232c014 commit bc1a37b

File tree

1 file changed

+17
-12
lines changed

1 file changed

+17
-12
lines changed

llvm/lib/Target/AMDGPU/SIFoldOperands.cpp

Lines changed: 17 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2200,9 +2200,14 @@ bool SIFoldOperandsImpl::tryFoldImmRegSequence(MachineInstr &MI) {
22002200
assert(MI.isRegSequence());
22012201
auto Reg = MI.getOperand(0).getReg();
22022202
const TargetRegisterClass *DefRC = MRI->getRegClass(Reg);
2203+
const MCInstrDesc &MovDesc = TII->get(AMDGPU::V_MOV_B64_PSEUDO);
2204+
const TargetRegisterClass *RC =
2205+
TII->getRegClass(MovDesc, 0, TRI, *MI.getMF());
22032206

22042207
if (!ST->hasMovB64() || !TRI->isVGPR(*MRI, Reg) ||
2205-
!MRI->hasOneNonDBGUse(Reg) || !TRI->isProperlyAlignedRC(*DefRC))
2208+
!MRI->hasOneNonDBGUse(Reg) ||
2209+
(!TRI->getCompatibleSubRegClass(DefRC, RC, AMDGPU::sub0_sub1) &&
2210+
DefRC != RC))
22062211
return false;
22072212

22082213
SmallVector<std::pair<MachineOperand *, unsigned>, 32> Defs;
@@ -2211,10 +2216,10 @@ bool SIFoldOperandsImpl::tryFoldImmRegSequence(MachineInstr &MI) {
22112216

22122217
// Only attempting to fold immediate materializations.
22132218
if (!Defs.empty() &&
2214-
!std::all_of(Defs.begin(), Defs.end(),
2215-
[](const std::pair<MachineOperand *, unsigned> &Op) {
2216-
return Op.first->isImm();
2217-
}))
2219+
std::any_of(Defs.begin(), Defs.end(),
2220+
[](const std::pair<MachineOperand *, unsigned> &Op) {
2221+
return !Op.first->isImm();
2222+
}))
22182223
return false;
22192224

22202225
SmallVector<uint64_t, 8> ImmVals;
@@ -2240,9 +2245,8 @@ bool SIFoldOperandsImpl::tryFoldImmRegSequence(MachineInstr &MI) {
22402245
}
22412246

22422247
// Can only combine REG_SEQUENCE into one 64b immediate materialization mov.
2243-
if (DefRC == TRI->getVGPR64Class()) {
2244-
BuildMI(*MI.getParent(), MI, MI.getDebugLoc(),
2245-
TII->get(AMDGPU::V_MOV_B64_PSEUDO), Reg)
2248+
if (DefRC == RC) {
2249+
BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), MovDesc, Reg)
22462250
.addImm(ImmVals[0]);
22472251
MI.eraseFromParent();
22482252
return true;
@@ -2257,21 +2261,22 @@ bool SIFoldOperandsImpl::tryFoldImmRegSequence(MachineInstr &MI) {
22572261
for (unsigned i = MI.getNumOperands() - 1; i > 0; --i)
22582262
MI.removeOperand(i);
22592263

2260-
for (unsigned i = 0; i < ImmVals.size(); ++i) {
2261-
const TargetRegisterClass *RC = TRI->getVGPR64Class();
2264+
unsigned Ch = 0;
2265+
for (uint64_t Val : ImmVals) {
22622266
Register MovReg = MRI->createVirtualRegister(RC);
22632267
// Duplicate vmov imm materializations (e.g., splatted operands) should get
22642268
// combined by MachineCSE pass.
22652269
BuildMI(*MI.getParent(), MI, MI.getDebugLoc(),
22662270
TII->get(AMDGPU::V_MOV_B64_PSEUDO), MovReg)
2267-
.addImm(ImmVals[i]);
2271+
.addImm(Val);
22682272

22692273
// 2 subregs with no overlap (e.g., sub0_sub1, sub2_sub3).
22702274
unsigned SubReg64B =
2271-
SIRegisterInfo::getSubRegFromChannel(/*Channel=*/i * 2, /*SubRegs=*/2);
2275+
SIRegisterInfo::getSubRegFromChannel(/*Channel=*/Ch * 2, /*SubRegs=*/2);
22722276

22732277
MI.addOperand(MachineOperand::CreateReg(MovReg, /*isDef=*/false));
22742278
MI.addOperand(MachineOperand::CreateImm(SubReg64B));
2279+
++Ch;
22752280
}
22762281

22772282
LLVM_DEBUG(dbgs() << "Folded into " << MI);

0 commit comments

Comments
 (0)