Skip to content

Commit dde38cd

Browse files
committed
feedback
1 parent 4c57893 commit dde38cd

File tree

1 file changed

+17
-12
lines changed

1 file changed

+17
-12
lines changed

llvm/lib/Target/AMDGPU/SIFoldOperands.cpp

Lines changed: 17 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -2205,9 +2205,14 @@ bool SIFoldOperandsImpl::tryFoldImmRegSequence(MachineInstr &MI) {
22052205
assert(MI.isRegSequence());
22062206
auto Reg = MI.getOperand(0).getReg();
22072207
const TargetRegisterClass *DefRC = MRI->getRegClass(Reg);
2208+
const MCInstrDesc &MovDesc = TII->get(AMDGPU::V_MOV_B64_PSEUDO);
2209+
const TargetRegisterClass *RC =
2210+
TII->getRegClass(MovDesc, 0, TRI, *MI.getMF());
22082211

22092212
if (!ST->hasMovB64() || !TRI->isVGPR(*MRI, Reg) ||
2210-
!MRI->hasOneNonDBGUse(Reg) || !TRI->isProperlyAlignedRC(*DefRC))
2213+
!MRI->hasOneNonDBGUse(Reg) ||
2214+
(!TRI->getCompatibleSubRegClass(DefRC, RC, AMDGPU::sub0_sub1) &&
2215+
DefRC != RC))
22112216
return false;
22122217

22132218
SmallVector<std::pair<MachineOperand *, unsigned>, 32> Defs;
@@ -2216,10 +2221,10 @@ bool SIFoldOperandsImpl::tryFoldImmRegSequence(MachineInstr &MI) {
22162221

22172222
// Only attempting to fold immediate materializations.
22182223
if (!Defs.empty() &&
2219-
!std::all_of(Defs.begin(), Defs.end(),
2220-
[](const std::pair<MachineOperand *, unsigned> &Op) {
2221-
return Op.first->isImm();
2222-
}))
2224+
std::any_of(Defs.begin(), Defs.end(),
2225+
[](const std::pair<MachineOperand *, unsigned> &Op) {
2226+
return !Op.first->isImm();
2227+
}))
22232228
return false;
22242229

22252230
SmallVector<uint64_t, 8> ImmVals;
@@ -2245,9 +2250,8 @@ bool SIFoldOperandsImpl::tryFoldImmRegSequence(MachineInstr &MI) {
22452250
}
22462251

22472252
// Can only combine REG_SEQUENCE into one 64b immediate materialization mov.
2248-
if (DefRC == TRI->getVGPR64Class()) {
2249-
BuildMI(*MI.getParent(), MI, MI.getDebugLoc(),
2250-
TII->get(AMDGPU::V_MOV_B64_PSEUDO), Reg)
2253+
if (DefRC == RC) {
2254+
BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), MovDesc, Reg)
22512255
.addImm(ImmVals[0]);
22522256
MI.eraseFromParent();
22532257
return true;
@@ -2262,21 +2266,22 @@ bool SIFoldOperandsImpl::tryFoldImmRegSequence(MachineInstr &MI) {
22622266
for (unsigned i = MI.getNumOperands() - 1; i > 0; --i)
22632267
MI.removeOperand(i);
22642268

2265-
for (unsigned i = 0; i < ImmVals.size(); ++i) {
2266-
const TargetRegisterClass *RC = TRI->getVGPR64Class();
2269+
unsigned Ch = 0;
2270+
for (uint64_t Val : ImmVals) {
22672271
Register MovReg = MRI->createVirtualRegister(RC);
22682272
// Duplicate vmov imm materializations (e.g., splatted operands) should get
22692273
// combined by MachineCSE pass.
22702274
BuildMI(*MI.getParent(), MI, MI.getDebugLoc(),
22712275
TII->get(AMDGPU::V_MOV_B64_PSEUDO), MovReg)
2272-
.addImm(ImmVals[i]);
2276+
.addImm(Val);
22732277

22742278
// 2 subregs with no overlap (i.e., sub0_sub1, sub2_sub3, etc.).
22752279
unsigned SubReg64B =
2276-
SIRegisterInfo::getSubRegFromChannel(/*Channel=*/i * 2, /*SubRegs=*/2);
2280+
SIRegisterInfo::getSubRegFromChannel(/*Channel=*/Ch * 2, /*SubRegs=*/2);
22772281

22782282
MI.addOperand(MachineOperand::CreateReg(MovReg, /*isDef=*/false));
22792283
MI.addOperand(MachineOperand::CreateImm(SubReg64B));
2284+
++Ch;
22802285
}
22812286

22822287
LLVM_DEBUG(dbgs() << "Folded into " << MI);

0 commit comments

Comments
 (0)