Skip to content

AMDGPU: Extract lambda used in foldImmediate into a helper function #127484

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Feb 18, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 36 additions & 23 deletions llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3437,6 +3437,30 @@ void SIInstrInfo::removeModOperands(MachineInstr &MI) const {
}
}

std::optional<int64_t> SIInstrInfo::extractSubregFromImm(int64_t Imm,
unsigned SubRegIndex) {
switch (SubRegIndex) {
case AMDGPU::NoSubRegister:
return Imm;
case AMDGPU::sub0:
return Lo_32(Imm);
case AMDGPU::sub1:
return Hi_32(Imm);
case AMDGPU::lo16:
return SignExtend64<16>(Imm);
case AMDGPU::hi16:
return SignExtend64<16>(Imm >> 16);
case AMDGPU::sub1_lo16:
return SignExtend64<16>(Imm >> 32);
case AMDGPU::sub1_hi16:
return SignExtend64<16>(Imm >> 48);
default:
return std::nullopt;
}

llvm_unreachable("covered subregister switch");
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

do we really need this to avoid compiler warning?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, this should be the version that makes msvc, gcc, and clang happy

}

bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
Register Reg, MachineRegisterInfo *MRI) const {
if (!MRI->hasOneNonDBGUse(Reg))
Expand All @@ -3446,25 +3470,6 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
if (!getConstValDefinedInReg(DefMI, Reg, Imm))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[Re: line +3469]

nit: I'd still prefer to initialize it even though if getConstValDefinedInReg returns true, it will be initialized.

See this comment inline on Graphite.

return false;

auto getImmFor = [=](const MachineOperand &UseOp) -> int64_t {
switch (UseOp.getSubReg()) {
default:
return Imm;
case AMDGPU::sub0:
return Lo_32(Imm);
case AMDGPU::sub1:
return Hi_32(Imm);
case AMDGPU::lo16:
return SignExtend64<16>(Imm);
case AMDGPU::hi16:
return SignExtend64<16>(Imm >> 16);
case AMDGPU::sub1_lo16:
return SignExtend64<16>(Imm >> 32);
case AMDGPU::sub1_hi16:
return SignExtend64<16>(Imm >> 48);
}
};

assert(!DefMI.getOperand(0).getSubReg() && "Expected SSA form");

unsigned Opc = UseMI.getOpcode();
Expand All @@ -3480,7 +3485,11 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
: AMDGPU::V_MOV_B32_e32
: Is64Bit ? AMDGPU::S_MOV_B64_IMM_PSEUDO
: AMDGPU::S_MOV_B32;
APInt Imm(Is64Bit ? 64 : 32, getImmFor(UseMI.getOperand(1)),

std::optional<int64_t> SubRegImm =
extractSubregFromImm(Imm, UseMI.getOperand(1).getSubReg());

APInt Imm(Is64Bit ? 64 : 32, *SubRegImm,
/*isSigned=*/true, /*implicitTrunc=*/true);

if (RI.isAGPR(*MRI, DstReg)) {
Expand Down Expand Up @@ -3591,7 +3600,8 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
if (NewOpc == AMDGPU::V_FMAMK_F16_fake16)
return false;

const int64_t Imm = getImmFor(RegSrc == Src1 ? *Src0 : *Src1);
const std::optional<int64_t> SubRegImm = extractSubregFromImm(
Imm, RegSrc == Src1 ? Src0->getSubReg() : Src1->getSubReg());

// FIXME: This would be a lot easier if we could return a new instruction
// instead of having to modify in place.
Expand All @@ -3608,7 +3618,7 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
UseMI.untieRegOperand(
AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));

Src1->ChangeToImmediate(Imm);
Src1->ChangeToImmediate(*SubRegImm);

removeModOperands(UseMI);
UseMI.setDesc(get(NewOpc));
Expand Down Expand Up @@ -3679,8 +3689,11 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
UseMI.untieRegOperand(
AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));

const std::optional<int64_t> SubRegImm =
extractSubregFromImm(Imm, Src2->getSubReg());

// ChangingToImmediate adds Src2 back to the instruction.
Src2->ChangeToImmediate(getImmFor(*Src2));
Src2->ChangeToImmediate(*SubRegImm);

// These come before src2.
removeModOperands(UseMI);
Expand Down
9 changes: 9 additions & 0 deletions llvm/lib/Target/AMDGPU/SIInstrInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -401,6 +401,15 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {

void removeModOperands(MachineInstr &MI) const;

/// Return the extracted immediate value in a subregister use from a constant
/// materialized in a super register.
///
/// e.g. %imm = S_MOV_B64 K[0:63]
/// USE %imm.sub1
/// This will return K[32:63]
static std::optional<int64_t> extractSubregFromImm(int64_t ImmVal,
unsigned SubRegIndex);

bool foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg,
MachineRegisterInfo *MRI) const final;

Expand Down