Skip to content

[RISCV] Eliminate getVLENFactoredAmount and expose muladd [nfc] #87881

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Apr 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 24 additions & 35 deletions llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3052,48 +3052,37 @@ MachineInstr *RISCVInstrInfo::convertToThreeAddress(MachineInstr &MI,
#undef CASE_WIDEOP_OPCODE_LMULS
#undef CASE_WIDEOP_OPCODE_COMMON

void RISCVInstrInfo::getVLENFactoredAmount(MachineFunction &MF,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator II,
const DebugLoc &DL, Register DestReg,
int64_t Amount,
MachineInstr::MIFlag Flag) const {
assert(Amount > 0 && "There is no need to get VLEN scaled value.");
assert(Amount % 8 == 0 &&
"Reserve the stack by the multiple of one vector size.");

void RISCVInstrInfo::mulImm(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator II, const DebugLoc &DL,
Register DestReg, uint32_t Amount,
MachineInstr::MIFlag Flag) const {
MachineRegisterInfo &MRI = MF.getRegInfo();
assert(isInt<32>(Amount / 8) &&
"Expect the number of vector registers within 32-bits.");
uint32_t NumOfVReg = Amount / 8;

BuildMI(MBB, II, DL, get(RISCV::PseudoReadVLENB), DestReg).setMIFlag(Flag);
if (llvm::has_single_bit<uint32_t>(NumOfVReg)) {
uint32_t ShiftAmount = Log2_32(NumOfVReg);
if (llvm::has_single_bit<uint32_t>(Amount)) {
uint32_t ShiftAmount = Log2_32(Amount);
if (ShiftAmount == 0)
return;
BuildMI(MBB, II, DL, get(RISCV::SLLI), DestReg)
.addReg(DestReg, RegState::Kill)
.addImm(ShiftAmount)
.setMIFlag(Flag);
} else if (STI.hasStdExtZba() &&
((NumOfVReg % 3 == 0 && isPowerOf2_64(NumOfVReg / 3)) ||
(NumOfVReg % 5 == 0 && isPowerOf2_64(NumOfVReg / 5)) ||
(NumOfVReg % 9 == 0 && isPowerOf2_64(NumOfVReg / 9)))) {
((Amount % 3 == 0 && isPowerOf2_64(Amount / 3)) ||
(Amount % 5 == 0 && isPowerOf2_64(Amount / 5)) ||
(Amount % 9 == 0 && isPowerOf2_64(Amount / 9)))) {
// We can use Zba SHXADD+SLLI instructions for multiply in some cases.
unsigned Opc;
uint32_t ShiftAmount;
if (NumOfVReg % 9 == 0) {
if (Amount % 9 == 0) {
Opc = RISCV::SH3ADD;
ShiftAmount = Log2_64(NumOfVReg / 9);
} else if (NumOfVReg % 5 == 0) {
ShiftAmount = Log2_64(Amount / 9);
} else if (Amount % 5 == 0) {
Opc = RISCV::SH2ADD;
ShiftAmount = Log2_64(NumOfVReg / 5);
} else if (NumOfVReg % 3 == 0) {
ShiftAmount = Log2_64(Amount / 5);
} else if (Amount % 3 == 0) {
Opc = RISCV::SH1ADD;
ShiftAmount = Log2_64(NumOfVReg / 3);
ShiftAmount = Log2_64(Amount / 3);
} else {
llvm_unreachable("Unexpected number of vregs");
llvm_unreachable("implied by if-clause");
}
if (ShiftAmount)
BuildMI(MBB, II, DL, get(RISCV::SLLI), DestReg)
Expand All @@ -3104,9 +3093,9 @@ void RISCVInstrInfo::getVLENFactoredAmount(MachineFunction &MF,
.addReg(DestReg, RegState::Kill)
.addReg(DestReg)
.setMIFlag(Flag);
} else if (llvm::has_single_bit<uint32_t>(NumOfVReg - 1)) {
} else if (llvm::has_single_bit<uint32_t>(Amount - 1)) {
Register ScaledRegister = MRI.createVirtualRegister(&RISCV::GPRRegClass);
uint32_t ShiftAmount = Log2_32(NumOfVReg - 1);
uint32_t ShiftAmount = Log2_32(Amount - 1);
BuildMI(MBB, II, DL, get(RISCV::SLLI), ScaledRegister)
.addReg(DestReg)
.addImm(ShiftAmount)
Expand All @@ -3115,9 +3104,9 @@ void RISCVInstrInfo::getVLENFactoredAmount(MachineFunction &MF,
.addReg(ScaledRegister, RegState::Kill)
.addReg(DestReg, RegState::Kill)
.setMIFlag(Flag);
} else if (llvm::has_single_bit<uint32_t>(NumOfVReg + 1)) {
} else if (llvm::has_single_bit<uint32_t>(Amount + 1)) {
Register ScaledRegister = MRI.createVirtualRegister(&RISCV::GPRRegClass);
uint32_t ShiftAmount = Log2_32(NumOfVReg + 1);
uint32_t ShiftAmount = Log2_32(Amount + 1);
BuildMI(MBB, II, DL, get(RISCV::SLLI), ScaledRegister)
.addReg(DestReg)
.addImm(ShiftAmount)
Expand All @@ -3128,22 +3117,22 @@ void RISCVInstrInfo::getVLENFactoredAmount(MachineFunction &MF,
.setMIFlag(Flag);
} else if (STI.hasStdExtM() || STI.hasStdExtZmmul()) {
Register N = MRI.createVirtualRegister(&RISCV::GPRRegClass);
movImm(MBB, II, DL, N, NumOfVReg, Flag);
movImm(MBB, II, DL, N, Amount, Flag);
BuildMI(MBB, II, DL, get(RISCV::MUL), DestReg)
.addReg(DestReg, RegState::Kill)
.addReg(N, RegState::Kill)
.setMIFlag(Flag);
} else {
Register Acc;
uint32_t PrevShiftAmount = 0;
for (uint32_t ShiftAmount = 0; NumOfVReg >> ShiftAmount; ShiftAmount++) {
if (NumOfVReg & (1U << ShiftAmount)) {
for (uint32_t ShiftAmount = 0; Amount >> ShiftAmount; ShiftAmount++) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Amount is an int32_t, so `Amount >> ShiftAmount` will never become 0 if Amount starts negative.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Taking a look at this, the caller never passes in a negative number. The previous code used a uint32_t for the amount, so I'm going to switch the extracted routine over to match. Note that this shouldn't matter: the caller passes the absolute value in here, and the isInt<32> assert on a positive number implies that uint32_t and int32_t are equivalent in practice.

if (Amount & (1U << ShiftAmount)) {
if (ShiftAmount)
BuildMI(MBB, II, DL, get(RISCV::SLLI), DestReg)
.addReg(DestReg, RegState::Kill)
.addImm(ShiftAmount - PrevShiftAmount)
.setMIFlag(Flag);
if (NumOfVReg >> (ShiftAmount + 1)) {
if (Amount >> (ShiftAmount + 1)) {
// If we don't have an accumulator yet, create it and copy DestReg.
if (!Acc) {
Acc = MRI.createVirtualRegister(&RISCV::GPRRegClass);
Expand Down
10 changes: 6 additions & 4 deletions llvm/lib/Target/RISCV/RISCVInstrInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -229,10 +229,12 @@ class RISCVInstrInfo : public RISCVGenInstrInfo {
unsigned OpIdx,
const TargetRegisterInfo *TRI) const override;

void getVLENFactoredAmount(
MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator II, const DebugLoc &DL, Register DestReg,
int64_t Amount, MachineInstr::MIFlag Flag = MachineInstr::NoFlags) const;
/// Emit instructions that multiply the value held in DestReg by the constant
/// Amt, leaving the product in DestReg. An amount of 1 emits nothing and
/// other powers of two become a single shift. Further special cases use
/// Zba shift-add instructions or a shift combined with one add/sub, and
/// the general case uses MUL when the subtarget has M or Zmmul. Subtargets
/// with neither fall back to a shift-and-add expansion.
/// Flag is set on each instruction that is emitted.
void mulImm(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator II, const DebugLoc &DL,
Register DestReg, uint32_t Amt, MachineInstr::MIFlag Flag) const;

bool useMachineCombiner() const override { return true; }

Expand Down
11 changes: 10 additions & 1 deletion llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,16 @@ void RISCVRegisterInfo::adjustReg(MachineBasicBlock &MBB,
Register ScratchReg = DestReg;
if (DestReg == SrcReg)
ScratchReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
TII->getVLENFactoredAmount(MF, MBB, II, DL, ScratchReg, ScalableValue, Flag);

assert(ScalableValue > 0 && "There is no need to get VLEN scaled value.");
assert(ScalableValue % 8 == 0 &&
"Reserve the stack by the multiple of one vector size.");
assert(isInt<32>(ScalableValue / 8) &&
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this be isUInt<32>?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unchanged from the original code. It arguably should be changed, but I'd prefer to revisit the corner cases of extremely large offsets here in a separate change. (Remember, we're talking about more than 2^31 VLEN-sized slots here; that's... unlikely.)

"Expect the number of vector registers within 32-bits.");
uint32_t NumOfVReg = ScalableValue / 8;
BuildMI(MBB, II, DL, TII->get(RISCV::PseudoReadVLENB), ScratchReg)
.setMIFlag(Flag);
TII->mulImm(MF, MBB, II, DL, ScratchReg, NumOfVReg, Flag);
BuildMI(MBB, II, DL, TII->get(ScalableAdjOpc), DestReg)
.addReg(SrcReg).addReg(ScratchReg, RegState::Kill)
.setMIFlag(Flag);
Expand Down