[Machine-Combiner] Add a pass to reassociate chains of accumulation instructions into a tree #132728
Conversation
@llvm/pr-subscribers-backend-aarch64

Author: Jonathan Cohen (jcohen-apple)

Changes: This pass is designed to increase ILP by performing accumulation into multiple registers. It currently supports only the S/UABAL accumulation instructions, but can be extended to support additional instructions. Relanding after the previous version caused build failures due to a modified function signature in MachineCombiner.cpp. A conceptual sketch of the rewrite is included after the truncated patch below.

Patch is 67.38 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/132728.diff

8 Files Affected:
diff --git a/llvm/include/llvm/CodeGen/MachineCombinerPattern.h b/llvm/include/llvm/CodeGen/MachineCombinerPattern.h
index 3428c4dde5c7f..25fce679323ee 100644
--- a/llvm/include/llvm/CodeGen/MachineCombinerPattern.h
+++ b/llvm/include/llvm/CodeGen/MachineCombinerPattern.h
@@ -32,6 +32,7 @@ enum MachineCombinerPattern : unsigned {
REASSOC_AX_YB,
REASSOC_XA_BY,
REASSOC_XA_YB,
+ ACC_CHAIN,
TARGET_PATTERN_START
};
diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
index 8f2792c1cb7d5..1a8be37f321ae 100644
--- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
@@ -1276,6 +1276,41 @@ class TargetInstrInfo : public MCInstrInfo {
return false;
}
+ /// Find chains of accumulations that can be rewritten as a tree for increased
+ /// ILP.
+ bool getAccumulatorReassociationPatterns(
+ MachineInstr &Root, SmallVectorImpl<unsigned> &Patterns) const;
+
+  /// Find the chain of accumulator instructions ending at \P CurrentInstr and
+  /// return the accumulator registers of the chain in \P Chain.
+ void getAccumulatorChain(MachineInstr *CurrentInstr,
+ SmallVectorImpl<Register> &Chain) const;
+
+  /// Return true when \P Opcode is the opcode of an instruction that performs
+  /// accumulation into one of its operand registers.
+ virtual bool isAccumulationOpcode(unsigned Opcode) const { return false; }
+
+ /// Returns an opcode which defines the accumulator used by \P Opcode.
+ virtual unsigned getAccumulationStartOpcode(unsigned Opcode) const {
+ llvm_unreachable("Function not implemented for target!");
+ return 0;
+ }
+
+  /// Returns the opcode that should be used to reduce the accumulation registers.
+ virtual unsigned
+ getReduceOpcodeForAccumulator(unsigned int AccumulatorOpCode) const {
+ llvm_unreachable("Function not implemented for target!");
+ return 0;
+ }
+
+ /// Reduces branches of the accumulator tree into a single register.
+ void reduceAccumulatorTree(SmallVectorImpl<Register> &RegistersToReduce,
+ SmallVectorImpl<MachineInstr *> &InsInstrs,
+ MachineFunction &MF, MachineInstr &Root,
+ MachineRegisterInfo &MRI,
+ DenseMap<Register, unsigned> &InstrIdxForVirtReg,
+ Register ResultReg) const;
+
/// Return the inverse operation opcode if it exists for \P Opcode (e.g. add
/// for sub and vice versa).
virtual std::optional<unsigned> getInverseOpcode(unsigned Opcode) const {
diff --git a/llvm/lib/CodeGen/TargetInstrInfo.cpp b/llvm/lib/CodeGen/TargetInstrInfo.cpp
index e517ae1a7c44c..63c1e7283abae 100644
--- a/llvm/lib/CodeGen/TargetInstrInfo.cpp
+++ b/llvm/lib/CodeGen/TargetInstrInfo.cpp
@@ -11,6 +11,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/CodeGen/MachineCombinerPattern.h"
@@ -42,6 +43,19 @@ static cl::opt<bool> DisableHazardRecognizer(
"disable-sched-hazard", cl::Hidden, cl::init(false),
cl::desc("Disable hazard detection during preRA scheduling"));
+static cl::opt<bool> EnableAccReassociation(
+ "acc-reassoc", cl::Hidden, cl::init(true),
+ cl::desc("Enable reassociation of accumulation chains"));
+
+static cl::opt<unsigned int>
+ MinAccumulatorDepth("acc-min-depth", cl::Hidden, cl::init(8),
+ cl::desc("Minimum length of accumulator chains "
+ "required for the optimization to kick in"));
+
+static cl::opt<unsigned int> MaxAccumulatorWidth(
+ "acc-max-width", cl::Hidden, cl::init(3),
+ cl::desc("Maximum number of branches in the accumulator tree"));
+
TargetInstrInfo::~TargetInstrInfo() = default;
const TargetRegisterClass*
@@ -899,6 +913,154 @@ bool TargetInstrInfo::isReassociationCandidate(const MachineInstr &Inst,
hasReassociableSibling(Inst, Commuted);
}
+// Utility routine that checks if \param MO is defined by an
+// \param CombineOpc instruction in the basic block \param MBB.
+// If \param CombineOpc is not provided, the OpCode check will
+// be skipped.
+static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO,
+ unsigned CombineOpc = 0) {
+ MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+ MachineInstr *MI = nullptr;
+
+ if (MO.isReg() && MO.getReg().isVirtual())
+ MI = MRI.getUniqueVRegDef(MO.getReg());
+ // And it needs to be in the trace (otherwise, it won't have a depth).
+ if (!MI || MI->getParent() != &MBB ||
+ ((unsigned)MI->getOpcode() != CombineOpc && CombineOpc != 0))
+ return false;
+  // Must only be used by the user we combine with.
+ if (!MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
+ return false;
+
+ return true;
+}
+
+// A chain of accumulation instructions will be selected IFF:
+// 1. All the accumulation instructions in the chain have the same opcode,
+// besides the first that has a slightly different opcode because it does
+// not accumulate into a register.
+// 2. All the instructions in the chain are combinable (have a single use
+// which itself is part of the chain).
+// 3. The chain meets the required minimum length.
+void TargetInstrInfo::getAccumulatorChain(
+ MachineInstr *CurrentInstr, SmallVectorImpl<Register> &Chain) const {
+ // Walk up the chain of accumulation instructions and collect them in the
+ // vector.
+ MachineBasicBlock &MBB = *CurrentInstr->getParent();
+ const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+ unsigned AccumulatorOpcode = CurrentInstr->getOpcode();
+ std::optional<unsigned> ChainStartOpCode =
+ getAccumulationStartOpcode(AccumulatorOpcode);
+
+ if (!ChainStartOpCode.has_value())
+ return;
+
+ // Push the first accumulator result to the start of the chain.
+ Chain.push_back(CurrentInstr->getOperand(0).getReg());
+
+ // Collect the accumulator input register from all instructions in the chain.
+ while (CurrentInstr &&
+ canCombine(MBB, CurrentInstr->getOperand(1), AccumulatorOpcode)) {
+ Chain.push_back(CurrentInstr->getOperand(1).getReg());
+ CurrentInstr = MRI.getUniqueVRegDef(CurrentInstr->getOperand(1).getReg());
+ }
+
+ // Add the instruction at the top of the chain.
+ if (CurrentInstr->getOpcode() == AccumulatorOpcode &&
+ canCombine(MBB, CurrentInstr->getOperand(1)))
+ Chain.push_back(CurrentInstr->getOperand(1).getReg());
+}
+
+/// Find chains of accumulations that can be rewritten as a tree for increased
+/// ILP.
+bool TargetInstrInfo::getAccumulatorReassociationPatterns(
+ MachineInstr &Root, SmallVectorImpl<unsigned> &Patterns) const {
+ if (!EnableAccReassociation)
+ return false;
+
+ unsigned Opc = Root.getOpcode();
+ if (!isAccumulationOpcode(Opc))
+ return false;
+
+ // Verify that this is the end of the chain.
+ MachineBasicBlock &MBB = *Root.getParent();
+ MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+ if (!MRI.hasOneNonDBGUser(Root.getOperand(0).getReg()))
+ return false;
+
+ auto User = MRI.use_instr_begin(Root.getOperand(0).getReg());
+ if (User->getOpcode() == Opc)
+ return false;
+
+ // Walk up the use chain and collect the reduction chain.
+ SmallVector<Register, 32> Chain;
+ getAccumulatorChain(&Root, Chain);
+
+ // Reject chains which are too short to be worth modifying.
+ if (Chain.size() < MinAccumulatorDepth)
+ return false;
+
+ // Check if the MBB this instruction is a part of contains any other chains.
+ // If so, don't apply it.
+ SmallSet<Register, 32> ReductionChain(Chain.begin(), Chain.end());
+ for (const auto &I : MBB) {
+ if (I.getOpcode() == Opc &&
+ !ReductionChain.contains(I.getOperand(0).getReg()))
+ return false;
+ }
+
+ Patterns.push_back(MachineCombinerPattern::ACC_CHAIN);
+ return true;
+}
+
+// Reduce branches of the accumulator tree by adding them together.
+void TargetInstrInfo::reduceAccumulatorTree(
+ SmallVectorImpl<Register> &RegistersToReduce,
+ SmallVectorImpl<MachineInstr *> &InsInstrs, MachineFunction &MF,
+ MachineInstr &Root, MachineRegisterInfo &MRI,
+ DenseMap<Register, unsigned> &InstrIdxForVirtReg,
+ Register ResultReg) const {
+ const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
+ SmallVector<Register, 8> NewRegs;
+
+ // Get the opcode for the reduction instruction we will need to build.
+ // If for some reason it is not defined, early exit and don't apply this.
+ unsigned ReduceOpCode = getReduceOpcodeForAccumulator(Root.getOpcode());
+
+ for (unsigned int i = 1; i <= (RegistersToReduce.size() / 2); i += 2) {
+ auto RHS = RegistersToReduce[i - 1];
+ auto LHS = RegistersToReduce[i];
+ Register Dest;
+ // If we are reducing 2 registers, reuse the original result register.
+ if (RegistersToReduce.size() == 2)
+ Dest = ResultReg;
+ // Otherwise, create a new virtual register to hold the partial sum.
+ else {
+ auto NewVR = MRI.createVirtualRegister(
+ MRI.getRegClass(Root.getOperand(0).getReg()));
+ Dest = NewVR;
+ NewRegs.push_back(Dest);
+ InstrIdxForVirtReg.insert(std::make_pair(Dest, InsInstrs.size()));
+ }
+
+ // Create the new reduction instruction.
+ MachineInstrBuilder MIB =
+ BuildMI(MF, MIMetadata(Root), TII->get(ReduceOpCode), Dest)
+ .addReg(RHS, getKillRegState(true))
+ .addReg(LHS, getKillRegState(true));
+ // Copy any flags needed from the original instruction.
+ MIB->setFlags(Root.getFlags());
+ InsInstrs.push_back(MIB);
+ }
+
+  // If the number of registers to reduce is odd, carry the remaining register
+  // over to the next round of reductions.
+ if (RegistersToReduce.size() % 2 != 0)
+ NewRegs.push_back(RegistersToReduce[RegistersToReduce.size() - 1]);
+
+ RegistersToReduce = NewRegs;
+}
+
// The concept of the reassociation pass is that these operations can benefit
// from this kind of transformation:
//
@@ -938,6 +1100,8 @@ bool TargetInstrInfo::getMachineCombinerPatterns(
}
return true;
}
+ if (getAccumulatorReassociationPatterns(Root, Patterns))
+ return true;
return false;
}
@@ -949,7 +1113,12 @@ bool TargetInstrInfo::isThroughputPattern(unsigned Pattern) const {
CombinerObjective
TargetInstrInfo::getCombinerObjective(unsigned Pattern) const {
- return CombinerObjective::Default;
+ switch (Pattern) {
+ case MachineCombinerPattern::ACC_CHAIN:
+ return CombinerObjective::MustReduceDepth;
+ default:
+ return CombinerObjective::Default;
+ }
}
std::pair<unsigned, unsigned>
@@ -1252,19 +1421,101 @@ void TargetInstrInfo::genAlternativeCodeSequence(
SmallVectorImpl<MachineInstr *> &DelInstrs,
DenseMap<Register, unsigned> &InstIdxForVirtReg) const {
MachineRegisterInfo &MRI = Root.getMF()->getRegInfo();
+ MachineBasicBlock &MBB = *Root.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
- // Select the previous instruction in the sequence based on the input pattern.
- std::array<unsigned, 5> OperandIndices;
- getReassociateOperandIndices(Root, Pattern, OperandIndices);
- MachineInstr *Prev =
- MRI.getUniqueVRegDef(Root.getOperand(OperandIndices[0]).getReg());
+ switch (Pattern) {
+ case MachineCombinerPattern::REASSOC_AX_BY:
+ case MachineCombinerPattern::REASSOC_AX_YB:
+ case MachineCombinerPattern::REASSOC_XA_BY:
+ case MachineCombinerPattern::REASSOC_XA_YB: {
+ // Select the previous instruction in the sequence based on the input
+ // pattern.
+ std::array<unsigned, 5> OperandIndices;
+ getReassociateOperandIndices(Root, Pattern, OperandIndices);
+ MachineInstr *Prev =
+ MRI.getUniqueVRegDef(Root.getOperand(OperandIndices[0]).getReg());
+
+ // Don't reassociate if Prev and Root are in different blocks.
+ if (Prev->getParent() != Root.getParent())
+ return;
- // Don't reassociate if Prev and Root are in different blocks.
- if (Prev->getParent() != Root.getParent())
- return;
+ reassociateOps(Root, *Prev, Pattern, InsInstrs, DelInstrs, OperandIndices,
+ InstIdxForVirtReg);
+ break;
+ }
+ case MachineCombinerPattern::ACC_CHAIN: {
+ SmallVector<Register, 32> ChainRegs;
+ getAccumulatorChain(&Root, ChainRegs);
+ unsigned int Depth = ChainRegs.size();
+ assert(MaxAccumulatorWidth > 1 &&
+ "Max accumulator width set to illegal value");
+ unsigned int MaxWidth = Log2_32(Depth) < MaxAccumulatorWidth
+ ? Log2_32(Depth)
+ : MaxAccumulatorWidth;
+
+ // Walk down the chain and rewrite it as a tree.
+ for (auto IndexedReg : llvm::enumerate(llvm::reverse(ChainRegs))) {
+ // No need to rewrite the first node, it is already perfect as it is.
+ if (IndexedReg.index() == 0)
+ continue;
+
+ MachineInstr *Instr = MRI.getUniqueVRegDef(IndexedReg.value());
+ MachineInstrBuilder MIB;
+ Register AccReg;
+ if (IndexedReg.index() < MaxWidth) {
+ // Now we need to create new instructions for the first row.
+ AccReg = Instr->getOperand(0).getReg();
+ std::optional<unsigned> OpCode =
+ getAccumulationStartOpcode(Root.getOpcode());
+ assert(OpCode.value() &&
+ "Missing opcode for accumulation instruction.");
+
+ MIB = BuildMI(MF, MIMetadata(*Instr), TII->get(OpCode.value()), AccReg)
+ .addReg(Instr->getOperand(2).getReg(),
+ getKillRegState(Instr->getOperand(2).isKill()))
+ .addReg(Instr->getOperand(3).getReg(),
+ getKillRegState(Instr->getOperand(3).isKill()));
+ } else {
+ // For the remaining cases, we need to use an output register of one of
+        // the newly inserted instructions as operand 1.
+ AccReg = Instr->getOperand(0).getReg() == Root.getOperand(0).getReg()
+ ? MRI.createVirtualRegister(
+ MRI.getRegClass(Root.getOperand(0).getReg()))
+ : Instr->getOperand(0).getReg();
+ assert(IndexedReg.index() - MaxWidth >= 0);
+ auto AccumulatorInput =
+ ChainRegs[Depth - (IndexedReg.index() - MaxWidth) - 1];
+ MIB = BuildMI(MF, MIMetadata(*Instr), TII->get(Instr->getOpcode()),
+ AccReg)
+ .addReg(AccumulatorInput, getKillRegState(true))
+ .addReg(Instr->getOperand(2).getReg(),
+ getKillRegState(Instr->getOperand(2).isKill()))
+ .addReg(Instr->getOperand(3).getReg(),
+ getKillRegState(Instr->getOperand(3).isKill()));
+ }
- reassociateOps(Root, *Prev, Pattern, InsInstrs, DelInstrs, OperandIndices,
- InstIdxForVirtReg);
+ MIB->setFlags(Instr->getFlags());
+ InstIdxForVirtReg.insert(std::make_pair(AccReg, InsInstrs.size()));
+ InsInstrs.push_back(MIB);
+ DelInstrs.push_back(Instr);
+ }
+
+ SmallVector<Register, 8> RegistersToReduce;
+ for (unsigned i = (InsInstrs.size() - MaxWidth); i < InsInstrs.size();
+ ++i) {
+ auto Reg = InsInstrs[i]->getOperand(0).getReg();
+ RegistersToReduce.push_back(Reg);
+ }
+
+ while (RegistersToReduce.size() > 1)
+ reduceAccumulatorTree(RegistersToReduce, InsInstrs, MF, Root, MRI,
+ InstIdxForVirtReg, Root.getOperand(0).getReg());
+
+ break;
+ }
+ }
}
MachineTraceStrategy TargetInstrInfo::getMachineCombinerTraceStrategy() const {
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 9f8082b64ab18..d370f8c7ff6ea 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -53,6 +53,7 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include <cassert>
+#include <cstddef>
#include <cstdint>
#include <iterator>
#include <utility>
@@ -6770,6 +6771,133 @@ static bool getMaddPatterns(MachineInstr &Root,
}
return Found;
}
+
+bool AArch64InstrInfo::isAccumulationOpcode(unsigned Opcode) const {
+ switch (Opcode) {
+ default:
+ break;
+ case AArch64::UABALB_ZZZ_D:
+ case AArch64::UABALB_ZZZ_H:
+ case AArch64::UABALB_ZZZ_S:
+ case AArch64::UABALT_ZZZ_D:
+ case AArch64::UABALT_ZZZ_H:
+ case AArch64::UABALT_ZZZ_S:
+ case AArch64::SABALB_ZZZ_D:
+ case AArch64::SABALB_ZZZ_S:
+ case AArch64::SABALB_ZZZ_H:
+ case AArch64::SABALT_ZZZ_D:
+ case AArch64::SABALT_ZZZ_S:
+ case AArch64::SABALT_ZZZ_H:
+ case AArch64::UABALv16i8_v8i16:
+ case AArch64::UABALv2i32_v2i64:
+ case AArch64::UABALv4i16_v4i32:
+ case AArch64::UABALv4i32_v2i64:
+ case AArch64::UABALv8i16_v4i32:
+ case AArch64::UABALv8i8_v8i16:
+ case AArch64::UABAv16i8:
+ case AArch64::UABAv2i32:
+ case AArch64::UABAv4i16:
+ case AArch64::UABAv4i32:
+ case AArch64::UABAv8i16:
+ case AArch64::UABAv8i8:
+ case AArch64::SABALv16i8_v8i16:
+ case AArch64::SABALv2i32_v2i64:
+ case AArch64::SABALv4i16_v4i32:
+ case AArch64::SABALv4i32_v2i64:
+ case AArch64::SABALv8i16_v4i32:
+ case AArch64::SABALv8i8_v8i16:
+ case AArch64::SABAv16i8:
+ case AArch64::SABAv2i32:
+ case AArch64::SABAv4i16:
+ case AArch64::SABAv4i32:
+ case AArch64::SABAv8i16:
+ case AArch64::SABAv8i8:
+ return true;
+ }
+
+ return false;
+}
+
+unsigned AArch64InstrInfo::getAccumulationStartOpcode(
+ unsigned AccumulationOpcode) const {
+ switch (AccumulationOpcode) {
+ default:
+ llvm_unreachable("Unsupported accumulation Opcode!");
+ case AArch64::UABALB_ZZZ_D:
+ return AArch64::UABDLB_ZZZ_D;
+ case AArch64::UABALB_ZZZ_H:
+ return AArch64::UABDLB_ZZZ_H;
+ case AArch64::UABALB_ZZZ_S:
+ return AArch64::UABDLB_ZZZ_S;
+ case AArch64::UABALT_ZZZ_D:
+ return AArch64::UABDLT_ZZZ_D;
+ case AArch64::UABALT_ZZZ_H:
+ return AArch64::UABDLT_ZZZ_H;
+ case AArch64::UABALT_ZZZ_S:
+ return AArch64::UABDLT_ZZZ_S;
+ case AArch64::UABALv16i8_v8i16:
+ return AArch64::UABDLv16i8_v8i16;
+ case AArch64::UABALv2i32_v2i64:
+ return AArch64::UABDLv2i32_v2i64;
+ case AArch64::UABALv4i16_v4i32:
+ return AArch64::UABDLv4i16_v4i32;
+ case AArch64::UABALv4i32_v2i64:
+ return AArch64::UABDLv4i32_v2i64;
+ case AArch64::UABALv8i16_v4i32:
+ return AArch64::UABDLv8i16_v4i32;
+ case AArch64::UABALv8i8_v8i16:
+ return AArch64::UABDLv8i8_v8i16;
+ case AArch64::UABAv16i8:
+ return AArch64::UABDv16i8;
+ case AArch64::UABAv2i32:
+ return AArch64::UABDv2i32;
+ case AArch64::UABAv4i16:
+ return AArch64::UABDv4i16;
+ case AArch64::UABAv4i32:
+ return AArch64::UABDv4i32;
+ case AArch64::UABAv8i16:
+ return AArch64::UABDv8i16;
+ case AArch64::UABAv8i8:
+ return AArch64::UABDv8i8;
+ case AArch64::SABALB_ZZZ_D:
+ return AArch64::SABDLB_ZZZ_D;
+ case AArch64::SABALB_ZZZ_S:
+ return AArch64::SABDLB_ZZZ_S;
+ case AArch64::SABALB_ZZZ_H:
+ return AArch64::SABDLB_ZZZ_H;
+ case AArch64::SABALT_ZZZ_D:
+ return AArch64::SABDLT_ZZZ_D;
+ case AArch64::SABALT_ZZZ_S:
+ return AArch64::SABDLT_ZZZ_S;
+ case AArch64::SABALT_ZZZ_H:
+ return AArch64::SABDLT_ZZZ_H;
+ case AArch64::SABALv16i8_v8i16:
+ return AArch64::SABDLv16i8_v8i16;
+ case AArch64::SABALv2i32_v2i64:
+ return AArch64::SABDLv2i32_v2i64;
+ case AArch64::SABALv4i16_v4i32:
+ return AArch64::SABDLv4i16_v4i32;
+ case AArch64::SABALv4i32_v2i64:
+ return AArch64::SABDLv4i32_v2i64;
+ case AArch64::SABALv8i16_v4i32:
+ return AArch64::SABDLv8i16_v4i32;
+ case AArch64::SABALv8i8_v8i16:
+ return AArch64::SABDLv8i8_v8i16;
+ case AArch64::SABAv16i8:
+ return AArch64::SABDv16i8;
+ case AArch64::SABAv2i32:
+    return AArch64::SABDv2i32;
+ case AArch64::SABAv4i16:
+ return AArch64::SABDv4i16;
+ case AArch64::SABAv4i32:
+ return AArch64::SABDv4i32;
+ case AArch64::SABAv8i16:
+ return AArch64::SABDv8i16;
+ case AArch64::SABAv8i8:
+ return AArch64::SABDv8i8;
+ }
+}
+
/// Floating-Point Support
/// Find instructions that can be turned into madd.
@@ -7531,6 +7659,63 @@ static void genSubAdd2SubSub(MachineFunction &MF, MachineRegisterInfo &MRI,
DelInstrs.push_back(&Root);
}
+unsigned AArch64InstrInfo::ge...
[truncated]
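
To make the transformation easier to picture, here is a hand-written scalar analogy of the rewrite — illustrative only; the function names, element count, and scalar types are invented for this sketch and do not appear in the patch. The first function is the shape getAccumulatorChain collects (one serial accumulation chain); the second is the shape after the ACC_CHAIN pattern fires, with the work split across independent partial accumulators that are reduced at the end, mirroring reduceAccumulatorTree.

```cpp
#include <cstdlib> // std::abs

// Before the combiner pattern fires: a single accumulator, so the eight
// absolute-difference accumulations form one serial dependency chain.
unsigned sad8_chain(const unsigned char *a, const unsigned char *b) {
  unsigned acc = std::abs(a[0] - b[0]); // chain head: no prior accumulator
  acc += std::abs(a[1] - b[1]);
  acc += std::abs(a[2] - b[2]);
  acc += std::abs(a[3] - b[3]);
  acc += std::abs(a[4] - b[4]);
  acc += std::abs(a[5] - b[5]);
  acc += std::abs(a[6] - b[6]);
  acc += std::abs(a[7] - b[7]); // every step waits for the previous one
  return acc;
}

// After: up to three independent partial accumulators (the default
// -acc-max-width), followed by a short reduction at the end, mirroring
// what reduceAccumulatorTree does with the inserted instructions.
unsigned sad8_tree(const unsigned char *a, const unsigned char *b) {
  unsigned acc0 = std::abs(a[0] - b[0]); // three independent chain heads
  unsigned acc1 = std::abs(a[1] - b[1]);
  unsigned acc2 = std::abs(a[2] - b[2]);
  acc0 += std::abs(a[3] - b[3]); // remaining steps feed the partial sums
  acc1 += std::abs(a[4] - b[4]);
  acc2 += std::abs(a[5] - b[5]);
  acc0 += std::abs(a[6] - b[6]);
  acc1 += std::abs(a[7] - b[7]);
  return (acc0 + acc1) + acc2; // final reduction into one result
}
```

On a wide out-of-order core the second form exposes several independent dependency chains instead of one, which is the ILP gain the pass is after.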
[Machine-Combiner] Add a pass to reassociate chains of accumulation instructions into a tree (llvm#132728): This pass is designed to increase ILP by performing accumulation into multiple registers. It currently supports only the S/UABAL accumulation instructions, but can be extended to support additional instructions. Reland of llvm#126060, which was reverted due to a conflict with llvm#131272.
[Machine-Combiner] Add a pass to reassociate chains of accumulation instructions into a tree (llvm#132728): This pass is designed to increase ILP by performing accumulation into multiple registers. It currently supports only the S/UABAL accumulation instructions, but will be extended to support additional instructions. This pattern appears often in code written using intrinsics, which gets linearized in IR form by the Reassociate pass; unlike basic instructions such as add/mul, these accumulation instructions had no corresponding MachineCombiner patterns aimed at restoring the tree that was linearized by the earlier pass. rdar://78517468
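
As a rough illustration of the intrinsics pattern mentioned above — a hypothetical sum-of-absolute-differences kernel; the function name and buffer size are invented for this sketch — code like the following lowers to one UABDL followed by a serial chain of UABAL instructions, which is the chain shape the new ACC_CHAIN pattern re-balances into a tree:

```cpp
#include <arm_neon.h>
#include <stdint.h>

// Hypothetical SAD over 64 bytes, written with NEON intrinsics.
uint32_t sad64_u8(const uint8_t *a, const uint8_t *b) {
  // Chain head: absolute-difference-long with no accumulator (lowers to UABDL).
  uint16x8_t acc = vabdl_u8(vld1_u8(a), vld1_u8(b));
  // Each step accumulates into the previous result (lowers to UABAL), so the
  // backend sees one serial chain of eight accumulations.
  acc = vabal_u8(acc, vld1_u8(a + 8),  vld1_u8(b + 8));
  acc = vabal_u8(acc, vld1_u8(a + 16), vld1_u8(b + 16));
  acc = vabal_u8(acc, vld1_u8(a + 24), vld1_u8(b + 24));
  acc = vabal_u8(acc, vld1_u8(a + 32), vld1_u8(b + 32));
  acc = vabal_u8(acc, vld1_u8(a + 40), vld1_u8(b + 40));
  acc = vabal_u8(acc, vld1_u8(a + 48), vld1_u8(b + 48));
  acc = vabal_u8(acc, vld1_u8(a + 56), vld1_u8(b + 56));
  return vaddvq_u16(acc); // horizontal add of the final accumulator
}
```

With the default settings (-acc-min-depth=8, -acc-max-width=3), a chain of this length should be long enough to trigger the rewrite.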