-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[AArch64][SME2] Improve register allocation of multi-vector SME intrinsics #116399
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
fc0d224
6992da2
0d4c931
7188a2d
ebc97b7
43939b9
426253c
645e30b
d7ccfe1
0f0bc84
7f3e687
6cb5c5d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -8641,6 +8641,56 @@ static bool checkZExtBool(SDValue Arg, const SelectionDAG &DAG) { | |||||
return ZExtBool; | ||||||
} | ||||||
|
||||||
// The FORM_TRANSPOSED_REG_TUPLE pseudo should only be used if the | ||||||
// input operands are copy nodes where the source register is in a | ||||||
// StridedOrContiguous class. For example: | ||||||
// | ||||||
// %3:zpr2stridedorcontiguous = LD1B_2Z_IMM_PSEUDO .. | ||||||
// %4:zpr = COPY %3.zsub1:zpr2stridedorcontiguous | ||||||
// %5:zpr = COPY %3.zsub0:zpr2stridedorcontiguous | ||||||
// %6:zpr2stridedorcontiguous = LD1B_2Z_PSEUDO .. | ||||||
// %7:zpr = COPY %6.zsub1:zpr2stridedorcontiguous | ||||||
sdesmalen-arm marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||
// %8:zpr = COPY %6.zsub0:zpr2stridedorcontiguous | ||||||
sdesmalen-arm marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||
// %9:zpr2mul2 = FORM_TRANSPOSED_REG_TUPLE_X2_PSEUDO %5:zpr, %8:zpr | ||||||
// | ||||||
bool shouldUseFormStridedPseudo(MachineInstr &MI) { | ||||||
MachineRegisterInfo &MRI = MI.getMF()->getRegInfo(); | ||||||
|
||||||
const TargetRegisterClass *RegClass = nullptr; | ||||||
switch (MI.getOpcode()) { | ||||||
case AArch64::FORM_TRANSPOSED_REG_TUPLE_X2_PSEUDO: | ||||||
RegClass = &AArch64::ZPR2StridedOrContiguousRegClass; | ||||||
break; | ||||||
case AArch64::FORM_TRANSPOSED_REG_TUPLE_X4_PSEUDO: | ||||||
sdesmalen-arm marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||
RegClass = &AArch64::ZPR4StridedOrContiguousRegClass; | ||||||
break; | ||||||
default: | ||||||
llvm_unreachable("Unexpected opcode."); | ||||||
kmclaughlin-arm marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||
} | ||||||
kmclaughlin-arm marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||
|
||||||
MCRegister SubReg = MCRegister::NoRegister; | ||||||
for (unsigned I = 1; I < MI.getNumOperands(); ++I) { | ||||||
sdesmalen-arm marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||
MachineOperand &MO = MI.getOperand(I); | ||||||
assert(MO.isReg() && "Unexpected operand to FORM_TRANSPOSED_REG_TUPLE"); | ||||||
|
||||||
MachineOperand *Def = MRI.getOneDef(MO.getReg()); | ||||||
if (!Def || !Def->getParent()->isCopy()) | ||||||
sdesmalen-arm marked this conversation as resolved.
Show resolved
Hide resolved
kmclaughlin-arm marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||
return false; | ||||||
sdesmalen-arm marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||
|
||||||
const MachineOperand &CopySrc = Def->getParent()->getOperand(1); | ||||||
unsigned OpSubReg = CopySrc.getSubReg(); | ||||||
if (SubReg == MCRegister::NoRegister) | ||||||
SubReg = OpSubReg; | ||||||
|
||||||
MachineOperand *CopySrcOp = MRI.getOneDef(CopySrc.getReg()); | ||||||
if (!CopySrcOp || !CopySrcOp->isReg() || OpSubReg != SubReg || | ||||||
MRI.getRegClass(CopySrcOp->getReg()) != RegClass) | ||||||
return false; | ||||||
} | ||||||
|
||||||
return true; | ||||||
} | ||||||
|
||||||
void AArch64TargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI, | ||||||
SDNode *Node) const { | ||||||
sdesmalen-arm marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||
// Live-in physreg copies that are glued to SMSTART are applied as | ||||||
|
@@ -8666,6 +8716,27 @@ void AArch64TargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI, | |||||
} | ||||||
} | ||||||
|
||||||
if (MI.getOpcode() == AArch64::FORM_TRANSPOSED_REG_TUPLE_X2_PSEUDO || | ||||||
MI.getOpcode() == AArch64::FORM_TRANSPOSED_REG_TUPLE_X4_PSEUDO) { | ||||||
// If input values to the FORM_TRANSPOSED_REG_TUPLE pseudo aren't copies | ||||||
// from a StridedOrContiguous class, fall back on REG_SEQUENCE node. | ||||||
if (shouldUseFormStridedPseudo(MI)) | ||||||
return; | ||||||
|
||||||
const TargetInstrInfo *TII = Subtarget->getInstrInfo(); | ||||||
MachineInstrBuilder MIB = BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), | ||||||
TII->get(TargetOpcode::REG_SEQUENCE), | ||||||
MI.getOperand(0).getReg()); | ||||||
|
||||||
for (unsigned I = 1; I < MI.getNumOperands(); ++I) { | ||||||
MIB.add(MI.getOperand(I)); | ||||||
MIB.addImm(AArch64::zsub0 + (I - 1)); | ||||||
} | ||||||
|
||||||
MI.eraseFromParent(); | ||||||
return; | ||||||
} | ||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. nit:
Suggested change
Then you can remove |
||||||
|
||||||
// Add an implicit use of 'VG' for ADDXri/SUBXri, which are instructions that | ||||||
// have nothing to do with VG, were it not that they are used to materialise a | ||||||
// frame-address. If they contain a frame-index to a scalable vector, this | ||||||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -34,6 +34,30 @@ def tileslicerange0s4 : ComplexPattern<i32, 2, "SelectSMETileSlice<0, 4>", []>; | |
|
||
def am_sme_indexed_b4 :ComplexPattern<iPTR, 2, "SelectAddrModeIndexedSVE<0,15>", [], [SDNPWantRoot]>; | ||
|
||
// The FORM_TRANSPOSED_REG_TUPLE pseudos defined below are intended to | ||
// improve register allocation for intrinsics which use strided and contiguous | ||
// multi-vector registers, avoiding unnecessary copies. | ||
// If the operands of the pseudo are copies where the source register is in | ||
// the StridedOrContiguous class, the pseudo is used to provide a hint to the | ||
// register allocator suggesting a contiguous multi-vector register which | ||
// matches the subregister sequence used by the operands. | ||
// If the operands do not match this pattern, the pseudos are expanded | ||
// to a REG_SEQUENCE using the post-isel hook. | ||
|
||
// Two-vector variant: produces a ZPR2Mul2 tuple ($tup) from two ZPR inputs
// ($zn0, $zn1).
def FORM_TRANSPOSED_REG_TUPLE_X2_PSEUDO : | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think this needs a description of why we add these pseudos, and a comment that we expand them to REG_SEQUENCE with the post-isel hook if they don't meet certain criteria. |
||
Pseudo<(outs ZPR2Mul2:$tup), | ||
(ins ZPR:$zn0, ZPR:$zn1), []>, Sched<[]>{ | ||
let hasSideEffects = 0; | ||
// Needed so the post-isel hook gets a chance to replace this pseudo with a
// REG_SEQUENCE when its operands do not match the expected copy pattern.
let hasPostISelHook = 1; | ||
} | ||
|
||
// Four-vector variant: produces a ZPR4Mul4 tuple ($tup) from four ZPR inputs
// ($zn0..$zn3). hasPostISelHook is set so that the post-isel hook can replace
// the pseudo with a REG_SEQUENCE when its operands do not match the expected
// copy pattern.
let hasSideEffects = 0, hasPostISelHook = 1 in
def FORM_TRANSPOSED_REG_TUPLE_X4_PSEUDO
    : Pseudo<(outs ZPR4Mul4:$tup),
             (ins ZPR:$zn0, ZPR:$zn1, ZPR:$zn2, ZPR:$zn3), []>,
      Sched<[]>;
|
||
def SDTZALoadStore : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisPtrTy<1>, SDTCisInt<2>]>; | ||
def AArch64SMELdr : SDNode<"AArch64ISD::SME_ZA_LDR", SDTZALoadStore, | ||
[SDNPHasChain, SDNPSideEffect, SDNPMayLoad]>; | ||
|
@@ -172,14 +196,14 @@ class SME2_ZA_TwoOp_VG2_Multi_Index_Pat<string name, SDPatternOperator intrinsic | |
Operand imm_ty, ComplexPattern tileslice> | ||
: Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, index_ty:$offset)), vt:$Zn1, vt:$Zn2, vt:$Zm, (i32 imm_ty:$i)), | ||
(!cast<Instruction>(name # _PSEUDO) $base, $offset, | ||
(REG_SEQUENCE ZPR2Mul2, vt:$Zn1, zsub0, vt:$Zn2, zsub1), zpr_ty:$Zm, imm_ty:$i)>; | ||
(FORM_TRANSPOSED_REG_TUPLE_X2_PSEUDO vt:$Zn1,vt:$Zn2), zpr_ty:$Zm, imm_ty:$i)>; | ||
|
||
class SME2_ZA_TwoOp_VG4_Multi_Index_Pat<string name, SDPatternOperator intrinsic, Operand index_ty, ZPRRegOp zpr_ty, ValueType vt, | ||
Operand imm_ty, ComplexPattern tileslice> | ||
: Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, index_ty:$offset)), | ||
vt:$Zn1, vt:$Zn2, vt:$Zn3, vt:$Zn4, vt:$Zm, (i32 imm_ty:$i)), | ||
(!cast<Instruction>(name # _PSEUDO) $base, $offset, | ||
(REG_SEQUENCE ZPR4Mul4, vt:$Zn1, zsub0, vt:$Zn2, zsub1, vt:$Zn3, zsub2, vt:$Zn4, zsub3), | ||
(FORM_TRANSPOSED_REG_TUPLE_X4_PSEUDO vt:$Zn1, vt:$Zn2, vt:$Zn3, vt:$Zn4), | ||
zpr_ty:$Zm, imm_ty:$i)>; | ||
|
||
class SME2_Sat_Shift_VG2_Pat<string name, SDPatternOperator intrinsic, ValueType out_vt, ValueType in_vt, Operand imm_ty> | ||
|
Uh oh!
There was an error while loading. Please reload this page.