[AArch64][GlobalISel] Select UMULL instruction #65469

Merged 4 commits on Sep 25, 2023
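In brief: the patch teaches the AArch64 post-legalizer lowering pass to recognise a widening multiply, a G_MUL whose operands are both G_ZEXT (or both G_SEXT) from a type of half the destination's scalar width, and rewrite it to the new target-specific G_UMULL/G_SMULL opcodes, which then select to the NEON UMULL/SMULL instructions. A minimal before/after sketch in MIR (register names and types are illustrative, not taken from the patch):

# Before the combine: a widening unsigned multiply built from two zero-extends.
%a:_(<8 x s8>) = COPY $d0
%b:_(<8 x s8>) = COPY $d1
%za:_(<8 x s16>) = G_ZEXT %a(<8 x s8>)
%zb:_(<8 x s16>) = G_ZEXT %b(<8 x s8>)
%m:_(<8 x s16>) = G_MUL %za, %zb

# After the combine: a single target-specific widening multiply; the extends
# and the full-width G_MUL are folded away, and selection emits UMULL.
%m:_(<8 x s16>) = G_UMULL %a(<8 x s8>), %b(<8 x s8>)
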
9 changes: 8 additions & 1 deletion llvm/lib/Target/AArch64/AArch64Combine.td
@@ -156,6 +156,13 @@ def mul_const : GICombineRule<
(apply [{ applyAArch64MulConstCombine(*${root}, MRI, B, ${matchinfo}); }])
>;

def lower_mull : GICombineRule<
(defs root:$root),
(match (wip_match_opcode G_MUL):$root,
[{ return matchExtMulToMULL(*${root}, MRI); }]),
(apply [{ applyExtMulToMULL(*${root}, MRI, B, Observer); }])
>;

def build_vector_to_dup : GICombineRule<
(defs root:$root),
(match (wip_match_opcode G_BUILD_VECTOR):$root,
@@ -232,7 +239,7 @@ def AArch64PostLegalizerLowering
icmp_lowering, build_vector_lowering,
lower_vector_fcmp, form_truncstore,
vector_sext_inreg_to_shift,
unmerge_ext_to_unmerge]> {
unmerge_ext_to_unmerge, lower_mull]> {
}

// Post-legalization combines which are primarily optimizations.
15 changes: 15 additions & 0 deletions llvm/lib/Target/AArch64/AArch64InstrGISel.td
@@ -215,6 +215,18 @@ def G_PREFETCH : AArch64GenericInstruction {
let hasSideEffects = 1;
}

def G_UMULL : AArch64GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type0:$src1, type0:$src2);
let hasSideEffects = 0;
}

def G_SMULL : AArch64GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type0:$src1, type0:$src2);
let hasSideEffects = 0;
}

// Generic instruction for the BSP pseudo. It is expanded into BSP, which
// expands into BSL/BIT/BIF after register allocation.
def G_BSP : AArch64GenericInstruction {
@@ -255,6 +267,9 @@ def : GINodeEquiv<G_FCMLTZ, AArch64fcmltz>;

def : GINodeEquiv<G_BSP, AArch64bsp>;

def : GINodeEquiv<G_UMULL, AArch64umull>;
def : GINodeEquiv<G_SMULL, AArch64smull>;

def : GINodeEquiv<G_EXTRACT_VECTOR_ELT, vector_extract>;

def : GINodeEquiv<G_PREFETCH, AArch64Prefetch>;
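The two GINodeEquiv lines map the new generic opcodes onto the existing AArch64umull/AArch64smull SelectionDAG nodes, so the SIMD patterns already written against those nodes are imported for GlobalISel and no hand-written selection code is needed. Roughly, selection then looks like this (the UMULLv8i8_v8i16 name follows the existing AArch64 SIMD definitions as I understand them; register classes are illustrative):

# Generic MIR entering instruction selection:
%m:_(<8 x s16>) = G_UMULL %a(<8 x s8>), %b(<8 x s8>)

# Selected machine instruction, via the imported AArch64umull pattern:
%m:fpr128 = UMULLv8i8_v8i16 %a:fpr64, %b:fpr64
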
8 changes: 1 addition & 7 deletions llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -119,13 +119,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
.clampScalar(0, s32, s64);

getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR})
.legalFor({s32, s64, v2s32, v4s32, v4s16, v8s16, v16s8, v8s8})
.scalarizeIf(
[=](const LegalityQuery &Query) {
return Query.Opcode == G_MUL && Query.Types[0] == v2s64;
},
0)
.legalFor({v2s64})
.legalFor({s32, s64, v2s32, v2s64, v4s32, v4s16, v8s16, v16s8, v8s8})
.widenScalarToNextPow2(0)
.clampScalar(0, s32, s64)
.clampMaxNumElements(0, s8, 16)
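With v2s64 now simply marked legal, a v2s64 G_MUL survives legalization unchanged; the decision to scalarise moves into the post-legalizer lowering pass, since NEON has no 64x64-bit vector multiply and the operation must still be broken up whenever no MULL can be formed. What the legalizer now leaves in place (illustrative names):

# Previously scalarised here; now kept whole so the lowering pass can try to
# form {U/S}MULL first.
%m:_(<2 x s64>) = G_MUL %a(<2 x s64>), %b(<2 x s64>)
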
69 changes: 69 additions & 0 deletions llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
@@ -1110,6 +1110,75 @@ void applyUnmergeExtToUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
Observer.changedInstr(MI);
}

// Match mul({z/s}ext, {z/s}ext) => {u/s}mull, or match a v2s64 mul, which
// will be scalarised later on. Both checks live in one function so that the
// matching order is always the same: try lowering MUL to MULL first, and only
// scalarise if that is not possible.
bool matchExtMulToMULL(MachineInstr &MI, MachineRegisterInfo &MRI) {
// Get the instructions that defined the source operands
LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
MachineInstr *I1 = getDefIgnoringCopies(MI.getOperand(1).getReg(), MRI);
MachineInstr *I2 = getDefIgnoringCopies(MI.getOperand(2).getReg(), MRI);

if (DstTy.isVector()) {
// If both source operands were zero/sign-extended, {U/S}MULL can be used
unsigned I1Opc = I1->getOpcode();
unsigned I2Opc = I2->getOpcode();
if (((I1Opc == TargetOpcode::G_ZEXT && I2Opc == TargetOpcode::G_ZEXT) ||
(I1Opc == TargetOpcode::G_SEXT && I2Opc == TargetOpcode::G_SEXT)) &&
(MRI.getType(I1->getOperand(0).getReg()).getScalarSizeInBits() ==
MRI.getType(I1->getOperand(1).getReg()).getScalarSizeInBits() * 2) &&
(MRI.getType(I2->getOperand(0).getReg()).getScalarSizeInBits() ==
MRI.getType(I2->getOperand(1).getReg()).getScalarSizeInBits() * 2)) {
return true;
}
// If the result type is v2s64, scalarise the instruction
else if (DstTy == LLT::fixed_vector(2, 64)) {
return true;
}
}
return false;
}

void applyExtMulToMULL(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B, GISelChangeObserver &Observer) {
assert(MI.getOpcode() == TargetOpcode::G_MUL &&
"Expected a G_MUL instruction");

// Get the instructions that defined the source operands
LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
MachineInstr *I1 = getDefIgnoringCopies(MI.getOperand(1).getReg(), MRI);
MachineInstr *I2 = getDefIgnoringCopies(MI.getOperand(2).getReg(), MRI);

// If both source operands were zero/sign-extended, {U/S}MULL can be used
unsigned I1Opc = I1->getOpcode();
unsigned I2Opc = I2->getOpcode();
if (((I1Opc == TargetOpcode::G_ZEXT && I2Opc == TargetOpcode::G_ZEXT) ||
(I1Opc == TargetOpcode::G_SEXT && I2Opc == TargetOpcode::G_SEXT)) &&
(MRI.getType(I1->getOperand(0).getReg()).getScalarSizeInBits() ==
MRI.getType(I1->getOperand(1).getReg()).getScalarSizeInBits() * 2) &&
(MRI.getType(I2->getOperand(0).getReg()).getScalarSizeInBits() ==
MRI.getType(I2->getOperand(1).getReg()).getScalarSizeInBits() * 2)) {

B.setInstrAndDebugLoc(MI);
B.buildInstr(I1->getOpcode() == TargetOpcode::G_ZEXT ? AArch64::G_UMULL
: AArch64::G_SMULL,
{MI.getOperand(0).getReg()},
{I1->getOperand(1).getReg(), I2->getOperand(1).getReg()});
MI.eraseFromParent();
}
// If the result type is v2s64, scalarise the instruction
else if (DstTy == LLT::fixed_vector(2, 64)) {
LegalizerHelper Helper(*MI.getMF(), Observer, B);
B.setInstrAndDebugLoc(MI);
Helper.fewerElementsVector(
MI, 0,
DstTy.changeElementCount(
DstTy.getElementCount().divideCoefficientBy(2)));
}
}

class AArch64PostLegalizerLoweringImpl : public Combiner {
protected:
// TODO: Make CombinerHelper methods const.
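When a v2s64 G_MUL did not come from a pair of extends, applyExtMulToMULL falls back to fewerElementsVector, which halves the element count; the result is the familiar scalarised sequence, sketched here with illustrative names:

# Fallback for a plain v2s64 multiply: split, multiply the halves, rebuild.
%a0:_(s64), %a1:_(s64) = G_UNMERGE_VALUES %a(<2 x s64>)
%b0:_(s64), %b1:_(s64) = G_UNMERGE_VALUES %b(<2 x s64>)
%m0:_(s64) = G_MUL %a0, %b0
%m1:_(s64) = G_MUL %a1, %b1
%m:_(<2 x s64>) = G_BUILD_VECTOR %m0(s64), %m1(s64)
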
8 changes: 2 additions & 6 deletions llvm/test/CodeGen/AArch64/GlobalISel/legalize-mul.mir
@@ -175,12 +175,8 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>)
; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>)
; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[UV]], [[UV2]]
; CHECK-NEXT: [[MUL1:%[0-9]+]]:_(s64) = G_MUL [[UV1]], [[UV3]]
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MUL]](s64), [[MUL1]](s64)
; CHECK-NEXT: $q0 = COPY [[BUILD_VECTOR]](<2 x s64>)
; CHECK-NEXT: [[MUL:%[0-9]+]]:_(<2 x s64>) = G_MUL [[COPY]], [[COPY1]]
; CHECK-NEXT: $q0 = COPY [[MUL]](<2 x s64>)
; CHECK-NEXT: RET_ReallyLR implicit $q0
%0:_(<2 x s64>) = COPY $q0
%1:_(<2 x s64>) = COPY $q1
8 changes: 3 additions & 5 deletions llvm/test/CodeGen/AArch64/GlobalISel/legalize-rem.mir
@@ -203,11 +203,9 @@ body: |
; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>)
; CHECK-NEXT: [[SDIV:%[0-9]+]]:_(s64) = G_SDIV [[UV]], [[UV2]]
; CHECK-NEXT: [[SDIV1:%[0-9]+]]:_(s64) = G_SDIV [[UV1]], [[UV3]]
; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>)
; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[SDIV]], [[UV4]]
; CHECK-NEXT: [[MUL1:%[0-9]+]]:_(s64) = G_MUL [[SDIV1]], [[UV5]]
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MUL]](s64), [[MUL1]](s64)
; CHECK-NEXT: [[SUB:%[0-9]+]]:_(<2 x s64>) = G_SUB [[COPY]], [[BUILD_VECTOR]]
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SDIV]](s64), [[SDIV1]](s64)
; CHECK-NEXT: [[MUL:%[0-9]+]]:_(<2 x s64>) = G_MUL [[BUILD_VECTOR]], [[COPY1]]
; CHECK-NEXT: [[SUB:%[0-9]+]]:_(<2 x s64>) = G_SUB [[COPY]], [[MUL]]
; CHECK-NEXT: $q0 = COPY [[SUB]](<2 x s64>)
%0:_(<2 x s64>) = COPY $q0
%1:_(<2 x s64>) = COPY $q1
@@ -27,7 +27,7 @@
# DEBUG-NEXT: .. the first uncovered type index: 1, OK
# DEBUG-NEXT: .. the first uncovered imm index: 0, OK
#
# DEBUG-NEXT: G_SDIV (opcode {{[0-9]+}}): 1 type index
# DEBUG-NEXT: G_SDIV (opcode {{[0-9]+}}): 1 type index, 0 imm indices
# DEBUG-NEXT: .. the first uncovered type index: 1, OK
# DEBUG-NEXT: .. the first uncovered imm index: 0, OK
#