Skip to content

Commit a604c4b

Browse files
authored
[AArch64][GlobalISel] TableGen Selection for G_VECREDUCE_ADD (llvm#70785)
Instruction selection for G_VECREDUCE_ADD now uses TableGen patterns instead of the hand-written selectReduction routine in AArch64InstructionSelector.cpp.
1 parent de58aa8 commit a604c4b

File tree

3 files changed

+17
-45
lines changed

3 files changed

+17
-45
lines changed

llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -177,6 +177,7 @@ def : GINodeEquiv<G_VECREDUCE_UMIN, vecreduce_umin>;
177177
def : GINodeEquiv<G_VECREDUCE_UMAX, vecreduce_umax>;
178178
def : GINodeEquiv<G_VECREDUCE_SMIN, vecreduce_smin>;
179179
def : GINodeEquiv<G_VECREDUCE_SMAX, vecreduce_smax>;
180+
def : GINodeEquiv<G_VECREDUCE_ADD, vecreduce_add>;
180181

181182
def : GINodeEquiv<G_STRICT_FADD, strict_fadd>;
182183
def : GINodeEquiv<G_STRICT_FSUB, strict_fsub>;

llvm/lib/Target/AArch64/AArch64InstrInfo.td

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6676,6 +6676,22 @@ def : Pat<(i32 (and (i32 (vector_extract (opNode (v8i16 V128:$Rn)), (i64 0))),
66766676
ssub))>;
66776677
}
66786678

6679+
// Selection patterns for vecreduce_add, used by GlobalISel not SDAG.
// Each pattern maps an add reduction over one fixed vector type to an ADDV or
// ADDP instruction whose result type is the vector's element type.
6680+
def : Pat<(i8 (vecreduce_add (v8i8 V64:$Rn))),
6681+
(i8 (ADDVv8i8v V64:$Rn))>;
6682+
def : Pat<(i8 (vecreduce_add (v16i8 V128:$Rn))),
6683+
(i8 (ADDVv16i8v V128:$Rn))>;
6684+
def : Pat<(i16 (vecreduce_add (v4i16 V64:$Rn))),
6685+
(i16 (ADDVv4i16v V64:$Rn))>;
6686+
def : Pat<(i16 (vecreduce_add (v8i16 V128:$Rn))),
6687+
(i16 (ADDVv8i16v V128:$Rn))>;
6688+
// v2i32 is the odd one out: it is selected as ADDPv2i32 with $Rn as both
// source operands, and the i32 result is read through the ssub subregister.
def : Pat<(i32 (vecreduce_add (v2i32 V64:$Rn))),
6689+
(i32 (EXTRACT_SUBREG (ADDPv2i32 V64:$Rn, V64:$Rn), ssub))>;
6690+
def : Pat<(i32 (vecreduce_add (v4i32 V128:$Rn))),
6691+
(i32 (ADDVv4i32v V128:$Rn))>;
6692+
// v2i64 is selected as the pairwise ADDPv2i64p rather than an ADDV form.
def : Pat<(i64 (vecreduce_add (v2i64 V128:$Rn))),
6693+
(i64 (ADDPv2i64p V128:$Rn))>;
6694+
66796695
defm : SIMDAcrossLanesSignedIntrinsic<"ADDV", AArch64saddv>;
66806696
// vaddv_[su]32 is special; -> ADDP Vd.2S,Vn.2S,Vm.2S; return Vd.s[0];Vn==Vm
66816697
def : Pat<(v2i32 (AArch64saddv (v2i32 V64:$Rn))),

llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp

Lines changed: 0 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -3558,8 +3558,6 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
35583558
return selectConcatVectors(I, MRI);
35593559
case TargetOpcode::G_JUMP_TABLE:
35603560
return selectJumpTable(I, MRI);
3561-
case TargetOpcode::G_VECREDUCE_ADD:
3562-
return selectReduction(I, MRI);
35633561
case TargetOpcode::G_MEMCPY:
35643562
case TargetOpcode::G_MEMCPY_INLINE:
35653563
case TargetOpcode::G_MEMMOVE:
@@ -3578,49 +3576,6 @@ bool AArch64InstructionSelector::selectAndRestoreState(MachineInstr &I) {
35783576
return Success;
35793577
}
35803578

3581-
// Hand-written selection for vector reductions (removed by this commit).
// Only G_VECREDUCE_ADD is handled; any other opcode falls through to the
// final `return false`. Operand 0 of I is the destination and operand 1 is
// the source vector register. Returns false as well when the source vector
// type has no matching instruction below.
bool AArch64InstructionSelector::selectReduction(MachineInstr &I,
3582-
MachineRegisterInfo &MRI) {
3583-
Register VecReg = I.getOperand(1).getReg();
3584-
LLT VecTy = MRI.getType(VecReg);
3585-
if (I.getOpcode() == TargetOpcode::G_VECREDUCE_ADD) {
3586-
// For <2 x i32> ADDPv2i32 generates an FPR64 value, so we need to emit
3587-
// a subregister copy afterwards.
3588-
if (VecTy == LLT::fixed_vector(2, 32)) {
3589-
Register DstReg = I.getOperand(0).getReg();
3590-
// The source vector is passed as both ADDP inputs.
auto AddP = MIB.buildInstr(AArch64::ADDPv2i32, {&AArch64::FPR64RegClass},
3591-
{VecReg, VecReg});
3592-
// Copy the ssub subregister of the ADDP result into DstReg; `Copy` holds
// the register defined by that COPY.
auto Copy = MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
3593-
.addReg(AddP.getReg(0), 0, AArch64::ssub)
3594-
.getReg(0);
3595-
RBI.constrainGenericRegister(Copy, AArch64::FPR32RegClass, MRI);
3596-
// The generic instruction is replaced by the two instructions built above.
I.eraseFromParent();
3597-
return constrainSelectedInstRegOperands(*AddP, TII, TRI, RBI);
3598-
}
3599-
3600-
// All other supported types map to a single instruction, chosen by the
// source vector type.
unsigned Opc = 0;
3601-
if (VecTy == LLT::fixed_vector(16, 8))
3602-
Opc = AArch64::ADDVv16i8v;
3603-
else if (VecTy == LLT::fixed_vector(8, 8))
3604-
Opc = AArch64::ADDVv8i8v;
3605-
else if (VecTy == LLT::fixed_vector(8, 16))
3606-
Opc = AArch64::ADDVv8i16v;
3607-
else if (VecTy == LLT::fixed_vector(4, 16))
3608-
Opc = AArch64::ADDVv4i16v;
3609-
else if (VecTy == LLT::fixed_vector(4, 32))
3610-
Opc = AArch64::ADDVv4i32v;
3611-
else if (VecTy == LLT::fixed_vector(2, 64))
3612-
Opc = AArch64::ADDPv2i64p;
3613-
else {
3614-
LLVM_DEBUG(dbgs() << "Unhandled type for add reduction");
3615-
return false;
3616-
}
3617-
// Rewrite I in place to the chosen opcode, keeping its existing operands.
I.setDesc(TII.get(Opc));
3618-
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3619-
}
3620-
3621-
// Not a G_VECREDUCE_ADD: nothing selected here.
return false;
3622-
}
3623-
36243579
bool AArch64InstructionSelector::selectMOPS(MachineInstr &GI,
36253580
MachineRegisterInfo &MRI) {
36263581
unsigned Mopcode;

0 commit comments

Comments
 (0)