Skip to content

[ARM] Don't block tail-predication from unrelated VPT blocks. #94239

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jun 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 16 additions & 6 deletions llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,12 @@ static bool shouldInspect(MachineInstr &MI) {
return isDomainMVE(&MI) || isVectorPredicate(&MI) || hasVPRUse(MI);
}

static bool isHorizontalReduction(const MachineInstr &MI) {
const MCInstrDesc &MCID = MI.getDesc();
uint64_t Flags = MCID.TSFlags;
return (Flags & ARMII::HorizontalReduction) != 0;
}

namespace {

using InstSet = SmallPtrSetImpl<MachineInstr *>;
Expand Down Expand Up @@ -275,6 +281,16 @@ namespace {
if (VPT->getOpcode() == ARM::MVE_VPST)
return false;

// If the VPT block does not define something that is an "output", then
// the tail-predicated version will just perform a subset of the original
// vpt block, where the last lanes should not be used.
if (isVPTOpcode(VPT->getOpcode()) &&
all_of(Block.getInsts(), [](const MachineInstr *MI) {
return !MI->mayStore() && !MI->mayLoad() &&
!isHorizontalReduction(*MI) && !isVCTP(MI);
}))
return true;

auto IsOperandPredicated = [&](MachineInstr *MI, unsigned Idx) {
MachineInstr *Op = RDA.getMIOperand(MI, MI->getOperand(Idx));
return Op && PredicatedInsts.count(Op) && isPredicatedOnVCTP(Op);
Expand Down Expand Up @@ -813,12 +829,6 @@ static bool producesDoubleWidthResult(const MachineInstr &MI) {
return (Flags & ARMII::DoubleWidthResult) != 0;
}

static bool isHorizontalReduction(const MachineInstr &MI) {
const MCInstrDesc &MCID = MI.getDesc();
uint64_t Flags = MCID.TSFlags;
return (Flags & ARMII::HorizontalReduction) != 0;
}

// Can this instruction generate a non-zero result when given only zeroed
// operands? This allows us to know that, given operands with false bytes
// zeroed by masked loads, that the result will also contain zeros in those
Expand Down
25 changes: 5 additions & 20 deletions llvm/test/CodeGen/Thumb2/mve-tailpred-vptblock.ll
Original file line number Diff line number Diff line change
Expand Up @@ -20,50 +20,35 @@ define void @convert_vptblock(ptr %pchTarget, i16 signext %iTargetStride, ptr %p
; CHECK-NEXT: mov.w r8, #0
; CHECK-NEXT: ldrd r4, r5, [sp, #88]
; CHECK-NEXT: mov r7, r0
; CHECK-NEXT: cmp.w r10, #8
; CHECK-NEXT: mov.w r0, #1
; CHECK-NEXT: mov r3, r10
; CHECK-NEXT: mov.w r11, #0
; CHECK-NEXT: it ge
; CHECK-NEXT: movge r3, #8
; CHECK-NEXT: vidup.u16 q0, r8, #4
; CHECK-NEXT: sub.w r3, r10, r3
; CHECK-NEXT: vmov.i32 q1, #0x0
; CHECK-NEXT: adds r3, #7
; CHECK-NEXT: vmov.i16 q2, #0x100
; CHECK-NEXT: vmov.i16 q3, #0xff
; CHECK-NEXT: add.w r9, r0, r3, lsr #3
; CHECK-NEXT: .LBB0_2: @ %for.body
; CHECK-NEXT: @ =>This Loop Header: Depth=1
; CHECK-NEXT: @ Child Loop BB0_3 Depth 2
; CHECK-NEXT: mov r3, r10
; CHECK-NEXT: vmov q4, q0
; CHECK-NEXT: mov r6, r8
; CHECK-NEXT: mov r0, r7
; CHECK-NEXT: dls lr, r9
; CHECK-NEXT: dlstp.16 lr, r10
; CHECK-NEXT: .LBB0_3: @ %do.body
; CHECK-NEXT: @ Parent Loop BB0_2 Depth=1
; CHECK-NEXT: @ => This Inner Loop Header: Depth=2
; CHECK-NEXT: vctp.16 r3
; CHECK-NEXT: vpst
; CHECK-NEXT: vldrbt.u16 q5, [r2, q4]
; CHECK-NEXT: vldrb.u16 q5, [r2, q4]
; CHECK-NEXT: vmul.i16 q4, q5, r5
; CHECK-NEXT: vshr.u16 q4, q4, #8
; CHECK-NEXT: vsub.i16 q5, q2, q4
; CHECK-NEXT: vpt.i16 eq, q4, q3
; CHECK-NEXT: vmovt q5, q1
; CHECK-NEXT: vctp.16 r3
; CHECK-NEXT: vpst
; CHECK-NEXT: vldrbt.u16 q6, [r0]
; CHECK-NEXT: vldrb.u16 q6, [r0]
; CHECK-NEXT: vsub.i16 q4, q2, q5
; CHECK-NEXT: subs r3, #8
; CHECK-NEXT: vmul.i16 q5, q5, q6
; CHECK-NEXT: vmla.i16 q5, q4, r4
; CHECK-NEXT: vshr.u16 q4, q5, #8
; CHECK-NEXT: vpst
; CHECK-NEXT: vstrbt.16 q4, [r0], #8
; CHECK-NEXT: vstrb.16 q4, [r0], #8
; CHECK-NEXT: vidup.u16 q4, r6, #4
; CHECK-NEXT: le lr, .LBB0_3
; CHECK-NEXT: letp lr, .LBB0_3
; CHECK-NEXT: @ %bb.4: @ %do.end
; CHECK-NEXT: @ in Loop: Header=BB0_2 Depth=1
; CHECK-NEXT: add.w r0, r11, #1
Expand Down
Loading