Skip to content

Commit 7dc5504

Browse files
[LLVM][CodeGen][AArch64] Improve PTEST removal by looking through copies. (#132041)
The general predicates of the PTEST and PTEST_like instructions may belong to different register classes. This can lead to the insertion of a COPY instruction, making them appear different. However, for the purpose of PTEST removal, such copies are irrelevant, and we can look through them to improve the likelihood of finding a match.
1 parent 9dc854c commit 7dc5504

File tree

2 files changed

+55
-2
lines changed

2 files changed

+55
-2
lines changed

llvm/lib/Target/AArch64/AArch64InstrInfo.cpp

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1491,13 +1491,22 @@ AArch64InstrInfo::canRemovePTestInstr(MachineInstr *PTest, MachineInstr *Mask,
14911491
if ((Mask == Pred) && PTest->getOpcode() == AArch64::PTEST_PP_ANY)
14921492
return PredOpcode;
14931493

1494+
auto PTestLikeMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
1495+
1496+
// If the PTEST like instruction's general predicate is not `Mask`, attempt
1497+
// to look through a copy and try again. This is because some instructions
1498+
// take a predicate whose register class is a subset of its result class.
1499+
if (Mask != PTestLikeMask && PTestLikeMask->isFullCopy() &&
1500+
PTestLikeMask->getOperand(1).getReg().isVirtual())
1501+
PTestLikeMask =
1502+
MRI->getUniqueVRegDef(PTestLikeMask->getOperand(1).getReg());
1503+
14941504
// For PTEST(PTRUE_ALL, PTEST_LIKE), the PTEST is redundant if the
14951505
// element size matches and either the PTEST_LIKE instruction uses
14961506
// the same all active mask or the condition is "any".
14971507
if (isPTrueOpcode(MaskOpcode) && Mask->getOperand(1).getImm() == 31 &&
14981508
getElementSizeForOpcode(MaskOpcode) ==
14991509
getElementSizeForOpcode(PredOpcode)) {
1500-
auto PTestLikeMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
15011510
if (Mask == PTestLikeMask || PTest->getOpcode() == AArch64::PTEST_PP_ANY)
15021511
return PredOpcode;
15031512
}
@@ -1524,7 +1533,6 @@ AArch64InstrInfo::canRemovePTestInstr(MachineInstr *PTest, MachineInstr *Mask,
15241533
// active flag, whereas the PTEST instruction with the same mask doesn't.
15251534
// For PTEST_ANY this doesn't apply as the flags in this case would be
15261535
// identical regardless of element size.
1527-
auto PTestLikeMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
15281536
uint64_t PredElementSize = getElementSizeForOpcode(PredOpcode);
15291537
if (Mask == PTestLikeMask && (PredElementSize == AArch64::ElementSizeB ||
15301538
PTest->getOpcode() == AArch64::PTEST_PP_ANY))

llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpeq.mir

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -661,3 +661,48 @@ body: |
661661
RET_ReallyLR implicit $w0
662662
663663
...
664+
---
665+
name: cmpeq_nxv16i8_ptest_with_register_class_mismatch
666+
alignment: 2
667+
tracksRegLiveness: true
668+
registers:
669+
- { id: 0, class: ppr }
670+
- { id: 1, class: zpr }
671+
- { id: 2, class: zpr }
672+
- { id: 3, class: ppr_3b }
673+
- { id: 4, class: ppr }
674+
- { id: 5, class: gpr32 }
675+
- { id: 6, class: gpr32 }
676+
liveins:
677+
- { reg: '$z0', virtual-reg: '%1' }
678+
- { reg: '$z1', virtual-reg: '%2' }
679+
frameInfo:
680+
maxCallFrameSize: 0
681+
body: |
682+
bb.0:
683+
liveins: $z0, $z1
684+
685+
; CHECK-LABEL: name: cmpeq_nxv16i8_ptest_with_register_class_mismatch
686+
; CHECK: liveins: $z0, $z1
687+
; CHECK-NEXT: {{ $}}
688+
; CHECK-NEXT: [[COPY:%[0-9]+]]:zpr = COPY $z0
689+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:zpr = COPY $z1
690+
; CHECK-NEXT: [[PTRUE_B:%[0-9]+]]:ppr = PTRUE_B 31, implicit $vg
691+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:ppr_3b = COPY [[PTRUE_B]]
692+
; CHECK-NEXT: [[CMPEQ_PPzZZ_B:%[0-9]+]]:ppr = CMPEQ_PPzZZ_B [[COPY2]], [[COPY]], [[COPY1]], implicit-def $nzcv
693+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr32 = COPY $wzr
694+
; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr [[COPY3]], $wzr, 0, implicit $nzcv
695+
; CHECK-NEXT: $w0 = COPY [[CSINCWr]]
696+
; CHECK-NEXT: RET_ReallyLR implicit $w0
697+
%1:zpr = COPY $z0
698+
%2:zpr = COPY $z1
699+
%0:ppr = PTRUE_B 31, implicit $vg
700+
%3:ppr_3b = COPY %0
701+
%4:ppr = CMPEQ_PPzZZ_B %3, %1, %2, implicit-def dead $nzcv
702+
PTEST_PP %0, killed %4, implicit-def $nzcv
703+
%5:gpr32 = COPY $wzr
704+
%6:gpr32 = CSINCWr %5, $wzr, 0, implicit $nzcv
705+
$w0 = COPY %6
706+
RET_ReallyLR implicit $w0
707+
708+
...

0 commit comments

Comments
 (0)