Skip to content

Commit d9be51c

Browse files
authored
[AArch64] Improve code generation for experimental.cttz.elts (#91505)
This patch extends the lowering of the experimental.cttz.elts intrinsic to BRKB + CNTP instruction sequences so that it is used for all legal SVE predicate types (nxv16i1, nxv8i1, nxv4i1 and nxv2i1) rather than only nxv16i1. An unused parameter is also removed from some of the related regression tests.
1 parent 632317e commit d9be51c

File tree

4 files changed

+237
-90
lines changed

4 files changed

+237
-90
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1868,7 +1868,12 @@ bool AArch64TargetLowering::shouldExpandGetActiveLaneMask(EVT ResVT,
18681868
}
18691869

18701870
bool AArch64TargetLowering::shouldExpandCttzElements(EVT VT) const {
1871-
return !Subtarget->hasSVEorSME() || VT != MVT::nxv16i1;
1871+
if (!Subtarget->hasSVEorSME())
1872+
return true;
1873+
1874+
// We can only use the BRKB + CNTP sequence with legal predicate types.
1875+
return VT != MVT::nxv16i1 && VT != MVT::nxv8i1 && VT != MVT::nxv4i1 &&
1876+
VT != MVT::nxv2i1;
18721877
}
18731878

18741879
void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) {

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2082,6 +2082,18 @@ let Predicates = [HasSVEorSME] in {
20822082
def : Pat<(i64 (AArch64CttzElts nxv16i1:$Op1)),
20832083
(CNTP_XPP_B (BRKB_PPzP (PTRUE_B 31), PPR:$Op1),
20842084
(BRKB_PPzP (PTRUE_B 31), PPR:$Op1))>;
2085+
2086+
def : Pat<(i64 (AArch64CttzElts nxv8i1:$Op1)),
2087+
(CNTP_XPP_H (BRKB_PPzP (PTRUE_H 31), PPR:$Op1),
2088+
(BRKB_PPzP (PTRUE_H 31), PPR:$Op1))>;
2089+
2090+
def : Pat<(i64 (AArch64CttzElts nxv4i1:$Op1)),
2091+
(CNTP_XPP_S (BRKB_PPzP (PTRUE_S 31), PPR:$Op1),
2092+
(BRKB_PPzP (PTRUE_S 31), PPR:$Op1))>;
2093+
2094+
def : Pat<(i64 (AArch64CttzElts nxv2i1:$Op1)),
2095+
(CNTP_XPP_D (BRKB_PPzP (PTRUE_D 31), PPR:$Op1),
2096+
(BRKB_PPzP (PTRUE_D 31), PPR:$Op1))>;
20852097
}
20862098

20872099
defm INCB_XPiI : sve_int_pred_pattern_a<0b000, "incb", add, int_aarch64_sve_cntb>;
@@ -2175,6 +2187,30 @@ let Predicates = [HasSVEorSME] in {
21752187
(INSERT_SUBREG (IMPLICIT_DEF), GPR32:$Op1, sub_32)),
21762188
sub_32)>;
21772189

2190+
def : Pat<(i64 (add GPR64:$Op1, (i64 (AArch64CttzElts nxv8i1:$Op2)))),
2191+
(INCP_XP_H (BRKB_PPzP (PTRUE_H 31), PPR:$Op2), GPR64:$Op1)>;
2192+
2193+
def : Pat<(i32 (add GPR32:$Op1, (trunc (i64 (AArch64CttzElts nxv8i1:$Op2))))),
2194+
(EXTRACT_SUBREG (INCP_XP_H (BRKB_PPzP (PTRUE_H 31), PPR:$Op2),
2195+
(INSERT_SUBREG (IMPLICIT_DEF), GPR32:$Op1, sub_32)),
2196+
sub_32)>;
2197+
2198+
def : Pat<(i64 (add GPR64:$Op1, (i64 (AArch64CttzElts nxv4i1:$Op2)))),
2199+
(INCP_XP_S (BRKB_PPzP (PTRUE_S 31), PPR:$Op2), GPR64:$Op1)>;
2200+
2201+
def : Pat<(i32 (add GPR32:$Op1, (trunc (i64 (AArch64CttzElts nxv4i1:$Op2))))),
2202+
(EXTRACT_SUBREG (INCP_XP_S (BRKB_PPzP (PTRUE_S 31), PPR:$Op2),
2203+
(INSERT_SUBREG (IMPLICIT_DEF), GPR32:$Op1, sub_32)),
2204+
sub_32)>;
2205+
2206+
def : Pat<(i64 (add GPR64:$Op1, (i64 (AArch64CttzElts nxv2i1:$Op2)))),
2207+
(INCP_XP_D (BRKB_PPzP (PTRUE_D 31), PPR:$Op2), GPR64:$Op1)>;
2208+
2209+
def : Pat<(i32 (add GPR32:$Op1, (trunc (i64 (AArch64CttzElts nxv2i1:$Op2))))),
2210+
(EXTRACT_SUBREG (INCP_XP_D (BRKB_PPzP (PTRUE_D 31), PPR:$Op2),
2211+
(INSERT_SUBREG (IMPLICIT_DEF), GPR32:$Op1, sub_32)),
2212+
sub_32)>;
2213+
21782214
defm INDEX_RR : sve_int_index_rr<"index", AArch64mul_p_oneuse>;
21792215
defm INDEX_IR : sve_int_index_ir<"index", AArch64mul_p, AArch64mul_p_oneuse>;
21802216
defm INDEX_RI : sve_int_index_ri<"index">;

0 commit comments

Comments
 (0)