Skip to content

[AArch64] Improve code generation for experimental.cttz.elts #91505

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
May 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1852,7 +1852,12 @@ bool AArch64TargetLowering::shouldExpandGetActiveLaneMask(EVT ResVT,
}

bool AArch64TargetLowering::shouldExpandCttzElements(EVT VT) const {
return !Subtarget->hasSVEorSME() || VT != MVT::nxv16i1;
if (!Subtarget->hasSVEorSME())
return true;

// We can only use the BRKB + CNTP sequence with legal predicate types.
return VT != MVT::nxv16i1 && VT != MVT::nxv8i1 && VT != MVT::nxv4i1 &&
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
if (VT != MVT::nxv16i1 && VT != MVT::nxv8i1 && VT != MVT::nxv4i1 &&
return VT != MVT::nxv16i1 && VT != MVT::nxv8i1 && VT != MVT::nxv4i1 &&
VT != MVT::nxv2i1;

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice - I've updated with this simplified return statement.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks!

VT != MVT::nxv2i1;
}

void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) {
Expand Down
36 changes: 36 additions & 0 deletions llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -2075,6 +2075,18 @@ let Predicates = [HasSVEorSME] in {
// Selection patterns for AArch64CttzElts producing an i64 result. Each pattern
// handles one predicate type (nxv16i1, nxv8i1, nxv4i1, nxv2i1) and emits a
// BRKB + CNTP sequence: BRKB_PPzP is applied to $Op1 under a PTRUE governing
// predicate, and that same BRKB expression is supplied as both operands of
// CNTP_XPP. The B/H/S/D suffix on PTRUE and CNTP_XPP tracks the predicate type.
// NOTE(review): the PTRUE operand 31 is presumably the "all elements" predicate
// pattern -- confirm against the SVE predicate-pattern encoding.

// nxv16i1 -> byte-element (B) forms.
def : Pat<(i64 (AArch64CttzElts nxv16i1:$Op1)),
(CNTP_XPP_B (BRKB_PPzP (PTRUE_B 31), PPR:$Op1),
(BRKB_PPzP (PTRUE_B 31), PPR:$Op1))>;

// nxv8i1 -> halfword-element (H) forms.
def : Pat<(i64 (AArch64CttzElts nxv8i1:$Op1)),
(CNTP_XPP_H (BRKB_PPzP (PTRUE_H 31), PPR:$Op1),
(BRKB_PPzP (PTRUE_H 31), PPR:$Op1))>;

// nxv4i1 -> word-element (S) forms.
def : Pat<(i64 (AArch64CttzElts nxv4i1:$Op1)),
(CNTP_XPP_S (BRKB_PPzP (PTRUE_S 31), PPR:$Op1),
(BRKB_PPzP (PTRUE_S 31), PPR:$Op1))>;

// nxv2i1 -> doubleword-element (D) forms.
def : Pat<(i64 (AArch64CttzElts nxv2i1:$Op1)),
(CNTP_XPP_D (BRKB_PPzP (PTRUE_D 31), PPR:$Op1),
(BRKB_PPzP (PTRUE_D 31), PPR:$Op1))>;
}

defm INCB_XPiI : sve_int_pred_pattern_a<0b000, "incb", add, int_aarch64_sve_cntb>;
Expand Down Expand Up @@ -2168,6 +2180,30 @@ let Predicates = [HasSVEorSME] in {
(INSERT_SUBREG (IMPLICIT_DEF), GPR32:$Op1, sub_32)),
sub_32)>;

// Patterns that fold an integer add of a general-purpose register with an
// AArch64CttzElts result into BRKB + INCP, for predicate types nxv8i1 (H),
// nxv4i1 (S) and nxv2i1 (D). Each type has two forms:
//   * i64: add GPR64:$Op1 to the i64 AArch64CttzElts result; selected to
//     INCP_XP with the BRKB result as the predicate and $Op1 as the register.
//   * i32: add GPR32:$Op1 to the truncated AArch64CttzElts result; $Op1 is
//     first placed in the sub_32 subregister of an IMPLICIT_DEF value, INCP_XP
//     is applied to that, and sub_32 of the result is extracted.
// NOTE(review): the PTRUE operand 31 is presumably the "all elements" predicate
// pattern -- confirm against the SVE predicate-pattern encoding.

// nxv8i1, 64-bit add.
def : Pat<(i64 (add GPR64:$Op1, (i64 (AArch64CttzElts nxv8i1:$Op2)))),
(INCP_XP_H (BRKB_PPzP (PTRUE_H 31), PPR:$Op2), GPR64:$Op1)>;

// nxv8i1, 32-bit add of the truncated count.
def : Pat<(i32 (add GPR32:$Op1, (trunc (i64 (AArch64CttzElts nxv8i1:$Op2))))),
(EXTRACT_SUBREG (INCP_XP_H (BRKB_PPzP (PTRUE_H 31), PPR:$Op2),
(INSERT_SUBREG (IMPLICIT_DEF), GPR32:$Op1, sub_32)),
sub_32)>;

// nxv4i1, 64-bit add.
def : Pat<(i64 (add GPR64:$Op1, (i64 (AArch64CttzElts nxv4i1:$Op2)))),
(INCP_XP_S (BRKB_PPzP (PTRUE_S 31), PPR:$Op2), GPR64:$Op1)>;

// nxv4i1, 32-bit add of the truncated count.
def : Pat<(i32 (add GPR32:$Op1, (trunc (i64 (AArch64CttzElts nxv4i1:$Op2))))),
(EXTRACT_SUBREG (INCP_XP_S (BRKB_PPzP (PTRUE_S 31), PPR:$Op2),
(INSERT_SUBREG (IMPLICIT_DEF), GPR32:$Op1, sub_32)),
sub_32)>;

// nxv2i1, 64-bit add.
def : Pat<(i64 (add GPR64:$Op1, (i64 (AArch64CttzElts nxv2i1:$Op2)))),
(INCP_XP_D (BRKB_PPzP (PTRUE_D 31), PPR:$Op2), GPR64:$Op1)>;

// nxv2i1, 32-bit add of the truncated count.
def : Pat<(i32 (add GPR32:$Op1, (trunc (i64 (AArch64CttzElts nxv2i1:$Op2))))),
(EXTRACT_SUBREG (INCP_XP_D (BRKB_PPzP (PTRUE_D 31), PPR:$Op2),
(INSERT_SUBREG (IMPLICIT_DEF), GPR32:$Op1, sub_32)),
sub_32)>;

defm INDEX_RR : sve_int_index_rr<"index", AArch64mul_p_oneuse>;
defm INDEX_IR : sve_int_index_ir<"index", AArch64mul_p, AArch64mul_p_oneuse>;
defm INDEX_RI : sve_int_index_ri<"index">;
Expand Down
Loading