Skip to content

[AArch64] Consider StreamingSVE in shouldExpandGetActiveLaneMask #144722

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jun 24, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2113,7 +2113,8 @@ void AArch64TargetLowering::addTypeForNEON(MVT VT) {
bool AArch64TargetLowering::shouldExpandGetActiveLaneMask(EVT ResVT,
EVT OpVT) const {
// Only SVE has a 1:1 mapping from intrinsic -> instruction (whilelo).
if (!Subtarget->hasSVE() || ResVT.getVectorElementType() != MVT::i1)
if (!Subtarget->isSVEorStreamingSVEAvailable() ||
ResVT.getVectorElementType() != MVT::i1)
return true;

// Only support illegal types if the result is scalable and min elements > 1.
Expand Down Expand Up @@ -2283,6 +2284,7 @@ void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) {
setOperationAction(ISD::FSQRT, VT, Default);
setOperationAction(ISD::FSUB, VT, Default);
setOperationAction(ISD::FTRUNC, VT, Default);
setOperationAction(ISD::GET_ACTIVE_LANE_MASK, VT, Default);
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Default);
setOperationAction(ISD::LOAD, VT, PreferNEON ? Legal : Default);
setOperationAction(ISD::MGATHER, VT, PreferSVE ? Default : Expand);
Expand Down Expand Up @@ -18099,7 +18101,8 @@ performActiveLaneMaskCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
/*IsEqual=*/false))
return While;

if (!ST->hasSVE2p1() && !(ST->hasSME2() && ST->isStreaming()))
if (!N->getValueType(0).isScalableVector() ||
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
if (!N->getValueType(0).isScalableVT() ||
if (!N->getValueType(0).isScalableVector() ||

(!ST->hasSVE2p1() && !(ST->hasSME2() && ST->isStreaming())))
return SDValue();

if (!N->hasNUsesOfValue(2, 0))
Expand Down
139 changes: 98 additions & 41 deletions llvm/test/CodeGen/AArch64/active_lane_mask.ll
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SVE
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -force-streaming < %s | FileCheck %s --check-prefixes=CHECK,CHECK-STREAMING

; == Scalable ==

Expand Down Expand Up @@ -292,61 +293,117 @@ define <2 x i1> @lane_mask_v2i1_i64(i64 %index, i64 %TC) {
}

define <16 x i1> @lane_mask_v16i1_i8(i8 %index, i8 %TC) {
; CHECK-LABEL: lane_mask_v16i1_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: index z0.b, #0, #1
; CHECK-NEXT: dup v1.16b, w0
; CHECK-NEXT: uqadd v0.16b, v1.16b, v0.16b
; CHECK-NEXT: dup v1.16b, w1
; CHECK-NEXT: cmhi v0.16b, v1.16b, v0.16b
; CHECK-NEXT: ret
; CHECK-SVE-LABEL: lane_mask_v16i1_i8:
; CHECK-SVE: // %bb.0:
; CHECK-SVE-NEXT: index z0.b, #0, #1
; CHECK-SVE-NEXT: dup v1.16b, w0
; CHECK-SVE-NEXT: uqadd v0.16b, v1.16b, v0.16b
; CHECK-SVE-NEXT: dup v1.16b, w1
; CHECK-SVE-NEXT: cmhi v0.16b, v1.16b, v0.16b
; CHECK-SVE-NEXT: ret
;
; CHECK-STREAMING-LABEL: lane_mask_v16i1_i8:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not a problem with this patch, but should we lower this to SVE's uqadd at some point?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this happens because the v16i8 uaddsat is expanded before it can be lowered to SVE instructions. I can try to address this in a separate patch.

; CHECK-STREAMING: // %bb.0:
; CHECK-STREAMING-NEXT: index z0.b, w0, #1
; CHECK-STREAMING-NEXT: mov z1.b, w0
; CHECK-STREAMING-NEXT: ptrue p0.b, vl16
; CHECK-STREAMING-NEXT: cmphi p1.b, p0/z, z1.b, z0.b
; CHECK-STREAMING-NEXT: mov z1.b, p1/z, #-1 // =0xffffffffffffffff
; CHECK-STREAMING-NEXT: orr z0.d, z0.d, z1.d
; CHECK-STREAMING-NEXT: mov z1.b, w1
; CHECK-STREAMING-NEXT: cmphi p0.b, p0/z, z1.b, z0.b
; CHECK-STREAMING-NEXT: mov z0.b, p0/z, #-1 // =0xffffffffffffffff
; CHECK-STREAMING-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-STREAMING-NEXT: ret
%active.lane.mask = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i8(i8 %index, i8 %TC)
ret <16 x i1> %active.lane.mask
}

define <8 x i1> @lane_mask_v8i1_i8(i8 %index, i8 %TC) {
; CHECK-LABEL: lane_mask_v8i1_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: index z0.b, #0, #1
; CHECK-NEXT: dup v1.8b, w0
; CHECK-NEXT: uqadd v0.8b, v1.8b, v0.8b
; CHECK-NEXT: dup v1.8b, w1
; CHECK-NEXT: cmhi v0.8b, v1.8b, v0.8b
; CHECK-NEXT: ret
; CHECK-SVE-LABEL: lane_mask_v8i1_i8:
; CHECK-SVE: // %bb.0:
; CHECK-SVE-NEXT: index z0.b, #0, #1
; CHECK-SVE-NEXT: dup v1.8b, w0
; CHECK-SVE-NEXT: uqadd v0.8b, v1.8b, v0.8b
; CHECK-SVE-NEXT: dup v1.8b, w1
; CHECK-SVE-NEXT: cmhi v0.8b, v1.8b, v0.8b
; CHECK-SVE-NEXT: ret
;
; CHECK-STREAMING-LABEL: lane_mask_v8i1_i8:
; CHECK-STREAMING: // %bb.0:
; CHECK-STREAMING-NEXT: index z0.b, w0, #1
; CHECK-STREAMING-NEXT: mov z1.b, w0
; CHECK-STREAMING-NEXT: ptrue p0.b, vl8
; CHECK-STREAMING-NEXT: cmphi p1.b, p0/z, z1.b, z0.b
; CHECK-STREAMING-NEXT: mov z1.b, p1/z, #-1 // =0xffffffffffffffff
; CHECK-STREAMING-NEXT: orr z0.d, z0.d, z1.d
; CHECK-STREAMING-NEXT: mov z1.b, w1
; CHECK-STREAMING-NEXT: cmphi p0.b, p0/z, z1.b, z0.b
; CHECK-STREAMING-NEXT: mov z0.b, p0/z, #-1 // =0xffffffffffffffff
; CHECK-STREAMING-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-STREAMING-NEXT: ret
%active.lane.mask = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i8(i8 %index, i8 %TC)
ret <8 x i1> %active.lane.mask
}

define <4 x i1> @lane_mask_v4i1_i8(i8 %index, i8 %TC) {
; CHECK-LABEL: lane_mask_v4i1_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: dup v0.4h, w0
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: movi d2, #0xff00ff00ff00ff
; CHECK-NEXT: dup v3.4h, w1
; CHECK-NEXT: bic v0.4h, #255, lsl #8
; CHECK-NEXT: bic v3.4h, #255, lsl #8
; CHECK-NEXT: add v0.4h, v0.4h, v1.4h
; CHECK-NEXT: umin v0.4h, v0.4h, v2.4h
; CHECK-NEXT: cmhi v0.4h, v3.4h, v0.4h
; CHECK-NEXT: ret
; CHECK-SVE-LABEL: lane_mask_v4i1_i8:
; CHECK-SVE: // %bb.0:
; CHECK-SVE-NEXT: dup v0.4h, w0
; CHECK-SVE-NEXT: index z1.h, #0, #1
; CHECK-SVE-NEXT: movi d2, #0xff00ff00ff00ff
; CHECK-SVE-NEXT: dup v3.4h, w1
; CHECK-SVE-NEXT: bic v0.4h, #255, lsl #8
; CHECK-SVE-NEXT: bic v3.4h, #255, lsl #8
; CHECK-SVE-NEXT: add v0.4h, v0.4h, v1.4h
; CHECK-SVE-NEXT: umin v0.4h, v0.4h, v2.4h
; CHECK-SVE-NEXT: cmhi v0.4h, v3.4h, v0.4h
; CHECK-SVE-NEXT: ret
;
; CHECK-STREAMING-LABEL: lane_mask_v4i1_i8:
; CHECK-STREAMING: // %bb.0:
; CHECK-STREAMING-NEXT: mov z1.h, w0
; CHECK-STREAMING-NEXT: index z0.h, #0, #1
; CHECK-STREAMING-NEXT: ptrue p0.h, vl4
; CHECK-STREAMING-NEXT: and z1.h, z1.h, #0xff
; CHECK-STREAMING-NEXT: add z0.h, z1.h, z0.h
; CHECK-STREAMING-NEXT: mov z1.h, w1
; CHECK-STREAMING-NEXT: umin z0.h, z0.h, #255
; CHECK-STREAMING-NEXT: and z1.h, z1.h, #0xff
; CHECK-STREAMING-NEXT: cmphi p0.h, p0/z, z1.h, z0.h
; CHECK-STREAMING-NEXT: mov z0.h, p0/z, #-1 // =0xffffffffffffffff
; CHECK-STREAMING-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-STREAMING-NEXT: ret
%active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i8(i8 %index, i8 %TC)
ret <4 x i1> %active.lane.mask
}

define <2 x i1> @lane_mask_v2i1_i8(i8 %index, i8 %TC) {
; CHECK-LABEL: lane_mask_v2i1_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: movi d0, #0x0000ff000000ff
; CHECK-NEXT: dup v1.2s, w0
; CHECK-NEXT: index z2.s, #0, #1
; CHECK-NEXT: dup v3.2s, w1
; CHECK-NEXT: and v1.8b, v1.8b, v0.8b
; CHECK-NEXT: add v1.2s, v1.2s, v2.2s
; CHECK-NEXT: and v2.8b, v3.8b, v0.8b
; CHECK-NEXT: umin v0.2s, v1.2s, v0.2s
; CHECK-NEXT: cmhi v0.2s, v2.2s, v0.2s
; CHECK-NEXT: ret
; CHECK-SVE-LABEL: lane_mask_v2i1_i8:
; CHECK-SVE: // %bb.0:
; CHECK-SVE-NEXT: movi d0, #0x0000ff000000ff
; CHECK-SVE-NEXT: dup v1.2s, w0
; CHECK-SVE-NEXT: index z2.s, #0, #1
; CHECK-SVE-NEXT: dup v3.2s, w1
; CHECK-SVE-NEXT: and v1.8b, v1.8b, v0.8b
; CHECK-SVE-NEXT: add v1.2s, v1.2s, v2.2s
; CHECK-SVE-NEXT: and v2.8b, v3.8b, v0.8b
; CHECK-SVE-NEXT: umin v0.2s, v1.2s, v0.2s
; CHECK-SVE-NEXT: cmhi v0.2s, v2.2s, v0.2s
; CHECK-SVE-NEXT: ret
;
; CHECK-STREAMING-LABEL: lane_mask_v2i1_i8:
; CHECK-STREAMING: // %bb.0:
; CHECK-STREAMING-NEXT: and w8, w0, #0xff
; CHECK-STREAMING-NEXT: ptrue p0.s, vl2
; CHECK-STREAMING-NEXT: index z0.s, w8, #1
; CHECK-STREAMING-NEXT: and w8, w1, #0xff
; CHECK-STREAMING-NEXT: mov z1.s, w8
; CHECK-STREAMING-NEXT: umin z0.s, z0.s, #255
; CHECK-STREAMING-NEXT: cmphi p0.s, p0/z, z1.s, z0.s
; CHECK-STREAMING-NEXT: mov z0.s, p0/z, #-1 // =0xffffffffffffffff
; CHECK-STREAMING-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-STREAMING-NEXT: ret
%active.lane.mask = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i8(i8 %index, i8 %TC)
ret <2 x i1> %active.lane.mask
}
Expand Down
Loading