Skip to content

Commit 61b99ca

Browse files
[AArch64] Consider StreamingSVE in shouldExpandGetActiveLaneMask (#144722)
If StreamingSVE is available, we may be able to lower the intrinsic to the GET_ACTIVE_LANE_MASK node instead of expanding it. Also adds the node to addTypeForFixedLengthSVE to ensure we lower to the SVE instruction when useSVEForFixedLengthVectors is true.
1 parent 049d61a commit 61b99ca

File tree

2 files changed

+103
-43
lines changed

2 files changed

+103
-43
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -2129,7 +2129,8 @@ void AArch64TargetLowering::addTypeForNEON(MVT VT) {
21292129
bool AArch64TargetLowering::shouldExpandGetActiveLaneMask(EVT ResVT,
21302130
EVT OpVT) const {
21312131
// Only SVE has a 1:1 mapping from intrinsic -> instruction (whilelo).
2132-
if (!Subtarget->hasSVE() || ResVT.getVectorElementType() != MVT::i1)
2132+
if (!Subtarget->isSVEorStreamingSVEAvailable() ||
2133+
ResVT.getVectorElementType() != MVT::i1)
21332134
return true;
21342135

21352136
// Only support illegal types if the result is scalable and min elements > 1.
@@ -2299,6 +2300,7 @@ void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) {
22992300
setOperationAction(ISD::FSQRT, VT, Default);
23002301
setOperationAction(ISD::FSUB, VT, Default);
23012302
setOperationAction(ISD::FTRUNC, VT, Default);
2303+
setOperationAction(ISD::GET_ACTIVE_LANE_MASK, VT, Default);
23022304
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Default);
23032305
setOperationAction(ISD::LOAD, VT, PreferNEON ? Legal : Default);
23042306
setOperationAction(ISD::MGATHER, VT, PreferSVE ? Default : Expand);
@@ -18139,7 +18141,8 @@ performActiveLaneMaskCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
1813918141
/*IsEqual=*/false))
1814018142
return While;
1814118143

18142-
if (!ST->hasSVE2p1() && !(ST->hasSME2() && ST->isStreaming()))
18144+
if (!N->getValueType(0).isScalableVector() ||
18145+
(!ST->hasSVE2p1() && !(ST->hasSME2() && ST->isStreaming())))
1814318146
return SDValue();
1814418147

1814518148
if (!N->hasNUsesOfValue(2, 0))

llvm/test/CodeGen/AArch64/active_lane_mask.ll

Lines changed: 98 additions & 41 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,6 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
2+
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SVE
3+
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -force-streaming < %s | FileCheck %s --check-prefixes=CHECK,CHECK-STREAMING
34

45
; == Scalable ==
56

@@ -292,61 +293,117 @@ define <2 x i1> @lane_mask_v2i1_i64(i64 %index, i64 %TC) {
292293
}
293294

294295
define <16 x i1> @lane_mask_v16i1_i8(i8 %index, i8 %TC) {
295-
; CHECK-LABEL: lane_mask_v16i1_i8:
296-
; CHECK: // %bb.0:
297-
; CHECK-NEXT: index z0.b, #0, #1
298-
; CHECK-NEXT: dup v1.16b, w0
299-
; CHECK-NEXT: uqadd v0.16b, v1.16b, v0.16b
300-
; CHECK-NEXT: dup v1.16b, w1
301-
; CHECK-NEXT: cmhi v0.16b, v1.16b, v0.16b
302-
; CHECK-NEXT: ret
296+
; CHECK-SVE-LABEL: lane_mask_v16i1_i8:
297+
; CHECK-SVE: // %bb.0:
298+
; CHECK-SVE-NEXT: index z0.b, #0, #1
299+
; CHECK-SVE-NEXT: dup v1.16b, w0
300+
; CHECK-SVE-NEXT: uqadd v0.16b, v1.16b, v0.16b
301+
; CHECK-SVE-NEXT: dup v1.16b, w1
302+
; CHECK-SVE-NEXT: cmhi v0.16b, v1.16b, v0.16b
303+
; CHECK-SVE-NEXT: ret
304+
;
305+
; CHECK-STREAMING-LABEL: lane_mask_v16i1_i8:
306+
; CHECK-STREAMING: // %bb.0:
307+
; CHECK-STREAMING-NEXT: index z0.b, w0, #1
308+
; CHECK-STREAMING-NEXT: mov z1.b, w0
309+
; CHECK-STREAMING-NEXT: ptrue p0.b, vl16
310+
; CHECK-STREAMING-NEXT: cmphi p1.b, p0/z, z1.b, z0.b
311+
; CHECK-STREAMING-NEXT: mov z1.b, p1/z, #-1 // =0xffffffffffffffff
312+
; CHECK-STREAMING-NEXT: orr z0.d, z0.d, z1.d
313+
; CHECK-STREAMING-NEXT: mov z1.b, w1
314+
; CHECK-STREAMING-NEXT: cmphi p0.b, p0/z, z1.b, z0.b
315+
; CHECK-STREAMING-NEXT: mov z0.b, p0/z, #-1 // =0xffffffffffffffff
316+
; CHECK-STREAMING-NEXT: // kill: def $q0 killed $q0 killed $z0
317+
; CHECK-STREAMING-NEXT: ret
303318
%active.lane.mask = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i8(i8 %index, i8 %TC)
304319
ret <16 x i1> %active.lane.mask
305320
}
306321

307322
define <8 x i1> @lane_mask_v8i1_i8(i8 %index, i8 %TC) {
308-
; CHECK-LABEL: lane_mask_v8i1_i8:
309-
; CHECK: // %bb.0:
310-
; CHECK-NEXT: index z0.b, #0, #1
311-
; CHECK-NEXT: dup v1.8b, w0
312-
; CHECK-NEXT: uqadd v0.8b, v1.8b, v0.8b
313-
; CHECK-NEXT: dup v1.8b, w1
314-
; CHECK-NEXT: cmhi v0.8b, v1.8b, v0.8b
315-
; CHECK-NEXT: ret
323+
; CHECK-SVE-LABEL: lane_mask_v8i1_i8:
324+
; CHECK-SVE: // %bb.0:
325+
; CHECK-SVE-NEXT: index z0.b, #0, #1
326+
; CHECK-SVE-NEXT: dup v1.8b, w0
327+
; CHECK-SVE-NEXT: uqadd v0.8b, v1.8b, v0.8b
328+
; CHECK-SVE-NEXT: dup v1.8b, w1
329+
; CHECK-SVE-NEXT: cmhi v0.8b, v1.8b, v0.8b
330+
; CHECK-SVE-NEXT: ret
331+
;
332+
; CHECK-STREAMING-LABEL: lane_mask_v8i1_i8:
333+
; CHECK-STREAMING: // %bb.0:
334+
; CHECK-STREAMING-NEXT: index z0.b, w0, #1
335+
; CHECK-STREAMING-NEXT: mov z1.b, w0
336+
; CHECK-STREAMING-NEXT: ptrue p0.b, vl8
337+
; CHECK-STREAMING-NEXT: cmphi p1.b, p0/z, z1.b, z0.b
338+
; CHECK-STREAMING-NEXT: mov z1.b, p1/z, #-1 // =0xffffffffffffffff
339+
; CHECK-STREAMING-NEXT: orr z0.d, z0.d, z1.d
340+
; CHECK-STREAMING-NEXT: mov z1.b, w1
341+
; CHECK-STREAMING-NEXT: cmphi p0.b, p0/z, z1.b, z0.b
342+
; CHECK-STREAMING-NEXT: mov z0.b, p0/z, #-1 // =0xffffffffffffffff
343+
; CHECK-STREAMING-NEXT: // kill: def $d0 killed $d0 killed $z0
344+
; CHECK-STREAMING-NEXT: ret
316345
%active.lane.mask = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i8(i8 %index, i8 %TC)
317346
ret <8 x i1> %active.lane.mask
318347
}
319348

320349
define <4 x i1> @lane_mask_v4i1_i8(i8 %index, i8 %TC) {
321-
; CHECK-LABEL: lane_mask_v4i1_i8:
322-
; CHECK: // %bb.0:
323-
; CHECK-NEXT: dup v0.4h, w0
324-
; CHECK-NEXT: index z1.h, #0, #1
325-
; CHECK-NEXT: movi d2, #0xff00ff00ff00ff
326-
; CHECK-NEXT: dup v3.4h, w1
327-
; CHECK-NEXT: bic v0.4h, #255, lsl #8
328-
; CHECK-NEXT: bic v3.4h, #255, lsl #8
329-
; CHECK-NEXT: add v0.4h, v0.4h, v1.4h
330-
; CHECK-NEXT: umin v0.4h, v0.4h, v2.4h
331-
; CHECK-NEXT: cmhi v0.4h, v3.4h, v0.4h
332-
; CHECK-NEXT: ret
350+
; CHECK-SVE-LABEL: lane_mask_v4i1_i8:
351+
; CHECK-SVE: // %bb.0:
352+
; CHECK-SVE-NEXT: dup v0.4h, w0
353+
; CHECK-SVE-NEXT: index z1.h, #0, #1
354+
; CHECK-SVE-NEXT: movi d2, #0xff00ff00ff00ff
355+
; CHECK-SVE-NEXT: dup v3.4h, w1
356+
; CHECK-SVE-NEXT: bic v0.4h, #255, lsl #8
357+
; CHECK-SVE-NEXT: bic v3.4h, #255, lsl #8
358+
; CHECK-SVE-NEXT: add v0.4h, v0.4h, v1.4h
359+
; CHECK-SVE-NEXT: umin v0.4h, v0.4h, v2.4h
360+
; CHECK-SVE-NEXT: cmhi v0.4h, v3.4h, v0.4h
361+
; CHECK-SVE-NEXT: ret
362+
;
363+
; CHECK-STREAMING-LABEL: lane_mask_v4i1_i8:
364+
; CHECK-STREAMING: // %bb.0:
365+
; CHECK-STREAMING-NEXT: mov z1.h, w0
366+
; CHECK-STREAMING-NEXT: index z0.h, #0, #1
367+
; CHECK-STREAMING-NEXT: ptrue p0.h, vl4
368+
; CHECK-STREAMING-NEXT: and z1.h, z1.h, #0xff
369+
; CHECK-STREAMING-NEXT: add z0.h, z1.h, z0.h
370+
; CHECK-STREAMING-NEXT: mov z1.h, w1
371+
; CHECK-STREAMING-NEXT: umin z0.h, z0.h, #255
372+
; CHECK-STREAMING-NEXT: and z1.h, z1.h, #0xff
373+
; CHECK-STREAMING-NEXT: cmphi p0.h, p0/z, z1.h, z0.h
374+
; CHECK-STREAMING-NEXT: mov z0.h, p0/z, #-1 // =0xffffffffffffffff
375+
; CHECK-STREAMING-NEXT: // kill: def $d0 killed $d0 killed $z0
376+
; CHECK-STREAMING-NEXT: ret
333377
%active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i8(i8 %index, i8 %TC)
334378
ret <4 x i1> %active.lane.mask
335379
}
336380

337381
define <2 x i1> @lane_mask_v2i1_i8(i8 %index, i8 %TC) {
338-
; CHECK-LABEL: lane_mask_v2i1_i8:
339-
; CHECK: // %bb.0:
340-
; CHECK-NEXT: movi d0, #0x0000ff000000ff
341-
; CHECK-NEXT: dup v1.2s, w0
342-
; CHECK-NEXT: index z2.s, #0, #1
343-
; CHECK-NEXT: dup v3.2s, w1
344-
; CHECK-NEXT: and v1.8b, v1.8b, v0.8b
345-
; CHECK-NEXT: add v1.2s, v1.2s, v2.2s
346-
; CHECK-NEXT: and v2.8b, v3.8b, v0.8b
347-
; CHECK-NEXT: umin v0.2s, v1.2s, v0.2s
348-
; CHECK-NEXT: cmhi v0.2s, v2.2s, v0.2s
349-
; CHECK-NEXT: ret
382+
; CHECK-SVE-LABEL: lane_mask_v2i1_i8:
383+
; CHECK-SVE: // %bb.0:
384+
; CHECK-SVE-NEXT: movi d0, #0x0000ff000000ff
385+
; CHECK-SVE-NEXT: dup v1.2s, w0
386+
; CHECK-SVE-NEXT: index z2.s, #0, #1
387+
; CHECK-SVE-NEXT: dup v3.2s, w1
388+
; CHECK-SVE-NEXT: and v1.8b, v1.8b, v0.8b
389+
; CHECK-SVE-NEXT: add v1.2s, v1.2s, v2.2s
390+
; CHECK-SVE-NEXT: and v2.8b, v3.8b, v0.8b
391+
; CHECK-SVE-NEXT: umin v0.2s, v1.2s, v0.2s
392+
; CHECK-SVE-NEXT: cmhi v0.2s, v2.2s, v0.2s
393+
; CHECK-SVE-NEXT: ret
394+
;
395+
; CHECK-STREAMING-LABEL: lane_mask_v2i1_i8:
396+
; CHECK-STREAMING: // %bb.0:
397+
; CHECK-STREAMING-NEXT: and w8, w0, #0xff
398+
; CHECK-STREAMING-NEXT: ptrue p0.s, vl2
399+
; CHECK-STREAMING-NEXT: index z0.s, w8, #1
400+
; CHECK-STREAMING-NEXT: and w8, w1, #0xff
401+
; CHECK-STREAMING-NEXT: mov z1.s, w8
402+
; CHECK-STREAMING-NEXT: umin z0.s, z0.s, #255
403+
; CHECK-STREAMING-NEXT: cmphi p0.s, p0/z, z1.s, z0.s
404+
; CHECK-STREAMING-NEXT: mov z0.s, p0/z, #-1 // =0xffffffffffffffff
405+
; CHECK-STREAMING-NEXT: // kill: def $d0 killed $d0 killed $z0
406+
; CHECK-STREAMING-NEXT: ret
350407
%active.lane.mask = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i8(i8 %index, i8 %TC)
351408
ret <2 x i1> %active.lane.mask
352409
}

0 commit comments

Comments
 (0)