Skip to content

Commit 1bdd4c1

Browse files
committed
[AArch64] Basic ISel for find_last_active
1 parent aa580c2 · commit 1bdd4c1

File tree

4 files changed

+32
-45
lines changed

4 files changed

+32
-45
lines changed

llvm/include/llvm/Target/TargetSelectionDAG.td

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -840,6 +840,9 @@ def vector_insert_subvec : SDNode<"ISD::INSERT_SUBVECTOR",
840840
def extract_subvector : SDNode<"ISD::EXTRACT_SUBVECTOR", SDTSubVecExtract, []>;
841841
def insert_subvector : SDNode<"ISD::INSERT_SUBVECTOR", SDTSubVecInsert, []>;
842842

843+
def find_last_active : SDNode<"ISD::VECTOR_FIND_LAST_ACTIVE",
844+
SDTypeProfile<1, 1, []>, []>;
845+
843846
// Nodes for intrinsics, you should use the intrinsic itself and let tblgen use
844847
// these internally. Don't reference these directly.
845848
def intrinsic_void : SDNode<"ISD::INTRINSIC_VOID",

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1446,6 +1446,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
14461446
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
14471447
setOperationAction(ISD::VECTOR_DEINTERLEAVE, VT, Custom);
14481448
setOperationAction(ISD::VECTOR_INTERLEAVE, VT, Custom);
1449+
setOperationAction(ISD::VECTOR_FIND_LAST_ACTIVE, VT, Legal);
14491450
}
14501451
}
14511452

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3379,6 +3379,17 @@ let Predicates = [HasSVE_or_SME] in {
33793379
def : Pat<(i64 (vector_extract nxv2i64:$vec, VectorIndexD:$index)),
33803380
(UMOVvi64 (v2i64 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexD:$index)>;
33813381

3382+
// Find index of last active lane. This is a fallback in case we miss the
3383+
// opportunity to fold into a lastb or clastb directly.
3384+
def : Pat<(i64 (find_last_active nxv16i1:$P1)),
3385+
(INSERT_SUBREG (IMPLICIT_DEF), (LASTB_RPZ_B $P1, (INDEX_II_B 0, 1)), sub_32)>;
3386+
def : Pat<(i64 (find_last_active nxv8i1:$P1)),
3387+
(INSERT_SUBREG (IMPLICIT_DEF), (LASTB_RPZ_H $P1, (INDEX_II_H 0, 1)), sub_32)>;
3388+
def : Pat<(i64 (find_last_active nxv4i1:$P1)),
3389+
(INSERT_SUBREG (IMPLICIT_DEF), (LASTB_RPZ_S $P1, (INDEX_II_S 0, 1)), sub_32)>;
3390+
def : Pat<(i64 (find_last_active nxv2i1:$P1)),
3391+
(LASTB_RPZ_D $P1, (INDEX_II_D 0, 1))>;
3392+
33823393
// Move element from the bottom 128-bits of a scalable vector to a single-element vector.
33833394
// Alternative case where insertelement is just scalar_to_vector rather than vector_insert.
33843395
def : Pat<(v1f64 (scalar_to_vector

llvm/test/CodeGen/AArch64/vector-extract-last-active.ll

Lines changed: 17 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -294,12 +294,7 @@ define i8 @extract_last_i8_scalable(<vscale x 16 x i8> %data, <vscale x 16 x i1>
294294
; CHECK-LABEL: extract_last_i8_scalable:
295295
; CHECK: // %bb.0:
296296
; CHECK-NEXT: index z1.b, #0, #1
297-
; CHECK-NEXT: mov z2.b, #0 // =0x0
298-
; CHECK-NEXT: ptrue p1.b
299-
; CHECK-NEXT: sel z1.b, p0, z1.b, z2.b
300-
; CHECK-NEXT: umaxv b1, p1, z1.b
301-
; CHECK-NEXT: fmov w8, s1
302-
; CHECK-NEXT: and x8, x8, #0xff
297+
; CHECK-NEXT: lastb w8, p0, z1.b
303298
; CHECK-NEXT: whilels p1.b, xzr, x8
304299
; CHECK-NEXT: ptest p0, p0.b
305300
; CHECK-NEXT: lastb w8, p1, z0.b
@@ -313,15 +308,11 @@ define i16 @extract_last_i16_scalable(<vscale x 8 x i16> %data, <vscale x 8 x i1
313308
; CHECK-LABEL: extract_last_i16_scalable:
314309
; CHECK: // %bb.0:
315310
; CHECK-NEXT: index z1.h, #0, #1
316-
; CHECK-NEXT: mov z2.h, #0 // =0x0
311+
; CHECK-NEXT: lastb w8, p0, z1.h
312+
; CHECK-NEXT: whilels p1.h, xzr, x8
313+
; CHECK-NEXT: lastb w8, p1, z0.h
317314
; CHECK-NEXT: ptrue p1.h
318-
; CHECK-NEXT: sel z1.h, p0, z1.h, z2.h
319-
; CHECK-NEXT: umaxv h1, p1, z1.h
320-
; CHECK-NEXT: fmov w8, s1
321-
; CHECK-NEXT: and x8, x8, #0xffff
322-
; CHECK-NEXT: whilels p2.h, xzr, x8
323315
; CHECK-NEXT: ptest p1, p0.b
324-
; CHECK-NEXT: lastb w8, p2, z0.h
325316
; CHECK-NEXT: csel w0, w8, w0, ne
326317
; CHECK-NEXT: ret
327318
%res = call i16 @llvm.experimental.vector.extract.last.active.nxv8i16(<vscale x 8 x i16> %data, <vscale x 8 x i1> %mask, i16 %passthru)
@@ -332,15 +323,11 @@ define i32 @extract_last_i32_scalable(<vscale x 4 x i32> %data, <vscale x 4 x i1
332323
; CHECK-LABEL: extract_last_i32_scalable:
333324
; CHECK: // %bb.0:
334325
; CHECK-NEXT: index z1.s, #0, #1
335-
; CHECK-NEXT: mov z2.s, #0 // =0x0
326+
; CHECK-NEXT: lastb w8, p0, z1.s
327+
; CHECK-NEXT: whilels p1.s, xzr, x8
328+
; CHECK-NEXT: lastb w8, p1, z0.s
336329
; CHECK-NEXT: ptrue p1.s
337-
; CHECK-NEXT: sel z1.s, p0, z1.s, z2.s
338-
; CHECK-NEXT: umaxv s1, p1, z1.s
339-
; CHECK-NEXT: fmov w8, s1
340-
; CHECK-NEXT: mov w8, w8
341-
; CHECK-NEXT: whilels p2.s, xzr, x8
342330
; CHECK-NEXT: ptest p1, p0.b
343-
; CHECK-NEXT: lastb w8, p2, z0.s
344331
; CHECK-NEXT: csel w0, w8, w0, ne
345332
; CHECK-NEXT: ret
346333
%res = call i32 @llvm.experimental.vector.extract.last.active.nxv4i32(<vscale x 4 x i32> %data, <vscale x 4 x i1> %mask, i32 %passthru)
@@ -351,14 +338,11 @@ define i64 @extract_last_i64_scalable(<vscale x 2 x i64> %data, <vscale x 2 x i1
351338
; CHECK-LABEL: extract_last_i64_scalable:
352339
; CHECK: // %bb.0:
353340
; CHECK-NEXT: index z1.d, #0, #1
354-
; CHECK-NEXT: mov z2.d, #0 // =0x0
341+
; CHECK-NEXT: lastb x8, p0, z1.d
342+
; CHECK-NEXT: whilels p1.d, xzr, x8
343+
; CHECK-NEXT: lastb x8, p1, z0.d
355344
; CHECK-NEXT: ptrue p1.d
356-
; CHECK-NEXT: sel z1.d, p0, z1.d, z2.d
357-
; CHECK-NEXT: umaxv d1, p1, z1.d
358-
; CHECK-NEXT: fmov x8, d1
359-
; CHECK-NEXT: whilels p2.d, xzr, x8
360345
; CHECK-NEXT: ptest p1, p0.b
361-
; CHECK-NEXT: lastb x8, p2, z0.d
362346
; CHECK-NEXT: csel x0, x8, x0, ne
363347
; CHECK-NEXT: ret
364348
%res = call i64 @llvm.experimental.vector.extract.last.active.nxv2i64(<vscale x 2 x i64> %data, <vscale x 2 x i1> %mask, i64 %passthru)
@@ -369,15 +353,11 @@ define float @extract_last_float_scalable(<vscale x 4 x float> %data, <vscale x
369353
; CHECK-LABEL: extract_last_float_scalable:
370354
; CHECK: // %bb.0:
371355
; CHECK-NEXT: index z2.s, #0, #1
372-
; CHECK-NEXT: mov z3.s, #0 // =0x0
356+
; CHECK-NEXT: lastb w8, p0, z2.s
357+
; CHECK-NEXT: whilels p1.s, xzr, x8
358+
; CHECK-NEXT: lastb s0, p1, z0.s
373359
; CHECK-NEXT: ptrue p1.s
374-
; CHECK-NEXT: sel z2.s, p0, z2.s, z3.s
375-
; CHECK-NEXT: umaxv s2, p1, z2.s
376-
; CHECK-NEXT: fmov w8, s2
377-
; CHECK-NEXT: mov w8, w8
378-
; CHECK-NEXT: whilels p2.s, xzr, x8
379360
; CHECK-NEXT: ptest p1, p0.b
380-
; CHECK-NEXT: lastb s0, p2, z0.s
381361
; CHECK-NEXT: fcsel s0, s0, s1, ne
382362
; CHECK-NEXT: ret
383363
%res = call float @llvm.experimental.vector.extract.last.active.nxv4f32(<vscale x 4 x float> %data, <vscale x 4 x i1> %mask, float %passthru)
@@ -388,14 +368,11 @@ define double @extract_last_double_scalable(<vscale x 2 x double> %data, <vscale
388368
; CHECK-LABEL: extract_last_double_scalable:
389369
; CHECK: // %bb.0:
390370
; CHECK-NEXT: index z2.d, #0, #1
391-
; CHECK-NEXT: mov z3.d, #0 // =0x0
371+
; CHECK-NEXT: lastb x8, p0, z2.d
372+
; CHECK-NEXT: whilels p1.d, xzr, x8
373+
; CHECK-NEXT: lastb d0, p1, z0.d
392374
; CHECK-NEXT: ptrue p1.d
393-
; CHECK-NEXT: sel z2.d, p0, z2.d, z3.d
394-
; CHECK-NEXT: umaxv d2, p1, z2.d
395-
; CHECK-NEXT: fmov x8, d2
396-
; CHECK-NEXT: whilels p2.d, xzr, x8
397375
; CHECK-NEXT: ptest p1, p0.b
398-
; CHECK-NEXT: lastb d0, p2, z0.d
399376
; CHECK-NEXT: fcsel d0, d0, d1, ne
400377
; CHECK-NEXT: ret
401378
%res = call double @llvm.experimental.vector.extract.last.active.nxv2f64(<vscale x 2 x double> %data, <vscale x 2 x i1> %mask, double %passthru)
@@ -407,12 +384,7 @@ define i8 @extract_last_i8_scalable_poison_passthru(<vscale x 16 x i8> %data, <v
407384
; CHECK-LABEL: extract_last_i8_scalable_poison_passthru:
408385
; CHECK: // %bb.0:
409386
; CHECK-NEXT: index z1.b, #0, #1
410-
; CHECK-NEXT: mov z2.b, #0 // =0x0
411-
; CHECK-NEXT: sel z1.b, p0, z1.b, z2.b
412-
; CHECK-NEXT: ptrue p0.b
413-
; CHECK-NEXT: umaxv b1, p0, z1.b
414-
; CHECK-NEXT: fmov w8, s1
415-
; CHECK-NEXT: and x8, x8, #0xff
387+
; CHECK-NEXT: lastb w8, p0, z1.b
416388
; CHECK-NEXT: whilels p0.b, xzr, x8
417389
; CHECK-NEXT: lastb w0, p0, z0.b
418390
; CHECK-NEXT: ret

0 commit comments

Comments
 (0)