Skip to content

Commit 53d1c21

Browse files
[LLVM][SVE] Improve code generation for vector.insert into poison. (#105665)
1 parent 2f0661c commit 53d1c21

File tree

6 files changed

+101
-180
lines changed

6 files changed

+101
-180
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -14880,6 +14880,10 @@ SDValue AArch64TargetLowering::LowerINSERT_SUBVECTOR(SDValue Op,
1488014880
return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
1488114881
}
1488214882

14883+
// We can select these directly.
14884+
if (isTypeLegal(InVT) && Vec0.isUndef())
14885+
return Op;
14886+
1488314887
// Ensure the subvector is half the size of the main vector.
1488414888
if (VT.getVectorElementCount() != (InVT.getVectorElementCount() * 2))
1488514889
return SDValue();

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

Lines changed: 20 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1938,19 +1938,35 @@ let Predicates = [HasSVEorSME] in {
19381938
def : Pat<(nxv2bf16 (extract_subvector nxv8bf16:$Zs, (i64 6))),
19391939
(UUNPKHI_ZZ_D (UUNPKHI_ZZ_S ZPR:$Zs))>;
19401940

1941+
// Insert subvectors into FP SVE vectors.
1942+
foreach VT = [nxv4f16, nxv4f32, nxv4bf16] in
1943+
foreach idx = [0, 2] in
1944+
def : Pat<(VT (vector_insert_subvec undef, SVEType<VT>.HalfLength:$src, (i64 idx))),
1945+
(UZP1_ZZZ_S $src, $src)>;
1946+
1947+
foreach VT = [nxv8f16, nxv8bf16] in {
1948+
foreach idx = [0, 4] in
1949+
def : Pat<(VT (vector_insert_subvec undef, SVEType<VT>.HalfLength:$src, (i64 idx))),
1950+
(UZP1_ZZZ_H $src, $src)>;
1951+
1952+
foreach idx = [0, 2, 4, 6] in
1953+
def : Pat<(VT (vector_insert_subvec undef, SVEType<VT>.QuarterLength:$src, (i64 idx))),
1954+
(UZP1_ZZZ_H (UZP1_ZZZ_H $src, $src), (UZP1_ZZZ_H $src, $src))>;
1955+
}
1956+
19411957
// extract/insert 64-bit fixed length vector from/into a scalable vector
19421958
foreach VT = [v8i8, v4i16, v2i32, v1i64, v4f16, v2f32, v1f64, v4bf16] in {
1943-
def : Pat<(VT (vector_extract_subvec SVEContainerVT<VT>.Value:$Zs, (i64 0))),
1959+
def : Pat<(VT (vector_extract_subvec NEONType<VT>.SVEContainer:$Zs, (i64 0))),
19441960
(EXTRACT_SUBREG ZPR:$Zs, dsub)>;
1945-
def : Pat<(SVEContainerVT<VT>.Value (vector_insert_subvec undef, (VT V64:$src), (i64 0))),
1961+
def : Pat<(NEONType<VT>.SVEContainer (vector_insert_subvec undef, (VT V64:$src), (i64 0))),
19461962
(INSERT_SUBREG (IMPLICIT_DEF), $src, dsub)>;
19471963
}
19481964

19491965
// extract/insert 128-bit fixed length vector from/into a scalable vector
19501966
foreach VT = [v16i8, v8i16, v4i32, v2i64, v8f16, v4f32, v2f64, v8bf16] in {
1951-
def : Pat<(VT (vector_extract_subvec SVEContainerVT<VT>.Value:$Zs, (i64 0))),
1967+
def : Pat<(VT (vector_extract_subvec NEONType<VT>.SVEContainer:$Zs, (i64 0))),
19521968
(EXTRACT_SUBREG ZPR:$Zs, zsub)>;
1953-
def : Pat<(SVEContainerVT<VT>.Value (vector_insert_subvec undef, (VT V128:$src), (i64 0))),
1969+
def : Pat<(NEONType<VT>.SVEContainer (vector_insert_subvec undef, (VT V128:$src), (i64 0))),
19541970
(INSERT_SUBREG (IMPLICIT_DEF), $src, zsub)>;
19551971
}
19561972

llvm/lib/Target/AArch64/SVEInstrFormats.td

Lines changed: 30 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -10,11 +10,10 @@
1010
//
1111
//===----------------------------------------------------------------------===//
1212

13-
// Helper class to find the largest legal scalable vector type that can hold VT.
14-
// Non-matches return VT, which often means VT is the container type.
15-
class SVEContainerVT<ValueType VT> {
16-
ValueType Value = !cond(
17-
// fixed length vectors
13+
// Helper class to hold conversions of legal fixed-length vector types.
14+
class NEONType<ValueType VT> {
15+
// The largest legal scalable vector type that can hold VT.
16+
ValueType SVEContainer = !cond(
1817
!eq(VT, v8i8): nxv16i8,
1918
!eq(VT, v16i8): nxv16i8,
2019
!eq(VT, v4i16): nxv8i16,
@@ -31,13 +30,35 @@ class SVEContainerVT<ValueType VT> {
3130
!eq(VT, v2f64): nxv2f64,
3231
!eq(VT, v4bf16): nxv8bf16,
3332
!eq(VT, v8bf16): nxv8bf16,
34-
// unpacked scalable vectors
33+
true : untyped);
34+
}
35+
36+
// Helper class to hold conversions of legal scalable vector types.
37+
class SVEType<ValueType VT> {
38+
// The largest legal scalable vector type that can hold VT.
39+
// Non-matches return VT because only packed types remain.
40+
ValueType Packed = !cond(
3541
!eq(VT, nxv2f16): nxv8f16,
3642
!eq(VT, nxv4f16): nxv8f16,
3743
!eq(VT, nxv2f32): nxv4f32,
3844
!eq(VT, nxv2bf16): nxv8bf16,
3945
!eq(VT, nxv4bf16): nxv8bf16,
4046
true : VT);
47+
48+
// The legal scalable vector that is half the length of VT.
49+
ValueType HalfLength = !cond(
50+
!eq(VT, nxv8f16): nxv4f16,
51+
!eq(VT, nxv4f16): nxv2f16,
52+
!eq(VT, nxv4f32): nxv2f32,
53+
!eq(VT, nxv8bf16): nxv4bf16,
54+
!eq(VT, nxv4bf16): nxv2bf16,
55+
true : untyped);
56+
57+
// The legal scalable vector that is quarter the length of VT.
58+
ValueType QuarterLength = !cond(
59+
!eq(VT, nxv8f16): nxv2f16,
60+
!eq(VT, nxv8bf16): nxv2bf16,
61+
true : untyped);
4162
}
4263

4364
def SDT_AArch64Setcc : SDTypeProfile<1, 4, [
@@ -2959,10 +2980,10 @@ multiclass sve_fp_2op_p_zd<bits<7> opc, string asm,
29592980
def NAME : sve_fp_2op_p_zd<opc, asm, i_zprtype, o_zprtype, Sz>,
29602981
SVEPseudo2Instr<NAME, 1>;
29612982
// convert vt1 to a packed type for the intrinsic patterns
2962-
defvar packedvt1 = SVEContainerVT<vt1>.Value;
2983+
defvar packedvt1 = SVEType<vt1>.Packed;
29632984

29642985
// convert vt3 to a packed type for the intrinsic patterns
2965-
defvar packedvt3 = SVEContainerVT<vt3>.Value;
2986+
defvar packedvt3 = SVEType<vt3>.Packed;
29662987

29672988
def : SVE_3_Op_Pat<packedvt1, int_op, packedvt1, vt2, packedvt3, !cast<Instruction>(NAME)>;
29682989
def : SVE_1_Op_Passthru_Pat<vt1, ir_op, vt2, vt3, !cast<Instruction>(NAME)>;
@@ -2982,7 +3003,7 @@ multiclass sve_fp_2op_p_zdr<bits<7> opc, string asm,
29823003
SVEPseudo2Instr<NAME, 1>;
29833004

29843005
// convert vt1 to a packed type for the intrinsic patterns
2985-
defvar packedvt1 = SVEContainerVT<vt1>.Value;
3006+
defvar packedvt1 = SVEType<vt1>.Packed;
29863007

29873008
def : SVE_3_Op_Pat<packedvt1, int_op, packedvt1, vt2, vt3, !cast<Instruction>(NAME)>;
29883009
def : SVE_1_Op_Passthru_Round_Pat<vt1, ir_op, vt2, vt3, !cast<Instruction>(NAME)>;

llvm/test/CodeGen/AArch64/sve-bitcast.ll

Lines changed: 22 additions & 60 deletions
Original file line number | Diff line number | Diff line change
@@ -1426,11 +1426,8 @@ define <vscale x 1 x i64> @bitcast_nxv4f16_to_nxv1i64(<vscale x 4 x half> %v) #0
14261426
;
14271427
; CHECK_BE-LABEL: bitcast_nxv4f16_to_nxv1i64:
14281428
; CHECK_BE: // %bb.0:
1429-
; CHECK_BE-NEXT: ptrue p0.h
1430-
; CHECK_BE-NEXT: ptrue p1.s
1431-
; CHECK_BE-NEXT: revb z0.h, p0/m, z0.h
1432-
; CHECK_BE-NEXT: revb z0.s, p1/m, z0.s
14331429
; CHECK_BE-NEXT: uzp1 z0.h, z0.h, z0.h
1430+
; CHECK_BE-NEXT: ptrue p0.h
14341431
; CHECK_BE-NEXT: revb z0.h, p0/m, z0.h
14351432
; CHECK_BE-NEXT: ptrue p0.d
14361433
; CHECK_BE-NEXT: revb z0.d, p0/m, z0.d
@@ -1447,13 +1444,11 @@ define <vscale x 1 x i64> @bitcast_nxv2f32_to_nxv1i64(<vscale x 2 x float> %v) #
14471444
;
14481445
; CHECK_BE-LABEL: bitcast_nxv2f32_to_nxv1i64:
14491446
; CHECK_BE: // %bb.0:
1450-
; CHECK_BE-NEXT: ptrue p0.s
1451-
; CHECK_BE-NEXT: ptrue p1.d
1452-
; CHECK_BE-NEXT: revb z0.s, p0/m, z0.s
1453-
; CHECK_BE-NEXT: revb z0.d, p1/m, z0.d
14541447
; CHECK_BE-NEXT: uzp1 z0.s, z0.s, z0.s
1448+
; CHECK_BE-NEXT: ptrue p0.s
14551449
; CHECK_BE-NEXT: revb z0.s, p0/m, z0.s
1456-
; CHECK_BE-NEXT: revb z0.d, p1/m, z0.d
1450+
; CHECK_BE-NEXT: ptrue p0.d
1451+
; CHECK_BE-NEXT: revb z0.d, p0/m, z0.d
14571452
; CHECK_BE-NEXT: ret
14581453
%bc = bitcast <vscale x 2 x float> %v to <vscale x 1 x i64>
14591454
ret <vscale x 1 x i64> %bc
@@ -1479,11 +1474,8 @@ define <vscale x 1 x i64> @bitcast_nxv4bf16_to_nxv1i64(<vscale x 4 x bfloat> %v)
14791474
;
14801475
; CHECK_BE-LABEL: bitcast_nxv4bf16_to_nxv1i64:
14811476
; CHECK_BE: // %bb.0:
1482-
; CHECK_BE-NEXT: ptrue p0.h
1483-
; CHECK_BE-NEXT: ptrue p1.s
1484-
; CHECK_BE-NEXT: revb z0.h, p0/m, z0.h
1485-
; CHECK_BE-NEXT: revb z0.s, p1/m, z0.s
14861477
; CHECK_BE-NEXT: uzp1 z0.h, z0.h, z0.h
1478+
; CHECK_BE-NEXT: ptrue p0.h
14871479
; CHECK_BE-NEXT: revb z0.h, p0/m, z0.h
14881480
; CHECK_BE-NEXT: ptrue p0.d
14891481
; CHECK_BE-NEXT: revb z0.d, p0/m, z0.d
@@ -1888,11 +1880,8 @@ define <vscale x 1 x double> @bitcast_nxv4f16_to_nxv1f64(<vscale x 4 x half> %v)
18881880
;
18891881
; CHECK_BE-LABEL: bitcast_nxv4f16_to_nxv1f64:
18901882
; CHECK_BE: // %bb.0:
1891-
; CHECK_BE-NEXT: ptrue p0.h
1892-
; CHECK_BE-NEXT: ptrue p1.s
1893-
; CHECK_BE-NEXT: revb z0.h, p0/m, z0.h
1894-
; CHECK_BE-NEXT: revb z0.s, p1/m, z0.s
18951883
; CHECK_BE-NEXT: uzp1 z0.h, z0.h, z0.h
1884+
; CHECK_BE-NEXT: ptrue p0.h
18961885
; CHECK_BE-NEXT: revb z0.h, p0/m, z0.h
18971886
; CHECK_BE-NEXT: ptrue p0.d
18981887
; CHECK_BE-NEXT: revb z0.d, p0/m, z0.d
@@ -1909,13 +1898,11 @@ define <vscale x 1 x double> @bitcast_nxv2f32_to_nxv1f64(<vscale x 2 x float> %v
19091898
;
19101899
; CHECK_BE-LABEL: bitcast_nxv2f32_to_nxv1f64:
19111900
; CHECK_BE: // %bb.0:
1912-
; CHECK_BE-NEXT: ptrue p0.s
1913-
; CHECK_BE-NEXT: ptrue p1.d
1914-
; CHECK_BE-NEXT: revb z0.s, p0/m, z0.s
1915-
; CHECK_BE-NEXT: revb z0.d, p1/m, z0.d
19161901
; CHECK_BE-NEXT: uzp1 z0.s, z0.s, z0.s
1902+
; CHECK_BE-NEXT: ptrue p0.s
19171903
; CHECK_BE-NEXT: revb z0.s, p0/m, z0.s
1918-
; CHECK_BE-NEXT: revb z0.d, p1/m, z0.d
1904+
; CHECK_BE-NEXT: ptrue p0.d
1905+
; CHECK_BE-NEXT: revb z0.d, p0/m, z0.d
19191906
; CHECK_BE-NEXT: ret
19201907
%bc = bitcast <vscale x 2 x float> %v to <vscale x 1 x double>
19211908
ret <vscale x 1 x double> %bc
@@ -1929,11 +1916,8 @@ define <vscale x 1 x double> @bitcast_nxv4bf16_to_nxv1f64(<vscale x 4 x bfloat>
19291916
;
19301917
; CHECK_BE-LABEL: bitcast_nxv4bf16_to_nxv1f64:
19311918
; CHECK_BE: // %bb.0:
1932-
; CHECK_BE-NEXT: ptrue p0.h
1933-
; CHECK_BE-NEXT: ptrue p1.s
1934-
; CHECK_BE-NEXT: revb z0.h, p0/m, z0.h
1935-
; CHECK_BE-NEXT: revb z0.s, p1/m, z0.s
19361919
; CHECK_BE-NEXT: uzp1 z0.h, z0.h, z0.h
1920+
; CHECK_BE-NEXT: ptrue p0.h
19371921
; CHECK_BE-NEXT: revb z0.h, p0/m, z0.h
19381922
; CHECK_BE-NEXT: ptrue p0.d
19391923
; CHECK_BE-NEXT: revb z0.d, p0/m, z0.d
@@ -2333,29 +2317,18 @@ define <vscale x 1 x i32> @bitcast_nxv2i16_to_nxv1i32(<vscale x 2 x i16> %v) #0
23332317
define <vscale x 1 x i32> @bitcast_nxv2f16_to_nxv1i32(<vscale x 2 x half> %v) #0 {
23342318
; CHECK-LABEL: bitcast_nxv2f16_to_nxv1i32:
23352319
; CHECK: // %bb.0:
2336-
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
2337-
; CHECK-NEXT: addvl sp, sp, #-1
2338-
; CHECK-NEXT: ptrue p0.d
2339-
; CHECK-NEXT: ptrue p1.s
2340-
; CHECK-NEXT: st1h { z0.d }, p0, [sp]
2341-
; CHECK-NEXT: ld1w { z0.s }, p1/z, [sp]
2342-
; CHECK-NEXT: addvl sp, sp, #1
2343-
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
2320+
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
2321+
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
23442322
; CHECK-NEXT: ret
23452323
;
23462324
; CHECK_BE-LABEL: bitcast_nxv2f16_to_nxv1i32:
23472325
; CHECK_BE: // %bb.0:
2348-
; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
2349-
; CHECK_BE-NEXT: addvl sp, sp, #-1
2350-
; CHECK_BE-NEXT: ptrue p0.d
2351-
; CHECK_BE-NEXT: ptrue p1.h
2352-
; CHECK_BE-NEXT: st1h { z0.d }, p0, [sp]
2326+
; CHECK_BE-NEXT: uzp1 z0.h, z0.h, z0.h
2327+
; CHECK_BE-NEXT: ptrue p0.h
2328+
; CHECK_BE-NEXT: uzp1 z0.h, z0.h, z0.h
2329+
; CHECK_BE-NEXT: revb z0.h, p0/m, z0.h
23532330
; CHECK_BE-NEXT: ptrue p0.s
2354-
; CHECK_BE-NEXT: ld1h { z0.h }, p1/z, [sp]
2355-
; CHECK_BE-NEXT: revb z0.h, p1/m, z0.h
23562331
; CHECK_BE-NEXT: revb z0.s, p0/m, z0.s
2357-
; CHECK_BE-NEXT: addvl sp, sp, #1
2358-
; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
23592332
; CHECK_BE-NEXT: ret
23602333
%bc = bitcast <vscale x 2 x half> %v to <vscale x 1 x i32>
23612334
ret <vscale x 1 x i32> %bc
@@ -2366,29 +2339,18 @@ define <vscale x 1 x i32> @bitcast_nxv2f16_to_nxv1i32(<vscale x 2 x half> %v) #0
23662339
define <vscale x 1 x i32> @bitcast_nxv2bf16_to_nxv1i32(<vscale x 2 x bfloat> %v) #0 {
23672340
; CHECK-LABEL: bitcast_nxv2bf16_to_nxv1i32:
23682341
; CHECK: // %bb.0:
2369-
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
2370-
; CHECK-NEXT: addvl sp, sp, #-1
2371-
; CHECK-NEXT: ptrue p0.d
2372-
; CHECK-NEXT: ptrue p1.s
2373-
; CHECK-NEXT: st1h { z0.d }, p0, [sp]
2374-
; CHECK-NEXT: ld1w { z0.s }, p1/z, [sp]
2375-
; CHECK-NEXT: addvl sp, sp, #1
2376-
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
2342+
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
2343+
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
23772344
; CHECK-NEXT: ret
23782345
;
23792346
; CHECK_BE-LABEL: bitcast_nxv2bf16_to_nxv1i32:
23802347
; CHECK_BE: // %bb.0:
2381-
; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
2382-
; CHECK_BE-NEXT: addvl sp, sp, #-1
2383-
; CHECK_BE-NEXT: ptrue p0.d
2384-
; CHECK_BE-NEXT: ptrue p1.h
2385-
; CHECK_BE-NEXT: st1h { z0.d }, p0, [sp]
2348+
; CHECK_BE-NEXT: uzp1 z0.h, z0.h, z0.h
2349+
; CHECK_BE-NEXT: ptrue p0.h
2350+
; CHECK_BE-NEXT: uzp1 z0.h, z0.h, z0.h
2351+
; CHECK_BE-NEXT: revb z0.h, p0/m, z0.h
23862352
; CHECK_BE-NEXT: ptrue p0.s
2387-
; CHECK_BE-NEXT: ld1h { z0.h }, p1/z, [sp]
2388-
; CHECK_BE-NEXT: revb z0.h, p1/m, z0.h
23892353
; CHECK_BE-NEXT: revb z0.s, p0/m, z0.s
2390-
; CHECK_BE-NEXT: addvl sp, sp, #1
2391-
; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
23922354
; CHECK_BE-NEXT: ret
23932355
%bc = bitcast <vscale x 2 x bfloat> %v to <vscale x 1 x i32>
23942356
ret <vscale x 1 x i32> %bc

llvm/test/CodeGen/AArch64/sve-extract-fixed-from-scalable-vector.ll

Lines changed: 5 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -296,15 +296,9 @@ define <4 x i64> @extract_v4i64_nxv8i64_0(<vscale x 8 x i64> %arg) {
296296
define <4 x half> @extract_v4f16_nxv2f16_0(<vscale x 2 x half> %arg) {
297297
; CHECK-LABEL: extract_v4f16_nxv2f16_0:
298298
; CHECK: // %bb.0:
299-
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
300-
; CHECK-NEXT: addvl sp, sp, #-1
301-
; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
302-
; CHECK-NEXT: .cfi_offset w29, -16
303-
; CHECK-NEXT: ptrue p0.d
304-
; CHECK-NEXT: st1h { z0.d }, p0, [sp]
305-
; CHECK-NEXT: ldr d0, [sp]
306-
; CHECK-NEXT: addvl sp, sp, #1
307-
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
299+
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
300+
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
301+
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
308302
; CHECK-NEXT: ret
309303
%ext = call <4 x half> @llvm.vector.extract.v4f16.nxv2f16(<vscale x 2 x half> %arg, i64 0)
310304
ret <4 x half> %ext
@@ -313,18 +307,10 @@ define <4 x half> @extract_v4f16_nxv2f16_0(<vscale x 2 x half> %arg) {
313307
define <4 x half> @extract_v4f16_nxv2f16_4(<vscale x 2 x half> %arg) {
314308
; CHECK-LABEL: extract_v4f16_nxv2f16_4:
315309
; CHECK: // %bb.0:
316-
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
317-
; CHECK-NEXT: addvl sp, sp, #-1
318-
; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
319-
; CHECK-NEXT: .cfi_offset w29, -16
320-
; CHECK-NEXT: ptrue p0.d
321-
; CHECK-NEXT: ptrue p1.h
322-
; CHECK-NEXT: st1h { z0.d }, p0, [sp]
323-
; CHECK-NEXT: ld1h { z0.h }, p1/z, [sp]
310+
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
311+
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
324312
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
325313
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
326-
; CHECK-NEXT: addvl sp, sp, #1
327-
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
328314
; CHECK-NEXT: ret
329315
%ext = call <4 x half> @llvm.vector.extract.v4f16.nxv2f16(<vscale x 2 x half> %arg, i64 4)
330316
ret <4 x half> %ext

0 commit comments

Comments
 (0)