Skip to content

Commit c77d79b

Browse files
[SVE2.1][Clang][LLVM]Add 128bits builtin in Clang and LLVM intrinisc (#71930)
This patch implements the builtins in Clang and the LLVM-IR intrinsic for the following: EXTQ // Variants are also available for: // _s8, _s16, _u16, _s32, _u32, _s64, _u64 // _bf16, _f16, _f32, _f64 svuint8_t svextq_lane[_u8](svuint8_t zdn, svuint8_t zm, uint64_t imm); TBLQ and TBXQ // Variants are also available for: // _u8, _u16, _s16, _u32, _s32, _u64, _s64 // _bf16, _f16, _f32, _f64 svint8_t svtblq[_s8](svint8_t zn, svuint8_t zm); svint8_t svtbxq[_s8](svint8_t zn, svuint8_t zm); UZPQ1, UZPQ2, ZIPQ1 and ZIPQ2 // Variants are also available for: // _s8, _u16, _s16, _u32, _s32, _u64, _s64 // _bf16, _f16, _f32, _f64 svuint8_t svuzpq1[_u8](svuint8_t zn, svuint8_t zm); svuint8_t svuzpq2[_u8](svuint8_t zn, svuint8_t zm); svuint8_t svzipq1[_u8](svuint8_t zn, svuint8_t zm); svuint8_t svzipq2[_u8](svuint8_t zn, svuint8_t zm); PMOV // Variants are available for: // _s8, _u16, _s16, _s32, _u32, _s64, _u64 svbool_t svpmov_lane[_u8](svuint8_t zn, uint64_t imm); svbool_t svpmov[_u8](svuint8_t zn); // The immediate is zero svuint8_t svpmov_u8_z(svbool_t pn); // The immediate is zero // Variants are available for: // _s16, _s32, _u32, _s64, _u64 svuint16_t svpmov_lane[_u16]_m(svuint16_t zd, svbool_t pn, uint64_t imm); According to the PR#257[1] [1]ARM-software/acle#257 Co-authored-by: Hassnaa Hamdi <[email protected]>
1 parent 19cdc0a commit c77d79b

26 files changed

+3161
-15
lines changed

clang/include/clang/Basic/arm_sve.td

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2026,3 +2026,36 @@ let TargetGuard = "sme2" in {
20262026
def SVADD_SINGLE_X2 : SInst<"svadd[_single_{d}_x2]", "22d", "cUcsUsiUilUl", MergeNone, "aarch64_sve_add_single_x2", [IsStreaming], []>;
20272027
def SVADD_SINGLE_X4 : SInst<"svadd[_single_{d}_x4]", "44d", "cUcsUsiUilUl", MergeNone, "aarch64_sve_add_single_x4", [IsStreaming], []>;
20282028
}
2029+
2030+
let TargetGuard = "sve2p1" in {
2031+
// ZIPQ1, ZIPQ2, UZPQ1, UZPQ2
2032+
def SVZIPQ1 : SInst<"svzipq1[_{d}]", "ddd", "cUcsUsiUilUlbhfd", MergeNone, "aarch64_sve_zipq1", [], []>;
2033+
def SVZIPQ2 : SInst<"svzipq2[_{d}]", "ddd", "cUcsUsiUilUlbhfd", MergeNone, "aarch64_sve_zipq2", [], []>;
2034+
def SVUZPQ1 : SInst<"svuzpq1[_{d}]", "ddd", "cUcsUsiUilUlbhfd", MergeNone, "aarch64_sve_uzpq1", [], []>;
2035+
def SVUZPQ2 : SInst<"svuzpq2[_{d}]", "ddd", "cUcsUsiUilUlbhfd", MergeNone, "aarch64_sve_uzpq2", [], []>;
2036+
// TBLQ, TBXQ
2037+
def SVTBLQ : SInst<"svtblq[_{d}]", "ddu", "cUcsUsiUilUlbhfd", MergeNone, "aarch64_sve_tblq">;
2038+
def SVTBXQ : SInst<"svtbxq[_{d}]", "dddu", "cUcsUsiUilUlbhfd", MergeNone, "aarch64_sve_tbxq">;
2039+
// EXTQ
2040+
def EXTQ : SInst<"svextq_lane[_{d}]", "dddk", "cUcsUsiUilUlbhfd", MergeNone, "aarch64_sve_extq_lane", [], [ImmCheck<2, ImmCheck0_15>]>;
2041+
// PMOV
2042+
// Move to Pred
2043+
multiclass PMOV_TO_PRED<string name, string types, string intrinsic, list<FlagType> flags=[], ImmCheckType immCh > {
2044+
def _LANE : Inst<name # "_lane[_{d}]", "Pdi", types, MergeNone, intrinsic, flags, [ImmCheck<1, immCh>]>;
2045+
def _LANE_ZERO : SInst<name # "[_{d}]", "Pd", types, MergeNone, intrinsic # "_zero", flags, []>;
2046+
}
2047+
defm SVPMOV_B_TO_PRED : PMOV_TO_PRED<"svpmov", "cUc", "aarch64_sve_pmov_to_pred_lane", [], ImmCheck0_0>;
2048+
defm SVPMOV_H_TO_PRED : PMOV_TO_PRED<"svpmov", "sUs", "aarch64_sve_pmov_to_pred_lane", [], ImmCheck0_1>;
2049+
defm SVPMOV_S_TO_PRED : PMOV_TO_PRED<"svpmov", "iUi", "aarch64_sve_pmov_to_pred_lane", [], ImmCheck0_3>;
2050+
defm SVPMOV_D_TO_PRED : PMOV_TO_PRED<"svpmov", "lUl", "aarch64_sve_pmov_to_pred_lane", [], ImmCheck0_7>;
2051+
2052+
// Move to Vector
2053+
multiclass PMOV_TO_VEC<string name, string types, string intrinsic, list<FlagType> flags=[], ImmCheckType immCh > {
2054+
def _M : SInst<name # "_lane[_{d}]", "ddPi", types, MergeOp1, intrinsic # "_merging", flags, [ImmCheck<2, immCh>]>;
2055+
def _Z : SInst<name # "_{d}_z", "dP", types, MergeNone, intrinsic # "_zeroing", flags, []>;
2056+
}
2057+
def SVPMOV_TO_VEC_LANE_B : SInst<"svpmov_{d}_z", "dP", "cUc", MergeNone, "aarch64_sve_pmov_to_vector_lane_zeroing", [], []>;
2058+
defm SVPMOV_TO_VEC_LANE_H : PMOV_TO_VEC<"svpmov", "sUs", "aarch64_sve_pmov_to_vector_lane", [], ImmCheck1_1>;
2059+
defm SVPMOV_TO_VEC_LANE_S : PMOV_TO_VEC<"svpmov", "iUi", "aarch64_sve_pmov_to_vector_lane", [], ImmCheck1_3>;
2060+
defm SVPMOV_TO_VEC_LANE_D : PMOV_TO_VEC<"svpmov", "lUl", "aarch64_sve_pmov_to_vector_lane" ,[], ImmCheck1_7>;
2061+
}

clang/include/clang/Basic/arm_sve_sme_incl.td

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -249,6 +249,9 @@ def ImmCheck0_0 : ImmCheckType<16>; // 0..0
249249
def ImmCheck0_15 : ImmCheckType<17>; // 0..15
250250
def ImmCheck0_255 : ImmCheckType<18>; // 0..255
251251
def ImmCheck2_4_Mul2 : ImmCheckType<19>; // 2, 4
252+
def ImmCheck1_1 : ImmCheckType<20>; // 1..1
253+
def ImmCheck1_3 : ImmCheckType<21>; // 1..3
254+
def ImmCheck1_7 : ImmCheckType<22>; // 1..7
252255

253256
class ImmCheck<int arg, ImmCheckType kind, int eltSizeArg = -1> {
254257
int Arg = arg;

clang/lib/Sema/SemaChecking.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3052,6 +3052,18 @@ bool Sema::CheckSVEBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
30523052
if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0, 7))
30533053
HasError = true;
30543054
break;
3055+
case SVETypeFlags::ImmCheck1_1:
3056+
if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 1, 1))
3057+
HasError = true;
3058+
break;
3059+
case SVETypeFlags::ImmCheck1_3:
3060+
if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 1, 3))
3061+
HasError = true;
3062+
break;
3063+
case SVETypeFlags::ImmCheck1_7:
3064+
if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 1, 7))
3065+
HasError = true;
3066+
break;
30553067
case SVETypeFlags::ImmCheckExtract:
30563068
if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0,
30573069
(2048 / ElementSizeInBits) - 1))
Lines changed: 213 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,213 @@
1+
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2
2+
// REQUIRES: aarch64-registered-target
3+
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +bf16\
4+
// RUN: -S -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
5+
// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +bf16\
6+
// RUN: -S -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
7+
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +bf16\
8+
// RUN: -S -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
9+
// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +bf16\
10+
// RUN: -S -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
11+
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
12+
13+
14+
#include <arm_sve.h>
15+
16+
#ifdef SVE_OVERLOADED_FORMS
17+
// A simple used,unused... macro, long enough to represent any SVE builtin.
18+
#define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3
19+
#else
20+
#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4
21+
#endif
22+
23+
// CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svextq_lane_u8
24+
// CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) #[[ATTR0:[0-9]+]] {
25+
// CHECK-NEXT: entry:
26+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.extq.lane.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]], i32 0)
27+
// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
28+
//
29+
// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z19test_svextq_lane_u8u11__SVUint8_tS_
30+
// CPP-CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) #[[ATTR0:[0-9]+]] {
31+
// CPP-CHECK-NEXT: entry:
32+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.extq.lane.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]], i32 0)
33+
// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
34+
//
35+
svuint8_t test_svextq_lane_u8(svuint8_t zn, svuint8_t zm) {
36+
return SVE_ACLE_FUNC(svextq_lane, _u8,,)(zn, zm, 0);
37+
}
38+
39+
// CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svextq_lane_s8
40+
// CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) #[[ATTR0]] {
41+
// CHECK-NEXT: entry:
42+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.extq.lane.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]], i32 4)
43+
// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
44+
//
45+
// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z19test_svextq_lane_s8u10__SVInt8_tS_
46+
// CPP-CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) #[[ATTR0]] {
47+
// CPP-CHECK-NEXT: entry:
48+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.extq.lane.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]], i32 4)
49+
// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
50+
//
51+
svint8_t test_svextq_lane_s8(svint8_t zn, svint8_t zm) {
52+
return SVE_ACLE_FUNC(svextq_lane, _s8,,)(zn, zm, 4);
53+
}
54+
55+
// CHECK-LABEL: define dso_local <vscale x 8 x i16> @test_svextq_lane_u16
56+
// CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) #[[ATTR0]] {
57+
// CHECK-NEXT: entry:
58+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.extq.lane.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]], i32 1)
59+
// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
60+
//
61+
// CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> @_Z20test_svextq_lane_u16u12__SVUint16_tS_
62+
// CPP-CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) #[[ATTR0]] {
63+
// CPP-CHECK-NEXT: entry:
64+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.extq.lane.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]], i32 1)
65+
// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
66+
//
67+
svuint16_t test_svextq_lane_u16(svuint16_t zn, svuint16_t zm) {
68+
return SVE_ACLE_FUNC(svextq_lane, _u16,,)(zn, zm, 1);
69+
}
70+
71+
// CHECK-LABEL: define dso_local <vscale x 8 x i16> @test_svextq_lane_s16
72+
// CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) #[[ATTR0]] {
73+
// CHECK-NEXT: entry:
74+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.extq.lane.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]], i32 5)
75+
// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
76+
//
77+
// CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> @_Z20test_svextq_lane_s16u11__SVInt16_tS_
78+
// CPP-CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) #[[ATTR0]] {
79+
// CPP-CHECK-NEXT: entry:
80+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.extq.lane.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]], i32 5)
81+
// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
82+
//
83+
svint16_t test_svextq_lane_s16(svint16_t zn, svint16_t zm) {
84+
return SVE_ACLE_FUNC(svextq_lane, _s16,,)(zn, zm, 5);
85+
}
86+
87+
// CHECK-LABEL: define dso_local <vscale x 4 x i32> @test_svextq_lane_u32
88+
// CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]]) #[[ATTR0]] {
89+
// CHECK-NEXT: entry:
90+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.extq.lane.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]], i32 2)
91+
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
92+
//
93+
// CPP-CHECK-LABEL: define dso_local <vscale x 4 x i32> @_Z20test_svextq_lane_u32u12__SVUint32_tS_
94+
// CPP-CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]]) #[[ATTR0]] {
95+
// CPP-CHECK-NEXT: entry:
96+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.extq.lane.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]], i32 2)
97+
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
98+
//
99+
svuint32_t test_svextq_lane_u32(svuint32_t zn, svuint32_t zm) {
100+
return SVE_ACLE_FUNC(svextq_lane, _u32,,)(zn, zm, 2);
101+
}
102+
103+
// CHECK-LABEL: define dso_local <vscale x 4 x i32> @test_svextq_lane_s32
104+
// CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]]) #[[ATTR0]] {
105+
// CHECK-NEXT: entry:
106+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.extq.lane.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]], i32 6)
107+
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
108+
//
109+
// CPP-CHECK-LABEL: define dso_local <vscale x 4 x i32> @_Z20test_svextq_lane_s32u11__SVInt32_tS_
110+
// CPP-CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]]) #[[ATTR0]] {
111+
// CPP-CHECK-NEXT: entry:
112+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.extq.lane.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]], i32 6)
113+
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
114+
//
115+
svint32_t test_svextq_lane_s32(svint32_t zn, svint32_t zm) {
116+
return SVE_ACLE_FUNC(svextq_lane, _s32,,)(zn, zm, 6);
117+
}
118+
119+
// CHECK-LABEL: define dso_local <vscale x 2 x i64> @test_svextq_lane_u64
120+
// CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]], <vscale x 2 x i64> [[ZM:%.*]]) #[[ATTR0]] {
121+
// CHECK-NEXT: entry:
122+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.extq.lane.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]], i32 3)
123+
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
124+
//
125+
// CPP-CHECK-LABEL: define dso_local <vscale x 2 x i64> @_Z20test_svextq_lane_u64u12__SVUint64_tS_
126+
// CPP-CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]], <vscale x 2 x i64> [[ZM:%.*]]) #[[ATTR0]] {
127+
// CPP-CHECK-NEXT: entry:
128+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.extq.lane.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]], i32 3)
129+
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
130+
//
131+
svuint64_t test_svextq_lane_u64(svuint64_t zn, svuint64_t zm) {
132+
return SVE_ACLE_FUNC(svextq_lane, _u64,,)(zn, zm, 3);
133+
}
134+
135+
// CHECK-LABEL: define dso_local <vscale x 2 x i64> @test_svextq_lane_s64
136+
// CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]], <vscale x 2 x i64> [[ZM:%.*]]) #[[ATTR0]] {
137+
// CHECK-NEXT: entry:
138+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.extq.lane.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]], i32 7)
139+
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
140+
//
141+
// CPP-CHECK-LABEL: define dso_local <vscale x 2 x i64> @_Z20test_svextq_lane_s64u11__SVInt64_tS_
142+
// CPP-CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]], <vscale x 2 x i64> [[ZM:%.*]]) #[[ATTR0]] {
143+
// CPP-CHECK-NEXT: entry:
144+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.extq.lane.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]], i32 7)
145+
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
146+
//
147+
svint64_t test_svextq_lane_s64(svint64_t zn, svint64_t zm) {
148+
return SVE_ACLE_FUNC(svextq_lane, _s64,,)(zn, zm, 7);
149+
}
150+
151+
// CHECK-LABEL: define dso_local <vscale x 8 x half> @test_svextq_lane_f16
152+
// CHECK-SAME: (<vscale x 8 x half> [[ZN:%.*]], <vscale x 8 x half> [[ZM:%.*]]) #[[ATTR0]] {
153+
// CHECK-NEXT: entry:
154+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.extq.lane.nxv8f16(<vscale x 8 x half> [[ZN]], <vscale x 8 x half> [[ZM]], i32 8)
155+
// CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]]
156+
//
157+
// CPP-CHECK-LABEL: define dso_local <vscale x 8 x half> @_Z20test_svextq_lane_f16u13__SVFloat16_tS_
158+
// CPP-CHECK-SAME: (<vscale x 8 x half> [[ZN:%.*]], <vscale x 8 x half> [[ZM:%.*]]) #[[ATTR0]] {
159+
// CPP-CHECK-NEXT: entry:
160+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.extq.lane.nxv8f16(<vscale x 8 x half> [[ZN]], <vscale x 8 x half> [[ZM]], i32 8)
161+
// CPP-CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]]
162+
//
163+
svfloat16_t test_svextq_lane_f16(svfloat16_t zn, svfloat16_t zm) {
164+
return SVE_ACLE_FUNC(svextq_lane, _f16,,)(zn, zm, 8);
165+
}
166+
167+
// CHECK-LABEL: define dso_local <vscale x 4 x float> @test_svextq_lane_f32
168+
// CHECK-SAME: (<vscale x 4 x float> [[ZN:%.*]], <vscale x 4 x float> [[ZM:%.*]]) #[[ATTR0]] {
169+
// CHECK-NEXT: entry:
170+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.extq.lane.nxv4f32(<vscale x 4 x float> [[ZN]], <vscale x 4 x float> [[ZM]], i32 9)
171+
// CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
172+
//
173+
// CPP-CHECK-LABEL: define dso_local <vscale x 4 x float> @_Z20test_svextq_lane_f32u13__SVFloat32_tS_
174+
// CPP-CHECK-SAME: (<vscale x 4 x float> [[ZN:%.*]], <vscale x 4 x float> [[ZM:%.*]]) #[[ATTR0]] {
175+
// CPP-CHECK-NEXT: entry:
176+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.extq.lane.nxv4f32(<vscale x 4 x float> [[ZN]], <vscale x 4 x float> [[ZM]], i32 9)
177+
// CPP-CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
178+
//
179+
svfloat32_t test_svextq_lane_f32(svfloat32_t zn, svfloat32_t zm) {
180+
return SVE_ACLE_FUNC(svextq_lane, _f32,,)(zn, zm, 9);
181+
}
182+
183+
// CHECK-LABEL: define dso_local <vscale x 2 x double> @test_svextq_lane_f64
184+
// CHECK-SAME: (<vscale x 2 x double> [[ZN:%.*]], <vscale x 2 x double> [[ZM:%.*]]) #[[ATTR0]] {
185+
// CHECK-NEXT: entry:
186+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.extq.lane.nxv2f64(<vscale x 2 x double> [[ZN]], <vscale x 2 x double> [[ZM]], i32 10)
187+
// CHECK-NEXT: ret <vscale x 2 x double> [[TMP0]]
188+
//
189+
// CPP-CHECK-LABEL: define dso_local <vscale x 2 x double> @_Z20test_svextq_lane_f64u13__SVFloat64_tS_
190+
// CPP-CHECK-SAME: (<vscale x 2 x double> [[ZN:%.*]], <vscale x 2 x double> [[ZM:%.*]]) #[[ATTR0]] {
191+
// CPP-CHECK-NEXT: entry:
192+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.extq.lane.nxv2f64(<vscale x 2 x double> [[ZN]], <vscale x 2 x double> [[ZM]], i32 10)
193+
// CPP-CHECK-NEXT: ret <vscale x 2 x double> [[TMP0]]
194+
//
195+
svfloat64_t test_svextq_lane_f64(svfloat64_t zn, svfloat64_t zm) {
196+
return SVE_ACLE_FUNC(svextq_lane, _f64,,)(zn, zm, 10);
197+
}
198+
199+
// CHECK-LABEL: define dso_local <vscale x 8 x bfloat> @test_svextq_lane_bf16
200+
// CHECK-SAME: (<vscale x 8 x bfloat> [[ZN:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]]) #[[ATTR0]] {
201+
// CHECK-NEXT: entry:
202+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.extq.lane.nxv8bf16(<vscale x 8 x bfloat> [[ZN]], <vscale x 8 x bfloat> [[ZM]], i32 11)
203+
// CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP0]]
204+
//
205+
// CPP-CHECK-LABEL: define dso_local <vscale x 8 x bfloat> @_Z21test_svextq_lane_bf16u14__SVBfloat16_tS_
206+
// CPP-CHECK-SAME: (<vscale x 8 x bfloat> [[ZN:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]]) #[[ATTR0]] {
207+
// CPP-CHECK-NEXT: entry:
208+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.extq.lane.nxv8bf16(<vscale x 8 x bfloat> [[ZN]], <vscale x 8 x bfloat> [[ZM]], i32 11)
209+
// CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP0]]
210+
//
211+
svbfloat16_t test_svextq_lane_bf16(svbfloat16_t zn, svbfloat16_t zm) {
212+
return SVE_ACLE_FUNC(svextq_lane, _bf16,,)(zn, zm, 11);
213+
}

0 commit comments

Comments
 (0)