Skip to content

Commit 2b4d818

Browse files
authored
[Clang][LLVM][SVE2.1] Created intrinsics for DUPQ instr. (#83260)
This patch adds clang and llvm support for following intrinsic and maps it to DUPQ instruction: ``` // Variants are also available for: // _s8, _u16, _s16, _u32, _s32, _u64, _s64 // _bf16, _f16, _f32, _f64 svuint8_t svdup_laneq[_u8](svuint8_t zn, uint64_t imm_idx); ```
1 parent e0d4906 commit 2b4d818

File tree

7 files changed

+357
-6
lines changed

7 files changed

+357
-6
lines changed

clang/include/clang/Basic/arm_sve.td

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2215,6 +2215,15 @@ let TargetGuard = "sve2p1" in {
22152215
def SVTBXQ : SInst<"svtbxq[_{d}]", "dddu", "cUcsUsiUilUlbhfd", MergeNone, "aarch64_sve_tbxq">;
22162216
// EXTQ
22172217
def EXTQ : SInst<"svextq[_{d}]", "dddk", "cUcsUsiUilUlbhfd", MergeNone, "aarch64_sve_extq", [], [ImmCheck<2, ImmCheck0_15>]>;
2218+
// DUPQ
2219+
def SVDUP_LANEQ_B : SInst<"svdup_laneq[_{d}]", "ddi", "cUc", MergeNone, "aarch64_sve_dup_laneq", [IsStreamingCompatible], [ImmCheck<1, ImmCheck0_15>]>;
2220+
def SVDUP_LANEQ_H : SInst<"svdup_laneq[_{d}]", "ddi", "sUsh", MergeNone, "aarch64_sve_dup_laneq", [IsStreamingCompatible], [ImmCheck<1, ImmCheck0_7>]>;
2221+
def SVDUP_LANEQ_S : SInst<"svdup_laneq[_{d}]", "ddi", "iUif", MergeNone, "aarch64_sve_dup_laneq", [IsStreamingCompatible], [ImmCheck<1, ImmCheck0_3>]>;
2222+
def SVDUP_LANEQ_D : SInst<"svdup_laneq[_{d}]", "ddi", "lUld", MergeNone, "aarch64_sve_dup_laneq", [IsStreamingCompatible], [ImmCheck<1, ImmCheck0_1>]>;
2223+
2224+
let TargetGuard = "bf16" in {
2225+
def SVDUP_LANEQ_BF16 : SInst<"svdup_laneq[_{d}]", "ddi", "b", MergeNone, "aarch64_sve_dup_laneq", [IsStreamingCompatible], [ImmCheck<1, ImmCheck0_7>]>;
2226+
}
22182227
// PMOV
22192228
// Move to Pred
22202229
multiclass PMOV_TO_PRED<string name, string types, string intrinsic, list<FlagType> flags=[], ImmCheckType immCh > {
Lines changed: 213 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,213 @@
1+
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2
2+
// REQUIRES: aarch64-registered-target
3+
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +bf16\
4+
// RUN: -S -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
5+
// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +bf16\
6+
// RUN: -S -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
7+
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +bf16\
8+
// RUN: -S -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
9+
// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +bf16\
10+
// RUN: -S -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
11+
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
12+
13+
14+
#include <arm_sve.h>
15+
16+
#ifdef SVE_OVERLOADED_FORMS
17+
// A simple used,unused... macro, long enough to represent any SVE builtin.
18+
#define SVE_ACLE_FUNC(A1, A2_UNUSED) A1
19+
#else
20+
#define SVE_ACLE_FUNC(A1, A2) A1##A2
21+
#endif
22+
23+
// CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svdup_laneq_s8
24+
// CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]]) #[[ATTR0:[0-9]+]] {
25+
// CHECK-NEXT: entry:
26+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.dup.laneq.nxv16i8(<vscale x 16 x i8> [[ZN]], i32 0)
27+
// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
28+
//
29+
// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z19test_svdup_laneq_s8u10__SVInt8_t
30+
// CPP-CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]]) #[[ATTR0:[0-9]+]] {
31+
// CPP-CHECK-NEXT: entry:
32+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.dup.laneq.nxv16i8(<vscale x 16 x i8> [[ZN]], i32 0)
33+
// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
34+
//
35+
svint8_t test_svdup_laneq_s8(svint8_t zn) {
36+
return SVE_ACLE_FUNC(svdup_laneq, _s8)(zn, 0);
37+
}
38+
39+
// CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svdup_laneq_u8
40+
// CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]]) #[[ATTR0]] {
41+
// CHECK-NEXT: entry:
42+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.dup.laneq.nxv16i8(<vscale x 16 x i8> [[ZN]], i32 15)
43+
// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
44+
//
45+
// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z19test_svdup_laneq_u8u11__SVUint8_t
46+
// CPP-CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]]) #[[ATTR0]] {
47+
// CPP-CHECK-NEXT: entry:
48+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.dup.laneq.nxv16i8(<vscale x 16 x i8> [[ZN]], i32 15)
49+
// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
50+
//
51+
svuint8_t test_svdup_laneq_u8(svuint8_t zn) {
52+
return SVE_ACLE_FUNC(svdup_laneq, _u8)(zn, 15);
53+
}
54+
55+
// CHECK-LABEL: define dso_local <vscale x 8 x i16> @test_svdup_laneq_s16
56+
// CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]]) #[[ATTR0]] {
57+
// CHECK-NEXT: entry:
58+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.dup.laneq.nxv8i16(<vscale x 8 x i16> [[ZN]], i32 1)
59+
// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
60+
//
61+
// CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> @_Z20test_svdup_laneq_s16u11__SVInt16_t
62+
// CPP-CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]]) #[[ATTR0]] {
63+
// CPP-CHECK-NEXT: entry:
64+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.dup.laneq.nxv8i16(<vscale x 8 x i16> [[ZN]], i32 1)
65+
// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
66+
//
67+
svint16_t test_svdup_laneq_s16(svint16_t zn) {
68+
return SVE_ACLE_FUNC(svdup_laneq, _s16)(zn, 1);
69+
}
70+
71+
// CHECK-LABEL: define dso_local <vscale x 8 x i16> @test_svdup_laneq_u16
72+
// CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]]) #[[ATTR0]] {
73+
// CHECK-NEXT: entry:
74+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.dup.laneq.nxv8i16(<vscale x 8 x i16> [[ZN]], i32 7)
75+
// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
76+
//
77+
// CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> @_Z20test_svdup_laneq_u16u12__SVUint16_t
78+
// CPP-CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]]) #[[ATTR0]] {
79+
// CPP-CHECK-NEXT: entry:
80+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.dup.laneq.nxv8i16(<vscale x 8 x i16> [[ZN]], i32 7)
81+
// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
82+
//
83+
svuint16_t test_svdup_laneq_u16(svuint16_t zn) {
84+
return SVE_ACLE_FUNC(svdup_laneq, _u16)(zn, 7);
85+
}
86+
87+
// CHECK-LABEL: define dso_local <vscale x 4 x i32> @test_svdup_laneq_s32
88+
// CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]]) #[[ATTR0]] {
89+
// CHECK-NEXT: entry:
90+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.laneq.nxv4i32(<vscale x 4 x i32> [[ZN]], i32 2)
91+
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
92+
//
93+
// CPP-CHECK-LABEL: define dso_local <vscale x 4 x i32> @_Z20test_svdup_laneq_s32u11__SVInt32_t
94+
// CPP-CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]]) #[[ATTR0]] {
95+
// CPP-CHECK-NEXT: entry:
96+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.laneq.nxv4i32(<vscale x 4 x i32> [[ZN]], i32 2)
97+
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
98+
//
99+
svint32_t test_svdup_laneq_s32(svint32_t zn) {
100+
return SVE_ACLE_FUNC(svdup_laneq, _s32)(zn, 2);
101+
}
102+
103+
// CHECK-LABEL: define dso_local <vscale x 4 x i32> @test_svdup_laneq_u32
104+
// CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]]) #[[ATTR0]] {
105+
// CHECK-NEXT: entry:
106+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.laneq.nxv4i32(<vscale x 4 x i32> [[ZN]], i32 3)
107+
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
108+
//
109+
// CPP-CHECK-LABEL: define dso_local <vscale x 4 x i32> @_Z20test_svdup_laneq_u32u12__SVUint32_t
110+
// CPP-CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]]) #[[ATTR0]] {
111+
// CPP-CHECK-NEXT: entry:
112+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.laneq.nxv4i32(<vscale x 4 x i32> [[ZN]], i32 3)
113+
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
114+
//
115+
svuint32_t test_svdup_laneq_u32(svuint32_t zn) {
116+
return SVE_ACLE_FUNC(svdup_laneq, _u32)(zn, 3);
117+
}
118+
119+
// CHECK-LABEL: define dso_local <vscale x 2 x i64> @test_svdup_laneq_s64
120+
// CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]]) #[[ATTR0]] {
121+
// CHECK-NEXT: entry:
122+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.dup.laneq.nxv2i64(<vscale x 2 x i64> [[ZN]], i32 0)
123+
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
124+
//
125+
// CPP-CHECK-LABEL: define dso_local <vscale x 2 x i64> @_Z20test_svdup_laneq_s64u11__SVInt64_t
126+
// CPP-CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]]) #[[ATTR0]] {
127+
// CPP-CHECK-NEXT: entry:
128+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.dup.laneq.nxv2i64(<vscale x 2 x i64> [[ZN]], i32 0)
129+
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
130+
//
131+
svint64_t test_svdup_laneq_s64(svint64_t zn) {
132+
return SVE_ACLE_FUNC(svdup_laneq, _s64)(zn, 0);
133+
}
134+
135+
// CHECK-LABEL: define dso_local <vscale x 2 x i64> @test_svdup_laneq_u64
136+
// CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]]) #[[ATTR0]] {
137+
// CHECK-NEXT: entry:
138+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.dup.laneq.nxv2i64(<vscale x 2 x i64> [[ZN]], i32 1)
139+
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
140+
//
141+
// CPP-CHECK-LABEL: define dso_local <vscale x 2 x i64> @_Z20test_svdup_laneq_u64u12__SVUint64_t
142+
// CPP-CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]]) #[[ATTR0]] {
143+
// CPP-CHECK-NEXT: entry:
144+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.dup.laneq.nxv2i64(<vscale x 2 x i64> [[ZN]], i32 1)
145+
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
146+
//
147+
svuint64_t test_svdup_laneq_u64(svuint64_t zn) {
148+
return SVE_ACLE_FUNC(svdup_laneq, _u64)(zn, 1);
149+
}
150+
151+
// CHECK-LABEL: define dso_local <vscale x 8 x half> @test_svdup_laneq_f16
152+
// CHECK-SAME: (<vscale x 8 x half> [[ZN:%.*]]) #[[ATTR0]] {
153+
// CHECK-NEXT: entry:
154+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.dup.laneq.nxv8f16(<vscale x 8 x half> [[ZN]], i32 4)
155+
// CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]]
156+
//
157+
// CPP-CHECK-LABEL: define dso_local <vscale x 8 x half> @_Z20test_svdup_laneq_f16u13__SVFloat16_t
158+
// CPP-CHECK-SAME: (<vscale x 8 x half> [[ZN:%.*]]) #[[ATTR0]] {
159+
// CPP-CHECK-NEXT: entry:
160+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.dup.laneq.nxv8f16(<vscale x 8 x half> [[ZN]], i32 4)
161+
// CPP-CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]]
162+
//
163+
svfloat16_t test_svdup_laneq_f16(svfloat16_t zn) {
164+
return SVE_ACLE_FUNC(svdup_laneq, _f16)(zn, 4);
165+
}
166+
167+
// CHECK-LABEL: define dso_local <vscale x 4 x float> @test_svdup_laneq_f32
168+
// CHECK-SAME: (<vscale x 4 x float> [[ZN:%.*]]) #[[ATTR0]] {
169+
// CHECK-NEXT: entry:
170+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.dup.laneq.nxv4f32(<vscale x 4 x float> [[ZN]], i32 1)
171+
// CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
172+
//
173+
// CPP-CHECK-LABEL: define dso_local <vscale x 4 x float> @_Z20test_svdup_laneq_f32u13__SVFloat32_t
174+
// CPP-CHECK-SAME: (<vscale x 4 x float> [[ZN:%.*]]) #[[ATTR0]] {
175+
// CPP-CHECK-NEXT: entry:
176+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.dup.laneq.nxv4f32(<vscale x 4 x float> [[ZN]], i32 1)
177+
// CPP-CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
178+
//
179+
svfloat32_t test_svdup_laneq_f32(svfloat32_t zn) {
180+
return SVE_ACLE_FUNC(svdup_laneq, _f32)(zn, 1);
181+
}
182+
183+
// CHECK-LABEL: define dso_local <vscale x 2 x double> @test_svdup_laneq_f64
184+
// CHECK-SAME: (<vscale x 2 x double> [[ZN:%.*]]) #[[ATTR0]] {
185+
// CHECK-NEXT: entry:
186+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.dup.laneq.nxv2f64(<vscale x 2 x double> [[ZN]], i32 1)
187+
// CHECK-NEXT: ret <vscale x 2 x double> [[TMP0]]
188+
//
189+
// CPP-CHECK-LABEL: define dso_local <vscale x 2 x double> @_Z20test_svdup_laneq_f64u13__SVFloat64_t
190+
// CPP-CHECK-SAME: (<vscale x 2 x double> [[ZN:%.*]]) #[[ATTR0]] {
191+
// CPP-CHECK-NEXT: entry:
192+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.dup.laneq.nxv2f64(<vscale x 2 x double> [[ZN]], i32 1)
193+
// CPP-CHECK-NEXT: ret <vscale x 2 x double> [[TMP0]]
194+
//
195+
svfloat64_t test_svdup_laneq_f64(svfloat64_t zn) {
196+
return SVE_ACLE_FUNC(svdup_laneq, _f64)(zn, 1);
197+
}
198+
199+
// CHECK-LABEL: define dso_local <vscale x 8 x bfloat> @test_svdup_laneq_bf16
200+
// CHECK-SAME: (<vscale x 8 x bfloat> [[ZN:%.*]]) #[[ATTR0]] {
201+
// CHECK-NEXT: entry:
202+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.dup.laneq.nxv8bf16(<vscale x 8 x bfloat> [[ZN]], i32 3)
203+
// CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP0]]
204+
//
205+
// CPP-CHECK-LABEL: define dso_local <vscale x 8 x bfloat> @_Z21test_svdup_laneq_bf16u14__SVBfloat16_t
206+
// CPP-CHECK-SAME: (<vscale x 8 x bfloat> [[ZN:%.*]]) #[[ATTR0]] {
207+
// CPP-CHECK-NEXT: entry:
208+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.dup.laneq.nxv8bf16(<vscale x 8 x bfloat> [[ZN]], i32 3)
209+
// CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP0]]
210+
//
211+
svbfloat16_t test_svdup_laneq_bf16(svbfloat16_t zn) {
212+
return SVE_ACLE_FUNC(svdup_laneq, _bf16)(zn, 3);
213+
}

clang/test/Sema/aarch64-sve2p1-intrinsics/acle_sve2p1_imm.cpp

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -188,3 +188,32 @@ void test_svget_svset_b(uint64_t idx, svboolx2_t tuple2, svboolx4_t tuple4, svbo
188188
svget2_b(tuple2, idx); // expected-error {{argument to 'svget2_b' must be a constant integer}}
189189
svget4_b(tuple4, idx); // expected-error {{argument to 'svget4_b' must be a constant integer}}
190190
}
191+
192+
__attribute__((target("+sve2p1")))
193+
void test_svdup_laneq(){
194+
svuint8_t zn_u8;
195+
svuint16_t zn_u16;
196+
svuint32_t zn_u32;
197+
svuint64_t zn_u64;
198+
svint8_t zn_s8;
199+
svint16_t zn_s16;
200+
svint32_t zn_s32;
201+
svint64_t zn_s64;
202+
svfloat16_t zn_f16;
203+
svfloat32_t zn_f32;
204+
svfloat64_t zn_f64;
205+
svbfloat16_t zn_bf16;
206+
207+
svdup_laneq_u8(zn_u8,-1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 15]}}
208+
svdup_laneq_u16(zn_u16,-1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 7]}}
209+
svdup_laneq_u32(zn_u32,-1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
210+
svdup_laneq_u64(zn_u64,-1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 1]}}
211+
svdup_laneq_s8(zn_s8,-1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 15]}}
212+
svdup_laneq_s16(zn_s16,-1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 7]}}
213+
svdup_laneq_s32(zn_s32,-1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
214+
svdup_laneq_s64(zn_s64,-1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 1]}}
215+
svdup_laneq_f16(zn_f16,-1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 7]}}
216+
svdup_laneq_f32(zn_f32,-1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
217+
svdup_laneq_f64(zn_f64,-1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 1]}}
218+
svdup_laneq_bf16(zn_bf16,-1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 7]}}
219+
}

llvm/include/llvm/IR/IntrinsicsAArch64.td

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1360,6 +1360,12 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
13601360
LLVMSubdivide2VectorType<0>,
13611361
llvm_i32_ty],
13621362
[IntrNoMem, ImmArg<ArgIndex<3>>]>;
1363+
1364+
class SVE2_1VectorArgIndexed_Intrinsic
1365+
: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
1366+
[LLVMMatchType<0>,
1367+
llvm_i32_ty],
1368+
[IntrNoMem, ImmArg<ArgIndex<1>>]>;
13631369

13641370
class AdvSIMD_SVE_CDOT_LANE_Intrinsic
13651371
: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
@@ -1913,6 +1919,7 @@ def int_aarch64_sve_clastb : AdvSIMD_Pred2VectorArg_Intrinsic;
19131919
def int_aarch64_sve_clastb_n : AdvSIMD_SVE_ReduceWithInit_Intrinsic;
19141920
def int_aarch64_sve_compact : AdvSIMD_Pred1VectorArg_Intrinsic;
19151921
def int_aarch64_sve_dupq_lane : AdvSIMD_SVE_DUPQ_Intrinsic;
1922+
def int_aarch64_sve_dup_laneq : SVE2_1VectorArgIndexed_Intrinsic;
19161923
def int_aarch64_sve_ext : AdvSIMD_2VectorArgIndexed_Intrinsic;
19171924
def int_aarch64_sve_sel : AdvSIMD_Pred2VectorArg_Intrinsic;
19181925
def int_aarch64_sve_lasta : AdvSIMD_SVE_Reduce_Intrinsic;

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4101,7 +4101,7 @@ defm FMINNMQV : sve2p1_fp_reduction_q<0b101, "fminnmqv", int_aarch64_sve_fminnmq
41014101
defm FMAXQV : sve2p1_fp_reduction_q<0b110, "fmaxqv", int_aarch64_sve_fmaxqv>;
41024102
defm FMINQV : sve2p1_fp_reduction_q<0b111, "fminqv", int_aarch64_sve_fminqv>;
41034103

4104-
defm DUPQ_ZZI : sve2p1_dupq<"dupq">;
4104+
defm DUPQ_ZZI : sve2p1_dupq<"dupq", int_aarch64_sve_dup_laneq>;
41054105
defm EXTQ_ZZI : sve2p1_extq<"extq", int_aarch64_sve_extq>;
41064106

41074107
defm PMOV_PZI : sve2p1_vector_to_pred<"pmov", int_aarch64_sve_pmov_to_pred_lane, int_aarch64_sve_pmov_to_pred_lane_zero>;

llvm/lib/Target/AArch64/SVEInstrFormats.td

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9893,23 +9893,33 @@ class sve2p1_dupq<bits<5> ind_tsz, string mnemonic, ZPRRegOp zprty, Operand ityp
98939893
let hasSideEffects = 0;
98949894
}
98959895

9896-
multiclass sve2p1_dupq<string mnemonic> {
9897-
def _B : sve2p1_dupq<{?, ?, ?, ?, 1}, mnemonic, ZPR8, VectorIndexB32b> {
9896+
multiclass sve2p1_dupq<string mnemonic, SDPatternOperator Op> {
9897+
def _B : sve2p1_dupq<{?, ?, ?, ?, 1}, mnemonic, ZPR8, VectorIndexB32b_timm> {
98989898
bits<4> index;
98999899
let Inst{20-17} = index;
99009900
}
9901-
def _H : sve2p1_dupq<{?, ?, ?, 1, 0}, mnemonic, ZPR16, VectorIndexH32b> {
9901+
def _H : sve2p1_dupq<{?, ?, ?, 1, 0}, mnemonic, ZPR16, VectorIndexH32b_timm> {
99029902
bits<3> index;
99039903
let Inst{20-18} = index;
99049904
}
9905-
def _S : sve2p1_dupq<{?, ?, 1, 0, 0}, mnemonic, ZPR32, VectorIndexS32b> {
9905+
def _S : sve2p1_dupq<{?, ?, 1, 0, 0}, mnemonic, ZPR32, VectorIndexS32b_timm> {
99069906
bits<2> index;
99079907
let Inst{20-19} = index;
99089908
}
9909-
def _D : sve2p1_dupq<{?, 1, 0, 0, 0}, mnemonic, ZPR64, VectorIndexD32b> {
9909+
def _D : sve2p1_dupq<{?, 1, 0, 0, 0}, mnemonic, ZPR64, VectorIndexD32b_timm> {
99109910
bits<1> index;
99119911
let Inst{20} = index;
99129912
}
9913+
9914+
def : SVE_2_Op_Imm_Pat<nxv16i8, Op, nxv16i8, i32, VectorIndexB32b_timm, !cast<Instruction>(NAME # _B)>;
9915+
def : SVE_2_Op_Imm_Pat<nxv8i16, Op, nxv8i16, i32, VectorIndexH32b_timm, !cast<Instruction>(NAME # _H)>;
9916+
def : SVE_2_Op_Imm_Pat<nxv4i32, Op, nxv4i32, i32, VectorIndexS32b_timm, !cast<Instruction>(NAME # _S)>;
9917+
def : SVE_2_Op_Imm_Pat<nxv2i64, Op, nxv2i64, i32, VectorIndexD32b_timm, !cast<Instruction>(NAME # _D)>;
9918+
9919+
def : SVE_2_Op_Imm_Pat<nxv8f16, Op, nxv8f16, i32, VectorIndexH32b_timm, !cast<Instruction>(NAME # _H)>;
9920+
def : SVE_2_Op_Imm_Pat<nxv4f32, Op, nxv4f32, i32, VectorIndexS32b_timm, !cast<Instruction>(NAME # _S)>;
9921+
def : SVE_2_Op_Imm_Pat<nxv2f64, Op, nxv2f64, i32, VectorIndexD32b_timm, !cast<Instruction>(NAME # _D)>;
9922+
def : SVE_2_Op_Imm_Pat<nxv8bf16, Op, nxv8bf16, i32, VectorIndexH32b_timm, !cast<Instruction>(NAME # _H)>;
99139923
}
99149924

99159925

0 commit comments

Comments
 (0)