You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
[AArch64][SVE] Change the immediate argument in svextq (#115340)
In order to align with `svext` and NEON `vext`/`vextq`, this patch
changes immediate argument in `svextq` such that it refers to elements
of the size of those of the source vector, rather than bytes. The [spec
for this
intrinsic](https://github.com/ARM-software/acle/blob/main/main/acle.md#extq)
is ambiguous about the meaning of this argument, this issue was raised
after there was a differing interpretation for it from the implementers
of the ACLE in GCC.
For example (with our current implementation):
`svextq_f64(zn_f64, zm_f64, 1)` would, for each 128-bit segment of
`zn_f64,` concatenate the highest 15 bytes of this segment with the
first byte of the corresponding segment of `zm_f64`.
After this patch, the behavior of `svextq_f64(zn_f64, zm_f64, 1)` would
be, for each 128-bit vector segment of `zn_f64`, to concatenate the
higher doubleword of this segment with the lower doubleword of the
corresponding segment of `zm_f64`.
The range of the immediate argument in `svextq` would be modified such
that it is:
- [0,15] for `svextq_{s8,u8}`
- [0,7] for `svextq_{s16,u16,f16,bf16}`
- [0,3] for `svextq_{s32,u32,f32}`
- [0,1] for `svextq_{s64,u64,f64}`
// CHECK-LABEL: define dso_local <vscale x 4 x i32> @test_svextq_s32
104
104
// CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]]) #[[ATTR0]] {
105
105
// CHECK-NEXT: entry:
106
-
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.extq.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]], i32 6)
106
+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.extq.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]], i32 3)
107
107
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
108
108
//
109
109
// CPP-CHECK-LABEL: define dso_local <vscale x 4 x i32> @_Z15test_svextq_s32u11__SVInt32_tS_
110
110
// CPP-CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]]) #[[ATTR0]] {
111
111
// CPP-CHECK-NEXT: entry:
112
-
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.extq.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]], i32 6)
112
+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.extq.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]], i32 3)
113
113
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
// CHECK-LABEL: define dso_local <vscale x 2 x i64> @test_svextq_u64
120
120
// CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]], <vscale x 2 x i64> [[ZM:%.*]]) #[[ATTR0]] {
121
121
// CHECK-NEXT: entry:
122
-
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.extq.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]], i32 3)
122
+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.extq.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]], i32 1)
123
123
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
124
124
//
125
125
// CPP-CHECK-LABEL: define dso_local <vscale x 2 x i64> @_Z15test_svextq_u64u12__SVUint64_tS_
126
126
// CPP-CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]], <vscale x 2 x i64> [[ZM:%.*]]) #[[ATTR0]] {
127
127
// CPP-CHECK-NEXT: entry:
128
-
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.extq.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]], i32 3)
128
+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.extq.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]], i32 1)
129
129
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
// CHECK-LABEL: define dso_local <vscale x 2 x i64> @test_svextq_s64
136
136
// CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]], <vscale x 2 x i64> [[ZM:%.*]]) #[[ATTR0]] {
137
137
// CHECK-NEXT: entry:
138
-
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.extq.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]], i32 7)
138
+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.extq.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]], i32 0)
139
139
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
140
140
//
141
141
// CPP-CHECK-LABEL: define dso_local <vscale x 2 x i64> @_Z15test_svextq_s64u11__SVInt64_tS_
142
142
// CPP-CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]], <vscale x 2 x i64> [[ZM:%.*]]) #[[ATTR0]] {
143
143
// CPP-CHECK-NEXT: entry:
144
-
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.extq.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]], i32 7)
144
+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.extq.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]], i32 0)
145
145
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
// CHECK-LABEL: define dso_local <vscale x 8 x half> @test_svextq_f16
152
152
// CHECK-SAME: (<vscale x 8 x half> [[ZN:%.*]], <vscale x 8 x half> [[ZM:%.*]]) #[[ATTR0]] {
153
153
// CHECK-NEXT: entry:
154
-
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.extq.nxv8f16(<vscale x 8 x half> [[ZN]], <vscale x 8 x half> [[ZM]], i32 8)
154
+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.extq.nxv8f16(<vscale x 8 x half> [[ZN]], <vscale x 8 x half> [[ZM]], i32 7)
155
155
// CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]]
156
156
//
157
157
// CPP-CHECK-LABEL: define dso_local <vscale x 8 x half> @_Z15test_svextq_f16u13__SVFloat16_tS_
158
158
// CPP-CHECK-SAME: (<vscale x 8 x half> [[ZN:%.*]], <vscale x 8 x half> [[ZM:%.*]]) #[[ATTR0]] {
159
159
// CPP-CHECK-NEXT: entry:
160
-
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.extq.nxv8f16(<vscale x 8 x half> [[ZN]], <vscale x 8 x half> [[ZM]], i32 8)
160
+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.extq.nxv8f16(<vscale x 8 x half> [[ZN]], <vscale x 8 x half> [[ZM]], i32 7)
161
161
// CPP-CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]]
// CHECK-LABEL: define dso_local <vscale x 4 x float> @test_svextq_f32
168
168
// CHECK-SAME: (<vscale x 4 x float> [[ZN:%.*]], <vscale x 4 x float> [[ZM:%.*]]) #[[ATTR0]] {
169
169
// CHECK-NEXT: entry:
170
-
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.extq.nxv4f32(<vscale x 4 x float> [[ZN]], <vscale x 4 x float> [[ZM]], i32 9)
170
+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.extq.nxv4f32(<vscale x 4 x float> [[ZN]], <vscale x 4 x float> [[ZM]], i32 2)
171
171
// CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
172
172
//
173
173
// CPP-CHECK-LABEL: define dso_local <vscale x 4 x float> @_Z15test_svextq_f32u13__SVFloat32_tS_
174
174
// CPP-CHECK-SAME: (<vscale x 4 x float> [[ZN:%.*]], <vscale x 4 x float> [[ZM:%.*]]) #[[ATTR0]] {
175
175
// CPP-CHECK-NEXT: entry:
176
-
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.extq.nxv4f32(<vscale x 4 x float> [[ZN]], <vscale x 4 x float> [[ZM]], i32 9)
176
+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.extq.nxv4f32(<vscale x 4 x float> [[ZN]], <vscale x 4 x float> [[ZM]], i32 2)
177
177
// CPP-CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
// CHECK-LABEL: define dso_local <vscale x 2 x double> @test_svextq_f64
184
184
// CHECK-SAME: (<vscale x 2 x double> [[ZN:%.*]], <vscale x 2 x double> [[ZM:%.*]]) #[[ATTR0]] {
185
185
// CHECK-NEXT: entry:
186
-
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.extq.nxv2f64(<vscale x 2 x double> [[ZN]], <vscale x 2 x double> [[ZM]], i32 10)
186
+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.extq.nxv2f64(<vscale x 2 x double> [[ZN]], <vscale x 2 x double> [[ZM]], i32 0)
187
187
// CHECK-NEXT: ret <vscale x 2 x double> [[TMP0]]
188
188
//
189
189
// CPP-CHECK-LABEL: define dso_local <vscale x 2 x double> @_Z15test_svextq_f64u13__SVFloat64_tS_
190
190
// CPP-CHECK-SAME: (<vscale x 2 x double> [[ZN:%.*]], <vscale x 2 x double> [[ZM:%.*]]) #[[ATTR0]] {
191
191
// CPP-CHECK-NEXT: entry:
192
-
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.extq.nxv2f64(<vscale x 2 x double> [[ZN]], <vscale x 2 x double> [[ZM]], i32 10)
192
+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.extq.nxv2f64(<vscale x 2 x double> [[ZN]], <vscale x 2 x double> [[ZM]], i32 0)
193
193
// CPP-CHECK-NEXT: ret <vscale x 2 x double> [[TMP0]]
// CHECK-LABEL: define dso_local <vscale x 8 x bfloat> @test_svextq_bf16
200
200
// CHECK-SAME: (<vscale x 8 x bfloat> [[ZN:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]]) #[[ATTR0]] {
201
201
// CHECK-NEXT: entry:
202
-
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.extq.nxv8bf16(<vscale x 8 x bfloat> [[ZN]], <vscale x 8 x bfloat> [[ZM]], i32 11)
202
+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.extq.nxv8bf16(<vscale x 8 x bfloat> [[ZN]], <vscale x 8 x bfloat> [[ZM]], i32 6)
203
203
// CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP0]]
204
204
//
205
205
// CPP-CHECK-LABEL: define dso_local <vscale x 8 x bfloat> @_Z16test_svextq_bf16u14__SVBfloat16_tS_
206
206
// CPP-CHECK-SAME: (<vscale x 8 x bfloat> [[ZN:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]]) #[[ATTR0]] {
207
207
// CPP-CHECK-NEXT: entry:
208
-
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.extq.nxv8bf16(<vscale x 8 x bfloat> [[ZN]], <vscale x 8 x bfloat> [[ZM]], i32 11)
208
+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.extq.nxv8bf16(<vscale x 8 x bfloat> [[ZN]], <vscale x 8 x bfloat> [[ZM]], i32 6)
209
209
// CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP0]]
0 commit comments