@@ -107,9 +107,7 @@ define <8 x i8> @sqxtn8b(<8 x i16> %A) nounwind #0 {
; CHECK-SAME: <8 x i16> [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
- ; CHECK-NEXT: [[TMP2:%.*]] = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> [[TMP1]])
- ; CHECK-NEXT: [[TMP5:%.*]] = zext i16 [[TMP2]] to i64
- ; CHECK-NEXT: [[TMP4:%.*]] = bitcast i64 [[TMP5]] to <8 x i8>
+ ; CHECK-NEXT: [[TMP4:%.*]] = trunc <8 x i16> [[TMP1]] to <8 x i8>
; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqxtn.v8i8(<8 x i16> [[A]])
; CHECK-NEXT: store <8 x i8> [[TMP4]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <8 x i8> [[TMP3]]
@@ -123,9 +121,7 @@ define <4 x i16> @sqxtn4h(<4 x i32> %A) nounwind #0 {
; CHECK-SAME: <4 x i32> [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
- ; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP1]])
- ; CHECK-NEXT: [[TMP5:%.*]] = zext i32 [[TMP2]] to i64
- ; CHECK-NEXT: [[TMP4:%.*]] = bitcast i64 [[TMP5]] to <4 x i16>
+ ; CHECK-NEXT: [[TMP4:%.*]] = trunc <4 x i32> [[TMP1]] to <4 x i16>
; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqxtn.v4i16(<4 x i32> [[A]])
; CHECK-NEXT: store <4 x i16> [[TMP4]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <4 x i16> [[TMP3]]
@@ -139,8 +135,7 @@ define <2 x i32> @sqxtn2s(<2 x i64> %A) nounwind #0 {
; CHECK-SAME: <2 x i64> [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
- ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> [[TMP1]])
- ; CHECK-NEXT: [[TMP4:%.*]] = bitcast i64 [[TMP2]] to <2 x i32>
+ ; CHECK-NEXT: [[TMP4:%.*]] = trunc <2 x i64> [[TMP1]] to <2 x i32>
; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqxtn.v2i32(<2 x i64> [[A]])
; CHECK-NEXT: store <2 x i32> [[TMP4]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <2 x i32> [[TMP3]]
@@ -155,9 +150,7 @@ define <16 x i8> @sqxtn2_16b(<8 x i8> %ret, <8 x i16> %A) nounwind #0 {
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
- ; CHECK-NEXT: [[TMP6:%.*]] = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> [[TMP1]])
- ; CHECK-NEXT: [[TMP4:%.*]] = zext i16 [[TMP6]] to i64
- ; CHECK-NEXT: [[TMP5:%.*]] = bitcast i64 [[TMP4]] to <8 x i8>
+ ; CHECK-NEXT: [[TMP5:%.*]] = trunc <8 x i16> [[TMP1]] to <8 x i8>
; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqxtn.v8i8(<8 x i16> [[A]])
; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <8 x i8> [[TMP2]], <8 x i8> [[TMP5]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT: [[RES:%.*]] = shufflevector <8 x i8> [[RET]], <8 x i8> [[TMP3]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
@@ -175,9 +168,7 @@ define <8 x i16> @sqxtn2_8h(<4 x i16> %ret, <4 x i32> %A) nounwind #0 {
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i16>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
- ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP1]])
- ; CHECK-NEXT: [[TMP4:%.*]] = zext i32 [[TMP6]] to i64
- ; CHECK-NEXT: [[TMP5:%.*]] = bitcast i64 [[TMP4]] to <4 x i16>
+ ; CHECK-NEXT: [[TMP5:%.*]] = trunc <4 x i32> [[TMP1]] to <4 x i16>
; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqxtn.v4i16(<4 x i32> [[A]])
; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP5]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x i16> [[RET]], <4 x i16> [[TMP3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -195,8 +186,7 @@ define <4 x i32> @sqxtn2_4s(<2 x i32> %ret, <2 x i64> %A) nounwind #0 {
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
- ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> [[TMP1]])
- ; CHECK-NEXT: [[TMP4:%.*]] = bitcast i64 [[TMP5]] to <2 x i32>
+ ; CHECK-NEXT: [[TMP4:%.*]] = trunc <2 x i64> [[TMP1]] to <2 x i32>
; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqxtn.v2i32(<2 x i64> [[A]])
; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP4]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT: [[RES:%.*]] = shufflevector <2 x i32> [[RET]], <2 x i32> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -217,9 +207,7 @@ define <8 x i8> @uqxtn8b(<8 x i16> %A) nounwind #0 {
; CHECK-SAME: <8 x i16> [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
- ; CHECK-NEXT: [[TMP2:%.*]] = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> [[TMP1]])
- ; CHECK-NEXT: [[TMP5:%.*]] = zext i16 [[TMP2]] to i64
- ; CHECK-NEXT: [[TMP4:%.*]] = bitcast i64 [[TMP5]] to <8 x i8>
+ ; CHECK-NEXT: [[TMP4:%.*]] = trunc <8 x i16> [[TMP1]] to <8 x i8>
; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqxtn.v8i8(<8 x i16> [[A]])
; CHECK-NEXT: store <8 x i8> [[TMP4]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <8 x i8> [[TMP3]]
@@ -233,9 +221,7 @@ define <4 x i16> @uqxtn4h(<4 x i32> %A) nounwind #0 {
; CHECK-SAME: <4 x i32> [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
- ; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP1]])
- ; CHECK-NEXT: [[TMP5:%.*]] = zext i32 [[TMP2]] to i64
- ; CHECK-NEXT: [[TMP4:%.*]] = bitcast i64 [[TMP5]] to <4 x i16>
+ ; CHECK-NEXT: [[TMP4:%.*]] = trunc <4 x i32> [[TMP1]] to <4 x i16>
; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqxtn.v4i16(<4 x i32> [[A]])
; CHECK-NEXT: store <4 x i16> [[TMP4]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <4 x i16> [[TMP3]]
@@ -249,8 +235,7 @@ define <2 x i32> @uqxtn2s(<2 x i64> %A) nounwind #0 {
; CHECK-SAME: <2 x i64> [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
- ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> [[TMP1]])
- ; CHECK-NEXT: [[TMP4:%.*]] = bitcast i64 [[TMP2]] to <2 x i32>
+ ; CHECK-NEXT: [[TMP4:%.*]] = trunc <2 x i64> [[TMP1]] to <2 x i32>
; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqxtn.v2i32(<2 x i64> [[A]])
; CHECK-NEXT: store <2 x i32> [[TMP4]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <2 x i32> [[TMP3]]
@@ -265,9 +250,7 @@ define <16 x i8> @uqxtn2_16b(<8 x i8> %ret, <8 x i16> %A) nounwind #0 {
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
- ; CHECK-NEXT: [[TMP6:%.*]] = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> [[TMP1]])
- ; CHECK-NEXT: [[TMP4:%.*]] = zext i16 [[TMP6]] to i64
- ; CHECK-NEXT: [[TMP5:%.*]] = bitcast i64 [[TMP4]] to <8 x i8>
+ ; CHECK-NEXT: [[TMP5:%.*]] = trunc <8 x i16> [[TMP1]] to <8 x i8>
; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqxtn.v8i8(<8 x i16> [[A]])
; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <8 x i8> [[TMP2]], <8 x i8> [[TMP5]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT: [[RES:%.*]] = shufflevector <8 x i8> [[RET]], <8 x i8> [[TMP3]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
@@ -285,9 +268,7 @@ define <8 x i16> @uqxtn2_8h(<4 x i16> %ret, <4 x i32> %A) nounwind #0 {
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i16>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
- ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP1]])
- ; CHECK-NEXT: [[TMP4:%.*]] = zext i32 [[TMP6]] to i64
- ; CHECK-NEXT: [[TMP5:%.*]] = bitcast i64 [[TMP4]] to <4 x i16>
+ ; CHECK-NEXT: [[TMP5:%.*]] = trunc <4 x i32> [[TMP1]] to <4 x i16>
; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqxtn.v4i16(<4 x i32> [[A]])
; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP5]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x i16> [[RET]], <4 x i16> [[TMP3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -305,8 +286,7 @@ define <4 x i32> @uqxtn2_4s(<2 x i32> %ret, <2 x i64> %A) nounwind #0 {
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
- ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> [[TMP1]])
- ; CHECK-NEXT: [[TMP4:%.*]] = bitcast i64 [[TMP5]] to <2 x i32>
+ ; CHECK-NEXT: [[TMP4:%.*]] = trunc <2 x i64> [[TMP1]] to <2 x i32>
; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqxtn.v2i32(<2 x i64> [[A]])
; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP4]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT: [[RES:%.*]] = shufflevector <2 x i32> [[RET]], <2 x i32> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -327,9 +307,7 @@ define <8 x i8> @sqxtun8b(<8 x i16> %A) nounwind #0 {
; CHECK-SAME: <8 x i16> [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
- ; CHECK-NEXT: [[TMP2:%.*]] = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> [[TMP1]])
- ; CHECK-NEXT: [[TMP5:%.*]] = zext i16 [[TMP2]] to i64
- ; CHECK-NEXT: [[TMP4:%.*]] = bitcast i64 [[TMP5]] to <8 x i8>
+ ; CHECK-NEXT: [[TMP4:%.*]] = trunc <8 x i16> [[TMP1]] to <8 x i8>
; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqxtun.v8i8(<8 x i16> [[A]])
; CHECK-NEXT: store <8 x i8> [[TMP4]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <8 x i8> [[TMP3]]
@@ -343,9 +321,7 @@ define <4 x i16> @sqxtun4h(<4 x i32> %A) nounwind #0 {
; CHECK-SAME: <4 x i32> [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
- ; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP1]])
- ; CHECK-NEXT: [[TMP5:%.*]] = zext i32 [[TMP2]] to i64
- ; CHECK-NEXT: [[TMP4:%.*]] = bitcast i64 [[TMP5]] to <4 x i16>
+ ; CHECK-NEXT: [[TMP4:%.*]] = trunc <4 x i32> [[TMP1]] to <4 x i16>
; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqxtun.v4i16(<4 x i32> [[A]])
; CHECK-NEXT: store <4 x i16> [[TMP4]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <4 x i16> [[TMP3]]
@@ -359,8 +335,7 @@ define <2 x i32> @sqxtun2s(<2 x i64> %A) nounwind #0 {
; CHECK-SAME: <2 x i64> [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
- ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> [[TMP1]])
- ; CHECK-NEXT: [[TMP4:%.*]] = bitcast i64 [[TMP2]] to <2 x i32>
+ ; CHECK-NEXT: [[TMP4:%.*]] = trunc <2 x i64> [[TMP1]] to <2 x i32>
; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqxtun.v2i32(<2 x i64> [[A]])
; CHECK-NEXT: store <2 x i32> [[TMP4]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <2 x i32> [[TMP3]]
@@ -375,9 +350,7 @@ define <16 x i8> @sqxtun2_16b(<8 x i8> %ret, <8 x i16> %A) nounwind #0 {
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
- ; CHECK-NEXT: [[TMP6:%.*]] = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> [[TMP1]])
- ; CHECK-NEXT: [[TMP4:%.*]] = zext i16 [[TMP6]] to i64
- ; CHECK-NEXT: [[TMP5:%.*]] = bitcast i64 [[TMP4]] to <8 x i8>
+ ; CHECK-NEXT: [[TMP5:%.*]] = trunc <8 x i16> [[TMP1]] to <8 x i8>
; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqxtun.v8i8(<8 x i16> [[A]])
; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <8 x i8> [[TMP2]], <8 x i8> [[TMP5]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT: [[RES:%.*]] = shufflevector <8 x i8> [[RET]], <8 x i8> [[TMP3]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
@@ -395,9 +368,7 @@ define <8 x i16> @sqxtun2_8h(<4 x i16> %ret, <4 x i32> %A) nounwind #0 {
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i16>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
- ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP1]])
- ; CHECK-NEXT: [[TMP4:%.*]] = zext i32 [[TMP6]] to i64
- ; CHECK-NEXT: [[TMP5:%.*]] = bitcast i64 [[TMP4]] to <4 x i16>
+ ; CHECK-NEXT: [[TMP5:%.*]] = trunc <4 x i32> [[TMP1]] to <4 x i16>
; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqxtun.v4i16(<4 x i32> [[A]])
; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP5]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x i16> [[RET]], <4 x i16> [[TMP3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -415,8 +386,7 @@ define <4 x i32> @sqxtun2_4s(<2 x i32> %ret, <2 x i64> %A) nounwind #0 {
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
- ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> [[TMP1]])
- ; CHECK-NEXT: [[TMP4:%.*]] = bitcast i64 [[TMP5]] to <2 x i32>
+ ; CHECK-NEXT: [[TMP4:%.*]] = trunc <2 x i64> [[TMP1]] to <2 x i32>
; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqxtun.v2i32(<2 x i64> [[A]])
; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP4]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT: [[RES:%.*]] = shufflevector <2 x i32> [[RET]], <2 x i32> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
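Note on the pattern repeated across these hunks: every test moves from an OR-reduce/zext/bitcast shadow computation to a single elementwise trunc for the saturating-narrow intrinsics (sqxtn/uqxtn/sqxtun). The sketch below contrasts the two shapes as standalone LLVM IR; it is not part of the test file, and %shadow is a hypothetical stand-in for the operand shadow that the instrumented code loads from @__msan_param_tls.

; Old shape (per the removed CHECK lines): collapse all shadow lanes
; into one scalar, widen it, and bitcast back, so the reduced bits
; land only in the low bytes of the result shadow.
define <8 x i8> @old_shadow(<8 x i16> %shadow) {
  %red  = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> %shadow)
  %wide = zext i16 %red to i64
  %old  = bitcast i64 %wide to <8 x i8>
  ret <8 x i8> %old
}

; New shape (per the added CHECK lines): truncate the operand shadow
; lane by lane, so lane i of the result shadow depends only on lane i
; of the operand shadow.
define <8 x i8> @new_shadow(<8 x i16> %shadow) {
  %new = trunc <8 x i16> %shadow to <8 x i8>
  ret <8 x i8> %new
}

declare i16 @llvm.vector.reduce.or.v8i16(<8 x i16>)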