Skip to content

Commit 67e6b75

Browse files
committed
Fix instrumentation to use shadow cast
1 parent 9971fc5 commit 67e6b75

File tree

3 files changed

+55
-106
lines changed

3 files changed

+55
-106
lines changed

llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4237,6 +4237,21 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
42374237
/*trailingVerbatimArgs*/ 0);
42384238
}
42394239

4240+
/// Instrument vector instructions that change the width.
4241+
///
4242+
/// e.g., <4 x i16> @llvm.aarch64.neon.sqxtn.v4i16(<4 x i32>)
4243+
/// (this example also saturates the values, but we ignore that for the
4244+
/// purposes of propagating the shadow)
4245+
void handleVectorWidthChangeIntrinsic(IntrinsicInst &I) {
4246+
assert(I.arg_size() == 1);
4247+
4248+
IRBuilder<> IRB(&I);
4249+
Value *S = getShadow(&I, 0);
4250+
S = CreateShadowCast(IRB, S, getShadowTy(&I));
4251+
setShadow(&I, S);
4252+
setOriginForNaryOp(I);
4253+
}
4254+
42404255
/// Handle Arm NEON vector store intrinsics (vst{2,3,4}, vst1x_{2,3,4},
42414256
/// and vst{2,3,4}lane).
42424257
///
@@ -4898,7 +4913,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
48984913
case Intrinsic::aarch64_neon_sqxtn:
48994914
case Intrinsic::aarch64_neon_sqxtun:
49004915
case Intrinsic::aarch64_neon_uqxtn:
4901-
handleVectorReduceIntrinsic(I);
4916+
handleVectorWidthChangeIntrinsic(I);
49024917
break;
49034918

49044919
case Intrinsic::aarch64_neon_st1x2:

llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-vmovn.ll

Lines changed: 18 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -107,9 +107,7 @@ define <8 x i8> @sqxtn8b(<8 x i16> %A) nounwind #0 {
107107
; CHECK-SAME: <8 x i16> [[A:%.*]]) #[[ATTR0]] {
108108
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
109109
; CHECK-NEXT: call void @llvm.donothing()
110-
; CHECK-NEXT: [[TMP2:%.*]] = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> [[TMP1]])
111-
; CHECK-NEXT: [[TMP5:%.*]] = zext i16 [[TMP2]] to i64
112-
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i64 [[TMP5]] to <8 x i8>
110+
; CHECK-NEXT: [[TMP4:%.*]] = trunc <8 x i16> [[TMP1]] to <8 x i8>
113111
; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqxtn.v8i8(<8 x i16> [[A]])
114112
; CHECK-NEXT: store <8 x i8> [[TMP4]], ptr @__msan_retval_tls, align 8
115113
; CHECK-NEXT: ret <8 x i8> [[TMP3]]
@@ -123,9 +121,7 @@ define <4 x i16> @sqxtn4h(<4 x i32> %A) nounwind #0 {
123121
; CHECK-SAME: <4 x i32> [[A:%.*]]) #[[ATTR0]] {
124122
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
125123
; CHECK-NEXT: call void @llvm.donothing()
126-
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP1]])
127-
; CHECK-NEXT: [[TMP5:%.*]] = zext i32 [[TMP2]] to i64
128-
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i64 [[TMP5]] to <4 x i16>
124+
; CHECK-NEXT: [[TMP4:%.*]] = trunc <4 x i32> [[TMP1]] to <4 x i16>
129125
; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqxtn.v4i16(<4 x i32> [[A]])
130126
; CHECK-NEXT: store <4 x i16> [[TMP4]], ptr @__msan_retval_tls, align 8
131127
; CHECK-NEXT: ret <4 x i16> [[TMP3]]
@@ -139,8 +135,7 @@ define <2 x i32> @sqxtn2s(<2 x i64> %A) nounwind #0 {
139135
; CHECK-SAME: <2 x i64> [[A:%.*]]) #[[ATTR0]] {
140136
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
141137
; CHECK-NEXT: call void @llvm.donothing()
142-
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> [[TMP1]])
143-
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i64 [[TMP2]] to <2 x i32>
138+
; CHECK-NEXT: [[TMP4:%.*]] = trunc <2 x i64> [[TMP1]] to <2 x i32>
144139
; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqxtn.v2i32(<2 x i64> [[A]])
145140
; CHECK-NEXT: store <2 x i32> [[TMP4]], ptr @__msan_retval_tls, align 8
146141
; CHECK-NEXT: ret <2 x i32> [[TMP3]]
@@ -155,9 +150,7 @@ define <16 x i8> @sqxtn2_16b(<8 x i8> %ret, <8 x i16> %A) nounwind #0 {
155150
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
156151
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, ptr @__msan_param_tls, align 8
157152
; CHECK-NEXT: call void @llvm.donothing()
158-
; CHECK-NEXT: [[TMP6:%.*]] = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> [[TMP1]])
159-
; CHECK-NEXT: [[TMP4:%.*]] = zext i16 [[TMP6]] to i64
160-
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i64 [[TMP4]] to <8 x i8>
153+
; CHECK-NEXT: [[TMP5:%.*]] = trunc <8 x i16> [[TMP1]] to <8 x i8>
161154
; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqxtn.v8i8(<8 x i16> [[A]])
162155
; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <8 x i8> [[TMP2]], <8 x i8> [[TMP5]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
163156
; CHECK-NEXT: [[RES:%.*]] = shufflevector <8 x i8> [[RET]], <8 x i8> [[TMP3]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
@@ -175,9 +168,7 @@ define <8 x i16> @sqxtn2_8h(<4 x i16> %ret, <4 x i32> %A) nounwind #0 {
175168
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
176169
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i16>, ptr @__msan_param_tls, align 8
177170
; CHECK-NEXT: call void @llvm.donothing()
178-
; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP1]])
179-
; CHECK-NEXT: [[TMP4:%.*]] = zext i32 [[TMP6]] to i64
180-
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i64 [[TMP4]] to <4 x i16>
171+
; CHECK-NEXT: [[TMP5:%.*]] = trunc <4 x i32> [[TMP1]] to <4 x i16>
181172
; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqxtn.v4i16(<4 x i32> [[A]])
182173
; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP5]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
183174
; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x i16> [[RET]], <4 x i16> [[TMP3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -195,8 +186,7 @@ define <4 x i32> @sqxtn2_4s(<2 x i32> %ret, <2 x i64> %A) nounwind #0 {
195186
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
196187
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr @__msan_param_tls, align 8
197188
; CHECK-NEXT: call void @llvm.donothing()
198-
; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> [[TMP1]])
199-
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i64 [[TMP5]] to <2 x i32>
189+
; CHECK-NEXT: [[TMP4:%.*]] = trunc <2 x i64> [[TMP1]] to <2 x i32>
200190
; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqxtn.v2i32(<2 x i64> [[A]])
201191
; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP4]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
202192
; CHECK-NEXT: [[RES:%.*]] = shufflevector <2 x i32> [[RET]], <2 x i32> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -217,9 +207,7 @@ define <8 x i8> @uqxtn8b(<8 x i16> %A) nounwind #0 {
217207
; CHECK-SAME: <8 x i16> [[A:%.*]]) #[[ATTR0]] {
218208
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
219209
; CHECK-NEXT: call void @llvm.donothing()
220-
; CHECK-NEXT: [[TMP2:%.*]] = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> [[TMP1]])
221-
; CHECK-NEXT: [[TMP5:%.*]] = zext i16 [[TMP2]] to i64
222-
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i64 [[TMP5]] to <8 x i8>
210+
; CHECK-NEXT: [[TMP4:%.*]] = trunc <8 x i16> [[TMP1]] to <8 x i8>
223211
; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqxtn.v8i8(<8 x i16> [[A]])
224212
; CHECK-NEXT: store <8 x i8> [[TMP4]], ptr @__msan_retval_tls, align 8
225213
; CHECK-NEXT: ret <8 x i8> [[TMP3]]
@@ -233,9 +221,7 @@ define <4 x i16> @uqxtn4h(<4 x i32> %A) nounwind #0 {
233221
; CHECK-SAME: <4 x i32> [[A:%.*]]) #[[ATTR0]] {
234222
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
235223
; CHECK-NEXT: call void @llvm.donothing()
236-
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP1]])
237-
; CHECK-NEXT: [[TMP5:%.*]] = zext i32 [[TMP2]] to i64
238-
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i64 [[TMP5]] to <4 x i16>
224+
; CHECK-NEXT: [[TMP4:%.*]] = trunc <4 x i32> [[TMP1]] to <4 x i16>
239225
; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqxtn.v4i16(<4 x i32> [[A]])
240226
; CHECK-NEXT: store <4 x i16> [[TMP4]], ptr @__msan_retval_tls, align 8
241227
; CHECK-NEXT: ret <4 x i16> [[TMP3]]
@@ -249,8 +235,7 @@ define <2 x i32> @uqxtn2s(<2 x i64> %A) nounwind #0 {
249235
; CHECK-SAME: <2 x i64> [[A:%.*]]) #[[ATTR0]] {
250236
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
251237
; CHECK-NEXT: call void @llvm.donothing()
252-
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> [[TMP1]])
253-
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i64 [[TMP2]] to <2 x i32>
238+
; CHECK-NEXT: [[TMP4:%.*]] = trunc <2 x i64> [[TMP1]] to <2 x i32>
254239
; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqxtn.v2i32(<2 x i64> [[A]])
255240
; CHECK-NEXT: store <2 x i32> [[TMP4]], ptr @__msan_retval_tls, align 8
256241
; CHECK-NEXT: ret <2 x i32> [[TMP3]]
@@ -265,9 +250,7 @@ define <16 x i8> @uqxtn2_16b(<8 x i8> %ret, <8 x i16> %A) nounwind #0 {
265250
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
266251
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, ptr @__msan_param_tls, align 8
267252
; CHECK-NEXT: call void @llvm.donothing()
268-
; CHECK-NEXT: [[TMP6:%.*]] = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> [[TMP1]])
269-
; CHECK-NEXT: [[TMP4:%.*]] = zext i16 [[TMP6]] to i64
270-
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i64 [[TMP4]] to <8 x i8>
253+
; CHECK-NEXT: [[TMP5:%.*]] = trunc <8 x i16> [[TMP1]] to <8 x i8>
271254
; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqxtn.v8i8(<8 x i16> [[A]])
272255
; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <8 x i8> [[TMP2]], <8 x i8> [[TMP5]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
273256
; CHECK-NEXT: [[RES:%.*]] = shufflevector <8 x i8> [[RET]], <8 x i8> [[TMP3]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
@@ -285,9 +268,7 @@ define <8 x i16> @uqxtn2_8h(<4 x i16> %ret, <4 x i32> %A) nounwind #0 {
285268
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
286269
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i16>, ptr @__msan_param_tls, align 8
287270
; CHECK-NEXT: call void @llvm.donothing()
288-
; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP1]])
289-
; CHECK-NEXT: [[TMP4:%.*]] = zext i32 [[TMP6]] to i64
290-
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i64 [[TMP4]] to <4 x i16>
271+
; CHECK-NEXT: [[TMP5:%.*]] = trunc <4 x i32> [[TMP1]] to <4 x i16>
291272
; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqxtn.v4i16(<4 x i32> [[A]])
292273
; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP5]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
293274
; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x i16> [[RET]], <4 x i16> [[TMP3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -305,8 +286,7 @@ define <4 x i32> @uqxtn2_4s(<2 x i32> %ret, <2 x i64> %A) nounwind #0 {
305286
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
306287
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr @__msan_param_tls, align 8
307288
; CHECK-NEXT: call void @llvm.donothing()
308-
; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> [[TMP1]])
309-
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i64 [[TMP5]] to <2 x i32>
289+
; CHECK-NEXT: [[TMP4:%.*]] = trunc <2 x i64> [[TMP1]] to <2 x i32>
310290
; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqxtn.v2i32(<2 x i64> [[A]])
311291
; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP4]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
312292
; CHECK-NEXT: [[RES:%.*]] = shufflevector <2 x i32> [[RET]], <2 x i32> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -327,9 +307,7 @@ define <8 x i8> @sqxtun8b(<8 x i16> %A) nounwind #0 {
327307
; CHECK-SAME: <8 x i16> [[A:%.*]]) #[[ATTR0]] {
328308
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
329309
; CHECK-NEXT: call void @llvm.donothing()
330-
; CHECK-NEXT: [[TMP2:%.*]] = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> [[TMP1]])
331-
; CHECK-NEXT: [[TMP5:%.*]] = zext i16 [[TMP2]] to i64
332-
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i64 [[TMP5]] to <8 x i8>
310+
; CHECK-NEXT: [[TMP4:%.*]] = trunc <8 x i16> [[TMP1]] to <8 x i8>
333311
; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqxtun.v8i8(<8 x i16> [[A]])
334312
; CHECK-NEXT: store <8 x i8> [[TMP4]], ptr @__msan_retval_tls, align 8
335313
; CHECK-NEXT: ret <8 x i8> [[TMP3]]
@@ -343,9 +321,7 @@ define <4 x i16> @sqxtun4h(<4 x i32> %A) nounwind #0 {
343321
; CHECK-SAME: <4 x i32> [[A:%.*]]) #[[ATTR0]] {
344322
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
345323
; CHECK-NEXT: call void @llvm.donothing()
346-
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP1]])
347-
; CHECK-NEXT: [[TMP5:%.*]] = zext i32 [[TMP2]] to i64
348-
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i64 [[TMP5]] to <4 x i16>
324+
; CHECK-NEXT: [[TMP4:%.*]] = trunc <4 x i32> [[TMP1]] to <4 x i16>
349325
; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqxtun.v4i16(<4 x i32> [[A]])
350326
; CHECK-NEXT: store <4 x i16> [[TMP4]], ptr @__msan_retval_tls, align 8
351327
; CHECK-NEXT: ret <4 x i16> [[TMP3]]
@@ -359,8 +335,7 @@ define <2 x i32> @sqxtun2s(<2 x i64> %A) nounwind #0 {
359335
; CHECK-SAME: <2 x i64> [[A:%.*]]) #[[ATTR0]] {
360336
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
361337
; CHECK-NEXT: call void @llvm.donothing()
362-
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> [[TMP1]])
363-
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i64 [[TMP2]] to <2 x i32>
338+
; CHECK-NEXT: [[TMP4:%.*]] = trunc <2 x i64> [[TMP1]] to <2 x i32>
364339
; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqxtun.v2i32(<2 x i64> [[A]])
365340
; CHECK-NEXT: store <2 x i32> [[TMP4]], ptr @__msan_retval_tls, align 8
366341
; CHECK-NEXT: ret <2 x i32> [[TMP3]]
@@ -375,9 +350,7 @@ define <16 x i8> @sqxtun2_16b(<8 x i8> %ret, <8 x i16> %A) nounwind #0 {
375350
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
376351
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, ptr @__msan_param_tls, align 8
377352
; CHECK-NEXT: call void @llvm.donothing()
378-
; CHECK-NEXT: [[TMP6:%.*]] = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> [[TMP1]])
379-
; CHECK-NEXT: [[TMP4:%.*]] = zext i16 [[TMP6]] to i64
380-
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i64 [[TMP4]] to <8 x i8>
353+
; CHECK-NEXT: [[TMP5:%.*]] = trunc <8 x i16> [[TMP1]] to <8 x i8>
381354
; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqxtun.v8i8(<8 x i16> [[A]])
382355
; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <8 x i8> [[TMP2]], <8 x i8> [[TMP5]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
383356
; CHECK-NEXT: [[RES:%.*]] = shufflevector <8 x i8> [[RET]], <8 x i8> [[TMP3]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
@@ -395,9 +368,7 @@ define <8 x i16> @sqxtun2_8h(<4 x i16> %ret, <4 x i32> %A) nounwind #0 {
395368
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
396369
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i16>, ptr @__msan_param_tls, align 8
397370
; CHECK-NEXT: call void @llvm.donothing()
398-
; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP1]])
399-
; CHECK-NEXT: [[TMP4:%.*]] = zext i32 [[TMP6]] to i64
400-
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i64 [[TMP4]] to <4 x i16>
371+
; CHECK-NEXT: [[TMP5:%.*]] = trunc <4 x i32> [[TMP1]] to <4 x i16>
401372
; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqxtun.v4i16(<4 x i32> [[A]])
402373
; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP5]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
403374
; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x i16> [[RET]], <4 x i16> [[TMP3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -415,8 +386,7 @@ define <4 x i32> @sqxtun2_4s(<2 x i32> %ret, <2 x i64> %A) nounwind #0 {
415386
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
416387
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr @__msan_param_tls, align 8
417388
; CHECK-NEXT: call void @llvm.donothing()
418-
; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> [[TMP1]])
419-
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i64 [[TMP5]] to <2 x i32>
389+
; CHECK-NEXT: [[TMP4:%.*]] = trunc <2 x i64> [[TMP1]] to <2 x i32>
420390
; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqxtun.v2i32(<2 x i64> [[A]])
421391
; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP4]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
422392
; CHECK-NEXT: [[RES:%.*]] = shufflevector <2 x i32> [[RET]], <2 x i32> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>

0 commit comments

Comments
 (0)