Skip to content

Commit 132fb50

Browse files
committed
Fix instrumentation to use shadow cast
1 parent d0bdec3 commit 132fb50

File tree

3 files changed

+55
-106
lines changed

3 files changed

+55
-106
lines changed

llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4140,6 +4140,21 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
41404140
/*trailingVerbatimArgs*/ 0);
41414141
}
41424142

4143+
/// Instrument vector instructions that change the width.
4144+
///
4145+
/// e.g., <4 x i16> @llvm.aarch64.neon.sqxtn.v4i16(<4 x i32>)
4146+
/// (this example also saturates the values, but we ignore that for the
4147+
/// purposes of propagating the shadow)
4148+
void handleVectorWidthChangeIntrinsic(IntrinsicInst &I) {
4149+
assert(I.arg_size() == 1);
4150+
4151+
IRBuilder<> IRB(&I);
4152+
Value *S = getShadow(&I, 0);
4153+
S = CreateShadowCast(IRB, S, getShadowTy(&I));
4154+
setShadow(&I, S);
4155+
setOriginForNaryOp(I);
4156+
}
4157+
41434158
/// Handle Arm NEON vector store intrinsics (vst{2,3,4}, vst1x_{2,3,4},
41444159
/// and vst{2,3,4}lane).
41454160
///
@@ -4753,7 +4768,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
47534768
case Intrinsic::aarch64_neon_sqxtn:
47544769
case Intrinsic::aarch64_neon_sqxtun:
47554770
case Intrinsic::aarch64_neon_uqxtn:
4756-
handleVectorReduceIntrinsic(I);
4771+
handleVectorWidthChangeIntrinsic(I);
47574772
break;
47584773

47594774
case Intrinsic::aarch64_neon_st1x2:

llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-vmovn.ll

Lines changed: 18 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -107,9 +107,7 @@ define <8 x i8> @sqxtn8b(<8 x i16> %A) nounwind #0 {
107107
; CHECK-SAME: <8 x i16> [[A:%.*]]) #[[ATTR0]] {
108108
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
109109
; CHECK-NEXT: call void @llvm.donothing()
110-
; CHECK-NEXT: [[TMP2:%.*]] = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> [[TMP1]])
111-
; CHECK-NEXT: [[TMP5:%.*]] = zext i16 [[TMP2]] to i64
112-
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i64 [[TMP5]] to <8 x i8>
110+
; CHECK-NEXT: [[TMP4:%.*]] = trunc <8 x i16> [[TMP1]] to <8 x i8>
113111
; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqxtn.v8i8(<8 x i16> [[A]])
114112
; CHECK-NEXT: store <8 x i8> [[TMP4]], ptr @__msan_retval_tls, align 8
115113
; CHECK-NEXT: ret <8 x i8> [[TMP3]]
@@ -123,9 +121,7 @@ define <4 x i16> @sqxtn4h(<4 x i32> %A) nounwind #0 {
123121
; CHECK-SAME: <4 x i32> [[A:%.*]]) #[[ATTR0]] {
124122
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
125123
; CHECK-NEXT: call void @llvm.donothing()
126-
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP1]])
127-
; CHECK-NEXT: [[TMP5:%.*]] = zext i32 [[TMP2]] to i64
128-
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i64 [[TMP5]] to <4 x i16>
124+
; CHECK-NEXT: [[TMP4:%.*]] = trunc <4 x i32> [[TMP1]] to <4 x i16>
129125
; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqxtn.v4i16(<4 x i32> [[A]])
130126
; CHECK-NEXT: store <4 x i16> [[TMP4]], ptr @__msan_retval_tls, align 8
131127
; CHECK-NEXT: ret <4 x i16> [[TMP3]]
@@ -139,8 +135,7 @@ define <2 x i32> @sqxtn2s(<2 x i64> %A) nounwind #0 {
139135
; CHECK-SAME: <2 x i64> [[A:%.*]]) #[[ATTR0]] {
140136
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
141137
; CHECK-NEXT: call void @llvm.donothing()
142-
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> [[TMP1]])
143-
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i64 [[TMP2]] to <2 x i32>
138+
; CHECK-NEXT: [[TMP4:%.*]] = trunc <2 x i64> [[TMP1]] to <2 x i32>
144139
; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqxtn.v2i32(<2 x i64> [[A]])
145140
; CHECK-NEXT: store <2 x i32> [[TMP4]], ptr @__msan_retval_tls, align 8
146141
; CHECK-NEXT: ret <2 x i32> [[TMP3]]
@@ -155,9 +150,7 @@ define <16 x i8> @sqxtn2_16b(<8 x i8> %ret, <8 x i16> %A) nounwind #0 {
155150
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
156151
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, ptr @__msan_param_tls, align 8
157152
; CHECK-NEXT: call void @llvm.donothing()
158-
; CHECK-NEXT: [[TMP6:%.*]] = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> [[TMP1]])
159-
; CHECK-NEXT: [[TMP4:%.*]] = zext i16 [[TMP6]] to i64
160-
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i64 [[TMP4]] to <8 x i8>
153+
; CHECK-NEXT: [[TMP5:%.*]] = trunc <8 x i16> [[TMP1]] to <8 x i8>
161154
; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqxtn.v8i8(<8 x i16> [[A]])
162155
; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <8 x i8> [[TMP2]], <8 x i8> [[TMP5]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
163156
; CHECK-NEXT: [[RES:%.*]] = shufflevector <8 x i8> [[RET]], <8 x i8> [[TMP3]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
@@ -175,9 +168,7 @@ define <8 x i16> @sqxtn2_8h(<4 x i16> %ret, <4 x i32> %A) nounwind #0 {
175168
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
176169
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i16>, ptr @__msan_param_tls, align 8
177170
; CHECK-NEXT: call void @llvm.donothing()
178-
; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP1]])
179-
; CHECK-NEXT: [[TMP4:%.*]] = zext i32 [[TMP6]] to i64
180-
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i64 [[TMP4]] to <4 x i16>
171+
; CHECK-NEXT: [[TMP5:%.*]] = trunc <4 x i32> [[TMP1]] to <4 x i16>
181172
; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqxtn.v4i16(<4 x i32> [[A]])
182173
; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP5]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
183174
; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x i16> [[RET]], <4 x i16> [[TMP3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -195,8 +186,7 @@ define <4 x i32> @sqxtn2_4s(<2 x i32> %ret, <2 x i64> %A) nounwind #0 {
195186
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
196187
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr @__msan_param_tls, align 8
197188
; CHECK-NEXT: call void @llvm.donothing()
198-
; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> [[TMP1]])
199-
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i64 [[TMP5]] to <2 x i32>
189+
; CHECK-NEXT: [[TMP4:%.*]] = trunc <2 x i64> [[TMP1]] to <2 x i32>
200190
; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqxtn.v2i32(<2 x i64> [[A]])
201191
; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP4]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
202192
; CHECK-NEXT: [[RES:%.*]] = shufflevector <2 x i32> [[RET]], <2 x i32> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -217,9 +207,7 @@ define <8 x i8> @uqxtn8b(<8 x i16> %A) nounwind #0 {
217207
; CHECK-SAME: <8 x i16> [[A:%.*]]) #[[ATTR0]] {
218208
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
219209
; CHECK-NEXT: call void @llvm.donothing()
220-
; CHECK-NEXT: [[TMP2:%.*]] = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> [[TMP1]])
221-
; CHECK-NEXT: [[TMP5:%.*]] = zext i16 [[TMP2]] to i64
222-
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i64 [[TMP5]] to <8 x i8>
210+
; CHECK-NEXT: [[TMP4:%.*]] = trunc <8 x i16> [[TMP1]] to <8 x i8>
223211
; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqxtn.v8i8(<8 x i16> [[A]])
224212
; CHECK-NEXT: store <8 x i8> [[TMP4]], ptr @__msan_retval_tls, align 8
225213
; CHECK-NEXT: ret <8 x i8> [[TMP3]]
@@ -233,9 +221,7 @@ define <4 x i16> @uqxtn4h(<4 x i32> %A) nounwind #0 {
233221
; CHECK-SAME: <4 x i32> [[A:%.*]]) #[[ATTR0]] {
234222
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
235223
; CHECK-NEXT: call void @llvm.donothing()
236-
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP1]])
237-
; CHECK-NEXT: [[TMP5:%.*]] = zext i32 [[TMP2]] to i64
238-
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i64 [[TMP5]] to <4 x i16>
224+
; CHECK-NEXT: [[TMP4:%.*]] = trunc <4 x i32> [[TMP1]] to <4 x i16>
239225
; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqxtn.v4i16(<4 x i32> [[A]])
240226
; CHECK-NEXT: store <4 x i16> [[TMP4]], ptr @__msan_retval_tls, align 8
241227
; CHECK-NEXT: ret <4 x i16> [[TMP3]]
@@ -249,8 +235,7 @@ define <2 x i32> @uqxtn2s(<2 x i64> %A) nounwind #0 {
249235
; CHECK-SAME: <2 x i64> [[A:%.*]]) #[[ATTR0]] {
250236
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
251237
; CHECK-NEXT: call void @llvm.donothing()
252-
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> [[TMP1]])
253-
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i64 [[TMP2]] to <2 x i32>
238+
; CHECK-NEXT: [[TMP4:%.*]] = trunc <2 x i64> [[TMP1]] to <2 x i32>
254239
; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqxtn.v2i32(<2 x i64> [[A]])
255240
; CHECK-NEXT: store <2 x i32> [[TMP4]], ptr @__msan_retval_tls, align 8
256241
; CHECK-NEXT: ret <2 x i32> [[TMP3]]
@@ -265,9 +250,7 @@ define <16 x i8> @uqxtn2_16b(<8 x i8> %ret, <8 x i16> %A) nounwind #0 {
265250
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
266251
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, ptr @__msan_param_tls, align 8
267252
; CHECK-NEXT: call void @llvm.donothing()
268-
; CHECK-NEXT: [[TMP6:%.*]] = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> [[TMP1]])
269-
; CHECK-NEXT: [[TMP4:%.*]] = zext i16 [[TMP6]] to i64
270-
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i64 [[TMP4]] to <8 x i8>
253+
; CHECK-NEXT: [[TMP5:%.*]] = trunc <8 x i16> [[TMP1]] to <8 x i8>
271254
; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqxtn.v8i8(<8 x i16> [[A]])
272255
; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <8 x i8> [[TMP2]], <8 x i8> [[TMP5]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
273256
; CHECK-NEXT: [[RES:%.*]] = shufflevector <8 x i8> [[RET]], <8 x i8> [[TMP3]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
@@ -285,9 +268,7 @@ define <8 x i16> @uqxtn2_8h(<4 x i16> %ret, <4 x i32> %A) nounwind #0 {
285268
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
286269
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i16>, ptr @__msan_param_tls, align 8
287270
; CHECK-NEXT: call void @llvm.donothing()
288-
; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP1]])
289-
; CHECK-NEXT: [[TMP4:%.*]] = zext i32 [[TMP6]] to i64
290-
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i64 [[TMP4]] to <4 x i16>
271+
; CHECK-NEXT: [[TMP5:%.*]] = trunc <4 x i32> [[TMP1]] to <4 x i16>
291272
; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqxtn.v4i16(<4 x i32> [[A]])
292273
; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP5]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
293274
; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x i16> [[RET]], <4 x i16> [[TMP3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -305,8 +286,7 @@ define <4 x i32> @uqxtn2_4s(<2 x i32> %ret, <2 x i64> %A) nounwind #0 {
305286
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
306287
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr @__msan_param_tls, align 8
307288
; CHECK-NEXT: call void @llvm.donothing()
308-
; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> [[TMP1]])
309-
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i64 [[TMP5]] to <2 x i32>
289+
; CHECK-NEXT: [[TMP4:%.*]] = trunc <2 x i64> [[TMP1]] to <2 x i32>
310290
; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqxtn.v2i32(<2 x i64> [[A]])
311291
; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP4]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
312292
; CHECK-NEXT: [[RES:%.*]] = shufflevector <2 x i32> [[RET]], <2 x i32> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -327,9 +307,7 @@ define <8 x i8> @sqxtun8b(<8 x i16> %A) nounwind #0 {
327307
; CHECK-SAME: <8 x i16> [[A:%.*]]) #[[ATTR0]] {
328308
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
329309
; CHECK-NEXT: call void @llvm.donothing()
330-
; CHECK-NEXT: [[TMP2:%.*]] = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> [[TMP1]])
331-
; CHECK-NEXT: [[TMP5:%.*]] = zext i16 [[TMP2]] to i64
332-
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i64 [[TMP5]] to <8 x i8>
310+
; CHECK-NEXT: [[TMP4:%.*]] = trunc <8 x i16> [[TMP1]] to <8 x i8>
333311
; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqxtun.v8i8(<8 x i16> [[A]])
334312
; CHECK-NEXT: store <8 x i8> [[TMP4]], ptr @__msan_retval_tls, align 8
335313
; CHECK-NEXT: ret <8 x i8> [[TMP3]]
@@ -343,9 +321,7 @@ define <4 x i16> @sqxtun4h(<4 x i32> %A) nounwind #0 {
343321
; CHECK-SAME: <4 x i32> [[A:%.*]]) #[[ATTR0]] {
344322
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
345323
; CHECK-NEXT: call void @llvm.donothing()
346-
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP1]])
347-
; CHECK-NEXT: [[TMP5:%.*]] = zext i32 [[TMP2]] to i64
348-
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i64 [[TMP5]] to <4 x i16>
324+
; CHECK-NEXT: [[TMP4:%.*]] = trunc <4 x i32> [[TMP1]] to <4 x i16>
349325
; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqxtun.v4i16(<4 x i32> [[A]])
350326
; CHECK-NEXT: store <4 x i16> [[TMP4]], ptr @__msan_retval_tls, align 8
351327
; CHECK-NEXT: ret <4 x i16> [[TMP3]]
@@ -359,8 +335,7 @@ define <2 x i32> @sqxtun2s(<2 x i64> %A) nounwind #0 {
359335
; CHECK-SAME: <2 x i64> [[A:%.*]]) #[[ATTR0]] {
360336
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
361337
; CHECK-NEXT: call void @llvm.donothing()
362-
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> [[TMP1]])
363-
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i64 [[TMP2]] to <2 x i32>
338+
; CHECK-NEXT: [[TMP4:%.*]] = trunc <2 x i64> [[TMP1]] to <2 x i32>
364339
; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqxtun.v2i32(<2 x i64> [[A]])
365340
; CHECK-NEXT: store <2 x i32> [[TMP4]], ptr @__msan_retval_tls, align 8
366341
; CHECK-NEXT: ret <2 x i32> [[TMP3]]
@@ -375,9 +350,7 @@ define <16 x i8> @sqxtun2_16b(<8 x i8> %ret, <8 x i16> %A) nounwind #0 {
375350
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
376351
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, ptr @__msan_param_tls, align 8
377352
; CHECK-NEXT: call void @llvm.donothing()
378-
; CHECK-NEXT: [[TMP6:%.*]] = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> [[TMP1]])
379-
; CHECK-NEXT: [[TMP4:%.*]] = zext i16 [[TMP6]] to i64
380-
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i64 [[TMP4]] to <8 x i8>
353+
; CHECK-NEXT: [[TMP5:%.*]] = trunc <8 x i16> [[TMP1]] to <8 x i8>
381354
; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqxtun.v8i8(<8 x i16> [[A]])
382355
; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <8 x i8> [[TMP2]], <8 x i8> [[TMP5]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
383356
; CHECK-NEXT: [[RES:%.*]] = shufflevector <8 x i8> [[RET]], <8 x i8> [[TMP3]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
@@ -395,9 +368,7 @@ define <8 x i16> @sqxtun2_8h(<4 x i16> %ret, <4 x i32> %A) nounwind #0 {
395368
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
396369
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i16>, ptr @__msan_param_tls, align 8
397370
; CHECK-NEXT: call void @llvm.donothing()
398-
; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP1]])
399-
; CHECK-NEXT: [[TMP4:%.*]] = zext i32 [[TMP6]] to i64
400-
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i64 [[TMP4]] to <4 x i16>
371+
; CHECK-NEXT: [[TMP5:%.*]] = trunc <4 x i32> [[TMP1]] to <4 x i16>
401372
; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqxtun.v4i16(<4 x i32> [[A]])
402373
; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP5]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
403374
; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x i16> [[RET]], <4 x i16> [[TMP3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -415,8 +386,7 @@ define <4 x i32> @sqxtun2_4s(<2 x i32> %ret, <2 x i64> %A) nounwind #0 {
415386
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
416387
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr @__msan_param_tls, align 8
417388
; CHECK-NEXT: call void @llvm.donothing()
418-
; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> [[TMP1]])
419-
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i64 [[TMP5]] to <2 x i32>
389+
; CHECK-NEXT: [[TMP4:%.*]] = trunc <2 x i64> [[TMP1]] to <2 x i32>
420390
; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqxtun.v2i32(<2 x i64> [[A]])
421391
; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP4]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
422392
; CHECK-NEXT: [[RES:%.*]] = shufflevector <2 x i32> [[RET]], <2 x i32> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>

0 commit comments

Comments
 (0)