Skip to content

Commit a6b235a

Browse files
committed
Abuse handleShadowOr to perform width change
1 parent 132fb50 commit a6b235a

File tree

3 files changed

+79
-55
lines changed

3 files changed

+79
-55
lines changed

llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp

Lines changed: 1 addition & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -4140,21 +4140,6 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
41404140
/*trailingVerbatimArgs*/ 0);
41414141
}
41424142

4143-
/// Instrument vector instructions that change the width.
4144-
///
4145-
/// e.g., <4 x i16> @llvm.aarch64.neon.sqxtn.v4i16(<4 x i32>
4146-
/// (this example also saturates the values, but we ignore that for the
4147-
/// purposes of propagating the shadow)
4148-
void handleVectorWidthChangeIntrinsic(IntrinsicInst &I) {
4149-
assert(I.arg_size() == 1);
4150-
4151-
IRBuilder<> IRB(&I);
4152-
Value *S = getShadow(&I, 0);
4153-
S = CreateShadowCast(IRB, S, getShadowTy(&I));
4154-
setShadow(&I, S);
4155-
setOriginForNaryOp(I);
4156-
}
4157-
41584143
/// Handle Arm NEON vector store intrinsics (vst{2,3,4}, vst1x_{2,3,4},
41594144
/// and vst{2,3,4}lane).
41604145
///
@@ -4768,7 +4753,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
47684753
case Intrinsic::aarch64_neon_sqxtn:
47694754
case Intrinsic::aarch64_neon_sqxtun:
47704755
case Intrinsic::aarch64_neon_uqxtn:
4771-
handleVectorWidthChangeIntrinsic(I);
4756+
handleShadowOr(I);
47724757
break;
47734758

47744759
case Intrinsic::aarch64_neon_st1x2:

llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-vmovn.ll

Lines changed: 36 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,8 @@ define <8 x i8> @sqxtn8b(<8 x i16> %A) nounwind #0 {
107107
; CHECK-SAME: <8 x i16> [[A:%.*]]) #[[ATTR0]] {
108108
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
109109
; CHECK-NEXT: call void @llvm.donothing()
110-
; CHECK-NEXT: [[TMP4:%.*]] = trunc <8 x i16> [[TMP1]] to <8 x i8>
110+
; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> [[TMP1]], zeroinitializer
111+
; CHECK-NEXT: [[TMP4:%.*]] = trunc <8 x i16> [[_MSPROP]] to <8 x i8>
111112
; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqxtn.v8i8(<8 x i16> [[A]])
112113
; CHECK-NEXT: store <8 x i8> [[TMP4]], ptr @__msan_retval_tls, align 8
113114
; CHECK-NEXT: ret <8 x i8> [[TMP3]]
@@ -121,7 +122,8 @@ define <4 x i16> @sqxtn4h(<4 x i32> %A) nounwind #0 {
121122
; CHECK-SAME: <4 x i32> [[A:%.*]]) #[[ATTR0]] {
122123
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
123124
; CHECK-NEXT: call void @llvm.donothing()
124-
; CHECK-NEXT: [[TMP4:%.*]] = trunc <4 x i32> [[TMP1]] to <4 x i16>
125+
; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[TMP1]], zeroinitializer
126+
; CHECK-NEXT: [[TMP4:%.*]] = trunc <4 x i32> [[_MSPROP]] to <4 x i16>
125127
; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqxtn.v4i16(<4 x i32> [[A]])
126128
; CHECK-NEXT: store <4 x i16> [[TMP4]], ptr @__msan_retval_tls, align 8
127129
; CHECK-NEXT: ret <4 x i16> [[TMP3]]
@@ -135,7 +137,8 @@ define <2 x i32> @sqxtn2s(<2 x i64> %A) nounwind #0 {
135137
; CHECK-SAME: <2 x i64> [[A:%.*]]) #[[ATTR0]] {
136138
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
137139
; CHECK-NEXT: call void @llvm.donothing()
138-
; CHECK-NEXT: [[TMP4:%.*]] = trunc <2 x i64> [[TMP1]] to <2 x i32>
140+
; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[TMP1]], zeroinitializer
141+
; CHECK-NEXT: [[TMP4:%.*]] = trunc <2 x i64> [[_MSPROP]] to <2 x i32>
139142
; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqxtn.v2i32(<2 x i64> [[A]])
140143
; CHECK-NEXT: store <2 x i32> [[TMP4]], ptr @__msan_retval_tls, align 8
141144
; CHECK-NEXT: ret <2 x i32> [[TMP3]]
@@ -150,7 +153,8 @@ define <16 x i8> @sqxtn2_16b(<8 x i8> %ret, <8 x i16> %A) nounwind #0 {
150153
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
151154
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, ptr @__msan_param_tls, align 8
152155
; CHECK-NEXT: call void @llvm.donothing()
153-
; CHECK-NEXT: [[TMP5:%.*]] = trunc <8 x i16> [[TMP1]] to <8 x i8>
156+
; CHECK-NEXT: [[_MSPROP1:%.*]] = or <8 x i16> [[TMP1]], zeroinitializer
157+
; CHECK-NEXT: [[TMP5:%.*]] = trunc <8 x i16> [[_MSPROP1]] to <8 x i8>
154158
; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqxtn.v8i8(<8 x i16> [[A]])
155159
; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <8 x i8> [[TMP2]], <8 x i8> [[TMP5]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
156160
; CHECK-NEXT: [[RES:%.*]] = shufflevector <8 x i8> [[RET]], <8 x i8> [[TMP3]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
@@ -168,7 +172,8 @@ define <8 x i16> @sqxtn2_8h(<4 x i16> %ret, <4 x i32> %A) nounwind #0 {
168172
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
169173
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i16>, ptr @__msan_param_tls, align 8
170174
; CHECK-NEXT: call void @llvm.donothing()
171-
; CHECK-NEXT: [[TMP5:%.*]] = trunc <4 x i32> [[TMP1]] to <4 x i16>
175+
; CHECK-NEXT: [[_MSPROP1:%.*]] = or <4 x i32> [[TMP1]], zeroinitializer
176+
; CHECK-NEXT: [[TMP5:%.*]] = trunc <4 x i32> [[_MSPROP1]] to <4 x i16>
172177
; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqxtn.v4i16(<4 x i32> [[A]])
173178
; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP5]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
174179
; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x i16> [[RET]], <4 x i16> [[TMP3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -186,7 +191,8 @@ define <4 x i32> @sqxtn2_4s(<2 x i32> %ret, <2 x i64> %A) nounwind #0 {
186191
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
187192
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr @__msan_param_tls, align 8
188193
; CHECK-NEXT: call void @llvm.donothing()
189-
; CHECK-NEXT: [[TMP4:%.*]] = trunc <2 x i64> [[TMP1]] to <2 x i32>
194+
; CHECK-NEXT: [[_MSPROP1:%.*]] = or <2 x i64> [[TMP1]], zeroinitializer
195+
; CHECK-NEXT: [[TMP4:%.*]] = trunc <2 x i64> [[_MSPROP1]] to <2 x i32>
190196
; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqxtn.v2i32(<2 x i64> [[A]])
191197
; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP4]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
192198
; CHECK-NEXT: [[RES:%.*]] = shufflevector <2 x i32> [[RET]], <2 x i32> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -207,7 +213,8 @@ define <8 x i8> @uqxtn8b(<8 x i16> %A) nounwind #0 {
207213
; CHECK-SAME: <8 x i16> [[A:%.*]]) #[[ATTR0]] {
208214
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
209215
; CHECK-NEXT: call void @llvm.donothing()
210-
; CHECK-NEXT: [[TMP4:%.*]] = trunc <8 x i16> [[TMP1]] to <8 x i8>
216+
; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> [[TMP1]], zeroinitializer
217+
; CHECK-NEXT: [[TMP4:%.*]] = trunc <8 x i16> [[_MSPROP]] to <8 x i8>
211218
; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqxtn.v8i8(<8 x i16> [[A]])
212219
; CHECK-NEXT: store <8 x i8> [[TMP4]], ptr @__msan_retval_tls, align 8
213220
; CHECK-NEXT: ret <8 x i8> [[TMP3]]
@@ -221,7 +228,8 @@ define <4 x i16> @uqxtn4h(<4 x i32> %A) nounwind #0 {
221228
; CHECK-SAME: <4 x i32> [[A:%.*]]) #[[ATTR0]] {
222229
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
223230
; CHECK-NEXT: call void @llvm.donothing()
224-
; CHECK-NEXT: [[TMP4:%.*]] = trunc <4 x i32> [[TMP1]] to <4 x i16>
231+
; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[TMP1]], zeroinitializer
232+
; CHECK-NEXT: [[TMP4:%.*]] = trunc <4 x i32> [[_MSPROP]] to <4 x i16>
225233
; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqxtn.v4i16(<4 x i32> [[A]])
226234
; CHECK-NEXT: store <4 x i16> [[TMP4]], ptr @__msan_retval_tls, align 8
227235
; CHECK-NEXT: ret <4 x i16> [[TMP3]]
@@ -235,7 +243,8 @@ define <2 x i32> @uqxtn2s(<2 x i64> %A) nounwind #0 {
235243
; CHECK-SAME: <2 x i64> [[A:%.*]]) #[[ATTR0]] {
236244
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
237245
; CHECK-NEXT: call void @llvm.donothing()
238-
; CHECK-NEXT: [[TMP4:%.*]] = trunc <2 x i64> [[TMP1]] to <2 x i32>
246+
; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[TMP1]], zeroinitializer
247+
; CHECK-NEXT: [[TMP4:%.*]] = trunc <2 x i64> [[_MSPROP]] to <2 x i32>
239248
; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqxtn.v2i32(<2 x i64> [[A]])
240249
; CHECK-NEXT: store <2 x i32> [[TMP4]], ptr @__msan_retval_tls, align 8
241250
; CHECK-NEXT: ret <2 x i32> [[TMP3]]
@@ -250,7 +259,8 @@ define <16 x i8> @uqxtn2_16b(<8 x i8> %ret, <8 x i16> %A) nounwind #0 {
250259
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
251260
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, ptr @__msan_param_tls, align 8
252261
; CHECK-NEXT: call void @llvm.donothing()
253-
; CHECK-NEXT: [[TMP5:%.*]] = trunc <8 x i16> [[TMP1]] to <8 x i8>
262+
; CHECK-NEXT: [[_MSPROP1:%.*]] = or <8 x i16> [[TMP1]], zeroinitializer
263+
; CHECK-NEXT: [[TMP5:%.*]] = trunc <8 x i16> [[_MSPROP1]] to <8 x i8>
254264
; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqxtn.v8i8(<8 x i16> [[A]])
255265
; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <8 x i8> [[TMP2]], <8 x i8> [[TMP5]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
256266
; CHECK-NEXT: [[RES:%.*]] = shufflevector <8 x i8> [[RET]], <8 x i8> [[TMP3]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
@@ -268,7 +278,8 @@ define <8 x i16> @uqxtn2_8h(<4 x i16> %ret, <4 x i32> %A) nounwind #0 {
268278
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
269279
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i16>, ptr @__msan_param_tls, align 8
270280
; CHECK-NEXT: call void @llvm.donothing()
271-
; CHECK-NEXT: [[TMP5:%.*]] = trunc <4 x i32> [[TMP1]] to <4 x i16>
281+
; CHECK-NEXT: [[_MSPROP1:%.*]] = or <4 x i32> [[TMP1]], zeroinitializer
282+
; CHECK-NEXT: [[TMP5:%.*]] = trunc <4 x i32> [[_MSPROP1]] to <4 x i16>
272283
; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqxtn.v4i16(<4 x i32> [[A]])
273284
; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP5]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
274285
; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x i16> [[RET]], <4 x i16> [[TMP3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -286,7 +297,8 @@ define <4 x i32> @uqxtn2_4s(<2 x i32> %ret, <2 x i64> %A) nounwind #0 {
286297
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
287298
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr @__msan_param_tls, align 8
288299
; CHECK-NEXT: call void @llvm.donothing()
289-
; CHECK-NEXT: [[TMP4:%.*]] = trunc <2 x i64> [[TMP1]] to <2 x i32>
300+
; CHECK-NEXT: [[_MSPROP1:%.*]] = or <2 x i64> [[TMP1]], zeroinitializer
301+
; CHECK-NEXT: [[TMP4:%.*]] = trunc <2 x i64> [[_MSPROP1]] to <2 x i32>
290302
; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqxtn.v2i32(<2 x i64> [[A]])
291303
; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP4]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
292304
; CHECK-NEXT: [[RES:%.*]] = shufflevector <2 x i32> [[RET]], <2 x i32> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -307,7 +319,8 @@ define <8 x i8> @sqxtun8b(<8 x i16> %A) nounwind #0 {
307319
; CHECK-SAME: <8 x i16> [[A:%.*]]) #[[ATTR0]] {
308320
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
309321
; CHECK-NEXT: call void @llvm.donothing()
310-
; CHECK-NEXT: [[TMP4:%.*]] = trunc <8 x i16> [[TMP1]] to <8 x i8>
322+
; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> [[TMP1]], zeroinitializer
323+
; CHECK-NEXT: [[TMP4:%.*]] = trunc <8 x i16> [[_MSPROP]] to <8 x i8>
311324
; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqxtun.v8i8(<8 x i16> [[A]])
312325
; CHECK-NEXT: store <8 x i8> [[TMP4]], ptr @__msan_retval_tls, align 8
313326
; CHECK-NEXT: ret <8 x i8> [[TMP3]]
@@ -321,7 +334,8 @@ define <4 x i16> @sqxtun4h(<4 x i32> %A) nounwind #0 {
321334
; CHECK-SAME: <4 x i32> [[A:%.*]]) #[[ATTR0]] {
322335
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
323336
; CHECK-NEXT: call void @llvm.donothing()
324-
; CHECK-NEXT: [[TMP4:%.*]] = trunc <4 x i32> [[TMP1]] to <4 x i16>
337+
; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[TMP1]], zeroinitializer
338+
; CHECK-NEXT: [[TMP4:%.*]] = trunc <4 x i32> [[_MSPROP]] to <4 x i16>
325339
; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqxtun.v4i16(<4 x i32> [[A]])
326340
; CHECK-NEXT: store <4 x i16> [[TMP4]], ptr @__msan_retval_tls, align 8
327341
; CHECK-NEXT: ret <4 x i16> [[TMP3]]
@@ -335,7 +349,8 @@ define <2 x i32> @sqxtun2s(<2 x i64> %A) nounwind #0 {
335349
; CHECK-SAME: <2 x i64> [[A:%.*]]) #[[ATTR0]] {
336350
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
337351
; CHECK-NEXT: call void @llvm.donothing()
338-
; CHECK-NEXT: [[TMP4:%.*]] = trunc <2 x i64> [[TMP1]] to <2 x i32>
352+
; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[TMP1]], zeroinitializer
353+
; CHECK-NEXT: [[TMP4:%.*]] = trunc <2 x i64> [[_MSPROP]] to <2 x i32>
339354
; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqxtun.v2i32(<2 x i64> [[A]])
340355
; CHECK-NEXT: store <2 x i32> [[TMP4]], ptr @__msan_retval_tls, align 8
341356
; CHECK-NEXT: ret <2 x i32> [[TMP3]]
@@ -350,7 +365,8 @@ define <16 x i8> @sqxtun2_16b(<8 x i8> %ret, <8 x i16> %A) nounwind #0 {
350365
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
351366
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, ptr @__msan_param_tls, align 8
352367
; CHECK-NEXT: call void @llvm.donothing()
353-
; CHECK-NEXT: [[TMP5:%.*]] = trunc <8 x i16> [[TMP1]] to <8 x i8>
368+
; CHECK-NEXT: [[_MSPROP1:%.*]] = or <8 x i16> [[TMP1]], zeroinitializer
369+
; CHECK-NEXT: [[TMP5:%.*]] = trunc <8 x i16> [[_MSPROP1]] to <8 x i8>
354370
; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqxtun.v8i8(<8 x i16> [[A]])
355371
; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <8 x i8> [[TMP2]], <8 x i8> [[TMP5]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
356372
; CHECK-NEXT: [[RES:%.*]] = shufflevector <8 x i8> [[RET]], <8 x i8> [[TMP3]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
@@ -368,7 +384,8 @@ define <8 x i16> @sqxtun2_8h(<4 x i16> %ret, <4 x i32> %A) nounwind #0 {
368384
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
369385
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i16>, ptr @__msan_param_tls, align 8
370386
; CHECK-NEXT: call void @llvm.donothing()
371-
; CHECK-NEXT: [[TMP5:%.*]] = trunc <4 x i32> [[TMP1]] to <4 x i16>
387+
; CHECK-NEXT: [[_MSPROP1:%.*]] = or <4 x i32> [[TMP1]], zeroinitializer
388+
; CHECK-NEXT: [[TMP5:%.*]] = trunc <4 x i32> [[_MSPROP1]] to <4 x i16>
372389
; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqxtun.v4i16(<4 x i32> [[A]])
373390
; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP5]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
374391
; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x i16> [[RET]], <4 x i16> [[TMP3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -386,7 +403,8 @@ define <4 x i32> @sqxtun2_4s(<2 x i32> %ret, <2 x i64> %A) nounwind #0 {
386403
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
387404
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr @__msan_param_tls, align 8
388405
; CHECK-NEXT: call void @llvm.donothing()
389-
; CHECK-NEXT: [[TMP4:%.*]] = trunc <2 x i64> [[TMP1]] to <2 x i32>
406+
; CHECK-NEXT: [[_MSPROP1:%.*]] = or <2 x i64> [[TMP1]], zeroinitializer
407+
; CHECK-NEXT: [[TMP4:%.*]] = trunc <2 x i64> [[_MSPROP1]] to <2 x i32>
390408
; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqxtun.v2i32(<2 x i64> [[A]])
391409
; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP4]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
392410
; CHECK-NEXT: [[RES:%.*]] = shufflevector <2 x i32> [[RET]], <2 x i32> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>

0 commit comments

Comments
 (0)