@@ -4132,10 +4132,9 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
CallInst *CI =
IRB.CreateIntrinsic(I.getType(), I.getIntrinsicID(), ShadowArgs);
- // The intrinsic may require floating-point but shadows can be arbitrary
- // bit patterns, of which some would be interpreted as "invalid"
- // floating-point values (NaN etc.); we assume the intrinsic will happily
- // copy them.
+ // The AVX masked load intrinsics do not have integer variants. We use the
+ // floating-point variants, which will happily copy the shadows even if
+ // they are interpreted as "invalid" floating-point values (NaN etc.).
setShadow(&I, IRB.CreateBitCast(CI, getShadowTy(&I)));
if (!MS.TrackOrigins)
@@ -4301,6 +4300,78 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
setOriginForNaryOp(I);
}
+ // Handle Arm NEON vector load intrinsics (vld*).
+ //
+ // The WithLane instructions (ld[234]lane) are similar to:
+ // call {<4 x i32>, <4 x i32>, <4 x i32>}
+ // @llvm.aarch64.neon.ld3lane.v4i32.p0
+ // (<4 x i32> %L1, <4 x i32> %L2, <4 x i32> %L3, i64 %lane, ptr
+ // %A)
+ //
+ // The non-WithLane instructions (ld[234], ld1x[234], ld[234]r) are similar
+ // to:
+ // call {<8 x i8>, <8 x i8>} @llvm.aarch64.neon.ld2.v8i8.p0(ptr %A)
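+ //
+ // In both cases the last operand is the pointer loaded from. The shadow of
+ // the result is computed by calling the same intrinsic, with integer
+ // element types, on the shadow memory of that pointer (plus, for the
+ // WithLane forms, the shadows of the input vectors and the lane number
+ // passed through unchanged).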
+ void handleNEONVectorLoad(IntrinsicInst &I, bool WithLane) {
+ unsigned int numArgs = I.arg_size();
+
+ // Return type is a struct of vectors of integers or floating-point
+ assert(I.getType()->isStructTy());
+ [[maybe_unused]] StructType *RetTy = cast<StructType>(I.getType());
+ assert(RetTy->getNumElements() > 0);
+ assert(RetTy->getElementType(0)->isIntOrIntVectorTy() ||
+ RetTy->getElementType(0)->isFPOrFPVectorTy());
+ for (unsigned int i = 0; i < RetTy->getNumElements(); i++)
+ assert(RetTy->getElementType(i) == RetTy->getElementType(0));
+
+ if (WithLane) {
+ // 2, 3 or 4 vectors, plus lane number, plus input pointer
+ assert(4 <= numArgs && numArgs <= 6);
+
+ // Return type is a struct of the input vectors
+ assert(RetTy->getNumElements() + 2 == numArgs);
+ for (unsigned int i = 0; i < RetTy->getNumElements(); i++)
+ assert(I.getArgOperand(i)->getType() == RetTy->getElementType(0));
+ } else {
+ assert(numArgs == 1);
+ }
+
+ IRBuilder<> IRB(&I);
+
+ SmallVector<Value *, 6> ShadowArgs;
+ if (WithLane) {
+ for (unsigned int i = 0; i < numArgs - 2; i++)
+ ShadowArgs.push_back(getShadow(I.getArgOperand(i)));
+
+ // Lane number, passed verbatim
+ Value *LaneNumber = I.getArgOperand(numArgs - 2);
+ ShadowArgs.push_back(LaneNumber);
+
+ // TODO: blend shadow of lane number into output shadow?
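+ // For now, just check it: warn if the lane number itself is uninitialized.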
+ insertShadowCheck(LaneNumber, &I);
+ }
+
+ Value *Src = I.getArgOperand(numArgs - 1);
+ assert(Src->getType()->isPointerTy() && "Source is not a pointer!");
+
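+ // Compute the addresses of the shadow and origin corresponding to the
+ // application memory pointed to by Src.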
+ Type *SrcShadowTy = getShadowTy(Src);
+ auto [SrcShadowPtr, SrcOriginPtr] =
+ getShadowOriginPtr(Src, IRB, SrcShadowTy, Align(1), /*isStore*/ false);
+ ShadowArgs.push_back(SrcShadowPtr);
+
+ // The NEON vector load instructions handled by this function all have
+ // integer variants. It is easier to use those rather than trying to cast
+ // a struct of vectors of floats into a struct of vectors of integers.
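+ // For example, the shadow of a call to @llvm.aarch64.neon.ld2.v4f32.p0 can
+ // be computed with @llvm.aarch64.neon.ld2.v4i32.p0 applied to the shadow
+ // address.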
+ CallInst *CI =
+ IRB.CreateIntrinsic(getShadowTy(&I), I.getIntrinsicID(), ShadowArgs);
+ setShadow(&I, CI);
+
+ if (!MS.TrackOrigins)
+ return;
+
+ Value *PtrSrcOrigin = IRB.CreateLoad(MS.OriginTy, SrcOriginPtr);
+ setOrigin(&I, PtrSrcOrigin);
+ }
+
/// Handle Arm NEON vector store intrinsics (vst{2,3,4}, vst1x_{2,3,4},
/// and vst{2,3,4}lane).
///
@@ -5011,6 +5082,26 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
handleVectorReduceIntrinsic(I, /*AllowShadowCast=*/true);
break;
+ case Intrinsic::aarch64_neon_ld1x2:
+ case Intrinsic::aarch64_neon_ld1x3:
+ case Intrinsic::aarch64_neon_ld1x4:
+ case Intrinsic::aarch64_neon_ld2:
+ case Intrinsic::aarch64_neon_ld3:
+ case Intrinsic::aarch64_neon_ld4:
+ case Intrinsic::aarch64_neon_ld2r:
+ case Intrinsic::aarch64_neon_ld3r:
+ case Intrinsic::aarch64_neon_ld4r: {
+ handleNEONVectorLoad(I, /*WithLane=*/false);
+ break;
+ }
+
+ case Intrinsic::aarch64_neon_ld2lane:
+ case Intrinsic::aarch64_neon_ld3lane:
+ case Intrinsic::aarch64_neon_ld4lane: {
+ handleNEONVectorLoad(I, /*WithLane=*/true);
+ break;
+ }
+
// Saturating extract narrow
case Intrinsic::aarch64_neon_sqxtn:
case Intrinsic::aarch64_neon_sqxtun: