[MSAN] support vpermilvar AVX instructions #143053
Conversation
Created using spr 1.3.4
@llvm/pr-subscribers-compiler-rt-sanitizer @llvm/pr-subscribers-llvm-transforms

Author: Florian Mayer (fmayer)

Changes

Patch is 72.31 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/143053.diff

5 Files Affected:
diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index 232983c9e5a90..9e892cff49a8a 100644
--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -4159,6 +4159,22 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
setOrigin(&I, PtrSrcOrigin);
}
+ // Instrument AVX permutation intrinsic.
+ // We apply the same permutation (argument index 1) to the shadow.
+ void handleAvxVpermilvar(IntrinsicInst &I) {
+ IRBuilder<> IRB(&I);
+ Value *Shadow = getShadow(&I, 0);
+
+ // Shadows are integer-ish types but some intrinsics require a
+ // different (e.g., floating-point) type.
+ Shadow = IRB.CreateBitCast(Shadow, I.getArgOperand(0)->getType());
+ CallInst *CI = IRB.CreateIntrinsic(I.getType(), I.getIntrinsicID(),
+ {Shadow, I.getArgOperand(1)});
+
+ setShadow(&I, IRB.CreateBitCast(CI, getShadowTy(&I)));
+ setOriginForNaryOp(I);
+ }
+
// Instrument BMI / BMI2 intrinsics.
// All of these intrinsics are Z = I(X, Y)
// where the types of all operands and the result match, and are either i32 or
@@ -5104,6 +5120,16 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
break;
}
+ case Intrinsic::x86_avx_vpermilvar_pd:
+ case Intrinsic::x86_avx_vpermilvar_pd_256:
+ case Intrinsic::x86_avx512_vpermilvar_pd_512:
+ case Intrinsic::x86_avx_vpermilvar_ps:
+ case Intrinsic::x86_avx_vpermilvar_ps_256:
+ case Intrinsic::x86_avx512_vpermilvar_ps_512: {
+ handleAvxVpermilvar(I);
+ break;
+ }
+
case Intrinsic::x86_avx512fp16_mask_add_sh_round:
case Intrinsic::x86_avx512fp16_mask_sub_sh_round:
case Intrinsic::x86_avx512fp16_mask_mul_sh_round:
diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/avx-intrinsics-x86.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/avx-intrinsics-x86.ll
index d1655bfbb4d23..95e7c88790ad1 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/X86/avx-intrinsics-x86.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/X86/avx-intrinsics-x86.ll
@@ -954,21 +954,13 @@ declare <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float>) nounwind readnone
define <2 x double> @test_x86_avx_vpermilvar_pd(<2 x double> %a0, <2 x i64> %a1) #0 {
; CHECK-LABEL: @test_x86_avx_vpermilvar_pd(
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
-; CHECK: 5:
-; CHECK-NEXT: call void @__msan_warning_noreturn()
-; CHECK-NEXT: unreachable
-; CHECK: 6:
-; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> [[A0:%.*]], <2 x i64> [[A1:%.*]])
-; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <2 x double> [[RES]]
+; CHECK-NEXT: [[A0:%.*]] = bitcast <2 x i64> [[TMP1]] to <2 x double>
+; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> [[A0]], <2 x i64> [[A1:%.*]])
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x double> [[RES]] to <2 x i64>
+; CHECK-NEXT: [[RES1:%.*]] = call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> [[A2:%.*]], <2 x i64> [[A1]])
+; CHECK-NEXT: store <2 x i64> [[TMP4]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <2 x double> [[RES1]]
;
%res = call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %a0, <2 x i64> %a1) ; <<2 x double>> [#uses=1]
ret <2 x double> %res
@@ -979,21 +971,13 @@ declare <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double>, <2 x i64>) nounwi
define <4 x double> @test_x86_avx_vpermilvar_pd_256(<4 x double> %a0, <4 x i64> %a1) #0 {
; CHECK-LABEL: @test_x86_avx_vpermilvar_pd_256(
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i64> [[TMP1]] to i256
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP3]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i64> [[TMP2]] to i256
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
-; CHECK: 5:
-; CHECK-NEXT: call void @__msan_warning_noreturn()
-; CHECK-NEXT: unreachable
-; CHECK: 6:
-; CHECK-NEXT: [[RES:%.*]] = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> [[A0:%.*]], <4 x i64> [[A1:%.*]])
-; CHECK-NEXT: store <4 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <4 x double> [[RES]]
+; CHECK-NEXT: [[A0:%.*]] = bitcast <4 x i64> [[TMP1]] to <4 x double>
+; CHECK-NEXT: [[RES:%.*]] = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> [[A0]], <4 x i64> [[A1:%.*]])
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x double> [[RES]] to <4 x i64>
+; CHECK-NEXT: [[RES1:%.*]] = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> [[A2:%.*]], <4 x i64> [[A1]])
+; CHECK-NEXT: store <4 x i64> [[TMP4]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x double> [[RES1]]
;
%res = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> %a1) ; <<4 x double>> [#uses=1]
ret <4 x double> %res
@@ -1004,16 +988,12 @@ define <4 x double> @test_x86_avx_vpermilvar_pd_256_2(<4 x double> %a0) #0 {
; CHECK-LABEL: @test_x86_avx_vpermilvar_pd_256_2(
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i64> [[TMP1]] to i256
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP2]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
-; CHECK: 3:
-; CHECK-NEXT: call void @__msan_warning_noreturn()
-; CHECK-NEXT: unreachable
-; CHECK: 4:
-; CHECK-NEXT: [[RES:%.*]] = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> [[A0:%.*]], <4 x i64> <i64 2, i64 0, i64 0, i64 2>)
-; CHECK-NEXT: store <4 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <4 x double> [[RES]]
+; CHECK-NEXT: [[A0:%.*]] = bitcast <4 x i64> [[TMP1]] to <4 x double>
+; CHECK-NEXT: [[RES:%.*]] = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> [[A0]], <4 x i64> <i64 2, i64 0, i64 0, i64 2>)
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x double> [[RES]] to <4 x i64>
+; CHECK-NEXT: [[RES1:%.*]] = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> [[A1:%.*]], <4 x i64> <i64 2, i64 0, i64 0, i64 2>)
+; CHECK-NEXT: store <4 x i64> [[TMP4]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x double> [[RES1]]
;
%res = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> <i64 2, i64 0, i64 0, i64 2>) ; <<4 x double>> [#uses=1]
ret <4 x double> %res
@@ -1022,21 +1002,13 @@ define <4 x double> @test_x86_avx_vpermilvar_pd_256_2(<4 x double> %a0) #0 {
define <4 x float> @test_x86_avx_vpermilvar_ps(<4 x float> %a0, <4 x i32> %a1) #0 {
; CHECK-LABEL: @test_x86_avx_vpermilvar_ps(
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
-; CHECK: 5:
-; CHECK-NEXT: call void @__msan_warning_noreturn()
-; CHECK-NEXT: unreachable
-; CHECK: 6:
-; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> [[A0:%.*]], <4 x i32> [[A1:%.*]])
-; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <4 x float> [[RES]]
+; CHECK-NEXT: [[A0:%.*]] = bitcast <4 x i32> [[TMP1]] to <4 x float>
+; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> [[A0]], <4 x i32> [[A1:%.*]])
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x float> [[RES]] to <4 x i32>
+; CHECK-NEXT: [[RES1:%.*]] = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> [[A2:%.*]], <4 x i32> [[A1]])
+; CHECK-NEXT: store <4 x i32> [[TMP4]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x float> [[RES1]]
;
%res = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> %a1) ; <<4 x float>> [#uses=1]
ret <4 x float> %res
@@ -1057,19 +1029,12 @@ define <4 x float> @test_x86_avx_vpermilvar_ps_load(<4 x float> %a0, ptr %a1) #0
; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080
; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16
-; CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP8]], 0
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x i32> [[_MSLD]] to i128
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP9]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
-; CHECK: 10:
-; CHECK-NEXT: call void @__msan_warning_noreturn()
-; CHECK-NEXT: unreachable
-; CHECK: 11:
-; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> [[A0:%.*]], <4 x i32> [[A2]])
-; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <4 x float> [[RES]]
+; CHECK-NEXT: [[A0:%.*]] = bitcast <4 x i32> [[TMP2]] to <4 x float>
+; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> [[A0]], <4 x i32> [[A2]])
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x float> [[RES]] to <4 x i32>
+; CHECK-NEXT: [[RES1:%.*]] = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> [[A3:%.*]], <4 x i32> [[A2]])
+; CHECK-NEXT: store <4 x i32> [[TMP10]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x float> [[RES1]]
;
%a2 = load <4 x i32>, ptr %a1
%res = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> %a2) ; <<4 x float>> [#uses=1]
@@ -1081,21 +1046,13 @@ declare <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float>, <4 x i32>) nounwind
define <8 x float> @test_x86_avx_vpermilvar_ps_256(<8 x float> %a0, <8 x i32> %a1) #0 {
; CHECK-LABEL: @test_x86_avx_vpermilvar_ps_256(
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i32> [[TMP1]] to i256
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP3]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i32> [[TMP2]] to i256
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
-; CHECK: 5:
-; CHECK-NEXT: call void @__msan_warning_noreturn()
-; CHECK-NEXT: unreachable
-; CHECK: 6:
-; CHECK-NEXT: [[RES:%.*]] = call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> [[A0:%.*]], <8 x i32> [[A1:%.*]])
-; CHECK-NEXT: store <8 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <8 x float> [[RES]]
+; CHECK-NEXT: [[A0:%.*]] = bitcast <8 x i32> [[TMP1]] to <8 x float>
+; CHECK-NEXT: [[RES:%.*]] = call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> [[A0]], <8 x i32> [[A1:%.*]])
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x float> [[RES]] to <8 x i32>
+; CHECK-NEXT: [[RES1:%.*]] = call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> [[A2:%.*]], <8 x i32> [[A1]])
+; CHECK-NEXT: store <8 x i32> [[TMP4]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x float> [[RES1]]
;
%res = call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %a0, <8 x i32> %a1) ; <<8 x float>> [#uses=1]
ret <8 x float> %res
diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512-intrinsics-upgrade.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512-intrinsics-upgrade.ll
index 2ce2c076f5ad1..27319f33e25b5 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512-intrinsics-upgrade.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512-intrinsics-upgrade.ll
@@ -8141,21 +8141,13 @@ declare <8 x double> @llvm.x86.avx512.mask.vpermilvar.pd.512(<8 x double>, <8 x
define <8 x double>@test_int_x86_avx512_vpermilvar_pd_512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2) #0 {
; CHECK-LABEL: @test_int_x86_avx512_vpermilvar_pd_512(
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
-; CHECK: 5:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
-; CHECK-NEXT: unreachable
-; CHECK: 6:
-; CHECK-NEXT: [[TMP7:%.*]] = call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> [[X0:%.*]], <8 x i64> [[X1:%.*]])
-; CHECK-NEXT: store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <8 x double> [[TMP7]]
+; CHECK-NEXT: [[X0:%.*]] = bitcast <8 x i64> [[TMP1]] to <8 x double>
+; CHECK-NEXT: [[TMP7:%.*]] = call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> [[X0]], <8 x i64> [[X1:%.*]])
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x double> [[TMP7]] to <8 x i64>
+; CHECK-NEXT: [[TMP5:%.*]] = call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> [[X2:%.*]], <8 x i64> [[X1]])
+; CHECK-NEXT: store <8 x i64> [[TMP4]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x double> [[TMP5]]
;
%res = call <8 x double> @llvm.x86.avx512.mask.vpermilvar.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 -1)
ret <8 x double> %res
@@ -8165,31 +8157,23 @@ define <8 x double>@test_int_x86_avx512_mask_vpermilvar_pd_512(<8 x double> %x0,
;
; CHECK-LABEL: @test_int_x86_avx512_mask_vpermilvar_pd_512(
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
-; CHECK: 7:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
-; CHECK-NEXT: unreachable
-; CHECK: 8:
-; CHECK-NEXT: [[TMP9:%.*]] = call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> [[X0:%.*]], <8 x i64> [[X1:%.*]])
+; CHECK-NEXT: [[X0:%.*]] = bitcast <8 x i64> [[TMP1]] to <8 x double>
+; CHECK-NEXT: [[TMP9:%.*]] = call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> [[X0]], <8 x i64> [[X1:%.*]])
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x double> [[TMP9]] to <8 x i64>
+; CHECK-NEXT: [[TMP7:%.*]] = call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> [[X4:%.*]], <8 x i64> [[X1]])
; CHECK-NEXT: [[TMP10:%.*]] = bitcast i8 [[TMP3]] to <8 x i1>
; CHECK-NEXT: [[TMP11:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1>
-; CHECK-NEXT: [[TMP12:%.*]] = select <8 x i1> [[TMP11]], <8 x i64> zeroinitializer, <8 x i64> [[TMP4]]
-; CHECK-NEXT: [[TMP13:%.*]] = bitcast <8 x double> [[TMP9]] to <8 x i64>
+; CHECK-NEXT: [[TMP12:%.*]] = select <8 x i1> [[TMP11]], <8 x i64> [[TMP6]], <8 x i64> [[TMP4]]
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast <8 x double> [[TMP7]] to <8 x i64>
; CHECK-NEXT: [[TMP14:%.*]] = bitcast <8 x double> [[X2:%.*]] to <8 x i64>
; CHECK-NEXT: [[TMP15:%.*]] = xor <8 x i64> [[TMP13]], [[TMP14]]
-; CHECK-NEXT: [[TMP16:%.*]] = or <8 x i64> [[TMP15]], zeroinitializer
+; CHECK-NEXT: [[TMP16:%.*]] = or <8 x i64> [[TMP15]], [[TMP6]]
; CHECK-NEXT: [[TMP17:%.*]] = or <8 x i64> [[TMP16]], [[TMP4]]
; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP10]], <8 x i64> [[TMP17]], <8 x i64> [[TMP12]]
-; CHECK-NEXT: [[TMP18:%.*]] = select <8 x i1> [[TMP11]], <8 x double> [[TMP9]], <8 x double> [[X2]]
+; CHECK-NEXT: [[TMP18:%.*]] = select <8 x i1> [[TMP11]], <8 x double> [[TMP7]], <8 x double> [[X2]]
; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <8 x double> [[TMP18]]
;
@@ -8201,29 +8185,21 @@ define <8 x double>@test_int_x86_avx512_maskz_vpermilvar_pd_512(<8 x double> %x0
;
; CHECK-LABEL: @test_int_x86_avx512_maskz_vpermilvar_pd_512(
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
-; CHECK: 6:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
-; CHECK-NEXT: unreachable
-; CHECK: 7:
-; CHECK-NEXT: [[TMP8:%.*]] = call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> [[X0:%.*]], <8 x i64> [[X1:%.*]])
+; CHECK-NEXT: [[X0:%.*]] = bitcast <8 x i64> [[TMP1]] to <8 x double>
+; CHECK-NEXT: [[TMP8:%.*]] = call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> [[X0]], <8 x i64> [[X1:%.*]])
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x double> [[TMP8]] to <8 x i64>
+; CHECK-NEXT: [[TMP6:%.*]] = call <8 x doub...
[truncated]
If the permutation is uninitialized, it should not be silently accepted. There are two schools of thought:
- use insertShadowCheck(I.getArgOperand(1)), which will report it as uninitialized memory; similar to extractLowerShadow, where "intuitively it feels like a partly uninitialized mask is unintended, and we should warn the user immediately."
- mix in the shadow of the permutation and propagate the shadow (don't crash); similar to handleIntrinsicByApplyingToShadow. In the case of an uninitialized permutation, arguably all bits should become uninitialized.
@@ -4159,6 +4159,22 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
    setOrigin(&I, PtrSrcOrigin);
  }

  // Instrument AVX permutation intrinsic.
  // We apply the same permutation (argument index 1) to the shadow.
  void handleAvxVpermilvar(IntrinsicInst &I) {
Please capitalize "AVX" for consistency with the "handle.*(AVX|SSE|NEON)" functions.
Done.
Please update the test headers:
etc.
Created using spr 1.3.4
Done
No description provided.