Commit 23fd60d

[MSAN] support vpermilvar AVX instructions (#143053)
1 parent 2680afb commit 23fd60d

File tree: 5 files changed (+339, -356 lines)

llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp

Lines changed: 27 additions & 0 deletions
@@ -4167,6 +4167,23 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
     setOrigin(&I, PtrSrcOrigin);
   }
 
+  // Instrument AVX permutation intrinsic.
+  // We apply the same permutation (argument index 1) to the shadow.
+  void handleAVXVpermilvar(IntrinsicInst &I) {
+    IRBuilder<> IRB(&I);
+    Value *Shadow = getShadow(&I, 0);
+    insertShadowCheck(I.getArgOperand(1), &I);
+
+    // Shadows are integer-ish types but some intrinsics require a
+    // different (e.g., floating-point) type.
+    Shadow = IRB.CreateBitCast(Shadow, I.getArgOperand(0)->getType());
+    CallInst *CI = IRB.CreateIntrinsic(I.getType(), I.getIntrinsicID(),
+                                       {Shadow, I.getArgOperand(1)});
+
+    setShadow(&I, IRB.CreateBitCast(CI, getShadowTy(&I)));
+    setOriginForNaryOp(I);
+  }
+
   // Instrument BMI / BMI2 intrinsics.
   // All of these intrinsics are Z = I(X, Y)
   // where the types of all operands and the result match, and are either i32 or
@@ -5112,6 +5129,16 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
       break;
     }
 
+    case Intrinsic::x86_avx_vpermilvar_pd:
+    case Intrinsic::x86_avx_vpermilvar_pd_256:
+    case Intrinsic::x86_avx512_vpermilvar_pd_512:
+    case Intrinsic::x86_avx_vpermilvar_ps:
+    case Intrinsic::x86_avx_vpermilvar_ps_256:
+    case Intrinsic::x86_avx512_vpermilvar_ps_512: {
+      handleAVXVpermilvar(I);
+      break;
+    }
+
     case Intrinsic::x86_avx512fp16_mask_add_sh_round:
     case Intrinsic::x86_avx512fp16_mask_sub_sh_round:
     case Intrinsic::x86_avx512fp16_mask_mul_sh_round:
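
For illustration, below is a minimal hand-written sketch (not taken from the patch; the function name @sketch_vpermilvar_pd and the %s.* value names are invented, and the TLS declarations are simplified) of the instrumented IR this handler yields for the 128-bit pd variant, mirroring the updated CHECK lines in the test file: the shadow of the data operand is bitcast to the intrinsic's floating-point type, permuted with the same selector, and bitcast back as the result shadow, while the selector operand itself still gets a strict initialization check.

; Sketch of post-MSan IR for the 128-bit pd case (names invented).
@__msan_param_tls = external thread_local(initialexec) global [100 x i64]
@__msan_retval_tls = external thread_local(initialexec) global [100 x i64]

declare <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double>, <2 x i64>)
declare void @__msan_warning_noreturn()

define <2 x double> @sketch_vpermilvar_pd(<2 x double> %a0, <2 x i64> %a1) {
entry:
  ; Load the shadows of %a0 and %a1 from the parameter TLS block.
  %s.a0 = load <2 x i64>, ptr @__msan_param_tls, align 8
  %s.a1 = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
  ; Propagate: apply the same permutation to the (bitcast) shadow of %a0.
  %s.fp = bitcast <2 x i64> %s.a0 to <2 x double>
  %s.perm = call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %s.fp, <2 x i64> %a1)
  %s.res = bitcast <2 x double> %s.perm to <2 x i64>
  ; Check: the selector %a1 itself must be fully initialized.
  %s.a1.flat = bitcast <2 x i64> %s.a1 to i128
  %bad = icmp ne i128 %s.a1.flat, 0
  br i1 %bad, label %warn, label %ok

warn:
  call void @__msan_warning_noreturn()
  unreachable

ok:
  %res = call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %a0, <2 x i64> %a1)
  store <2 x i64> %s.res, ptr @__msan_retval_tls, align 8
  ret <2 x double> %res
}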

llvm/test/Instrumentation/MemorySanitizer/X86/avx-intrinsics-x86.ll

Lines changed: 61 additions & 71 deletions
@@ -4,12 +4,6 @@
 ; Handled strictly:
 ; - i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> %a0)
 ; - i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %a0)
-; - <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %a0, <2 x i64> %a1)
-; - <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> %a1)
-; - <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> <i64 2, i64 0, i64 0, i64 2>)
-; - <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> %a1)
-; - <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> %a2)
-; - <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %a0, <8 x i32> %a1)
 ; - void @llvm.x86.avx.vzeroall()
 ; - void @llvm.x86.avx.vzeroupper()

@@ -956,19 +950,19 @@ define <2 x double> @test_x86_avx_vpermilvar_pd(<2 x double> %a0, <2 x i64> %a1)
 ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
 ; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
-; CHECK: 5:
+; CHECK-NEXT: [[A0:%.*]] = bitcast <2 x i64> [[TMP1]] to <2 x double>
+; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> [[A0]], <2 x i64> [[A1:%.*]])
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x double> [[RES]] to <2 x i64>
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP6]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
+; CHECK: 7:
 ; CHECK-NEXT: call void @__msan_warning_noreturn()
 ; CHECK-NEXT: unreachable
-; CHECK: 6:
-; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> [[A0:%.*]], <2 x i64> [[A1:%.*]])
-; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <2 x double> [[RES]]
+; CHECK: 8:
+; CHECK-NEXT: [[RES1:%.*]] = call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> [[A2:%.*]], <2 x i64> [[A1]])
+; CHECK-NEXT: store <2 x i64> [[TMP4]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <2 x double> [[RES1]]
 ;
   %res = call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %a0, <2 x i64> %a1) ; <<2 x double>> [#uses=1]
   ret <2 x double> %res
@@ -981,19 +975,19 @@ define <4 x double> @test_x86_avx_vpermilvar_pd_256(<4 x double> %a0, <4 x i64>
 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
 ; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i64> [[TMP1]] to i256
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP3]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i64> [[TMP2]] to i256
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
-; CHECK: 5:
+; CHECK-NEXT: [[A0:%.*]] = bitcast <4 x i64> [[TMP1]] to <4 x double>
+; CHECK-NEXT: [[RES:%.*]] = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> [[A0]], <4 x i64> [[A1:%.*]])
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x double> [[RES]] to <4 x i64>
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i64> [[TMP2]] to i256
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP6]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
+; CHECK: 7:
 ; CHECK-NEXT: call void @__msan_warning_noreturn()
 ; CHECK-NEXT: unreachable
-; CHECK: 6:
-; CHECK-NEXT: [[RES:%.*]] = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> [[A0:%.*]], <4 x i64> [[A1:%.*]])
-; CHECK-NEXT: store <4 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <4 x double> [[RES]]
+; CHECK: 8:
+; CHECK-NEXT: [[RES1:%.*]] = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> [[A2:%.*]], <4 x i64> [[A1]])
+; CHECK-NEXT: store <4 x i64> [[TMP4]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x double> [[RES1]]
 ;
   %res = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> %a1) ; <<4 x double>> [#uses=1]
   ret <4 x double> %res
@@ -1004,16 +998,12 @@ define <4 x double> @test_x86_avx_vpermilvar_pd_256_2(<4 x double> %a0) #0 {
 ; CHECK-LABEL: @test_x86_avx_vpermilvar_pd_256_2(
 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i64> [[TMP1]] to i256
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP2]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
-; CHECK: 3:
-; CHECK-NEXT: call void @__msan_warning_noreturn()
-; CHECK-NEXT: unreachable
-; CHECK: 4:
-; CHECK-NEXT: [[RES:%.*]] = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> [[A0:%.*]], <4 x i64> <i64 2, i64 0, i64 0, i64 2>)
-; CHECK-NEXT: store <4 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <4 x double> [[RES]]
+; CHECK-NEXT: [[A0:%.*]] = bitcast <4 x i64> [[TMP1]] to <4 x double>
+; CHECK-NEXT: [[RES:%.*]] = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> [[A0]], <4 x i64> <i64 2, i64 0, i64 0, i64 2>)
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x double> [[RES]] to <4 x i64>
+; CHECK-NEXT: [[RES1:%.*]] = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> [[A1:%.*]], <4 x i64> <i64 2, i64 0, i64 0, i64 2>)
+; CHECK-NEXT: store <4 x i64> [[TMP4]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x double> [[RES1]]
 ;
   %res = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> <i64 2, i64 0, i64 0, i64 2>) ; <<4 x double>> [#uses=1]
   ret <4 x double> %res
@@ -1024,19 +1014,19 @@ define <4 x float> @test_x86_avx_vpermilvar_ps(<4 x float> %a0, <4 x i32> %a1) #
 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
 ; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
-; CHECK: 5:
+; CHECK-NEXT: [[A0:%.*]] = bitcast <4 x i32> [[TMP1]] to <4 x float>
+; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> [[A0]], <4 x i32> [[A1:%.*]])
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x float> [[RES]] to <4 x i32>
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP6]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
+; CHECK: 7:
 ; CHECK-NEXT: call void @__msan_warning_noreturn()
 ; CHECK-NEXT: unreachable
-; CHECK: 6:
-; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> [[A0:%.*]], <4 x i32> [[A1:%.*]])
-; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <4 x float> [[RES]]
+; CHECK: 8:
+; CHECK-NEXT: [[RES1:%.*]] = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> [[A2:%.*]], <4 x i32> [[A1]])
+; CHECK-NEXT: store <4 x i32> [[TMP4]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x float> [[RES1]]
 ;
   %res = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> %a1) ; <<4 x float>> [#uses=1]
   ret <4 x float> %res
@@ -1057,19 +1047,19 @@ define <4 x float> @test_x86_avx_vpermilvar_ps_load(<4 x float> %a0, ptr %a1) #0
 ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080
 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16
-; CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP8]], 0
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x i32> [[_MSLD]] to i128
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP9]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
-; CHECK: 10:
+; CHECK-NEXT: [[A0:%.*]] = bitcast <4 x i32> [[TMP2]] to <4 x float>
+; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> [[A0]], <4 x i32> [[A2]])
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x float> [[RES]] to <4 x i32>
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast <4 x i32> [[_MSLD]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP11]], 0
+; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]]
+; CHECK: 12:
 ; CHECK-NEXT: call void @__msan_warning_noreturn()
 ; CHECK-NEXT: unreachable
-; CHECK: 11:
-; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> [[A0:%.*]], <4 x i32> [[A2]])
-; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <4 x float> [[RES]]
+; CHECK: 13:
+; CHECK-NEXT: [[RES1:%.*]] = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> [[A3:%.*]], <4 x i32> [[A2]])
+; CHECK-NEXT: store <4 x i32> [[TMP10]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x float> [[RES1]]
 ;
   %a2 = load <4 x i32>, ptr %a1
   %res = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> %a2) ; <<4 x float>> [#uses=1]
@@ -1083,19 +1073,19 @@ define <8 x float> @test_x86_avx_vpermilvar_ps_256(<8 x float> %a0, <8 x i32> %a
 ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
 ; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i32> [[TMP1]] to i256
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP3]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i32> [[TMP2]] to i256
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
-; CHECK: 5:
+; CHECK-NEXT: [[A0:%.*]] = bitcast <8 x i32> [[TMP1]] to <8 x float>
+; CHECK-NEXT: [[RES:%.*]] = call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> [[A0]], <8 x i32> [[A1:%.*]])
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x float> [[RES]] to <8 x i32>
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i32> [[TMP2]] to i256
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP6]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
+; CHECK: 7:
 ; CHECK-NEXT: call void @__msan_warning_noreturn()
 ; CHECK-NEXT: unreachable
-; CHECK: 6:
-; CHECK-NEXT: [[RES:%.*]] = call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> [[A0:%.*]], <8 x i32> [[A1:%.*]])
-; CHECK-NEXT: store <8 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <8 x float> [[RES]]
+; CHECK: 8:
+; CHECK-NEXT: [[RES1:%.*]] = call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> [[A2:%.*]], <8 x i32> [[A1]])
+; CHECK-NEXT: store <8 x i32> [[TMP4]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x float> [[RES1]]
 ;
   %res = call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %a0, <8 x i32> %a1) ; <<8 x float>> [#uses=1]
   ret <8 x float> %res
