
Commit c153c61

c0d1f1ed authored and vitalybuka committed
Handle instrumentation of scalar single-precision (_ss) intrinsics
Instrumentation of scalar double-precision intrinsics such as x86_sse41_round_sd was already handled by https://reviews.llvm.org/D82398, but not their single-precision counterparts.

https://issuetracker.google.com/172238865

Reviewed By: vitalybuka

Differential Revision: https://reviews.llvm.org/D124871
1 parent 3b13f88 commit c153c61

File tree

3 files changed: +48 -34 lines changed

llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp

Lines changed: 24 additions & 11 deletions
@@ -3200,27 +3200,37 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
     SOC.Done(&I);
   }
 
-  // Instrument _mm_*_sd intrinsics
-  void handleUnarySdIntrinsic(IntrinsicInst &I) {
+  // Instrument _mm_*_sd|ss intrinsics
+  void handleUnarySdSsIntrinsic(IntrinsicInst &I) {
     IRBuilder<> IRB(&I);
+    unsigned Width =
+        cast<FixedVectorType>(I.getArgOperand(0)->getType())->getNumElements();
     Value *First = getShadow(&I, 0);
     Value *Second = getShadow(&I, 1);
-    // High word of first operand, low word of second
-    Value *Shadow =
-        IRB.CreateShuffleVector(First, Second, llvm::makeArrayRef<int>({2, 1}));
+    // First element of second operand, remaining elements of first operand
+    SmallVector<int, 16> Mask;
+    Mask.push_back(Width);
+    for (unsigned i = 1; i < Width; i++)
+      Mask.push_back(i);
+    Value *Shadow = IRB.CreateShuffleVector(First, Second, Mask);
 
     setShadow(&I, Shadow);
     setOriginForNaryOp(I);
   }
 
-  void handleBinarySdIntrinsic(IntrinsicInst &I) {
+  void handleBinarySdSsIntrinsic(IntrinsicInst &I) {
     IRBuilder<> IRB(&I);
+    unsigned Width =
+        cast<FixedVectorType>(I.getArgOperand(0)->getType())->getNumElements();
     Value *First = getShadow(&I, 0);
     Value *Second = getShadow(&I, 1);
     Value *OrShadow = IRB.CreateOr(First, Second);
-    // High word of first operand, low word of both OR'd together
-    Value *Shadow = IRB.CreateShuffleVector(First, OrShadow,
-                                            llvm::makeArrayRef<int>({2, 1}));
+    // First element of both OR'd together, remaining elements of first operand
+    SmallVector<int, 16> Mask;
+    Mask.push_back(Width);
+    for (unsigned i = 1; i < Width; i++)
+      Mask.push_back(i);
+    Value *Shadow = IRB.CreateShuffleVector(First, OrShadow, Mask);
 
     setShadow(&I, Shadow);
     setOriginForNaryOp(I);
@@ -3495,11 +3505,14 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
       break;
 
     case Intrinsic::x86_sse41_round_sd:
-      handleUnarySdIntrinsic(I);
+    case Intrinsic::x86_sse41_round_ss:
+      handleUnarySdSsIntrinsic(I);
       break;
     case Intrinsic::x86_sse2_max_sd:
+    case Intrinsic::x86_sse_max_ss:
     case Intrinsic::x86_sse2_min_sd:
-      handleBinarySdIntrinsic(I);
+    case Intrinsic::x86_sse_min_ss:
+      handleBinarySdSsIntrinsic(I);
       break;
 
     case Intrinsic::fshl:
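As an aside, here is a minimal standalone C++ sketch of the mask construction in the hunk above (illustrative only, not MSan code; the helper name makeSdSsMask is made up, and std::vector stands in for SmallVector). For a 2-element vector (the _sd case) the mask comes out as {2, 1}, matching the previously hard-coded behavior, and for a 4-element vector (the _ss case) it is {4, 1, 2, 3}, matching the shufflevector masks in the updated tests below.

#include <cstdio>
#include <vector>

// Hypothetical helper: builds the same shuffle mask as
// handleUnarySdSsIntrinsic/handleBinarySdSsIntrinsic above.
// Element 0 is taken from the second input vector (index Width);
// elements 1..Width-1 are kept from the first input vector.
static std::vector<int> makeSdSsMask(unsigned Width) {
  std::vector<int> Mask;
  Mask.push_back(static_cast<int>(Width)); // element 0 of the second vector
  for (unsigned i = 1; i < Width; i++)
    Mask.push_back(static_cast<int>(i));   // remaining elements of the first
  return Mask;
}

int main() {
  // Width 2 models the <2 x double> _sd case, Width 4 the <4 x float> _ss case.
  for (unsigned Width : {2u, 4u}) {
    std::printf("Width %u:", Width);
    for (int Idx : makeSdSsMask(Width))
      std::printf(" %d", Idx);
    std::printf("\n"); // prints "Width 2: 2 1" and "Width 4: 4 1 2 3"
  }
  return 0;
}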

llvm/test/Instrumentation/MemorySanitizer/sse-intrinsics-x86.ll

Lines changed: 6 additions & 4 deletions
@@ -249,9 +249,10 @@ define <4 x float> @test_x86_sse_max_ss(<4 x float> %a0, <4 x float> %a1) #0 {
 ; CHECK-DAG: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i32>*), align 8
 ; CHECK-DAG: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 16) to <4 x i32>*), align 8
 ; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i32> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP3]], <4 x i32> <i32 4, i32 1, i32 2, i32 3>
 ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]])
-; CHECK-NEXT: store <4 x i32> [[_MSPROP]], <4 x i32>* bitcast ([100 x i64]* @__msan_retval_tls to <4 x i32>*), align 8
+; CHECK-NEXT: store <4 x i32> [[TMP4]], <4 x i32>* bitcast ([100 x i64]* @__msan_retval_tls to <4 x i32>*), align 8
 ; CHECK-NEXT: ret <4 x float> [[RES]]
 ;
   %res = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
@@ -281,9 +282,10 @@ define <4 x float> @test_x86_sse_min_ss(<4 x float> %a0, <4 x float> %a1) #0 {
 ; CHECK-DAG: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i32>*), align 8
 ; CHECK-DAG: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 16) to <4 x i32>*), align 8
 ; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i32> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP3]], <4 x i32> <i32 4, i32 1, i32 2, i32 3>
 ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]])
-; CHECK-NEXT: store <4 x i32> [[_MSPROP]], <4 x i32>* bitcast ([100 x i64]* @__msan_retval_tls to <4 x i32>*), align 8
+; CHECK-NEXT: store <4 x i32> [[TMP4]], <4 x i32>* bitcast ([100 x i64]* @__msan_retval_tls to <4 x i32>*), align 8
 ; CHECK-NEXT: ret <4 x float> [[RES]]
 ;
   %res = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
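The updated CHECK lines above encode the shadow semantics of the binary _ss intrinsics: element 0 of the result shadow is the OR of both operands' element 0 shadows, while elements 1..3 pass the first operand's shadow through unchanged. Below is a small illustrative C++ model under that reading (binarySsShadow is a hypothetical name, not anything from the test or from MSan itself).

#include <array>
#include <cstdint>
#include <cstdio>

// Hypothetical model of the shadow propagation the CHECK lines verify for a
// binary _ss intrinsic on <4 x float>: element 0 of the result shadow is the
// OR of both low-element shadows; elements 1..3 come from the first operand.
static std::array<uint32_t, 4> binarySsShadow(const std::array<uint32_t, 4> &S0,
                                              const std::array<uint32_t, 4> &S1) {
  std::array<uint32_t, 4> Res = S0; // elements 1..3 from the first operand
  Res[0] = S0[0] | S1[0];           // element 0: OR of the two low-element shadows
  return Res;
}

int main() {
  // Second operand's low element is fully uninitialized (all shadow bits set).
  std::array<uint32_t, 4> S0 = {0, 0, 0, 0};
  std::array<uint32_t, 4> S1 = {0xffffffff, 0, 0, 0};
  std::array<uint32_t, 4> Res = binarySsShadow(S0, S1);
  std::printf("%#x %#x %#x %#x\n", Res[0], Res[1], Res[2], Res[3]); // 0xffffffff 0 0 0
  return 0;
}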

llvm/test/Instrumentation/MemorySanitizer/sse41-intrinsics-x86.ll

Lines changed: 18 additions & 19 deletions
@@ -432,30 +432,29 @@ define <2 x double> @test_x86_sse41_round_sd_load(<2 x double> %a0, <2 x double>
 }
 
 
-define <4 x float> @test_x86_sse41_round_ss(<4 x float> %a0, <4 x float> %a1) #0 {
-; CHECK-LABEL: @test_x86_sse41_round_ss(
-; CHECK-DAG: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i32>*), align 8
-; CHECK-DAG: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 16) to <4 x i32>*), align 8
+define <4 x float> @test_x86_sse41_round_ss_load(<4 x float> %a0, <4 x float>* %a1) #0 {
+; CHECK-LABEL: @test_x86_sse41_round_ss_load(
+; CHECK-DAG: [[TMP1:%.*]] = load i64, i64* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 16) to i64*), align 8
+; CHECK-DAG: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i32>*), align 8
 ; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF0]]
-; CHECK: 4:
-; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR3]]
-; CHECK-NEXT: unreachable
-; CHECK: 5:
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0
-; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]]
-; CHECK: 7:
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
 ; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR3]]
 ; CHECK-NEXT: unreachable
-; CHECK: 8:
-; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], i32 7)
-; CHECK-NEXT: store <4 x i32> zeroinitializer, <4 x i32>* bitcast ([100 x i64]* @__msan_retval_tls to <4 x i32>*), align 8
+; CHECK: 4:
+; CHECK-NEXT: [[A1B:%.*]] = load <4 x float>, <4 x float>* [[A1:%.*]], align 16
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint <4 x float>* [[A1]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to <4 x i32>*
+; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP7]], align 16
+; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[_MSLD]], <4 x i32> <i32 4, i32 1, i32 2, i32 3>
+; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> [[A0:%.*]], <4 x float> [[A1B]], i32 7)
+; CHECK-NEXT: store <4 x i32> [[TMP8]], <4 x i32>* bitcast ([100 x i64]* @__msan_retval_tls to <4 x i32>*), align 8
 ; CHECK-NEXT: ret <4 x float> [[RES]]
 ;
-  %res = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %a0, <4 x float> %a1, i32 7) ; <<4 x float>> [#uses=1]
+  %a1b = load <4 x float>, <4 x float>* %a1
+  %res = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %a0, <4 x float> %a1b, i32 7) ; <<4 x float>> [#uses=1]
   ret <4 x float> %res
 }
 declare <4 x float> @llvm.x86.sse41.round.ss(<4 x float>, <4 x float>, i32) nounwind readnone
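For context, the "xor i64 ..., 87960930222080" line in the expected output is MemorySanitizer's application-to-shadow address translation on x86-64 Linux (87960930222080 == 0x500000000000). A minimal sketch of that mapping follows, with a hypothetical helper name shadowAddrFor and an arbitrary example address.

#include <cstdint>
#include <cstdio>

// Sketch of MSan's default x86-64 Linux application-to-shadow mapping,
// which the xor with 87960930222080 (0x500000000000) above corresponds to.
static uint64_t shadowAddrFor(uint64_t AppAddr) {
  return AppAddr ^ 0x500000000000ULL; // flip into the shadow region
}

int main() {
  uint64_t Addr = 0x7ffc12345670ULL; // arbitrary example application address
  std::printf("app %#llx -> shadow %#llx\n",
              (unsigned long long)Addr,
              (unsigned long long)shadowAddrFor(Addr));
  return 0;
}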
