[msan] Handle Intrinsic::vector_reduce_f{add,mul} #125615
Conversation
This adds handleVectorReduceWithStarterIntrinsic() (similar to handleVectorReduceIntrinsic but for intrinsics with an additional starting parameter) and uses it to handle Intrinsic::vector_reduce_f{add,mul}. Updates the tests from llvm#125597
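For illustration, a minimal sketch of the shadow propagation this handler emits for a 2-element reduction, based on the updated tests in this PR (value names such as %a0_shadow and %a1_shadow are placeholders for the shadows MSan loads from the parameter TLS, not names the pass actually produces):

  ; %r = call float @llvm.vector.reduce.fadd.f32.v2f32(float %a0, <2 x float> %a1)
  %s1 = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> %a1_shadow) ; OR-reduce the vector operand's shadow
  %s  = or i32 %a0_shadow, %s1                                      ; OR in the starting value's shadow
  ; %s is the shadow of %r (stored to __msan_retval_tls)

In other words, the result is treated as initialized only if both the starting value and every vector element are initialized (shadow = shadow[a0] | shadow[a1.0] | shadow[a1.1]), replacing the check-and-warn lowering visible in the removed CHECK lines below.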
@llvm/pr-subscribers-compiler-rt-sanitizer @llvm/pr-subscribers-llvm-transforms

Author: Thurston Dang (thurstond)

Changes

This adds handleVectorReduceWithStarterIntrinsic() (similar to handleVectorReduceIntrinsic but for intrinsics with an additional starting parameter) and uses it to handle Intrinsic::vector_reduce_f{add,mul}. Updates the tests from #125597

Patch is 42.46 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/125615.diff

3 Files Affected:
diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index a4f7e43f041c38..8a56f42c5c4ca1 100644
--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -3500,6 +3500,19 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
setOrigin(&I, getOrigin(&I, 0));
}
+ // Similar to handleVectorReduceIntrinsic but with an initial starting value.
+ // e.g., call float @llvm.vector.reduce.fadd.f32.v2f32(float %a0, <2 x float>
+ // %a1)
+ // shadow = shadow[a0] | shadow[a1.0] | shadow[a1.1]
+ void handleVectorReduceWithStarterIntrinsic(IntrinsicInst &I) {
+ IRBuilder<> IRB(&I);
+ Value *Shadow0 = getShadow(&I, 0);
+ Value *Shadow1 = IRB.CreateOrReduce(getShadow(&I, 1));
+ Value *S = IRB.CreateOr(Shadow0, Shadow1);
+ setShadow(&I, S);
+ setOriginForNaryOp(I);
+ }
+
// Instrument vector.reduce.or intrinsic.
// Valid (non-poisoned) set bits in the operand pull low the
// corresponding shadow bits.
@@ -4344,6 +4357,11 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
case Intrinsic::vector_reduce_mul:
handleVectorReduceIntrinsic(I);
break;
+ case Intrinsic::vector_reduce_fadd:
+ case Intrinsic::vector_reduce_fmul:
+ handleVectorReduceWithStarterIntrinsic(I);
+ break;
+
case Intrinsic::x86_sse_stmxcsr:
handleStmxcsr(I);
break;
diff --git a/llvm/test/Instrumentation/MemorySanitizer/vector-reduce-fadd.ll b/llvm/test/Instrumentation/MemorySanitizer/vector-reduce-fadd.ll
index 306a262b1c9caa..5da4c7357b6ad3 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/vector-reduce-fadd.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/vector-reduce-fadd.ll
@@ -15,17 +15,10 @@ define float @test_v2f32(float %a0, <2 x float> %a1) #0 {
; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP1]], 0
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to i64
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP3]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1:![0-9]+]]
-; CHECK: 4:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4:[0-9]+]]
-; CHECK-NEXT: unreachable
-; CHECK: 5:
+; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> [[TMP2]])
+; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP1]], [[TMP3]]
; CHECK-NEXT: [[TMP6:%.*]] = call float @llvm.vector.reduce.fadd.v2f32(float [[A0]], <2 x float> [[A1]])
-; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store i32 [[TMP4]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret float [[TMP6]]
;
%1 = call float @llvm.vector.reduce.fadd.f32.v2f32(float %a0, <2 x float> %a1)
@@ -38,17 +31,10 @@ define float @test_v4f32(float %a0, <4 x float> %a1) #0 {
; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP1]], 0
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP3]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
-; CHECK: 4:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
-; CHECK-NEXT: unreachable
-; CHECK: 5:
+; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP2]])
+; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP1]], [[TMP3]]
; CHECK-NEXT: [[TMP6:%.*]] = call float @llvm.vector.reduce.fadd.v4f32(float [[A0]], <4 x float> [[A1]])
-; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store i32 [[TMP4]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret float [[TMP6]]
;
%1 = call float @llvm.vector.reduce.fadd.f32.v4f32(float %a0, <4 x float> %a1)
@@ -61,17 +47,10 @@ define float @test_v8f32(float %a0, <8 x float> %a1) #0 {
; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP1]], 0
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i32> [[TMP2]] to i256
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP3]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
-; CHECK: 4:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
-; CHECK-NEXT: unreachable
-; CHECK: 5:
+; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> [[TMP2]])
+; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP1]], [[TMP3]]
; CHECK-NEXT: [[TMP6:%.*]] = call float @llvm.vector.reduce.fadd.v8f32(float [[A0]], <8 x float> [[A1]])
-; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store i32 [[TMP4]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret float [[TMP6]]
;
%1 = call float @llvm.vector.reduce.fadd.f32.v8f32(float %a0, <8 x float> %a1)
@@ -84,17 +63,10 @@ define float @test_v16f32(float %a0, <16 x float> %a1) #0 {
; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP1]], 0
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP3]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
-; CHECK: 4:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
-; CHECK-NEXT: unreachable
-; CHECK: 5:
+; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.or.v16i32(<16 x i32> [[TMP2]])
+; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP1]], [[TMP3]]
; CHECK-NEXT: [[TMP6:%.*]] = call float @llvm.vector.reduce.fadd.v16f32(float [[A0]], <16 x float> [[A1]])
-; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store i32 [[TMP4]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret float [[TMP6]]
;
%1 = call float @llvm.vector.reduce.fadd.f32.v16f32(float %a0, <16 x float> %a1)
@@ -107,15 +79,10 @@ define float @test_v2f32_zero(<2 x float> %a0) #0 {
; CHECK-SAME: <2 x float> [[A0:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[TMP1]] to i64
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
-; CHECK: 3:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
-; CHECK-NEXT: unreachable
-; CHECK: 4:
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> [[TMP1]])
+; CHECK-NEXT: [[TMP3:%.*]] = or i32 0, [[TMP2]]
; CHECK-NEXT: [[TMP5:%.*]] = call float @llvm.vector.reduce.fadd.v2f32(float -0.000000e+00, <2 x float> [[A0]])
-; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store i32 [[TMP3]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret float [[TMP5]]
;
%1 = call float @llvm.vector.reduce.fadd.f32.v2f32(float -0.0, <2 x float> %a0)
@@ -127,15 +94,10 @@ define float @test_v4f32_zero(<4 x float> %a0) #0 {
; CHECK-SAME: <4 x float> [[A0:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
-; CHECK: 3:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
-; CHECK-NEXT: unreachable
-; CHECK: 4:
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP1]])
+; CHECK-NEXT: [[TMP3:%.*]] = or i32 0, [[TMP2]]
; CHECK-NEXT: [[TMP5:%.*]] = call float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[A0]])
-; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store i32 [[TMP3]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret float [[TMP5]]
;
%1 = call float @llvm.vector.reduce.fadd.f32.v4f32(float -0.0, <4 x float> %a0)
@@ -147,15 +109,10 @@ define float @test_v8f32_zero(<8 x float> %a0) #0 {
; CHECK-SAME: <8 x float> [[A0:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i32> [[TMP1]] to i256
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP2]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
-; CHECK: 3:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
-; CHECK-NEXT: unreachable
-; CHECK: 4:
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> [[TMP1]])
+; CHECK-NEXT: [[TMP3:%.*]] = or i32 0, [[TMP2]]
; CHECK-NEXT: [[TMP5:%.*]] = call float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> [[A0]])
-; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store i32 [[TMP3]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret float [[TMP5]]
;
%1 = call float @llvm.vector.reduce.fadd.f32.v8f32(float -0.0, <8 x float> %a0)
@@ -167,15 +124,10 @@ define float @test_v16f32_zero(<16 x float> %a0) #0 {
; CHECK-SAME: <16 x float> [[A0:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP2]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
-; CHECK: 3:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
-; CHECK-NEXT: unreachable
-; CHECK: 4:
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.or.v16i32(<16 x i32> [[TMP1]])
+; CHECK-NEXT: [[TMP3:%.*]] = or i32 0, [[TMP2]]
; CHECK-NEXT: [[TMP5:%.*]] = call float @llvm.vector.reduce.fadd.v16f32(float -0.000000e+00, <16 x float> [[A0]])
-; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store i32 [[TMP3]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret float [[TMP5]]
;
%1 = call float @llvm.vector.reduce.fadd.f32.v16f32(float -0.0, <16 x float> %a0)
@@ -188,17 +140,10 @@ define double @test_v2f64(double %a0, <2 x double> %a1) #0 {
; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP3]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
-; CHECK: 4:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
-; CHECK-NEXT: unreachable
-; CHECK: 5:
+; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> [[TMP2]])
+; CHECK-NEXT: [[TMP4:%.*]] = or i64 [[TMP1]], [[TMP3]]
; CHECK-NEXT: [[TMP6:%.*]] = call double @llvm.vector.reduce.fadd.v2f64(double [[A0]], <2 x double> [[A1]])
-; CHECK-NEXT: store i64 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store i64 [[TMP4]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret double [[TMP6]]
;
%1 = call double @llvm.vector.reduce.fadd.f64.v2f64(double %a0, <2 x double> %a1)
@@ -211,17 +156,10 @@ define double @test_v4f64(double %a0, <4 x double> %a1) #0 {
; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i64> [[TMP2]] to i256
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP3]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
-; CHECK: 4:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
-; CHECK-NEXT: unreachable
-; CHECK: 5:
+; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> [[TMP2]])
+; CHECK-NEXT: [[TMP4:%.*]] = or i64 [[TMP1]], [[TMP3]]
; CHECK-NEXT: [[TMP6:%.*]] = call double @llvm.vector.reduce.fadd.v4f64(double [[A0]], <4 x double> [[A1]])
-; CHECK-NEXT: store i64 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store i64 [[TMP4]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret double [[TMP6]]
;
%1 = call double @llvm.vector.reduce.fadd.f64.v4f64(double %a0, <4 x double> %a1)
@@ -234,17 +172,10 @@ define double @test_v8f64(double %a0, <8 x double> %a1) #0 {
; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP3]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
-; CHECK: 4:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
-; CHECK-NEXT: unreachable
-; CHECK: 5:
+; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vector.reduce.or.v8i64(<8 x i64> [[TMP2]])
+; CHECK-NEXT: [[TMP4:%.*]] = or i64 [[TMP1]], [[TMP3]]
; CHECK-NEXT: [[TMP6:%.*]] = call double @llvm.vector.reduce.fadd.v8f64(double [[A0]], <8 x double> [[A1]])
-; CHECK-NEXT: store i64 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store i64 [[TMP4]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret double [[TMP6]]
;
%1 = call double @llvm.vector.reduce.fadd.f64.v8f64(double %a0, <8 x double> %a1)
@@ -257,17 +188,10 @@ define double @test_v16f64(double %a0, <16 x double> %a1) #0 {
; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i64> [[TMP2]] to i1024
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i1024 [[TMP3]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
-; CHECK: 4:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
-; CHECK-NEXT: unreachable
-; CHECK: 5:
+; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vector.reduce.or.v16i64(<16 x i64> [[TMP2]])
+; CHECK-NEXT: [[TMP4:%.*]] = or i64 [[TMP1]], [[TMP3]]
; CHECK-NEXT: [[TMP6:%.*]] = call double @llvm.vector.reduce.fadd.v16f64(double [[A0]], <16 x double> [[A1]])
-; CHECK-NEXT: store i64 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store i64 [[TMP4]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret double [[TMP6]]
;
%1 = call double @llvm.vector.reduce.fadd.f64.v16f64(double %a0, <16 x double> %a1)
@@ -280,15 +204,10 @@ define double @test_v2f64_zero(<2 x double> %a0) #0 {
; CHECK-SAME: <2 x double> [[A0:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
-; CHECK: 3:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
-; CHECK-NEXT: unreachable
-; CHECK: 4:
+; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> [[TMP1]])
+; CHECK-NEXT: [[TMP3:%.*]] = or i64 0, [[TMP2]]
; CHECK-NEXT: [[TMP5:%.*]] = call double @llvm.vector.reduce.fadd.v2f64(double -0.000000e+00, <2 x double> [[A0]])
-; CHECK-NEXT: store i64 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store i64 [[TMP3]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret double [[TMP5]]
;
%1 = call double @llvm.vector.reduce.fadd.f64.v2f64(double -0.0, <2 x double> %a0)
@@ -300,15 +219,10 @@ define double @test_v4f64_zero(<4 x double> %a0) #0 {
; CHECK-SAME: <4 x double> [[A0:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i64> [[TMP1]] to i256
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP2]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
-; CHECK: 3:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
-; CHECK-NEXT: unreachable
-; CHECK: 4:
+; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> [[TMP1]])
+; CHECK-NEXT: [[TMP3:%.*]] = or i64 0, [[TMP2]]
; CHECK-NEXT: [[TMP5:%.*]] = call double @llvm.vector.reduce.fadd.v4f64(double -0.000000e+00, <4 x double> [[A0]])
-; CHECK-NEXT: store i64 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store i64 [[TMP3]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret double [[TMP5]]
;
%1 = call double @llvm.vector.reduce.fadd.f64.v4f64(double -0.0, <4 x double> %a0)
@@ -320,15 +234,10 @@ define double @test_v8f64_zero(<8 x double> %a0) #0 {
; CHECK-SAME: <8 x double> [[A0:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP2]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
-; CHECK: 3:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
-; CHECK-NEXT: unreachable
-; CHECK: 4:
+; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vector.reduce.or.v8i64(<8 x i64> [[TMP1]])
+; CHECK-NEXT: [[TMP3:%.*]] = or i64 0, [[TMP2]]
; CHECK-NEXT: [[TMP5:%.*]] = call double @llvm.vector.reduce.fadd.v8f64(double -0.000000e+00, <8 x double> [[A0]])
-; CHECK-NEXT: store i64 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store i64 [[TMP3]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret double [[TMP5]]
;
%1 = call double @llvm.vector.reduce.fadd.f64.v8f64(double -0.0, <8 x double> %a0)
@@ -340,15 +...
[truncated]