Skip to content

[msan] Handle Arm NEON sum long across vector #125784

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
thurstond merged 1 commit into llvm:main
Feb 6, 2025

Conversation

thurstond
Copy link
Contributor

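Apply handleVectorReduceIntrinsic() to llvm.aarch64.neon.[su]addlv ("sum long across vector"). Previously, these were unknown intrinsics handled suboptimally by visitInstruction: the operand's shadow was checked eagerly (any uninitialized bit branched to __msan_warning_noreturn) and the result's shadow was always stored as zero. With this change, the operand's shadow is OR-reduced across the vector and zero-extended to the result width, so it propagates to the result instead of triggering an immediate report.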

Updates the tests from #125761

@llvmbot
Copy link
Member

llvmbot commented Feb 5, 2025

@llvm/pr-subscribers-compiler-rt-sanitizer

@llvm/pr-subscribers-llvm-transforms

Author: Thurston Dang (thurstond)

Changes

Apply handleVectorReduceIntrinsic() to llvm.aarch64.neon.[su]addlv. Previously, these were unknown intrinsics handled suboptimally by visitInstruction.

Updates the tests from #125761


Full diff: https://github.com/llvm/llvm-project/pull/125784.diff

2 Files Affected:

  • (modified) llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp (+5)
  • (modified) llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-vaddlv.ll (+6-23)
diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index f3f2e5041fb1d3..2fe678e7f57854 100644
--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -4360,6 +4360,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
     case Intrinsic::vector_reduce_or:
       handleVectorReduceOrIntrinsic(I);
       break;
+
     case Intrinsic::vector_reduce_add:
     case Intrinsic::vector_reduce_xor:
     case Intrinsic::vector_reduce_mul:
@@ -4367,8 +4368,12 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
     case Intrinsic::aarch64_neon_faddv:
     case Intrinsic::aarch64_neon_saddv:
     case Intrinsic::aarch64_neon_uaddv:
+    // Sum long across vector
+    case Intrinsic::aarch64_neon_saddlv:
+    case Intrinsic::aarch64_neon_uaddlv:
       handleVectorReduceIntrinsic(I);
       break;
+
     case Intrinsic::vector_reduce_fadd:
     case Intrinsic::vector_reduce_fmul:
       handleVectorReduceWithStarterIntrinsic(I);
diff --git a/llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-vaddlv.ll b/llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-vaddlv.ll
index 3c2775b58f4b6c..33a54080609fe7 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-vaddlv.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-vaddlv.ll
@@ -2,10 +2,6 @@
 ; RUN: opt < %s -passes=msan -S | FileCheck %s
 ;
 ; Forked from llvm/test/CodeGen/AArch64/arm64-vaddlv.ll
-;
-; Currently handled (suboptimally) by handleUnknownInstruction:
-; - llvm.aarch64.neon.saddlv
-; - llvm.aarch64.neon.uaddlv
 
 target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
 target triple = "aarch64--linux-android9001"
@@ -16,15 +12,10 @@ define i64 @test_vaddlv_s32(<2 x i32> %a1) nounwind readnone #0 {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x i32>, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to i64
-; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1:![0-9]+]]
-; CHECK:       2:
-; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR3:[0-9]+]]
-; CHECK-NEXT:    unreachable
-; CHECK:       3:
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> [[TMP0]])
+; CHECK-NEXT:    [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
 ; CHECK-NEXT:    [[VADDLV_I:%.*]] = tail call i64 @llvm.aarch64.neon.saddlv.i64.v2i32(<2 x i32> [[A1]]) #[[ATTR2:[0-9]+]]
-; CHECK-NEXT:    store i64 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    store i64 [[TMP2]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT:    ret i64 [[VADDLV_I]]
 ;
 entry:
@@ -38,15 +29,10 @@ define i64 @test_vaddlv_u32(<2 x i32> %a1) nounwind readnone #0 {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x i32>, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to i64
-; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
-; CHECK:       2:
-; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR3]]
-; CHECK-NEXT:    unreachable
-; CHECK:       3:
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> [[TMP0]])
+; CHECK-NEXT:    [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
 ; CHECK-NEXT:    [[VADDLV_I:%.*]] = tail call i64 @llvm.aarch64.neon.uaddlv.i64.v2i32(<2 x i32> [[A1]]) #[[ATTR2]]
-; CHECK-NEXT:    store i64 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    store i64 [[TMP2]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT:    ret i64 [[VADDLV_I]]
 ;
 entry:
@@ -59,6 +45,3 @@ declare i64 @llvm.aarch64.neon.uaddlv.i64.v2i32(<2 x i32>) nounwind readnone
 declare i64 @llvm.aarch64.neon.saddlv.i64.v2i32(<2 x i32>) nounwind readnone
 
 attributes #0 = { sanitize_memory }
-;.
-; CHECK: [[PROF1]] = !{!"branch_weights", i32 1, i32 1048575}
-;.

Apply handleVectorReduceIntrinsic() to llvm.aarch64.neon.[su]addlv. Previously, these were unknown intrinsics handled suboptimally by visitInstruction.

Updates the tests from llvm#125761
@thurstond thurstond merged commit 73a1c7b into llvm:main Feb 6, 2025
8 checks passed
Icohedron pushed a commit to Icohedron/llvm-project that referenced this pull request Feb 11, 2025
Apply handleVectorReduceIntrinsic() to llvm.aarch64.neon.[su]addlv.
Previously, these were unknown intrinsics handled suboptimally by
visitInstruction.

Updates the tests from llvm#125761
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

3 participants