Skip to content

[SLP][REVEC] When ScalarTy is FixedVectorType, the insertion index should consider the number of elements of ScalarTy. #114526

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into llvm:main from slp-revec-CreateInsertVector
Nov 1, 2024

Conversation

HanKuanChen
Copy link
Contributor

No description provided.

@llvmbot
Copy link
Member

llvmbot commented Nov 1, 2024

@llvm/pr-subscribers-vectorizers

@llvm/pr-subscribers-llvm-transforms

Author: Han-Kuan Chen (HanKuanChen)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/114526.diff

2 Files Affected:

  • (modified) llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp (+10-9)
  • (modified) llvm/test/Transforms/SLPVectorizer/revec.ll (+54)
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 2afd02dae3a8b8..328ccf30641a83 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -13803,13 +13803,12 @@ class BoUpSLP::ShuffleInstructionBuilder final : public BaseShuffleAnalysis {
            unsigned VF = 0,
            function_ref<void(Value *&, SmallVectorImpl<int> &)> Action = {}) {
     IsFinalized = true;
+    unsigned ScalarTyNumElements = getNumElements(ScalarTy);
     SmallVector<int> NewExtMask(ExtMask);
-    if (auto *VecTy = dyn_cast<FixedVectorType>(ScalarTy)) {
+    if (ScalarTyNumElements != 1) {
       assert(SLPReVec && "FixedVectorType is not expected.");
-      transformScalarShuffleIndiciesToVector(VecTy->getNumElements(),
-                                             CommonMask);
-      transformScalarShuffleIndiciesToVector(VecTy->getNumElements(),
-                                             NewExtMask);
+      transformScalarShuffleIndiciesToVector(ScalarTyNumElements, CommonMask);
+      transformScalarShuffleIndiciesToVector(ScalarTyNumElements, NewExtMask);
       ExtMask = NewExtMask;
     }
     if (Action) {
@@ -13852,12 +13851,14 @@ class BoUpSLP::ShuffleInstructionBuilder final : public BaseShuffleAnalysis {
                                    return !isKnownNonNegative(
                                        V, SimplifyQuery(*R.DL));
                                  }));
+        unsigned InsertionIndex = Idx * ScalarTyNumElements;
         Vec = Builder.CreateInsertVector(Vec->getType(), Vec, V,
-                                         Builder.getInt64(Idx));
+                                         Builder.getInt64(InsertionIndex));
         if (!CommonMask.empty()) {
-          std::iota(std::next(CommonMask.begin(), Idx),
-                    std::next(CommonMask.begin(), Idx + E->getVectorFactor()),
-                    Idx);
+          std::iota(std::next(CommonMask.begin(), InsertionIndex),
+                    std::next(CommonMask.begin(), (Idx + E->getVectorFactor()) *
+                                                      ScalarTyNumElements),
+                    InsertionIndex);
         }
       }
       InVectors.front() = Vec;
diff --git a/llvm/test/Transforms/SLPVectorizer/revec.ll b/llvm/test/Transforms/SLPVectorizer/revec.ll
index f32e315142767f..aec81086105d68 100644
--- a/llvm/test/Transforms/SLPVectorizer/revec.ll
+++ b/llvm/test/Transforms/SLPVectorizer/revec.ll
@@ -355,3 +355,57 @@ entry:
   %10 = icmp ne <2 x i8> %8, zeroinitializer
   ret void
 }
+
+define void @test12() {
+; CHECK-LABEL: @test12(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr float, ptr null, i64 33
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr float, ptr null, i64 50
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr float, ptr null, i64 75
+; CHECK-NEXT:    [[TMP3:%.*]] = load <8 x float>, ptr [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x float>, ptr [[TMP2]], align 4
+; CHECK-NEXT:    [[TMP5:%.*]] = load <16 x float>, ptr [[TMP0]], align 4
+; CHECK-NEXT:    [[TMP6:%.*]] = call <32 x float> @llvm.vector.insert.v32f32.v8f32(<32 x float> poison, <8 x float> [[TMP4]], i64 0)
+; CHECK-NEXT:    [[TMP7:%.*]] = call <32 x float> @llvm.vector.insert.v32f32.v8f32(<32 x float> [[TMP6]], <8 x float> [[TMP3]], i64 8)
+; CHECK-NEXT:    [[TMP8:%.*]] = call <32 x float> @llvm.vector.insert.v32f32.v16f32(<32 x float> [[TMP7]], <16 x float> [[TMP5]], i64 16)
+; CHECK-NEXT:    [[TMP9:%.*]] = fpext <32 x float> [[TMP8]] to <32 x double>
+; CHECK-NEXT:    [[TMP10:%.*]] = call <32 x double> @llvm.vector.insert.v32f64.v8f64(<32 x double> poison, <8 x double> zeroinitializer, i64 0)
+; CHECK-NEXT:    [[TMP11:%.*]] = call <32 x double> @llvm.vector.insert.v32f64.v8f64(<32 x double> [[TMP10]], <8 x double> zeroinitializer, i64 8)
+; CHECK-NEXT:    [[TMP12:%.*]] = call <32 x double> @llvm.vector.insert.v32f64.v8f64(<32 x double> [[TMP11]], <8 x double> zeroinitializer, i64 16)
+; CHECK-NEXT:    [[TMP13:%.*]] = call <32 x double> @llvm.vector.insert.v32f64.v8f64(<32 x double> [[TMP12]], <8 x double> zeroinitializer, i64 24)
+; CHECK-NEXT:    [[TMP14:%.*]] = fadd <32 x double> [[TMP13]], [[TMP9]]
+; CHECK-NEXT:    [[TMP15:%.*]] = fptrunc <32 x double> [[TMP14]] to <32 x float>
+; CHECK-NEXT:    [[TMP16:%.*]] = call <32 x float> @llvm.vector.insert.v32f32.v8f32(<32 x float> poison, <8 x float> zeroinitializer, i64 0)
+; CHECK-NEXT:    [[TMP17:%.*]] = call <32 x float> @llvm.vector.insert.v32f32.v8f32(<32 x float> [[TMP16]], <8 x float> zeroinitializer, i64 8)
+; CHECK-NEXT:    [[TMP18:%.*]] = call <32 x float> @llvm.vector.insert.v32f32.v8f32(<32 x float> [[TMP17]], <8 x float> zeroinitializer, i64 16)
+; CHECK-NEXT:    [[TMP19:%.*]] = call <32 x float> @llvm.vector.insert.v32f32.v8f32(<32 x float> [[TMP18]], <8 x float> zeroinitializer, i64 24)
+; CHECK-NEXT:    [[TMP20:%.*]] = fcmp ogt <32 x float> [[TMP19]], [[TMP15]]
+; CHECK-NEXT:    ret void
+;
+entry:
+  %0 = getelementptr float, ptr null, i64 33
+  %1 = getelementptr float, ptr null, i64 41
+  %2 = getelementptr float, ptr null, i64 50
+  %3 = getelementptr float, ptr null, i64 75
+  %4 = load <8 x float>, ptr %0, align 4
+  %5 = load <8 x float>, ptr %1, align 4
+  %6 = load <8 x float>, ptr %2, align 4
+  %7 = load <8 x float>, ptr %3, align 4
+  %8 = fpext <8 x float> %4 to <8 x double>
+  %9 = fpext <8 x float> %5 to <8 x double>
+  %10 = fpext <8 x float> %6 to <8 x double>
+  %11 = fpext <8 x float> %7 to <8 x double>
+  %12 = fadd <8 x double> zeroinitializer, %8
+  %13 = fadd <8 x double> zeroinitializer, %9
+  %14 = fadd <8 x double> zeroinitializer, %10
+  %15 = fadd <8 x double> zeroinitializer, %11
+  %16 = fptrunc <8 x double> %12 to <8 x float>
+  %17 = fptrunc <8 x double> %13 to <8 x float>
+  %18 = fptrunc <8 x double> %14 to <8 x float>
+  %19 = fptrunc <8 x double> %15 to <8 x float>
+  %20 = fcmp ogt <8 x float> zeroinitializer, %16
+  %21 = fcmp ogt <8 x float> zeroinitializer, %17
+  %22 = fcmp ogt <8 x float> zeroinitializer, %18
+  %23 = fcmp ogt <8 x float> zeroinitializer, %19
+  ret void
+}

@HanKuanChen HanKuanChen merged commit e4aeeba into llvm:main Nov 1, 2024
11 checks passed
@HanKuanChen HanKuanChen deleted the slp-revec-CreateInsertVector branch November 1, 2024 13:17
@llvm-ci
Copy link
Collaborator

llvm-ci commented Nov 1, 2024

LLVM Buildbot has detected a new failure on builder llvm-clang-x86_64-darwin running on doug-worker-3 while building llvm at step 6 "test-build-unified-tree-check-all".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/23/builds/4387

Here is the relevant piece of the build log for reference:
Step 6 (test-build-unified-tree-check-all) failure: test (failure)
******************** TEST 'LLVM :: ExecutionEngine/OrcLazy/multiple-compile-threads-basic.ll' FAILED ********************
Exit Code: 2

Command Output (stderr):
--
RUN: at line 1: /Volumes/RAMDisk/buildbot-root/x86_64-darwin/build/bin/lli -jit-kind=orc-lazy -compile-threads=2 -thread-entry hello /Volumes/RAMDisk/buildbot-root/x86_64-darwin/llvm-project/llvm/test/ExecutionEngine/OrcLazy/multiple-compile-threads-basic.ll | /Volumes/RAMDisk/buildbot-root/x86_64-darwin/build/bin/FileCheck /Volumes/RAMDisk/buildbot-root/x86_64-darwin/llvm-project/llvm/test/ExecutionEngine/OrcLazy/multiple-compile-threads-basic.ll
+ /Volumes/RAMDisk/buildbot-root/x86_64-darwin/build/bin/lli -jit-kind=orc-lazy -compile-threads=2 -thread-entry hello /Volumes/RAMDisk/buildbot-root/x86_64-darwin/llvm-project/llvm/test/ExecutionEngine/OrcLazy/multiple-compile-threads-basic.ll
+ /Volumes/RAMDisk/buildbot-root/x86_64-darwin/build/bin/FileCheck /Volumes/RAMDisk/buildbot-root/x86_64-darwin/llvm-project/llvm/test/ExecutionEngine/OrcLazy/multiple-compile-threads-basic.ll
PLEASE submit a bug report to https://github.com/llvm/llvm-project/issues/ and include the crash backtrace.
Stack dump:
0.	Program arguments: /Volumes/RAMDisk/buildbot-root/x86_64-darwin/build/bin/lli -jit-kind=orc-lazy -compile-threads=2 -thread-entry hello /Volumes/RAMDisk/buildbot-root/x86_64-darwin/llvm-project/llvm/test/ExecutionEngine/OrcLazy/multiple-compile-threads-basic.ll
 #0 0x00000001044d84a8 llvm::sys::PrintStackTrace(llvm::raw_ostream&, int) (/Volumes/RAMDisk/buildbot-root/x86_64-darwin/build/bin/lli+0x1010534a8)
 #1 0x00000001044d6268 llvm::sys::RunSignalHandlers() (/Volumes/RAMDisk/buildbot-root/x86_64-darwin/build/bin/lli+0x101051268)
 #2 0x00000001044d8b77 SignalHandler(int) (/Volumes/RAMDisk/buildbot-root/x86_64-darwin/build/bin/lli+0x101053b77)
 #3 0x00007ff81a873fdd (/usr/lib/system/libsystem_platform.dylib+0x7ff80046afdd)
 #4 0x00007ff7bca77d18 
 #5 0x00007ff81a7ff34a (/usr/lib/libc++abi.dylib+0x7ff8003f634a)
 #6 0x0000000104025c54 (anonymous namespace)::GenericLLVMIRPlatformSupport::deinitialize(llvm::orc::JITDylib&) (/Volumes/RAMDisk/buildbot-root/x86_64-darwin/build/bin/lli+0x100ba0c54)
 #7 0x000000010348ff29 llvm::orc::LLJIT::deinitialize(llvm::orc::JITDylib&) (/Volumes/RAMDisk/buildbot-root/x86_64-darwin/build/bin/lli+0x10000af29)
 #8 0x000000010348d4ab runOrcJIT(char const*) (/Volumes/RAMDisk/buildbot-root/x86_64-darwin/build/bin/lli+0x1000084ab)
 #9 0x0000000103487b69 main (/Volumes/RAMDisk/buildbot-root/x86_64-darwin/build/bin/lli+0x100002b69)
#10 0x00007ff81a4b9345 
FileCheck error: '<stdin>' is empty.
FileCheck command line:  /Volumes/RAMDisk/buildbot-root/x86_64-darwin/build/bin/FileCheck /Volumes/RAMDisk/buildbot-root/x86_64-darwin/llvm-project/llvm/test/ExecutionEngine/OrcLazy/multiple-compile-threads-basic.ll

--

********************


smallp-o-p pushed a commit to smallp-o-p/llvm-project that referenced this pull request Nov 3, 2024
[SLP][REVEC] When ScalarTy is FixedVectorType, the insertion index should consider the number of elements of ScalarTy. (llvm#114526)
NoumanAmir657 pushed a commit to NoumanAmir657/llvm-project that referenced this pull request Nov 4, 2024
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

4 participants