[LV] Limits the splat operations be hoisted must not be defined by a recipe. #117138

Mel-Chen · 2024-11-21T10:26:15Z

Issue #117139

llvmbot · 2024-11-21T10:26:56Z

@llvm/pr-subscribers-vectorizers

@llvm/pr-subscribers-llvm-transforms

Author: Mel Chen (Mel-Chen)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/117138.diff

4 Files Affected:

(modified) llvm/lib/Transforms/Vectorize/VPlan.cpp (+1-1)
(modified) llvm/test/Transforms/LoopVectorize/AArch64/extractvalue-no-scalarization-required.ll (+2-2)
(modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-extractvalue.ll (+1-1)
(modified) llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll (+2-2)

diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 8b1a4aeb88f81f..32996426c28490 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -266,7 +266,7 @@ Value *VPTransformState::get(VPValue *Def, bool NeedsScalar) {
     return Data.VPV2Vector[Def];
 
   auto GetBroadcastInstrs = [this, Def](Value *V) {
-    bool SafeToHoist = Def->isDefinedOutsideLoopRegions();
+    bool SafeToHoist = !Def->hasDefiningRecipe();
     if (VF.isScalar())
       return V;
     // Place the code for broadcasting invariant variables in the new preheader.
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/extractvalue-no-scalarization-required.ll b/llvm/test/Transforms/LoopVectorize/AArch64/extractvalue-no-scalarization-required.ll
index aa78113ebaa48c..b1c202eab9dd3d 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/extractvalue-no-scalarization-required.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/extractvalue-no-scalarization-required.ll
@@ -23,9 +23,9 @@
 ; Check that the extractvalue operands are actually free in vector code.
 
 ; FORCED:         [[E1:%.+]] = extractvalue { i64, i64 } %sv, 0
-; FORCED-NEXT:    [[E2:%.+]] = extractvalue { i64, i64 } %sv, 1
 ; FORCED-NEXT:    %broadcast.splatinsert = insertelement <2 x i64> poison, i64 [[E1]], i64 0
 ; FORCED-NEXT:    %broadcast.splat = shufflevector <2 x i64> %broadcast.splatinsert, <2 x i64> poison, <2 x i32> zeroinitializer
+; FORCED-NEXT:    [[E2:%.+]] = extractvalue { i64, i64 } %sv, 1
 ; FORCED-NEXT:    %broadcast.splatinsert1 = insertelement <2 x i64> poison, i64 [[E2]], i64 0
 ; FORCED-NEXT:    %broadcast.splat2 = shufflevector <2 x i64> %broadcast.splatinsert1, <2 x i64> poison, <2 x i32> zeroinitializer
 ; FORCED-NEXT:    [[ADD:%.+]] = add <2 x i64> %broadcast.splat, %broadcast.splat2
@@ -75,9 +75,9 @@ declare float @powf(float, float) readnone nounwind
 ; FORCED-LABEL: define void @test_getVectorCallCost
 
 ; FORCED:         [[E1:%.+]] = extractvalue { float, float } %sv, 0
-; FORCED-NEXT:    [[E2:%.+]] = extractvalue { float, float } %sv, 1
 ; FORCED-NEXT:    %broadcast.splatinsert = insertelement <2 x float> poison, float [[E1]], i64 0
 ; FORCED-NEXT:    %broadcast.splat = shufflevector <2 x float> %broadcast.splatinsert, <2 x float> poison, <2 x i32> zeroinitializer
+; FORCED-NEXT:    [[E2:%.+]] = extractvalue { float, float } %sv, 1
 ; FORCED-NEXT:    %broadcast.splatinsert1 = insertelement <2 x float> poison, float [[E2]], i64 0
 ; FORCED-NEXT:    %broadcast.splat2 = shufflevector <2 x float> %broadcast.splatinsert1, <2 x float> poison, <2 x i32> zeroinitializer
 
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-extractvalue.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-extractvalue.ll
index 7778f01c58dc34..fb5087db254b23 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-extractvalue.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-extractvalue.ll
@@ -5,9 +5,9 @@ target triple = "aarch64-unknown-linux-gnu"
 define void @widen_extractvalue(ptr %dst, {i64, i64} %sv) #0 {
 ; CHECK-LABEL: @widen_extractvalue(
 ; CHECK:        [[EXTRACT0:%.*]] = extractvalue { i64, i64 } [[SV:%.*]], 0
-; CHECK-NEXT:   [[EXTRACT1:%.*]] = extractvalue { i64, i64 } [[SV]], 1
 ; CHECK-NEXT:   [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[EXTRACT0]], i64 0
 ; CHECK-NEXT:   [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+; CHECK-NEXT:   [[EXTRACT1:%.*]] = extractvalue { i64, i64 } [[SV]], 1
 ; CHECK-NEXT:   [[DOTSPLATINSERT1:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[EXTRACT1]], i64 0
 ; CHECK-NEXT:   [[DOTSPLAT2:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT1]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
 ; CHECK:        [[ADD:%.*]] = add <vscale x 2 x i64> [[DOTSPLAT]], [[DOTSPLAT2]]
diff --git a/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll b/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll
index e94bd841360256..7840a9dec794b3 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll
@@ -137,6 +137,8 @@ define void @test_induction_step_needs_expansion(ptr noalias %j, ptr %k, i64 %l,
 ; CHECK-LABEL: @test_induction_step_needs_expansion(
 ; CHECK-NEXT:  iter.check:
 ; CHECK-NEXT:    [[TMP0:%.*]] = sub i16 0, [[OFF:%.*]]
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i16> poison, i16 [[TMP0]], i64 0
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <16 x i16> [[BROADCAST_SPLATINSERT]], <16 x i16> poison, <16 x i32> zeroinitializer
 ; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[L:%.*]], 8
 ; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
 ; CHECK:       vector.main.loop.iter.check:
@@ -145,8 +147,6 @@ define void @test_induction_step_needs_expansion(ptr noalias %j, ptr %k, i64 %l,
 ; CHECK:       vector.ph:
 ; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[L]], 64
 ; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[L]], [[N_MOD_VF]]
-; CHECK-NEXT:    [[DOTSPLATINSERT2:%.*]] = insertelement <16 x i16> poison, i16 [[TMP0]], i64 0
-; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <16 x i16> [[DOTSPLATINSERT2]], <16 x i16> poison, <16 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP1:%.*]] = mul <16 x i16> splat (i16 16), [[TMP2]]
 ; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i16> poison, i16 [[TMP0]], i64 0
 ; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <16 x i16> [[DOTSPLATINSERT]], <16 x i16> poison, <16 x i32> zeroinitializer

fhahn · 2024-11-21T19:10:02Z

How about adding a VPDominatorTree to VPTransformState, using that to check for dominance? VPlan's CFG shouldn't change during execution, so no need to worry about updates

llvm/lib/Transforms/Vectorize/VPlan.h

llvm/lib/Transforms/Vectorize/VPlan.cpp

Mel-Chen · 2024-12-13T08:42:20Z

Ping @fhahn, do you have any thoughts on circular dependency between VPlan.h and VPlanDominatorTree.h? Or could we proceed with using the VPDominatorTree pointer?

Mel-Chen · 2025-01-07T12:41:03Z

How about adding a VPDominatorTree to VPTransformState, using that to check for dominance? VPlan's CFG shouldn't change during execution, so no need to worry about updates

@fhahn 40c2dee
I have to update the VPDominatorTree before VPlan::execute since the plan will be changed after State was constructed:

    // 2. Copy and widen instructions from the old loop into the new loop.
    BestVPlan.prepareToExecute(
        ILV.getTripCount(),
        ILV.getOrCreateVectorTripCount(ILV.LoopVectorPreHeader), State);
    replaceVPBBWithIRVPBB(VectorPH, State.CFG.PrevBB);  <--- This function will change CFG of plan

    BestVPlan.execute(&State);

Also, could you please take a look at the issue of the circular dependency between VPlan.h and VPlanDominatorTree.h?
Or could we proceed with using the pointer?

llvm/lib/Transforms/Vectorize/VPlan.cpp

Nothing in VPlan.h directly depends on VPTransformState, VPCostContext, VPFRange, VPlanPrinter or VPSlotTracker. Move them out to a separate header to reduce the size of widely used VPlan.h. This is a first step towards more cleanly separating declarations in VPlan. Besides reducing VPlan.h's size, this also allows including additional VPlan-related headers in VPlanHelpers.h for use there. An example is using VPDominatorTree in VPTransformState (llvm#117138).

fhahn · 2025-01-23T12:09:13Z

How about adding a VPDominatorTree to VPTransformState, using that to check for dominance? VPlan's CFG shouldn't change during execution, so no need to worry about updates

@fhahn 40c2dee I have to update the VPDominatorTree before VPlan::execute since the plan will be changed after State was constructed:
    // 2. Copy and widen instructions from the old loop into the new loop.
    BestVPlan.prepareToExecute(
        ILV.getTripCount(),
        ILV.getOrCreateVectorTripCount(ILV.LoopVectorPreHeader), State);
    replaceVPBBWithIRVPBB(VectorPH, State.CFG.PrevBB);  <--- This function will change CFG of plan

    BestVPlan.execute(&State);
Also, could you please take a look at the issue of the circular dependency between VPlan.h and VPlanDominatorTree.h? Or could we proceed with using the pointer?

I think it should be possible to organize the declarations in a better way to enable this (with the main other benefit being that it also reduces the size of VPlan.h): #124104

Nothing in VPlan.h directly depends on VPTransformState, VPCostContext, VPFRange, VPlanPrinter or VPSlotTracker. Move them out to a separate header to reduce the size of widely used VPlan.h. This is a first step towards more cleanly separating declarations in VPlan. Besides reducing VPlan.h's size, this also allows including additional VPlan-related headers in VPlanHelpers.h for use there. An example is using VPDominatorTree in VPTransformState (llvm#117138).

Nothing in VPlan.h directly depends on VPTransformState, VPCostContext, VPFRange, VPlanPrinter or VPSlotTracker. Move them out to a separate header to reduce the size of widely used VPlan.h. This is a first step towards more cleanly separating declarations in VPlan. Besides reducing VPlan.h's size, this also allows including additional VPlan-related headers in VPlanHelpers.h for use there. An example is using VPDominatorTree in VPTransformState (#117138). PR: #124104

…(#124104) Nothing in VPlan.h directly depends on VPTransformState, VPCostContext, VPFRange, VPlanPrinter or VPSlotTracker. Move them out to a separate header to reduce the size of widely used VPlan.h. This is a first step towards more cleanly separating declarations in VPlan. Besides reducing VPlan.h's size, this also allows including additional VPlan-related headers in VPlanHelpers.h for use there. An example is using VPDominatorTree in VPTransformState (llvm/llvm-project#117138). PR: llvm/llvm-project#124104

Mel-Chen · 2025-02-05T14:05:36Z

I think it should be possible to organize the declarations in a better way to enable this (with the main other benefit being that it also reduces the size of VPlan.h): #124104

Thanks, #124104 helped me a lot! I have already rebased this patch on it. :)
But I just saw #124644, do we still need to use VPDominatorTree?
In my opinion, the explicit splat is the best approach to resolve this issue.

fhahn

I think it should be possible to organize the declarations in a better way to enable this (with the main other benefit being that it also reduces the size of VPlan.h): #124104

Thanks, #124104 helped me a lot! I have already rebased this patch on it. :) But I just saw #124644, do we still need to use VPDominatorTree? In my opinion, the explicit splat is the best approach to resolve this issue.

It depends if anything is blocked on this at the moment. #124644 is the first step, there is a bit more follow-up work needed to completely remove SafeToHoist, but after that the VPDT shouldn't be needed.

If you need this fixed urgently to unblock follow-up patches then it may make sense to use the VPDT here for now.

Mel-Chen · 2025-02-06T09:55:16Z

I think it should be possible to organize the declarations in a better way to enable this (with the main other benefit being that it also reduces the size of VPlan.h): #124104

Thanks, #124104 helped me a lot! I have already rebased this patch on it. :) But I just saw #124644, do we still need to use VPDominatorTree? In my opinion, the explicit splat is the best approach to resolve this issue.

It depends if anything is blocked on this at the moment. #124644 is the first step, there is a bit more follow-up work needed to completely remove SafeToHoist, but after that the VPDT shouldn't be needed.

If you need this fixed urgently to unblock follow-up patches then it may make sense to use the VPDT here for now.

Got it. #124644 is currently limited to LiveIn values, and further work is needed to support splatting non-LiveIn values.
In that case, maybe we can proceed with the VPDT approach first. :)

llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll

fhahn · 2025-02-06T20:42:32Z

llvm/lib/Transforms/Vectorize/VPlan.cpp

+    bool SafeToHoist =
+        !Def->hasDefiningRecipe() ||
+        VPDT.properlyDominates(Def->getDefiningRecipe()->getParent(),
+                               Plan->getVectorPreheader());


IIUC properlyDominates means that we won't consider things as safe to hoist if they are defined in the vector preheader, but that would also be safe, as we insert at the end of the vector preheader. Can we catch this also by checking if it properly dominates the vector loop region?

Maybe we could use dominates instead of properlyDominates to achieve this.

However, I don’t understand the benefit of doing so. In my opinion, if Def is already defined in the preheader yet we still determine that the corresponding splat operation can be hoisted, this might move the splat operation further away from Def, extending Def's live range. (please see the change of llvm/test/Transforms/LoopVectorize/AArch64/extractvalue-no-scalarization-required.ll)

I might be missing something—could you please explain the advantages of this change in more detail?

Ping @fhahn

We could have users in the entry block as well, but here we only ever hoist to the vector preheader, so that should be fine as-is for now, thanks

fhahn

LGTM, thanks.

fhahn · 2025-03-02T16:49:28Z

llvm/lib/Transforms/Vectorize/VPlan.cpp

+    bool SafeToHoist =
+        !Def->hasDefiningRecipe() ||
+        VPDT.properlyDominates(Def->getDefiningRecipe()->getParent(),
+                               Plan->getVectorPreheader());


We could have users in the entry block as well, but here we only ever hoist to the vector preheader, so that should be fine as-is for now, thanks

…recipe.

…ed by a recipe. (#117138)" This reverts commit 1ff10fa.

…ee (#117138) This patch restricts broadcast operations from being hoisted to the vector preheader unless the basic block that defines the broadcasted value properly dominates the vector preheader. This prevents potential use-before-definition issues when the broadcasted value is defined within the plan. VPDominatorTree is used to confirm this restriction while still allowing safe hoisting for broadcasted values defined outside the plan. Issue #117139

Mel-Chen · 2025-03-13T14:22:10Z

Accidentally used the wrong commit log. :(
Reverted and recommitted with the corrected commit log.
5d5e706

…ed by a recipe. (llvm#117138)" This reverts commit 1ff10fa.

…ee (llvm#117138) This patch restricts broadcast operations from being hoisted to the vector preheader unless the basic block that defines the broadcasted value properly dominates the vector preheader. This prevents potential use-before-definition issues when the broadcasted value is defined within the plan. VPDominatorTree is used to confirm this restriction while still allowing safe hoisting for broadcasted values defined outside the plan. Issue llvm#117139

Mel-Chen requested review from fhahn and ayalz November 21, 2024 10:26

llvmbot added vectorizers llvm:transforms labels Nov 21, 2024

Mel-Chen requested review from alexey-bataev and david-arm November 21, 2024 10:26

Mel-Chen force-pushed the safe-to-hoist branch from ca00f44 to 2153bf3 Compare November 28, 2024 08:47

Mel-Chen commented Nov 28, 2024

View reviewed changes

llvm/lib/Transforms/Vectorize/VPlan.h Outdated Show resolved Hide resolved

david-arm reviewed Nov 28, 2024

View reviewed changes

llvm/lib/Transforms/Vectorize/VPlan.cpp Show resolved Hide resolved

Mel-Chen force-pushed the safe-to-hoist branch from 2153bf3 to 4b77adf Compare December 6, 2024 15:11

Mel-Chen force-pushed the safe-to-hoist branch from 4b77adf to 40c2dee Compare January 7, 2025 12:32

arcbbb reviewed Jan 8, 2025

View reviewed changes

llvm/lib/Transforms/Vectorize/VPlan.cpp Outdated Show resolved Hide resolved

fhahn mentioned this pull request Jan 23, 2025

[VPlan] Move auxiliary declarations out of VPlan.h (NFC). #124104

Merged

Mel-Chen force-pushed the safe-to-hoist branch 2 times, most recently from ef3da20 to 324ba39 Compare February 5, 2025 13:58

fhahn reviewed Feb 5, 2025

View reviewed changes

Mel-Chen requested review from david-arm and fhahn February 6, 2025 09:55

Mel-Chen requested a review from arcbbb February 6, 2025 09:55

fhahn requested changes Feb 6, 2025

View reviewed changes

Mel-Chen force-pushed the safe-to-hoist branch from 324ba39 to 126c398 Compare February 18, 2025 09:23

Mel-Chen requested a review from fhahn February 18, 2025 09:33

fhahn approved these changes Mar 2, 2025

View reviewed changes

Mel-Chen added 2 commits March 11, 2025 00:18

[LV] Limits the splat operations be hoisted must not be defined by a …

8a67a65

…recipe.

Rebase and update test case

68426e0

Mel-Chen force-pushed the safe-to-hoist branch from 126c398 to 68426e0 Compare March 11, 2025 07:36

Mel-Chen merged commit 1ff10fa into llvm:main Mar 11, 2025
11 checks passed

Mel-Chen added a commit that referenced this pull request Mar 13, 2025

Revert "[LV] Limits the splat operations be hoisted must not be defin…

ffe202c

…ed by a recipe. (#117138)" This reverts commit 1ff10fa.

frederik-h pushed a commit to frederik-h/llvm-project that referenced this pull request Mar 18, 2025

Revert "[LV] Limits the splat operations be hoisted must not be defin…

c0a0eb5

…ed by a recipe. (llvm#117138)" This reverts commit 1ff10fa.

[LV] Limits the splat operations be hoisted must not be defined by a recipe. #117138

[LV] Limits the splat operations be hoisted must not be defined by a recipe. #117138

Uh oh!

Conversation

Mel-Chen commented Nov 21, 2024 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

llvmbot commented Nov 21, 2024 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

fhahn commented Nov 21, 2024

Uh oh!

Uh oh!

Uh oh!

Mel-Chen commented Dec 13, 2024

Uh oh!

Mel-Chen commented Jan 7, 2025

Uh oh!

Uh oh!

fhahn commented Jan 23, 2025

Uh oh!

Mel-Chen commented Feb 5, 2025

Uh oh!

fhahn left a comment

Choose a reason for hiding this comment

Uh oh!

Mel-Chen commented Feb 6, 2025

Uh oh!

Uh oh!

fhahn Feb 6, 2025

Choose a reason for hiding this comment

Uh oh!

Mel-Chen Feb 18, 2025

Choose a reason for hiding this comment

Uh oh!

Mel-Chen Feb 25, 2025

Choose a reason for hiding this comment

Uh oh!

fhahn Mar 2, 2025

Choose a reason for hiding this comment

Uh oh!

fhahn left a comment

Choose a reason for hiding this comment

Uh oh!

fhahn Mar 2, 2025

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Mel-Chen commented Mar 13, 2025

Uh oh!

Uh oh!

Mel-Chen commented Nov 21, 2024 •

edited

Loading

llvmbot commented Nov 21, 2024 •

edited

Loading