-
Notifications
You must be signed in to change notification settings - Fork 14.3k
Revert "[RISCV] Expand vp.stride.load to splat of a scalar load." #98422
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Conversation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This reverts commit cda245a.
@llvm/pr-subscribers-backend-risc-v Author: Nico Weber (nico) ChangesReverts llvm/llvm-project#98140 Breaks tests, see comments on the PR. Full diff: https://github.com/llvm/llvm-project/pull/98422.diff 3 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp b/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp
index 10e0496f16d4f..6e0f429c34b2f 100644
--- a/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp
+++ b/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp
@@ -18,11 +18,9 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/TargetPassConfig.h"
-#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/Intrinsics.h"
-#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
@@ -37,7 +35,6 @@ namespace {
class RISCVCodeGenPrepare : public FunctionPass,
public InstVisitor<RISCVCodeGenPrepare, bool> {
const DataLayout *DL;
- const DominatorTree *DT;
const RISCVSubtarget *ST;
public:
@@ -51,14 +48,12 @@ class RISCVCodeGenPrepare : public FunctionPass,
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
- AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<TargetPassConfig>();
}
bool visitInstruction(Instruction &I) { return false; }
bool visitAnd(BinaryOperator &BO);
bool visitIntrinsicInst(IntrinsicInst &I);
- bool expandVPStrideLoad(IntrinsicInst &I);
};
} // end anonymous namespace
@@ -133,9 +128,6 @@ bool RISCVCodeGenPrepare::visitAnd(BinaryOperator &BO) {
// Which eliminates the scalar -> vector -> scalar crossing during instruction
// selection.
bool RISCVCodeGenPrepare::visitIntrinsicInst(IntrinsicInst &I) {
- if (expandVPStrideLoad(I))
- return true;
-
if (I.getIntrinsicID() != Intrinsic::vector_reduce_fadd)
return false;
@@ -163,47 +155,6 @@ bool RISCVCodeGenPrepare::visitIntrinsicInst(IntrinsicInst &I) {
return true;
}
-bool RISCVCodeGenPrepare::expandVPStrideLoad(IntrinsicInst &II) {
- if (ST->hasOptimizedZeroStrideLoad())
- return false;
-
- Value *BasePtr, *VL;
- using namespace PatternMatch;
- if (!match(&II, m_Intrinsic<Intrinsic::experimental_vp_strided_load>(
- m_Value(BasePtr), m_Zero(), m_AllOnes(), m_Value(VL))))
- return false;
-
- if (!isKnownNonZero(VL, {*DL, DT, nullptr, &II}))
- return false;
-
- auto *VTy = cast<VectorType>(II.getType());
-
- IRBuilder<> Builder(&II);
-
- // Extend VL from i32 to XLen if needed.
- if (ST->is64Bit())
- VL = Builder.CreateZExt(VL, Builder.getInt64Ty());
-
- Type *STy = VTy->getElementType();
- Value *Val = Builder.CreateLoad(STy, BasePtr);
- const auto &TLI = *ST->getTargetLowering();
- Value *Res;
-
- // TODO: Also support fixed/illegal vector types to splat with evl = vl.
- if (isa<ScalableVectorType>(VTy) && TLI.isTypeLegal(EVT::getEVT(VTy))) {
- unsigned VMVOp = STy->isFloatingPointTy() ? Intrinsic::riscv_vfmv_v_f
- : Intrinsic::riscv_vmv_v_x;
- Res = Builder.CreateIntrinsic(VMVOp, {VTy, VL->getType()},
- {PoisonValue::get(VTy), Val, VL});
- } else {
- Res = Builder.CreateVectorSplat(VTy->getElementCount(), Val);
- }
-
- II.replaceAllUsesWith(Res);
- II.eraseFromParent();
- return true;
-}
-
bool RISCVCodeGenPrepare::runOnFunction(Function &F) {
if (skipFunction(F))
return false;
@@ -213,7 +164,6 @@ bool RISCVCodeGenPrepare::runOnFunction(Function &F) {
ST = &TM.getSubtarget<RISCVSubtarget>(F);
DL = &F.getDataLayout();
- DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
bool MadeChange = false;
for (auto &BB : F)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll
index 86359043a90d9..5e64e9fbc1a2f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll
@@ -1,16 +1,10 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+v,+zvfh \
-; RUN: -verify-machineinstrs < %s | FileCheck %s \
-; RUN: -check-prefixes=CHECK,CHECK-RV32
+; RUN: -verify-machineinstrs < %s \
+; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-RV32
; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+v,+zvfh \
-; RUN: -verify-machineinstrs < %s | FileCheck %s \
-; RUN: -check-prefixes=CHECK,CHECK-RV64,CHECK-OPT
-; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+v,+zvfh,+no-optimized-zero-stride-load \
-; RUN: -verify-machineinstrs < %s | FileCheck %s \
-; RUN: -check-prefixes=CHECK,CHECK-RV32,CHECK-NOOPT
-; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+v,+zvfh,+no-optimized-zero-stride-load \
-; RUN: -verify-machineinstrs < %s | FileCheck %s \
-; RUN: -check-prefixes=CHECK,CHECK-RV64,CHECK-NOOPT
+; RUN: -verify-machineinstrs < %s \
+; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-RV64
declare <2 x i8> @llvm.experimental.vp.strided.load.v2i8.p0.i8(ptr, i8, <2 x i1>, i32)
@@ -632,39 +626,3 @@ define <33 x double> @strided_load_v33f64(ptr %ptr, i64 %stride, <33 x i1> %mask
}
declare <33 x double> @llvm.experimental.vp.strided.load.v33f64.p0.i64(ptr, i64, <33 x i1>, i32)
-
-; Test unmasked integer zero strided
-define <4 x i8> @zero_strided_unmasked_vpload_4i8_i8(ptr %ptr) {
-; CHECK-OPT-LABEL: zero_strided_unmasked_vpload_4i8_i8:
-; CHECK-OPT: # %bb.0:
-; CHECK-OPT-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
-; CHECK-OPT-NEXT: vlse8.v v8, (a0), zero
-; CHECK-OPT-NEXT: ret
-;
-; CHECK-NOOPT-LABEL: zero_strided_unmasked_vpload_4i8_i8:
-; CHECK-NOOPT: # %bb.0:
-; CHECK-NOOPT-NEXT: lbu a0, 0(a0)
-; CHECK-NOOPT-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
-; CHECK-NOOPT-NEXT: vmv.v.x v8, a0
-; CHECK-NOOPT-NEXT: ret
- %load = call <4 x i8> @llvm.experimental.vp.strided.load.4i8.p0.i8(ptr %ptr, i8 0, <4 x i1> splat (i1 true), i32 4)
- ret <4 x i8> %load
-}
-
-; Test unmasked float zero strided
-define <4 x half> @zero_strided_unmasked_vpload_4f16(ptr %ptr) {
-; CHECK-OPT-LABEL: zero_strided_unmasked_vpload_4f16:
-; CHECK-OPT: # %bb.0:
-; CHECK-OPT-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; CHECK-OPT-NEXT: vlse16.v v8, (a0), zero
-; CHECK-OPT-NEXT: ret
-;
-; CHECK-NOOPT-LABEL: zero_strided_unmasked_vpload_4f16:
-; CHECK-NOOPT: # %bb.0:
-; CHECK-NOOPT-NEXT: flh fa5, 0(a0)
-; CHECK-NOOPT-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; CHECK-NOOPT-NEXT: vfmv.v.f v8, fa5
-; CHECK-NOOPT-NEXT: ret
- %load = call <4 x half> @llvm.experimental.vp.strided.load.4f16.p0.i32(ptr %ptr, i32 0, <4 x i1> splat (i1 true), i32 4)
- ret <4 x half> %load
-}
diff --git a/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll b/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll
index d422ed5dcfc22..4d3bced0bcb50 100644
--- a/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll
@@ -1,16 +1,10 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+v,+zvfh \
; RUN: -verify-machineinstrs < %s | FileCheck %s \
-; RUN: -check-prefixes=CHECK,CHECK-RV32,CHECK-OPT
+; RUN: -check-prefixes=CHECK,CHECK-RV32
; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+v,+zvfh \
; RUN: -verify-machineinstrs < %s | FileCheck %s \
-; RUN: -check-prefixes=CHECK,CHECK-RV64,CHECK-OPT
-; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+v,+zvfh,+no-optimized-zero-stride-load \
-; RUN: -verify-machineinstrs < %s | FileCheck %s \
-; RUN: -check-prefixes=CHECK,CHECK-RV32,CHECK-NOOPT
-; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+v,+zvfh,+no-optimized-zero-stride-load \
-; RUN: -verify-machineinstrs < %s | FileCheck %s \
-; RUN: -check-prefixes=CHECK,CHECK-RV64,CHECK-NOOPT
+; RUN: -check-prefixes=CHECK,CHECK-RV64
declare <vscale x 1 x i8> @llvm.experimental.vp.strided.load.nxv1i8.p0.i8(ptr, i8, <vscale x 1 x i1>, i32)
@@ -786,39 +780,3 @@ define <vscale x 16 x double> @strided_load_nxv17f64(ptr %ptr, i64 %stride, <vsc
declare <vscale x 17 x double> @llvm.experimental.vp.strided.load.nxv17f64.p0.i64(ptr, i64, <vscale x 17 x i1>, i32)
declare <vscale x 1 x double> @llvm.experimental.vector.extract.nxv1f64(<vscale x 17 x double> %vec, i64 %idx)
declare <vscale x 16 x double> @llvm.experimental.vector.extract.nxv16f64(<vscale x 17 x double> %vec, i64 %idx)
-
-; Test unmasked integer zero strided
-define <vscale x 1 x i8> @zero_strided_unmasked_vpload_nxv1i8_i8(ptr %ptr) {
-; CHECK-OPT-LABEL: zero_strided_unmasked_vpload_nxv1i8_i8:
-; CHECK-OPT: # %bb.0:
-; CHECK-OPT-NEXT: vsetivli zero, 4, e8, mf8, ta, ma
-; CHECK-OPT-NEXT: vlse8.v v8, (a0), zero
-; CHECK-OPT-NEXT: ret
-;
-; CHECK-NOOPT-LABEL: zero_strided_unmasked_vpload_nxv1i8_i8:
-; CHECK-NOOPT: # %bb.0:
-; CHECK-NOOPT-NEXT: lbu a0, 0(a0)
-; CHECK-NOOPT-NEXT: vsetivli zero, 4, e8, mf8, ta, ma
-; CHECK-NOOPT-NEXT: vmv.v.x v8, a0
-; CHECK-NOOPT-NEXT: ret
- %load = call <vscale x 1 x i8> @llvm.experimental.vp.strided.load.nxv1i8.p0.i8(ptr %ptr, i8 0, <vscale x 1 x i1> splat (i1 true), i32 4)
- ret <vscale x 1 x i8> %load
-}
-
-; Test unmasked float zero strided
-define <vscale x 1 x half> @zero_strided_unmasked_vpload_nxv1f16(ptr %ptr) {
-; CHECK-OPT-LABEL: zero_strided_unmasked_vpload_nxv1f16:
-; CHECK-OPT: # %bb.0:
-; CHECK-OPT-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
-; CHECK-OPT-NEXT: vlse16.v v8, (a0), zero
-; CHECK-OPT-NEXT: ret
-;
-; CHECK-NOOPT-LABEL: zero_strided_unmasked_vpload_nxv1f16:
-; CHECK-NOOPT: # %bb.0:
-; CHECK-NOOPT-NEXT: flh fa5, 0(a0)
-; CHECK-NOOPT-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
-; CHECK-NOOPT-NEXT: vfmv.v.f v8, fa5
-; CHECK-NOOPT-NEXT: ret
- %load = call <vscale x 1 x half> @llvm.experimental.vp.strided.load.nxv1f16.p0.i32(ptr %ptr, i32 0, <vscale x 1 x i1> splat (i1 true), i32 4)
- ret <vscale x 1 x half> %load
-}
|
aaryanshukla
pushed a commit
to aaryanshukla/llvm-project
that referenced
this pull request
Jul 14, 2024
(llvm#98422) Reverts llvm#98140. Breaks tests, see comments on the PR.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Reverts #98140
Breaks tests, see comments on the PR.