Skip to content

[LegacyPM][DirectX] Add legacy scalarizer back for use in the DirectX backend #107427

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Sep 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions llvm/include/llvm/InitializePasses.h
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,7 @@ void initializeSafepointIRVerifierPass(PassRegistry &);
void initializeSelectOptimizePass(PassRegistry &);
void initializeScalarEvolutionWrapperPassPass(PassRegistry &);
void initializeScalarizeMaskedMemIntrinLegacyPassPass(PassRegistry &);
void initializeScalarizerLegacyPassPass(PassRegistry &);
void initializeScavengerTestPass(PassRegistry &);
void initializeScopedNoAliasAAWrapperPassPass(PassRegistry &);
void initializeSeparateConstOffsetFromGEPLegacyPassPass(PassRegistry &);
Expand Down
1 change: 1 addition & 0 deletions llvm/include/llvm/LinkAllPasses.h
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,7 @@ struct ForcePassLinking {
(void)llvm::createLowerAtomicPass();
(void)llvm::createLoadStoreVectorizerPass();
(void)llvm::createPartiallyInlineLibCallsPass();
(void)llvm::createScalarizerPass();
(void)llvm::createSeparateConstOffsetFromGEPPass();
(void)llvm::createSpeculativeExecutionPass();
(void)llvm::createSpeculativeExecutionIfHasBranchDivergencePass();
Expand Down
5 changes: 5 additions & 0 deletions llvm/include/llvm/Transforms/Scalar/Scalarizer.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
namespace llvm {

class Function;
class FunctionPass;

struct ScalarizerPassOptions {
// These options correspond 1:1 to cl::opt options defined in
Expand All @@ -50,6 +51,10 @@ class ScalarizerPass : public PassInfoMixin<ScalarizerPass> {
void setScalarizeLoadStore(bool Value) { Options.ScalarizeLoadStore = Value; }
void setScalarizeMinBits(unsigned Value) { Options.ScalarizeMinBits = Value; }
};

/// Create a legacy pass manager instance of the Scalarizer pass.
///
/// \param Options Scalarizer settings for the created pass; when omitted, a
/// default-constructed ScalarizerPassOptions is used.
/// \returns A FunctionPass pointer to the created pass.
FunctionPass *createScalarizerPass(
const ScalarizerPassOptions &Options = ScalarizerPassOptions());
}

#endif /* LLVM_TRANSFORMS_SCALAR_SCALARIZER_H */
6 changes: 6 additions & 0 deletions llvm/lib/Target/DirectX/DirectXTargetMachine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/IRPrintingPasses.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/InitializePasses.h"
#include "llvm/MC/MCSectionDXContainer.h"
#include "llvm/MC/SectionKind.h"
#include "llvm/MC/TargetRegistry.h"
Expand All @@ -36,6 +37,7 @@
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Transforms/Scalar/Scalarizer.h"
#include <optional>

using namespace llvm;
Expand All @@ -44,6 +46,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeDirectXTarget() {
RegisterTargetMachine<DirectXTargetMachine> X(getTheDirectXTarget());
auto *PR = PassRegistry::getPassRegistry();
initializeDXILIntrinsicExpansionLegacyPass(*PR);
initializeScalarizerLegacyPassPass(*PR);
initializeDXILPrepareModulePass(*PR);
initializeEmbedDXILPassPass(*PR);
initializeWriteDXILPassPass(*PR);
Expand Down Expand Up @@ -83,6 +86,9 @@ class DirectXPassConfig : public TargetPassConfig {
FunctionPass *createTargetRegisterAllocator(bool) override { return nullptr; }
void addCodeGenPrepare() override {
addPass(createDXILIntrinsicExpansionLegacyPass());
ScalarizerPassOptions DxilScalarOptions;
DxilScalarOptions.ScalarizeLoadStore = true;
addPass(createScalarizerPass(DxilScalarOptions));
addPass(createDXILOpLoweringLegacyPass());
addPass(createDXILFinalizeLinkageLegacyPass());
addPass(createDXILTranslateMetadataLegacyPass());
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Transforms/Scalar/Scalar.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ using namespace llvm;
void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializeConstantHoistingLegacyPassPass(Registry);
initializeDCELegacyPassPass(Registry);
initializeScalarizerLegacyPassPass(Registry);
initializeGVNLegacyPassPass(Registry);
initializeEarlyCSELegacyPassPass(Registry);
initializeEarlyCSEMemSSALegacyPassPass(Registry);
Expand Down
39 changes: 39 additions & 0 deletions llvm/lib/Transforms/Scalar/Scalarizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/Utils/Local.h"
Expand Down Expand Up @@ -340,8 +341,33 @@ class ScalarizerVisitor : public InstVisitor<ScalarizerVisitor, bool> {
const unsigned ScalarizeMinBits;
};

class ScalarizerLegacyPass : public FunctionPass {
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@boomanaiden154 and @nikic Since you removed and approved the removal of the legacy pass in this commit: 2470857

I wanted to make sure you are OK with our reasons for bringing it back for the DirectX backend.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's ok to bring the pass back for backend use. Just to confirm, the pass is a legalization requirement and injecting it via a pass builder callback into the middle-end pipeline is not an option?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, it's a legalization requirement. It also handles the cleanup of extraneous insertelement instructions, which we need for the DXIL to be correct, so it makes the most sense for us to do this in the backend.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seems reasonable enough to me. There are still quite a few (late) middle-end passes that support both the NewPM and the legacyPM due to being needed by backends. The main reason for removal was that no one was using the ScalarizerLegacyPass and thus there was no test coverage.

If someone is using it, it's reasonable enough to keep it.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@nikic or @boomanaiden154, would you be willing to approve this PR?

public:
static char ID;
ScalarizerPassOptions Options;
ScalarizerLegacyPass() : FunctionPass(ID), Options() {}
ScalarizerLegacyPass(const ScalarizerPassOptions &Options);
bool runOnFunction(Function &F) override;
void getAnalysisUsage(AnalysisUsage &AU) const override;
};

} // end anonymous namespace

// Construct the legacy pass with caller-supplied scalarizer options. The
// argument is copied into the pass's own Options member, so the caller's
// object does not need to outlive the pass.
ScalarizerLegacyPass::ScalarizerLegacyPass(const ScalarizerPassOptions &Options)
: FunctionPass(ID), Options(Options) {}

// Declare this pass's analysis usage: it requires the dominator tree wrapper
// analysis (runOnFunction fetches it via getAnalysis) and reports that same
// analysis as preserved.
void ScalarizerLegacyPass::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<DominatorTreeWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
}

// Storage for the static pass ID that the ScalarizerLegacyPass constructors
// hand to the FunctionPass base class.
char ScalarizerLegacyPass::ID = 0;
// Register the pass under the name "scalarizer" with the description
// "Scalarize vector operations", and declare DominatorTreeWrapperPass as a
// dependency so it is initialized alongside this pass.
// NOTE(review): the two trailing `false` arguments are presumably the
// CFG-only and is-analysis flags of the INITIALIZE_PASS macros -- confirm
// against llvm/PassSupport.h.
INITIALIZE_PASS_BEGIN(ScalarizerLegacyPass, "scalarizer",
"Scalarize vector operations", false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_END(ScalarizerLegacyPass, "scalarizer",
"Scalarize vector operations", false, false)

Scatterer::Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v,
const VectorSplit &VS, ValueVector *cachePtr)
: BB(bb), BBI(bbi), V(v), VS(VS), CachePtr(cachePtr) {
Expand Down Expand Up @@ -414,6 +440,19 @@ Value *Scatterer::operator[](unsigned Frag) {
return CV[Frag];
}

/// Legacy pass manager entry point: scalarize \p F using the options stored
/// on this pass.
///
/// \returns false without running the visitor when skipFunction(F) reports
/// that the function should be skipped; otherwise whatever
/// ScalarizerVisitor::visit(F) returns.
bool ScalarizerLegacyPass::runOnFunction(Function &F) {
  if (skipFunction(F))
    return false;

  // The visitor takes the dominator tree by pointer, so grab the required
  // wrapper analysis first and pass the address of the tree it holds.
  auto &DomTreeWrapper = getAnalysis<DominatorTreeWrapperPass>();
  ScalarizerVisitor Visitor(&DomTreeWrapper.getDomTree(), Options);
  return Visitor.visit(F);
}

// Factory for the legacy pass manager: heap-allocates a ScalarizerLegacyPass
// configured with Options and returns it as a FunctionPass pointer.
// NOTE(review): presumably the pass manager the result is added to takes
// ownership of the allocation -- confirm against the callers.
FunctionPass *llvm::createScalarizerPass(const ScalarizerPassOptions &Options) {
return new ScalarizerLegacyPass(Options);
}

bool ScalarizerVisitor::visit(Function &F) {
assert(Gathered.empty() && Scattered.empty());

Expand Down
25 changes: 22 additions & 3 deletions llvm/test/CodeGen/DirectX/acos.ll
Original file line number Diff line number Diff line change
@@ -1,20 +1,39 @@
; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s

; Make sure dxil operation function calls for acos are generated for float and half.

define noundef float @tan_float(float noundef %a) {
define noundef float @acos_float(float noundef %a) {
entry:
; CHECK:call float @dx.op.unary.f32(i32 15, float %{{.*}})
%elt.acos = call float @llvm.acos.f32(float %a)
ret float %elt.acos
}

define noundef half @tan_half(half noundef %a) {
define noundef half @acos_half(half noundef %a) {
entry:
; CHECK:call half @dx.op.unary.f16(i32 15, half %{{.*}})
%elt.acos = call half @llvm.acos.f16(half %a)
ret half %elt.acos
}

; Vector case: the <4 x float> acos call is expected to become four per-lane
; dx.op.unary.f32 calls (opcode 15) on elements extracted from %a, with the
; four results reassembled into a vector via insertelement.
define noundef <4 x float> @acos_float4(<4 x float> noundef %a) {
entry:
; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0
; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 15, float [[ee0]])
; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1
; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 15, float [[ee1]])
; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2
; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 15, float [[ee2]])
; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3
; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 15, float [[ee3]])
; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0
; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1
; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2
; CHECK: insertelement <4 x float> %{{.*}}, float [[ie3]], i64 3
%2 = call <4 x float> @llvm.acos.v4f32(<4 x float> %a)
ret <4 x float> %2
}

declare half @llvm.acos.f16(half)
declare float @llvm.acos.f32(float)
declare <4 x float> @llvm.acos.v4f32(<4 x float>)
25 changes: 22 additions & 3 deletions llvm/test/CodeGen/DirectX/asin.ll
Original file line number Diff line number Diff line change
@@ -1,20 +1,39 @@
; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s

; Make sure dxil operation function calls for asin are generated for float and half.

define noundef float @tan_float(float noundef %a) {
define noundef float @asin_float(float noundef %a) {
entry:
; CHECK:call float @dx.op.unary.f32(i32 16, float %{{.*}})
%elt.asin = call float @llvm.asin.f32(float %a)
ret float %elt.asin
}

define noundef half @tan_half(half noundef %a) {
define noundef half @asin_half(half noundef %a) {
entry:
; CHECK:call half @dx.op.unary.f16(i32 16, half %{{.*}})
%elt.asin = call half @llvm.asin.f16(half %a)
ret half %elt.asin
}

; Vector case: the <4 x float> asin call is expected to become four per-lane
; dx.op.unary.f32 calls (opcode 16) on elements extracted from %a, with the
; four results reassembled into a vector via insertelement.
define noundef <4 x float> @asin_float4(<4 x float> noundef %a) {
entry:
; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0
; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 16, float [[ee0]])
; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1
; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 16, float [[ee1]])
; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2
; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 16, float [[ee2]])
; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3
; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 16, float [[ee3]])
; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0
; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1
; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2
; CHECK: insertelement <4 x float> %{{.*}}, float [[ie3]], i64 3
%2 = call <4 x float> @llvm.asin.v4f32(<4 x float> %a)
ret <4 x float> %2
}

declare half @llvm.asin.f16(half)
declare float @llvm.asin.f32(float)
declare <4 x float> @llvm.asin.v4f32(<4 x float>)
25 changes: 22 additions & 3 deletions llvm/test/CodeGen/DirectX/atan.ll
Original file line number Diff line number Diff line change
@@ -1,20 +1,39 @@
; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s

; Make sure dxil operation function calls for atan are generated for float and half.

define noundef float @tan_float(float noundef %a) {
define noundef float @atan_float(float noundef %a) {
entry:
; CHECK:call float @dx.op.unary.f32(i32 17, float %{{.*}})
%elt.atan = call float @llvm.atan.f32(float %a)
ret float %elt.atan
}

define noundef half @tan_half(half noundef %a) {
define noundef half @atan_half(half noundef %a) {
entry:
; CHECK:call half @dx.op.unary.f16(i32 17, half %{{.*}})
%elt.atan = call half @llvm.atan.f16(half %a)
ret half %elt.atan
}

; Vector case: the <4 x float> atan call is expected to become four per-lane
; dx.op.unary.f32 calls (opcode 17) on elements extracted from %a, with the
; four results reassembled into a vector via insertelement.
define noundef <4 x float> @atan_float4(<4 x float> noundef %a) {
entry:
; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0
; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 17, float [[ee0]])
; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1
; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 17, float [[ee1]])
; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2
; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 17, float [[ee2]])
; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3
; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 17, float [[ee3]])
; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0
; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1
; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2
; CHECK: insertelement <4 x float> %{{.*}}, float [[ie3]], i64 3
%2 = call <4 x float> @llvm.atan.v4f32(<4 x float> %a)
ret <4 x float> %2
}

declare half @llvm.atan.f16(half)
declare float @llvm.atan.f32(float)
declare <4 x float> @llvm.atan.v4f32(<4 x float>)
21 changes: 20 additions & 1 deletion llvm/test/CodeGen/DirectX/ceil.ll
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s

; Make sure dxil operation function calls for ceil are generated for float and half.

Expand All @@ -16,5 +16,24 @@ entry:
ret half %elt.ceil
}

; Vector case: the <4 x float> ceil call is expected to become four per-lane
; dx.op.unary.f32 calls (opcode 28) on elements extracted from %a, with the
; four results reassembled into a vector via insertelement.
define noundef <4 x float> @ceil_float4(<4 x float> noundef %a) {
entry:
; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0
; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 28, float [[ee0]])
; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1
; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 28, float [[ee1]])
; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2
; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 28, float [[ee2]])
; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3
; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 28, float [[ee3]])
; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0
; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1
; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2
; CHECK: insertelement <4 x float> %{{.*}}, float [[ie3]], i64 3
%2 = call <4 x float> @llvm.ceil.v4f32(<4 x float> %a)
ret <4 x float> %2
}

declare half @llvm.ceil.f16(half)
declare float @llvm.ceil.f32(float)
declare <4 x float> @llvm.ceil.v4f32(<4 x float>)
21 changes: 20 additions & 1 deletion llvm/test/CodeGen/DirectX/cos.ll
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s

; Make sure dxil operation function calls for cos are generated for float and half.

Expand All @@ -16,5 +16,24 @@ entry:
ret half %elt.cos
}

; Vector case: the <4 x float> cos call is expected to become four per-lane
; dx.op.unary.f32 calls (opcode 12) on elements extracted from %a, with the
; four results reassembled into a vector via insertelement.
; NOTE(review): this function references attribute group #0, but no
; `attributes #0` definition is visible in this part of the file -- confirm
; whether the reference is intentional or can be dropped.
define noundef <4 x float> @cos_float4(<4 x float> noundef %a) #0 {
entry:
; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0
; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 12, float [[ee0]])
; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1
; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 12, float [[ee1]])
; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2
; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 12, float [[ee2]])
; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3
; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 12, float [[ee3]])
; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0
; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1
; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2
; CHECK: insertelement <4 x float> %{{.*}}, float [[ie3]], i64 3
%2 = call <4 x float> @llvm.cos.v4f32(<4 x float> %a)
ret <4 x float> %2
}

declare half @llvm.cos.f16(half)
declare float @llvm.cos.f32(float)
declare <4 x float> @llvm.cos.v4f32(<4 x float>)
25 changes: 22 additions & 3 deletions llvm/test/CodeGen/DirectX/cosh.ll
Original file line number Diff line number Diff line change
@@ -1,20 +1,39 @@
; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s

; Make sure dxil operation function calls for cosh are generated for float and half.

define noundef float @tan_float(float noundef %a) {
define noundef float @cosh_float(float noundef %a) {
entry:
; CHECK:call float @dx.op.unary.f32(i32 18, float %{{.*}})
%elt.cosh = call float @llvm.cosh.f32(float %a)
ret float %elt.cosh
}

define noundef half @tan_half(half noundef %a) {
define noundef half @cosh_half(half noundef %a) {
entry:
; CHECK:call half @dx.op.unary.f16(i32 18, half %{{.*}})
%elt.cosh = call half @llvm.cosh.f16(half %a)
ret half %elt.cosh
}

; Vector case: the <4 x float> cosh call is expected to become four per-lane
; dx.op.unary.f32 calls (opcode 18) on elements extracted from %a, with the
; four results reassembled into a vector via insertelement.
; NOTE(review): this function references attribute group #0, but no
; `attributes #0` definition is visible in this file -- confirm whether the
; reference is intentional or can be dropped.
define noundef <4 x float> @cosh_float4(<4 x float> noundef %a) #0 {
entry:
; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0
; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 18, float [[ee0]])
; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1
; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 18, float [[ee1]])
; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2
; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 18, float [[ee2]])
; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3
; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 18, float [[ee3]])
; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0
; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1
; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2
; CHECK: insertelement <4 x float> %{{.*}}, float [[ie3]], i64 3
%2 = call <4 x float> @llvm.cosh.v4f32(<4 x float> %a)
ret <4 x float> %2
}

declare half @llvm.cosh.f16(half)
declare float @llvm.cosh.f32(float)
declare <4 x float> @llvm.cosh.v4f32(<4 x float>)
Loading
Loading