llvm
diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h
Lines changed: 1 addition & 1 deletion
diff --git a/llvm/include/llvm/Transforms/Scalar/Scalarizer.h b/llvm/include/llvm/Transforms/Scalar/Scalarizer.h
Lines changed: 3 additions & 12 deletions
diff --git a/llvm/lib/Target/DirectX/DXILOpLowering.cpp b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
Lines changed: 0 additions & 2 deletions
diff --git a/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp b/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp
Lines changed: 5 additions & 2 deletions
diff --git a/llvm/lib/Transforms/Scalar/Scalarizer.cpp b/llvm/lib/Transforms/Scalar/Scalarizer.cpp
Lines changed: 18 additions & 9 deletions
diff --git a/llvm/test/CodeGen/DirectX/acos.ll b/llvm/test/CodeGen/DirectX/acos.ll
Lines changed: 22 additions & 3 deletions
diff --git a/llvm/test/CodeGen/DirectX/asin.ll b/llvm/test/CodeGen/DirectX/asin.ll
Lines changed: 22 additions & 3 deletions
diff --git a/llvm/test/CodeGen/DirectX/atan.ll b/llvm/test/CodeGen/DirectX/atan.ll
Lines changed: 22 additions & 3 deletions
diff --git a/llvm/test/CodeGen/DirectX/ceil.ll b/llvm/test/CodeGen/DirectX/ceil.ll
Lines changed: 20 additions & 1 deletion
diff --git a/llvm/test/CodeGen/DirectX/cos.ll b/llvm/test/CodeGen/DirectX/cos.ll
Lines changed: 20 additions & 1 deletion
diff --git a/llvm/test/CodeGen/DirectX/cosh.ll b/llvm/test/CodeGen/DirectX/cosh.ll
Lines changed: 22 additions & 3 deletions
@@ -276,7 +276,7 @@ void initializeSafepointIRVerifierPass(PassRegistry &);
 void initializeSelectOptimizePass(PassRegistry &);
 void initializeScalarEvolutionWrapperPassPass(PassRegistry &);
 void initializeScalarizeMaskedMemIntrinLegacyPassPass(PassRegistry &);
-void initializeScalarizerLegacyPassPass(PassRegistry&);
+void initializeScalarizerLegacyPassPass(PassRegistry &);
 void initializeScavengerTestPass(PassRegistry &);
 void initializeScopedNoAliasAAWrapperPassPass(PassRegistry &);
 void initializeSeparateConstOffsetFromGEPLegacyPassPass(PassRegistry &);
 
@@ -19,12 +19,12 @@
 #define LLVM_TRANSFORMS_SCALAR_SCALARIZER_H
 
 #include "llvm/IR/PassManager.h"
-#include "llvm/Pass.h"
 #include <optional>
 
 namespace llvm {
 
 class Function;
+class FunctionPass;
 
 struct ScalarizerPassOptions {
   // These options correspond 1:1 to cl::opt options defined in
@@ -53,17 +53,8 @@ class ScalarizerPass : public PassInfoMixin<ScalarizerPass> {
 };
 
 /// Create a legacy pass manager instance of the Scalarizer pass
-FunctionPass *createScalarizerPass();
-
-class ScalarizerLegacyPass : public FunctionPass {
-public:
-  static char ID;
-  ScalarizerPassOptions Options;
-  ScalarizerLegacyPass();
-  bool runOnFunction(Function &F) override;
-  void getAnalysisUsage(AnalysisUsage& AU) const override;
-};
-
+FunctionPass *createScalarizerPass(
+    const ScalarizerPassOptions &Options = ScalarizerPassOptions());
 }
 
 #endif /* LLVM_TRANSFORMS_SCALAR_SCALARIZER_H */
@@ -24,7 +24,6 @@
 #include "llvm/InitializePasses.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/ErrorHandling.h"
-#include "llvm/Transforms/Scalar/Scalarizer.h"
 
 #define DEBUG_TYPE "dxil-op-lower"
 
@@ -522,7 +521,6 @@ class DXILOpLoweringLegacy : public ModulePass {
   static char ID; // Pass identification.
   void getAnalysisUsage(llvm::AnalysisUsage &AU) const override {
     AU.addRequired<DXILIntrinsicExpansionLegacy>();
-    AU.addRequired<ScalarizerLegacyPass>();
     AU.addRequired<DXILResourceWrapperPass>();
     AU.addPreserved<DXILResourceWrapperPass>();
   }
 
@@ -26,9 +26,9 @@
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
-#include "llvm/InitializePasses.h"
 #include "llvm/IR/IRPrintingPasses.h"
 #include "llvm/IR/LegacyPassManager.h"
+#include "llvm/InitializePasses.h"
 #include "llvm/MC/MCSectionDXContainer.h"
 #include "llvm/MC/SectionKind.h"
 #include "llvm/MC/TargetRegistry.h"
@@ -86,7 +86,10 @@ class DirectXPassConfig : public TargetPassConfig {
   FunctionPass *createTargetRegisterAllocator(bool) override { return nullptr; }
   void addCodeGenPrepare() override {
     addPass(createDXILIntrinsicExpansionLegacyPass());
-    addPass(createScalarizerPass());
+    ScalarizerPassOptions DxilScalarOptions;
+    // The only non-default option we need to set is ScalarizeLoadStore.
+    DxilScalarOptions.ScalarizeLoadStore = true;
+    addPass(createScalarizerPass(DxilScalarOptions));
     addPass(createDXILOpLoweringLegacyPass());
     addPass(createDXILFinalizeLinkageLegacyPass());
     addPass(createDXILTranslateMetadataLegacyPass());
 
@@ -340,16 +340,25 @@ class ScalarizerVisitor : public InstVisitor<ScalarizerVisitor, bool> {
   const bool ScalarizeLoadStore;
   const unsigned ScalarizeMinBits;
 };
+
+class ScalarizerLegacyPass : public FunctionPass {
+public:
+  static char ID;
+  ScalarizerPassOptions Options;
+  ScalarizerLegacyPass() : FunctionPass(ID), Options() {}
+  ScalarizerLegacyPass(const ScalarizerPassOptions &Options);
+  bool runOnFunction(Function &F) override;
+  void getAnalysisUsage(AnalysisUsage &AU) const override;
+};
+
 } // end anonymous namespace
 
-ScalarizerLegacyPass::ScalarizerLegacyPass() : FunctionPass(ID) {
-    Options.ScalarizeVariableInsertExtract = true;
-    Options.ScalarizeLoadStore = true;
-}
+ScalarizerLegacyPass::ScalarizerLegacyPass(const ScalarizerPassOptions &Options)
+    : FunctionPass(ID), Options(Options) {}
 
-void ScalarizerLegacyPass::getAnalysisUsage(AnalysisUsage& AU) const {
-    AU.addRequired<DominatorTreeWrapperPass>();
-    AU.addPreserved<DominatorTreeWrapperPass>();
+void ScalarizerLegacyPass::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.addRequired<DominatorTreeWrapperPass>();
+  AU.addPreserved<DominatorTreeWrapperPass>();
 }
 
 char ScalarizerLegacyPass::ID = 0;
@@ -440,8 +449,8 @@ bool ScalarizerLegacyPass::runOnFunction(Function &F) {
   return Impl.visit(F);
 }
 
-FunctionPass *llvm::createScalarizerPass() {
-  return new ScalarizerLegacyPass();
+FunctionPass *llvm::createScalarizerPass(const ScalarizerPassOptions &Options) {
+  return new ScalarizerLegacyPass(Options);
 }
 
 bool ScalarizerVisitor::visit(Function &F) {
 
@@ -1,20 +1,39 @@
-; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
+; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
 
 ; Make sure dxil operation function calls for acos are generated for float and half.
 
-define noundef float @tan_float(float noundef %a) {
+define noundef float @acos_float(float noundef %a) {
 entry:
 ; CHECK:call float @dx.op.unary.f32(i32 15, float %{{.*}})
   %elt.acos = call float @llvm.acos.f32(float %a)
   ret float %elt.acos
 }
 
-define noundef half @tan_half(half noundef %a) {
+define noundef half @acos_half(half noundef %a) {
 entry:
 ; CHECK:call half @dx.op.unary.f16(i32 15, half %{{.*}})
   %elt.acos = call half @llvm.acos.f16(half %a)
   ret half %elt.acos
 }
 
+define noundef <4 x float> @acos_float4(<4 x float> noundef %a) {
+entry:
+  ; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0
+  ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 15, float [[ee0]])
+  ; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1
+  ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 15, float [[ee1]])
+  ; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2
+  ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 15, float [[ee2]])
+  ; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3
+  ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 15, float [[ee3]])
+  ; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0
+  ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1
+  ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2
+  ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie3]], i64 3
+  %2 = call <4 x float> @llvm.acos.v4f32(<4 x float> %a) 
+  ret <4 x float> %2
+}
+
 declare half @llvm.acos.f16(half)
 declare float @llvm.acos.f32(float)
+declare <4 x float> @llvm.acos.v4f32(<4 x float>)
@@ -1,20 +1,39 @@
-; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
+; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
 
 ; Make sure dxil operation function calls for asin are generated for float and half.
 
-define noundef float @tan_float(float noundef %a) {
+define noundef float @asin_float(float noundef %a) {
 entry:
 ; CHECK:call float @dx.op.unary.f32(i32 16, float %{{.*}})
   %elt.asin = call float @llvm.asin.f32(float %a)
   ret float %elt.asin
 }
 
-define noundef half @tan_half(half noundef %a) {
+define noundef half @asin_half(half noundef %a) {
 entry:
 ; CHECK:call half @dx.op.unary.f16(i32 16, half %{{.*}})
   %elt.asin = call half @llvm.asin.f16(half %a)
   ret half %elt.asin
 }
 
+define noundef <4 x float> @asin_float4(<4 x float> noundef %a) {
+entry:
+  ; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0
+  ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 16, float [[ee0]])
+  ; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1
+  ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 16, float [[ee1]])
+  ; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2
+  ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 16, float [[ee2]])
+  ; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3
+  ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 16, float [[ee3]])
+  ; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0
+  ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1
+  ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2
+  ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie3]], i64 3
+  %2 = call <4 x float> @llvm.asin.v4f32(<4 x float> %a) 
+  ret <4 x float> %2
+}
+
 declare half @llvm.asin.f16(half)
 declare float @llvm.asin.f32(float)
+declare <4 x float> @llvm.asin.v4f32(<4 x float>)
@@ -1,20 +1,39 @@
-; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
+; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
 
 ; Make sure dxil operation function calls for atan are generated for float and half.
 
-define noundef float @tan_float(float noundef %a) {
+define noundef float @atan_float(float noundef %a) {
 entry:
 ; CHECK:call float @dx.op.unary.f32(i32 17, float %{{.*}})
   %elt.atan = call float @llvm.atan.f32(float %a)
   ret float %elt.atan
 }
 
-define noundef half @tan_half(half noundef %a) {
+define noundef half @atan_half(half noundef %a) {
 entry:
 ; CHECK:call half @dx.op.unary.f16(i32 17, half %{{.*}})
   %elt.atan = call half @llvm.atan.f16(half %a)
   ret half %elt.atan
 }
 
+define noundef <4 x float> @atan_float4(<4 x float> noundef %a) {
+entry:
+  ; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0
+  ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 17, float [[ee0]])
+  ; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1
+  ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 17, float [[ee1]])
+  ; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2
+  ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 17, float [[ee2]])
+  ; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3
+  ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 17, float [[ee3]])
+  ; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0
+  ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1
+  ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2
+  ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie3]], i64 3
+  %2 = call <4 x float> @llvm.atan.v4f32(<4 x float> %a) 
+  ret <4 x float> %2
+}
+
 declare half @llvm.atan.f16(half)
 declare float @llvm.atan.f32(float)
+declare <4 x float> @llvm.atan.v4f32(<4 x float>) 
@@ -1,4 +1,4 @@
-; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
+; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
 
 ; Make sure dxil operation function calls for ceil are generated for float and half.
 
@@ -16,5 +16,24 @@ entry:
   ret half %elt.ceil
 }
 
+define noundef <4 x float> @ceil_float4(<4 x float> noundef %a) {
+entry:
+  ; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0
+  ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 28, float [[ee0]])
+  ; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1
+  ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 28, float [[ee1]])
+  ; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2
+  ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 28, float [[ee2]])
+  ; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3
+  ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 28, float [[ee3]])
+  ; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0
+  ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1
+  ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2
+  ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie3]], i64 3
+  %2 = call <4 x float> @llvm.ceil.v4f32(<4 x float> %a) 
+  ret <4 x float> %2
+}
+
 declare half @llvm.ceil.f16(half)
 declare float @llvm.ceil.f32(float)
+declare <4 x float> @llvm.ceil.v4f32(<4 x float>) 
@@ -1,4 +1,4 @@
-; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
+; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
 
 ; Make sure dxil operation function calls for cos are generated for float and half.
 
@@ -16,5 +16,24 @@ entry:
   ret half %elt.cos
 }
 
+define noundef <4 x float> @cos_float4(<4 x float> noundef %a) #0 {
+entry:
+  ; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0
+  ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 12, float [[ee0]])
+  ; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1
+  ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 12, float [[ee1]])
+  ; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2
+  ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 12, float [[ee2]])
+  ; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3
+  ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 12, float [[ee3]])
+  ; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0
+  ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1
+  ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2
+  ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie3]], i64 3
+  %2 = call <4 x float> @llvm.cos.v4f32(<4 x float> %a) 
+  ret <4 x float> %2
+}
+
 declare half @llvm.cos.f16(half)
 declare float @llvm.cos.f32(float)
+declare <4 x float> @llvm.cos.v4f32(<4 x float>)
@@ -1,20 +1,39 @@
-; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
+; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
 
 ; Make sure dxil operation function calls for cosh are generated for float and half.
 
-define noundef float @tan_float(float noundef %a) {
+define noundef float @cosh_float(float noundef %a) {
 entry:
 ; CHECK:call float @dx.op.unary.f32(i32 18, float %{{.*}})
   %elt.cosh = call float @llvm.cosh.f32(float %a)
   ret float %elt.cosh
 }
 
-define noundef half @tan_half(half noundef %a) {
+define noundef half @cosh_half(half noundef %a) {
 entry:
 ; CHECK:call half @dx.op.unary.f16(i32 18, half %{{.*}})
   %elt.cosh = call half @llvm.cosh.f16(half %a)
   ret half %elt.cosh
 }
 
+define noundef <4 x float> @cosh_float4(<4 x float> noundef %a) #0 {
+entry:
+  ; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0
+  ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 18, float [[ee0]])
+  ; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1
+  ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 18, float [[ee1]])
+  ; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2
+  ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 18, float [[ee2]])
+  ; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3
+  ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 18, float [[ee3]])
+  ; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0
+  ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1
+  ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2
+  ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie3]], i64 3
+  %2 = call <4 x float> @llvm.cosh.v4f32(<4 x float> %a) 
+  ret <4 x float> %2
+}
+
 declare half @llvm.cosh.f16(half)
 declare float @llvm.cosh.f32(float)
+declare <4 x float> @llvm.cosh.v4f32(<4 x float>)