Skip to content

Commit c05e29b

Browse files
authored
[LegacyPM][DirectX] Add legacy scalarizer back for use in the DirectX backend (#107427)
As discussed in this [proposal](https://github.com/llvm/wg-hlsl/pull/62/files?short_path=ac6e592#diff-ac6e59276afe8016e307eedc5c835f534c0cb353707760b44df0fa9d905a5cf8), we had to bring back the legacy pass manager interface for the scalarizer pass, for two reasons: 1. The DirectX backend is still using the legacy pass manager. 2. The new PM isn't hooked up in clang yet via `BackendUtil.cpp`'s `AddEmitPasses`. That means that even if we add a `buildCodeGenPipeline`, we won't be able to benefit from the new pass manager's scalarizer pass interface. The remaining changes hook up the scalarizer pass to the DirectX backend, update the DirectX test cases, and allow the `optdriver` to not block the legacy invocation of the scalarizer pass. Future work still needs to be done to allow the scalarizer pass to handle target-specific intrinsics. Closes #105178
1 parent 3733528 commit c05e29b

29 files changed

+510
-89
lines changed

llvm/include/llvm/InitializePasses.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -276,6 +276,7 @@ void initializeSafepointIRVerifierPass(PassRegistry &);
276276
void initializeSelectOptimizePass(PassRegistry &);
277277
void initializeScalarEvolutionWrapperPassPass(PassRegistry &);
278278
void initializeScalarizeMaskedMemIntrinLegacyPassPass(PassRegistry &);
279+
void initializeScalarizerLegacyPassPass(PassRegistry &);
279280
void initializeScavengerTestPass(PassRegistry &);
280281
void initializeScopedNoAliasAAWrapperPassPass(PassRegistry &);
281282
void initializeSeparateConstOffsetFromGEPLegacyPassPass(PassRegistry &);

llvm/include/llvm/LinkAllPasses.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,7 @@ struct ForcePassLinking {
130130
(void)llvm::createLowerAtomicPass();
131131
(void)llvm::createLoadStoreVectorizerPass();
132132
(void)llvm::createPartiallyInlineLibCallsPass();
133+
(void)llvm::createScalarizerPass();
133134
(void)llvm::createSeparateConstOffsetFromGEPPass();
134135
(void)llvm::createSpeculativeExecutionPass();
135136
(void)llvm::createSpeculativeExecutionIfHasBranchDivergencePass();

llvm/include/llvm/Transforms/Scalar/Scalarizer.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
namespace llvm {
2525

2626
class Function;
27+
class FunctionPass;
2728

2829
struct ScalarizerPassOptions {
2930
// These options correspond 1:1 to cl::opt options defined in
@@ -50,6 +51,10 @@ class ScalarizerPass : public PassInfoMixin<ScalarizerPass> {
5051
void setScalarizeLoadStore(bool Value) { Options.ScalarizeLoadStore = Value; }
5152
void setScalarizeMinBits(unsigned Value) { Options.ScalarizeMinBits = Value; }
5253
};
54+
55+
/// Create a legacy pass manager instance of the Scalarizer pass
56+
FunctionPass *createScalarizerPass(
57+
const ScalarizerPassOptions &Options = ScalarizerPassOptions());
5358
}
5459

5560
#endif /* LLVM_TRANSFORMS_SCALAR_SCALARIZER_H */

llvm/lib/Target/DirectX/DirectXTargetMachine.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
#include "llvm/CodeGen/TargetPassConfig.h"
2929
#include "llvm/IR/IRPrintingPasses.h"
3030
#include "llvm/IR/LegacyPassManager.h"
31+
#include "llvm/InitializePasses.h"
3132
#include "llvm/MC/MCSectionDXContainer.h"
3233
#include "llvm/MC/SectionKind.h"
3334
#include "llvm/MC/TargetRegistry.h"
@@ -36,6 +37,7 @@
3637
#include "llvm/Support/Compiler.h"
3738
#include "llvm/Support/ErrorHandling.h"
3839
#include "llvm/Target/TargetLoweringObjectFile.h"
40+
#include "llvm/Transforms/Scalar/Scalarizer.h"
3941
#include <optional>
4042

4143
using namespace llvm;
@@ -44,6 +46,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeDirectXTarget() {
4446
RegisterTargetMachine<DirectXTargetMachine> X(getTheDirectXTarget());
4547
auto *PR = PassRegistry::getPassRegistry();
4648
initializeDXILIntrinsicExpansionLegacyPass(*PR);
49+
initializeScalarizerLegacyPassPass(*PR);
4750
initializeDXILPrepareModulePass(*PR);
4851
initializeEmbedDXILPassPass(*PR);
4952
initializeWriteDXILPassPass(*PR);
@@ -83,6 +86,9 @@ class DirectXPassConfig : public TargetPassConfig {
8386
FunctionPass *createTargetRegisterAllocator(bool) override { return nullptr; }
8487
void addCodeGenPrepare() override {
8588
addPass(createDXILIntrinsicExpansionLegacyPass());
89+
ScalarizerPassOptions DxilScalarOptions;
90+
DxilScalarOptions.ScalarizeLoadStore = true;
91+
addPass(createScalarizerPass(DxilScalarOptions));
8692
addPass(createDXILOpLoweringLegacyPass());
8793
addPass(createDXILFinalizeLinkageLegacyPass());
8894
addPass(createDXILTranslateMetadataLegacyPass());

llvm/lib/Transforms/Scalar/Scalar.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ using namespace llvm;
2121
void llvm::initializeScalarOpts(PassRegistry &Registry) {
2222
initializeConstantHoistingLegacyPassPass(Registry);
2323
initializeDCELegacyPassPass(Registry);
24+
initializeScalarizerLegacyPassPass(Registry);
2425
initializeGVNLegacyPassPass(Registry);
2526
initializeEarlyCSELegacyPassPass(Registry);
2627
initializeEarlyCSEMemSSALegacyPassPass(Registry);

llvm/lib/Transforms/Scalar/Scalarizer.cpp

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
#include "llvm/IR/Module.h"
3737
#include "llvm/IR/Type.h"
3838
#include "llvm/IR/Value.h"
39+
#include "llvm/InitializePasses.h"
3940
#include "llvm/Support/Casting.h"
4041
#include "llvm/Support/CommandLine.h"
4142
#include "llvm/Transforms/Utils/Local.h"
@@ -340,8 +341,33 @@ class ScalarizerVisitor : public InstVisitor<ScalarizerVisitor, bool> {
340341
const unsigned ScalarizeMinBits;
341342
};
342343

344+
/// Legacy pass manager (FunctionPass) wrapper for the scalarizer. It stores a
/// ScalarizerPassOptions value and declares the runOnFunction and
/// getAnalysisUsage overrides, which are defined outside the class body.
class ScalarizerLegacyPass : public FunctionPass {
345+
public:
346+
// Pass identifier; its address is handed to the FunctionPass base
// constructor by both constructors below.
static char ID;
347+
// Options forwarded to the ScalarizerVisitor when the pass runs.
ScalarizerPassOptions Options;
348+
// Default construction value-initializes Options.
ScalarizerLegacyPass() : FunctionPass(ID), Options() {}
349+
// Construct with caller-supplied options (copied into the member).
ScalarizerLegacyPass(const ScalarizerPassOptions &Options);
350+
bool runOnFunction(Function &F) override;
351+
void getAnalysisUsage(AnalysisUsage &AU) const override;
352+
};
353+
343354
} // end anonymous namespace
344355

356+
/// Construct the legacy pass with explicit options. The parameter shadows the
/// member of the same name; the initializer copies the argument into the
/// member.
ScalarizerLegacyPass::ScalarizerLegacyPass(const ScalarizerPassOptions &Options)
357+
: FunctionPass(ID), Options(Options) {}
358+
359+
/// Declare analysis dependencies: the dominator tree wrapper pass is both
/// required (runOnFunction fetches it via getAnalysis) and marked preserved.
void ScalarizerLegacyPass::getAnalysisUsage(AnalysisUsage &AU) const {
360+
AU.addRequired<DominatorTreeWrapperPass>();
361+
AU.addPreserved<DominatorTreeWrapperPass>();
362+
}
363+
364+
// Out-of-class definition of the static pass identifier declared in the class.
char ScalarizerLegacyPass::ID = 0;
365+
// Register the legacy pass under the argument string "scalarizer" with the
// description "Scalarize vector operations". The BEGIN/END pair brackets the
// dependency list, which names DominatorTreeWrapperPass (the same analysis
// getAnalysisUsage requires).
INITIALIZE_PASS_BEGIN(ScalarizerLegacyPass, "scalarizer",
366+
"Scalarize vector operations", false, false)
367+
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
368+
INITIALIZE_PASS_END(ScalarizerLegacyPass, "scalarizer",
369+
"Scalarize vector operations", false, false)
370+
345371
Scatterer::Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v,
346372
const VectorSplit &VS, ValueVector *cachePtr)
347373
: BB(bb), BBI(bbi), V(v), VS(VS), CachePtr(cachePtr) {
@@ -414,6 +440,19 @@ Value *Scatterer::operator[](unsigned Frag) {
414440
return CV[Frag];
415441
}
416442

443+
/// Legacy pass manager entry point. Returns false without touching F when
/// skipFunction(F) reports that the function should be skipped; otherwise
/// builds a ScalarizerVisitor from the function's dominator tree and the
/// stored Options, and returns whatever the visitor's visit(F) returns.
bool ScalarizerLegacyPass::runOnFunction(Function &F) {
444+
if (skipFunction(F))
445+
return false;
446+
447+
// The dominator tree comes from the analysis declared in getAnalysisUsage.
DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
448+
ScalarizerVisitor Impl(DT, Options);
449+
return Impl.visit(F);
450+
}
451+
452+
/// Factory for the legacy pass manager scalarizer pass. Heap-allocates a
/// ScalarizerLegacyPass configured with Options and returns it as a
/// FunctionPass pointer.
// NOTE(review): presumably the pass manager that receives the pointer (e.g.
// via addPass) takes ownership — confirm against the caller.
FunctionPass *llvm::createScalarizerPass(const ScalarizerPassOptions &Options) {
453+
return new ScalarizerLegacyPass(Options);
454+
}
455+
417456
bool ScalarizerVisitor::visit(Function &F) {
418457
assert(Gathered.empty() && Scattered.empty());
419458

llvm/test/CodeGen/DirectX/acos.ll

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,39 @@
1-
; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
1+
; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
22

33
; Make sure dxil operation function calls for acos are generated for float and half.
44

5-
define noundef float @tan_float(float noundef %a) {
5+
define noundef float @acos_float(float noundef %a) {
66
entry:
77
; CHECK:call float @dx.op.unary.f32(i32 15, float %{{.*}})
88
%elt.acos = call float @llvm.acos.f32(float %a)
99
ret float %elt.acos
1010
}
1111

12-
define noundef half @tan_half(half noundef %a) {
12+
define noundef half @acos_half(half noundef %a) {
1313
entry:
1414
; CHECK:call half @dx.op.unary.f16(i32 15, half %{{.*}})
1515
%elt.acos = call half @llvm.acos.f16(half %a)
1616
ret half %elt.acos
1717
}
1818

19+
define noundef <4 x float> @acos_float4(<4 x float> noundef %a) {
20+
entry:
21+
; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0
22+
; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 15, float [[ee0]])
23+
; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1
24+
; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 15, float [[ee1]])
25+
; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2
26+
; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 15, float [[ee2]])
27+
; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3
28+
; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 15, float [[ee3]])
29+
; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0
30+
; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1
31+
; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2
32+
; CHECK: insertelement <4 x float> %{{.*}}, float [[ie3]], i64 3
33+
%2 = call <4 x float> @llvm.acos.v4f32(<4 x float> %a)
34+
ret <4 x float> %2
35+
}
36+
1937
declare half @llvm.acos.f16(half)
2038
declare float @llvm.acos.f32(float)
39+
declare <4 x float> @llvm.acos.v4f32(<4 x float>)

llvm/test/CodeGen/DirectX/asin.ll

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,39 @@
1-
; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
1+
; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
22

33
; Make sure dxil operation function calls for asin are generated for float and half.
44

5-
define noundef float @tan_float(float noundef %a) {
5+
define noundef float @asin_float(float noundef %a) {
66
entry:
77
; CHECK:call float @dx.op.unary.f32(i32 16, float %{{.*}})
88
%elt.asin = call float @llvm.asin.f32(float %a)
99
ret float %elt.asin
1010
}
1111

12-
define noundef half @tan_half(half noundef %a) {
12+
define noundef half @asin_half(half noundef %a) {
1313
entry:
1414
; CHECK:call half @dx.op.unary.f16(i32 16, half %{{.*}})
1515
%elt.asin = call half @llvm.asin.f16(half %a)
1616
ret half %elt.asin
1717
}
1818

19+
define noundef <4 x float> @asin_float4(<4 x float> noundef %a) {
20+
entry:
21+
; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0
22+
; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 16, float [[ee0]])
23+
; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1
24+
; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 16, float [[ee1]])
25+
; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2
26+
; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 16, float [[ee2]])
27+
; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3
28+
; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 16, float [[ee3]])
29+
; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0
30+
; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1
31+
; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2
32+
; CHECK: insertelement <4 x float> %{{.*}}, float [[ie3]], i64 3
33+
%2 = call <4 x float> @llvm.asin.v4f32(<4 x float> %a)
34+
ret <4 x float> %2
35+
}
36+
1937
declare half @llvm.asin.f16(half)
2038
declare float @llvm.asin.f32(float)
39+
declare <4 x float> @llvm.asin.v4f32(<4 x float>)

llvm/test/CodeGen/DirectX/atan.ll

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,39 @@
1-
; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
1+
; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
22

33
; Make sure dxil operation function calls for atan are generated for float and half.
44

5-
define noundef float @tan_float(float noundef %a) {
5+
define noundef float @atan_float(float noundef %a) {
66
entry:
77
; CHECK:call float @dx.op.unary.f32(i32 17, float %{{.*}})
88
%elt.atan = call float @llvm.atan.f32(float %a)
99
ret float %elt.atan
1010
}
1111

12-
define noundef half @tan_half(half noundef %a) {
12+
define noundef half @atan_half(half noundef %a) {
1313
entry:
1414
; CHECK:call half @dx.op.unary.f16(i32 17, half %{{.*}})
1515
%elt.atan = call half @llvm.atan.f16(half %a)
1616
ret half %elt.atan
1717
}
1818

19+
define noundef <4 x float> @atan_float4(<4 x float> noundef %a) {
20+
entry:
21+
; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0
22+
; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 17, float [[ee0]])
23+
; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1
24+
; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 17, float [[ee1]])
25+
; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2
26+
; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 17, float [[ee2]])
27+
; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3
28+
; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 17, float [[ee3]])
29+
; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0
30+
; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1
31+
; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2
32+
; CHECK: insertelement <4 x float> %{{.*}}, float [[ie3]], i64 3
33+
%2 = call <4 x float> @llvm.atan.v4f32(<4 x float> %a)
34+
ret <4 x float> %2
35+
}
36+
1937
declare half @llvm.atan.f16(half)
2038
declare float @llvm.atan.f32(float)
39+
declare <4 x float> @llvm.atan.v4f32(<4 x float>)

llvm/test/CodeGen/DirectX/ceil.ll

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
1+
; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
22

33
; Make sure dxil operation function calls for ceil are generated for float and half.
44

@@ -16,5 +16,24 @@ entry:
1616
ret half %elt.ceil
1717
}
1818

19+
define noundef <4 x float> @ceil_float4(<4 x float> noundef %a) {
20+
entry:
21+
; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0
22+
; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 28, float [[ee0]])
23+
; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1
24+
; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 28, float [[ee1]])
25+
; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2
26+
; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 28, float [[ee2]])
27+
; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3
28+
; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 28, float [[ee3]])
29+
; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0
30+
; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1
31+
; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2
32+
; CHECK: insertelement <4 x float> %{{.*}}, float [[ie3]], i64 3
33+
%2 = call <4 x float> @llvm.ceil.v4f32(<4 x float> %a)
34+
ret <4 x float> %2
35+
}
36+
1937
declare half @llvm.ceil.f16(half)
2038
declare float @llvm.ceil.f32(float)
39+
declare <4 x float> @llvm.ceil.v4f32(<4 x float>)

llvm/test/CodeGen/DirectX/cos.ll

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
1+
; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
22

33
; Make sure dxil operation function calls for cos are generated for float and half.
44

@@ -16,5 +16,24 @@ entry:
1616
ret half %elt.cos
1717
}
1818

19+
define noundef <4 x float> @cos_float4(<4 x float> noundef %a) #0 {
20+
entry:
21+
; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0
22+
; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 12, float [[ee0]])
23+
; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1
24+
; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 12, float [[ee1]])
25+
; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2
26+
; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 12, float [[ee2]])
27+
; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3
28+
; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 12, float [[ee3]])
29+
; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0
30+
; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1
31+
; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2
32+
; CHECK: insertelement <4 x float> %{{.*}}, float [[ie3]], i64 3
33+
%2 = call <4 x float> @llvm.cos.v4f32(<4 x float> %a)
34+
ret <4 x float> %2
35+
}
36+
1937
declare half @llvm.cos.f16(half)
2038
declare float @llvm.cos.f32(float)
39+
declare <4 x float> @llvm.cos.v4f32(<4 x float>)

llvm/test/CodeGen/DirectX/cosh.ll

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,39 @@
1-
; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
1+
; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
22

33
; Make sure dxil operation function calls for cosh are generated for float and half.
44

5-
define noundef float @tan_float(float noundef %a) {
5+
define noundef float @cosh_float(float noundef %a) {
66
entry:
77
; CHECK:call float @dx.op.unary.f32(i32 18, float %{{.*}})
88
%elt.cosh = call float @llvm.cosh.f32(float %a)
99
ret float %elt.cosh
1010
}
1111

12-
define noundef half @tan_half(half noundef %a) {
12+
define noundef half @cosh_half(half noundef %a) {
1313
entry:
1414
; CHECK:call half @dx.op.unary.f16(i32 18, half %{{.*}})
1515
%elt.cosh = call half @llvm.cosh.f16(half %a)
1616
ret half %elt.cosh
1717
}
1818

19+
define noundef <4 x float> @cosh_float4(<4 x float> noundef %a) #0 {
20+
entry:
21+
; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0
22+
; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 18, float [[ee0]])
23+
; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1
24+
; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 18, float [[ee1]])
25+
; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2
26+
; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 18, float [[ee2]])
27+
; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3
28+
; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 18, float [[ee3]])
29+
; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0
30+
; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1
31+
; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2
32+
; CHECK: insertelement <4 x float> %{{.*}}, float [[ie3]], i64 3
33+
%2 = call <4 x float> @llvm.cosh.v4f32(<4 x float> %a)
34+
ret <4 x float> %2
35+
}
36+
1937
declare half @llvm.cosh.f16(half)
2038
declare float @llvm.cosh.f32(float)
39+
declare <4 x float> @llvm.cosh.v4f32(<4 x float>)

0 commit comments

Comments
 (0)