Skip to content

[CostModel] Handle vector struct results and cost llvm.sincos #123210

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Feb 26, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions llvm/include/llvm/Analysis/TargetTransformInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -126,12 +126,13 @@ class IntrinsicCostAttributes {
// If ScalarizationCost is UINT_MAX, the cost of scalarizing the
// arguments and the return value will be computed based on types.
InstructionCost ScalarizationCost = InstructionCost::getInvalid();
TargetLibraryInfo const *LibInfo = nullptr;

public:
IntrinsicCostAttributes(
Intrinsic::ID Id, const CallBase &CI,
InstructionCost ScalarCost = InstructionCost::getInvalid(),
bool TypeBasedOnly = false);
bool TypeBasedOnly = false, TargetLibraryInfo const *LibInfo = nullptr);

IntrinsicCostAttributes(
Intrinsic::ID Id, Type *RTy, ArrayRef<Type *> Tys,
Expand All @@ -145,7 +146,8 @@ class IntrinsicCostAttributes {
Intrinsic::ID Id, Type *RTy, ArrayRef<const Value *> Args,
ArrayRef<Type *> Tys, FastMathFlags Flags = FastMathFlags(),
const IntrinsicInst *I = nullptr,
InstructionCost ScalarCost = InstructionCost::getInvalid());
InstructionCost ScalarCost = InstructionCost::getInvalid(),
TargetLibraryInfo const *LibInfo = nullptr);

Intrinsic::ID getID() const { return IID; }
const IntrinsicInst *getInst() const { return II; }
Expand All @@ -154,6 +156,7 @@ class IntrinsicCostAttributes {
InstructionCost getScalarizationCost() const { return ScalarizationCost; }
const SmallVectorImpl<const Value *> &getArgs() const { return Arguments; }
const SmallVectorImpl<Type *> &getArgTypes() const { return ParamTys; }
const TargetLibraryInfo *getLibInfo() const { return LibInfo; }

bool isTypeBasedOnly() const {
return Arguments.empty();
Expand Down
113 changes: 95 additions & 18 deletions llvm/include/llvm/CodeGen/BasicTTIImpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/TargetTransformInfoImpl.h"
#include "llvm/Analysis/ValueTracking.h"
Expand Down Expand Up @@ -285,6 +286,64 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
return false;
}

/// Several intrinsics that return structs (including llvm.sincos[pi] and
/// llvm.modf) can be lowered to a vector library call (for certain VFs). The
/// vector library functions correspond to the scalar calls (e.g. sincos or
/// modf), which unlike the intrinsic return values via output pointers. This
/// helper checks if a vector call exists for the given intrinsic, and returns
/// the cost, which includes the cost of the mask (if required), and the loads
/// for values returned via output pointers. \p LC is the scalar libcall and
/// \p CallRetElementIndex (optional) is the struct element which is mapped to
/// the call return value. If std::nullopt is returned, then no vector library
/// call is available, so the intrinsic should be assigned the default cost
/// (e.g. scalarization).
std::optional<InstructionCost> getMultipleResultIntrinsicVectorLibCallCost(
const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind,
RTLIB::Libcall LC, std::optional<unsigned> CallRetElementIndex = {}) {
Type *RetTy = ICA.getReturnType();
// Vector variants of the intrinsic can be mapped to a vector library call.
auto const *LibInfo = ICA.getLibInfo();
if (!LibInfo || !isa<StructType>(RetTy) ||
!isVectorizedStructTy(cast<StructType>(RetTy)))
return std::nullopt;

// Find associated libcall.
const char *LCName = getTLI()->getLibcallName(LC);
if (!LCName)
return std::nullopt;

// Search for a corresponding vector variant.
LLVMContext &Ctx = RetTy->getContext();
ElementCount VF = getVectorizedTypeVF(RetTy);
VecDesc const *VD = nullptr;
for (bool Masked : {false, true}) {
if ((VD = LibInfo->getVectorMappingInfo(LCName, VF, Masked)))
break;
}
if (!VD)
return std::nullopt;

// Cost the call + mask.
auto Cost =
thisT()->getCallInstrCost(nullptr, RetTy, ICA.getArgTypes(), CostKind);
if (VD->isMasked())
Cost += thisT()->getShuffleCost(
TargetTransformInfo::SK_Broadcast,
VectorType::get(IntegerType::getInt1Ty(Ctx), VF), {}, CostKind, 0,
nullptr, {});

// Lowering to a library call (with output pointers) may require us to emit
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Always adding on the cost of a load seems pessimistic?

Copy link
Member Author

@MacDue MacDue Feb 19, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is slightly pessimistic, but the cost is still reasonably low at 12 or 13 (rather than 10 for a plain libcall), so the vectorizer still chooses to widen the intrinsic. It also means that if libraries add structure-returning variants, they'll be preferred, as they'll have a slightly lower cost.

// reloads for the results.
for (auto [Idx, VectorTy] : enumerate(getContainedTypes(RetTy))) {
if (Idx == CallRetElementIndex)
continue;
Cost += thisT()->getMemoryOpCost(
Instruction::Load, VectorTy,
thisT()->getDataLayout().getABITypeAlign(VectorTy), 0, CostKind);
}
return Cost;
}

protected:
explicit BasicTTIImplBase(const TargetMachine *TM, const DataLayout &DL)
: BaseT(DL) {}
Expand Down Expand Up @@ -1726,9 +1785,9 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {

Type *RetTy = ICA.getReturnType();

ElementCount RetVF =
(RetTy->isVectorTy() ? cast<VectorType>(RetTy)->getElementCount()
: ElementCount::getFixed(1));
ElementCount RetVF = isVectorizedTy(RetTy) ? getVectorizedTypeVF(RetTy)
: ElementCount::getFixed(1);

const IntrinsicInst *I = ICA.getInst();
const SmallVectorImpl<const Value *> &Args = ICA.getArgs();
FastMathFlags FMF = ICA.getFlags();
Expand Down Expand Up @@ -1997,6 +2056,16 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
}
case Intrinsic::experimental_vector_match:
return thisT()->getTypeBasedIntrinsicInstrCost(ICA, CostKind);
case Intrinsic::sincos: {
Type *Ty = getContainedTypes(RetTy).front();
EVT VT = getTLI()->getValueType(DL, Ty);
RTLIB::Libcall LC = RTLIB::getSINCOS(VT.getScalarType());
if (auto Cost =
getMultipleResultIntrinsicVectorLibCallCost(ICA, CostKind, LC))
return *Cost;
// Otherwise, fallback to default scalarization cost.
break;
}
}

// Assume that we need to scalarize this intrinsic.
Expand All @@ -2005,10 +2074,13 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
InstructionCost ScalarizationCost = InstructionCost::getInvalid();
if (RetVF.isVector() && !RetVF.isScalable()) {
ScalarizationCost = 0;
if (!RetTy->isVoidTy())
ScalarizationCost += getScalarizationOverhead(
cast<VectorType>(RetTy),
/*Insert*/ true, /*Extract*/ false, CostKind);
if (!RetTy->isVoidTy()) {
for (Type *VectorTy : getContainedTypes(RetTy)) {
ScalarizationCost += getScalarizationOverhead(
cast<VectorType>(VectorTy),
/*Insert=*/true, /*Extract=*/false, CostKind);
}
}
ScalarizationCost +=
getOperandsScalarizationOverhead(Args, ICA.getArgTypes(), CostKind);
}
Expand Down Expand Up @@ -2689,27 +2761,32 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
// Else, assume that we need to scalarize this intrinsic. For math builtins
// this will emit a costly libcall, adding call overhead and spills. Make it
// very expensive.
if (auto *RetVTy = dyn_cast<VectorType>(RetTy)) {
if (isVectorizedTy(RetTy)) {
ArrayRef<Type *> RetVTys = getContainedTypes(RetTy);

// Scalable vectors cannot be scalarized, so return Invalid.
if (isa<ScalableVectorType>(RetTy) || any_of(Tys, [](const Type *Ty) {
return isa<ScalableVectorType>(Ty);
}))
if (any_of(concat<Type *const>(RetVTys, Tys),
[](Type *Ty) { return isa<ScalableVectorType>(Ty); }))
return InstructionCost::getInvalid();

InstructionCost ScalarizationCost =
SkipScalarizationCost
? ScalarizationCostPassed
: getScalarizationOverhead(RetVTy, /*Insert*/ true,
/*Extract*/ false, CostKind);
InstructionCost ScalarizationCost = ScalarizationCostPassed;
if (!SkipScalarizationCost) {
ScalarizationCost = 0;
for (Type *RetVTy : RetVTys) {
ScalarizationCost += getScalarizationOverhead(
cast<VectorType>(RetVTy), /*Insert=*/true,
/*Extract=*/false, CostKind);
}
}

unsigned ScalarCalls = cast<FixedVectorType>(RetVTy)->getNumElements();
unsigned ScalarCalls = getVectorizedTypeVF(RetTy).getFixedValue();
SmallVector<Type *, 4> ScalarTys;
for (Type *Ty : Tys) {
if (Ty->isVectorTy())
Ty = Ty->getScalarType();
ScalarTys.push_back(Ty);
}
IntrinsicCostAttributes Attrs(IID, RetTy->getScalarType(), ScalarTys, FMF);
IntrinsicCostAttributes Attrs(IID, toScalarizedTy(RetTy), ScalarTys, FMF);
InstructionCost ScalarCost =
thisT()->getIntrinsicInstrCost(Attrs, CostKind);
for (Type *Ty : Tys) {
Expand Down
18 changes: 13 additions & 5 deletions llvm/lib/Analysis/CostModel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,15 @@
//===----------------------------------------------------------------------===//

#include "llvm/Analysis/CostModel.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

static cl::opt<TargetTransformInfo::TargetCostKind> CostKind(
Expand All @@ -42,25 +44,31 @@ static cl::opt<bool> TypeBasedIntrinsicCost("type-based-intrinsic-cost",
cl::desc("Calculate intrinsics cost based only on argument types"),
cl::init(false));

static cl::opt<bool> PreferIntrinsicCost(
"prefer-intrinsic-cost",
cl::desc("Prefer using getIntrinsicInstrCost over getInstructionCost"),
cl::init(false));

#define CM_NAME "cost-model"
#define DEBUG_TYPE CM_NAME

PreservedAnalyses CostModelPrinterPass::run(Function &F,
FunctionAnalysisManager &AM) {
auto &TTI = AM.getResult<TargetIRAnalysis>(F);
auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
OS << "Printing analysis 'Cost Model Analysis' for function '" << F.getName() << "':\n";
for (BasicBlock &B : F) {
for (Instruction &Inst : B) {
// TODO: Use a pass parameter instead of cl::opt CostKind to determine
// which cost kind to print.
InstructionCost Cost;
auto *II = dyn_cast<IntrinsicInst>(&Inst);
if (II && TypeBasedIntrinsicCost) {
IntrinsicCostAttributes ICA(II->getIntrinsicID(), *II,
InstructionCost::getInvalid(), true);
if (II && (PreferIntrinsicCost || TypeBasedIntrinsicCost)) {
IntrinsicCostAttributes ICA(
II->getIntrinsicID(), *II, InstructionCost::getInvalid(),
/*TypeBasedOnly=*/TypeBasedIntrinsicCost, &TLI);
Comment on lines +67 to +69
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You don't know if this is a libcall or not here, I also don't understand why this is guarded by TypeBasedIntrinsicCost. Can we remove that one too?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I only use the getIntrinsicInstrCost code path if one of the flags (type-based-intrinsic-cost or libcall-based-intrinsic-cost) is set as I discovered some targets return a different cost depending on if you call getIntrinsicInstrCost directly or via getInstructionCost. This seemed to be due to them modifying the cost returned from getIntrinsicInstrCost within their implementation of getInstructionCost. I didn't want to change tests that depend on this (and it's not relevant for AArch64), so I added a flag.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I must admit though it does seem a little odd now because -type-based-intrinsic-cost also implies libcall-based-intrinsic-cost since it causes TLI to be passed in. How about renaming libcall-based-intrinsic-cost to be prefer-intrinsic-cost or use-intrinsic-cost, since it's not really tied to the library calls?

This is just a suggestion, but if we had a use-intrinsic-cost option we could actually make it an enum along the lines of:

enum IntrinsicCostType {
  None,
  NonTypeBased,
  TypeBased,
};

that way you can collapse the two options into one and both TypeBased and NonTypeBased variants will use the TLI. However, I appreciate that would significantly increase the number of test files changed so it's possibly something for a follow-on?

Copy link
Member Author

@MacDue MacDue Feb 25, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've renamed the flag prefer-intrinsic-cost for this PR. The enum works nicely 👍, but since it does increase the churn a bit, I agree it's best to post that as a small follow-up PR.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should at least have a firm plan on removing the flag or setting it to true, so users benefit from the new code by default.

It may not be an issue for AArch64, but people building for AArch64 also won't get any benefit unless they know to set this flag and there will be very little coverage of the code on larger projects.

Copy link
Member Author

@MacDue MacDue Feb 25, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just to be clear this flag only affects the test cost-model print pass (so users = LLVM developers), not LLVM more generally. The loop vectorizer in #128035 always passes the TLI.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Changing the default would not be much trouble though -- it just requires adding a flag to a few tests.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The problem I see with changing the default prefer-intrinsic-cost to true is that some tests may genuinely want to exercise the getInstructionCost path, in which case they have to live without using the TLI. I think it makes sense to follow this up in a separate PR, along with potentially changing it to use an enum instead of a boolean so that we can remove the type-based-intrinsic-cost flag as well?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep, I'm happy to address this in the follow-up PR (since there will be a little test churn in that patch anyway).

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just to be clear this flag only affects the test cost-model print pass (so users = LLVM developers), not LLVM more generally. The loop vectorizer in #128035 always passes the TLI.

Ok that's good, thanks. I think ideally we would get rid of the option as follow-up, as it seems confusing to have the cost model printer show something that's different to the cost that is actually used in the passes making the queries.

Cost = TTI.getIntrinsicInstrCost(ICA, CostKind);
}
else {
} else {
Cost = TTI.getInstructionCost(&Inst, CostKind);
}

Expand Down
17 changes: 8 additions & 9 deletions llvm/lib/Analysis/TargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -69,9 +69,9 @@ bool HardwareLoopInfo::canAnalyze(LoopInfo &LI) {

IntrinsicCostAttributes::IntrinsicCostAttributes(
Intrinsic::ID Id, const CallBase &CI, InstructionCost ScalarizationCost,
bool TypeBasedOnly)
bool TypeBasedOnly, const TargetLibraryInfo *LibInfo)
: II(dyn_cast<IntrinsicInst>(&CI)), RetTy(CI.getType()), IID(Id),
ScalarizationCost(ScalarizationCost) {
ScalarizationCost(ScalarizationCost), LibInfo(LibInfo) {

if (const auto *FPMO = dyn_cast<FPMathOperator>(&CI))
FMF = FPMO->getFastMathFlags();
Expand Down Expand Up @@ -101,13 +101,12 @@ IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id, Type *Ty,
ParamTys.push_back(Argument->getType());
}

IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy,
ArrayRef<const Value *> Args,
ArrayRef<Type *> Tys,
FastMathFlags Flags,
const IntrinsicInst *I,
InstructionCost ScalarCost)
: II(I), RetTy(RTy), IID(Id), FMF(Flags), ScalarizationCost(ScalarCost) {
IntrinsicCostAttributes::IntrinsicCostAttributes(
Intrinsic::ID Id, Type *RTy, ArrayRef<const Value *> Args,
ArrayRef<Type *> Tys, FastMathFlags Flags, const IntrinsicInst *I,
InstructionCost ScalarCost, TargetLibraryInfo const *LibInfo)
: II(I), RetTy(RTy), IID(Id), FMF(Flags), ScalarizationCost(ScalarCost),
LibInfo(LibInfo) {
ParamTys.insert(ParamTys.begin(), Tys.begin(), Tys.end());
Arguments.insert(Arguments.begin(), Args.begin(), Args.end());
}
Expand Down
60 changes: 60 additions & 0 deletions llvm/test/Analysis/CostModel/AArch64/sincos.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "sincos"
; RUN: opt < %s -mtriple=aarch64-gnu-linux -mattr=+neon,+sve -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
; RUN: opt < %s -mtriple=aarch64-gnu-linux -mattr=+neon,+sve -vector-library=ArmPL -passes="print<cost-model>" -prefer-intrinsic-cost -cost-kind=throughput 2>&1 -disable-output | FileCheck %s -check-prefix=CHECK-VECLIB

define void @sincos() {
; CHECK-LABEL: 'sincos'
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %f16 = call { half, half } @llvm.sincos.f16(half poison)
; CHECK: Cost Model: Found an estimated cost of 10 for instruction: %f32 = call { float, float } @llvm.sincos.f32(float poison)
; CHECK: Cost Model: Found an estimated cost of 10 for instruction: %f64 = call { double, double } @llvm.sincos.f64(double poison)
; CHECK: Cost Model: Found an estimated cost of 10 for instruction: %f128 = call { fp128, fp128 } @llvm.sincos.f128(fp128 poison)
;
; CHECK: Cost Model: Found an estimated cost of 36 for instruction: %v8f16 = call { <8 x half>, <8 x half> } @llvm.sincos.v8f16(<8 x half> poison)
; CHECK: Cost Model: Found an estimated cost of 52 for instruction: %v4f32 = call { <4 x float>, <4 x float> } @llvm.sincos.v4f32(<4 x float> poison)
; CHECK: Cost Model: Found an estimated cost of 24 for instruction: %v2f64 = call { <2 x double>, <2 x double> } @llvm.sincos.v2f64(<2 x double> poison)
; CHECK: Cost Model: Found an estimated cost of 10 for instruction: %v1f128 = call { <1 x fp128>, <1 x fp128> } @llvm.sincos.v1f128(<1 x fp128> poison)
; CHECK: Cost Model: Found an estimated cost of 104 for instruction: %v8f32 = call { <8 x float>, <8 x float> } @llvm.sincos.v8f32(<8 x float> poison)
;
; CHECK: Cost Model: Invalid cost for instruction: %nxv8f16 = call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.sincos.nxv8f16(<vscale x 8 x half> poison)
; CHECK: Cost Model: Invalid cost for instruction: %nxv4f32 = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.sincos.nxv4f32(<vscale x 4 x float> poison)
; CHECK: Cost Model: Invalid cost for instruction: %nxv2f64 = call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.sincos.nxv2f64(<vscale x 2 x double> poison)
; CHECK: Cost Model: Invalid cost for instruction: %nxv1f128 = call { <vscale x 1 x fp128>, <vscale x 1 x fp128> } @llvm.sincos.nxv1f128(<vscale x 1 x fp128> poison)
; CHECK: Cost Model: Invalid cost for instruction: %nxv8f32 = call { <vscale x 8 x float>, <vscale x 8 x float> } @llvm.sincos.nxv8f32(<vscale x 8 x float> poison)
;
; CHECK-VECLIB-LABEL: 'sincos'
; CHECK-VECLIB: Cost Model: Found an estimated cost of 1 for instruction: %f16 = call { half, half } @llvm.sincos.f16(half poison)
; CHECK-VECLIB: Cost Model: Found an estimated cost of 10 for instruction: %f32 = call { float, float } @llvm.sincos.f32(float poison)
; CHECK-VECLIB: Cost Model: Found an estimated cost of 10 for instruction: %f64 = call { double, double } @llvm.sincos.f64(double poison)
; CHECK-VECLIB: Cost Model: Found an estimated cost of 10 for instruction: %f128 = call { fp128, fp128 } @llvm.sincos.f128(fp128 poison)
;
; CHECK-VECLIB: Cost Model: Found an estimated cost of 36 for instruction: %v8f16 = call { <8 x half>, <8 x half> } @llvm.sincos.v8f16(<8 x half> poison)
; CHECK-VECLIB: Cost Model: Found an estimated cost of 12 for instruction: %v4f32 = call { <4 x float>, <4 x float> } @llvm.sincos.v4f32(<4 x float> poison)
; CHECK-VECLIB: Cost Model: Found an estimated cost of 12 for instruction: %v2f64 = call { <2 x double>, <2 x double> } @llvm.sincos.v2f64(<2 x double> poison)
; CHECK-VECLIB: Cost Model: Found an estimated cost of 10 for instruction: %v1f128 = call { <1 x fp128>, <1 x fp128> } @llvm.sincos.v1f128(<1 x fp128> poison)
; CHECK-VECLIB: Cost Model: Found an estimated cost of 104 for instruction: %v8f32 = call { <8 x float>, <8 x float> } @llvm.sincos.v8f32(<8 x float> poison)
;
; CHECK-VECLIB: Cost Model: Invalid cost for instruction: %nxv8f16 = call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.sincos.nxv8f16(<vscale x 8 x half> poison)
; CHECK-VECLIB: Cost Model: Found an estimated cost of 13 for instruction: %nxv4f32 = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.sincos.nxv4f32(<vscale x 4 x float> poison)
; CHECK-VECLIB: Cost Model: Found an estimated cost of 13 for instruction: %nxv2f64 = call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.sincos.nxv2f64(<vscale x 2 x double> poison)
; CHECK-VECLIB: Cost Model: Invalid cost for instruction: %nxv1f128 = call { <vscale x 1 x fp128>, <vscale x 1 x fp128> } @llvm.sincos.nxv1f128(<vscale x 1 x fp128> poison)
; CHECK-VECLIB: Cost Model: Invalid cost for instruction: %nxv8f32 = call { <vscale x 8 x float>, <vscale x 8 x float> } @llvm.sincos.nxv8f32(<vscale x 8 x float> poison)
;
; Scalar calls.
%f16 = call { half, half } @llvm.sincos.f16(half poison)
%f32 = call { float, float } @llvm.sincos.f32(float poison)
%f64 = call { double, double } @llvm.sincos.f64(double poison)
%f128 = call { fp128, fp128 } @llvm.sincos.f128(fp128 poison)

; Fixed-width vector calls.
%v8f16 = call { <8 x half>, <8 x half> } @llvm.sincos.v8f16(<8 x half> poison)
%v4f32 = call { <4 x float>, <4 x float> } @llvm.sincos.v4f32(<4 x float> poison)
%v2f64 = call { <2 x double>, <2 x double> } @llvm.sincos.v2f64(<2 x double> poison)
%v1f128 = call { <1 x fp128>, <1 x fp128> } @llvm.sincos.v1f128(<1 x fp128> poison)
%v8f32 = call { <8 x float>, <8 x float> } @llvm.sincos.v8f32(<8 x float> poison)

; Scalable vector calls. The intrinsic suffix uses the "nxv" mangling so that
; the callee name matches the overloaded <vscale x N x T> type.
%nxv8f16 = call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.sincos.nxv8f16(<vscale x 8 x half> poison)
%nxv4f32 = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.sincos.nxv4f32(<vscale x 4 x float> poison)
%nxv2f64 = call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.sincos.nxv2f64(<vscale x 2 x double> poison)
%nxv1f128 = call { <vscale x 1 x fp128>, <vscale x 1 x fp128> } @llvm.sincos.nxv1f128(<vscale x 1 x fp128> poison)
%nxv8f32 = call { <vscale x 8 x float>, <vscale x 8 x float> } @llvm.sincos.nxv8f32(<vscale x 8 x float> poison)

ret void
}
Loading