Skip to content

Commit a07348c

Browse files
author
Andy Kaylor
authored
Add new intrinsics and attributes to control accuracy of FP calls (#8134)
This patch adds a new set of fpbuiltin intrinsics to represent operations that are equivalent to common math library functions and basic operations, and adds a new call site attribute ("fp-max-error") to specify the required accuracy of these calls. The purpose of these new IR constructs is to support alternate math library implementations and provide a general mechanism for selecting among multiple implementations based on specific requirements.
1 parent 2032ce4 commit a07348c

30 files changed

+2109
-1
lines changed

llvm/docs/LangRef.rst

Lines changed: 1183 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
//===-- AltMathLibFuncs.def - Library information ---------*- C++ -*-------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
// This .def file will create descriptions of available fpbuiltin math library
10+
// function implementations and their constraining attributes. The current
11+
// support is limited to a fake test library for verifying the infrastructure.
12+
// The fake implementation can be removed when a real implementation is
13+
// available.
14+
15+
// An accuracy of 0.5 indicates that the result is exact or correctly rounded.
16+
17+
#define FIXED(NL) ElementCount::getFixed(NL)
18+
#define SCALABLE(NL) ElementCount::getScalable(NL)
19+
20+
#if !(defined(TLI_DEFINE_ALTMATHFUNC))
21+
#define TLI_DEFINE_ALTMATHFUNC(IID, TYPE, VECSIZE, NAME, ACCURACY) \
22+
{IID, TYPE, VECSIZE, NAME, ACCURACY},
23+
#endif
24+
25+
26+
#if defined(TLI_DEFINE_TEST_ALTMATHFUNCS)
27+
28+
// Just define a few examples to test the infrastructure
29+
30+
// TEST_ALTMATH_LIB Half precision implementations
31+
TLI_DEFINE_ALTMATHFUNC(Intrinsic::fpbuiltin_fdiv, Type::HalfTyID, FIXED(1), "__test_altmath_fdivh_med", 2.5)
32+
TLI_DEFINE_ALTMATHFUNC(Intrinsic::fpbuiltin_sin, Type::HalfTyID, FIXED(1), "__test_altmath_sinh_high", 1.0)
33+
TLI_DEFINE_ALTMATHFUNC(Intrinsic::fpbuiltin_cos, Type::HalfTyID, FIXED(1), "__test_altmath_cosh_high", 1.0)
34+
TLI_DEFINE_ALTMATHFUNC(Intrinsic::fpbuiltin_cos, Type::HalfTyID, FIXED(1), "__test_altmath_cosh_med", 4.0)
35+
TLI_DEFINE_ALTMATHFUNC(Intrinsic::fpbuiltin_sqrt, Type::HalfTyID, FIXED(1), "__test_altmath_sqrth_cr", 0.5)
36+
TLI_DEFINE_ALTMATHFUNC(Intrinsic::fpbuiltin_rsqrt, Type::HalfTyID, FIXED(1), "__test_altmath_rsqrth_cr", 0.5)
37+
38+
// TEST_ALTMATH_LIB Single precision implementations
39+
TLI_DEFINE_ALTMATHFUNC(Intrinsic::fpbuiltin_fdiv, Type::FloatTyID, FIXED(1), "__test_altmath_fdivf_med", 2.5)
40+
TLI_DEFINE_ALTMATHFUNC(Intrinsic::fpbuiltin_sin, Type::FloatTyID, FIXED(1), "__test_altmath_sinf_cr", 0.5)
41+
TLI_DEFINE_ALTMATHFUNC(Intrinsic::fpbuiltin_sin, Type::FloatTyID, FIXED(1), "__test_altmath_sinf_high", 1.0)
42+
TLI_DEFINE_ALTMATHFUNC(Intrinsic::fpbuiltin_cos, Type::FloatTyID, FIXED(1), "__test_altmath_cosf_high", 1.0)
43+
TLI_DEFINE_ALTMATHFUNC(Intrinsic::fpbuiltin_cos, Type::FloatTyID, FIXED(1), "__test_altmath_cosf_med", 4.0)
44+
TLI_DEFINE_ALTMATHFUNC(Intrinsic::fpbuiltin_tan, Type::FloatTyID, FIXED(1), "__test_altmath_tanf_high", 1.0)
45+
TLI_DEFINE_ALTMATHFUNC(Intrinsic::fpbuiltin_sqrt, Type::FloatTyID, FIXED(1), "__test_altmath_sqrtf_cr", 0.5)
46+
TLI_DEFINE_ALTMATHFUNC(Intrinsic::fpbuiltin_sqrt, Type::FloatTyID, FIXED(1), "__test_altmath_sqrtf_med", 2.5)
47+
TLI_DEFINE_ALTMATHFUNC(Intrinsic::fpbuiltin_rsqrt, Type::FloatTyID, FIXED(1), "__test_altmath_rsqrtf_cr", 0.5)
48+
TLI_DEFINE_ALTMATHFUNC(Intrinsic::fpbuiltin_rsqrt, Type::FloatTyID, FIXED(1), "__test_altmath_rsqrtf_high", 1.0)
49+
TLI_DEFINE_ALTMATHFUNC(Intrinsic::fpbuiltin_rsqrt, Type::FloatTyID, FIXED(1), "__test_altmath_rsqrtf_low", 4096.0)
50+
51+
// TEST_ALTMATH_LIB Double precision implementations
52+
TLI_DEFINE_ALTMATHFUNC(Intrinsic::fpbuiltin_fdiv, Type::DoubleTyID, FIXED(1), "__test_altmath_fdiv_med", 2.5)
53+
TLI_DEFINE_ALTMATHFUNC(Intrinsic::fpbuiltin_sin, Type::DoubleTyID, FIXED(1), "__test_altmath_sin_cr", 0.5)
54+
TLI_DEFINE_ALTMATHFUNC(Intrinsic::fpbuiltin_sin, Type::DoubleTyID, FIXED(1), "__test_altmath_sin_high", 1.0)
55+
TLI_DEFINE_ALTMATHFUNC(Intrinsic::fpbuiltin_cos, Type::DoubleTyID, FIXED(1), "__test_altmath_cos_high", 1.0)
56+
TLI_DEFINE_ALTMATHFUNC(Intrinsic::fpbuiltin_cos, Type::DoubleTyID, FIXED(1), "__test_altmath_cos_med", 4.0)
57+
TLI_DEFINE_ALTMATHFUNC(Intrinsic::fpbuiltin_tan, Type::DoubleTyID, FIXED(1), "__test_altmath_tan_high", 1.0)
58+
TLI_DEFINE_ALTMATHFUNC(Intrinsic::fpbuiltin_sqrt, Type::DoubleTyID, FIXED(1), "__test_altmath_sqrt_cr", 0.5)
59+
TLI_DEFINE_ALTMATHFUNC(Intrinsic::fpbuiltin_sqrt, Type::DoubleTyID, FIXED(1), "__test_altmath_sqrt_med", 2.5)
60+
TLI_DEFINE_ALTMATHFUNC(Intrinsic::fpbuiltin_rsqrt, Type::DoubleTyID, FIXED(1), "__test_altmath_rsqrt_cr", 0.5)
61+
TLI_DEFINE_ALTMATHFUNC(Intrinsic::fpbuiltin_rsqrt, Type::DoubleTyID, FIXED(1), "__test_altmath_rsqrt_high", 1.0)
62+
TLI_DEFINE_ALTMATHFUNC(Intrinsic::fpbuiltin_rsqrt, Type::DoubleTyID, FIXED(1), "__test_altmath_rsqrt_low", 4096.0)
63+
64+
// TEST_ALTMATH_LIB 4 x float implementations
65+
TLI_DEFINE_ALTMATHFUNC(Intrinsic::fpbuiltin_sin, Type::FloatTyID, FIXED(4), "__test_altmath_sinf4_high", 1.0)
66+
TLI_DEFINE_ALTMATHFUNC(Intrinsic::fpbuiltin_cos, Type::FloatTyID, FIXED(4), "__test_altmath_cosf4_high", 1.0)
67+
68+
// TEST_ALTMATH_LIB 8 x float implementations
69+
TLI_DEFINE_ALTMATHFUNC(Intrinsic::fpbuiltin_sin, Type::FloatTyID, FIXED(8), "__test_altmath_sinf8_high", 1.0)
70+
TLI_DEFINE_ALTMATHFUNC(Intrinsic::fpbuiltin_cos, Type::FloatTyID, FIXED(8), "__test_altmath_cosf8_high", 1.0)
71+
72+
// TEST_ALTMATH_LIB 2 x double implementations
73+
TLI_DEFINE_ALTMATHFUNC(Intrinsic::fpbuiltin_sin, Type::DoubleTyID, FIXED(2), "__test_altmath_sin2_high", 1.0)
74+
TLI_DEFINE_ALTMATHFUNC(Intrinsic::fpbuiltin_cos, Type::DoubleTyID, FIXED(2), "__test_altmath_cos2_high", 1.0)
75+
76+
77+
#endif
78+
79+
80+
81+
#undef TLI_DEFINE_ALTMATHFUNC
82+
#undef TLI_DEFINE_TEST_ALTMATHFUNCS

llvm/include/llvm/Analysis/TargetLibraryInfo.h

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
#include "llvm/ADT/BitVector.h"
1313
#include "llvm/ADT/DenseMap.h"
1414
#include "llvm/IR/InstrTypes.h"
15+
#include "llvm/IR/IntrinsicInst.h"
1516
#include "llvm/IR/PassManager.h"
1617
#include "llvm/Pass.h"
1718
#include <optional>
@@ -23,6 +24,15 @@ class Function;
2324
class Module;
2425
class Triple;
2526

27+
/// Describes a possible implementation of a floating point builtin operation
28+
struct AltMathDesc {
29+
Intrinsic::ID IntrinID; ///< fpbuiltin intrinsic this implementation can replace.
30+
Type::TypeID BaseFPType; ///< Type ID of the floating point element type.
31+
ElementCount VectorizationFactor; ///< Element count handled; FIXED(1) entries are scalar.
32+
StringRef FnImplName; ///< Name of the library function providing the implementation.
33+
float Accuracy; ///< 0.5 means exact or correctly rounded. NOTE(review): presumably max error in ULPs - confirm.
34+
};
35+
2636
/// Describes a possible vectorization of a function.
2737
/// Function 'VectorFnName' is equivalent to 'ScalarFnName' vectorized
2838
/// by a factor 'VectorizationFactor'.
@@ -68,6 +78,10 @@ class TargetLibraryInfoImpl {
6878
return static_cast<AvailabilityState>((AvailableArray[F/4] >> 2*(F&3)) & 3);
6979
}
7080

81+
/// Alternate math library functions - sorted by intrinsic ID, then type,
82+
/// then vector size, then accuracy
83+
std::vector<AltMathDesc> AltMathFuncDescs;
84+
7185
/// Vectorization descriptors - sorted by ScalarFnName.
7286
std::vector<VecDesc> VectorDescs;
7387
/// Scalarization descriptors - same content as VectorDescs but sorted based
@@ -96,6 +110,19 @@ class TargetLibraryInfoImpl {
96110
SVML // Intel short vector math library.
97111
};
98112

113+
/// List of known alternate math libraries.
114+
///
115+
/// The alternate math library provides a set of functions that can be used
116+
/// to replace llvm.fpbuiltin intrinsic calls when one or more constraining
117+
/// attributes are specified.
118+
/// The library can be specified by either the frontend or a command-line option,
119+
/// and then used by addAltMathFunctionsFromLib for populating the tables of
120+
/// math function implementations.
121+
enum AltMathLibrary {
122+
NoAltMathLibrary, // Don't use any alternate math library
123+
TestAltMathLibrary // Use a fake alternate math library for testing
124+
};
125+
99126
TargetLibraryInfoImpl();
100127
explicit TargetLibraryInfoImpl(const Triple &T);
101128

@@ -147,6 +174,19 @@ class TargetLibraryInfoImpl {
147174
/// This can be used for options like -fno-builtin.
148175
void disableAllFunctions();
149176

177+
/// Add a set of alternate math library function implementations with
178+
/// attributes that can be used to select an implementation for an
179+
/// llvm.fpbuiltin intrinsic
180+
void addAltMathFunctions(ArrayRef<AltMathDesc> Fns);
181+
182+
/// Calls addAltMathFunctions with a known preset of functions for the
183+
/// given alternate math library.
184+
void addAltMathFunctionsFromLib(enum AltMathLibrary AltLib);
185+
186+
/// Select an alternate math library implementation that meets the criteria
187+
/// described by an FPBuiltinIntrinsic call.
188+
StringRef selectFPBuiltinImplementation(FPBuiltinIntrinsic *Builtin) const;
189+
150190
/// Add a set of scalar -> vector mappings, queryable via
151191
/// getVectorizedFunction and getScalarizedFunction.
152192
void addVectorizableFunctions(ArrayRef<VecDesc> Fns);
@@ -343,6 +383,9 @@ class TargetLibraryInfo {
343383
bool isFunctionVectorizable(StringRef F) const {
344384
return Impl->isFunctionVectorizable(F);
345385
}
386+
StringRef selectFPBuiltinImplementation(FPBuiltinIntrinsic *Builtin) const {
387+
return Impl->selectFPBuiltinImplementation(Builtin);
388+
}
346389
StringRef getVectorizedFunction(StringRef F, const ElementCount &VF) const {
347390
return Impl->getVectorizedFunction(F, VF);
348391
}

llvm/include/llvm/CodeGen/CodeGenPassBuilder.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
#include "llvm/Analysis/TargetTransformInfo.h"
2424
#include "llvm/Analysis/TypeBasedAliasAnalysis.h"
2525
#include "llvm/CodeGen/ExpandReductions.h"
26+
#include "llvm/CodeGen/FPBuiltinFnSelection.h"
2627
#include "llvm/CodeGen/MachinePassManager.h"
2728
#include "llvm/CodeGen/PreISelIntrinsicLowering.h"
2829
#include "llvm/CodeGen/ReplaceWithVeclib.h"
@@ -582,6 +583,7 @@ void CodeGenPassBuilder<Derived>::addISelPasses(AddIRPass &addPass) const {
582583
addPass(PreISelIntrinsicLoweringPass());
583584

584585
derived().addIRPasses(addPass);
586+
addPass(FPBuiltinFnSelectionPass());
585587
derived().addCodeGenPrepare(addPass);
586588
addPassesToHandleExceptions(addPass);
587589
derived().addISelPrepare(addPass);
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
//===- FPBuiltinFnSelection.h - FP builtin function selection pass --------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
// This pass implements alternate math library implementation selection for
10+
// llvm.fpbuiltin.* intrinsics.
11+
//
12+
//===----------------------------------------------------------------------===//
13+
#ifndef LLVM_CODEGEN_FPBUILTINFNSELECTION_H
14+
#define LLVM_CODEGEN_FPBUILTINFNSELECTION_H
15+
16+
#include "llvm/IR/PassManager.h"
17+
18+
namespace llvm {
19+
20+
class Module;
21+
22+
struct FPBuiltinFnSelectionPass : PassInfoMixin<FPBuiltinFnSelectionPass> {
23+
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); ///< Pass entry point; operates on a single function \p F.
24+
};
25+
26+
} // end namespace llvm
27+
28+
#endif // LLVM_CODEGEN_FPBUILTINFNSELECTION_H

llvm/include/llvm/CodeGen/MachinePassRegistry.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ FUNCTION_PASS("mergeicmps", MergeICmpsPass, ())
3939
FUNCTION_PASS("lower-constant-intrinsics", LowerConstantIntrinsicsPass, ())
4040
FUNCTION_PASS("unreachableblockelim", UnreachableBlockElimPass, ())
4141
FUNCTION_PASS("consthoist", ConstantHoistingPass, ())
42+
FUNCTION_PASS("fpbuiltin-fn-selection", FPBuiltinFnSelectionPass, ())
4243
FUNCTION_PASS("replace-with-veclib", ReplaceWithVeclib, ())
4344
FUNCTION_PASS("partially-inline-libcalls", PartiallyInlineLibCallsPass, ())
4445
FUNCTION_PASS("ee-instrument", EntryExitInstrumenterPass, (false))

llvm/include/llvm/CodeGen/Passes.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -450,6 +450,10 @@ namespace llvm {
450450
/// evaluation.
451451
ModulePass *createPreISelIntrinsicLoweringPass();
452452

453+
/// This pass lowers the \@llvm.fpbuiltin.{operation} intrinsics to
454+
/// matching library function calls based on call site attributes.
455+
FunctionPass *createFPBuiltinFnSelectionPass();
456+
453457
/// GlobalMerge - This pass merges internal (by default) globals into structs
454458
/// to enable reuse of a base pointer by indexed addressing modes.
455459
/// It can also be configured to focus on size optimizations only.

llvm/include/llvm/IR/FPBuiltinOps.def

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
//===--- llvm/IR/FPBuiltinOps.def - FP builtin intrinsics -------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
// Defines properties of floating point builtin intrinsics.
10+
//
11+
//===----------------------------------------------------------------------===//
12+
13+
#ifndef OPERATION
14+
#define OPERATION(N,I)
15+
#endif
16+
17+
// Arguments of the entries are:
18+
// - operation name.
19+
// - name of the fpbuiltin intrinsic to represent this operation.
20+
21+
// These are definitions for the operations (basic instructions and math
22+
// library functions) that are represented by fpbuiltin intrinsics.
23+
//
24+
OPERATION(FAdd, fpbuiltin_fadd)
25+
OPERATION(FSub, fpbuiltin_fsub)
26+
OPERATION(FMul, fpbuiltin_fmul)
27+
OPERATION(FDiv, fpbuiltin_fdiv)
28+
OPERATION(FRem, fpbuiltin_frem)
29+
OPERATION(Sin, fpbuiltin_sin)
30+
OPERATION(Cos, fpbuiltin_cos)
31+
OPERATION(Tan, fpbuiltin_tan)
32+
OPERATION(Sinh, fpbuiltin_sinh)
33+
OPERATION(Cosh, fpbuiltin_cosh)
34+
OPERATION(Tanh, fpbuiltin_tanh)
35+
OPERATION(Asin, fpbuiltin_asin)
36+
OPERATION(Acos, fpbuiltin_acos)
37+
OPERATION(Atan, fpbuiltin_atan)
38+
OPERATION(Atan2, fpbuiltin_atan2)
39+
OPERATION(Asinh, fpbuiltin_asinh)
40+
OPERATION(Acosh, fpbuiltin_acosh)
41+
OPERATION(Atanh, fpbuiltin_atanh)
42+
OPERATION(Exp, fpbuiltin_exp)
43+
OPERATION(Exp2, fpbuiltin_exp2)
44+
OPERATION(Exp10, fpbuiltin_exp10)
45+
OPERATION(Expm1, fpbuiltin_expm1)
46+
OPERATION(Log, fpbuiltin_log)
47+
OPERATION(Log2, fpbuiltin_log2)
48+
OPERATION(Log10, fpbuiltin_log10)
49+
OPERATION(Log1p, fpbuiltin_log1p)
50+
OPERATION(Hypot, fpbuiltin_hypot)
51+
OPERATION(Pow, fpbuiltin_pow)
52+
OPERATION(Ldexp, fpbuiltin_ldexp)
53+
OPERATION(Sqrt, fpbuiltin_sqrt)
54+
OPERATION(Rsqrt, fpbuiltin_rsqrt)
55+
OPERATION(Erf, fpbuiltin_erf)
56+
OPERATION(Erfc, fpbuiltin_erfc)
57+
OPERATION(Sincos, fpbuiltin_sincos)
58+
59+
#undef OPERATION

llvm/include/llvm/IR/IntrinsicInst.h

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
#ifndef LLVM_IR_INTRINSICINST_H
2424
#define LLVM_IR_INTRINSICINST_H
2525

26+
#include "llvm/ADT/StringSet.h"
2627
#include "llvm/IR/Constants.h"
2728
#include "llvm/IR/DebugInfoMetadata.h"
2829
#include "llvm/IR/DerivedTypes.h"
@@ -598,6 +599,31 @@ class VPCmpIntrinsic : public VPIntrinsic {
598599
/// @}
599600
};
600601

602+
/// This is the common base class for floating point builtin intrinsics.
603+
class FPBuiltinIntrinsic : public IntrinsicInst {
604+
public:
605+
static const std::string FPBUILTIN_PREFIX; // Defined out of line. NOTE(review): presumably the common attribute-name prefix - confirm.
606+
static const std::string FPBUILTIN_MAX_ERROR; // Defined out of line. NOTE(review): presumably names the "fp-max-error" attribute - confirm.
607+
608+
std::optional<float> getRequiredAccuracy() const; // NOTE(review): presumably empty when no accuracy attribute is present - confirm.
609+
610+
Type::TypeID getBaseTypeID() const; // NOTE(review): presumably the scalar element type ID of the operation - confirm.
611+
ElementCount getElementCount() const; // NOTE(review): presumably the vector element count of the operation - confirm.
612+
613+
/// Check the callsite attributes for this FPBuiltinIntrinsic against a list
614+
/// of FP attributes that the caller knows how to process to see if the
615+
/// current intrinsic has unrecognized attributes
616+
bool hasUnrecognizedFPAttrs(const StringSet<> HandledAttrs); // NOTE(review): takes the set by value (copy) and is non-const; consider const StringSet<> & and a const qualifier, together with the definition.
617+
618+
/// Methods for support type inquiry through isa, cast, and dyn_cast:
619+
/// @{
620+
static bool classof(const IntrinsicInst *I);
621+
static bool classof(const Value *V) {
622+
return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V)); // Only intrinsic calls qualify; defer to the IntrinsicInst overload.
623+
}
624+
/// @}
625+
};
626+
601627
/// This is the common base class for constrained floating point intrinsics.
602628
class ConstrainedFPIntrinsic : public IntrinsicInst {
603629
public:

0 commit comments

Comments
 (0)