Skip to content

[Clang] Add __builtin_(elementwise|reduce)_(max|min)imum #110198

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Oct 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
164 changes: 90 additions & 74 deletions clang/docs/LanguageExtensions.rst

Large diffs are not rendered by default.

3 changes: 3 additions & 0 deletions clang/docs/ReleaseNotes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,9 @@ C++ Language Changes

- Add ``__builtin_elementwise_fmod`` builtin for floating point types only.

- Add ``__builtin_elementwise_minimum`` and ``__builtin_elementwise_maximum``
builtin for floating point types only.

- The builtin type alias ``__builtin_common_type`` has been added to improve the
performance of ``std::common_type``.

Expand Down
24 changes: 24 additions & 0 deletions clang/include/clang/Basic/Builtins.td
Original file line number Diff line number Diff line change
Expand Up @@ -1274,6 +1274,18 @@ def ElementwiseMin : Builtin {
let Prototype = "void(...)";
}

def ElementwiseMaximum : Builtin {
let Spellings = ["__builtin_elementwise_maximum"];
let Attributes = [NoThrow, Const, CustomTypeChecking];
let Prototype = "void(...)";
}

def ElementwiseMinimum : Builtin {
let Spellings = ["__builtin_elementwise_minimum"];
let Attributes = [NoThrow, Const, CustomTypeChecking];
let Prototype = "void(...)";
}

def ElementwiseCeil : Builtin {
let Spellings = ["__builtin_elementwise_ceil"];
let Attributes = [NoThrow, Const, CustomTypeChecking];
Expand Down Expand Up @@ -1448,6 +1460,18 @@ def ReduceMin : Builtin {
let Prototype = "void(...)";
}

def ReduceMaximum : Builtin {
let Spellings = ["__builtin_reduce_maximum"];
let Attributes = [NoThrow, Const, CustomTypeChecking];
let Prototype = "void(...)";
}

def ReduceMinimum : Builtin {
let Spellings = ["__builtin_reduce_minimum"];
let Attributes = [NoThrow, Const, CustomTypeChecking];
let Prototype = "void(...)";
}

def ReduceXor : Builtin {
let Spellings = ["__builtin_reduce_xor"];
let Attributes = [NoThrow, Const, CustomTypeChecking];
Expand Down
3 changes: 2 additions & 1 deletion clang/include/clang/Basic/DiagnosticSemaKinds.td
Original file line number Diff line number Diff line change
Expand Up @@ -12214,7 +12214,8 @@ def err_builtin_invalid_arg_type: Error <
"a floating point type|"
"a vector of integers|"
"an unsigned integer|"
"an 'int'}1 (was %2)">;
"an 'int'|"
"a vector of floating points}1 (was %2)">;

def err_builtin_matrix_disabled: Error<
"matrix types extension is disabled. Pass -fenable-matrix to enable it">;
Expand Down
6 changes: 4 additions & 2 deletions clang/include/clang/Sema/Sema.h
Original file line number Diff line number Diff line change
Expand Up @@ -2381,7 +2381,8 @@ class Sema final : public SemaBase {
bool CheckFunctionCall(FunctionDecl *FDecl, CallExpr *TheCall,
const FunctionProtoType *Proto);

bool BuiltinVectorMath(CallExpr *TheCall, QualType &Res);
/// \param FPOnly restricts the arguments to floating-point types.
bool BuiltinVectorMath(CallExpr *TheCall, QualType &Res, bool FPOnly = false);
bool BuiltinVectorToScalarMath(CallExpr *TheCall);

/// Handles the checks for format strings, non-POD arguments to vararg
Expand Down Expand Up @@ -2573,7 +2574,8 @@ class Sema final : public SemaBase {
ExprResult AtomicOpsOverloaded(ExprResult TheCallResult,
AtomicExpr::AtomicOp Op);

bool BuiltinElementwiseMath(CallExpr *TheCall);
/// \param FPOnly restricts the arguments to floating-point types.
bool BuiltinElementwiseMath(CallExpr *TheCall, bool FPOnly = false);
bool PrepareBuiltinReduceMathOneArgCall(CallExpr *TheCall);

bool BuiltinNonDeterministicValue(CallExpr *TheCall);
Expand Down
22 changes: 22 additions & 0 deletions clang/lib/CodeGen/CGBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3964,6 +3964,22 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
return RValue::get(Result);
}

case Builtin::BI__builtin_elementwise_maximum: {
Value *Op0 = EmitScalarExpr(E->getArg(0));
Value *Op1 = EmitScalarExpr(E->getArg(1));
Value *Result = Builder.CreateBinaryIntrinsic(llvm::Intrinsic::maximum, Op0,
Op1, nullptr, "elt.maximum");
return RValue::get(Result);
}

case Builtin::BI__builtin_elementwise_minimum: {
Value *Op0 = EmitScalarExpr(E->getArg(0));
Value *Op1 = EmitScalarExpr(E->getArg(1));
Value *Result = Builder.CreateBinaryIntrinsic(llvm::Intrinsic::minimum, Op0,
Op1, nullptr, "elt.minimum");
return RValue::get(Result);
}

case Builtin::BI__builtin_reduce_max: {
auto GetIntrinsicID = [this](QualType QT) {
if (auto *VecTy = QT->getAs<VectorType>())
Expand Down Expand Up @@ -4016,6 +4032,12 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BI__builtin_reduce_and:
return RValue::get(emitBuiltinWithOneOverloadedType<1>(
*this, E, llvm::Intrinsic::vector_reduce_and, "rdx.and"));
case Builtin::BI__builtin_reduce_maximum:
return RValue::get(emitBuiltinWithOneOverloadedType<1>(
*this, E, llvm::Intrinsic::vector_reduce_fmaximum, "rdx.maximum"));
case Builtin::BI__builtin_reduce_minimum:
return RValue::get(emitBuiltinWithOneOverloadedType<1>(
*this, E, llvm::Intrinsic::vector_reduce_fminimum, "rdx.minimum"));

case Builtin::BI__builtin_matrix_transpose: {
auto *MatrixTy = E->getArg(0)->getType()->castAs<ConstantMatrixType>();
Expand Down
49 changes: 36 additions & 13 deletions clang/lib/Sema/SemaChecking.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2755,17 +2755,12 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,

// These builtins restrict the element type to floating point
// types only, and take in two arguments.
case Builtin::BI__builtin_elementwise_minimum:
case Builtin::BI__builtin_elementwise_maximum:
case Builtin::BI__builtin_elementwise_atan2:
case Builtin::BI__builtin_elementwise_fmod:
case Builtin::BI__builtin_elementwise_pow: {
if (BuiltinElementwiseMath(TheCall))
return ExprError();

QualType ArgTy = TheCall->getArg(0)->getType();
if (checkFPMathBuiltinElementType(*this, TheCall->getArg(0)->getBeginLoc(),
ArgTy, 1) ||
checkFPMathBuiltinElementType(*this, TheCall->getArg(1)->getBeginLoc(),
ArgTy, 2))
if (BuiltinElementwiseMath(TheCall, /*FPOnly=*/true))
return ExprError();
break;
}
Expand Down Expand Up @@ -2869,6 +2864,29 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
TheCall->setType(ElTy);
break;
}
case Builtin::BI__builtin_reduce_maximum:
case Builtin::BI__builtin_reduce_minimum: {
if (PrepareBuiltinReduceMathOneArgCall(TheCall))
return ExprError();

const Expr *Arg = TheCall->getArg(0);
const auto *TyA = Arg->getType()->getAs<VectorType>();

QualType ElTy;
if (TyA)
ElTy = TyA->getElementType();
else if (Arg->getType()->isSizelessVectorType())
ElTy = Arg->getType()->getSizelessVectorEltType(Context);

if (ElTy.isNull() || !ElTy->isFloatingType()) {
Diag(Arg->getBeginLoc(), diag::err_builtin_invalid_arg_type)
<< 1 << /* vector of floating points */ 9 << Arg->getType();
return ExprError();
}

TheCall->setType(ElTy);
break;
}

// These builtins support vectors of integers only.
// TODO: ADD/MUL should support floating-point types.
Expand Down Expand Up @@ -14379,9 +14397,9 @@ bool Sema::PrepareBuiltinElementwiseMathOneArgCall(CallExpr *TheCall) {
return false;
}

bool Sema::BuiltinElementwiseMath(CallExpr *TheCall) {
bool Sema::BuiltinElementwiseMath(CallExpr *TheCall, bool FPOnly) {
QualType Res;
if (BuiltinVectorMath(TheCall, Res))
if (BuiltinVectorMath(TheCall, Res, FPOnly))
return true;
TheCall->setType(Res);
return false;
Expand All @@ -14400,7 +14418,7 @@ bool Sema::BuiltinVectorToScalarMath(CallExpr *TheCall) {
return false;
}

bool Sema::BuiltinVectorMath(CallExpr *TheCall, QualType &Res) {
bool Sema::BuiltinVectorMath(CallExpr *TheCall, QualType &Res, bool FPOnly) {
if (checkArgCount(TheCall, 2))
return true;

Expand All @@ -14420,8 +14438,13 @@ bool Sema::BuiltinVectorMath(CallExpr *TheCall, QualType &Res) {
diag::err_typecheck_call_different_arg_types)
<< TyA << TyB;

if (checkMathBuiltinElementType(*this, A.get()->getBeginLoc(), TyA, 1))
return true;
if (FPOnly) {
if (checkFPMathBuiltinElementType(*this, A.get()->getBeginLoc(), TyA, 1))
return true;
} else {
if (checkMathBuiltinElementType(*this, A.get()->getBeginLoc(), TyA, 1))
return true;
}

TheCall->setArg(0, A.get());
TheCall->setArg(1, B.get());
Expand Down
76 changes: 76 additions & 0 deletions clang/test/CodeGen/builtins-elementwise-math.c
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,82 @@ void test_builtin_elementwise_sub_sat(float f1, float f2, double d1, double d2,
i1 = __builtin_elementwise_sub_sat(1, 'a');
}

void test_builtin_elementwise_maximum(float f1, float f2, double d1, double d2,
float4 vf1, float4 vf2, long long int i1,
long long int i2, si8 vi1, si8 vi2,
unsigned u1, unsigned u2, u4 vu1, u4 vu2,
_BitInt(31) bi1, _BitInt(31) bi2,
unsigned _BitInt(55) bu1, unsigned _BitInt(55) bu2) {
// CHECK-LABEL: define void @test_builtin_elementwise_maximum(
// CHECK: [[F1:%.+]] = load float, ptr %f1.addr, align 4
// CHECK-NEXT: [[F2:%.+]] = load float, ptr %f2.addr, align 4
// CHECK-NEXT: call float @llvm.maximum.f32(float [[F1]], float [[F2]])
f1 = __builtin_elementwise_maximum(f1, f2);

// CHECK: [[D1:%.+]] = load double, ptr %d1.addr, align 8
// CHECK-NEXT: [[D2:%.+]] = load double, ptr %d2.addr, align 8
// CHECK-NEXT: call double @llvm.maximum.f64(double [[D1]], double [[D2]])
d1 = __builtin_elementwise_maximum(d1, d2);

// CHECK: [[D2:%.+]] = load double, ptr %d2.addr, align 8
// CHECK-NEXT: call double @llvm.maximum.f64(double 2.000000e+01, double [[D2]])
d1 = __builtin_elementwise_maximum(20.0, d2);

// CHECK: [[VF1:%.+]] = load <4 x float>, ptr %vf1.addr, align 16
// CHECK-NEXT: [[VF2:%.+]] = load <4 x float>, ptr %vf2.addr, align 16
// CHECK-NEXT: call <4 x float> @llvm.maximum.v4f32(<4 x float> [[VF1]], <4 x float> [[VF2]])
vf1 = __builtin_elementwise_maximum(vf1, vf2);

// CHECK: [[CVF1:%.+]] = load <4 x float>, ptr %cvf1, align 16
// CHECK-NEXT: [[VF2:%.+]] = load <4 x float>, ptr %vf2.addr, align 16
// CHECK-NEXT: call <4 x float> @llvm.maximum.v4f32(<4 x float> [[CVF1]], <4 x float> [[VF2]])
const float4 cvf1 = vf1;
vf1 = __builtin_elementwise_maximum(cvf1, vf2);

// CHECK: [[VF2:%.+]] = load <4 x float>, ptr %vf2.addr, align 16
// CHECK-NEXT: [[CVF1:%.+]] = load <4 x float>, ptr %cvf1, align 16
// CHECK-NEXT: call <4 x float> @llvm.maximum.v4f32(<4 x float> [[VF2]], <4 x float> [[CVF1]])
vf1 = __builtin_elementwise_maximum(vf2, cvf1);
}

void test_builtin_elementwise_minimum(float f1, float f2, double d1, double d2,
float4 vf1, float4 vf2, long long int i1,
long long int i2, si8 vi1, si8 vi2,
unsigned u1, unsigned u2, u4 vu1, u4 vu2,
_BitInt(31) bi1, _BitInt(31) bi2,
unsigned _BitInt(55) bu1, unsigned _BitInt(55) bu2) {
// CHECK-LABEL: define void @test_builtin_elementwise_minimum(
// CHECK: [[F1:%.+]] = load float, ptr %f1.addr, align 4
// CHECK-NEXT: [[F2:%.+]] = load float, ptr %f2.addr, align 4
// CHECK-NEXT: call float @llvm.minimum.f32(float [[F1]], float [[F2]])
f1 = __builtin_elementwise_minimum(f1, f2);

// CHECK: [[D1:%.+]] = load double, ptr %d1.addr, align 8
// CHECK-NEXT: [[D2:%.+]] = load double, ptr %d2.addr, align 8
// CHECK-NEXT: call double @llvm.minimum.f64(double [[D1]], double [[D2]])
d1 = __builtin_elementwise_minimum(d1, d2);

// CHECK: [[D1:%.+]] = load double, ptr %d1.addr, align 8
// CHECK-NEXT: call double @llvm.minimum.f64(double [[D1]], double 2.000000e+00)
d1 = __builtin_elementwise_minimum(d1, 2.0);

// CHECK: [[VF1:%.+]] = load <4 x float>, ptr %vf1.addr, align 16
// CHECK-NEXT: [[VF2:%.+]] = load <4 x float>, ptr %vf2.addr, align 16
// CHECK-NEXT: call <4 x float> @llvm.minimum.v4f32(<4 x float> [[VF1]], <4 x float> [[VF2]])
vf1 = __builtin_elementwise_minimum(vf1, vf2);

// CHECK: [[CVF1:%.+]] = load <4 x float>, ptr %cvf1, align 16
// CHECK-NEXT: [[VF2:%.+]] = load <4 x float>, ptr %vf2.addr, align 16
// CHECK-NEXT: call <4 x float> @llvm.minimum.v4f32(<4 x float> [[CVF1]], <4 x float> [[VF2]])
const float4 cvf1 = vf1;
vf1 = __builtin_elementwise_minimum(cvf1, vf2);

// CHECK: [[VF2:%.+]] = load <4 x float>, ptr %vf2.addr, align 16
// CHECK-NEXT: [[CVF1:%.+]] = load <4 x float>, ptr %cvf1, align 16
// CHECK-NEXT: call <4 x float> @llvm.minimum.v4f32(<4 x float> [[VF2]], <4 x float> [[CVF1]])
vf1 = __builtin_elementwise_minimum(vf2, cvf1);
}

void test_builtin_elementwise_max(float f1, float f2, double d1, double d2,
float4 vf1, float4 vf2, long long int i1,
long long int i2, si8 vi1, si8 vi2,
Expand Down
24 changes: 24 additions & 0 deletions clang/test/CodeGen/builtins-reduction-math.c
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,30 @@ void test_builtin_reduce_and(si8 vi1, u4 vu1) {
unsigned r3 = __builtin_reduce_and(vu1);
}

void test_builtin_reduce_maximum(float4 vf1) {
// CHECK-LABEL: define void @test_builtin_reduce_maximum(
// CHECK: [[VF1:%.+]] = load <4 x float>, ptr %vf1.addr, align 16
// CHECK-NEXT: call float @llvm.vector.reduce.fmaximum.v4f32(<4 x float> [[VF1]])
float r1 = __builtin_reduce_maximum(vf1);

// CHECK: [[VF1_AS1:%.+]] = load <4 x float>, ptr addrspace(1) @vf1_as_one, align 16
// CHECK-NEXT: [[RDX1:%.+]] = call float @llvm.vector.reduce.fmaximum.v4f32(<4 x float> [[VF1_AS1]])
// CHECK-NEXT: fpext float [[RDX1]] to double
const double r4 = __builtin_reduce_maximum(vf1_as_one);
}

void test_builtin_reduce_minimum(float4 vf1) {
// CHECK-LABEL: define void @test_builtin_reduce_minimum(
// CHECK: [[VF1:%.+]] = load <4 x float>, ptr %vf1.addr, align 16
// CHECK-NEXT: call float @llvm.vector.reduce.fminimum.v4f32(<4 x float> [[VF1]])
float r1 = __builtin_reduce_minimum(vf1);

// CHECK: [[VF1_AS1:%.+]] = load <4 x float>, ptr addrspace(1) @vf1_as_one, align 16
// CHECK-NEXT: [[RDX1:%.+]] = call float @llvm.vector.reduce.fminimum.v4f32(<4 x float> [[VF1_AS1]])
// CHECK-NEXT: fpext float [[RDX1]] to double
const double r4 = __builtin_reduce_minimum(vf1_as_one);
}

#if defined(__ARM_FEATURE_SVE)
#include <arm_sve.h>

Expand Down
20 changes: 20 additions & 0 deletions clang/test/CodeGen/strictfp-elementwise-bulitins.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,26 @@ float4 strict_elementwise_min(float4 a, float4 b) {
return __builtin_elementwise_min(a, b);
}

// CHECK-LABEL: define dso_local noundef <4 x float> @_Z26strict_elementwise_maximumDv4_fS_
// CHECK-SAME: (<4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[ELT_MAXIMUM:%.*]] = tail call <4 x float> @llvm.maximum.v4f32(<4 x float> [[A]], <4 x float> [[B]]) #[[ATTR4]]
// CHECK-NEXT: ret <4 x float> [[ELT_MAXIMUM]]
//
float4 strict_elementwise_maximum(float4 a, float4 b) {
return __builtin_elementwise_maximum(a, b);
}

// CHECK-LABEL: define dso_local noundef <4 x float> @_Z26strict_elementwise_minimumDv4_fS_
// CHECK-SAME: (<4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[ELT_MINIMUM:%.*]] = tail call <4 x float> @llvm.minimum.v4f32(<4 x float> [[A]], <4 x float> [[B]]) #[[ATTR4]]
// CHECK-NEXT: ret <4 x float> [[ELT_MINIMUM]]
//
float4 strict_elementwise_minimum(float4 a, float4 b) {
return __builtin_elementwise_minimum(a, b);
}

// CHECK-LABEL: define dso_local noundef <4 x float> @_Z23strict_elementwise_ceilDv4_f
// CHECK-SAME: (<4 x float> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] {
// CHECK-NEXT: entry:
Expand Down
2 changes: 1 addition & 1 deletion clang/test/Sema/aarch64-sve-vector-pow-ops.c
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,5 @@
svfloat32_t test_pow_vv_i8mf8(svfloat32_t v) {

return __builtin_elementwise_pow(v, v);
// expected-error@-1 {{1st argument must be a vector, integer or floating point type}}
// expected-error@-1 {{1st argument must be a floating point type}}
}
Loading
Loading