Skip to content

Commit a97d39c

Browse files
author
Francis Visoiu Mistrih
committed
[Clang] Add __builtin_elementwise|reduce_max|minimum
We have the LLVM intrinsics, and we're missing the clang builtins to be used directly in code that needs to make the distinction in NaN semantics.
1 parent f957d08 commit a97d39c

16 files changed

+429
-92
lines changed

clang/docs/LanguageExtensions.rst

Lines changed: 89 additions & 73 deletions
Large diffs are not rendered by default.

clang/docs/ReleaseNotes.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,9 @@ C++ Language Changes
144144

145145
- Add ``__builtin_elementwise_fmod`` builtin for floating point types only.
146146

147+
- Add ``__builtin_elementwise_minimum`` and ``__builtin_elementwise_maximum``
148+
builtin for floating point types only.
149+
147150
- The builtin type alias ``__builtin_common_type`` has been added to improve the
148151
performance of ``std::common_type``.
149152

clang/include/clang/Basic/Builtins.td

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1268,6 +1268,18 @@ def ElementwiseMin : Builtin {
12681268
let Prototype = "void(...)";
12691269
}
12701270

1271+
def ElementwiseMaximum : Builtin {
1272+
let Spellings = ["__builtin_elementwise_maximum"];
1273+
let Attributes = [NoThrow, Const, CustomTypeChecking];
1274+
let Prototype = "void(...)";
1275+
}
1276+
1277+
def ElementwiseMinimum : Builtin {
1278+
let Spellings = ["__builtin_elementwise_minimum"];
1279+
let Attributes = [NoThrow, Const, CustomTypeChecking];
1280+
let Prototype = "void(...)";
1281+
}
1282+
12711283
def ElementwiseCeil : Builtin {
12721284
let Spellings = ["__builtin_elementwise_ceil"];
12731285
let Attributes = [NoThrow, Const, CustomTypeChecking];
@@ -1442,6 +1454,18 @@ def ReduceMin : Builtin {
14421454
let Prototype = "void(...)";
14431455
}
14441456

1457+
def ReduceMaximum : Builtin {
1458+
let Spellings = ["__builtin_reduce_maximum"];
1459+
let Attributes = [NoThrow, Const, CustomTypeChecking];
1460+
let Prototype = "void(...)";
1461+
}
1462+
1463+
def ReduceMinimum : Builtin {
1464+
let Spellings = ["__builtin_reduce_minimum"];
1465+
let Attributes = [NoThrow, Const, CustomTypeChecking];
1466+
let Prototype = "void(...)";
1467+
}
1468+
14451469
def ReduceXor : Builtin {
14461470
let Spellings = ["__builtin_reduce_xor"];
14471471
let Attributes = [NoThrow, Const, CustomTypeChecking];

clang/include/clang/Basic/DiagnosticSemaKinds.td

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12214,7 +12214,8 @@ def err_builtin_invalid_arg_type: Error <
1221412214
"a floating point type|"
1221512215
"a vector of integers|"
1221612216
"an unsigned integer|"
12217-
"an 'int'}1 (was %2)">;
12217+
"an 'int'|"
12218+
"a vector of floating points}1 (was %2)">;
1221812219

1221912220
def err_builtin_matrix_disabled: Error<
1222012221
"matrix types extension is disabled. Pass -fenable-matrix to enable it">;

clang/include/clang/Sema/Sema.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2381,7 +2381,8 @@ class Sema final : public SemaBase {
23812381
bool CheckFunctionCall(FunctionDecl *FDecl, CallExpr *TheCall,
23822382
const FunctionProtoType *Proto);
23832383

2384-
bool BuiltinVectorMath(CallExpr *TheCall, QualType &Res);
2384+
/// \param FPOnly restricts the arguments to floating-point types.
2385+
bool BuiltinVectorMath(CallExpr *TheCall, QualType &Res, bool FPOnly = false);
23852386
bool BuiltinVectorToScalarMath(CallExpr *TheCall);
23862387

23872388
/// Handles the checks for format strings, non-POD arguments to vararg
@@ -2573,7 +2574,8 @@ class Sema final : public SemaBase {
25732574
ExprResult AtomicOpsOverloaded(ExprResult TheCallResult,
25742575
AtomicExpr::AtomicOp Op);
25752576

2576-
bool BuiltinElementwiseMath(CallExpr *TheCall);
2577+
/// \param FPOnly restricts the arguments to floating-point types.
2578+
bool BuiltinElementwiseMath(CallExpr *TheCall, bool FPOnly = false);
25772579
bool PrepareBuiltinReduceMathOneArgCall(CallExpr *TheCall);
25782580

25792581
bool BuiltinNonDeterministicValue(CallExpr *TheCall);

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3961,6 +3961,22 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
39613961
return RValue::get(Result);
39623962
}
39633963

3964+
case Builtin::BI__builtin_elementwise_maximum: {
3965+
Value *Op0 = EmitScalarExpr(E->getArg(0));
3966+
Value *Op1 = EmitScalarExpr(E->getArg(1));
3967+
Value *Result = Builder.CreateBinaryIntrinsic(llvm::Intrinsic::maximum, Op0,
3968+
Op1, nullptr, "elt.maximum");
3969+
return RValue::get(Result);
3970+
}
3971+
3972+
case Builtin::BI__builtin_elementwise_minimum: {
3973+
Value *Op0 = EmitScalarExpr(E->getArg(0));
3974+
Value *Op1 = EmitScalarExpr(E->getArg(1));
3975+
Value *Result = Builder.CreateBinaryIntrinsic(llvm::Intrinsic::minimum, Op0,
3976+
Op1, nullptr, "elt.minimum");
3977+
return RValue::get(Result);
3978+
}
3979+
39643980
case Builtin::BI__builtin_reduce_max: {
39653981
auto GetIntrinsicID = [this](QualType QT) {
39663982
if (auto *VecTy = QT->getAs<VectorType>())
@@ -4013,6 +4029,12 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
40134029
case Builtin::BI__builtin_reduce_and:
40144030
return RValue::get(emitBuiltinWithOneOverloadedType<1>(
40154031
*this, E, llvm::Intrinsic::vector_reduce_and, "rdx.and"));
4032+
case Builtin::BI__builtin_reduce_maximum:
4033+
return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4034+
*this, E, llvm::Intrinsic::vector_reduce_fmaximum, "rdx.maximum"));
4035+
case Builtin::BI__builtin_reduce_minimum:
4036+
return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4037+
*this, E, llvm::Intrinsic::vector_reduce_fminimum, "rdx.minimum"));
40164038

40174039
case Builtin::BI__builtin_matrix_transpose: {
40184040
auto *MatrixTy = E->getArg(0)->getType()->castAs<ConstantMatrixType>();

clang/lib/Sema/SemaChecking.cpp

Lines changed: 36 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2755,16 +2755,11 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
27552755

27562756
// These builtins restrict the element type to floating point
27572757
// types only, and take in two arguments.
2758+
case Builtin::BI__builtin_elementwise_minimum:
2759+
case Builtin::BI__builtin_elementwise_maximum:
27582760
case Builtin::BI__builtin_elementwise_fmod:
27592761
case Builtin::BI__builtin_elementwise_pow: {
2760-
if (BuiltinElementwiseMath(TheCall))
2761-
return ExprError();
2762-
2763-
QualType ArgTy = TheCall->getArg(0)->getType();
2764-
if (checkFPMathBuiltinElementType(*this, TheCall->getArg(0)->getBeginLoc(),
2765-
ArgTy, 1) ||
2766-
checkFPMathBuiltinElementType(*this, TheCall->getArg(1)->getBeginLoc(),
2767-
ArgTy, 2))
2762+
if (BuiltinElementwiseMath(TheCall, /*FPOnly=*/true))
27682763
return ExprError();
27692764
break;
27702765
}
@@ -2868,6 +2863,29 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
28682863
TheCall->setType(ElTy);
28692864
break;
28702865
}
2866+
case Builtin::BI__builtin_reduce_maximum:
2867+
case Builtin::BI__builtin_reduce_minimum: {
2868+
if (PrepareBuiltinReduceMathOneArgCall(TheCall))
2869+
return ExprError();
2870+
2871+
const Expr *Arg = TheCall->getArg(0);
2872+
const auto *TyA = Arg->getType()->getAs<VectorType>();
2873+
2874+
QualType ElTy;
2875+
if (TyA)
2876+
ElTy = TyA->getElementType();
2877+
else if (Arg->getType()->isSizelessVectorType())
2878+
ElTy = Arg->getType()->getSizelessVectorEltType(Context);
2879+
2880+
if (ElTy.isNull() || !ElTy->isFloatingType()) {
2881+
Diag(Arg->getBeginLoc(), diag::err_builtin_invalid_arg_type)
2882+
<< 1 << /* vector of floating points */ 9 << Arg->getType();
2883+
return ExprError();
2884+
}
2885+
2886+
TheCall->setType(ElTy);
2887+
break;
2888+
}
28712889

28722890
// These builtins support vectors of integers only.
28732891
// TODO: ADD/MUL should support floating-point types.
@@ -14378,9 +14396,9 @@ bool Sema::PrepareBuiltinElementwiseMathOneArgCall(CallExpr *TheCall) {
1437814396
return false;
1437914397
}
1438014398

14381-
bool Sema::BuiltinElementwiseMath(CallExpr *TheCall) {
14399+
bool Sema::BuiltinElementwiseMath(CallExpr *TheCall, bool FPOnly) {
1438214400
QualType Res;
14383-
if (BuiltinVectorMath(TheCall, Res))
14401+
if (BuiltinVectorMath(TheCall, Res, FPOnly))
1438414402
return true;
1438514403
TheCall->setType(Res);
1438614404
return false;
@@ -14399,7 +14417,7 @@ bool Sema::BuiltinVectorToScalarMath(CallExpr *TheCall) {
1439914417
return false;
1440014418
}
1440114419

14402-
bool Sema::BuiltinVectorMath(CallExpr *TheCall, QualType &Res) {
14420+
bool Sema::BuiltinVectorMath(CallExpr *TheCall, QualType &Res, bool FPOnly) {
1440314421
if (checkArgCount(TheCall, 2))
1440414422
return true;
1440514423

@@ -14419,8 +14437,13 @@ bool Sema::BuiltinVectorMath(CallExpr *TheCall, QualType &Res) {
1441914437
diag::err_typecheck_call_different_arg_types)
1442014438
<< TyA << TyB;
1442114439

14422-
if (checkMathBuiltinElementType(*this, A.get()->getBeginLoc(), TyA, 1))
14423-
return true;
14440+
if (FPOnly) {
14441+
if (checkFPMathBuiltinElementType(*this, A.get()->getBeginLoc(), TyA, 1))
14442+
return true;
14443+
} else {
14444+
if (checkMathBuiltinElementType(*this, A.get()->getBeginLoc(), TyA, 1))
14445+
return true;
14446+
}
1442414447

1442514448
TheCall->setArg(0, A.get());
1442614449
TheCall->setArg(1, B.get());

clang/test/CodeGen/builtins-elementwise-math.c

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -169,6 +169,82 @@ void test_builtin_elementwise_sub_sat(float f1, float f2, double d1, double d2,
169169
i1 = __builtin_elementwise_sub_sat(1, 'a');
170170
}
171171

172+
void test_builtin_elementwise_maximum(float f1, float f2, double d1, double d2,
173+
float4 vf1, float4 vf2, long long int i1,
174+
long long int i2, si8 vi1, si8 vi2,
175+
unsigned u1, unsigned u2, u4 vu1, u4 vu2,
176+
_BitInt(31) bi1, _BitInt(31) bi2,
177+
unsigned _BitInt(55) bu1, unsigned _BitInt(55) bu2) {
178+
// CHECK-LABEL: define void @test_builtin_elementwise_maximum(
179+
// CHECK: [[F1:%.+]] = load float, ptr %f1.addr, align 4
180+
// CHECK-NEXT: [[F2:%.+]] = load float, ptr %f2.addr, align 4
181+
// CHECK-NEXT: call float @llvm.maximum.f32(float [[F1]], float [[F2]])
182+
f1 = __builtin_elementwise_maximum(f1, f2);
183+
184+
// CHECK: [[D1:%.+]] = load double, ptr %d1.addr, align 8
185+
// CHECK-NEXT: [[D2:%.+]] = load double, ptr %d2.addr, align 8
186+
// CHECK-NEXT: call double @llvm.maximum.f64(double [[D1]], double [[D2]])
187+
d1 = __builtin_elementwise_maximum(d1, d2);
188+
189+
// CHECK: [[D2:%.+]] = load double, ptr %d2.addr, align 8
190+
// CHECK-NEXT: call double @llvm.maximum.f64(double 2.000000e+01, double [[D2]])
191+
d1 = __builtin_elementwise_maximum(20.0, d2);
192+
193+
// CHECK: [[VF1:%.+]] = load <4 x float>, ptr %vf1.addr, align 16
194+
// CHECK-NEXT: [[VF2:%.+]] = load <4 x float>, ptr %vf2.addr, align 16
195+
// CHECK-NEXT: call <4 x float> @llvm.maximum.v4f32(<4 x float> [[VF1]], <4 x float> [[VF2]])
196+
vf1 = __builtin_elementwise_maximum(vf1, vf2);
197+
198+
// CHECK: [[CVF1:%.+]] = load <4 x float>, ptr %cvf1, align 16
199+
// CHECK-NEXT: [[VF2:%.+]] = load <4 x float>, ptr %vf2.addr, align 16
200+
// CHECK-NEXT: call <4 x float> @llvm.maximum.v4f32(<4 x float> [[CVF1]], <4 x float> [[VF2]])
201+
const float4 cvf1 = vf1;
202+
vf1 = __builtin_elementwise_maximum(cvf1, vf2);
203+
204+
// CHECK: [[VF2:%.+]] = load <4 x float>, ptr %vf2.addr, align 16
205+
// CHECK-NEXT: [[CVF1:%.+]] = load <4 x float>, ptr %cvf1, align 16
206+
// CHECK-NEXT: call <4 x float> @llvm.maximum.v4f32(<4 x float> [[VF2]], <4 x float> [[CVF1]])
207+
vf1 = __builtin_elementwise_maximum(vf2, cvf1);
208+
}
209+
210+
void test_builtin_elementwise_minimum(float f1, float f2, double d1, double d2,
211+
float4 vf1, float4 vf2, long long int i1,
212+
long long int i2, si8 vi1, si8 vi2,
213+
unsigned u1, unsigned u2, u4 vu1, u4 vu2,
214+
_BitInt(31) bi1, _BitInt(31) bi2,
215+
unsigned _BitInt(55) bu1, unsigned _BitInt(55) bu2) {
216+
// CHECK-LABEL: define void @test_builtin_elementwise_minimum(
217+
// CHECK: [[F1:%.+]] = load float, ptr %f1.addr, align 4
218+
// CHECK-NEXT: [[F2:%.+]] = load float, ptr %f2.addr, align 4
219+
// CHECK-NEXT: call float @llvm.minimum.f32(float [[F1]], float [[F2]])
220+
f1 = __builtin_elementwise_minimum(f1, f2);
221+
222+
// CHECK: [[D1:%.+]] = load double, ptr %d1.addr, align 8
223+
// CHECK-NEXT: [[D2:%.+]] = load double, ptr %d2.addr, align 8
224+
// CHECK-NEXT: call double @llvm.minimum.f64(double [[D1]], double [[D2]])
225+
d1 = __builtin_elementwise_minimum(d1, d2);
226+
227+
// CHECK: [[D1:%.+]] = load double, ptr %d1.addr, align 8
228+
// CHECK-NEXT: call double @llvm.minimum.f64(double [[D1]], double 2.000000e+00)
229+
d1 = __builtin_elementwise_minimum(d1, 2.0);
230+
231+
// CHECK: [[VF1:%.+]] = load <4 x float>, ptr %vf1.addr, align 16
232+
// CHECK-NEXT: [[VF2:%.+]] = load <4 x float>, ptr %vf2.addr, align 16
233+
// CHECK-NEXT: call <4 x float> @llvm.minimum.v4f32(<4 x float> [[VF1]], <4 x float> [[VF2]])
234+
vf1 = __builtin_elementwise_minimum(vf1, vf2);
235+
236+
// CHECK: [[CVF1:%.+]] = load <4 x float>, ptr %cvf1, align 16
237+
// CHECK-NEXT: [[VF2:%.+]] = load <4 x float>, ptr %vf2.addr, align 16
238+
// CHECK-NEXT: call <4 x float> @llvm.minimum.v4f32(<4 x float> [[CVF1]], <4 x float> [[VF2]])
239+
const float4 cvf1 = vf1;
240+
vf1 = __builtin_elementwise_minimum(cvf1, vf2);
241+
242+
// CHECK: [[VF2:%.+]] = load <4 x float>, ptr %vf2.addr, align 16
243+
// CHECK-NEXT: [[CVF1:%.+]] = load <4 x float>, ptr %cvf1, align 16
244+
// CHECK-NEXT: call <4 x float> @llvm.minimum.v4f32(<4 x float> [[VF2]], <4 x float> [[CVF1]])
245+
vf1 = __builtin_elementwise_minimum(vf2, cvf1);
246+
}
247+
172248
void test_builtin_elementwise_max(float f1, float f2, double d1, double d2,
173249
float4 vf1, float4 vf2, long long int i1,
174250
long long int i2, si8 vi1, si8 vi2,

clang/test/CodeGen/builtins-reduction-math.c

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,30 @@ void test_builtin_reduce_and(si8 vi1, u4 vu1) {
138138
unsigned r3 = __builtin_reduce_and(vu1);
139139
}
140140

141+
void test_builtin_reduce_maximum(float4 vf1) {
142+
// CHECK-LABEL: define void @test_builtin_reduce_maximum(
143+
// CHECK: [[VF1:%.+]] = load <4 x float>, ptr %vf1.addr, align 16
144+
// CHECK-NEXT: call float @llvm.vector.reduce.fmaximum.v4f32(<4 x float> [[VF1]])
145+
float r1 = __builtin_reduce_maximum(vf1);
146+
147+
// CHECK: [[VF1_AS1:%.+]] = load <4 x float>, ptr addrspace(1) @vf1_as_one, align 16
148+
// CHECK-NEXT: [[RDX1:%.+]] = call float @llvm.vector.reduce.fmaximum.v4f32(<4 x float> [[VF1_AS1]])
149+
// CHECK-NEXT: fpext float [[RDX1]] to double
150+
const double r4 = __builtin_reduce_maximum(vf1_as_one);
151+
}
152+
153+
void test_builtin_reduce_minimum(float4 vf1) {
154+
// CHECK-LABEL: define void @test_builtin_reduce_minimum(
155+
// CHECK: [[VF1:%.+]] = load <4 x float>, ptr %vf1.addr, align 16
156+
// CHECK-NEXT: call float @llvm.vector.reduce.fminimum.v4f32(<4 x float> [[VF1]])
157+
float r1 = __builtin_reduce_minimum(vf1);
158+
159+
// CHECK: [[VF1_AS1:%.+]] = load <4 x float>, ptr addrspace(1) @vf1_as_one, align 16
160+
// CHECK-NEXT: [[RDX1:%.+]] = call float @llvm.vector.reduce.fminimum.v4f32(<4 x float> [[VF1_AS1]])
161+
// CHECK-NEXT: fpext float [[RDX1]] to double
162+
const double r4 = __builtin_reduce_minimum(vf1_as_one);
163+
}
164+
141165
#if defined(__ARM_FEATURE_SVE)
142166
#include <arm_sve.h>
143167

clang/test/CodeGen/strictfp-elementwise-bulitins.cpp

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,26 @@ float4 strict_elementwise_min(float4 a, float4 b) {
4747
return __builtin_elementwise_min(a, b);
4848
}
4949

50+
// CHECK-LABEL: define dso_local noundef <4 x float> @_Z26strict_elementwise_maximumDv4_fS_
51+
// CHECK-SAME: (<4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
52+
// CHECK-NEXT: entry:
53+
// CHECK-NEXT: [[ELT_MAXIMUM:%.*]] = tail call <4 x float> @llvm.maximum.v4f32(<4 x float> [[A]], <4 x float> [[B]]) #[[ATTR4]]
54+
// CHECK-NEXT: ret <4 x float> [[ELT_MAXIMUM]]
55+
//
56+
float4 strict_elementwise_maximum(float4 a, float4 b) {
57+
return __builtin_elementwise_maximum(a, b);
58+
}
59+
60+
// CHECK-LABEL: define dso_local noundef <4 x float> @_Z26strict_elementwise_minimumDv4_fS_
61+
// CHECK-SAME: (<4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
62+
// CHECK-NEXT: entry:
63+
// CHECK-NEXT: [[ELT_MINIMUM:%.*]] = tail call <4 x float> @llvm.minimum.v4f32(<4 x float> [[A]], <4 x float> [[B]]) #[[ATTR4]]
64+
// CHECK-NEXT: ret <4 x float> [[ELT_MINIMUM]]
65+
//
66+
float4 strict_elementwise_minimum(float4 a, float4 b) {
67+
return __builtin_elementwise_minimum(a, b);
68+
}
69+
5070
// CHECK-LABEL: define dso_local noundef <4 x float> @_Z23strict_elementwise_ceilDv4_f
5171
// CHECK-SAME: (<4 x float> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] {
5272
// CHECK-NEXT: entry:

clang/test/Sema/aarch64-sve-vector-pow-ops.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,5 +7,5 @@
77
svfloat32_t test_pow_vv_i8mf8(svfloat32_t v) {
88

99
return __builtin_elementwise_pow(v, v);
10-
// expected-error@-1 {{1st argument must be a vector, integer or floating point type}}
10+
// expected-error@-1 {{1st argument must be a floating point type}}
1111
}

0 commit comments

Comments
 (0)