Skip to content

Commit 840989c

Browse files
author
Francis Visoiu Mistrih
committed
[Clang] Add __builtin_elementwise|reduce_max|minimum
We have the LLVM intrinsics, and we're missing the clang builtins to be used directly in code that needs to make the distinction in NaN semantics.
1 parent fbec675 commit 840989c

14 files changed

+423
-89
lines changed

clang/docs/LanguageExtensions.rst

Lines changed: 87 additions & 71 deletions
Large diffs are not rendered by default.

clang/include/clang/Basic/Builtins.td

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1268,6 +1268,18 @@ def ElementwiseMin : Builtin {
12681268
let Prototype = "void(...)";
12691269
}
12701270

1271+
def ElementwiseMaximum : Builtin {
1272+
let Spellings = ["__builtin_elementwise_maximum"];
1273+
let Attributes = [NoThrow, Const, CustomTypeChecking];
1274+
let Prototype = "void(...)";
1275+
}
1276+
1277+
def ElementwiseMinimum : Builtin {
1278+
let Spellings = ["__builtin_elementwise_minimum"];
1279+
let Attributes = [NoThrow, Const, CustomTypeChecking];
1280+
let Prototype = "void(...)";
1281+
}
1282+
12711283
def ElementwiseCeil : Builtin {
12721284
let Spellings = ["__builtin_elementwise_ceil"];
12731285
let Attributes = [NoThrow, Const, CustomTypeChecking];
@@ -1436,6 +1448,18 @@ def ReduceMin : Builtin {
14361448
let Prototype = "void(...)";
14371449
}
14381450

1451+
def ReduceMaximum : Builtin {
1452+
let Spellings = ["__builtin_reduce_maximum"];
1453+
let Attributes = [NoThrow, Const, CustomTypeChecking];
1454+
let Prototype = "void(...)";
1455+
}
1456+
1457+
def ReduceMinimum : Builtin {
1458+
let Spellings = ["__builtin_reduce_minimum"];
1459+
let Attributes = [NoThrow, Const, CustomTypeChecking];
1460+
let Prototype = "void(...)";
1461+
}
1462+
14391463
def ReduceXor : Builtin {
14401464
let Spellings = ["__builtin_reduce_xor"];
14411465
let Attributes = [NoThrow, Const, CustomTypeChecking];

clang/include/clang/Basic/DiagnosticSemaKinds.td

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12216,7 +12216,8 @@ def err_builtin_invalid_arg_type: Error <
1221612216
"a floating point type|"
1221712217
"a vector of integers|"
1221812218
"an unsigned integer|"
12219-
"an 'int'}1 (was %2)">;
12219+
"an 'int'|"
12220+
"a vector of floating points}1 (was %2)">;
1222012221

1222112222
def err_builtin_matrix_disabled: Error<
1222212223
"matrix types extension is disabled. Pass -fenable-matrix to enable it">;

clang/include/clang/Sema/Sema.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2381,7 +2381,8 @@ class Sema final : public SemaBase {
23812381
bool CheckFunctionCall(FunctionDecl *FDecl, CallExpr *TheCall,
23822382
const FunctionProtoType *Proto);
23832383

2384-
bool BuiltinVectorMath(CallExpr *TheCall, QualType &Res);
2384+
/// \param FPOnly restricts the arguments to floating-point types.
2385+
bool BuiltinVectorMath(CallExpr *TheCall, QualType &Res, bool FPOnly = false);
23852386
bool BuiltinVectorToScalarMath(CallExpr *TheCall);
23862387

23872388
/// Handles the checks for format strings, non-POD arguments to vararg
@@ -2573,7 +2574,8 @@ class Sema final : public SemaBase {
25732574
ExprResult AtomicOpsOverloaded(ExprResult TheCallResult,
25742575
AtomicExpr::AtomicOp Op);
25752576

2576-
bool BuiltinElementwiseMath(CallExpr *TheCall);
2577+
/// \param FPOnly restricts the arguments to floating-point types.
2578+
bool BuiltinElementwiseMath(CallExpr *TheCall, bool FPOnly = false);
25772579
bool PrepareBuiltinReduceMathOneArgCall(CallExpr *TheCall);
25782580

25792581
bool BuiltinNonDeterministicValue(CallExpr *TheCall);

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3960,6 +3960,22 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
39603960
return RValue::get(Result);
39613961
}
39623962

3963+
case Builtin::BI__builtin_elementwise_maximum: {
3964+
Value *Op0 = EmitScalarExpr(E->getArg(0));
3965+
Value *Op1 = EmitScalarExpr(E->getArg(1));
3966+
Value *Result = Builder.CreateBinaryIntrinsic(llvm::Intrinsic::maximum, Op0,
3967+
Op1, nullptr, "elt.maximum");
3968+
return RValue::get(Result);
3969+
}
3970+
3971+
case Builtin::BI__builtin_elementwise_minimum: {
3972+
Value *Op0 = EmitScalarExpr(E->getArg(0));
3973+
Value *Op1 = EmitScalarExpr(E->getArg(1));
3974+
Value *Result = Builder.CreateBinaryIntrinsic(llvm::Intrinsic::minimum, Op0,
3975+
Op1, nullptr, "elt.minimum");
3976+
return RValue::get(Result);
3977+
}
3978+
39633979
case Builtin::BI__builtin_reduce_max: {
39643980
auto GetIntrinsicID = [this](QualType QT) {
39653981
if (auto *VecTy = QT->getAs<VectorType>())
@@ -4012,6 +4028,12 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
40124028
case Builtin::BI__builtin_reduce_and:
40134029
return RValue::get(emitBuiltinWithOneOverloadedType<1>(
40144030
*this, E, llvm::Intrinsic::vector_reduce_and, "rdx.and"));
4031+
case Builtin::BI__builtin_reduce_maximum:
4032+
return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4033+
*this, E, llvm::Intrinsic::vector_reduce_fmaximum, "rdx.maximum"));
4034+
case Builtin::BI__builtin_reduce_minimum:
4035+
return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4036+
*this, E, llvm::Intrinsic::vector_reduce_fminimum, "rdx.minimum"));
40154037

40164038
case Builtin::BI__builtin_matrix_transpose: {
40174039
auto *MatrixTy = E->getArg(0)->getType()->castAs<ConstantMatrixType>();

clang/lib/Sema/SemaChecking.cpp

Lines changed: 36 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2755,15 +2755,10 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
27552755

27562756
// These builtins restrict the element type to floating point
27572757
// types only, and take in two arguments.
2758+
case Builtin::BI__builtin_elementwise_minimum:
2759+
case Builtin::BI__builtin_elementwise_maximum:
27582760
case Builtin::BI__builtin_elementwise_pow: {
2759-
if (BuiltinElementwiseMath(TheCall))
2760-
return ExprError();
2761-
2762-
QualType ArgTy = TheCall->getArg(0)->getType();
2763-
if (checkFPMathBuiltinElementType(*this, TheCall->getArg(0)->getBeginLoc(),
2764-
ArgTy, 1) ||
2765-
checkFPMathBuiltinElementType(*this, TheCall->getArg(1)->getBeginLoc(),
2766-
ArgTy, 2))
2761+
if (BuiltinElementwiseMath(TheCall, /*FPOnly=*/true))
27672762
return ExprError();
27682763
break;
27692764
}
@@ -2867,6 +2862,29 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
28672862
TheCall->setType(ElTy);
28682863
break;
28692864
}
2865+
case Builtin::BI__builtin_reduce_maximum:
2866+
case Builtin::BI__builtin_reduce_minimum: {
2867+
if (PrepareBuiltinReduceMathOneArgCall(TheCall))
2868+
return ExprError();
2869+
2870+
const Expr *Arg = TheCall->getArg(0);
2871+
const auto *TyA = Arg->getType()->getAs<VectorType>();
2872+
2873+
QualType ElTy;
2874+
if (TyA)
2875+
ElTy = TyA->getElementType();
2876+
else if (Arg->getType()->isSizelessVectorType())
2877+
ElTy = Arg->getType()->getSizelessVectorEltType(Context);
2878+
2879+
if (ElTy.isNull() || !ElTy->isFloatingType()) {
2880+
Diag(Arg->getBeginLoc(), diag::err_builtin_invalid_arg_type)
2881+
<< 1 << /* vector of floating points */ 9 << Arg->getType();
2882+
return ExprError();
2883+
}
2884+
2885+
TheCall->setType(ElTy);
2886+
break;
2887+
}
28702888

28712889
// These builtins support vectors of integers only.
28722890
// TODO: ADD/MUL should support floating-point types.
@@ -14377,9 +14395,9 @@ bool Sema::PrepareBuiltinElementwiseMathOneArgCall(CallExpr *TheCall) {
1437714395
return false;
1437814396
}
1437914397

14380-
bool Sema::BuiltinElementwiseMath(CallExpr *TheCall) {
14398+
bool Sema::BuiltinElementwiseMath(CallExpr *TheCall, bool FPOnly) {
1438114399
QualType Res;
14382-
if (BuiltinVectorMath(TheCall, Res))
14400+
if (BuiltinVectorMath(TheCall, Res, FPOnly))
1438314401
return true;
1438414402
TheCall->setType(Res);
1438514403
return false;
@@ -14398,7 +14416,7 @@ bool Sema::BuiltinVectorToScalarMath(CallExpr *TheCall) {
1439814416
return false;
1439914417
}
1440014418

14401-
bool Sema::BuiltinVectorMath(CallExpr *TheCall, QualType &Res) {
14419+
bool Sema::BuiltinVectorMath(CallExpr *TheCall, QualType &Res, bool FPOnly) {
1440214420
if (checkArgCount(TheCall, 2))
1440314421
return true;
1440414422

@@ -14418,8 +14436,13 @@ bool Sema::BuiltinVectorMath(CallExpr *TheCall, QualType &Res) {
1441814436
diag::err_typecheck_call_different_arg_types)
1441914437
<< TyA << TyB;
1442014438

14421-
if (checkMathBuiltinElementType(*this, A.get()->getBeginLoc(), TyA, 1))
14422-
return true;
14439+
if (FPOnly) {
14440+
if (checkFPMathBuiltinElementType(*this, A.get()->getBeginLoc(), TyA, 1))
14441+
return true;
14442+
} else {
14443+
if (checkMathBuiltinElementType(*this, A.get()->getBeginLoc(), TyA, 1))
14444+
return true;
14445+
}
1442314446

1442414447
TheCall->setArg(0, A.get());
1442514448
TheCall->setArg(1, B.get());

clang/test/CodeGen/builtins-elementwise-math.c

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -169,6 +169,82 @@ void test_builtin_elementwise_sub_sat(float f1, float f2, double d1, double d2,
169169
i1 = __builtin_elementwise_sub_sat(1, 'a');
170170
}
171171

172+
void test_builtin_elementwise_maximum(float f1, float f2, double d1, double d2,
173+
float4 vf1, float4 vf2, long long int i1,
174+
long long int i2, si8 vi1, si8 vi2,
175+
unsigned u1, unsigned u2, u4 vu1, u4 vu2,
176+
_BitInt(31) bi1, _BitInt(31) bi2,
177+
unsigned _BitInt(55) bu1, unsigned _BitInt(55) bu2) {
178+
// CHECK-LABEL: define void @test_builtin_elementwise_maximum(
179+
// CHECK: [[F1:%.+]] = load float, ptr %f1.addr, align 4
180+
// CHECK-NEXT: [[F2:%.+]] = load float, ptr %f2.addr, align 4
181+
// CHECK-NEXT: call float @llvm.maximum.f32(float [[F1]], float [[F2]])
182+
f1 = __builtin_elementwise_maximum(f1, f2);
183+
184+
// CHECK: [[D1:%.+]] = load double, ptr %d1.addr, align 8
185+
// CHECK-NEXT: [[D2:%.+]] = load double, ptr %d2.addr, align 8
186+
// CHECK-NEXT: call double @llvm.maximum.f64(double [[D1]], double [[D2]])
187+
d1 = __builtin_elementwise_maximum(d1, d2);
188+
189+
// CHECK: [[D2:%.+]] = load double, ptr %d2.addr, align 8
190+
// CHECK-NEXT: call double @llvm.maximum.f64(double 2.000000e+01, double [[D2]])
191+
d1 = __builtin_elementwise_maximum(20.0, d2);
192+
193+
// CHECK: [[VF1:%.+]] = load <4 x float>, ptr %vf1.addr, align 16
194+
// CHECK-NEXT: [[VF2:%.+]] = load <4 x float>, ptr %vf2.addr, align 16
195+
// CHECK-NEXT: call <4 x float> @llvm.maximum.v4f32(<4 x float> [[VF1]], <4 x float> [[VF2]])
196+
vf1 = __builtin_elementwise_maximum(vf1, vf2);
197+
198+
// CHECK: [[CVF1:%.+]] = load <4 x float>, ptr %cvf1, align 16
199+
// CHECK-NEXT: [[VF2:%.+]] = load <4 x float>, ptr %vf2.addr, align 16
200+
// CHECK-NEXT: call <4 x float> @llvm.maximum.v4f32(<4 x float> [[CVF1]], <4 x float> [[VF2]])
201+
const float4 cvf1 = vf1;
202+
vf1 = __builtin_elementwise_maximum(cvf1, vf2);
203+
204+
// CHECK: [[VF2:%.+]] = load <4 x float>, ptr %vf2.addr, align 16
205+
// CHECK-NEXT: [[CVF1:%.+]] = load <4 x float>, ptr %cvf1, align 16
206+
// CHECK-NEXT: call <4 x float> @llvm.maximum.v4f32(<4 x float> [[VF2]], <4 x float> [[CVF1]])
207+
vf1 = __builtin_elementwise_maximum(vf2, cvf1);
208+
}
209+
210+
void test_builtin_elementwise_minimum(float f1, float f2, double d1, double d2,
211+
float4 vf1, float4 vf2, long long int i1,
212+
long long int i2, si8 vi1, si8 vi2,
213+
unsigned u1, unsigned u2, u4 vu1, u4 vu2,
214+
_BitInt(31) bi1, _BitInt(31) bi2,
215+
unsigned _BitInt(55) bu1, unsigned _BitInt(55) bu2) {
216+
// CHECK-LABEL: define void @test_builtin_elementwise_minimum(
217+
// CHECK: [[F1:%.+]] = load float, ptr %f1.addr, align 4
218+
// CHECK-NEXT: [[F2:%.+]] = load float, ptr %f2.addr, align 4
219+
// CHECK-NEXT: call float @llvm.minimum.f32(float [[F1]], float [[F2]])
220+
f1 = __builtin_elementwise_minimum(f1, f2);
221+
222+
// CHECK: [[D1:%.+]] = load double, ptr %d1.addr, align 8
223+
// CHECK-NEXT: [[D2:%.+]] = load double, ptr %d2.addr, align 8
224+
// CHECK-NEXT: call double @llvm.minimum.f64(double [[D1]], double [[D2]])
225+
d1 = __builtin_elementwise_minimum(d1, d2);
226+
227+
// CHECK: [[D1:%.+]] = load double, ptr %d1.addr, align 8
228+
// CHECK-NEXT: call double @llvm.minimum.f64(double [[D1]], double 2.000000e+00)
229+
d1 = __builtin_elementwise_minimum(d1, 2.0);
230+
231+
// CHECK: [[VF1:%.+]] = load <4 x float>, ptr %vf1.addr, align 16
232+
// CHECK-NEXT: [[VF2:%.+]] = load <4 x float>, ptr %vf2.addr, align 16
233+
// CHECK-NEXT: call <4 x float> @llvm.minimum.v4f32(<4 x float> [[VF1]], <4 x float> [[VF2]])
234+
vf1 = __builtin_elementwise_minimum(vf1, vf2);
235+
236+
// CHECK: [[CVF1:%.+]] = load <4 x float>, ptr %cvf1, align 16
237+
// CHECK-NEXT: [[VF2:%.+]] = load <4 x float>, ptr %vf2.addr, align 16
238+
// CHECK-NEXT: call <4 x float> @llvm.minimum.v4f32(<4 x float> [[CVF1]], <4 x float> [[VF2]])
239+
const float4 cvf1 = vf1;
240+
vf1 = __builtin_elementwise_minimum(cvf1, vf2);
241+
242+
// CHECK: [[VF2:%.+]] = load <4 x float>, ptr %vf2.addr, align 16
243+
// CHECK-NEXT: [[CVF1:%.+]] = load <4 x float>, ptr %cvf1, align 16
244+
// CHECK-NEXT: call <4 x float> @llvm.minimum.v4f32(<4 x float> [[VF2]], <4 x float> [[CVF1]])
245+
vf1 = __builtin_elementwise_minimum(vf2, cvf1);
246+
}
247+
172248
void test_builtin_elementwise_max(float f1, float f2, double d1, double d2,
173249
float4 vf1, float4 vf2, long long int i1,
174250
long long int i2, si8 vi1, si8 vi2,

clang/test/CodeGen/builtins-reduction-math.c

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,30 @@ void test_builtin_reduce_and(si8 vi1, u4 vu1) {
138138
unsigned r3 = __builtin_reduce_and(vu1);
139139
}
140140

141+
void test_builtin_reduce_maximum(float4 vf1) {
142+
// CHECK-LABEL: define void @test_builtin_reduce_maximum(
143+
// CHECK: [[VF1:%.+]] = load <4 x float>, ptr %vf1.addr, align 16
144+
// CHECK-NEXT: call float @llvm.vector.reduce.fmaximum.v4f32(<4 x float> [[VF1]])
145+
float r1 = __builtin_reduce_maximum(vf1);
146+
147+
// CHECK: [[VF1_AS1:%.+]] = load <4 x float>, ptr addrspace(1) @vf1_as_one, align 16
148+
// CHECK-NEXT: [[RDX1:%.+]] = call float @llvm.vector.reduce.fmaximum.v4f32(<4 x float> [[VF1_AS1]])
149+
// CHECK-NEXT: fpext float [[RDX1]] to double
150+
const double r4 = __builtin_reduce_maximum(vf1_as_one);
151+
}
152+
153+
void test_builtin_reduce_minimum(float4 vf1) {
154+
// CHECK-LABEL: define void @test_builtin_reduce_minimum(
155+
// CHECK: [[VF1:%.+]] = load <4 x float>, ptr %vf1.addr, align 16
156+
// CHECK-NEXT: call float @llvm.vector.reduce.fminimum.v4f32(<4 x float> [[VF1]])
157+
float r1 = __builtin_reduce_minimum(vf1);
158+
159+
// CHECK: [[VF1_AS1:%.+]] = load <4 x float>, ptr addrspace(1) @vf1_as_one, align 16
160+
// CHECK-NEXT: [[RDX1:%.+]] = call float @llvm.vector.reduce.fminimum.v4f32(<4 x float> [[VF1_AS1]])
161+
// CHECK-NEXT: fpext float [[RDX1]] to double
162+
const double r4 = __builtin_reduce_minimum(vf1_as_one);
163+
}
164+
141165
#if defined(__ARM_FEATURE_SVE)
142166
#include <arm_sve.h>
143167

clang/test/CodeGen/strictfp-elementwise-bulitins.cpp

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,26 @@ float4 strict_elementwise_min(float4 a, float4 b) {
4747
return __builtin_elementwise_min(a, b);
4848
}
4949

50+
// CHECK-LABEL: define dso_local noundef <4 x float> @_Z26strict_elementwise_maximumDv4_fS_
51+
// CHECK-SAME: (<4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
52+
// CHECK-NEXT: entry:
53+
// CHECK-NEXT: [[ELT_MAXIMUM:%.*]] = tail call <4 x float> @llvm.maximum.v4f32(<4 x float> [[A]], <4 x float> [[B]]) #[[ATTR4]]
54+
// CHECK-NEXT: ret <4 x float> [[ELT_MAXIMUM]]
55+
//
56+
float4 strict_elementwise_maximum(float4 a, float4 b) {
57+
return __builtin_elementwise_maximum(a, b);
58+
}
59+
60+
// CHECK-LABEL: define dso_local noundef <4 x float> @_Z26strict_elementwise_minimumDv4_fS_
61+
// CHECK-SAME: (<4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
62+
// CHECK-NEXT: entry:
63+
// CHECK-NEXT: [[ELT_MINIMUM:%.*]] = tail call <4 x float> @llvm.minimum.v4f32(<4 x float> [[A]], <4 x float> [[B]]) #[[ATTR4]]
64+
// CHECK-NEXT: ret <4 x float> [[ELT_MINIMUM]]
65+
//
66+
float4 strict_elementwise_minimum(float4 a, float4 b) {
67+
return __builtin_elementwise_minimum(a, b);
68+
}
69+
5070
// CHECK-LABEL: define dso_local noundef <4 x float> @_Z23strict_elementwise_ceilDv4_f
5171
// CHECK-SAME: (<4 x float> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] {
5272
// CHECK-NEXT: entry:

clang/test/Sema/aarch64-sve-vector-pow-ops.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,5 +7,5 @@
77
svfloat32_t test_pow_vv_i8mf8(svfloat32_t v) {
88

99
return __builtin_elementwise_pow(v, v);
10-
// expected-error@-1 {{1st argument must be a vector, integer or floating point type}}
10+
// expected-error@-1 {{1st argument must be a floating point type}}
1111
}

0 commit comments

Comments
 (0)