Skip to content

Commit 7999355

Browse files
committed
[Clang] Add min/max reduction builtins.
This patch implements __builtin_reduce_max and __builtin_reduce_min as specified in D111529. The order of operations does not matter for min or max reductions and they can be directly lowered to the corresponding llvm.vector.reduce.{fmin,fmax,umin,umax,smin,smax} intrinsic calls. Reviewed By: aaron.ballman Differential Revision: https://reviews.llvm.org/D112001
1 parent 7b615a8 commit 7999355

File tree

7 files changed

+163
-1
lines changed

7 files changed

+163
-1
lines changed

clang/include/clang/Basic/Builtins.def

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -646,6 +646,8 @@ BUILTIN(__builtin_call_with_static_chain, "v.", "nt")
646646
BUILTIN(__builtin_elementwise_abs, "v.", "nct")
647647
BUILTIN(__builtin_elementwise_max, "v.", "nct")
648648
BUILTIN(__builtin_elementwise_min, "v.", "nct")
649+
BUILTIN(__builtin_reduce_max, "v.", "nct")
650+
BUILTIN(__builtin_reduce_min, "v.", "nct")
649651

650652
BUILTIN(__builtin_matrix_transpose, "v.", "nFt")
651653
BUILTIN(__builtin_matrix_column_major_load, "v.", "nFt")

clang/include/clang/Basic/DiagnosticSemaKinds.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11313,7 +11313,7 @@ def err_builtin_invalid_arg_type: Error <
1131311313
"%ordinal0 argument must be a "
1131411314
"%select{vector, integer or floating point type|matrix|"
1131511315
"pointer to a valid matrix element type|"
11316-
"signed integer or floating point type}1 (was %2)">;
11316+
"signed integer or floating point type|vector type}1 (was %2)">;
1131711317

1131811318
def err_builtin_matrix_disabled: Error<
1131911319
"matrix types extension is disabled. Pass -fenable-matrix to enable it">;

clang/include/clang/Sema/Sema.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12732,6 +12732,7 @@ class Sema final {
1273212732

1273312733
bool SemaBuiltinElementwiseMath(CallExpr *TheCall);
1273412734
bool SemaBuiltinElementwiseMathOneArg(CallExpr *TheCall);
12735+
bool SemaBuiltinReduceMath(CallExpr *TheCall);
1273512736

1273612737
// Matrix builtin handling.
1273712738
ExprResult SemaBuiltinMatrixTranspose(CallExpr *TheCall,

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3145,6 +3145,44 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
31453145
return RValue::get(Result);
31463146
}
31473147

3148+
case Builtin::BI__builtin_reduce_max: {
  // Lowers __builtin_reduce_max(vec) to a single llvm.vector.reduce.*max
  // intrinsic call on the evaluated operand.
  //
  // GetIntrinsicID picks the intrinsic: for integer (or integer-vector) IR
  // types the signed/unsigned variant is chosen from the Clang element type;
  // every other operand falls through to the floating-point variant.
  auto GetIntrinsicID = [](QualType QT, llvm::Type *IrTy) {
    if (IrTy->isIntOrIntVectorTy()) {
      // Look through the vector type so signedness is queried on the element.
      if (auto *VecTy = QT->getAs<VectorType>())
        QT = VecTy->getElementType();
      if (QT->isSignedIntegerType())
        return llvm::Intrinsic::vector_reduce_smax;
      else
        return llvm::Intrinsic::vector_reduce_umax;
    }
    return llvm::Intrinsic::vector_reduce_fmax;
  };
  Value *Op0 = EmitScalarExpr(E->getArg(0));
  // The value name reflects the operation performed (a max reduction).
  Value *Result = Builder.CreateUnaryIntrinsic(
      GetIntrinsicID(E->getArg(0)->getType(), Op0->getType()), Op0, nullptr,
      "rdx.max");
  return RValue::get(Result);
}
3166+
3167+
case Builtin::BI__builtin_reduce_min: {
  // Lowers __builtin_reduce_min(vec) to a single llvm.vector.reduce.*min
  // intrinsic call on the evaluated operand.
  //
  // GetIntrinsicID: non-integer IR types map to the floating-point variant;
  // integer types map to the signed or unsigned variant according to the
  // Clang element type.
  auto GetIntrinsicID = [](QualType QT, llvm::Type *IrTy) {
    if (!IrTy->isIntOrIntVectorTy())
      return llvm::Intrinsic::vector_reduce_fmin;

    // Look through the vector type so signedness is queried on the element.
    if (auto *VecTy = QT->getAs<VectorType>())
      QT = VecTy->getElementType();
    return QT->isSignedIntegerType() ? llvm::Intrinsic::vector_reduce_smin
                                     : llvm::Intrinsic::vector_reduce_umin;
  };

  const Expr *Arg = E->getArg(0);
  Value *Operand = EmitScalarExpr(Arg);
  const auto IID = GetIntrinsicID(Arg->getType(), Operand->getType());
  Value *Reduced =
      Builder.CreateUnaryIntrinsic(IID, Operand, nullptr, "rdx.min");
  return RValue::get(Reduced);
}
3185+
31483186
case Builtin::BI__builtin_matrix_transpose: {
31493187
const auto *MatrixTy = E->getArg(0)->getType()->getAs<ConstantMatrixType>();
31503188
Value *MatValue = EmitScalarExpr(E->getArg(0));

clang/lib/Sema/SemaChecking.cpp

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1985,6 +1985,11 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
19851985
if (SemaBuiltinElementwiseMath(TheCall))
19861986
return ExprError();
19871987
break;
1988+
case Builtin::BI__builtin_reduce_max:
1989+
case Builtin::BI__builtin_reduce_min:
1990+
if (SemaBuiltinReduceMath(TheCall))
1991+
return ExprError();
1992+
break;
19881993
case Builtin::BI__builtin_matrix_transpose:
19891994
return SemaBuiltinMatrixTranspose(TheCall, TheCallResult);
19901995

@@ -16596,6 +16601,26 @@ bool Sema::SemaBuiltinElementwiseMath(CallExpr *TheCall) {
1659616601
return false;
1659716602
}
1659816603

16604+
/// Semantic checking for the one-argument vector reduction builtins
/// (__builtin_reduce_max / __builtin_reduce_min).
///
/// Requires exactly one argument, applies the usual unary conversions to it,
/// and requires the converted argument to have a vector type. On success the
/// call's result type is set to the vector's element type.
///
/// \returns false on success; otherwise the result of the diagnostic /
/// argument-count check (the non-vector case reports
/// err_builtin_invalid_arg_type with the "vector type" alternative).
bool Sema::SemaBuiltinReduceMath(CallExpr *TheCall) {
  if (checkArgCount(*this, TheCall, 1))
    return true;

  ExprResult Converted = UsualUnaryConversions(TheCall->getArg(0));
  if (Converted.isInvalid())
    return true;

  // Store the converted operand back so later stages see the converted form.
  Expr *Arg = Converted.get();
  TheCall->setArg(0, Arg);

  QualType ArgTy = Arg->getType();
  if (const auto *VecTy = ArgTy->getAs<VectorType>()) {
    // The reduction yields a single element of the vector.
    TheCall->setType(VecTy->getElementType());
    return false;
  }

  return Diag(Arg->getBeginLoc(), diag::err_builtin_invalid_arg_type)
         << 1 << /* vector ty*/ 4 << ArgTy;
}
16623+
1659916624
ExprResult Sema::SemaBuiltinMatrixTranspose(CallExpr *TheCall,
1660016625
ExprResult CallResult) {
1660116626
if (checkArgCount(*this, TheCall, 1))
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
// RUN: %clang_cc1 -triple x86_64-apple-darwin %s -emit-llvm -disable-llvm-passes -o - | FileCheck %s
2+
3+
typedef float float4 __attribute__((ext_vector_type(4)));
4+
typedef short int si8 __attribute__((ext_vector_type(8)));
5+
typedef unsigned int u4 __attribute__((ext_vector_type(4)));
6+
7+
__attribute__((address_space(1))) float4 vf1_as_one;
8+
9+
// Codegen test for __builtin_reduce_max. The CHECK lines expect the builtin to
// lower to llvm.vector.reduce.fmax / smax / umax for float, signed 16-bit and
// unsigned 32-bit vectors respectively, including an operand loaded from an
// address-space-1 global and a const-qualified local, and expect the scalar
// result to be converted (fpext / sext) to the type of the initialized variable.
void test_builtin_reduce_max(float4 vf1, si8 vi1, u4 vu1) {
10+
// CHECK-LABEL: define void @test_builtin_reduce_max(
11+
// CHECK: [[VF1:%.+]] = load <4 x float>, <4 x float>* %vf1.addr, align 16
12+
// CHECK-NEXT: call float @llvm.vector.reduce.fmax.v4f32(<4 x float> [[VF1]])
13+
float r1 = __builtin_reduce_max(vf1);
14+
15+
// CHECK: [[VI1:%.+]] = load <8 x i16>, <8 x i16>* %vi1.addr, align 16
16+
// CHECK-NEXT: call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> [[VI1]])
17+
short r2 = __builtin_reduce_max(vi1);
18+
19+
// CHECK: [[VU1:%.+]] = load <4 x i32>, <4 x i32>* %vu1.addr, align 16
20+
// CHECK-NEXT: call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> [[VU1]])
21+
unsigned r3 = __builtin_reduce_max(vu1);
22+
23+
// CHECK: [[VF1_AS1:%.+]] = load <4 x float>, <4 x float> addrspace(1)* @vf1_as_one, align 16
24+
// CHECK-NEXT: [[RDX1:%.+]] = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> [[VF1_AS1]])
25+
// CHECK-NEXT: fpext float [[RDX1]] to double
26+
const double r4 = __builtin_reduce_max(vf1_as_one);
27+
28+
// CHECK: [[CVI1:%.+]] = load <8 x i16>, <8 x i16>* %cvi1, align 16
29+
// CHECK-NEXT: [[RDX2:%.+]] = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> [[CVI1]])
30+
// CHECK-NEXT: sext i16 [[RDX2]] to i64
31+
const si8 cvi1 = vi1;
32+
unsigned long long r5 = __builtin_reduce_max(cvi1);
33+
}
34+
35+
// Codegen test for __builtin_reduce_min. The CHECK lines expect the builtin to
// lower to llvm.vector.reduce.fmin / smin / umin for float, signed 16-bit and
// unsigned 32-bit vectors respectively, including an operand loaded from an
// address-space-1 global and a const-qualified local, and expect the scalar
// result to be converted (fpext / sext) to the type of the initialized variable.
void test_builtin_reduce_min(float4 vf1, si8 vi1, u4 vu1) {
36+
// CHECK-LABEL: define void @test_builtin_reduce_min(
37+
// CHECK: [[VF1:%.+]] = load <4 x float>, <4 x float>* %vf1.addr, align 16
38+
// CHECK-NEXT: call float @llvm.vector.reduce.fmin.v4f32(<4 x float> [[VF1]])
39+
float r1 = __builtin_reduce_min(vf1);
40+
41+
// CHECK: [[VI1:%.+]] = load <8 x i16>, <8 x i16>* %vi1.addr, align 16
42+
// CHECK-NEXT: call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> [[VI1]])
43+
short r2 = __builtin_reduce_min(vi1);
44+
45+
// CHECK: [[VU1:%.+]] = load <4 x i32>, <4 x i32>* %vu1.addr, align 16
46+
// CHECK-NEXT: call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> [[VU1]])
47+
unsigned r3 = __builtin_reduce_min(vu1);
48+
49+
// CHECK: [[VF1_AS1:%.+]] = load <4 x float>, <4 x float> addrspace(1)* @vf1_as_one, align 16
50+
// CHECK-NEXT: [[RDX1:%.+]] = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> [[VF1_AS1]])
51+
// CHECK-NEXT: fpext float [[RDX1]] to double
52+
const double r4 = __builtin_reduce_min(vf1_as_one);
53+
54+
// CHECK: [[CVI1:%.+]] = load <8 x i16>, <8 x i16>* %cvi1, align 16
55+
// CHECK-NEXT: [[RDX2:%.+]] = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> [[CVI1]])
56+
// CHECK-NEXT: sext i16 [[RDX2]] to i64
57+
const si8 cvi1 = vi1;
58+
unsigned long long r5 = __builtin_reduce_min(cvi1);
59+
}
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
// RUN: %clang_cc1 %s -pedantic -verify -triple=x86_64-apple-darwin9
2+
3+
typedef float float4 __attribute__((ext_vector_type(4)));
4+
typedef int int3 __attribute__((ext_vector_type(3)));
5+
typedef unsigned unsigned4 __attribute__((ext_vector_type(4)));
6+
7+
struct Foo {
8+
char *p;
9+
};
10+
11+
// Sema test for __builtin_reduce_max. Expects: the result of reducing an int3
// to have type 'int' (shown via an incompatible struct initialization), errors
// for too many and too few arguments, and an error when the argument is not a
// vector type.
void test_builtin_reduce_max(int i, float4 v, int3 iv) {
12+
struct Foo s = __builtin_reduce_max(iv);
13+
// expected-error@-1 {{initializing 'struct Foo' with an expression of incompatible type 'int'}}
14+
15+
i = __builtin_reduce_max(v, v);
16+
// expected-error@-1 {{too many arguments to function call, expected 1, have 2}}
17+
18+
i = __builtin_reduce_max();
19+
// expected-error@-1 {{too few arguments to function call, expected 1, have 0}}
20+
21+
i = __builtin_reduce_max(i);
22+
// expected-error@-1 {{1st argument must be a vector type (was 'int')}}
23+
}
24+
25+
// Sema test for __builtin_reduce_min. Expects: the result of reducing an int3
// to have type 'int' (shown via an incompatible struct initialization), errors
// for too many and too few arguments, and an error when the argument is not a
// vector type.
void test_builtin_reduce_min(int i, float4 v, int3 iv) {
26+
struct Foo s = __builtin_reduce_min(iv);
27+
// expected-error@-1 {{initializing 'struct Foo' with an expression of incompatible type 'int'}}
28+
29+
i = __builtin_reduce_min(v, v);
30+
// expected-error@-1 {{too many arguments to function call, expected 1, have 2}}
31+
32+
i = __builtin_reduce_min();
33+
// expected-error@-1 {{too few arguments to function call, expected 1, have 0}}
34+
35+
i = __builtin_reduce_min(i);
36+
// expected-error@-1 {{1st argument must be a vector type (was 'int')}}
37+
}

0 commit comments

Comments
 (0)