llvm · francisvm · Oct 1, 2024 · Sep 27, 2024
diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
@@ -144,6 +144,9 @@ C++ Language Changes
 
 - Add ``__builtin_elementwise_fmod`` builtin for floating point types only.
 
+- Add ``__builtin_elementwise_minimum`` and ``__builtin_elementwise_maximum``
+  builtin for floating point types only.
+
 - The builtin type alias ``__builtin_common_type`` has been added to improve the
   performance of ``std::common_type``.
 

diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td
@@ -1274,6 +1274,18 @@ def ElementwiseMin : Builtin {
   let Prototype = "void(...)";
 }
 
+def ElementwiseMaximum : Builtin {
+  let Spellings = ["__builtin_elementwise_maximum"];
+  let Attributes = [NoThrow, Const, CustomTypeChecking];
+  let Prototype = "void(...)";
+}
+
+def ElementwiseMinimum : Builtin {
+  let Spellings = ["__builtin_elementwise_minimum"];
+  let Attributes = [NoThrow, Const, CustomTypeChecking];
+  let Prototype = "void(...)";
+}
+
 def ElementwiseCeil : Builtin {
   let Spellings = ["__builtin_elementwise_ceil"];
   let Attributes = [NoThrow, Const, CustomTypeChecking];
@@ -1448,6 +1460,18 @@ def ReduceMin : Builtin {
   let Prototype = "void(...)";
 }
 
+def ReduceMaximum : Builtin {
+  let Spellings = ["__builtin_reduce_maximum"];
+  let Attributes = [NoThrow, Const, CustomTypeChecking];
+  let Prototype = "void(...)";
+}
+
+def ReduceMinimum : Builtin {
+  let Spellings = ["__builtin_reduce_minimum"];
+  let Attributes = [NoThrow, Const, CustomTypeChecking];
+  let Prototype = "void(...)";
+}
+
 def ReduceXor : Builtin {
   let Spellings = ["__builtin_reduce_xor"];
   let Attributes = [NoThrow, Const, CustomTypeChecking];

diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -12214,7 +12214,8 @@ def err_builtin_invalid_arg_type: Error <
   "a floating point type|"
   "a vector of integers|"
   "an unsigned integer|"
-  "an 'int'}1 (was %2)">;
+  "an 'int'|"
+  "a vector of floating points}1 (was %2)">;
 
 def err_builtin_matrix_disabled: Error<
   "matrix types extension is disabled. Pass -fenable-matrix to enable it">;

diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
@@ -2381,7 +2381,8 @@ class Sema final : public SemaBase {
   bool CheckFunctionCall(FunctionDecl *FDecl, CallExpr *TheCall,
                          const FunctionProtoType *Proto);
 
-  bool BuiltinVectorMath(CallExpr *TheCall, QualType &Res);
+  /// \param FPOnly restricts the arguments to floating-point types.
+  bool BuiltinVectorMath(CallExpr *TheCall, QualType &Res, bool FPOnly = false);
   bool BuiltinVectorToScalarMath(CallExpr *TheCall);
 
   /// Handles the checks for format strings, non-POD arguments to vararg
@@ -2573,7 +2574,8 @@ class Sema final : public SemaBase {
   ExprResult AtomicOpsOverloaded(ExprResult TheCallResult,
                                  AtomicExpr::AtomicOp Op);
 
-  bool BuiltinElementwiseMath(CallExpr *TheCall);
+  /// \param FPOnly restricts the arguments to floating-point types.
+  bool BuiltinElementwiseMath(CallExpr *TheCall, bool FPOnly = false);
   bool PrepareBuiltinReduceMathOneArgCall(CallExpr *TheCall);
 
   bool BuiltinNonDeterministicValue(CallExpr *TheCall);

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -3964,6 +3964,22 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
     return RValue::get(Result);
   }
 
+  case Builtin::BI__builtin_elementwise_maximum: {
+    Value *Op0 = EmitScalarExpr(E->getArg(0));
+    Value *Op1 = EmitScalarExpr(E->getArg(1));
+    Value *Result = Builder.CreateBinaryIntrinsic(llvm::Intrinsic::maximum, Op0,
+                                                  Op1, nullptr, "elt.maximum");
+    return RValue::get(Result);
+  }
+
+  case Builtin::BI__builtin_elementwise_minimum: {
+    Value *Op0 = EmitScalarExpr(E->getArg(0));
+    Value *Op1 = EmitScalarExpr(E->getArg(1));
+    Value *Result = Builder.CreateBinaryIntrinsic(llvm::Intrinsic::minimum, Op0,
+                                                  Op1, nullptr, "elt.minimum");
+    return RValue::get(Result);
+  }
+
   case Builtin::BI__builtin_reduce_max: {
     auto GetIntrinsicID = [this](QualType QT) {
       if (auto *VecTy = QT->getAs<VectorType>())
@@ -4016,6 +4032,12 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
   case Builtin::BI__builtin_reduce_and:
     return RValue::get(emitBuiltinWithOneOverloadedType<1>(
         *this, E, llvm::Intrinsic::vector_reduce_and, "rdx.and"));
+  case Builtin::BI__builtin_reduce_maximum:
+    return RValue::get(emitBuiltinWithOneOverloadedType<1>(
+        *this, E, llvm::Intrinsic::vector_reduce_fmaximum, "rdx.maximum"));
+  case Builtin::BI__builtin_reduce_minimum:
+    return RValue::get(emitBuiltinWithOneOverloadedType<1>(
+        *this, E, llvm::Intrinsic::vector_reduce_fminimum, "rdx.minimum"));
 
   case Builtin::BI__builtin_matrix_transpose: {
     auto *MatrixTy = E->getArg(0)->getType()->castAs<ConstantMatrixType>();

diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
@@ -2755,17 +2755,12 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
 
   // These builtins restrict the element type to floating point
   // types only, and take in two arguments.
+  case Builtin::BI__builtin_elementwise_minimum:
+  case Builtin::BI__builtin_elementwise_maximum:
   case Builtin::BI__builtin_elementwise_atan2:
   case Builtin::BI__builtin_elementwise_fmod:
   case Builtin::BI__builtin_elementwise_pow: {
-    if (BuiltinElementwiseMath(TheCall))
-      return ExprError();
-
-    QualType ArgTy = TheCall->getArg(0)->getType();
-    if (checkFPMathBuiltinElementType(*this, TheCall->getArg(0)->getBeginLoc(),
-                                      ArgTy, 1) ||
-        checkFPMathBuiltinElementType(*this, TheCall->getArg(1)->getBeginLoc(),
-                                      ArgTy, 2))
+    if (BuiltinElementwiseMath(TheCall, /*FPOnly=*/true))
       return ExprError();
     break;
   }
@@ -2869,6 +2864,29 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
     TheCall->setType(ElTy);
     break;
   }
+  case Builtin::BI__builtin_reduce_maximum:
+  case Builtin::BI__builtin_reduce_minimum: {
+    if (PrepareBuiltinReduceMathOneArgCall(TheCall))
+      return ExprError();
+
+    const Expr *Arg = TheCall->getArg(0);
+    const auto *TyA = Arg->getType()->getAs<VectorType>();
+
+    QualType ElTy;
+    if (TyA)
+      ElTy = TyA->getElementType();
+    else if (Arg->getType()->isSizelessVectorType())
+      ElTy = Arg->getType()->getSizelessVectorEltType(Context);
+
+    if (ElTy.isNull() || !ElTy->isFloatingType()) {
+      Diag(Arg->getBeginLoc(), diag::err_builtin_invalid_arg_type)
+          << 1 << /* vector of floating points */ 9 << Arg->getType();
+      return ExprError();
+    }
+
+    TheCall->setType(ElTy);
+    break;
+  }
 
   // These builtins support vectors of integers only.
   // TODO: ADD/MUL should support floating-point types.
@@ -14379,9 +14397,9 @@ bool Sema::PrepareBuiltinElementwiseMathOneArgCall(CallExpr *TheCall) {
   return false;
 }
 
-bool Sema::BuiltinElementwiseMath(CallExpr *TheCall) {
+bool Sema::BuiltinElementwiseMath(CallExpr *TheCall, bool FPOnly) {
   QualType Res;
-  if (BuiltinVectorMath(TheCall, Res))
+  if (BuiltinVectorMath(TheCall, Res, FPOnly))
     return true;
   TheCall->setType(Res);
   return false;
@@ -14400,7 +14418,7 @@ bool Sema::BuiltinVectorToScalarMath(CallExpr *TheCall) {
   return false;
 }
 
-bool Sema::BuiltinVectorMath(CallExpr *TheCall, QualType &Res) {
+bool Sema::BuiltinVectorMath(CallExpr *TheCall, QualType &Res, bool FPOnly) {
   if (checkArgCount(TheCall, 2))
     return true;
 
@@ -14420,8 +14438,13 @@ bool Sema::BuiltinVectorMath(CallExpr *TheCall, QualType &Res) {
                 diag::err_typecheck_call_different_arg_types)
            << TyA << TyB;
 
-  if (checkMathBuiltinElementType(*this, A.get()->getBeginLoc(), TyA, 1))
-    return true;
+  if (FPOnly) {
+    if (checkFPMathBuiltinElementType(*this, A.get()->getBeginLoc(), TyA, 1))
+      return true;
+  } else {
+    if (checkMathBuiltinElementType(*this, A.get()->getBeginLoc(), TyA, 1))
+      return true;
+  }
 
   TheCall->setArg(0, A.get());
   TheCall->setArg(1, B.get());

diff --git a/clang/test/CodeGen/builtins-elementwise-math.c b/clang/test/CodeGen/builtins-elementwise-math.c
@@ -169,6 +169,82 @@ void test_builtin_elementwise_sub_sat(float f1, float f2, double d1, double d2,
   i1 = __builtin_elementwise_sub_sat(1, 'a');
 }
 
+void test_builtin_elementwise_maximum(float f1, float f2, double d1, double d2,
+                                      float4 vf1, float4 vf2, long long int i1,
+                                      long long int i2, si8 vi1, si8 vi2,
+                                      unsigned u1, unsigned u2, u4 vu1, u4 vu2,
+                                      _BitInt(31) bi1, _BitInt(31) bi2,
+                                      unsigned _BitInt(55) bu1, unsigned _BitInt(55) bu2) {
+  // CHECK-LABEL: define void @test_builtin_elementwise_maximum(
+  // CHECK:      [[F1:%.+]] = load float, ptr %f1.addr, align 4
+  // CHECK-NEXT: [[F2:%.+]] = load float, ptr %f2.addr, align 4
+  // CHECK-NEXT:  call float @llvm.maximum.f32(float [[F1]], float [[F2]])
+  f1 = __builtin_elementwise_maximum(f1, f2);
+
+  // CHECK:      [[D1:%.+]] = load double, ptr %d1.addr, align 8
+  // CHECK-NEXT: [[D2:%.+]] = load double, ptr %d2.addr, align 8
+  // CHECK-NEXT: call double @llvm.maximum.f64(double [[D1]], double [[D2]])
+  d1 = __builtin_elementwise_maximum(d1, d2);
+
+  // CHECK:      [[D2:%.+]] = load double, ptr %d2.addr, align 8
+  // CHECK-NEXT: call double @llvm.maximum.f64(double 2.000000e+01, double [[D2]])
+  d1 = __builtin_elementwise_maximum(20.0, d2);
+
+  // CHECK:      [[VF1:%.+]] = load <4 x float>, ptr %vf1.addr, align 16
+  // CHECK-NEXT: [[VF2:%.+]] = load <4 x float>, ptr %vf2.addr, align 16
+  // CHECK-NEXT: call <4 x float> @llvm.maximum.v4f32(<4 x float> [[VF1]], <4 x float> [[VF2]])
+  vf1 = __builtin_elementwise_maximum(vf1, vf2);
+
+  // CHECK:      [[CVF1:%.+]] = load <4 x float>, ptr %cvf1, align 16
+  // CHECK-NEXT: [[VF2:%.+]] = load <4 x float>, ptr %vf2.addr, align 16
+  // CHECK-NEXT: call <4 x float> @llvm.maximum.v4f32(<4 x float> [[CVF1]], <4 x float> [[VF2]])
+  const float4 cvf1 = vf1;
+  vf1 = __builtin_elementwise_maximum(cvf1, vf2);
+
+  // CHECK:      [[VF2:%.+]] = load <4 x float>, ptr %vf2.addr, align 16
+  // CHECK-NEXT: [[CVF1:%.+]] = load <4 x float>, ptr %cvf1, align 16
+  // CHECK-NEXT: call <4 x float> @llvm.maximum.v4f32(<4 x float> [[VF2]], <4 x float> [[CVF1]])
+  vf1 = __builtin_elementwise_maximum(vf2, cvf1);
+}
+
+void test_builtin_elementwise_minimum(float f1, float f2, double d1, double d2,
+                                      float4 vf1, float4 vf2, long long int i1,
+                                      long long int i2, si8 vi1, si8 vi2,
+                                      unsigned u1, unsigned u2, u4 vu1, u4 vu2,
+                                      _BitInt(31) bi1, _BitInt(31) bi2,
+                                      unsigned _BitInt(55) bu1, unsigned _BitInt(55) bu2) {
+  // CHECK-LABEL: define void @test_builtin_elementwise_minimum(
+  // CHECK:      [[F1:%.+]] = load float, ptr %f1.addr, align 4
+  // CHECK-NEXT: [[F2:%.+]] = load float, ptr %f2.addr, align 4
+  // CHECK-NEXT:  call float @llvm.minimum.f32(float [[F1]], float [[F2]])
+  f1 = __builtin_elementwise_minimum(f1, f2);
+
+  // CHECK:      [[D1:%.+]] = load double, ptr %d1.addr, align 8
+  // CHECK-NEXT: [[D2:%.+]] = load double, ptr %d2.addr, align 8
+  // CHECK-NEXT: call double @llvm.minimum.f64(double [[D1]], double [[D2]])
+  d1 = __builtin_elementwise_minimum(d1, d2);
+
+  // CHECK:      [[D1:%.+]] = load double, ptr %d1.addr, align 8
+  // CHECK-NEXT: call double @llvm.minimum.f64(double [[D1]], double 2.000000e+00)
+  d1 = __builtin_elementwise_minimum(d1, 2.0);
+
+  // CHECK:      [[VF1:%.+]] = load <4 x float>, ptr %vf1.addr, align 16
+  // CHECK-NEXT: [[VF2:%.+]] = load <4 x float>, ptr %vf2.addr, align 16
+  // CHECK-NEXT: call <4 x float> @llvm.minimum.v4f32(<4 x float> [[VF1]], <4 x float> [[VF2]])
+  vf1 = __builtin_elementwise_minimum(vf1, vf2);
+
+  // CHECK:      [[CVF1:%.+]] = load <4 x float>, ptr %cvf1, align 16
+  // CHECK-NEXT: [[VF2:%.+]] = load <4 x float>, ptr %vf2.addr, align 16
+  // CHECK-NEXT: call <4 x float> @llvm.minimum.v4f32(<4 x float> [[CVF1]], <4 x float> [[VF2]])
+  const float4 cvf1 = vf1;
+  vf1 = __builtin_elementwise_minimum(cvf1, vf2);
+
+  // CHECK:      [[VF2:%.+]] = load <4 x float>, ptr %vf2.addr, align 16
+  // CHECK-NEXT: [[CVF1:%.+]] = load <4 x float>, ptr %cvf1, align 16
+  // CHECK-NEXT: call <4 x float> @llvm.minimum.v4f32(<4 x float> [[VF2]], <4 x float> [[CVF1]])
+  vf1 = __builtin_elementwise_minimum(vf2, cvf1);
+}
+
 void test_builtin_elementwise_max(float f1, float f2, double d1, double d2,
                                   float4 vf1, float4 vf2, long long int i1,
                                   long long int i2, si8 vi1, si8 vi2,

diff --git a/clang/test/CodeGen/builtins-reduction-math.c b/clang/test/CodeGen/builtins-reduction-math.c
@@ -138,6 +138,30 @@ void test_builtin_reduce_and(si8 vi1, u4 vu1) {
   unsigned r3 = __builtin_reduce_and(vu1);
 }
 
+void test_builtin_reduce_maximum(float4 vf1) {
+  // CHECK-LABEL: define void @test_builtin_reduce_maximum(
+  // CHECK:      [[VF1:%.+]] = load <4 x float>, ptr %vf1.addr, align 16
+  // CHECK-NEXT: call float @llvm.vector.reduce.fmaximum.v4f32(<4 x float> [[VF1]])
+  float r1 = __builtin_reduce_maximum(vf1);
+
+  // CHECK:      [[VF1_AS1:%.+]] = load <4 x float>, ptr addrspace(1) @vf1_as_one, align 16
+  // CHECK-NEXT: [[RDX1:%.+]] = call float @llvm.vector.reduce.fmaximum.v4f32(<4 x float> [[VF1_AS1]])
+  // CHECK-NEXT: fpext float [[RDX1]] to double
+  const double r4 = __builtin_reduce_maximum(vf1_as_one);
+}
+
+void test_builtin_reduce_minimum(float4 vf1) {
+  // CHECK-LABEL: define void @test_builtin_reduce_minimum(
+  // CHECK:      [[VF1:%.+]] = load <4 x float>, ptr %vf1.addr, align 16
+  // CHECK-NEXT: call float @llvm.vector.reduce.fminimum.v4f32(<4 x float> [[VF1]])
+  float r1 = __builtin_reduce_minimum(vf1);
+
+  // CHECK:      [[VF1_AS1:%.+]] = load <4 x float>, ptr addrspace(1) @vf1_as_one, align 16
+  // CHECK-NEXT: [[RDX1:%.+]] = call float @llvm.vector.reduce.fminimum.v4f32(<4 x float> [[VF1_AS1]])
+  // CHECK-NEXT: fpext float [[RDX1]] to double
+  const double r4 = __builtin_reduce_minimum(vf1_as_one);
+}
+
 #if defined(__ARM_FEATURE_SVE)
 #include <arm_sve.h>
 

diff --git a/clang/test/CodeGen/strictfp-elementwise-bulitins.cpp b/clang/test/CodeGen/strictfp-elementwise-bulitins.cpp
@@ -47,6 +47,26 @@ float4 strict_elementwise_min(float4 a, float4 b) {
   return __builtin_elementwise_min(a, b);
 }
 
+// CHECK-LABEL: define dso_local noundef <4 x float> @_Z26strict_elementwise_maximumDv4_fS_
+// CHECK-SAME: (<4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[ELT_MAXIMUM:%.*]] = tail call <4 x float> @llvm.maximum.v4f32(<4 x float> [[A]], <4 x float> [[B]]) #[[ATTR4]]
+// CHECK-NEXT:    ret <4 x float> [[ELT_MAXIMUM]]
+//
+float4 strict_elementwise_maximum(float4 a, float4 b) {
+  return __builtin_elementwise_maximum(a, b);
+}
+
+// CHECK-LABEL: define dso_local noundef <4 x float> @_Z26strict_elementwise_minimumDv4_fS_
+// CHECK-SAME: (<4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[ELT_MINIMUM:%.*]] = tail call <4 x float> @llvm.minimum.v4f32(<4 x float> [[A]], <4 x float> [[B]]) #[[ATTR4]]
+// CHECK-NEXT:    ret <4 x float> [[ELT_MINIMUM]]
+//
+float4 strict_elementwise_minimum(float4 a, float4 b) {
+  return __builtin_elementwise_minimum(a, b);
+}
+
 // CHECK-LABEL: define dso_local noundef <4 x float> @_Z23strict_elementwise_ceilDv4_f
 // CHECK-SAME: (<4 x float> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] {
 // CHECK-NEXT:  entry:

diff --git a/clang/test/Sema/aarch64-sve-vector-pow-ops.c b/clang/test/Sema/aarch64-sve-vector-pow-ops.c
@@ -7,5 +7,5 @@
 svfloat32_t test_pow_vv_i8mf8(svfloat32_t v) {
 
   return __builtin_elementwise_pow(v, v);
-  // expected-error@-1 {{1st argument must be a vector, integer or floating point type}}
+  // expected-error@-1 {{1st argument must be a floating point type}}
 }