[Clang][AArch64] Allow FP8 Neon vector types to be used by __builtin_shufflevector

momchil-velikov · momchil-velikov · commit 8d0693f0949b · 2024-12-06T20:59:00.000Z
The Neon vector types for FP8 (`__MFloat8x8_t` and `__MFloat8x16_t`) are
implemented as builtin types and need a special case in
`__builtin_shufflevector`.
diff --git a/clang/include/clang/AST/Type.h b/clang/include/clang/AST/Type.h
@@ -2404,6 +2404,10 @@ class alignas(TypeAlignment) Type : public ExtQualsTypeCommonBase {
   /// SVE vector or predicate, excluding tuple types such as svint32x4_t.
   bool isSveVLSBuiltinType() const;
 
+  /// Determines if this is a *builtin* NEON vector type, a type not built with
+  /// `neon_vector_type`
+  bool isNeonVectorBuiltinType() const;
+
   /// Returns the representative type for the element of an SVE builtin type.
   /// This is used to represent fixed-length SVE vectors created with the
   /// 'arm_sve_vector_bits' type attribute as VectorType.
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -10540,13 +10540,18 @@ def err_vec_builtin_incompatible_vector : Error<
 def err_vsx_builtin_nonconstant_argument : Error<
   "argument %0 to %1 must be a 2-bit unsigned literal (i.e. 0, 1, 2 or 3)">;
 
+def err_shufflevector_incompatible_index_vector : Error<
+  "second argument for __builtin_shufflevector must be integer vector "
+  "with length equal to the length of the first argument">;
 def err_shufflevector_nonconstant_argument : Error<
   "index for __builtin_shufflevector must be a constant integer">;
 def err_shufflevector_argument_too_large : Error<
   "index for __builtin_shufflevector must be less than the total number "
   "of vector elements">;
 def err_shufflevector_minus_one_is_undefined_behavior_constexpr : Error<
   "index for __builtin_shufflevector not within the bounds of the input vectors; index of -1 found at position %0 is not permitted in a constexpr context">;
+def err_shufflevector_unsupported_result_vector_type : Error<
+  "unsupported vector type for the result">;
 
 def err_convertvector_non_vector : Error<
   "first argument to __builtin_convertvector must be a vector">;
diff --git a/clang/lib/AST/Type.cpp b/clang/lib/AST/Type.cpp
@@ -2576,6 +2576,19 @@ bool Type::isSveVLSBuiltinType() const {
   return false;
 }
 
+bool Type::isNeonVectorBuiltinType() const {
+  if (const BuiltinType *BT = getAs<BuiltinType>()) {
+    switch (BT->getKind()) {
+    case BuiltinType::MFloat8x8:
+    case BuiltinType::MFloat8x16:
+      return true;
+    default:
+      return false;
+    }
+  }
+  return false;
+}
+
 QualType Type::getSizelessVectorEltType(const ASTContext &Ctx) const {
   assert(isSizelessVectorType() && "Must be sizeless vector type");
   // Currently supports SVE and RVV
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
@@ -5127,24 +5127,32 @@ ExprResult Sema::BuiltinShuffleVector(CallExpr *TheCall) {
     QualType LHSType = TheCall->getArg(0)->getType();
     QualType RHSType = TheCall->getArg(1)->getType();
 
-    if (!LHSType->isVectorType() || !RHSType->isVectorType())
+    if (!LHSType->isVectorType() && !LHSType->isNeonVectorBuiltinType())
       return ExprError(
-          Diag(TheCall->getBeginLoc(), diag::err_vec_builtin_non_vector)
-          << TheCall->getDirectCallee() << /*isMorethantwoArgs*/ false
+          Diag(TheCall->getBeginLoc(), diag::err_builtin_non_vector_type)
+          << "first" << TheCall->getDirectCallee()
+          << /*isMorethantwoArgs*/ false
           << SourceRange(TheCall->getArg(0)->getBeginLoc(),
                          TheCall->getArg(1)->getEndLoc()));
 
-    numElements = LHSType->castAs<VectorType>()->getNumElements();
+    if (auto *Ty = LHSType->getAs<BuiltinType>()) {
+      assert(Ty->getKind() == BuiltinType::MFloat8x8 ||
+             Ty->getKind() == BuiltinType::MFloat8x16);
+      numElements = Ty->getKind() == BuiltinType::MFloat8x8 ? 8 : 16;
+    } else {
+      numElements = LHSType->castAs<VectorType>()->getNumElements();
+    }
+
     unsigned numResElements = TheCall->getNumArgs() - 2;
 
     // Check to see if we have a call with 2 vector arguments, the unary shuffle
     // with mask.  If so, verify that RHS is an integer vector type with the
     // same number of elts as lhs.
     if (TheCall->getNumArgs() == 2) {
-      if (!RHSType->hasIntegerRepresentation() ||
+      if (!RHSType->isVectorType() || !RHSType->hasIntegerRepresentation() ||
           RHSType->castAs<VectorType>()->getNumElements() != numElements)
         return ExprError(Diag(TheCall->getBeginLoc(),
-                              diag::err_vec_builtin_incompatible_vector)
+                              diag::err_shufflevector_incompatible_index_vector)
                          << TheCall->getDirectCallee()
                          << /*isMorethantwoArgs*/ false
                          << SourceRange(TheCall->getArg(1)->getBeginLoc(),
@@ -5157,6 +5165,25 @@ ExprResult Sema::BuiltinShuffleVector(CallExpr *TheCall) {
                        << SourceRange(TheCall->getArg(0)->getBeginLoc(),
                                       TheCall->getArg(1)->getEndLoc()));
     } else if (numElements != numResElements) {
+      if (auto *Ty = LHSType->getAs<BuiltinType>()) {
+        assert(Ty->getKind() == BuiltinType::MFloat8x8 ||
+               Ty->getKind() == BuiltinType::MFloat8x16);
+        switch (numResElements) {
+        case 8:
+          resType = Context.MFloat8x8Ty;
+          break;
+        case 16:
+          resType = Context.MFloat8x16Ty;
+          break;
+        default:
+          return ExprError(Diag(TheCall->getBeginLoc(),
+                                diag::err_shufflevector_unsupported_result_vector_type)
+                           << TheCall->getDirectCallee()
+                           << /*isMorethantwoArgs*/ false
+                           << SourceRange(TheCall->getArg(0)->getBeginLoc(),
+                                          TheCall->getArg(1)->getEndLoc()));
+        }
+      }
       QualType eltType = LHSType->castAs<VectorType>()->getElementType();
       resType =
           Context.getVectorType(eltType, numResElements, VectorKind::Generic);
diff --git a/clang/test/CodeGen/AArch64/builtin-shufflevector-fp8.c b/clang/test/CodeGen/AArch64/builtin-shufflevector-fp8.c
@@ -0,0 +1,123 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// RUN: %clang_cc1 -triple aarch64-linux -target-feature +neon -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s
+
+// REQUIRES: aarch64-registered-target
+
+typedef __attribute__((neon_vector_type(8))) signed char int8x8_t;
+typedef __attribute__((neon_vector_type(16))) signed char int8x16_t;
+
+typedef __MFloat8x8_t mfloat8x8_t;
+typedef __MFloat8x16_t mfloat8x16_t;
+
+// CHECK-LABEL: define dso_local <8 x i8> @f0(
+// CHECK-SAME: <8 x i8> [[X:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <8 x i8> [[X]], <8 x i8> [[X]], <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 3, i32 2, i32 1, i32 0>
+// CHECK-NEXT:    ret <8 x i8> [[SHUFFLE]]
+//
+mfloat8x8_t f0(mfloat8x8_t x) {
+  return __builtin_shufflevector(x, x, 3, 2, 1, 0, 3, 2, 1, 0);
+}
+
+// CHECK-LABEL: define dso_local <8 x i8> @f1(
+// CHECK-SAME: <8 x i8> [[X:%.*]], <8 x i8> noundef [[P:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[MASK:%.*]] = and <8 x i8> [[P]], splat (i8 7)
+// CHECK-NEXT:    [[SHUF_IDX:%.*]] = extractelement <8 x i8> [[MASK]], i64 0
+// CHECK-NEXT:    [[SHUF_ELT:%.*]] = extractelement <8 x i8> [[X]], i8 [[SHUF_IDX]]
+// CHECK-NEXT:    [[SHUF_INS:%.*]] = insertelement <8 x i8> poison, i8 [[SHUF_ELT]], i64 0
+// CHECK-NEXT:    [[SHUF_IDX1:%.*]] = extractelement <8 x i8> [[MASK]], i64 1
+// CHECK-NEXT:    [[SHUF_ELT2:%.*]] = extractelement <8 x i8> [[X]], i8 [[SHUF_IDX1]]
+// CHECK-NEXT:    [[SHUF_INS3:%.*]] = insertelement <8 x i8> [[SHUF_INS]], i8 [[SHUF_ELT2]], i64 1
+// CHECK-NEXT:    [[SHUF_IDX4:%.*]] = extractelement <8 x i8> [[MASK]], i64 2
+// CHECK-NEXT:    [[SHUF_ELT5:%.*]] = extractelement <8 x i8> [[X]], i8 [[SHUF_IDX4]]
+// CHECK-NEXT:    [[SHUF_INS6:%.*]] = insertelement <8 x i8> [[SHUF_INS3]], i8 [[SHUF_ELT5]], i64 2
+// CHECK-NEXT:    [[SHUF_IDX7:%.*]] = extractelement <8 x i8> [[MASK]], i64 3
+// CHECK-NEXT:    [[SHUF_ELT8:%.*]] = extractelement <8 x i8> [[X]], i8 [[SHUF_IDX7]]
+// CHECK-NEXT:    [[SHUF_INS9:%.*]] = insertelement <8 x i8> [[SHUF_INS6]], i8 [[SHUF_ELT8]], i64 3
+// CHECK-NEXT:    [[SHUF_IDX10:%.*]] = extractelement <8 x i8> [[MASK]], i64 4
+// CHECK-NEXT:    [[SHUF_ELT11:%.*]] = extractelement <8 x i8> [[X]], i8 [[SHUF_IDX10]]
+// CHECK-NEXT:    [[SHUF_INS12:%.*]] = insertelement <8 x i8> [[SHUF_INS9]], i8 [[SHUF_ELT11]], i64 4
+// CHECK-NEXT:    [[SHUF_IDX13:%.*]] = extractelement <8 x i8> [[MASK]], i64 5
+// CHECK-NEXT:    [[SHUF_ELT14:%.*]] = extractelement <8 x i8> [[X]], i8 [[SHUF_IDX13]]
+// CHECK-NEXT:    [[SHUF_INS15:%.*]] = insertelement <8 x i8> [[SHUF_INS12]], i8 [[SHUF_ELT14]], i64 5
+// CHECK-NEXT:    [[SHUF_IDX16:%.*]] = extractelement <8 x i8> [[MASK]], i64 6
+// CHECK-NEXT:    [[SHUF_ELT17:%.*]] = extractelement <8 x i8> [[X]], i8 [[SHUF_IDX16]]
+// CHECK-NEXT:    [[SHUF_INS18:%.*]] = insertelement <8 x i8> [[SHUF_INS15]], i8 [[SHUF_ELT17]], i64 6
+// CHECK-NEXT:    [[SHUF_IDX19:%.*]] = extractelement <8 x i8> [[MASK]], i64 7
+// CHECK-NEXT:    [[SHUF_ELT20:%.*]] = extractelement <8 x i8> [[X]], i8 [[SHUF_IDX19]]
+// CHECK-NEXT:    [[SHUF_INS21:%.*]] = insertelement <8 x i8> [[SHUF_INS18]], i8 [[SHUF_ELT20]], i64 7
+// CHECK-NEXT:    ret <8 x i8> [[SHUF_INS21]]
+//
+mfloat8x8_t f1(mfloat8x8_t x, int8x8_t p) {
+  return __builtin_shufflevector(x, p);
+}
+
+// CHECK-LABEL: define dso_local <16 x i8> @f3(
+// CHECK-SAME: <16 x i8> [[X:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <16 x i8> [[X]], <16 x i8> [[X]], <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+// CHECK-NEXT:    ret <16 x i8> [[SHUFFLE]]
+//
+mfloat8x16_t f3(mfloat8x16_t x) {
+  return __builtin_shufflevector(x, x, 7, 6, 5, 4, 3, 2, 1, 0, 7, 6, 5, 4, 3, 2,
+                                 1, 0);
+}
+
+// CHECK-LABEL: define dso_local <16 x i8> @f4(
+// CHECK-SAME: <16 x i8> [[X:%.*]], <16 x i8> noundef [[P:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[MASK:%.*]] = and <16 x i8> [[P]], splat (i8 15)
+// CHECK-NEXT:    [[SHUF_IDX:%.*]] = extractelement <16 x i8> [[MASK]], i64 0
+// CHECK-NEXT:    [[SHUF_ELT:%.*]] = extractelement <16 x i8> [[X]], i8 [[SHUF_IDX]]
+// CHECK-NEXT:    [[SHUF_INS:%.*]] = insertelement <16 x i8> poison, i8 [[SHUF_ELT]], i64 0
+// CHECK-NEXT:    [[SHUF_IDX1:%.*]] = extractelement <16 x i8> [[MASK]], i64 1
+// CHECK-NEXT:    [[SHUF_ELT2:%.*]] = extractelement <16 x i8> [[X]], i8 [[SHUF_IDX1]]
+// CHECK-NEXT:    [[SHUF_INS3:%.*]] = insertelement <16 x i8> [[SHUF_INS]], i8 [[SHUF_ELT2]], i64 1
+// CHECK-NEXT:    [[SHUF_IDX4:%.*]] = extractelement <16 x i8> [[MASK]], i64 2
+// CHECK-NEXT:    [[SHUF_ELT5:%.*]] = extractelement <16 x i8> [[X]], i8 [[SHUF_IDX4]]
+// CHECK-NEXT:    [[SHUF_INS6:%.*]] = insertelement <16 x i8> [[SHUF_INS3]], i8 [[SHUF_ELT5]], i64 2
+// CHECK-NEXT:    [[SHUF_IDX7:%.*]] = extractelement <16 x i8> [[MASK]], i64 3
+// CHECK-NEXT:    [[SHUF_ELT8:%.*]] = extractelement <16 x i8> [[X]], i8 [[SHUF_IDX7]]
+// CHECK-NEXT:    [[SHUF_INS9:%.*]] = insertelement <16 x i8> [[SHUF_INS6]], i8 [[SHUF_ELT8]], i64 3
+// CHECK-NEXT:    [[SHUF_IDX10:%.*]] = extractelement <16 x i8> [[MASK]], i64 4
+// CHECK-NEXT:    [[SHUF_ELT11:%.*]] = extractelement <16 x i8> [[X]], i8 [[SHUF_IDX10]]
+// CHECK-NEXT:    [[SHUF_INS12:%.*]] = insertelement <16 x i8> [[SHUF_INS9]], i8 [[SHUF_ELT11]], i64 4
+// CHECK-NEXT:    [[SHUF_IDX13:%.*]] = extractelement <16 x i8> [[MASK]], i64 5
+// CHECK-NEXT:    [[SHUF_ELT14:%.*]] = extractelement <16 x i8> [[X]], i8 [[SHUF_IDX13]]
+// CHECK-NEXT:    [[SHUF_INS15:%.*]] = insertelement <16 x i8> [[SHUF_INS12]], i8 [[SHUF_ELT14]], i64 5
+// CHECK-NEXT:    [[SHUF_IDX16:%.*]] = extractelement <16 x i8> [[MASK]], i64 6
+// CHECK-NEXT:    [[SHUF_ELT17:%.*]] = extractelement <16 x i8> [[X]], i8 [[SHUF_IDX16]]
+// CHECK-NEXT:    [[SHUF_INS18:%.*]] = insertelement <16 x i8> [[SHUF_INS15]], i8 [[SHUF_ELT17]], i64 6
+// CHECK-NEXT:    [[SHUF_IDX19:%.*]] = extractelement <16 x i8> [[MASK]], i64 7
+// CHECK-NEXT:    [[SHUF_ELT20:%.*]] = extractelement <16 x i8> [[X]], i8 [[SHUF_IDX19]]
+// CHECK-NEXT:    [[SHUF_INS21:%.*]] = insertelement <16 x i8> [[SHUF_INS18]], i8 [[SHUF_ELT20]], i64 7
+// CHECK-NEXT:    [[SHUF_IDX22:%.*]] = extractelement <16 x i8> [[MASK]], i64 8
+// CHECK-NEXT:    [[SHUF_ELT23:%.*]] = extractelement <16 x i8> [[X]], i8 [[SHUF_IDX22]]
+// CHECK-NEXT:    [[SHUF_INS24:%.*]] = insertelement <16 x i8> [[SHUF_INS21]], i8 [[SHUF_ELT23]], i64 8
+// CHECK-NEXT:    [[SHUF_IDX25:%.*]] = extractelement <16 x i8> [[MASK]], i64 9
+// CHECK-NEXT:    [[SHUF_ELT26:%.*]] = extractelement <16 x i8> [[X]], i8 [[SHUF_IDX25]]
+// CHECK-NEXT:    [[SHUF_INS27:%.*]] = insertelement <16 x i8> [[SHUF_INS24]], i8 [[SHUF_ELT26]], i64 9
+// CHECK-NEXT:    [[SHUF_IDX28:%.*]] = extractelement <16 x i8> [[MASK]], i64 10
+// CHECK-NEXT:    [[SHUF_ELT29:%.*]] = extractelement <16 x i8> [[X]], i8 [[SHUF_IDX28]]
+// CHECK-NEXT:    [[SHUF_INS30:%.*]] = insertelement <16 x i8> [[SHUF_INS27]], i8 [[SHUF_ELT29]], i64 10
+// CHECK-NEXT:    [[SHUF_IDX31:%.*]] = extractelement <16 x i8> [[MASK]], i64 11
+// CHECK-NEXT:    [[SHUF_ELT32:%.*]] = extractelement <16 x i8> [[X]], i8 [[SHUF_IDX31]]
+// CHECK-NEXT:    [[SHUF_INS33:%.*]] = insertelement <16 x i8> [[SHUF_INS30]], i8 [[SHUF_ELT32]], i64 11
+// CHECK-NEXT:    [[SHUF_IDX34:%.*]] = extractelement <16 x i8> [[MASK]], i64 12
+// CHECK-NEXT:    [[SHUF_ELT35:%.*]] = extractelement <16 x i8> [[X]], i8 [[SHUF_IDX34]]
+// CHECK-NEXT:    [[SHUF_INS36:%.*]] = insertelement <16 x i8> [[SHUF_INS33]], i8 [[SHUF_ELT35]], i64 12
+// CHECK-NEXT:    [[SHUF_IDX37:%.*]] = extractelement <16 x i8> [[MASK]], i64 13
+// CHECK-NEXT:    [[SHUF_ELT38:%.*]] = extractelement <16 x i8> [[X]], i8 [[SHUF_IDX37]]
+// CHECK-NEXT:    [[SHUF_INS39:%.*]] = insertelement <16 x i8> [[SHUF_INS36]], i8 [[SHUF_ELT38]], i64 13
+// CHECK-NEXT:    [[SHUF_IDX40:%.*]] = extractelement <16 x i8> [[MASK]], i64 14
+// CHECK-NEXT:    [[SHUF_ELT41:%.*]] = extractelement <16 x i8> [[X]], i8 [[SHUF_IDX40]]
+// CHECK-NEXT:    [[SHUF_INS42:%.*]] = insertelement <16 x i8> [[SHUF_INS39]], i8 [[SHUF_ELT41]], i64 14
+// CHECK-NEXT:    [[SHUF_IDX43:%.*]] = extractelement <16 x i8> [[MASK]], i64 15
+// CHECK-NEXT:    [[SHUF_ELT44:%.*]] = extractelement <16 x i8> [[X]], i8 [[SHUF_IDX43]]
+// CHECK-NEXT:    [[SHUF_INS45:%.*]] = insertelement <16 x i8> [[SHUF_INS42]], i8 [[SHUF_ELT44]], i64 15
+// CHECK-NEXT:    ret <16 x i8> [[SHUF_INS45]]
+//
+mfloat8x16_t f4(mfloat8x16_t x, int8x16_t p) {
+  return __builtin_shufflevector(x, p);
+}
diff --git a/clang/test/Sema/builtin-shufflevector.c b/clang/test/Sema/builtin-shufflevector.c
@@ -0,0 +1,30 @@
+// RUN: %clang_cc1 -triple aarch64 -fsyntax-only -verify %s
+
+// REQUIRES: aarch64-registered-target
+
+typedef __attribute__((neon_vector_type(8))) signed char int8x8_t;
+typedef __attribute__((neon_vector_type(16))) signed char int8x16_t;
+
+typedef __MFloat8x8_t mfloat8x8_t;
+typedef __MFloat8x16_t mfloat8x16_t;
+
+int8x8_t non_vector(int x) {
+  return __builtin_shufflevector(x, x, 3, 2, 1, 0, 3, 2, 1, 0);
+  // expected-error@-1 {{first argument to '__builtin_shufflevector' must be of vector type}}
+}
+
+mfloat8x8_t unsuported_vector(mfloat8x8_t x) {
+  return __builtin_shufflevector(x, x, 3, 2, 1, 0, 3, 2, 1, 0, 0);
+  // expected-error@-1 {{unsupported vector type for the result}}
+}
+
+int8x8_t non_vector_index(int8x8_t x, int p) {
+  return __builtin_shufflevector(x, p);
+  // expected-error@-1 {{second argument for __builtin_shufflevector must be integer vector with length equal to the length of the first argument}}
+}
+
+int8x8_t bad_vector_index_length(int8x8_t x, int8x16_t p) {
+  return __builtin_shufflevector(x, p);
+  // expected-error@-1 {{second argument for __builtin_shufflevector must be integer vector with length equal to the length of the first argument}}
+}
+