Skip to content

Commit 8d0693f

Browse files
[Clang][AArch64] Allow FP8 Neon vector types to be used by __builtin_shufflevector
The Neon vector types for FP8 (`__MFloat8x8_t` and `__MFloat8x16_t`) are implemented as builtin types and need a special case in `__builtin_shufflevector`.
1 parent a83b190 commit 8d0693f

File tree

6 files changed

+208
-6
lines changed

6 files changed

+208
-6
lines changed

clang/include/clang/AST/Type.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2404,6 +2404,10 @@ class alignas(TypeAlignment) Type : public ExtQualsTypeCommonBase {
24042404
/// SVE vector or predicate, excluding tuple types such as svint32x4_t.
24052405
bool isSveVLSBuiltinType() const;
24062406

2407+
/// Determines if this is a *builtin* NEON vector type, i.e. one that is not
2408+
/// declared with the `neon_vector_type` attribute (e.g. `__MFloat8x8_t`).
2409+
bool isNeonVectorBuiltinType() const;
2410+
24072411
/// Returns the representative type for the element of an SVE builtin type.
24082412
/// This is used to represent fixed-length SVE vectors created with the
24092413
/// 'arm_sve_vector_bits' type attribute as VectorType.

clang/include/clang/Basic/DiagnosticSemaKinds.td

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10540,13 +10540,18 @@ def err_vec_builtin_incompatible_vector : Error<
1054010540
def err_vsx_builtin_nonconstant_argument : Error<
1054110541
"argument %0 to %1 must be a 2-bit unsigned literal (i.e. 0, 1, 2 or 3)">;
1054210542

10543+
// Error for the two-argument (vector mask) form of __builtin_shufflevector:
// the mask operand must be an integer vector with as many elements as the
// first operand. The message text contains no %N placeholders.
def err_shufflevector_incompatible_index_vector : Error<
10544+
"second argument for __builtin_shufflevector must be integer vector "
10545+
"with length equal to the length of the first argument">;
1054310546
def err_shufflevector_nonconstant_argument : Error<
1054410547
"index for __builtin_shufflevector must be a constant integer">;
1054510548
def err_shufflevector_argument_too_large : Error<
1054610549
"index for __builtin_shufflevector must be less than the total number "
1054710550
"of vector elements">;
1054810551
def err_shufflevector_minus_one_is_undefined_behavior_constexpr : Error<
1054910552
"index for __builtin_shufflevector not within the bounds of the input vectors; index of -1 found at position %0 is not permitted in a constexpr context">;
10553+
// Error for a __builtin_shufflevector call whose result vector type cannot be
// formed. The message text contains no %N placeholders.
def err_shufflevector_unsupported_result_vector_type : Error<
10554+
"unsupported vector type for the result">;
1055010555

1055110556
def err_convertvector_non_vector : Error<
1055210557
"first argument to __builtin_convertvector must be a vector">;

clang/lib/AST/Type.cpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2576,6 +2576,19 @@ bool Type::isSveVLSBuiltinType() const {
25762576
return false;
25772577
}
25782578

2579+
// Reports whether this type is a NEON vector that is modelled as a BuiltinType.
// Returns true only for the BuiltinType kinds MFloat8x8 and MFloat8x16; every
// other builtin kind, and every type that is not a BuiltinType, yields false.
bool Type::isNeonVectorBuiltinType() const {
2580+
if (const BuiltinType *BT = getAs<BuiltinType>()) {
2581+
switch (BT->getKind()) {
2582+
// The two FP8 vector kinds are the only builtin NEON vectors recognised here.
case BuiltinType::MFloat8x8:
2583+
case BuiltinType::MFloat8x16:
2584+
return true;
2585+
default:
2586+
return false;
2587+
}
2588+
}
2589+
// Not a BuiltinType at all.
return false;
2590+
}
2591+
25792592
QualType Type::getSizelessVectorEltType(const ASTContext &Ctx) const {
25802593
assert(isSizelessVectorType() && "Must be sizeless vector type");
25812594
// Currently supports SVE and RVV

clang/lib/Sema/SemaChecking.cpp

Lines changed: 33 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5127,24 +5127,32 @@ ExprResult Sema::BuiltinShuffleVector(CallExpr *TheCall) {
51275127
QualType LHSType = TheCall->getArg(0)->getType();
51285128
QualType RHSType = TheCall->getArg(1)->getType();
51295129

5130-
if (!LHSType->isVectorType() || !RHSType->isVectorType())
5130+
if (!LHSType->isVectorType() && !LHSType->isNeonVectorBuiltinType())
51315131
return ExprError(
5132-
Diag(TheCall->getBeginLoc(), diag::err_vec_builtin_non_vector)
5133-
<< TheCall->getDirectCallee() << /*isMorethantwoArgs*/ false
5132+
Diag(TheCall->getBeginLoc(), diag::err_builtin_non_vector_type)
5133+
<< "first" << TheCall->getDirectCallee()
5134+
<< /*isMorethantwoArgs*/ false
51345135
<< SourceRange(TheCall->getArg(0)->getBeginLoc(),
51355136
TheCall->getArg(1)->getEndLoc()));
51365137

5137-
numElements = LHSType->castAs<VectorType>()->getNumElements();
5138+
if (auto *Ty = LHSType->getAs<BuiltinType>()) {
5139+
assert(Ty->getKind() == BuiltinType::MFloat8x8 ||
5140+
Ty->getKind() == BuiltinType::MFloat8x16);
5141+
numElements = Ty->getKind() == BuiltinType::MFloat8x8 ? 8 : 16;
5142+
} else {
5143+
numElements = LHSType->castAs<VectorType>()->getNumElements();
5144+
}
5145+
51385146
unsigned numResElements = TheCall->getNumArgs() - 2;
51395147

51405148
// Check to see if we have a call with 2 vector arguments, the unary shuffle
51415149
// with mask. If so, verify that RHS is an integer vector type with the
51425150
// same number of elts as lhs.
51435151
if (TheCall->getNumArgs() == 2) {
5144-
if (!RHSType->hasIntegerRepresentation() ||
5152+
if (!RHSType->isVectorType() || !RHSType->hasIntegerRepresentation() ||
51455153
RHSType->castAs<VectorType>()->getNumElements() != numElements)
51465154
return ExprError(Diag(TheCall->getBeginLoc(),
5147-
diag::err_vec_builtin_incompatible_vector)
5155+
diag::err_shufflevector_incompatible_index_vector)
51485156
<< TheCall->getDirectCallee()
51495157
<< /*isMorethantwoArgs*/ false
51505158
<< SourceRange(TheCall->getArg(1)->getBeginLoc(),
@@ -5157,6 +5165,25 @@ ExprResult Sema::BuiltinShuffleVector(CallExpr *TheCall) {
51575165
<< SourceRange(TheCall->getArg(0)->getBeginLoc(),
51585166
TheCall->getArg(1)->getEndLoc()));
51595167
} else if (numElements != numResElements) {
5168+
if (auto *Ty = LHSType->getAs<BuiltinType>()) {
5169+
assert(Ty->getKind() == BuiltinType::MFloat8x8 ||
5170+
Ty->getKind() == BuiltinType::MFloat8x16);
5171+
switch (numResElements) {
5172+
case 8:
5173+
resType = Context.MFloat8x8Ty;
5174+
break;
5175+
case 16:
5176+
resType = Context.MFloat8x16Ty;
5177+
break;
5178+
default:
5179+
return ExprError(Diag(TheCall->getBeginLoc(),
5180+
diag::err_shufflevector_unsupported_result_vector_type)
5181+
<< TheCall->getDirectCallee()
5182+
<< /*isMorethantwoArgs*/ false
5183+
<< SourceRange(TheCall->getArg(0)->getBeginLoc(),
5184+
TheCall->getArg(1)->getEndLoc()));
5185+
}
5186+
}
51605187
QualType eltType = LHSType->castAs<VectorType>()->getElementType();
51615188
resType =
51625189
Context.getVectorType(eltType, numResElements, VectorKind::Generic);
Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
2+
// RUN: %clang_cc1 -triple aarch64-linux -target-feature +neon -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s
3+
4+
// REQUIRES: aarch64-registered-target
5+
6+
typedef __attribute__((neon_vector_type(8))) signed char int8x8_t;
7+
typedef __attribute__((neon_vector_type(16))) signed char int8x16_t;
8+
9+
typedef __MFloat8x8_t mfloat8x8_t;
10+
typedef __MFloat8x16_t mfloat8x16_t;
11+
12+
// CHECK-LABEL: define dso_local <8 x i8> @f0(
13+
// CHECK-SAME: <8 x i8> [[X:%.*]]) #[[ATTR0:[0-9]+]] {
14+
// CHECK-NEXT: [[ENTRY:.*:]]
15+
// CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i8> [[X]], <8 x i8> [[X]], <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 3, i32 2, i32 1, i32 0>
16+
// CHECK-NEXT: ret <8 x i8> [[SHUFFLE]]
17+
//
18+
// Constant-index form on the 8-element FP8 vector: both operands are `x` and
// eight literal indices are given, so the result is again 8 elements wide.
mfloat8x8_t f0(mfloat8x8_t x) {
19+
return __builtin_shufflevector(x, x, 3, 2, 1, 0, 3, 2, 1, 0);
20+
}
21+
22+
// CHECK-LABEL: define dso_local <8 x i8> @f1(
23+
// CHECK-SAME: <8 x i8> [[X:%.*]], <8 x i8> noundef [[P:%.*]]) #[[ATTR0]] {
24+
// CHECK-NEXT: [[ENTRY:.*:]]
25+
// CHECK-NEXT: [[MASK:%.*]] = and <8 x i8> [[P]], splat (i8 7)
26+
// CHECK-NEXT: [[SHUF_IDX:%.*]] = extractelement <8 x i8> [[MASK]], i64 0
27+
// CHECK-NEXT: [[SHUF_ELT:%.*]] = extractelement <8 x i8> [[X]], i8 [[SHUF_IDX]]
28+
// CHECK-NEXT: [[SHUF_INS:%.*]] = insertelement <8 x i8> poison, i8 [[SHUF_ELT]], i64 0
29+
// CHECK-NEXT: [[SHUF_IDX1:%.*]] = extractelement <8 x i8> [[MASK]], i64 1
30+
// CHECK-NEXT: [[SHUF_ELT2:%.*]] = extractelement <8 x i8> [[X]], i8 [[SHUF_IDX1]]
31+
// CHECK-NEXT: [[SHUF_INS3:%.*]] = insertelement <8 x i8> [[SHUF_INS]], i8 [[SHUF_ELT2]], i64 1
32+
// CHECK-NEXT: [[SHUF_IDX4:%.*]] = extractelement <8 x i8> [[MASK]], i64 2
33+
// CHECK-NEXT: [[SHUF_ELT5:%.*]] = extractelement <8 x i8> [[X]], i8 [[SHUF_IDX4]]
34+
// CHECK-NEXT: [[SHUF_INS6:%.*]] = insertelement <8 x i8> [[SHUF_INS3]], i8 [[SHUF_ELT5]], i64 2
35+
// CHECK-NEXT: [[SHUF_IDX7:%.*]] = extractelement <8 x i8> [[MASK]], i64 3
36+
// CHECK-NEXT: [[SHUF_ELT8:%.*]] = extractelement <8 x i8> [[X]], i8 [[SHUF_IDX7]]
37+
// CHECK-NEXT: [[SHUF_INS9:%.*]] = insertelement <8 x i8> [[SHUF_INS6]], i8 [[SHUF_ELT8]], i64 3
38+
// CHECK-NEXT: [[SHUF_IDX10:%.*]] = extractelement <8 x i8> [[MASK]], i64 4
39+
// CHECK-NEXT: [[SHUF_ELT11:%.*]] = extractelement <8 x i8> [[X]], i8 [[SHUF_IDX10]]
40+
// CHECK-NEXT: [[SHUF_INS12:%.*]] = insertelement <8 x i8> [[SHUF_INS9]], i8 [[SHUF_ELT11]], i64 4
41+
// CHECK-NEXT: [[SHUF_IDX13:%.*]] = extractelement <8 x i8> [[MASK]], i64 5
42+
// CHECK-NEXT: [[SHUF_ELT14:%.*]] = extractelement <8 x i8> [[X]], i8 [[SHUF_IDX13]]
43+
// CHECK-NEXT: [[SHUF_INS15:%.*]] = insertelement <8 x i8> [[SHUF_INS12]], i8 [[SHUF_ELT14]], i64 5
44+
// CHECK-NEXT: [[SHUF_IDX16:%.*]] = extractelement <8 x i8> [[MASK]], i64 6
45+
// CHECK-NEXT: [[SHUF_ELT17:%.*]] = extractelement <8 x i8> [[X]], i8 [[SHUF_IDX16]]
46+
// CHECK-NEXT: [[SHUF_INS18:%.*]] = insertelement <8 x i8> [[SHUF_INS15]], i8 [[SHUF_ELT17]], i64 6
47+
// CHECK-NEXT: [[SHUF_IDX19:%.*]] = extractelement <8 x i8> [[MASK]], i64 7
48+
// CHECK-NEXT: [[SHUF_ELT20:%.*]] = extractelement <8 x i8> [[X]], i8 [[SHUF_IDX19]]
49+
// CHECK-NEXT: [[SHUF_INS21:%.*]] = insertelement <8 x i8> [[SHUF_INS18]], i8 [[SHUF_ELT20]], i64 7
50+
// CHECK-NEXT: ret <8 x i8> [[SHUF_INS21]]
51+
//
52+
// Two-argument (vector mask) form on the 8-element FP8 vector: the integer
// vector `p` supplies the per-lane indices into `x`.
mfloat8x8_t f1(mfloat8x8_t x, int8x8_t p) {
53+
return __builtin_shufflevector(x, p);
54+
}
55+
56+
// CHECK-LABEL: define dso_local <16 x i8> @f3(
57+
// CHECK-SAME: <16 x i8> [[X:%.*]]) #[[ATTR0]] {
58+
// CHECK-NEXT: [[ENTRY:.*:]]
59+
// CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <16 x i8> [[X]], <16 x i8> [[X]], <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
60+
// CHECK-NEXT: ret <16 x i8> [[SHUFFLE]]
61+
//
62+
// Constant-index form on the 16-element FP8 vector: both operands are `x` and
// sixteen literal indices are given, so the result is again 16 elements wide.
mfloat8x16_t f3(mfloat8x16_t x) {
63+
return __builtin_shufflevector(x, x, 7, 6, 5, 4, 3, 2, 1, 0, 7, 6, 5, 4, 3, 2,
64+
1, 0);
65+
}
66+
67+
// CHECK-LABEL: define dso_local <16 x i8> @f4(
68+
// CHECK-SAME: <16 x i8> [[X:%.*]], <16 x i8> noundef [[P:%.*]]) #[[ATTR0]] {
69+
// CHECK-NEXT: [[ENTRY:.*:]]
70+
// CHECK-NEXT: [[MASK:%.*]] = and <16 x i8> [[P]], splat (i8 15)
71+
// CHECK-NEXT: [[SHUF_IDX:%.*]] = extractelement <16 x i8> [[MASK]], i64 0
72+
// CHECK-NEXT: [[SHUF_ELT:%.*]] = extractelement <16 x i8> [[X]], i8 [[SHUF_IDX]]
73+
// CHECK-NEXT: [[SHUF_INS:%.*]] = insertelement <16 x i8> poison, i8 [[SHUF_ELT]], i64 0
74+
// CHECK-NEXT: [[SHUF_IDX1:%.*]] = extractelement <16 x i8> [[MASK]], i64 1
75+
// CHECK-NEXT: [[SHUF_ELT2:%.*]] = extractelement <16 x i8> [[X]], i8 [[SHUF_IDX1]]
76+
// CHECK-NEXT: [[SHUF_INS3:%.*]] = insertelement <16 x i8> [[SHUF_INS]], i8 [[SHUF_ELT2]], i64 1
77+
// CHECK-NEXT: [[SHUF_IDX4:%.*]] = extractelement <16 x i8> [[MASK]], i64 2
78+
// CHECK-NEXT: [[SHUF_ELT5:%.*]] = extractelement <16 x i8> [[X]], i8 [[SHUF_IDX4]]
79+
// CHECK-NEXT: [[SHUF_INS6:%.*]] = insertelement <16 x i8> [[SHUF_INS3]], i8 [[SHUF_ELT5]], i64 2
80+
// CHECK-NEXT: [[SHUF_IDX7:%.*]] = extractelement <16 x i8> [[MASK]], i64 3
81+
// CHECK-NEXT: [[SHUF_ELT8:%.*]] = extractelement <16 x i8> [[X]], i8 [[SHUF_IDX7]]
82+
// CHECK-NEXT: [[SHUF_INS9:%.*]] = insertelement <16 x i8> [[SHUF_INS6]], i8 [[SHUF_ELT8]], i64 3
83+
// CHECK-NEXT: [[SHUF_IDX10:%.*]] = extractelement <16 x i8> [[MASK]], i64 4
84+
// CHECK-NEXT: [[SHUF_ELT11:%.*]] = extractelement <16 x i8> [[X]], i8 [[SHUF_IDX10]]
85+
// CHECK-NEXT: [[SHUF_INS12:%.*]] = insertelement <16 x i8> [[SHUF_INS9]], i8 [[SHUF_ELT11]], i64 4
86+
// CHECK-NEXT: [[SHUF_IDX13:%.*]] = extractelement <16 x i8> [[MASK]], i64 5
87+
// CHECK-NEXT: [[SHUF_ELT14:%.*]] = extractelement <16 x i8> [[X]], i8 [[SHUF_IDX13]]
88+
// CHECK-NEXT: [[SHUF_INS15:%.*]] = insertelement <16 x i8> [[SHUF_INS12]], i8 [[SHUF_ELT14]], i64 5
89+
// CHECK-NEXT: [[SHUF_IDX16:%.*]] = extractelement <16 x i8> [[MASK]], i64 6
90+
// CHECK-NEXT: [[SHUF_ELT17:%.*]] = extractelement <16 x i8> [[X]], i8 [[SHUF_IDX16]]
91+
// CHECK-NEXT: [[SHUF_INS18:%.*]] = insertelement <16 x i8> [[SHUF_INS15]], i8 [[SHUF_ELT17]], i64 6
92+
// CHECK-NEXT: [[SHUF_IDX19:%.*]] = extractelement <16 x i8> [[MASK]], i64 7
93+
// CHECK-NEXT: [[SHUF_ELT20:%.*]] = extractelement <16 x i8> [[X]], i8 [[SHUF_IDX19]]
94+
// CHECK-NEXT: [[SHUF_INS21:%.*]] = insertelement <16 x i8> [[SHUF_INS18]], i8 [[SHUF_ELT20]], i64 7
95+
// CHECK-NEXT: [[SHUF_IDX22:%.*]] = extractelement <16 x i8> [[MASK]], i64 8
96+
// CHECK-NEXT: [[SHUF_ELT23:%.*]] = extractelement <16 x i8> [[X]], i8 [[SHUF_IDX22]]
97+
// CHECK-NEXT: [[SHUF_INS24:%.*]] = insertelement <16 x i8> [[SHUF_INS21]], i8 [[SHUF_ELT23]], i64 8
98+
// CHECK-NEXT: [[SHUF_IDX25:%.*]] = extractelement <16 x i8> [[MASK]], i64 9
99+
// CHECK-NEXT: [[SHUF_ELT26:%.*]] = extractelement <16 x i8> [[X]], i8 [[SHUF_IDX25]]
100+
// CHECK-NEXT: [[SHUF_INS27:%.*]] = insertelement <16 x i8> [[SHUF_INS24]], i8 [[SHUF_ELT26]], i64 9
101+
// CHECK-NEXT: [[SHUF_IDX28:%.*]] = extractelement <16 x i8> [[MASK]], i64 10
102+
// CHECK-NEXT: [[SHUF_ELT29:%.*]] = extractelement <16 x i8> [[X]], i8 [[SHUF_IDX28]]
103+
// CHECK-NEXT: [[SHUF_INS30:%.*]] = insertelement <16 x i8> [[SHUF_INS27]], i8 [[SHUF_ELT29]], i64 10
104+
// CHECK-NEXT: [[SHUF_IDX31:%.*]] = extractelement <16 x i8> [[MASK]], i64 11
105+
// CHECK-NEXT: [[SHUF_ELT32:%.*]] = extractelement <16 x i8> [[X]], i8 [[SHUF_IDX31]]
106+
// CHECK-NEXT: [[SHUF_INS33:%.*]] = insertelement <16 x i8> [[SHUF_INS30]], i8 [[SHUF_ELT32]], i64 11
107+
// CHECK-NEXT: [[SHUF_IDX34:%.*]] = extractelement <16 x i8> [[MASK]], i64 12
108+
// CHECK-NEXT: [[SHUF_ELT35:%.*]] = extractelement <16 x i8> [[X]], i8 [[SHUF_IDX34]]
109+
// CHECK-NEXT: [[SHUF_INS36:%.*]] = insertelement <16 x i8> [[SHUF_INS33]], i8 [[SHUF_ELT35]], i64 12
110+
// CHECK-NEXT: [[SHUF_IDX37:%.*]] = extractelement <16 x i8> [[MASK]], i64 13
111+
// CHECK-NEXT: [[SHUF_ELT38:%.*]] = extractelement <16 x i8> [[X]], i8 [[SHUF_IDX37]]
112+
// CHECK-NEXT: [[SHUF_INS39:%.*]] = insertelement <16 x i8> [[SHUF_INS36]], i8 [[SHUF_ELT38]], i64 13
113+
// CHECK-NEXT: [[SHUF_IDX40:%.*]] = extractelement <16 x i8> [[MASK]], i64 14
114+
// CHECK-NEXT: [[SHUF_ELT41:%.*]] = extractelement <16 x i8> [[X]], i8 [[SHUF_IDX40]]
115+
// CHECK-NEXT: [[SHUF_INS42:%.*]] = insertelement <16 x i8> [[SHUF_INS39]], i8 [[SHUF_ELT41]], i64 14
116+
// CHECK-NEXT: [[SHUF_IDX43:%.*]] = extractelement <16 x i8> [[MASK]], i64 15
117+
// CHECK-NEXT: [[SHUF_ELT44:%.*]] = extractelement <16 x i8> [[X]], i8 [[SHUF_IDX43]]
118+
// CHECK-NEXT: [[SHUF_INS45:%.*]] = insertelement <16 x i8> [[SHUF_INS42]], i8 [[SHUF_ELT44]], i64 15
119+
// CHECK-NEXT: ret <16 x i8> [[SHUF_INS45]]
120+
//
121+
// Two-argument (vector mask) form on the 16-element FP8 vector: the integer
// vector `p` supplies the per-lane indices into `x`.
mfloat8x16_t f4(mfloat8x16_t x, int8x16_t p) {
122+
return __builtin_shufflevector(x, p);
123+
}
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
// RUN: %clang_cc1 -triple aarch64 -fsyntax-only -verify %s
2+
3+
// REQUIRES: aarch64-registered-target
4+
5+
typedef __attribute__((neon_vector_type(8))) signed char int8x8_t;
6+
typedef __attribute__((neon_vector_type(16))) signed char int8x16_t;
7+
8+
typedef __MFloat8x8_t mfloat8x8_t;
9+
typedef __MFloat8x16_t mfloat8x16_t;
10+
11+
// Negative test: the first operand is a scalar `int`, so the call must be
// rejected with the "must be of vector type" diagnostic.
int8x8_t non_vector(int x) {
12+
return __builtin_shufflevector(x, x, 3, 2, 1, 0, 3, 2, 1, 0);
13+
// expected-error@-1 {{first argument to '__builtin_shufflevector' must be of vector type}}
14+
}
15+
16+
// Negative test: nine indices are passed for an 8-element FP8 vector, so the
// result would need nine elements and the call must be rejected.
mfloat8x8_t unsuported_vector(mfloat8x8_t x) {
17+
return __builtin_shufflevector(x, x, 3, 2, 1, 0, 3, 2, 1, 0, 0);
18+
// expected-error@-1 {{unsupported vector type for the result}}
19+
}
20+
21+
// Negative test: in the two-argument form the mask operand `p` is a scalar
// `int` rather than an integer vector, so the call must be rejected.
int8x8_t non_vector_index(int8x8_t x, int p) {
22+
return __builtin_shufflevector(x, p);
23+
// expected-error@-1 {{second argument for __builtin_shufflevector must be integer vector with length equal to the length of the first argument}}
24+
}
25+
26+
// Negative test: in the two-argument form the mask `p` has 16 elements while
// `x` has 8, so the length mismatch must be rejected.
int8x8_t bad_vector_index_length(int8x8_t x, int8x16_t p) {
27+
return __builtin_shufflevector(x, p);
28+
// expected-error@-1 {{second argument for __builtin_shufflevector must be integer vector with length equal to the length of the first argument}}
29+
}
30+

0 commit comments

Comments
 (0)