Skip to content

Commit 832a7bb

Browse files
[AArch64] Add missing Neon Types (#126945)
The AAPCS64 adds a number of vector types to the C unconditionally: https://github.com/ARM-software/abi-aa/blob/main/aapcs64/aapcs64.rst#11appendix-support-for-advanced-simd-extensions The equivalent SVE types are already available in clang: https://github.com/ARM-software/abi-aa/blob/main/aapcs64/aapcs64.rst#12appendix-support-for-scalable-vectors __mfp8 is defined in the ACLE https://arm-software.github.io/acle/main/acle.html#data-types --------- Co-authored-by: David Green <[email protected]>
1 parent 3290d62 commit 832a7bb

File tree

8 files changed

+259
-16
lines changed

8 files changed

+259
-16
lines changed

clang/include/clang/Basic/AArch64ACLETypes.def

Lines changed: 44 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,13 @@
66
//
77
//===----------------------------------------------------------------------===//
88
//
9-
// This file defines various SVE builtin types. The macros are:
9+
// This file defines various Neon and SVE builtin types. The macros are:
10+
//
11+
// NEON_VECTOR_TYPE:
12+
// - (Name, BaseType, ElBits, NumEls, VectorKind)
13+
// Unlike the SVE types, the Neon vector types are not builtin types and
14+
// mapped to the equivalent __attribute__(neon_vector_type(...)) vector type.
15+
// They are not builtin types.
1016
//
1117
// SVE_TYPE:
1218
// - (Name, MangledName, Id, SingletonId)
@@ -57,6 +63,10 @@
5763
// - IsBF true for vector of brain float elements.
5864
//===----------------------------------------------------------------------===//
5965

66+
#ifndef NEON_VECTOR_TYPE
67+
#define NEON_VECTOR_TYPE(Name, BaseType, ElBits, NumEls, VectorKind)
68+
#endif
69+
6070
#ifndef SVE_TYPE
6171
#define SVE_TYPE(Name, Id, SingletonId)
6272
#endif
@@ -111,7 +121,38 @@
111121
SVE_TYPE(Name, Id, SingletonId)
112122
#endif
113123

114-
//===- Vector point types -----------------------------------------------===//
124+
//===- Neon Vector point types --------------------------------------------===//
125+
126+
NEON_VECTOR_TYPE(__Int8x8_t, CharTy, 8, 8, VectorKind::Neon)
127+
NEON_VECTOR_TYPE(__Int16x4_t, ShortTy, 16, 4, VectorKind::Neon)
128+
NEON_VECTOR_TYPE(__Int32x2_t, IntTy, 32, 2, VectorKind::Neon)
129+
NEON_VECTOR_TYPE(__Uint8x8_t, CharTy, 8, 8, VectorKind::Neon)
130+
NEON_VECTOR_TYPE(__Uint16x4_t, UnsignedShortTy, 16, 4, VectorKind::Neon)
131+
NEON_VECTOR_TYPE(__Uint32x2_t, UnsignedIntTy, 32, 2, VectorKind::Neon)
132+
NEON_VECTOR_TYPE(__Float16x4_t, Float16Ty, 16, 4, VectorKind::Neon)
133+
NEON_VECTOR_TYPE(__Float32x2_t, FloatTy, 32, 2, VectorKind::Neon)
134+
NEON_VECTOR_TYPE(__Poly8x8_t, CharTy, 8, 8, VectorKind::NeonPoly)
135+
NEON_VECTOR_TYPE(__Poly16x4_t, UnsignedShortTy, 16, 4, VectorKind::NeonPoly)
136+
NEON_VECTOR_TYPE(__Bfloat16x4_t, BFloat16Ty, 16, 4, VectorKind::Neon)
137+
NEON_VECTOR_TYPE(__Int8x16_t, CharTy, 8, 16, VectorKind::Neon)
138+
NEON_VECTOR_TYPE(__Int16x8_t, ShortTy, 16, 8, VectorKind::Neon)
139+
NEON_VECTOR_TYPE(__Int32x4_t, IntTy, 32, 4, VectorKind::Neon)
140+
NEON_VECTOR_TYPE(__Int64x2_t, LongLongTy, 64, 2, VectorKind::Neon)
141+
NEON_VECTOR_TYPE(__Uint8x16_t, CharTy, 8, 16, VectorKind::Neon)
142+
NEON_VECTOR_TYPE(__Uint16x8_t, UnsignedShortTy, 16, 8, VectorKind::Neon)
143+
NEON_VECTOR_TYPE(__Uint32x4_t, UnsignedIntTy, 32, 4, VectorKind::Neon)
144+
NEON_VECTOR_TYPE(__Uint64x2_t, UnsignedLongLongTy, 64, 2, VectorKind::Neon)
145+
NEON_VECTOR_TYPE(__Float16x8_t, Float16Ty, 16, 8, VectorKind::Neon)
146+
NEON_VECTOR_TYPE(__Float32x4_t, FloatTy, 32, 4, VectorKind::Neon)
147+
NEON_VECTOR_TYPE(__Float64x2_t, DoubleTy, 64, 2, VectorKind::Neon)
148+
NEON_VECTOR_TYPE(__Poly8x16_t, CharTy, 8, 16, VectorKind::NeonPoly)
149+
NEON_VECTOR_TYPE(__Poly16x8_t, UnsignedShortTy, 16, 8, VectorKind::NeonPoly)
150+
NEON_VECTOR_TYPE(__Poly64x2_t, UnsignedLongLongTy, 64, 2, VectorKind::NeonPoly)
151+
NEON_VECTOR_TYPE(__Bfloat16x8_t, BFloat16Ty, 16, 8, VectorKind::Neon)
152+
NEON_VECTOR_TYPE(__Mfloat8x8_t, MFloat8Ty, 8, 8, VectorKind::Neon)
153+
NEON_VECTOR_TYPE(__Mfloat8x16_t, MFloat8Ty, 8, 16, VectorKind::Neon)
154+
155+
//===- SVE Vector point types ---------------------------------------------===//
115156

116157
SVE_VECTOR_TYPE_INT(__SVInt8_t, __SVInt8_t, SveInt8, SveInt8Ty, 16, 8, 1, true)
117158
SVE_VECTOR_TYPE_INT(__SVInt16_t, __SVInt16_t, SveInt16, SveInt16Ty, 8, 16, 1, true)
@@ -205,6 +246,7 @@ SVE_OPAQUE_TYPE(__SVCount_t, __SVCount_t, SveCount, SveCountTy)
205246

206247
SVE_SCALAR_TYPE(__mfp8, __mfp8, MFloat8, MFloat8Ty, 8)
207248

249+
#undef NEON_VECTOR_TYPE
208250
#undef SVE_VECTOR_TYPE
209251
#undef SVE_VECTOR_TYPE_MFLOAT
210252
#undef SVE_VECTOR_TYPE_BFLOAT

clang/include/clang/Basic/TargetInfo.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -270,7 +270,7 @@ class TargetInfo : public TransferrableTargetInfo,
270270
unsigned HasBuiltinMSVaList : 1;
271271

272272
LLVM_PREFERRED_TYPE(bool)
273-
unsigned HasAArch64SVETypes : 1;
273+
unsigned HasAArch64ACLETypes : 1;
274274

275275
LLVM_PREFERRED_TYPE(bool)
276276
unsigned HasRISCVVTypes : 1;
@@ -1055,9 +1055,9 @@ class TargetInfo : public TransferrableTargetInfo,
10551055
/// available on this target.
10561056
bool hasBuiltinMSVaList() const { return HasBuiltinMSVaList; }
10571057

1058-
/// Returns whether or not the AArch64 SVE built-in types are
1058+
/// Returns whether or not the AArch64 ACLE built-in types are
10591059
/// available on this target.
1060-
bool hasAArch64SVETypes() const { return HasAArch64SVETypes; }
1060+
bool hasAArch64ACLETypes() const { return HasAArch64ACLETypes; }
10611061

10621062
/// Returns whether or not the RISC-V V built-in types are
10631063
/// available on this target.

clang/lib/AST/ASTContext.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1447,10 +1447,10 @@ void ASTContext::InitBuiltinTypes(const TargetInfo &Target,
14471447
#include "clang/Basic/HLSLIntangibleTypes.def"
14481448
}
14491449

1450-
if (Target.hasAArch64SVETypes() ||
1451-
(AuxTarget && AuxTarget->hasAArch64SVETypes())) {
1452-
#define SVE_TYPE(Name, Id, SingletonId) \
1453-
InitBuiltinType(SingletonId, BuiltinType::Id);
1450+
if (Target.hasAArch64ACLETypes() ||
1451+
(AuxTarget && AuxTarget->hasAArch64ACLETypes())) {
1452+
#define SVE_TYPE(Name, Id, SingletonId) \
1453+
InitBuiltinType(SingletonId, BuiltinType::Id);
14541454
#include "clang/Basic/AArch64ACLETypes.def"
14551455
}
14561456

@@ -4529,7 +4529,7 @@ QualType ASTContext::getWebAssemblyExternrefType() const {
45294529
/// type.
45304530
QualType ASTContext::getScalableVectorType(QualType EltTy, unsigned NumElts,
45314531
unsigned NumFields) const {
4532-
if (Target->hasAArch64SVETypes()) {
4532+
if (Target->hasAArch64ACLETypes()) {
45334533
uint64_t EltTySize = getTypeSize(EltTy);
45344534

45354535
#define SVE_VECTOR_TYPE_INT(Name, MangledName, Id, SingletonId, NumEls, \

clang/lib/Basic/TargetInfo.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -157,7 +157,7 @@ TargetInfo::TargetInfo(const llvm::Triple &T) : Triple(T) {
157157
SSERegParmMax = 0;
158158
HasAlignMac68kSupport = false;
159159
HasBuiltinMSVaList = false;
160-
HasAArch64SVETypes = false;
160+
HasAArch64ACLETypes = false;
161161
HasRISCVVTypes = false;
162162
AllowAMDGPUUnsafeFPAtomics = false;
163163
HasUnalignedAccess = false;

clang/lib/Basic/Targets/AArch64.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -239,15 +239,15 @@ AArch64TargetInfo::AArch64TargetInfo(const llvm::Triple &Triple,
239239
// Make __builtin_ms_va_list available.
240240
HasBuiltinMSVaList = true;
241241

242-
// Make the SVE types available. Note that this deliberately doesn't
243-
// depend on SveMode, since in principle it should be possible to turn
242+
// Make the Neon ACLE and SVE types available. Note that this deliberately
243+
// doesn't depend on SveMode, since in principle it should be possible to turn
244244
// SVE on and off within a translation unit. It should also be possible
245245
// to compile the global declaration:
246246
//
247247
// __SVInt8_t *ptr;
248248
//
249249
// even without SVE.
250-
HasAArch64SVETypes = true;
250+
HasAArch64ACLETypes = true;
251251

252252
// {} in inline assembly are neon specifiers, not assembly variant
253253
// specifiers.

clang/lib/Sema/Sema.cpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -515,11 +515,14 @@ void Sema::Initialize() {
515515
#include "clang/Basic/OpenCLExtensionTypes.def"
516516
}
517517

518-
if (Context.getTargetInfo().hasAArch64SVETypes() ||
518+
if (Context.getTargetInfo().hasAArch64ACLETypes() ||
519519
(Context.getAuxTargetInfo() &&
520-
Context.getAuxTargetInfo()->hasAArch64SVETypes())) {
520+
Context.getAuxTargetInfo()->hasAArch64ACLETypes())) {
521521
#define SVE_TYPE(Name, Id, SingletonId) \
522522
addImplicitTypedef(#Name, Context.SingletonId);
523+
#define NEON_VECTOR_TYPE(Name, BaseType, ElBits, NumEls, VectorKind) \
524+
addImplicitTypedef( \
525+
#Name, Context.getVectorType(Context.BaseType, NumEls, VectorKind));
523526
#include "clang/Basic/AArch64ACLETypes.def"
524527
}
525528

Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
// Test that NEON types are defined, even when arm_neon.h is not included.
2+
// as required by AAPCS64 "Support for Advanced SIMD Extensions".
3+
4+
// RUN: %clang_cc1 -ast-dump -triple aarch64-linux-gnu %s -x c | FileCheck %s
5+
// RUN: %clang_cc1 -ast-dump -triple aarch64-linux-gnu %s -x c++ | FileCheck %s
6+
// RUN: %clang_cc1 -verify -verify-ignore-unexpected=note -triple x86_64 %s -x c
7+
// RUN: %clang_cc1 -verify -verify-ignore-unexpected=note -triple x86_64 %s -x c++
8+
// RUN: %clang_cc1 -verify -verify-ignore-unexpected=note -triple arm-linux-gnu %s -x c
9+
// RUN: %clang_cc1 -verify -verify-ignore-unexpected=note -triple arm-linux-gnu %s -x c++
10+
11+
__Int8x8_t Int8x8;
12+
// CHECK: Int8x8 '__Int8x8_t':'__attribute__((neon_vector_type(8))) char'
13+
// expected-error@-2{{unknown type name '__Int8x8_t'}}
14+
15+
__Int16x4_t Int16x4;
16+
// CHECK: Int16x4 '__Int16x4_t':'__attribute__((neon_vector_type(4))) short'
17+
// expected-error@-2{{unknown type name '__Int16x4_t'}}
18+
19+
__Int32x2_t Int32x2;
20+
// CHECK: Int32x2 '__Int32x2_t':'__attribute__((neon_vector_type(2))) int'
21+
// expected-error@-2{{unknown type name '__Int32x2_t'}}
22+
23+
__Uint8x8_t Uint8x8;
24+
// CHECK: Uint8x8 '__Uint8x8_t':'__attribute__((neon_vector_type(8))) char'
25+
// expected-error@-2{{unknown type name '__Uint8x8_t'}}
26+
27+
__Uint16x4_t Uint16x4;
28+
// CHECK: Uint16x4 '__Uint16x4_t':'__attribute__((neon_vector_type(4))) unsigned short'
29+
// expected-error@-2{{unknown type name '__Uint16x4_t'}}
30+
31+
__Uint32x2_t Uint32x2;
32+
// CHECK: Uint32x2 '__Uint32x2_t':'__attribute__((neon_vector_type(2))) unsigned int'
33+
// expected-error@-2{{unknown type name '__Uint32x2_t'}}
34+
35+
__Float16x4_t Float16x4;
36+
// CHECK: Float16x4 '__Float16x4_t':'__attribute__((neon_vector_type(4))) _Float16'
37+
// expected-error@-2{{unknown type name '__Float16x4_t'}}
38+
39+
__Float32x2_t Float32x2;
40+
// CHECK: Float32x2 '__Float32x2_t':'__attribute__((neon_vector_type(2))) float'
41+
// expected-error@-2{{unknown type name '__Float32x2_t'}}
42+
43+
__Poly8x8_t Poly8x8;
44+
// CHECK: Poly8x8 '__Poly8x8_t':'__attribute__((neon_polyvector_type(8))) char'
45+
// expected-error@-2{{unknown type name '__Poly8x8_t'}}
46+
47+
__Poly16x4_t Poly16x4;
48+
// CHECK: Poly16x4 '__Poly16x4_t':'__attribute__((neon_polyvector_type(4))) unsigned short'
49+
// expected-error@-2{{unknown type name '__Poly16x4_t'}}
50+
51+
__Bfloat16x4_t Bfloat16x4;
52+
// CHECK: Bfloat16x4 '__Bfloat16x4_t':'__attribute__((neon_vector_type(4))) __bf16'
53+
// expected-error@-2{{unknown type name '__Bfloat16x4_t'}}
54+
55+
__Int8x16_t Int8x16;
56+
// CHECK: Int8x16 '__Int8x16_t':'__attribute__((neon_vector_type(16))) char'
57+
// expected-error@-2{{unknown type name '__Int8x16_t'}}
58+
59+
__Int16x8_t Int16x8;
60+
// CHECK: Int16x8 '__Int16x8_t':'__attribute__((neon_vector_type(8))) short'
61+
// expected-error@-2{{unknown type name '__Int16x8_t'}}
62+
63+
__Int32x4_t Int32x4;
64+
// CHECK: Int32x4 '__Int32x4_t':'__attribute__((neon_vector_type(4))) int'
65+
// expected-error@-2{{unknown type name '__Int32x4_t'}}
66+
67+
__Int64x2_t Int64x2;
68+
// CHECK: Int64x2 '__Int64x2_t':'__attribute__((neon_vector_type(2))) long long'
69+
// expected-error@-2{{unknown type name '__Int64x2_t'}}
70+
71+
__Uint8x16_t Uint8x16;
72+
// CHECK: Uint8x16 '__Uint8x16_t':'__attribute__((neon_vector_type(16))) char'
73+
// expected-error@-2{{unknown type name '__Uint8x16_t'}}
74+
75+
__Uint16x8_t Uint16x8;
76+
// CHECK: Uint16x8 '__Uint16x8_t':'__attribute__((neon_vector_type(8))) unsigned short'
77+
// expected-error@-2{{unknown type name '__Uint16x8_t'}}
78+
79+
__Uint32x4_t Uint32x4;
80+
// CHECK: Uint32x4 '__Uint32x4_t':'__attribute__((neon_vector_type(4))) unsigned int'
81+
// expected-error@-2{{unknown type name '__Uint32x4_t'}}
82+
83+
__Uint64x2_t Uint64x2;
84+
// CHECK: Uint64x2 '__Uint64x2_t':'__attribute__((neon_vector_type(2))) unsigned long long'
85+
// expected-error@-2{{unknown type name '__Uint64x2_t'}}
86+
87+
__Float16x8_t Float16x8;
88+
// CHECK: Float16x8 '__Float16x8_t':'__attribute__((neon_vector_type(8))) _Float16'
89+
// expected-error@-2{{unknown type name '__Float16x8_t'}}
90+
91+
__Float32x4_t Float32x4;
92+
// CHECK: Float32x4 '__Float32x4_t':'__attribute__((neon_vector_type(4))) float'
93+
// expected-error@-2{{unknown type name '__Float32x4_t'}}
94+
95+
__Float64x2_t Float64x2;
96+
// CHECK: Float64x2 '__Float64x2_t':'__attribute__((neon_vector_type(2))) double'
97+
// expected-error@-2{{unknown type name '__Float64x2_t'}}
98+
99+
__Poly8x16_t Poly8x16;
100+
// CHECK: Poly8x16 '__Poly8x16_t':'__attribute__((neon_polyvector_type(16))) char'
101+
// expected-error@-2{{unknown type name '__Poly8x16_t'}}
102+
103+
__Poly16x8_t Poly16x8;
104+
// CHECK: Poly16x8 '__Poly16x8_t':'__attribute__((neon_polyvector_type(8))) unsigned short'
105+
// expected-error@-2{{unknown type name '__Poly16x8_t'}}
106+
107+
__Poly64x2_t Poly64x2;
108+
// CHECK: Poly64x2 '__Poly64x2_t':'__attribute__((neon_polyvector_type(2))) unsigned long long'
109+
// expected-error@-2{{unknown type name '__Poly64x2_t'}}
110+
111+
__Bfloat16x8_t Bfloat16x8;
112+
// CHECK: Bfloat16x8 '__Bfloat16x8_t':'__attribute__((neon_vector_type(8))) __bf16'
113+
// expected-error@-2{{unknown type name '__Bfloat16x8_t'}}
114+
115+
__mfp8 mfp8;
116+
// CHECK: mfp8 '__mfp8'
117+
// expected-error@-2{{unknown type name '__mfp8'}}
118+
119+
__Mfloat8x8_t Mfloat8x8;
120+
// CHECK: Mfloat8x8 '__Mfloat8x8_t':'__attribute__((neon_vector_type(8))) __mfp8'
121+
// expected-error@-2{{unknown type name '__Mfloat8x8_t'}}
122+
123+
__Mfloat8x16_t Mfloat8x16;
124+
// CHECK: Mfloat8x16 '__Mfloat8x16_t':'__attribute__((neon_vector_type(16))) __mfp8'
125+
// expected-error@-2{{unknown type name '__Mfloat8x16_t'}}
Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
2+
// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon -x c %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK-C
3+
// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon -x c++ %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK-CPP
4+
5+
typedef __Uint32x4_t X;
6+
7+
// CHECK-C-LABEL: define dso_local <4 x i32> @test(
8+
// CHECK-C-SAME: <4 x i32> noundef [[X:%.*]]) #[[ATTR0:[0-9]+]] {
9+
// CHECK-C-NEXT: [[ENTRY:.*:]]
10+
// CHECK-C-NEXT: [[X_ADDR:%.*]] = alloca <4 x i32>, align 16
11+
// CHECK-C-NEXT: store <4 x i32> [[X]], ptr [[X_ADDR]], align 16
12+
// CHECK-C-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[X_ADDR]], align 16
13+
// CHECK-C-NEXT: ret <4 x i32> [[TMP0]]
14+
//
15+
// CHECK-CPP-LABEL: define dso_local noundef <4 x i32> @_Z4test12__Uint32x4_t(
16+
// CHECK-CPP-SAME: <4 x i32> noundef [[X:%.*]]) #[[ATTR0:[0-9]+]] {
17+
// CHECK-CPP-NEXT: [[ENTRY:.*:]]
18+
// CHECK-CPP-NEXT: [[X_ADDR:%.*]] = alloca <4 x i32>, align 16
19+
// CHECK-CPP-NEXT: store <4 x i32> [[X]], ptr [[X_ADDR]], align 16
20+
// CHECK-CPP-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[X_ADDR]], align 16
21+
// CHECK-CPP-NEXT: ret <4 x i32> [[TMP0]]
22+
//
23+
X test(X x) {
24+
return x;
25+
}
26+
27+
#include <arm_neon.h>
28+
29+
// CHECK-C-LABEL: define dso_local <16 x i8> @testboth(
30+
// CHECK-C-SAME: <4 x i32> noundef [[X:%.*]]) #[[ATTR0]] {
31+
// CHECK-C-NEXT: [[ENTRY:.*:]]
32+
// CHECK-C-NEXT: [[__P0_ADDR_I:%.*]] = alloca <16 x i8>, align 16
33+
// CHECK-C-NEXT: [[__P1_ADDR_I:%.*]] = alloca <16 x i8>, align 16
34+
// CHECK-C-NEXT: [[__RET_I:%.*]] = alloca <16 x i8>, align 16
35+
// CHECK-C-NEXT: [[X_ADDR:%.*]] = alloca <4 x i32>, align 16
36+
// CHECK-C-NEXT: store <4 x i32> [[X]], ptr [[X_ADDR]], align 16
37+
// CHECK-C-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[X_ADDR]], align 16
38+
// CHECK-C-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
39+
// CHECK-C-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr [[X_ADDR]], align 16
40+
// CHECK-C-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to <16 x i8>
41+
// CHECK-C-NEXT: store <16 x i8> [[TMP1]], ptr [[__P0_ADDR_I]], align 16
42+
// CHECK-C-NEXT: store <16 x i8> [[TMP3]], ptr [[__P1_ADDR_I]], align 16
43+
// CHECK-C-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr [[__P0_ADDR_I]], align 16
44+
// CHECK-C-NEXT: [[TMP5:%.*]] = load <16 x i8>, ptr [[__P1_ADDR_I]], align 16
45+
// CHECK-C-NEXT: [[ADD_I:%.*]] = add <16 x i8> [[TMP4]], [[TMP5]]
46+
// CHECK-C-NEXT: store <16 x i8> [[ADD_I]], ptr [[__RET_I]], align 16
47+
// CHECK-C-NEXT: [[TMP6:%.*]] = load <16 x i8>, ptr [[__RET_I]], align 16
48+
// CHECK-C-NEXT: ret <16 x i8> [[TMP6]]
49+
//
50+
// CHECK-CPP-LABEL: define dso_local noundef <16 x i8> @_Z8testboth12__Uint32x4_t(
51+
// CHECK-CPP-SAME: <4 x i32> noundef [[X:%.*]]) #[[ATTR0]] {
52+
// CHECK-CPP-NEXT: [[ENTRY:.*:]]
53+
// CHECK-CPP-NEXT: [[__P0_ADDR_I:%.*]] = alloca <16 x i8>, align 16
54+
// CHECK-CPP-NEXT: [[__P1_ADDR_I:%.*]] = alloca <16 x i8>, align 16
55+
// CHECK-CPP-NEXT: [[__RET_I:%.*]] = alloca <16 x i8>, align 16
56+
// CHECK-CPP-NEXT: [[X_ADDR:%.*]] = alloca <4 x i32>, align 16
57+
// CHECK-CPP-NEXT: store <4 x i32> [[X]], ptr [[X_ADDR]], align 16
58+
// CHECK-CPP-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[X_ADDR]], align 16
59+
// CHECK-CPP-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
60+
// CHECK-CPP-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr [[X_ADDR]], align 16
61+
// CHECK-CPP-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to <16 x i8>
62+
// CHECK-CPP-NEXT: store <16 x i8> [[TMP1]], ptr [[__P0_ADDR_I]], align 16
63+
// CHECK-CPP-NEXT: store <16 x i8> [[TMP3]], ptr [[__P1_ADDR_I]], align 16
64+
// CHECK-CPP-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr [[__P0_ADDR_I]], align 16
65+
// CHECK-CPP-NEXT: [[TMP5:%.*]] = load <16 x i8>, ptr [[__P1_ADDR_I]], align 16
66+
// CHECK-CPP-NEXT: [[ADD_I:%.*]] = add <16 x i8> [[TMP4]], [[TMP5]]
67+
// CHECK-CPP-NEXT: store <16 x i8> [[ADD_I]], ptr [[__RET_I]], align 16
68+
// CHECK-CPP-NEXT: [[TMP6:%.*]] = load <16 x i8>, ptr [[__RET_I]], align 16
69+
// CHECK-CPP-NEXT: ret <16 x i8> [[TMP6]]
70+
//
71+
int8x16_t testboth(X x) {
72+
return vaddq_u8(x, x);
73+
}

0 commit comments

Comments
 (0)