Skip to content

Commit 1df5c94

Browse files
[AArch64] Implement FP8 floating-point mode helper intrinsics (#100608)
Implement FP8 mode helper intrinsics (as inline functions) as specified in ACLE 2024Q3 "14.2 Helper intrinsics" https://github.com/ARM-software/acle/releases/download/r2024Q3/acle-2024Q3.pdf
1 parent f5ff3a5 commit 1df5c94

File tree

2 files changed

+219
-0
lines changed

2 files changed

+219
-0
lines changed
Lines changed: 165 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,165 @@
1+
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
2+
3+
// RUN: %clang_cc1 -O2 -triple aarch64 -emit-llvm -x c -DUSE_NEON_H %s -o - | FileCheck %s
4+
// RUN: %clang_cc1 -O2 -triple aarch64 -emit-llvm -x c -DUSE_SVE_H %s -o - | FileCheck %s
5+
// RUN: %clang_cc1 -O2 -triple aarch64 -emit-llvm -x c -DUSE_SME_H %s -o - | FileCheck %s
6+
// RUN: %clang_cc1 -O2 -triple aarch64 -emit-llvm -x c++ -DUSE_NEON_H %s -o - | FileCheck %s
7+
// RUN: %clang_cc1 -O2 -triple aarch64 -emit-llvm -x c++ -DUSE_SVE_H %s -o - | FileCheck %s
8+
// RUN: %clang_cc1 -O2 -triple aarch64 -emit-llvm -x c++ -DUSE_SME_H %s -o - | FileCheck %s
9+
10+
// REQUIRES: aarch64-registered-target
11+
12+
#ifdef USE_NEON_H
13+
#include "arm_neon.h"
14+
#endif
15+
16+
#ifdef USE_SVE_H
17+
#include "arm_sve.h"
18+
#endif
19+
20+
#ifdef USE_SME_H
21+
#include "arm_sme.h"
22+
#endif
23+
24+
#ifdef __cplusplus
25+
extern "C" {
26+
#endif
27+
28+
#define INIT_ZERO 0
29+
#define INIT_ONES 0xffffffffffffffffU
30+
31+
// CHECK-LABEL: define dso_local noundef i64 @test_init(
32+
// CHECK-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] {
33+
// CHECK-NEXT: [[ENTRY:.*:]]
34+
// CHECK-NEXT: ret i64 0
35+
//
36+
fpm_t test_init() { return __arm_fpm_init(); }
37+
38+
// CHECK-LABEL: define dso_local noundef i64 @test_src1_1(
39+
// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
40+
// CHECK-NEXT: [[ENTRY:.*:]]
41+
// CHECK-NEXT: ret i64 -8
42+
//
43+
fpm_t test_src1_1() {
44+
return __arm_set_fpm_src1_format(INIT_ONES, __ARM_FPM_E5M2);
45+
}
46+
47+
// CHECK-LABEL: define dso_local noundef i64 @test_src1_2(
48+
// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
49+
// CHECK-NEXT: [[ENTRY:.*:]]
50+
// CHECK-NEXT: ret i64 1
51+
//
52+
fpm_t test_src1_2() {
53+
return __arm_set_fpm_src1_format(INIT_ZERO, __ARM_FPM_E4M3);
54+
}
55+
56+
// CHECK-LABEL: define dso_local noundef i64 @test_src2_1(
57+
// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
58+
// CHECK-NEXT: [[ENTRY:.*:]]
59+
// CHECK-NEXT: ret i64 -57
60+
//
61+
fpm_t test_src2_1() {
62+
return __arm_set_fpm_src2_format(INIT_ONES, __ARM_FPM_E5M2);
63+
}
64+
65+
// CHECK-LABEL: define dso_local noundef i64 @test_src2_2(
66+
// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
67+
// CHECK-NEXT: [[ENTRY:.*:]]
68+
// CHECK-NEXT: ret i64 8
69+
//
70+
fpm_t test_src2_2() {
71+
return __arm_set_fpm_src2_format(INIT_ZERO, __ARM_FPM_E4M3);
72+
}
73+
74+
// CHECK-LABEL: define dso_local noundef i64 @test_dst1_1(
75+
// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
76+
// CHECK-NEXT: [[ENTRY:.*:]]
77+
// CHECK-NEXT: ret i64 -449
78+
//
79+
fpm_t test_dst1_1() {
80+
return __arm_set_fpm_dst_format(INIT_ONES, __ARM_FPM_E5M2);
81+
}
82+
83+
// CHECK-LABEL: define dso_local noundef i64 @test_dst2_2(
84+
// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
85+
// CHECK-NEXT: [[ENTRY:.*:]]
86+
// CHECK-NEXT: ret i64 64
87+
//
88+
fpm_t test_dst2_2() {
89+
return __arm_set_fpm_dst_format(INIT_ZERO, __ARM_FPM_E4M3);
90+
}
91+
92+
// CHECK-LABEL: define dso_local noundef i64 @test_of_mul_1(
93+
// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
94+
// CHECK-NEXT: [[ENTRY:.*:]]
95+
// CHECK-NEXT: ret i64 -16385
96+
//
97+
fpm_t test_of_mul_1() {
98+
return __arm_set_fpm_overflow_mul(INIT_ONES, __ARM_FPM_INFNAN);
99+
}
100+
101+
// CHECK-LABEL: define dso_local noundef i64 @test_of_mul_2(
102+
// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
103+
// CHECK-NEXT: [[ENTRY:.*:]]
104+
// CHECK-NEXT: ret i64 16384
105+
//
106+
fpm_t test_of_mul_2() {
107+
return __arm_set_fpm_overflow_mul(INIT_ZERO, __ARM_FPM_SATURATE);
108+
}
109+
110+
// CHECK-LABEL: define dso_local noundef i64 @test_of_cvt_1(
111+
// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
112+
// CHECK-NEXT: [[ENTRY:.*:]]
113+
// CHECK-NEXT: ret i64 -32769
114+
//
115+
fpm_t test_of_cvt_1() {
116+
return __arm_set_fpm_overflow_cvt(INIT_ONES, __ARM_FPM_INFNAN);
117+
}
118+
119+
// CHECK-LABEL: define dso_local noundef i64 @test_of_cvt_2(
120+
// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
121+
// CHECK-NEXT: [[ENTRY:.*:]]
122+
// CHECK-NEXT: ret i64 32768
123+
//
124+
fpm_t test_of_cvt_2() {
125+
return __arm_set_fpm_overflow_cvt(INIT_ZERO, __ARM_FPM_SATURATE);
126+
}
127+
128+
// CHECK-LABEL: define dso_local noundef i64 @test_lscale(
129+
// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
130+
// CHECK-NEXT: [[ENTRY:.*:]]
131+
// CHECK-NEXT: ret i64 8323072
132+
//
133+
fpm_t test_lscale() { return __arm_set_fpm_lscale(INIT_ZERO, 127); }
134+
135+
// CHECK-LABEL: define dso_local noundef i64 @test_lscale2(
136+
// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
137+
// CHECK-NEXT: [[ENTRY:.*:]]
138+
// CHECK-NEXT: ret i64 270582939648
139+
//
140+
fpm_t test_lscale2() { return __arm_set_fpm_lscale2(INIT_ZERO, 63); }
141+
142+
// CHECK-LABEL: define dso_local noundef range(i64 0, 4294967296) i64 @test_nscale_1(
143+
// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
144+
// CHECK-NEXT: [[ENTRY:.*:]]
145+
// CHECK-NEXT: ret i64 2147483648
146+
//
147+
fpm_t test_nscale_1() { return __arm_set_fpm_nscale(INIT_ZERO, -128); }
148+
149+
// CHECK-LABEL: define dso_local noundef range(i64 0, 4294967296) i64 @test_nscale_2(
150+
// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
151+
// CHECK-NEXT: [[ENTRY:.*:]]
152+
// CHECK-NEXT: ret i64 2130706432
153+
//
154+
fpm_t test_nscale_2() { return __arm_set_fpm_nscale(INIT_ZERO, 127); }
155+
156+
// CHECK-LABEL: define dso_local noundef range(i64 0, 4294967296) i64 @test_nscale_3(
157+
// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
158+
// CHECK-NEXT: [[ENTRY:.*:]]
159+
// CHECK-NEXT: ret i64 4278190080
160+
//
161+
fpm_t test_nscale_3() { return __arm_set_fpm_nscale(INIT_ZERO, -1); }
162+
163+
#ifdef __cplusplus
164+
}
165+
#endif

clang/utils/TableGen/NeonEmitter.cpp

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2594,6 +2594,60 @@ void NeonEmitter::runVectorTypes(raw_ostream &OS) {
25942594
OS << "typedef double float64_t;\n";
25952595
OS << "#endif\n\n";
25962596

2597+
OS << R"(
2598+
typedef uint64_t fpm_t;
2599+
2600+
enum __ARM_FPM_FORMAT { __ARM_FPM_E5M2, __ARM_FPM_E4M3 };
2601+
2602+
enum __ARM_FPM_OVERFLOW { __ARM_FPM_INFNAN, __ARM_FPM_SATURATE };
2603+
2604+
static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__))
2605+
__arm_fpm_init(void) {
2606+
return 0;
2607+
}
2608+
2609+
static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__))
2610+
__arm_set_fpm_src1_format(fpm_t __fpm, enum __ARM_FPM_FORMAT __format) {
2611+
return (__fpm & ~7ull) | (fpm_t)__format;
2612+
}
2613+
2614+
static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__))
2615+
__arm_set_fpm_src2_format(fpm_t __fpm, enum __ARM_FPM_FORMAT __format) {
2616+
return (__fpm & ~0x38ull) | ((fpm_t)__format << 3u);
2617+
}
2618+
2619+
static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__))
2620+
__arm_set_fpm_dst_format(fpm_t __fpm, enum __ARM_FPM_FORMAT __format) {
2621+
return (__fpm & ~0x1c0ull) | ((fpm_t)__format << 6u);
2622+
}
2623+
2624+
static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__))
2625+
__arm_set_fpm_overflow_mul(fpm_t __fpm, enum __ARM_FPM_OVERFLOW __behaviour) {
2626+
return (__fpm & ~0x4000ull) | ((fpm_t)__behaviour << 14u);
2627+
}
2628+
2629+
static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__))
2630+
__arm_set_fpm_overflow_cvt(fpm_t __fpm, enum __ARM_FPM_OVERFLOW __behaviour) {
2631+
return (__fpm & ~0x8000ull) | ((fpm_t)__behaviour << 15u);
2632+
}
2633+
2634+
static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__))
2635+
__arm_set_fpm_lscale(fpm_t __fpm, uint64_t __scale) {
2636+
return (__fpm & ~0x7f0000ull) | (__scale << 16u);
2637+
}
2638+
2639+
static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__))
2640+
__arm_set_fpm_nscale(fpm_t __fpm, int64_t __scale) {
2641+
return (__fpm & ~0xff000000ull) | (((fpm_t)__scale & 0xffu) << 24u);
2642+
}
2643+
2644+
static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__))
2645+
__arm_set_fpm_lscale2(fpm_t __fpm, uint64_t __scale) {
2646+
return (uint32_t)__fpm | (__scale << 32u);
2647+
}
2648+
2649+
)";
2650+
25972651
emitNeonTypeDefs("cQcsQsiQilQlUcQUcUsQUsUiQUiUlQUlhQhfQfdQd", OS);
25982652

25992653
emitNeonTypeDefs("bQb", OS);

0 commit comments

Comments
 (0)