Skip to content

Commit 40bb302

Browse files
committed
clang: Add start of header test for __clang_hip_libdevice_declares
It was shockingly difficult to get identical checks out of the 3 languages. The visibility, dso_local and fp-contract settings seem to differ unless I force every test function to static. update_cc_test_checks also isn't smart enough when the attribute groups slightly differ.
1 parent 53acada commit 40bb302

File tree

1 file changed

+144
-0
lines changed

1 file changed

+144
-0
lines changed
Lines changed: 144 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,144 @@
1+
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2
2+
// REQUIRES: amdgpu-registered-target, x86-registered-target
3+
4+
// fp-contract, -no-enable-noundef-analysis and visibility are to just get the
5+
// same output for openmp and hip.
6+
7+
// RUN: %clang_cc1 -x c++ -fopenmp -fvisibility=default -ffp-contract=off -no-enable-noundef-analysis \
8+
// RUN: -internal-isystem %S/../../lib/Headers/openmp_wrappers \
9+
// RUN: -include __clang_openmp_device_functions.h \
10+
// RUN: -internal-isystem %S/../../lib/Headers/openmp_wrappers \
11+
// RUN: -internal-isystem %S/Inputs/include \
12+
// RUN: -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-unknown -verify \
13+
// RUN: -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm -fopenmp-is-device \
14+
// RUN: -o - %s | FileCheck --check-prefixes=CHECK,OPENMP,OPENMP-CPP %s
15+
16+
// RUN: %clang_cc1 -x c -fopenmp -fvisibility=default -ffp-contract=off -no-enable-noundef-analysis \
17+
// RUN: -internal-isystem %S/../../lib/Headers/openmp_wrappers \
18+
// RUN: -include __clang_openmp_device_functions.h \
19+
// RUN: -internal-isystem %S/../../lib/Headers/openmp_wrappers \
20+
// RUN: -internal-isystem %S/Inputs/include \
21+
// RUN: -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-unknown -verify \
22+
// RUN: -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm -fopenmp-is-device \
23+
// RUN: -o - %s | FileCheck --check-prefixes=CHECK,OPENMP,OPENMP-C %s
24+
25+
// RUN: %clang_cc1 -x hip -fvisibility=default -ffp-contract=off -no-enable-noundef-analysis \
26+
// RUN: -include __clang_hip_runtime_wrapper.h \
27+
// RUN: -internal-isystem %S/../../lib/Headers/cuda_wrappers \
28+
// RUN: -internal-isystem %S/Inputs/include \
29+
// RUN: -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-unknown -verify \
30+
// RUN: -emit-llvm -fcuda-is-device -o - \
31+
// RUN: -D__HIPCC_RTC__ %s | FileCheck -check-prefixes=CHECK,HIP %s
32+
33+
// expected-no-diagnostics
34+
35+
#ifdef __cplusplus
36+
#include <cmath>
37+
#else
38+
#include <math.h>
39+
typedef _Bool bool;
40+
#endif
41+
42+
#ifdef _OPENMP
43+
#define __device__
44+
#endif
45+
46+
// static and overloadable to get the same function annotations between C, C++ and HIP.
47+
#define TEST_FUNC_ATTRS static __device__ __attribute__((used,overloadable))
48+
49+
#ifdef _OPENMP
50+
#pragma omp begin declare target
51+
#endif
52+
53+
// This function is a hack to get the same IR out of HIP and OpenMP. The HIP
54+
// headers declare __cxa_* functions with these attributes, such that the
55+
// attribute groups are different. update_cc_test_checks isn't smart enough to
56+
// strip attributes from the checks, or semantically compare them.
57+
__attribute__((__visibility__("default")))
58+
__attribute__((weak))
59+
__attribute__((noreturn))
60+
__attribute__((overloadable))
61+
// OPENMP-LABEL: define weak hidden void @_Z20attribute_check_hackv
62+
// OPENMP-SAME: () #[[ATTR0:[0-9]+]] {
63+
// OPENMP-NEXT: entry:
64+
// OPENMP-NEXT: call void @llvm.trap()
65+
// OPENMP-NEXT: unreachable
66+
//
67+
void attribute_check_hack(void) {
68+
__builtin_trap();
69+
}
70+
71+
// CHECK-LABEL: define internal float @_ZL18test_ockl_acos_f32f
72+
// CHECK-SAME: (float [[SRC:%.*]]) #[[ATTR2:[0-9]+]] {
73+
// CHECK-NEXT: entry:
74+
// CHECK-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5)
75+
// CHECK-NEXT: [[SRC_ADDR:%.*]] = alloca float, align 4, addrspace(5)
76+
// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
77+
// CHECK-NEXT: [[SRC_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SRC_ADDR]] to ptr
78+
// CHECK-NEXT: store float [[SRC]], ptr [[SRC_ADDR_ASCAST]], align 4
79+
// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[SRC_ADDR_ASCAST]], align 4
80+
// CHECK-NEXT: [[CALL:%.*]] = call float @__ocml_acos_f32(float [[TMP0]]) #[[ATTR4:[0-9]+]]
81+
// CHECK-NEXT: ret float [[CALL]]
82+
//
83+
TEST_FUNC_ATTRS float test_ockl_acos_f32(float src) {
84+
return __ocml_acos_f32(src);
85+
}
86+
87+
// CHECK-LABEL: define internal float @_ZL15test_ockl_fdot2Dv2_DF16_S_fbi
88+
// CHECK-SAME: (<2 x half> [[A:%.*]], <2 x half> [[B:%.*]], float [[C:%.*]], i1 zeroext [[S:%.*]], i32 [[S_INT:%.*]]) #[[ATTR2]] {
89+
// CHECK-NEXT: entry:
90+
// CHECK-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5)
91+
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <2 x half>, align 4, addrspace(5)
92+
// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <2 x half>, align 4, addrspace(5)
93+
// CHECK-NEXT: [[C_ADDR:%.*]] = alloca float, align 4, addrspace(5)
94+
// CHECK-NEXT: [[S_ADDR:%.*]] = alloca i8, align 1, addrspace(5)
95+
// CHECK-NEXT: [[S_INT_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
96+
// CHECK-NEXT: [[X:%.*]] = alloca float, align 4, addrspace(5)
97+
// CHECK-NEXT: [[Y:%.*]] = alloca float, align 4, addrspace(5)
98+
// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
99+
// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
100+
// CHECK-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
101+
// CHECK-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
102+
// CHECK-NEXT: [[S_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[S_ADDR]] to ptr
103+
// CHECK-NEXT: [[S_INT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[S_INT_ADDR]] to ptr
104+
// CHECK-NEXT: [[X_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X]] to ptr
105+
// CHECK-NEXT: [[Y_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[Y]] to ptr
106+
// CHECK-NEXT: store <2 x half> [[A]], ptr [[A_ADDR_ASCAST]], align 4
107+
// CHECK-NEXT: store <2 x half> [[B]], ptr [[B_ADDR_ASCAST]], align 4
108+
// CHECK-NEXT: store float [[C]], ptr [[C_ADDR_ASCAST]], align 4
109+
// CHECK-NEXT: [[FROMBOOL:%.*]] = zext i1 [[S]] to i8
110+
// CHECK-NEXT: store i8 [[FROMBOOL]], ptr [[S_ADDR_ASCAST]], align 1
111+
// CHECK-NEXT: store i32 [[S_INT]], ptr [[S_INT_ADDR_ASCAST]], align 4
112+
// CHECK-NEXT: [[TMP0:%.*]] = load <2 x half>, ptr [[A_ADDR_ASCAST]], align 4
113+
// CHECK-NEXT: [[TMP1:%.*]] = load <2 x half>, ptr [[B_ADDR_ASCAST]], align 4
114+
// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[C_ADDR_ASCAST]], align 4
115+
// CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr [[S_ADDR_ASCAST]], align 1
116+
// CHECK-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP3]] to i1
117+
// CHECK-NEXT: [[CALL:%.*]] = call float @__ockl_fdot2(<2 x half> [[TMP0]], <2 x half> [[TMP1]], float [[TMP2]], i1 zeroext [[TOBOOL]]) #[[ATTR4]]
118+
// CHECK-NEXT: store float [[CALL]], ptr [[X_ASCAST]], align 4
119+
// CHECK-NEXT: [[TMP4:%.*]] = load <2 x half>, ptr [[A_ADDR_ASCAST]], align 4
120+
// CHECK-NEXT: [[TMP5:%.*]] = load <2 x half>, ptr [[B_ADDR_ASCAST]], align 4
121+
// CHECK-NEXT: [[TMP6:%.*]] = load float, ptr [[C_ADDR_ASCAST]], align 4
122+
// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[S_INT_ADDR_ASCAST]], align 4
123+
// CHECK-NEXT: [[TOBOOL1:%.*]] = icmp ne i32 [[TMP7]], 0
124+
// CHECK-NEXT: [[CALL2:%.*]] = call float @__ockl_fdot2(<2 x half> [[TMP4]], <2 x half> [[TMP5]], float [[TMP6]], i1 zeroext [[TOBOOL1]]) #[[ATTR4]]
125+
// CHECK-NEXT: store float [[CALL2]], ptr [[Y_ASCAST]], align 4
126+
// CHECK-NEXT: [[TMP8:%.*]] = load float, ptr [[X_ASCAST]], align 4
127+
// CHECK-NEXT: [[TMP9:%.*]] = load float, ptr [[Y_ASCAST]], align 4
128+
// CHECK-NEXT: [[ADD:%.*]] = fadd float [[TMP8]], [[TMP9]]
129+
// CHECK-NEXT: ret float [[ADD]]
130+
//
131+
TEST_FUNC_ATTRS float test_ockl_fdot2(__2f16 a, __2f16 b, float c, bool s, int s_int) {
132+
float x = __ockl_fdot2(a, b, c, s);
133+
float y = __ockl_fdot2(a, b, c, s_int);
134+
return x + y;
135+
}
136+
137+
138+
#ifdef _OPENMP
139+
#pragma omp end declare target
140+
#endif
141+
//// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
142+
// HIP: {{.*}}
143+
// OPENMP-C: {{.*}}
144+
// OPENMP-CPP: {{.*}}

0 commit comments

Comments
 (0)