Skip to content

Commit 053dd30

Browse files
committed
Reland "[HIP] Allow std::malloc in device function"
Reland f5033c3 as the regression in Tensile was fixed by ea72a4e.
1 parent e94c171 commit 053dd30

File tree

6 files changed

+73
-35
lines changed

6 files changed

+73
-35
lines changed

clang/lib/Headers/__clang_hip_runtime_wrapper.h

Lines changed: 34 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -47,28 +47,9 @@ extern "C" {
4747
#endif //__cplusplus
4848

4949
#if !defined(__HIPCC_RTC__)
50-
#include <cmath>
51-
#include <cstdlib>
52-
#include <stdlib.h>
5350
#if __has_include("hip/hip_version.h")
5451
#include "hip/hip_version.h"
5552
#endif // __has_include("hip/hip_version.h")
56-
#else
57-
typedef __SIZE_TYPE__ size_t;
58-
// Define macros which are needed to declare HIP device API's without standard
59-
// C/C++ headers. This is for readability so that these API's can be written
60-
// the same way as non-hipRTC use case. These macros need to be popped so that
61-
// they do not pollute users' name space.
62-
#pragma push_macro("NULL")
63-
#pragma push_macro("uint32_t")
64-
#pragma push_macro("uint64_t")
65-
#pragma push_macro("CHAR_BIT")
66-
#pragma push_macro("INT_MAX")
67-
#define NULL (void *)0
68-
#define uint32_t __UINT32_TYPE__
69-
#define uint64_t __UINT64_TYPE__
70-
#define CHAR_BIT __CHAR_BIT__
71-
#define INT_MAX __INTMAX_MAX__
7253
#endif // __HIPCC_RTC__
7354

7455
typedef __SIZE_TYPE__ __hip_size_t;
@@ -78,11 +59,13 @@ extern "C" {
7859
#endif //__cplusplus
7960

8061
#if HIP_VERSION_MAJOR * 100 + HIP_VERSION_MINOR >= 405
81-
extern "C" __device__ unsigned long long __ockl_dm_alloc(unsigned long long __size);
82-
extern "C" __device__ void __ockl_dm_dealloc(unsigned long long __addr);
62+
__device__ unsigned long long __ockl_dm_alloc(unsigned long long __size);
63+
__device__ void __ockl_dm_dealloc(unsigned long long __addr);
8364
#if __has_feature(address_sanitizer)
84-
extern "C" __device__ unsigned long long __asan_malloc_impl(unsigned long long __size, unsigned long long __pc);
85-
extern "C" __device__ void __asan_free_impl(unsigned long long __addr, unsigned long long __pc);
65+
__device__ unsigned long long __asan_malloc_impl(unsigned long long __size,
66+
unsigned long long __pc);
67+
__device__ void __asan_free_impl(unsigned long long __addr,
68+
unsigned long long __pc);
8669
__attribute__((noinline, weak)) __device__ void *malloc(__hip_size_t __size) {
8770
unsigned long long __pc = (unsigned long long)__builtin_return_address(0);
8871
return (void *)__asan_malloc_impl(__size, __pc);
@@ -91,7 +74,7 @@ __attribute__((noinline, weak)) __device__ void free(void *__ptr) {
9174
unsigned long long __pc = (unsigned long long)__builtin_return_address(0);
9275
__asan_free_impl((unsigned long long)__ptr, __pc);
9376
}
94-
#else
77+
#else // __has_feature(address_sanitizer)
9578
__attribute__((weak)) inline __device__ void *malloc(__hip_size_t __size) {
9679
return (void *) __ockl_dm_alloc(__size);
9780
}
@@ -109,21 +92,46 @@ __attribute__((weak)) inline __device__ void *malloc(__hip_size_t __size) {
10992
__attribute__((weak)) inline __device__ void free(void *__ptr) {
11093
__hip_free(__ptr);
11194
}
112-
#else
95+
#else // __HIP_ENABLE_DEVICE_MALLOC__
11396
__attribute__((weak)) inline __device__ void *malloc(__hip_size_t __size) {
11497
__builtin_trap();
11598
return (void *)0;
11699
}
117100
__attribute__((weak)) inline __device__ void free(void *__ptr) {
118101
__builtin_trap();
119102
}
120-
#endif
103+
#endif // __HIP_ENABLE_DEVICE_MALLOC__
121104
#endif // HIP version check
122105

123106
#ifdef __cplusplus
124107
} // extern "C"
125108
#endif //__cplusplus
126109

110+
#if !defined(__HIPCC_RTC__)
111+
#include <cmath>
112+
#include <cstdlib>
113+
#include <stdlib.h>
114+
#if __has_include("hip/hip_version.h")
115+
#include "hip/hip_version.h"
116+
#endif // __has_include("hip/hip_version.h")
117+
#else
118+
typedef __SIZE_TYPE__ size_t;
119+
// Define macros which are needed to declare HIP device API's without standard
120+
// C/C++ headers. This is for readability so that these API's can be written
121+
// the same way as non-hipRTC use case. These macros need to be popped so that
122+
// they do not pollute users' name space.
123+
#pragma push_macro("NULL")
124+
#pragma push_macro("uint32_t")
125+
#pragma push_macro("uint64_t")
126+
#pragma push_macro("CHAR_BIT")
127+
#pragma push_macro("INT_MAX")
128+
#define NULL (void *)0
129+
#define uint32_t __UINT32_TYPE__
130+
#define uint64_t __UINT64_TYPE__
131+
#define CHAR_BIT __CHAR_BIT__
132+
#define INT_MAX __INTMAX_MAX__
133+
#endif // __HIPCC_RTC__
134+
127135
#include <__clang_hip_libdevice_declares.h>
128136
#include <__clang_hip_math.h>
129137
#include <__clang_hip_stdlib.h>

clang/test/Headers/Inputs/include/cstdlib

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,5 +26,7 @@ float fabs(float __x) { return __builtin_fabs(__x); }
2626
float abs(float __x) { return fabs(__x); }
2727
double abs(double __x) { return fabs(__x); }
2828

29+
using ::malloc;
30+
using ::free;
2931
}
3032

clang/test/Headers/Inputs/include/math.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -105,8 +105,6 @@ long lrint(double __a);
105105
long lrintf(float __a);
106106
long lround(double __a);
107107
long lroundf(float __a);
108-
int max(int __a, int __b);
109-
int min(int __a, int __b);
110108
double modf(double __a, double *__b);
111109
float modff(float __a, float *__b);
112110
double nearbyint(double __a);

clang/test/Headers/Inputs/include/sstream

Whitespace-only changes.

clang/test/Headers/Inputs/include/stdexcept

Whitespace-only changes.

clang/test/Headers/hip-header.hip

Lines changed: 37 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,14 @@
3131
// RUN: -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-unknown \
3232
// RUN: -target-cpu gfx906 -emit-llvm %s -fcuda-is-device -o - \
3333
// RUN: -DHIP_VERSION_MAJOR=4 -DHIP_VERSION_MINOR=5 \
34-
// RUN: -D__HIPCC_RTC__ | FileCheck -check-prefixes=CHECK,MALLOC %s
34+
// RUN: -D__HIPCC_RTC__ -disable-llvm-passes | FileCheck -check-prefixes=MALLOC %s
35+
// RUN: %clang_cc1 -include __clang_hip_runtime_wrapper.h \
36+
// RUN: -internal-isystem %S/../../lib/Headers/cuda_wrappers \
37+
// RUN: -internal-isystem %S/Inputs/include \
38+
// RUN: -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-unknown \
39+
// RUN: -target-cpu gfx906 -emit-llvm %s -fcuda-is-device -o - \
40+
// RUN: -DHIP_VERSION_MAJOR=4 -DHIP_VERSION_MINOR=5 \
41+
// RUN: -disable-llvm-passes | FileCheck -check-prefixes=MALLOC %s
3542
// RUN: %clang_cc1 -include __clang_hip_runtime_wrapper.h \
3643
// RUN: -internal-isystem %S/../../lib/Headers/cuda_wrappers \
3744
// RUN: -internal-isystem %S/Inputs/include \
@@ -40,6 +47,13 @@
4047
// RUN: -DHIP_VERSION_MAJOR=4 -DHIP_VERSION_MINOR=5 \
4148
// RUN: -fsanitize=address -disable-llvm-passes -D__HIPCC_RTC__ \
4249
// RUN: | FileCheck -check-prefixes=MALLOC-ASAN %s
50+
// RUN: %clang_cc1 -include __clang_hip_runtime_wrapper.h \
51+
// RUN: -internal-isystem %S/../../lib/Headers/cuda_wrappers \
52+
// RUN: -internal-isystem %S/Inputs/include \
53+
// RUN: -aux-triple amdgcn-amd-amdhsa -triple x86_64-unknown-unknown \
54+
// RUN: -emit-llvm %s -o - \
55+
// RUN: -DHIP_VERSION_MAJOR=4 -DHIP_VERSION_MINOR=5 \
56+
// RUN: -disable-llvm-passes | FileCheck -check-prefixes=MALLOC-HOST %s
4357

4458
// expected-no-diagnostics
4559

@@ -148,26 +162,42 @@ __device__ double test_isnan() {
148162

149163
// Check that device malloc and free do not conflict with std headers.
150164
#include <cstdlib>
151-
// CHECK-LABEL: define{{.*}}@_Z11test_malloc
152-
// CHECK: call {{.*}}ptr @malloc(i64
153-
// CHECK-LABEL: define weak {{.*}}ptr @malloc(i64
165+
// MALLOC-LABEL: define{{.*}}@_Z11test_malloc
166+
// MALLOC: call {{.*}}ptr @malloc(i64
167+
// MALLOC: call {{.*}}ptr @malloc(i64
168+
// MALLOC-LABEL: define weak {{.*}}ptr @malloc(i64
154169
// MALLOC: call i64 @__ockl_dm_alloc
155170
// NOMALLOC: call void @llvm.trap
156171
// MALLOC-ASAN-LABEL: define weak {{.*}}ptr @malloc(i64
157172
// MALLOC-ASAN: call ptr @llvm.returnaddress(i32 0)
158173
// MALLOC-ASAN: call i64 @__asan_malloc_impl(i64 {{.*}}, i64 {{.*}})
159174
__device__ void test_malloc(void *a) {
160175
a = malloc(42);
176+
a = std::malloc(42);
161177
}
162178

163-
// CHECK-LABEL: define{{.*}}@_Z9test_free
164-
// CHECK: call {{.*}}void @free(ptr
165-
// CHECK-LABEL: define weak {{.*}}void @free(ptr
179+
// MALLOC-LABEL: define{{.*}}@_Z9test_free
180+
// MALLOC: call {{.*}}void @free(ptr
181+
// MALLOC: call {{.*}}void @free(ptr
182+
// MALLOC-LABEL: define weak {{.*}}void @free(ptr
166183
// MALLOC: call void @__ockl_dm_dealloc
167184
// NOMALLOC: call void @llvm.trap
168185
// MALLOC-ASAN-LABEL: define weak {{.*}}void @free(ptr
169186
// MALLOC-ASAN: call ptr @llvm.returnaddress(i32 0)
170187
// MALLOC-ASAN: call void @__asan_free_impl(i64 {{.*}}, i64 {{.*}})
171188
__device__ void test_free(void *a) {
172189
free(a);
190+
std::free(a);
191+
}
192+
193+
// MALLOC-HOST-LABEL: define{{.*}}@_Z16test_malloc_host
194+
// MALLOC-HOST: call {{.*}}ptr @_Z6mallocm(i64
195+
// MALLOC-HOST: call {{.*}}void @_Z4freePv(ptr
196+
// MALLOC-HOST: call {{.*}}ptr @_Z6mallocm(i64
197+
// MALLOC-HOST: call {{.*}}void @_Z4freePv(ptr
198+
void test_malloc_host(void *a) {
199+
a = malloc(42);
200+
free(a);
201+
a = std::malloc(42);
202+
std::free(a);
173203
}

0 commit comments

Comments
 (0)