Skip to content

Commit 8193b29

Browse files
committed
Revert "[HIP] Allow std::malloc in device function"
This reverts commit f5033c3. revert this patch since it causes regressions for Tensile. A reduced test case is: int main() { std::shared_ptr<float> a; a = std::shared_ptr<float>( (float*)std::malloc(sizeof(float) * 100), std::free ); return 0; } Will fix the issue then re-commit. Fixes: SWDEV-405317
1 parent 83b7f01 commit 8193b29

File tree

6 files changed

+35
-73
lines changed

6 files changed

+35
-73
lines changed

clang/lib/Headers/__clang_hip_runtime_wrapper.h

Lines changed: 26 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -47,9 +47,28 @@ extern "C" {
4747
#endif //__cplusplus
4848

4949
#if !defined(__HIPCC_RTC__)
50+
#include <cmath>
51+
#include <cstdlib>
52+
#include <stdlib.h>
5053
#if __has_include("hip/hip_version.h")
5154
#include "hip/hip_version.h"
5255
#endif // __has_include("hip/hip_version.h")
56+
#else
57+
typedef __SIZE_TYPE__ size_t;
58+
// Define macros which are needed to declare HIP device API's without standard
59+
// C/C++ headers. This is for readability so that these API's can be written
60+
// the same way as non-hipRTC use case. These macros need to be popped so that
61+
// they do not pollute users' name space.
62+
#pragma push_macro("NULL")
63+
#pragma push_macro("uint32_t")
64+
#pragma push_macro("uint64_t")
65+
#pragma push_macro("CHAR_BIT")
66+
#pragma push_macro("INT_MAX")
67+
#define NULL (void *)0
68+
#define uint32_t __UINT32_TYPE__
69+
#define uint64_t __UINT64_TYPE__
70+
#define CHAR_BIT __CHAR_BIT__
71+
#define INT_MAX __INTMAX_MAX__
5372
#endif // __HIPCC_RTC__
5473

5574
typedef __SIZE_TYPE__ __hip_size_t;
@@ -59,13 +78,11 @@ extern "C" {
5978
#endif //__cplusplus
6079

6180
#if HIP_VERSION_MAJOR * 100 + HIP_VERSION_MINOR >= 405
62-
__device__ unsigned long long __ockl_dm_alloc(unsigned long long __size);
63-
__device__ void __ockl_dm_dealloc(unsigned long long __addr);
81+
extern "C" __device__ unsigned long long __ockl_dm_alloc(unsigned long long __size);
82+
extern "C" __device__ void __ockl_dm_dealloc(unsigned long long __addr);
6483
#if __has_feature(address_sanitizer)
65-
__device__ unsigned long long __asan_malloc_impl(unsigned long long __size,
66-
unsigned long long __pc);
67-
__device__ void __asan_free_impl(unsigned long long __addr,
68-
unsigned long long __pc);
84+
extern "C" __device__ unsigned long long __asan_malloc_impl(unsigned long long __size, unsigned long long __pc);
85+
extern "C" __device__ void __asan_free_impl(unsigned long long __addr, unsigned long long __pc);
6986
__attribute__((noinline, weak)) __device__ void *malloc(__hip_size_t __size) {
7087
unsigned long long __pc = (unsigned long long)__builtin_return_address(0);
7188
return (void *)__asan_malloc_impl(__size, __pc);
@@ -74,7 +91,7 @@ __attribute__((noinline, weak)) __device__ void free(void *__ptr) {
7491
unsigned long long __pc = (unsigned long long)__builtin_return_address(0);
7592
__asan_free_impl((unsigned long long)__ptr, __pc);
7693
}
77-
#else // __has_feature(address_sanitizer)
94+
#else
7895
__attribute__((weak)) inline __device__ void *malloc(__hip_size_t __size) {
7996
return (void *) __ockl_dm_alloc(__size);
8097
}
@@ -92,46 +109,21 @@ __attribute__((weak)) inline __device__ void *malloc(__hip_size_t __size) {
92109
__attribute__((weak)) inline __device__ void free(void *__ptr) {
93110
__hip_free(__ptr);
94111
}
95-
#else // __HIP_ENABLE_DEVICE_MALLOC__
112+
#else
96113
__attribute__((weak)) inline __device__ void *malloc(__hip_size_t __size) {
97114
__builtin_trap();
98115
return (void *)0;
99116
}
100117
__attribute__((weak)) inline __device__ void free(void *__ptr) {
101118
__builtin_trap();
102119
}
103-
#endif // __HIP_ENABLE_DEVICE_MALLOC__
120+
#endif
104121
#endif // HIP version check
105122

106123
#ifdef __cplusplus
107124
} // extern "C"
108125
#endif //__cplusplus
109126

110-
#if !defined(__HIPCC_RTC__)
111-
#include <cmath>
112-
#include <cstdlib>
113-
#include <stdlib.h>
114-
#if __has_include("hip/hip_version.h")
115-
#include "hip/hip_version.h"
116-
#endif // __has_include("hip/hip_version.h")
117-
#else
118-
typedef __SIZE_TYPE__ size_t;
119-
// Define macros which are needed to declare HIP device API's without standard
120-
// C/C++ headers. This is for readability so that these API's can be written
121-
// the same way as non-hipRTC use case. These macros need to be popped so that
122-
// they do not pollute users' name space.
123-
#pragma push_macro("NULL")
124-
#pragma push_macro("uint32_t")
125-
#pragma push_macro("uint64_t")
126-
#pragma push_macro("CHAR_BIT")
127-
#pragma push_macro("INT_MAX")
128-
#define NULL (void *)0
129-
#define uint32_t __UINT32_TYPE__
130-
#define uint64_t __UINT64_TYPE__
131-
#define CHAR_BIT __CHAR_BIT__
132-
#define INT_MAX __INTMAX_MAX__
133-
#endif // __HIPCC_RTC__
134-
135127
#include <__clang_hip_libdevice_declares.h>
136128
#include <__clang_hip_math.h>
137129
#include <__clang_hip_stdlib.h>

clang/test/Headers/Inputs/include/cstdlib

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,5 @@ float fabs(float __x) { return __builtin_fabs(__x); }
2626
float abs(float __x) { return fabs(__x); }
2727
double abs(double __x) { return fabs(__x); }
2828

29-
using ::malloc;
30-
using ::free;
3129
}
3230

clang/test/Headers/Inputs/include/math.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,8 @@ long lrint(double __a);
105105
long lrintf(float __a);
106106
long lround(double __a);
107107
long lroundf(float __a);
108+
int max(int __a, int __b);
109+
int min(int __a, int __b);
108110
double modf(double __a, double *__b);
109111
float modff(float __a, float *__b);
110112
double nearbyint(double __a);

clang/test/Headers/Inputs/include/sstream

Whitespace-only changes.

clang/test/Headers/Inputs/include/stdexcept

Whitespace-only changes.

clang/test/Headers/hip-header.hip

Lines changed: 7 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -31,14 +31,7 @@
3131
// RUN: -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-unknown \
3232
// RUN: -target-cpu gfx906 -emit-llvm %s -fcuda-is-device -o - \
3333
// RUN: -DHIP_VERSION_MAJOR=4 -DHIP_VERSION_MINOR=5 \
34-
// RUN: -D__HIPCC_RTC__ -disable-llvm-passes | FileCheck -check-prefixes=MALLOC %s
35-
// RUN: %clang_cc1 -include __clang_hip_runtime_wrapper.h \
36-
// RUN: -internal-isystem %S/../../lib/Headers/cuda_wrappers \
37-
// RUN: -internal-isystem %S/Inputs/include \
38-
// RUN: -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-unknown \
39-
// RUN: -target-cpu gfx906 -emit-llvm %s -fcuda-is-device -o - \
40-
// RUN: -DHIP_VERSION_MAJOR=4 -DHIP_VERSION_MINOR=5 \
41-
// RUN: -disable-llvm-passes | FileCheck -check-prefixes=MALLOC %s
34+
// RUN: -D__HIPCC_RTC__ | FileCheck -check-prefixes=CHECK,MALLOC %s
4235
// RUN: %clang_cc1 -include __clang_hip_runtime_wrapper.h \
4336
// RUN: -internal-isystem %S/../../lib/Headers/cuda_wrappers \
4437
// RUN: -internal-isystem %S/Inputs/include \
@@ -47,13 +40,6 @@
4740
// RUN: -DHIP_VERSION_MAJOR=4 -DHIP_VERSION_MINOR=5 \
4841
// RUN: -fsanitize=address -disable-llvm-passes -D__HIPCC_RTC__ \
4942
// RUN: | FileCheck -check-prefixes=MALLOC-ASAN %s
50-
// RUN: %clang_cc1 -include __clang_hip_runtime_wrapper.h \
51-
// RUN: -internal-isystem %S/../../lib/Headers/cuda_wrappers \
52-
// RUN: -internal-isystem %S/Inputs/include \
53-
// RUN: -aux-triple amdgcn-amd-amdhsa -triple x86_64-unknown-unknown \
54-
// RUN: -emit-llvm %s -o - \
55-
// RUN: -DHIP_VERSION_MAJOR=4 -DHIP_VERSION_MINOR=5 \
56-
// RUN: -disable-llvm-passes | FileCheck -check-prefixes=MALLOC-HOST %s
5743

5844
// expected-no-diagnostics
5945

@@ -147,42 +133,26 @@ __device__ double test_isnan() {
147133

148134
// Check that device malloc and free do not conflict with std headers.
149135
#include <cstdlib>
150-
// MALLOC-LABEL: define{{.*}}@_Z11test_malloc
151-
// MALLOC: call {{.*}}ptr @malloc(i64
152-
// MALLOC: call {{.*}}ptr @malloc(i64
153-
// MALLOC-LABEL: define weak {{.*}}ptr @malloc(i64
136+
// CHECK-LABEL: define{{.*}}@_Z11test_malloc
137+
// CHECK: call {{.*}}ptr @malloc(i64
138+
// CHECK-LABEL: define weak {{.*}}ptr @malloc(i64
154139
// MALLOC: call i64 @__ockl_dm_alloc
155140
// NOMALLOC: call void @llvm.trap
156141
// MALLOC-ASAN-LABEL: define weak {{.*}}ptr @malloc(i64
157142
// MALLOC-ASAN: call ptr @llvm.returnaddress(i32 0)
158143
// MALLOC-ASAN: call i64 @__asan_malloc_impl(i64 {{.*}}, i64 {{.*}})
159144
__device__ void test_malloc(void *a) {
160145
a = malloc(42);
161-
a = std::malloc(42);
162146
}
163147

164-
// MALLOC-LABEL: define{{.*}}@_Z9test_free
165-
// MALLOC: call {{.*}}void @free(ptr
166-
// MALLOC: call {{.*}}void @free(ptr
167-
// MALLOC-LABEL: define weak {{.*}}void @free(ptr
148+
// CHECK-LABEL: define{{.*}}@_Z9test_free
149+
// CHECK: call {{.*}}void @free(ptr
150+
// CHECK-LABEL: define weak {{.*}}void @free(ptr
168151
// MALLOC: call void @__ockl_dm_dealloc
169152
// NOMALLOC: call void @llvm.trap
170153
// MALLOC-ASAN-LABEL: define weak {{.*}}void @free(ptr
171154
// MALLOC-ASAN: call ptr @llvm.returnaddress(i32 0)
172155
// MALLOC-ASAN: call void @__asan_free_impl(i64 {{.*}}, i64 {{.*}})
173156
__device__ void test_free(void *a) {
174157
free(a);
175-
std::free(a);
176-
}
177-
178-
// MALLOC-HOST-LABEL: define{{.*}}@_Z16test_malloc_host
179-
// MALLOC-HOST: call {{.*}}ptr @_Z6mallocm(i64
180-
// MALLOC-HOST: call {{.*}}void @_Z4freePv(ptr
181-
// MALLOC-HOST: call {{.*}}ptr @_Z6mallocm(i64
182-
// MALLOC-HOST: call {{.*}}void @_Z4freePv(ptr
183-
void test_malloc_host(void *a) {
184-
a = malloc(42);
185-
free(a);
186-
a = std::malloc(42);
187-
std::free(a);
188158
}

0 commit comments

Comments
 (0)