9
9
#ifndef __OMPX_H
10
10
#define __OMPX_H
11
11
12
- #ifdef __AMDGCN_WAVEFRONT_SIZE
13
- #define __WARP_SIZE __AMDGCN_WAVEFRONT_SIZE
14
- #else
15
- #define __WARP_SIZE 32
16
- #endif
17
-
18
12
/// Header-local 64-bit unsigned alias; the ballot_sync declarations below use
/// it for the mask argument and the return value.
// NOTE(review): `unsigned long` is 64 bits wide only on LP64 targets; confirm
// this header is never compiled for an LLP64 target (e.g. 64-bit Windows).
typedef unsigned long uint64_t;
19
13
20
14
#ifdef __cplusplus
@@ -81,44 +75,28 @@ _TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_C(grid_dim, 1)
81
75
static inline RETTY ompx_##NAME(ARGS) { BODY; }
82
76
83
77
_TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_C(void, sync_block, int Ordering,
84
- _Pragma("omp barrier"))
78
+ _Pragma("omp barrier"));
85
79
_TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_C(void, sync_block_acq_rel, void,
86
- ompx_sync_block(ompx_acq_rel))
80
+ ompx_sync_block(ompx_acq_rel));
87
81
_TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_C(void, sync_block_divergent, int Ordering,
88
- ompx_sync_block(Ordering))
82
+ ompx_sync_block(Ordering));
89
83
#undef _TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_C
90
84
///}
91
85
92
86
/// Trapping stand-in for ompx_ballot_sync: it calls __builtin_trap()
/// unconditionally, never reads `mask` or `pred`, and never returns a value.
/// NOTE(review): presumably the host-side fallback, since it sits inside the
/// region closed by the `end declare variant` pragma that follows; the opening
/// pragma is not visible here, so confirm the selector.
static inline uint64_t ompx_ballot_sync(uint64_t mask, int pred) {
93
87
__builtin_trap();
94
88
}
95
89
96
- /// ompx_shfl_down_sync_{i,f,l,d}
97
- ///{
98
- #define _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC_HOST_IMPL(TYPE, TY) \
99
- static inline TYPE ompx_shfl_down_sync_##TY(uint64_t mask, TYPE var, \
100
- unsigned delta, int width) { \
101
- __builtin_trap(); \
102
- }
103
-
104
- _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC_HOST_IMPL(int, i)
105
- _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC_HOST_IMPL(float, f)
106
- _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC_HOST_IMPL(long, l)
107
- _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC_HOST_IMPL(double, d)
108
-
109
- #undef _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC_HOST_IMPL
110
- ///}
111
-
112
90
#pragma omp end declare variant
113
91
114
92
/// ompx_{sync_block}_{,divergent}
115
93
///{
116
94
/// Declares a function `ompx_<NAME>` with return type RETTY and parameter
/// list ARGS. The expansion already ends in a semicolon, so an invocation
/// does not need one of its own (adding one yields an empty declaration).
#define _TGT_KERNEL_LANGUAGE_DECL_SYNC_C(RETTY, NAME, ARGS) \
117
95
RETTY ompx_##NAME(ARGS);
118
96
119
- _TGT_KERNEL_LANGUAGE_DECL_SYNC_C(void, sync_block, int Ordering)
120
- _TGT_KERNEL_LANGUAGE_DECL_SYNC_C(void, sync_block_acq_rel, void)
121
- _TGT_KERNEL_LANGUAGE_DECL_SYNC_C(void, sync_block_divergent, int Ordering)
97
+ _TGT_KERNEL_LANGUAGE_DECL_SYNC_C(void, sync_block, int Ordering);
98
+ _TGT_KERNEL_LANGUAGE_DECL_SYNC_C(void, sync_block_acq_rel, void);
99
+ _TGT_KERNEL_LANGUAGE_DECL_SYNC_C(void, sync_block_divergent, int Ordering);
122
100
#undef _TGT_KERNEL_LANGUAGE_DECL_SYNC_C
123
101
///}
124
102
@@ -139,20 +117,6 @@ _TGT_KERNEL_LANGUAGE_DECL_GRID_C(grid_dim)
139
117
140
118
/// Prototype only: takes a 64-bit `mask` and an int `pred` and returns a
/// 64-bit value. The implementation behind this declaration is not part of
/// this view, so its semantics are not documented here.
uint64_t ompx_ballot_sync(uint64_t mask, int pred);
141
119
142
- /// ompx_shfl_down_sync_{i,f,l,d}
143
- ///{
144
- #define _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(TYPE, TY) \
145
- TYPE ompx_shfl_down_sync_##TY(uint64_t mask, TYPE var, unsigned delta, \
146
- int width);
147
-
148
- _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(int, i)
149
- _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(float, f)
150
- _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(long, l)
151
- _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(double, d)
152
-
153
- #undef _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC
154
- ///}
155
-
156
120
#ifdef __cplusplus
157
121
}
158
122
#endif
@@ -198,32 +162,16 @@ _TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_CXX(grid_dim)
198
162
}
199
163
200
164
_TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_CXX(void, sync_block, int Ordering = acc_rel,
201
- Ordering)
165
+ Ordering);
202
166
_TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_CXX(void, sync_block_divergent,
203
- int Ordering = acc_rel, Ordering)
167
+ int Ordering = acc_rel, Ordering);
204
168
#undef _TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_CXX
205
169
///}
206
170
207
171
/// C++ convenience wrapper: forwards `mask` and `pred` unchanged to
/// ompx_ballot_sync and returns its result as-is.
static inline uint64_t ballot_sync(uint64_t mask, int pred) {
208
172
return ompx_ballot_sync(mask, pred);
209
173
}
210
174
211
- /// shfl_down_sync
212
- ///{
213
- #define _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(TYPE, TY) \
214
- static inline TYPE shfl_down_sync(uint64_t mask, TYPE var, unsigned delta, \
215
- int width = __WARP_SIZE) { \
216
- return ompx_shfl_down_sync_##TY(mask, var, delta, width); \
217
- }
218
-
219
- _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(int, i)
220
- _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(float, f)
221
- _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(long, l)
222
- _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(double, d)
223
-
224
- #undef _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC
225
- ///}
226
-
227
175
} // namespace ompx
228
176
#endif
229
177
0 commit comments