Skip to content

Commit 438a9f2

Browse files
author
Hugh Delaney
committed
Merge branch '9-may-22-cuda' of https://github.com/jchlanda/llvm into tf32-joint-matrix
2 parents f120b53 + 1305861 commit 438a9f2

File tree

13 files changed

+446
-529
lines changed

13 files changed

+446
-529
lines changed

libclc/ptx-nvidiacl/libspirv/atomic/atomic_cmpxchg.cl

Lines changed: 21 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -70,10 +70,10 @@ _CLC_OVERLOAD _CLC_DECL void __spirv_MemoryBarrier(unsigned int, unsigned int);
70 70
}
71 71

72 72
#define __CLC_NVVM_ATOMIC_CAS_IMPL( \
73-
TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, OP, OP_MANGLED, ADDR_SPACE, \
74-
ADDR_SPACE_MANGLED, ADDR_SPACE_NV) \
75-
_CLC_DECL TYPE \
76-
_Z29__spirv_Atomic##OP_MANGLED##PU3##ADDR_SPACE_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagES5_##TYPE_MANGLED##TYPE_MANGLED( \
73+
FN_MANGLED, TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, OP, ADDR_SPACE, \
74+
ADDR_SPACE_NV) \
75+
__attribute__((always_inline)) _CLC_DECL TYPE \
76+
FN_MANGLED( \
77 77
volatile ADDR_SPACE TYPE *pointer, enum Scope scope, \
78 78
enum MemorySemanticsMask semantics1, \
79 79
enum MemorySemanticsMask semantics2, TYPE cmp, TYPE value) { \
@@ -118,21 +118,24 @@ Memory order is stored in the lowest 5 bits */
118 118
__builtin_unreachable(); \
119 119
}
120 120

121-
#define __CLC_NVVM_ATOMIC_CAS(TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, \
122-
OP, OP_MANGLED) \
123-
__attribute__((always_inline)) \
124-
__CLC_NVVM_ATOMIC_CAS_IMPL(TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, OP, \
125-
OP_MANGLED, __global, AS1, _global_) \
126-
__attribute__((always_inline)) \
127-
__CLC_NVVM_ATOMIC_CAS_IMPL(TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, \
128-
OP, OP_MANGLED, __local, AS3, _shared_)
121+
#define __CLC_NVVM_ATOMIC_CAS(TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, \
122+
OP) \
123+
__CLC_NVVM_ATOMIC_CAS_IMPL( \
124+
_Z29__spirv_AtomicCompareExchange##P##TYPE_MANGLED##N5__spv5Scope4FlagENS0_19MemorySemanticsMask4FlagES4_##TYPE_MANGLED##TYPE_MANGLED, \
125+
TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, OP, , _gen_) \
126+
__CLC_NVVM_ATOMIC_CAS_IMPL( \
127+
_Z29__spirv_AtomicCompareExchange##PU3AS1##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagES5_##TYPE_MANGLED##TYPE_MANGLED, \
128+
TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, OP, __global, _global_) \
129+
__CLC_NVVM_ATOMIC_CAS_IMPL( \
130+
_Z29__spirv_AtomicCompareExchange##PU3AS3##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagES5_##TYPE_MANGLED##TYPE_MANGLED, \
131+
TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, OP, __local, _shared_)
129 132

130-
__CLC_NVVM_ATOMIC_CAS(int, i, int, i, cas, CompareExchange)
131-
__CLC_NVVM_ATOMIC_CAS(long, l, long, l, cas, CompareExchange)
132-
__CLC_NVVM_ATOMIC_CAS(unsigned int, j, int, i, cas, CompareExchange)
133-
__CLC_NVVM_ATOMIC_CAS(unsigned long, m, long, l, cas, CompareExchange)
134-
__CLC_NVVM_ATOMIC_CAS(float, f, float, f, cas, CompareExchange)
135-
__CLC_NVVM_ATOMIC_CAS(double, d, double, d, cas, CompareExchange)
133+
__CLC_NVVM_ATOMIC_CAS(int, i, int, i, cas)
134+
__CLC_NVVM_ATOMIC_CAS(long, l, long, l, cas)
135+
__CLC_NVVM_ATOMIC_CAS(unsigned int, j, int, i, cas)
136+
__CLC_NVVM_ATOMIC_CAS(unsigned long, m, long, l, cas)
137+
__CLC_NVVM_ATOMIC_CAS(float, f, float, f, cas)
138+
__CLC_NVVM_ATOMIC_CAS(double, d, double, d, cas)
136 139

137 140
#undef __CLC_NVVM_ATOMIC_CAS_IMPL_ORDER
138 141
#undef __CLC_NVVM_ATOMIC_CAS

libclc/ptx-nvidiacl/libspirv/atomic/atomic_helpers.h

Lines changed: 56 additions & 55 deletions
Original file line number | Diff line number | Diff line change
@@ -72,62 +72,63 @@ _CLC_OVERLOAD _CLC_DECL void __spirv_MemoryBarrier(unsigned int, unsigned int);
72 72
} \
73 73
}
74 74

75-
#define __CLC_NVVM_ATOMIC_IMPL(TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, \
76-
OP, NAME_MANGLED, ADDR_SPACE, \
77-
ADDR_SPACE_MANGLED, ADDR_SPACE_NV) \
78-
_CLC_DECL TYPE \
79-
NAME_MANGLED##PU3##ADDR_SPACE_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \
80-
volatile ADDR_SPACE TYPE *pointer, enum Scope scope, \
81-
enum MemorySemanticsMask semantics, TYPE value) { \
82-
/* Semantics mask may include memory order, storage class and other info \
83-
Memory order is stored in the lowest 5 bits */ \
84-
unsigned int order = semantics & 0x1F; \
85-
switch (order) { \
86-
case None: \
87-
__CLC_NVVM_ATOMIC_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, \
88-
ADDR_SPACE, ADDR_SPACE_NV, ) \
89-
break; \
90-
case Acquire: \
91-
if (__clc_nvvm_reflect_arch() >= 700) { \
92-
__CLC_NVVM_ATOMIC_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, \
93-
ADDR_SPACE, ADDR_SPACE_NV, _acquire) \
94-
} else { \
95-
__CLC_NVVM_ATOMIC_IMPL_ACQUIRE_FENCE(TYPE, TYPE_NV, TYPE_MANGLED_NV, \
96-
OP, ADDR_SPACE, ADDR_SPACE_NV) \
97-
} \
98-
break; \
99-
case Release: \
100-
if (__clc_nvvm_reflect_arch() >= 700) { \
101-
__CLC_NVVM_ATOMIC_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, \
102-
ADDR_SPACE, ADDR_SPACE_NV, _release) \
103-
} else { \
104-
__spirv_MemoryBarrier(scope, Release); \
105-
__CLC_NVVM_ATOMIC_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, \
106-
ADDR_SPACE, ADDR_SPACE_NV, ) \
107-
} \
108-
break; \
109-
case AcquireRelease: \
110-
if (__clc_nvvm_reflect_arch() >= 700) { \
111-
__CLC_NVVM_ATOMIC_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, \
112-
ADDR_SPACE, ADDR_SPACE_NV, _acq_rel) \
113-
} else { \
114-
__spirv_MemoryBarrier(scope, Release); \
115-
__CLC_NVVM_ATOMIC_IMPL_ACQUIRE_FENCE(TYPE, TYPE_NV, TYPE_MANGLED_NV, \
116-
OP, ADDR_SPACE, ADDR_SPACE_NV) \
117-
} \
118-
break; \
119-
} \
120-
__builtin_trap(); \
121-
__builtin_unreachable(); \
75+
#define __CLC_NVVM_ATOMIC_IMPL(FN_MANGLED, TYPE, TYPE_MANGLED, TYPE_NV, \
76+
TYPE_MANGLED_NV, OP, ADDR_SPACE, ADDR_SPACE_NV) \
77+
__attribute__((always_inline)) _CLC_DECL TYPE FN_MANGLED( \
78+
volatile ADDR_SPACE TYPE *pointer, enum Scope scope, \
79+
enum MemorySemanticsMask semantics, TYPE value) { \
80+
/* Semantics mask may include memory order, storage class and other info \
81+
Memory order is stored in the lowest 5 bits */ \
82+
unsigned int order = semantics & 0x1F; \
83+
switch (order) { \
84+
case None: \
85+
__CLC_NVVM_ATOMIC_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, \
86+
ADDR_SPACE, ADDR_SPACE_NV, ) \
87+
break; \
88+
case Acquire: \
89+
if (__clc_nvvm_reflect_arch() >= 700) { \
90+
__CLC_NVVM_ATOMIC_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, \
91+
ADDR_SPACE, ADDR_SPACE_NV, _acquire) \
92+
} else { \
93+
__CLC_NVVM_ATOMIC_IMPL_ACQUIRE_FENCE(TYPE, TYPE_NV, TYPE_MANGLED_NV, \
94+
OP, ADDR_SPACE, ADDR_SPACE_NV) \
95+
} \
96+
break; \
97+
case Release: \
98+
if (__clc_nvvm_reflect_arch() >= 700) { \
99+
__CLC_NVVM_ATOMIC_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, \
100+
ADDR_SPACE, ADDR_SPACE_NV, _release) \
101+
} else { \
102+
__spirv_MemoryBarrier(scope, Release); \
103+
__CLC_NVVM_ATOMIC_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, \
104+
ADDR_SPACE, ADDR_SPACE_NV, ) \
105+
} \
106+
break; \
107+
case AcquireRelease: \
108+
if (__clc_nvvm_reflect_arch() >= 700) { \
109+
__CLC_NVVM_ATOMIC_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, \
110+
ADDR_SPACE, ADDR_SPACE_NV, _acq_rel) \
111+
} else { \
112+
__spirv_MemoryBarrier(scope, Release); \
113+
__CLC_NVVM_ATOMIC_IMPL_ACQUIRE_FENCE(TYPE, TYPE_NV, TYPE_MANGLED_NV, \
114+
OP, ADDR_SPACE, ADDR_SPACE_NV) \
115+
} \
116+
break; \
117+
} \
118+
__builtin_trap(); \
119+
__builtin_unreachable(); \
122 120
}
123 121

124-
#define __CLC_NVVM_ATOMIC(TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, OP, \
125-
NAME_MANGLED) \
126-
__attribute__((always_inline)) \
127-
__CLC_NVVM_ATOMIC_IMPL(TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, OP, \
128-
NAME_MANGLED, __global, AS1, _global_) \
129-
__attribute__((always_inline)) \
130-
__CLC_NVVM_ATOMIC_IMPL(TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, OP, \
131-
NAME_MANGLED, __local, AS3, _shared_)
122+
#define __CLC_NVVM_ATOMIC(TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, OP, \
123+
NAME_MANGLED) \
124+
__CLC_NVVM_ATOMIC_IMPL( \
125+
NAME_MANGLED##P##TYPE_MANGLED##N5__spv5Scope4FlagENS0_19MemorySemanticsMask4FlagE##TYPE_MANGLED, \
126+
TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, OP, , _gen_) \
127+
__CLC_NVVM_ATOMIC_IMPL( \
128+
NAME_MANGLED##PU3AS1##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE##TYPE_MANGLED, \
129+
TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, OP, __global, _global_) \
130+
__CLC_NVVM_ATOMIC_IMPL( \
131+
NAME_MANGLED##PU3AS3##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE##TYPE_MANGLED, \
132+
TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, OP, __local, _shared_)
132 133

133 134
#endif

0 commit comments

Comments
 (0)