Skip to content

Commit 9e87061

Browse files
authored
[libcxx] Adjust inline assembly constraints for the AMDGPU target (#101747)
Summary: These assembly constraints are illegal / invalid on the AMDGPU target. The `r` constraint is only valid on inputs and the `m` constraint isn't accepted at all. The NVPTX target can handle them because it uses a more permissive virtual machine (PTX is an IR). Simply add exceptions on the target to make these work.
1 parent 51ed383 commit 9e87061

File tree

1 file changed

+15
-5
lines changed

1 file changed

+15
-5
lines changed

libcxx/test/support/test_macros.h

Lines changed: 15 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -291,17 +291,27 @@ struct is_same<T, T> { enum {value = 1}; };
291291
// when optimizations are enabled.
292292
template <class Tp>
293293
inline Tp const& DoNotOptimize(Tp const& value) {
294-
asm volatile("" : : "r,m"(value) : "memory");
295-
return value;
294+
// The `m` constraint is invalid in the AMDGPU backend.
295+
# if defined(__AMDGPU__) || defined(__NVPTX__)
296+
asm volatile("" : : "r"(value) : "memory");
297+
# else
298+
asm volatile("" : : "r,m"(value) : "memory");
299+
# endif
300+
return value;
296301
}
297302

298303
template <class Tp>
299304
inline Tp& DoNotOptimize(Tp& value) {
300-
#if defined(__clang__)
305+
// The `m` and `r` output constraints are invalid in the AMDGPU backend as well
306+
// as i8 / i1 arguments, so we just capture the pointer instead.
307+
# if defined(__AMDGPU__)
308+
Tp* tmp = &value;
309+
asm volatile("" : "+v"(tmp) : : "memory");
310+
# elif defined(__clang__)
301311
asm volatile("" : "+r,m"(value) : : "memory");
302-
#else
312+
# else
303313
asm volatile("" : "+m,r"(value) : : "memory");
304-
#endif
314+
# endif
305315
return value;
306316
}
307317
#else

0 commit comments

Comments
 (0)