Skip to content

Commit cb9486d

Browse files
[SYCL] Make internal device_global pointer mutable (#8780)
The implementation of device_global allows it to be const. However, for cases where the device_global is shared between programs, the value of the device_global is defined through a USM pointer inside the device_global. Since this pointer is seen as const when the device_global is const-qualified, the compiler may choose to optimize it out, potentially causing invalid accesses on device. To avoid this, this commit marks the underlying pointer member as mutable, which prevents this potential optimization. --------- Signed-off-by: Larsen, Steffen <[email protected]>
1 parent 6c3fdcc commit cb9486d

File tree

2 files changed

+25
-1
lines changed

2 files changed

+25
-1
lines changed

sycl/include/sycl/ext/oneapi/device_global/device_global.hpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,11 @@ template <typename T, typename PropertyListT, typename = void>
4444
class device_global_base {
4545
protected:
4646
using pointer_t = typename decorated_global_ptr<T>::pointer;
47-
pointer_t usmptr{};
47+
48+
// The pointer member is mutable to avoid the compiler optimizing it out when
49+
// accessing const-qualified device_global variables.
50+
mutable pointer_t usmptr{};
51+
4852
pointer_t get_ptr() noexcept { return usmptr; }
4953
const pointer_t get_ptr() const noexcept { return usmptr; }
5054

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
// RUN: %clangxx -fsycl -c -fsycl-device-only -S -emit-llvm %s -o - | FileCheck %s
2+
3+
// Tests that the underlying pointer in a const-qualified shared device_global
4+
// is not optimized out during access.
5+
6+
#include <sycl/sycl.hpp>
7+
8+
using namespace sycl;
9+
using namespace sycl::ext::oneapi::experimental;
10+
11+
const device_global<int> DeviceGlobalVar;
12+
13+
int main() {
14+
queue Q;
15+
Q.single_task([]() {
16+
// CHECK: load i32 {{.*}}({{.*}}* @_ZL15DeviceGlobalVar, i64 0, i32 0)
17+
volatile int ReadVal = DeviceGlobalVar;
18+
});
19+
return 0;
20+
}

0 commit comments

Comments
 (0)