Skip to content

Commit 597709b

Browse files
authored
[DevTSAN] Treat each work item as a thread for both CPU & GPU device (#18580)
According to sycl spec, these is no dependency between each work item. So we'd better treat each work item as a thread. * Fix one minor bug to calculate thread id * Make e2e tests more robust
1 parent 83f86e5 commit 597709b

File tree

10 files changed

+48
-82
lines changed

10 files changed

+48
-82
lines changed

libdevice/sanitizer/tsan_rtl.cpp

Lines changed: 3 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -134,40 +134,9 @@ inline __SYCL_GLOBAL__ RawShadow *MemToShadow(uptr addr, uint32_t as) {
134134
return shadow_ptr;
135135
}
136136

137-
// For CPU device, each work group is a thread
138-
inline Sid GetCurrentSid_CPU() {
139-
// work-group linear id
140-
const auto wg_lid =
141-
__spirv_BuiltInWorkgroupId.x * __spirv_BuiltInNumWorkgroups.y *
142-
__spirv_BuiltInNumWorkgroups.z +
143-
__spirv_BuiltInWorkgroupId.y * __spirv_BuiltInNumWorkgroups.z +
144-
__spirv_BuiltInWorkgroupId.z;
145-
return wg_lid;
146-
}
147-
148-
// For GPU device, each work item is a thread
149-
inline Sid GetCurrentSid_GPU() {
150-
// sub-group linear id
151-
const auto lid = __spirv_BuiltInGlobalLinearId;
152-
return lid;
153-
}
154-
155137
inline Sid GetCurrentSid() {
156-
#if defined(__LIBDEVICE_CPU__)
157-
return GetCurrentSid_CPU();
158-
#elif defined(__LIBDEVICE_PVC__)
159-
return GetCurrentSid_GPU();
160-
#else
161-
if (TsanLaunchInfo->DeviceTy == DeviceType::CPU) {
162-
return GetCurrentSid_CPU();
163-
} else if (TsanLaunchInfo->DeviceTy != DeviceType::UNKNOWN) {
164-
return GetCurrentSid_GPU();
165-
} else {
166-
TSAN_DEBUG(__spirv_ocl_printf(__tsan_print_unsupport_device_type,
167-
(int)TsanLaunchInfo->DeviceTy));
168-
return 0;
169-
}
170-
#endif
138+
const auto lid = __spirv_BuiltInGlobalLinearId;
139+
return lid % kThreadSlotCount;
171140
}
172141

173142
inline RawShadow LoadShadow(const __SYCL_GLOBAL__ RawShadow *p) {
@@ -466,10 +435,6 @@ DEVICE_EXTERN_C_NOINLINE void __tsan_cleanup_private(uptr addr, uint32_t size) {
466435

467436
DEVICE_EXTERN_C_INLINE void __tsan_device_barrier() {
468437
Sid sid = GetCurrentSid();
469-
__spirv_ControlBarrier(__spv::Scope::Device, __spv::Scope::Device,
470-
__spv::MemorySemanticsMask::SequentiallyConsistent |
471-
__spv::MemorySemanticsMask::CrossWorkgroupMemory |
472-
__spv::MemorySemanticsMask::WorkgroupMemory);
473438

474439
// sync current thread clock to global state
475440
TsanLaunchInfo->Clock[kThreadSlotCount].clk_[sid] =
@@ -484,19 +449,10 @@ DEVICE_EXTERN_C_INLINE void __tsan_device_barrier() {
484449
for (uptr i = 0; i < kThreadSlotCount; i++)
485450
TsanLaunchInfo->Clock[sid].clk_[i] =
486451
TsanLaunchInfo->Clock[kThreadSlotCount].clk_[i];
487-
488-
__spirv_ControlBarrier(__spv::Scope::Device, __spv::Scope::Device,
489-
__spv::MemorySemanticsMask::SequentiallyConsistent |
490-
__spv::MemorySemanticsMask::CrossWorkgroupMemory |
491-
__spv::MemorySemanticsMask::WorkgroupMemory);
492452
}
493453

494-
static inline void __tsan_group_barrier_impl() {
454+
DEVICE_EXTERN_C_INLINE void __tsan_group_barrier() {
495455
Sid sid = GetCurrentSid();
496-
__spirv_ControlBarrier(__spv::Scope::Workgroup, __spv::Scope::Workgroup,
497-
__spv::MemorySemanticsMask::SequentiallyConsistent |
498-
__spv::MemorySemanticsMask::CrossWorkgroupMemory |
499-
__spv::MemorySemanticsMask::WorkgroupMemory);
500456

501457
// sync current thread clock to global state
502458
TsanLaunchInfo->Clock[kThreadSlotCount].clk_[sid] =
@@ -511,23 +467,6 @@ static inline void __tsan_group_barrier_impl() {
511467
for (uptr i = 0; i < kThreadSlotCount; i++)
512468
TsanLaunchInfo->Clock[sid].clk_[i] =
513469
TsanLaunchInfo->Clock[kThreadSlotCount].clk_[i];
514-
515-
__spirv_ControlBarrier(__spv::Scope::Workgroup, __spv::Scope::Workgroup,
516-
__spv::MemorySemanticsMask::SequentiallyConsistent |
517-
__spv::MemorySemanticsMask::CrossWorkgroupMemory |
518-
__spv::MemorySemanticsMask::WorkgroupMemory);
519-
}
520-
521-
DEVICE_EXTERN_C_INLINE void __tsan_group_barrier() {
522-
#if defined(__LIBDEVICE_CPU__)
523-
return;
524-
#elif defined(__LIBDEVICE_PVC__)
525-
__tsan_group_barrier_impl();
526-
#else
527-
if (TsanLaunchInfo->DeviceTy == DeviceType::CPU)
528-
return;
529-
__tsan_group_barrier_impl();
530-
#endif
531470
}
532471

533472
#endif // __SPIR__ || __SPIRV__

llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -361,8 +361,7 @@ void ThreadSanitizerOnSpirv::appendDebugInfoToArgs(
361361
auto &Loc = I->getDebugLoc();
362362

363363
// SPIR constant address space
364-
PointerType *ConstASPtrTy =
365-
PointerType::get(Type::getInt8Ty(C), kSpirOffloadConstantAS);
364+
PointerType *ConstASPtrTy = PointerType::get(C, kSpirOffloadConstantAS);
366365

367366
// File & Line
368367
if (Loc) {

sycl/test-e2e/ThreadSanitizer/aot/Inputs/usm_data_race.cpp

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,21 @@
11
#include "sycl/detail/core.hpp"
22
#include "sycl/usm.hpp"
33

4+
__attribute__((noinline)) void foo(char *array, int val) { *array += val; }
5+
46
int main() {
57
sycl::queue Q;
68
auto *array = sycl::malloc_device<char>(1, Q);
79
Q.submit([&](sycl::handler &h) {
8-
h.parallel_for<class Test>(sycl::nd_range<1>(32, 8),
9-
[=](sycl::nd_item<1>) { array[0]++; });
10+
h.parallel_for<class Test>(sycl::nd_range<1>(128, 8),
11+
[=](sycl::nd_item<1> it) {
12+
*array += it.get_global_linear_id();
13+
foo(array, it.get_local_linear_id());
14+
});
1015
}).wait();
1116
// CHECK: DeviceSanitizer: data race
1217
// CHECK-NEXT: When write of size 1 at 0x{{.*}} in kernel <{{.*}}Test>
13-
// CHECK-NEXT: #0 {{.*}}usm_data_race.cpp:[[@LINE-4]]
18+
// CHECK-NEXT: #0 {{.*}}usm_data_race.cpp
1419

1520
sycl::free(array, Q);
1621
return 0;

sycl/test-e2e/ThreadSanitizer/check_buffer.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,9 @@ int main() {
2121
sycl::buffer<int, 1> buf(v.data(), v.size());
2222
q.submit([&](sycl::handler &h) {
2323
auto A = buf.get_access<sycl::access::mode::read_write>(h);
24-
h.parallel_for<class Test>(sycl::nd_range<1>(N, 1),
25-
[=](sycl::nd_item<1>) { A[0]++; });
24+
h.parallel_for<class Test>(
25+
sycl::nd_range<1>(N, 1),
26+
[=](sycl::nd_item<1> it) { A[0] += it.get_global_linear_id(); });
2627
}).wait();
2728
// CHECK: WARNING: DeviceSanitizer: data race
2829
// CHECK-NEXT: When write of size 4 at 0x{{.*}} in kernel <{{.*}}Test>

sycl/test-e2e/ThreadSanitizer/check_device_global.cpp

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,16 +15,21 @@ sycl::ext::oneapi::experimental::device_global<
1515
int[4], decltype(properties(device_image_scope, host_access_read_write))>
1616
dev_global;
1717

18+
__attribute__((noinline)) void foo(int *array, int val) { *array += val; }
19+
1820
int main() {
1921
sycl::queue Q;
2022

2123
Q.submit([&](sycl::handler &h) {
2224
h.parallel_for<class Test>(sycl::nd_range<1>(128, 8),
23-
[=](sycl::nd_item<1>) { dev_global[0]++; });
25+
[=](sycl::nd_item<1> it) {
26+
dev_global[0] += it.get_global_linear_id();
27+
foo(dev_global, it.get_local_linear_id());
28+
});
2429
}).wait();
2530
// CHECK: WARNING: DeviceSanitizer: data race
2631
// CHECK-NEXT: When write of size 4 at 0x{{.*}} in kernel <{{.*}}Test>
27-
// CHECK-NEXT: #0 {{.*}}check_device_global.cpp:[[@LINE-4]]
32+
// CHECK-NEXT: #0 {{.*}}check_device_global.cpp
2833

2934
return 0;
3035
}

sycl/test-e2e/ThreadSanitizer/check_device_usm.cpp

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,17 +5,22 @@
55
#include "sycl/detail/core.hpp"
66
#include "sycl/usm.hpp"
77

8+
__attribute__((noinline)) void foo(char *array, int val) { *array += val; }
9+
810
int main() {
911
sycl::queue Q;
1012
auto *array = sycl::malloc_device<char>(1, Q);
1113

1214
Q.submit([&](sycl::handler &h) {
1315
h.parallel_for<class Test>(sycl::nd_range<1>(128, 8),
14-
[=](sycl::nd_item<1>) { array[0]++; });
16+
[=](sycl::nd_item<1> it) {
17+
*array += it.get_global_linear_id();
18+
foo(array, it.get_local_linear_id());
19+
});
1520
}).wait();
1621
// CHECK: WARNING: DeviceSanitizer: data race
1722
// CHECK-NEXT: When write of size 1 at 0x{{.*}} in kernel <{{.*}}Test>
18-
// CHECK-NEXT: #0 {{.*}}check_device_usm.cpp:[[@LINE-4]]
23+
// CHECK-NEXT: #0 {{.*}}check_device_usm.cpp
1924

2025
sycl::free(array, Q);
2126
return 0;

sycl/test-e2e/ThreadSanitizer/check_host_usm.cpp

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,17 +5,22 @@
55
#include "sycl/detail/core.hpp"
66
#include "sycl/usm.hpp"
77

8+
__attribute__((noinline)) void foo(char *array, int val) { *array += val; }
9+
810
int main() {
911
sycl::queue Q;
1012
auto *array = sycl::malloc_host<char>(1, Q);
1113

1214
Q.submit([&](sycl::handler &h) {
1315
h.parallel_for<class Test>(sycl::nd_range<1>(128, 8),
14-
[=](sycl::nd_item<1>) { array[0]++; });
16+
[=](sycl::nd_item<1> it) {
17+
*array += it.get_global_linear_id();
18+
foo(array, it.get_local_linear_id());
19+
});
1520
}).wait();
1621
// CHECK: WARNING: DeviceSanitizer: data race
1722
// CHECK-NEXT: When write of size 1 at 0x{{.*}} in kernel <{{.*}}Test>
18-
// CHECK-NEXT: #0 {{.*}}check_host_usm.cpp:[[@LINE-4]]
23+
// CHECK-NEXT: #0 {{.*}}check_host_usm.cpp
1924

2025
sycl::free(array, Q);
2126
return 0;

sycl/test-e2e/ThreadSanitizer/check_no_race.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ int main() {
1414
Q.submit([&](sycl::handler &h) {
1515
h.parallel_for<class MyKernelR_4>(
1616
sycl::nd_range<1>(N, 8),
17-
[=](sycl::nd_item<1> item) { array[item.get_group_linear_id()]++; });
17+
[=](sycl::nd_item<1> item) { array[item.get_global_linear_id()]++; });
1818
}).wait();
1919
// CHECK-NOT: WARNING: DeviceSanitizer: data race
2020

sycl/test-e2e/ThreadSanitizer/check_shared_usm.cpp

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,17 +5,22 @@
55
#include "sycl/detail/core.hpp"
66
#include "sycl/usm.hpp"
77

8+
__attribute__((noinline)) void foo(char *array, int val) { *array += val; }
9+
810
int main() {
911
sycl::queue Q;
1012
auto *array = sycl::malloc_shared<char>(1, Q);
1113

1214
Q.submit([&](sycl::handler &h) {
1315
h.parallel_for<class Test>(sycl::nd_range<1>(128, 8),
14-
[=](sycl::nd_item<1>) { array[0]++; });
16+
[=](sycl::nd_item<1> it) {
17+
*array += it.get_global_linear_id();
18+
foo(array, it.get_local_linear_id());
19+
});
1520
}).wait();
1621
// CHECK: WARNING: DeviceSanitizer: data race
1722
// CHECK-NEXT: When write of size 1 at 0x{{.*}} in kernel <{{.*}}Test>
18-
// CHECK-NEXT: #0 {{.*}}check_shared_usm.cpp:[[@LINE-4]]
23+
// CHECK-NEXT: #0 {{.*}}check_shared_usm.cpp
1924

2025
sycl::free(array, Q);
2126
return 0;

sycl/test-e2e/ThreadSanitizer/check_sub_buffer.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,11 +22,13 @@ int main() {
2222
q.submit([&](sycl::handler &cgh) {
2323
auto accessor = sub_buf.get_access<sycl::access::mode::read_write>(cgh);
2424
cgh.parallel_for<class Test>(sycl::nd_range<1>(size_x / 2, 1),
25-
[=](sycl::nd_item<1>) { accessor[0]++; });
25+
[=](sycl::nd_item<1> it) {
26+
accessor[0] += it.get_global_linear_id();
27+
});
2628
}).wait();
2729
// CHECK: WARNING: DeviceSanitizer: data race
2830
// CHECK-NEXT: When write of size 4 at 0x{{.*}} in kernel <{{.*}}Test>
29-
// CHECK-NEXT: #0 {{.*}}check_sub_buffer.cpp:[[@LINE-4]]
31+
// CHECK-NEXT: #0 {{.*}}check_sub_buffer.cpp:[[@LINE-5]]
3032
}
3133

3234
return 0;

0 commit comments

Comments
 (0)