@@ -144,9 +144,21 @@ inline __SYCL_GLOBAL__ RawShadow *MemToShadow(uptr addr, uint32_t as) {
   return shadow_ptr;
 }
 
-inline Sid GetCurrentSid() {
-  const auto lid = __spirv_BuiltInGlobalLinearId;
-  return lid % kThreadSlotCount;
+// We select up to 4 work items in each work group to do detection, so the
+// total number of selected work items is no more than kThreadSlotCount. This
+// may cause some false negatives for data races in non-uniform memory
+// accesses. Since such cases are very rare and the change greatly reduces
+// runtime overhead, it should be worthwhile.
+inline int GetCurrentSid() {
+  const size_t lid = LocalLinearId();
+  const size_t ThreadPerWorkGroup =
+      Min(4, __spirv_BuiltInWorkgroupSize.x * __spirv_BuiltInWorkgroupSize.y *
+                 __spirv_BuiltInWorkgroupSize.z);
+  if (lid >= ThreadPerWorkGroup)
+    return -1;
+
+  const size_t Id = lid + WorkGroupLinearId() * ThreadPerWorkGroup;
+  return Id < kThreadSlotCount ? Id : -1;
 }
 
 inline RawShadow LoadShadow(const __SYCL_GLOBAL__ RawShadow *p) {
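To make the new mapping easier to follow: only the first min(4, work-group size) work items of each group receive a TSan slot, and slots are handed out group by group until kThreadSlotCount is exhausted. Below is a minimal host-side sketch of that policy; the value 256 for kThreadSlotCount, the SlotFor helper, and the launch sizes are illustrative assumptions, not the runtime's actual definitions.

```cpp
// Host-side sketch (not part of the patch) of the slot-assignment policy.
#include <algorithm>
#include <cstddef>
#include <cstdio>

constexpr size_t kThreadSlotCount = 256; // assumed value, for illustration only

// Returns the TSan slot for a work item, or -1 if the item is not sampled.
// Mirrors the logic of GetCurrentSid() above with plain parameters instead of
// the SPIR-V built-ins.
int SlotFor(size_t local_linear_id, size_t group_linear_id,
            size_t work_group_size) {
  const size_t ThreadPerWorkGroup = std::min<size_t>(4, work_group_size);
  if (local_linear_id >= ThreadPerWorkGroup)
    return -1; // only the first few items per group participate
  const size_t Id = local_linear_id + group_linear_id * ThreadPerWorkGroup;
  return Id < kThreadSlotCount ? static_cast<int>(Id) : -1; // global cap
}

int main() {
  // A 128-item work group: local IDs 0-3 get slots, the rest are skipped.
  for (size_t lid : {0u, 3u, 4u, 127u})
    std::printf("group 0, lid %zu -> %d\n", lid, SlotFor(lid, 0, 128));
  // Group 70 would start at slot 280 (>= 256), so all of its items are skipped.
  std::printf("group 70, lid 0 -> %d\n", SlotFor(0, 70, 128));
  return 0;
}
```

Work items that get -1 simply skip instrumentation, which is where the rare false negatives mentioned in the new comment come from.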
@@ -315,7 +327,9 @@ inline bool ContainsSameAccess(__SYCL_GLOBAL__ RawShadow *s, Shadow cur,
   __SYCL_GLOBAL__ RawShadow *shadow_mem = MemToShadow(addr, as); \
   if (!shadow_mem) \
     return; \
-  Sid sid = GetCurrentSid(); \
+  int sid = GetCurrentSid(); \
+  if (sid == -1) \
+    return; \
   uint16_t current_clock = IncrementEpoch(sid) + 1; \
   TSAN_DEBUG(__spirv_ocl_printf(__tsan_print_raw_shadow, (void *)addr, as, \
                                 (void *)shadow_mem, shadow_mem[0], \
@@ -360,7 +374,9 @@ __tsan_read16(uptr addr, uint32_t as, const char __SYCL_CONSTANT__ *file,
   __SYCL_GLOBAL__ RawShadow *shadow_mem = MemToShadow(addr, as); \
   if (!shadow_mem) \
     return; \
-  Sid sid = GetCurrentSid(); \
+  int sid = GetCurrentSid(); \
+  if (sid == -1) \
+    return; \
   uint16_t current_clock = IncrementEpoch(sid) + 1; \
   AccessType type = is_write ? kAccessWrite : kAccessRead; \
   uptr size1 = Min(size, RoundUpTo(addr + 1, kShadowCell) - addr); \
@@ -499,39 +515,47 @@ DEVICE_EXTERN_C_NOINLINE void __tsan_cleanup_dynamic_local(uptr ptr,
 }
 
 DEVICE_EXTERN_C_INLINE void __tsan_device_barrier() {
-  Sid sid = GetCurrentSid();
+  int sid = GetCurrentSid();
 
-  // sync current thread clock to global state
-  TsanLaunchInfo->Clock[kThreadSlotCount].clk_[sid] =
-      TsanLaunchInfo->Clock[sid].clk_[sid];
+  if (sid != -1) {
+    // sync current thread clock to global state
+    TsanLaunchInfo->Clock[kThreadSlotCount].clk_[sid] =
+        TsanLaunchInfo->Clock[sid].clk_[sid];
+  }
 
   __spirv_ControlBarrier(__spv::Scope::Device, __spv::Scope::Device,
                          __spv::MemorySemanticsMask::SequentiallyConsistent |
                              __spv::MemorySemanticsMask::CrossWorkgroupMemory |
                              __spv::MemorySemanticsMask::WorkgroupMemory);
 
-  // sync global state back
-  for (uptr i = 0; i < kThreadSlotCount; i++)
-    TsanLaunchInfo->Clock[sid].clk_[i] =
-        TsanLaunchInfo->Clock[kThreadSlotCount].clk_[i];
+  if (sid != -1) {
+    // sync global state back
+    for (uptr i = 0; i < kThreadSlotCount; i++)
+      TsanLaunchInfo->Clock[sid].clk_[i] =
+          TsanLaunchInfo->Clock[kThreadSlotCount].clk_[i];
+  }
 }
 
 DEVICE_EXTERN_C_INLINE void __tsan_group_barrier() {
-  Sid sid = GetCurrentSid();
+  int sid = GetCurrentSid();
 
-  // sync current thread clock to global state
-  TsanLaunchInfo->Clock[kThreadSlotCount].clk_[sid] =
-      TsanLaunchInfo->Clock[sid].clk_[sid];
+  if (sid != -1) {
+    // sync current thread clock to global state
+    TsanLaunchInfo->Clock[kThreadSlotCount].clk_[sid] =
+        TsanLaunchInfo->Clock[sid].clk_[sid];
+  }
 
   __spirv_ControlBarrier(__spv::Scope::Workgroup, __spv::Scope::Workgroup,
                          __spv::MemorySemanticsMask::SequentiallyConsistent |
                              __spv::MemorySemanticsMask::CrossWorkgroupMemory |
                              __spv::MemorySemanticsMask::WorkgroupMemory);
 
-  // sync global state back
-  for (uptr i = 0; i < kThreadSlotCount; i++)
-    TsanLaunchInfo->Clock[sid].clk_[i] =
-        TsanLaunchInfo->Clock[kThreadSlotCount].clk_[i];
+  if (sid != -1) {
+    // sync global state back
+    for (uptr i = 0; i < kThreadSlotCount; i++)
+      TsanLaunchInfo->Clock[sid].clk_[i] =
+          TsanLaunchInfo->Clock[kThreadSlotCount].clk_[i];
+  }
 }
 
 #endif // __SPIR__ || __SPIRV__
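For context on the barrier changes: a participating slot publishes its own epoch into the shared row Clock[kThreadSlotCount] before the control barrier, then merges every slot's epoch back into its local vector clock afterwards; work items without a slot (sid == -1) now skip both steps but still reach the barrier. Here is a standalone sketch of that publish/merge scheme under simplified assumptions: the clock layout is reduced to a plain array, kThreadSlotCount is deliberately tiny, and PublishOwnEpoch/MergeAllEpochs are hypothetical names used only for illustration.

```cpp
// Standalone sketch (not the runtime's real types) of the epoch exchange
// performed around __tsan_device_barrier / __tsan_group_barrier.
#include <array>
#include <cstddef>
#include <cstdint>
#include <cstdio>

constexpr size_t kThreadSlotCount = 4; // small value for illustration

struct VectorClock {
  std::array<uint16_t, kThreadSlotCount> clk_{};
};

// Clock[0..kThreadSlotCount-1] are per-slot clocks; Clock[kThreadSlotCount]
// is the shared row used to exchange epochs across the barrier.
std::array<VectorClock, kThreadSlotCount + 1> Clock;

void PublishOwnEpoch(int sid) {
  if (sid != -1)
    Clock[kThreadSlotCount].clk_[sid] = Clock[sid].clk_[sid];
}

void MergeAllEpochs(int sid) {
  if (sid != -1)
    for (size_t i = 0; i < kThreadSlotCount; i++)
      Clock[sid].clk_[i] = Clock[kThreadSlotCount].clk_[i];
}

int main() {
  // Pretend each slot has reached a different epoch before the barrier.
  for (size_t s = 0; s < kThreadSlotCount; s++)
    Clock[s].clk_[s] = static_cast<uint16_t>(10 + s);

  for (int s = 0; s < static_cast<int>(kThreadSlotCount); s++)
    PublishOwnEpoch(s);
  // ... __spirv_ControlBarrier would run here on the device ...
  for (int s = 0; s < static_cast<int>(kThreadSlotCount); s++)
    MergeAllEpochs(s);

  // After the merge, every slot knows every other slot's epoch.
  for (size_t i = 0; i < kThreadSlotCount; i++)
    std::printf("slot 0 sees slot %zu at epoch %u\n", i,
                static_cast<unsigned>(Clock[0].clk_[i]));
  return 0;
}
```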