#include "tsan_suppressions.h"
#include "tsan_symbolize.h"

+ #ifdef __SSE3__
+ // <emmintrin.h> transitively includes <stdlib.h>,
+ // and it's prohibited to include std headers into tsan runtime.
+ // So we do this dirty trick.
+ #define _MM_MALLOC_H_INCLUDED
+ #define __MM_MALLOC_H
+ #include <emmintrin.h>
+ typedef __m128i m128;
+ #endif
+
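The two #define lines rely on ordinary include guards: if a header's guard macro is already defined, including that header expands to nothing, which is how the guards above keep the compiler's <mm_malloc.h> (and through it <stdlib.h>) out of the build when <emmintrin.h> is pulled in. A minimal standalone sketch of the trick, using hypothetical names (my_config.h / MY_CONFIG_H are illustration only, not part of this patch):

// my_config.h -- an ordinary guarded header
#ifndef MY_CONFIG_H
#define MY_CONFIG_H
void ConfigInit();
#endif

// user.cc -- pre-defining the guard turns the #include into a no-op,
// the same effect the two #defines above have on <mm_malloc.h>.
#define MY_CONFIG_H
#include "my_config.h"   // the guarded body is skipped entirely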
volatile int __tsan_resumed = 0;

extern "C" void __tsan_resume() {
@@ -471,7 +481,8 @@ void StoreIfNotYetStored(u64 *sp, u64 *s) {
  *s = 0;
}

- static inline void HandleRace(ThreadState *thr, u64 *shadow_mem,
+ ALWAYS_INLINE
+ void HandleRace(ThreadState *thr, u64 *shadow_mem,
                     Shadow cur, Shadow old) {
  thr->racy_state[0] = cur.raw();
  thr->racy_state[1] = old.raw();
@@ -483,16 +494,12 @@ static inline void HandleRace(ThreadState *thr, u64 *shadow_mem,
#endif
}

- static inline bool OldIsInSameSynchEpoch(Shadow old, ThreadState *thr) {
-   return old.epoch() >= thr->fast_synch_epoch;
- }
-
static inline bool HappensBefore(Shadow old, ThreadState *thr) {
  return thr->clock.get(old.TidWithIgnore()) >= old.epoch();
}

- ALWAYS_INLINE USED
- void MemoryAccessImpl(ThreadState *thr, uptr addr,
+ ALWAYS_INLINE
+ void MemoryAccessImpl1(ThreadState *thr, uptr addr,
    int kAccessSizeLog, bool kAccessIsWrite, bool kIsAtomic,
    u64 *shadow_mem, Shadow cur) {
  StatInc(thr, StatMop);
@@ -586,6 +593,90 @@ void UnalignedMemoryAccess(ThreadState *thr, uptr pc, uptr addr,
  }
}

+ ALWAYS_INLINE
+ bool ContainsSameAccessSlow(u64 *s, u64 a, u64 sync_epoch, bool is_write) {
+   Shadow cur(a);
+   for (uptr i = 0; i < kShadowCnt; i++) {
+     Shadow old(LoadShadow(&s[i]));
+     if (Shadow::Addr0AndSizeAreEqual(cur, old) &&
+         old.TidWithIgnore() == cur.TidWithIgnore() &&
+         old.epoch() > sync_epoch &&
+         old.IsAtomic() == cur.IsAtomic() &&
+         old.IsRead() <= cur.IsRead())
+       return true;
+   }
+   return false;
+ }
+
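ContainsSameAccessSlow asks whether one of the kShadowCnt shadow slots already records an access that makes the current one redundant: same addr0/size in the same cell, same thread, an epoch newer than the thread's last synchronization epoch, the same atomicity, and an access type at least as strong. Only the last condition is asymmetric; a small sketch of just that rule (the helper name is hypothetical, for illustration):

// A recorded write (IsRead() == 0) covers a later read or write by the same
// thread, but a recorded read (IsRead() == 1) never covers a later write;
// that is what old.IsRead() <= cur.IsRead() encodes above.
static bool OldAccessCoversNew(bool old_is_read, bool new_is_read) {
  return old_is_read <= new_is_read;  // hypothetical helper, mirrors the check
}
// OldAccessCoversNew(false, true)  -> true:  old write, new read  => skip
// OldAccessCoversNew(true,  false) -> false: old read,  new write => full update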
+ #if defined(__SSE3__) && TSAN_SHADOW_COUNT == 4
+ #define SHUF(v0, v1, i0, i1, i2, i3) _mm_castps_si128(_mm_shuffle_ps( \
+     _mm_castsi128_ps(v0), _mm_castsi128_ps(v1), \
+     (i0)*1 + (i1)*4 + (i2)*16 + (i3)*64))
+ ALWAYS_INLINE
+ bool ContainsSameAccessFast(u64 *s, u64 a, u64 sync_epoch, bool is_write) {
+   // This is an optimized version of ContainsSameAccessSlow.
+   // load current access into access[0:63]
+   const m128 access = _mm_cvtsi64_si128(a);
+   // duplicate high part of access in addr0:
+   // addr0[0:31]   = access[32:63]
+   // addr0[32:63]  = access[32:63]
+   // addr0[64:95]  = access[32:63]
+   // addr0[96:127] = access[32:63]
+   const m128 addr0 = SHUF(access, access, 1, 1, 1, 1);
+   // load 4 shadow slots
+   const m128 shadow0 = _mm_load_si128((__m128i*)s);
+   const m128 shadow1 = _mm_load_si128((__m128i*)s + 1);
+   // load high parts of 4 shadow slots into addr_vect:
+   // addr_vect[0:31]   = shadow0[32:63]
+   // addr_vect[32:63]  = shadow0[96:127]
+   // addr_vect[64:95]  = shadow1[32:63]
+   // addr_vect[96:127] = shadow1[96:127]
+   m128 addr_vect = SHUF(shadow0, shadow1, 1, 3, 1, 3);
+   if (!is_write) {
+     // set IsRead bit in addr_vect
+     const m128 rw_mask1 = _mm_cvtsi64_si128(1 << 15);
+     const m128 rw_mask = SHUF(rw_mask1, rw_mask1, 0, 0, 0, 0);
+     addr_vect = _mm_or_si128(addr_vect, rw_mask);
+   }
+   // addr0 == addr_vect?
+   const m128 addr_res = _mm_cmpeq_epi32(addr0, addr_vect);
+   // epoch1[0:63] = sync_epoch
+   const m128 epoch1 = _mm_cvtsi64_si128(sync_epoch);
+   // epoch[0:31]   = sync_epoch[0:31]
+   // epoch[32:63]  = sync_epoch[0:31]
+   // epoch[64:95]  = sync_epoch[0:31]
+   // epoch[96:127] = sync_epoch[0:31]
+   const m128 epoch = SHUF(epoch1, epoch1, 0, 0, 0, 0);
+   // load low parts of shadow cell epochs into epoch_vect:
+   // epoch_vect[0:31]   = shadow0[0:31]
+   // epoch_vect[32:63]  = shadow0[64:95]
+   // epoch_vect[64:95]  = shadow1[0:31]
+   // epoch_vect[96:127] = shadow1[64:95]
+   const m128 epoch_vect = SHUF(shadow0, shadow1, 0, 2, 0, 2);
+   // epoch_vect > sync_epoch?
+   const m128 epoch_res = _mm_cmpgt_epi32(epoch_vect, epoch);
+   // addr_res & epoch_res
+   const m128 res = _mm_and_si128(addr_res, epoch_res);
+   // mask[0] = res[7]
+   // mask[1] = res[15]
+   // ...
+   // mask[15] = res[127]
+   const int mask = _mm_movemask_epi8(res);
+   return mask != 0;
+ }
+ #endif
+
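SHUF assembles one vector from two by picking 32-bit lanes: the result is { v0[i0], v0[i1], v1[i2], v1[i3] }. That is how the code above splits four 64-bit shadow words into one vector of high halves (addr0/tid/access bits) and one vector of low halves (epochs). A standalone sketch that can be compiled outside the runtime (it re-states the macro and uses <stdio.h>, which the runtime itself must avoid):

#include <emmintrin.h>
#include <stdio.h>

#define SHUF(v0, v1, i0, i1, i2, i3) _mm_castps_si128(_mm_shuffle_ps( \
    _mm_castsi128_ps(v0), _mm_castsi128_ps(v1), \
    (i0)*1 + (i1)*4 + (i2)*16 + (i3)*64))

int main() {
  // _mm_set_epi32 takes lanes from high to low, so these hold lanes
  // {0,1,2,3} and {4,5,6,7} respectively.
  const __m128i v0 = _mm_set_epi32(3, 2, 1, 0);
  const __m128i v1 = _mm_set_epi32(7, 6, 5, 4);
  int out[4];
  // The same lane pattern the fast path uses to gather the high halves
  // of four consecutive 64-bit shadow words.
  _mm_storeu_si128((__m128i*)out, SHUF(v0, v1, 1, 3, 1, 3));
  printf("%d %d %d %d\n", out[0], out[1], out[2], out[3]);  // prints: 1 3 5 7
  return 0;
}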
+ ALWAYS_INLINE
+ bool ContainsSameAccess(u64 *s, u64 a, u64 sync_epoch, bool is_write) {
+ #if defined(__SSE3__) && TSAN_SHADOW_COUNT == 4
+   bool res = ContainsSameAccessFast(s, a, sync_epoch, is_write);
+   DCHECK_EQ(res, ContainsSameAccessSlow(s, a, sync_epoch, is_write));
+   return res;
+ #else
+   return ContainsSameAccessSlow(s, a, sync_epoch, is_write);
+ #endif
+ }
+
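The dispatcher keeps the scalar routine as the reference implementation: whenever the SSE path is compiled in, debug builds also run the slow version and DCHECK that both agree, so a divergence trips an assertion instead of silently changing race detection. The same pattern in miniature (FastSum/SlowSum are hypothetical stand-ins, not part of the runtime):

static int SlowSum(const int *p, int n) {  // obviously-correct reference
  int s = 0;
  for (int i = 0; i < n; i++) s += p[i];
  return s;
}
static int FastSum(const int *p, int n) {  // imagine a vectorized body here
  return SlowSum(p, n);
}
static int Sum(const int *p, int n) {
  int res = FastSum(p, n);
  DCHECK_EQ(res, SlowSum(p, n));  // compiled out of release builds
  return res;
}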
ALWAYS_INLINE USED
void MemoryAccess(ThreadState *thr, uptr pc, uptr addr,
    int kAccessSizeLog, bool kAccessIsWrite, bool kIsAtomic) {
@@ -618,22 +709,53 @@ void MemoryAccess(ThreadState *thr, uptr pc, uptr addr,
  }

  FastState fast_state = thr->fast_state;
-   if (fast_state.GetIgnoreBit())
+   if (fast_state.GetIgnoreBit()) {
+     StatInc(thr, StatMop);
+     StatInc(thr, kAccessIsWrite ? StatMopWrite : StatMopRead);
+     StatInc(thr, (StatType)(StatMop1 + kAccessSizeLog));
+     StatInc(thr, StatMopIgnored);
    return;
-   if (kCollectHistory) {
-     fast_state.IncrementEpoch();
-     thr->fast_state = fast_state;
-     // We must not store to the trace if we do not store to the shadow.
-     // That is, this call must be moved somewhere below.
-     TraceAddEvent(thr, fast_state, EventTypeMop, pc);
  }

  Shadow cur(fast_state);
  cur.SetAddr0AndSizeLog(addr & 7, kAccessSizeLog);
  cur.SetWrite(kAccessIsWrite);
  cur.SetAtomic(kIsAtomic);

-   MemoryAccessImpl(thr, addr, kAccessSizeLog, kAccessIsWrite, kIsAtomic,
+   if (LIKELY(ContainsSameAccess(shadow_mem, cur.raw(),
+       thr->fast_synch_epoch, kAccessIsWrite))) {
+     StatInc(thr, StatMop);
+     StatInc(thr, kAccessIsWrite ? StatMopWrite : StatMopRead);
+     StatInc(thr, (StatType)(StatMop1 + kAccessSizeLog));
+     StatInc(thr, StatMopSame);
+     return;
+   }
+
+   if (kCollectHistory) {
+     fast_state.IncrementEpoch();
+     TraceAddEvent(thr, fast_state, EventTypeMop, pc);
+     thr->fast_state = fast_state;
+     cur.IncrementEpoch();
+   }
+
+   MemoryAccessImpl1(thr, addr, kAccessSizeLog, kAccessIsWrite, kIsAtomic,
+       shadow_mem, cur);
+ }
+
+ // Called by MemoryAccessRange in tsan_rtl_thread.cc
+ void MemoryAccessImpl(ThreadState *thr, uptr addr,
+     int kAccessSizeLog, bool kAccessIsWrite, bool kIsAtomic,
+     u64 *shadow_mem, Shadow cur) {
+   if (LIKELY(ContainsSameAccess(shadow_mem, cur.raw(),
+       thr->fast_synch_epoch, kAccessIsWrite))) {
+     StatInc(thr, StatMop);
+     StatInc(thr, kAccessIsWrite ? StatMopWrite : StatMopRead);
+     StatInc(thr, (StatType)(StatMop1 + kAccessSizeLog));
+     StatInc(thr, StatMopSame);
+     return;
+   }
+
+   MemoryAccessImpl1(thr, addr, kAccessSizeLog, kAccessIsWrite, kIsAtomic,
      shadow_mem, cur);
}
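Taken together, the hunks above front-load the cheap exits in every instrumented access; a condensed, illustrative view of the resulting flow (Stat* bookkeeping omitted):

// MemoryAccess(thr, pc, addr, kAccessSizeLog, kAccessIsWrite, kIsAtomic):
//   if (fast_state.GetIgnoreBit())  return;              // access is ignored
//   build Shadow cur from fast_state, addr0/size, is_write, is_atomic
//   if (ContainsSameAccess(shadow_mem, cur.raw(),
//                          thr->fast_synch_epoch, kAccessIsWrite))
//     return;                                            // already recorded
//   if (kCollectHistory) { fast_state.IncrementEpoch(); TraceAddEvent(...); }
//   MemoryAccessImpl1(...);                              // full shadow update
//
// MemoryAccessImpl, kept for MemoryAccessRange in tsan_rtl_thread.cc, repeats
// only the ContainsSameAccess check and then calls MemoryAccessImpl1.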