@@ -59,7 +59,7 @@ extern crate std;
59
59
60
60
use alloc:: boxed:: Box ;
61
61
use core:: fmt;
62
- use sync:: atomic:: { self , AtomicUsize , Ordering } ;
62
+ use sync:: atomic:: { self , Ordering } ;
63
63
64
64
#[ cfg( feature = "std" ) ]
65
65
use std:: error;
@@ -538,28 +538,31 @@ impl<T> fmt::Display for PushError<T> {
538
538
/// Equivalent to `atomic::fence(Ordering::SeqCst)`, but in some cases faster.
///
/// On x86 / x86_64 (when not running under miri or loom) the fence is issued as a
/// `lock`-prefixed instruction on a throw-away local instead of `mfence`. On every
/// other configuration this simply calls `atomic::fence(Ordering::SeqCst)`.
#[inline]
fn full_fence() {
    #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), not(miri), not(loom)))]
    {
        use core::{arch::asm, cell::UnsafeCell};

        // HACK(stjepang): On x86 architectures there are two different ways of executing
        // a `SeqCst` fence.
        //
        // 1. `atomic::fence(SeqCst)`, which compiles into a `mfence` instruction.
        // 2. A `lock <op>` instruction.
        //
        // Both instructions have the effect of a full barrier, but empirical benchmarks have shown
        // that the second one is sometimes a bit faster.
        let scratch = UnsafeCell::new(0_usize);

        // It is common to use `lock or` here, but when using a local variable, `lock not`, which
        // does not change the flags, should be slightly more efficient.
        // Refs: https://www.felixcloutier.com/x86/not
        //
        // SAFETY: `scratch.get()` points to a live, properly aligned, pointer-sized local that
        // outlives the `asm!` statement, and the instruction reads and writes only that word.
        // The instruction does not touch the stack pointer (`nostack`), and `not` leaves the
        // flags register unmodified (`preserves_flags`). `nomem` is intentionally NOT specified:
        // per the Rust inline-assembly rules the block is then assumed to read and write
        // arbitrary memory, so the compiler will not move memory accesses across it.
        unsafe {
            #[cfg(target_pointer_width = "64")]
            asm!(
                "lock not qword ptr [{0}]",
                in(reg) scratch.get(),
                options(nostack, preserves_flags)
            );
            #[cfg(target_pointer_width = "32")]
            asm!(
                "lock not dword ptr [{0:e}]",
                in(reg) scratch.get(),
                options(nostack, preserves_flags)
            );
        }

        // The fence has been issued; skip the portable fallback below.
        return;
    }

    // Portable fallback. This block is compiled on every target, so on x86 it sits after the
    // unconditional `return` above and is unreachable there; the lint is silenced for that case.
    #[allow(unreachable_code)]
    {
        atomic::fence(Ordering::SeqCst);
    }
}
0 commit comments