bugfix: Use inline assembly in full_fence

taiki-e · web-flow · commit cbdf9e88e150 · 2023-08-13T14:33:40.000-07:00
This commit bumps the MSRV (minimum supported Rust version) to 1.59.
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -57,7 +57,7 @@ jobs:
       matrix:
         # When updating this, the reminder to update the minimum supported
         # Rust version in Cargo.toml.
-        rust: ['1.38']
+        rust: ['1.59']
     steps:
       - uses: actions/checkout@v3
       - name: Install Rust
diff --git a/Cargo.toml b/Cargo.toml
@@ -10,7 +10,7 @@ authors = [
     "John Nunley <jtnunley01@gmail.com>"
 ]
 edition = "2018"
-rust-version = "1.38"
+rust-version = "1.59"
 description = "Concurrent multi-producer multi-consumer queue"
 license = "Apache-2.0 OR MIT"
 repository = "https://github.com/smol-rs/concurrent-queue"
diff --git a/src/lib.rs b/src/lib.rs
@@ -59,7 +59,7 @@ extern crate std;
 
 use alloc::boxed::Box;
 use core::fmt;
-use sync::atomic::{self, AtomicUsize, Ordering};
+use sync::atomic::{self, Ordering};
 
 #[cfg(feature = "std")]
 use std::error;
@@ -538,28 +538,31 @@ impl<T> fmt::Display for PushError<T> {
 /// Equivalent to `atomic::fence(Ordering::SeqCst)`, but in some cases faster.
 #[inline]
 fn full_fence() {
-    if cfg!(all(
-        any(target_arch = "x86", target_arch = "x86_64"),
-        not(miri),
-        not(loom)
-    )) {
+    #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), not(miri), not(loom)))]
+    {
+        use core::{arch::asm, cell::UnsafeCell};
         // HACK(stjepang): On x86 architectures there are two different ways of executing
         // a `SeqCst` fence.
         //
         // 1. `atomic::fence(SeqCst)`, which compiles into a `mfence` instruction.
-        // 2. `_.compare_exchange(_, _, SeqCst, SeqCst)`, which compiles into a `lock cmpxchg` instruction.
+        // 2. A `lock <op>` instruction.
         //
         // Both instructions have the effect of a full barrier, but empirical benchmarks have shown
         // that the second one is sometimes a bit faster.
-        //
-        // The ideal solution here would be to use inline assembly, but we're instead creating a
-        // temporary atomic variable and compare-and-exchanging its value. No sane compiler to
-        // x86 platforms is going to optimize this away.
-        atomic::compiler_fence(Ordering::SeqCst);
-        let a = AtomicUsize::new(0);
-        let _ = a.compare_exchange(0, 1, Ordering::SeqCst, Ordering::SeqCst);
-        atomic::compiler_fence(Ordering::SeqCst);
-    } else {
+        let a = UnsafeCell::new(0_usize);
+        // It is common to use `lock or` here, but when using a local variable, `lock not`, which
+        // does not change the flags, should be slightly more efficient.
+        // Refs: https://www.felixcloutier.com/x86/not
+        unsafe {
+            #[cfg(target_pointer_width = "64")]
+            asm!("lock not qword ptr [{0}]", in(reg) a.get(), options(nostack, preserves_flags));
+            #[cfg(target_pointer_width = "32")]
+            asm!("lock not dword ptr [{0:e}]", in(reg) a.get(), options(nostack, preserves_flags));
+        }
+        return;
+    }
+    #[allow(unreachable_code)]
+    {
         atomic::fence(Ordering::SeqCst);
     }
 }
diff --git a/src/sync.rs b/src/sync.rs
@@ -33,7 +33,6 @@ mod sync_impl {
     pub(crate) use loom::cell;
 
     pub(crate) mod atomic {
-        pub(crate) use core::sync::atomic::compiler_fence;
         pub(crate) use loom::sync::atomic::*;
     }
 

Original file line number	Diff line number	Diff line change
`@@ -10,7 +10,7 @@ authors = [`
`10`	`10`	`"John Nunley <jtnunley01@gmail.com>"`
`11`	`11`	`]`
`12`	`12`	`edition = "2018"`
`13`		`-rust-version = "1.38"`
	`13`	`+rust-version = "1.59"`
`14`	`14`	`description = "Concurrent multi-producer multi-consumer queue"`
`15`	`15`	`license = "Apache-2.0 OR MIT"`
`16`	`16`	`repository = "https://github.com/smol-rs/concurrent-queue"`
Original file line number	Diff line number	Diff line change
`@@ -33,7 +33,6 @@ mod sync_impl {`
`33`	`33`	`pub(crate) use loom::cell;`
`34`	`34`
`35`	`35`	`pub(crate) mod atomic {`
`36`		`- pub(crate) use core::sync::atomic::compiler_fence;`
`37`	`36`	`pub(crate) use loom::sync::atomic::*;`
`38`	`37`	`}`
`39`	`38`