Skip to content

Commit cbdf9e8

Browse files
authored
bugfix: Use inline assembly in full_fence
This commit bumps the MSRV to 1.59
1 parent 2d30937 commit cbdf9e8

File tree

4 files changed

+21
-19
lines changed

4 files changed

+21
-19
lines changed

.github/workflows/ci.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ jobs:
5757
matrix:
5858
# When updating this, remember to update the minimum supported
5959
# Rust version in Cargo.toml.
60-
rust: ['1.38']
60+
rust: ['1.59']
6161
steps:
6262
- uses: actions/checkout@v3
6363
- name: Install Rust

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ authors = [
1010
"John Nunley <[email protected]>"
1111
]
1212
edition = "2018"
13-
rust-version = "1.38"
13+
rust-version = "1.59"
1414
description = "Concurrent multi-producer multi-consumer queue"
1515
license = "Apache-2.0 OR MIT"
1616
repository = "https://github.com/smol-rs/concurrent-queue"

src/lib.rs

Lines changed: 19 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ extern crate std;
5959

6060
use alloc::boxed::Box;
6161
use core::fmt;
62-
use sync::atomic::{self, AtomicUsize, Ordering};
62+
use sync::atomic::{self, Ordering};
6363

6464
#[cfg(feature = "std")]
6565
use std::error;
@@ -538,28 +538,31 @@ impl<T> fmt::Display for PushError<T> {
538538
/// Equivalent to `atomic::fence(Ordering::SeqCst)`, but in some cases faster.
539539
#[inline]
540540
fn full_fence() {
541-
if cfg!(all(
542-
any(target_arch = "x86", target_arch = "x86_64"),
543-
not(miri),
544-
not(loom)
545-
)) {
541+
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), not(miri), not(loom)))]
542+
{
543+
use core::{arch::asm, cell::UnsafeCell};
546544
// HACK(stjepang): On x86 architectures there are two different ways of executing
547545
// a `SeqCst` fence.
548546
//
549547
// 1. `atomic::fence(SeqCst)`, which compiles into a `mfence` instruction.
550-
// 2. `_.compare_exchange(_, _, SeqCst, SeqCst)`, which compiles into a `lock cmpxchg` instruction.
548+
// 2. A `lock <op>` instruction.
551549
//
552550
// Both instructions have the effect of a full barrier, but empirical benchmarks have shown
553551
// that the second one is sometimes a bit faster.
554-
//
555-
// The ideal solution here would be to use inline assembly, but we're instead creating a
556-
// temporary atomic variable and compare-and-exchanging its value. No sane compiler for
557-
// x86 platforms is going to optimize this away.
558-
atomic::compiler_fence(Ordering::SeqCst);
559-
let a = AtomicUsize::new(0);
560-
let _ = a.compare_exchange(0, 1, Ordering::SeqCst, Ordering::SeqCst);
561-
atomic::compiler_fence(Ordering::SeqCst);
562-
} else {
552+
let a = UnsafeCell::new(0_usize);
553+
// It is common to use `lock or` here, but when using a local variable, `lock not`, which
554+
// does not change the flags, should be slightly more efficient.
555+
// Refs: https://www.felixcloutier.com/x86/not
556+
unsafe {
557+
#[cfg(target_pointer_width = "64")]
558+
asm!("lock not qword ptr [{0}]", in(reg) a.get(), options(nostack, preserves_flags));
559+
#[cfg(target_pointer_width = "32")]
560+
asm!("lock not dword ptr [{0:e}]", in(reg) a.get(), options(nostack, preserves_flags));
561+
}
562+
return;
563+
}
564+
#[allow(unreachable_code)]
565+
{
563566
atomic::fence(Ordering::SeqCst);
564567
}
565568
}

src/sync.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,6 @@ mod sync_impl {
3333
pub(crate) use loom::cell;
3434

3535
pub(crate) mod atomic {
36-
pub(crate) use core::sync::atomic::compiler_fence;
3736
pub(crate) use loom::sync::atomic::*;
3837
}
3938

0 commit comments

Comments
 (0)