Add atomic loads and stores and barriers #9247


Merged
1 commit, merged Jan 21, 2019
4 changes: 0 additions & 4 deletions UNITTESTS/stubs/mbed_critical_stub.c
@@ -54,10 +54,6 @@ bool core_util_atomic_flag_test_and_set(volatile core_util_atomic_flag *flagPtr)
return false;
}

void core_util_atomic_flag_clear(volatile core_util_atomic_flag *flagPtr)
{
}

bool core_util_atomic_cas_u8(volatile uint8_t *ptr, uint8_t *expectedCurrentValue, uint8_t desiredValue)
{
return false;
25 changes: 20 additions & 5 deletions platform/mbed_critical.c
@@ -100,11 +100,6 @@ void core_util_critical_section_exit(void)
}
}

void core_util_atomic_flag_clear(volatile core_util_atomic_flag *flagPtr)
{
flagPtr->_flag = false;
}

#if MBED_EXCLUSIVE_ACCESS

/* Suppress __ldrex and __strex deprecated warnings - "#3731-D: intrinsic is deprecated" */
@@ -115,14 +110,17 @@ void core_util_atomic_flag_clear(volatile core_util_atomic_flag *flagPtr)
bool core_util_atomic_flag_test_and_set(volatile core_util_atomic_flag *flagPtr)
{
uint8_t currentValue;
MBED_BARRIER();
do {
currentValue = __LDREXB(&flagPtr->_flag);
} while (__STREXB(true, &flagPtr->_flag));
MBED_BARRIER();
return currentValue;
}

bool core_util_atomic_cas_u8(volatile uint8_t *ptr, uint8_t *expectedCurrentValue, uint8_t desiredValue)
{
MBED_BARRIER();
do {
uint8_t currentValue = __LDREXB(ptr);
if (currentValue != *expectedCurrentValue) {
@@ -131,11 +129,13 @@ bool core_util_atomic_cas_u8(volatile uint8_t *ptr, uint8_t *expectedCurrentValue, uint8_t desiredValue)
return false;
}
} while (__STREXB(desiredValue, ptr));
MBED_BARRIER();
return true;
}

bool core_util_atomic_cas_u16(volatile uint16_t *ptr, uint16_t *expectedCurrentValue, uint16_t desiredValue)
{
MBED_BARRIER();
do {
uint16_t currentValue = __LDREXH(ptr);
if (currentValue != *expectedCurrentValue) {
@@ -144,12 +144,14 @@ bool core_util_atomic_cas_u16(volatile uint16_t *ptr, uint16_t *expectedCurrentValue, uint16_t desiredValue)
return false;
}
} while (__STREXH(desiredValue, ptr));
MBED_BARRIER();
return true;
}


bool core_util_atomic_cas_u32(volatile uint32_t *ptr, uint32_t *expectedCurrentValue, uint32_t desiredValue)
{
MBED_BARRIER();
do {
uint32_t currentValue = __LDREXW(ptr);
if (currentValue != *expectedCurrentValue) {
@@ -158,61 +160,74 @@ bool core_util_atomic_cas_u32(volatile uint32_t *ptr, uint32_t *expectedCurrentValue, uint32_t desiredValue)
return false;
}
} while (__STREXW(desiredValue, ptr));
MBED_BARRIER();
return true;
}

uint8_t core_util_atomic_incr_u8(volatile uint8_t *valuePtr, uint8_t delta)
{
MBED_BARRIER();
uint8_t newValue;
do {
newValue = __LDREXB(valuePtr) + delta;
} while (__STREXB(newValue, valuePtr));
MBED_BARRIER();
return newValue;
}

uint16_t core_util_atomic_incr_u16(volatile uint16_t *valuePtr, uint16_t delta)
{
MBED_BARRIER();
uint16_t newValue;
do {
newValue = __LDREXH(valuePtr) + delta;
} while (__STREXH(newValue, valuePtr));
MBED_BARRIER();
return newValue;
}

uint32_t core_util_atomic_incr_u32(volatile uint32_t *valuePtr, uint32_t delta)
{
uint32_t newValue;
MBED_BARRIER();
do {
newValue = __LDREXW(valuePtr) + delta;
} while (__STREXW(newValue, valuePtr));
MBED_BARRIER();
return newValue;
}


uint8_t core_util_atomic_decr_u8(volatile uint8_t *valuePtr, uint8_t delta)
{
uint8_t newValue;
MBED_BARRIER();
do {
newValue = __LDREXB(valuePtr) - delta;
} while (__STREXB(newValue, valuePtr));
MBED_BARRIER();
return newValue;
}

uint16_t core_util_atomic_decr_u16(volatile uint16_t *valuePtr, uint16_t delta)
{
uint16_t newValue;
MBED_BARRIER();
do {
newValue = __LDREXH(valuePtr) - delta;
} while (__STREXH(newValue, valuePtr));
MBED_BARRIER();
return newValue;
}

uint32_t core_util_atomic_decr_u32(volatile uint32_t *valuePtr, uint32_t delta)
{
uint32_t newValue;
MBED_BARRIER();
do {
newValue = __LDREXW(valuePtr) - delta;
} while (__STREXW(newValue, valuePtr));
MBED_BARRIER();
return newValue;
}

116 changes: 115 additions & 1 deletion platform/mbed_critical.h
@@ -22,6 +22,7 @@
#include <stdbool.h>
#include <stdint.h>
#include <stddef.h>
#include "mbed_toolchain.h"

#ifdef __cplusplus
extern "C" {
@@ -89,6 +90,19 @@ void core_util_critical_section_exit(void);
*/
bool core_util_in_critical_section(void);

/**@}*/

/**
* \defgroup platform_atomic atomic functions
*
* Atomic functions behave analogously to C11 and C++11 atomics - loads have
* acquire semantics, stores have release semantics, and atomic operations
* are sequentially consistent. Atomicity is enforced both between threads and
* interrupt handlers.
*
* @{
*/
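
For illustration only (not part of this diff), a minimal sketch of how these semantics are typically used: the primitive flag below acts as a spin lock, with test-and-set as the acquiring operation and clear as the releasing one. The names and the initializer are assumptions; it presumes a zero-initialized flag represents the cleared state, as the clear implementation suggests.

static core_util_atomic_flag lock_flag = { 0 };   // assumed cleared state

void spin_lock(void)
{
    // test-and-set returns the previous value; spin until this call was the one to set it
    while (core_util_atomic_flag_test_and_set(&lock_flag)) {
    }
}

void spin_unlock(void)
{
    core_util_atomic_flag_clear(&lock_flag);
}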

/**
* A lock-free, primitive atomic flag.
*
@@ -124,7 +138,11 @@ bool core_util_atomic_flag_test_and_set(volatile core_util_atomic_flag *flagPtr)
*
* @param flagPtr Target flag being cleared.
*/
void core_util_atomic_flag_clear(volatile core_util_atomic_flag *flagPtr);
MBED_FORCEINLINE void core_util_atomic_flag_clear(volatile core_util_atomic_flag *flagPtr)
{
MBED_BARRIER();
flagPtr->_flag = false;
Contributor:
Why isn't there a memory barrier after _flag is set to false?

Contributor Author:
You're right, one is needed there. I was thinking it wasn't needed for single-CPU, but sequential consistency requires it for the same reason you would need two DMBs (https://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html). Assuming we want sequential consistency (the C++11 default), then we need to guarantee that

atomic_store(x); // release
atomic_load(y); // acquire

can't be reordered. So that has to end up as:

BARRIER // for release semantics - keep preceding above
x = foo;
BARRIER // stop store+load being reordered
bar = y;
BARRIER // acquire semantics - keep following below

And it's store that gets the extra sequential-consistency barrier (like the extra DMB), because stores are probably less common.

}
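
Following the exchange above, a trailing barrier is needed as well; presumably the function ends up along these lines (illustrative sketch of the agreed fix, not the committed diff):

MBED_FORCEINLINE void core_util_atomic_flag_clear(volatile core_util_atomic_flag *flagPtr)
{
    MBED_BARRIER();          // release: keep preceding accesses above the store
    flagPtr->_flag = false;
    MBED_BARRIER();          // sequential consistency: stop a later atomic load moving above this store
}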

/**
* Atomic compare and set. It compares the contents of a memory location to a
@@ -354,6 +372,102 @@ bool core_util_atomic_cas_u32(volatile uint32_t *ptr, uint32_t *expectedCurrentValue, uint32_t desiredValue)
*/
bool core_util_atomic_cas_ptr(void *volatile *ptr, void **expectedCurrentValue, void *desiredValue);

/**
* Atomic load.
* @param valuePtr Target memory location.
* @return The loaded value.
*/
MBED_FORCEINLINE uint8_t core_util_atomic_load_u8(const volatile uint8_t *valuePtr)
{
uint8_t value = *valuePtr;
Contributor:
Why isn't there a memory barrier before *valuePtr is read?

Contributor Author (kjbracey, Jan 4, 2019):

Because load has "acquire" semantics, as per C++11/C11. It only needs to stop stuff from afterwards moving before it.

MBED_BARRIER();
return value;
}
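
A hypothetical sketch of the acquire/release pairing this enables (producer, consumer, shared_data and data_ready are illustrative names, not part of the PR): the producer writes its data and then publishes a flag with a store (release), so the data write cannot sink below the flag store; the consumer reads the flag with a load (acquire), so its data read cannot be hoisted above the flag check. That is why the load only needs a barrier after it.

static uint32_t shared_data;            // ordinary data, published via the flag
static volatile uint8_t data_ready;

void producer(void)
{
    shared_data = 42;                           // plain write
    core_util_atomic_store_u8(&data_ready, 1);  // release: the write above stays above
}

void consumer(void)
{
    if (core_util_atomic_load_u8(&data_ready)) {   // acquire: the read below stays below
        uint32_t value = shared_data;
        (void)value;
    }
}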

/**
* Atomic load.
* @param valuePtr Target memory location.
* @return The loaded value.
*/
MBED_FORCEINLINE uint16_t core_util_atomic_load_u16(const volatile uint16_t *valuePtr)
{
uint16_t value = *valuePtr;
MBED_BARRIER();
return value;
}

/**
* Atomic load.
* @param valuePtr Target memory location.
* @return The loaded value.
*/
MBED_FORCEINLINE uint32_t core_util_atomic_load_u32(const volatile uint32_t *valuePtr)
{
uint32_t value = *valuePtr;
MBED_BARRIER();
return value;
}

/**
* Atomic load.
* @param valuePtr Target memory location.
* @return The loaded value.
*/
MBED_FORCEINLINE void *core_util_atomic_load_ptr(void *const volatile *valuePtr)
{
void *value = *valuePtr;
MBED_BARRIER();
return value;
}

/**
* Atomic store.
* @param valuePtr Target memory location.
* @param desiredValue The value to store.
*/
MBED_FORCEINLINE void core_util_atomic_store_u8(volatile uint8_t *valuePtr, uint8_t desiredValue)
{
MBED_BARRIER();
*valuePtr = desiredValue;
MBED_BARRIER();
}

/**
* Atomic store.
* @param valuePtr Target memory location.
* @param desiredValue The value to store.
*/
MBED_FORCEINLINE void core_util_atomic_store_u16(volatile uint16_t *valuePtr, uint16_t desiredValue)
{
MBED_BARRIER();
*valuePtr = desiredValue;
MBED_BARRIER();
}

/**
* Atomic store.
* @param valuePtr Target memory location.
* @param desiredValue The value to store.
*/
MBED_FORCEINLINE void core_util_atomic_store_u32(volatile uint32_t *valuePtr, uint32_t desiredValue)
{
MBED_BARRIER();
*valuePtr = desiredValue;
MBED_BARRIER();
}

/**
* Atomic store.
* @param valuePtr Target memory location.
* @param desiredValue The value to store.
*/
MBED_FORCEINLINE void core_util_atomic_store_ptr(void *volatile *valuePtr, void *desiredValue)
{
MBED_BARRIER();
*valuePtr = desiredValue;
MBED_BARRIER();
}

/**
* Atomic increment.
* @param valuePtr Target memory location being incremented.
71 changes: 71 additions & 0 deletions platform/mbed_toolchain.h
@@ -152,6 +152,77 @@
#endif
#endif

/** MBED_COMPILER_BARRIER
* Stop the compiler moving memory accesses.
*
* The barrier stops memory accesses from being moved from one side of the
* barrier to the other for safety against other threads and interrupts.
*
* This macro should only be used if we know only one CPU is accessing the data,
* or we are otherwise synchronising CPUs via acquire/release instructions.
* Otherwise, use MBED_BARRIER, which will act as a compiler barrier and also
* a CPU barrier if necessary.
*
* @internal
* This is not for use by normal code - it is a building block for the
* higher-level functions in mbed_critical.h. Higher-level lock/unlock or
* acquire/release APIs always provide ordering semantics, using this if
* necessary.
*
* @code
* #include "mbed_toolchain.h"
*
* void atomic_flag_clear_armv8(atomic_flag *flagPtr)
* {
* // ARMv8 LDA and STL instructions provide sequential consistency against
* // other CPUs, so no CPU barrier is needed. But we still need compiler
* // barriers to give us sequentially-consistent release semantics with
* // respect to compiler reordering - __STLB does not currently
* // include this.
* MBED_COMPILER_BARRIER();
* __STLB(&flagPtr->_flag, false);
* MBED_COMPILER_BARRIER();
* }
* @endcode
*/
#ifdef __CC_ARM
#define MBED_COMPILER_BARRIER() __memory_changed()
#elif defined(__GNUC__) || defined(__clang__) || defined(__ICCARM__)
#define MBED_COMPILER_BARRIER() asm volatile("" : : : "memory")
#else
#error "Missing MBED_COMPILER_BARRIER implementation"
#endif

/** MBED_BARRIER
* Stop the compiler, and CPU if SMP, from moving memory accesses.
*
* The barrier stops memory accesses from being moved from one side of the
* barrier to the other for safety against other threads and interrupts,
* potentially on other CPUs.
*
* In a single-CPU system, this is just a compiler barrier.
* If we supported multiple CPUs, this would be a DMB (with implied compiler
* barrier).
*
* @internal
* This is not for use by normal code - it is a building block for the
* higher-level functions in mbed_critical.h. Higher-level lock/unlock or
* acquire/release APIs always provide ordering semantics, using this if
* necessary.
* @code
* #include "mbed_toolchain.h"
*
* void atomic_flag_clear_armv7(atomic_flag *flagPtr)
* {
* // ARMv7 LDR and STR instructions do not provide any ordering
* // consistency against other CPUs, so explicit barrier DMBs are needed
* // for a multi-CPU system, otherwise just compiler barriers for single-CPU.
* MBED_BARRIER();
* flagPtr->_flag = false;
* MBED_BARRIER();
* }
* @endcode
*/
#define MBED_BARRIER() MBED_COMPILER_BARRIER()
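
As a hedged illustration of the multi-CPU case described above (hypothetical: MBED_SMP_HYPOTHETICAL is not a real Mbed OS macro, and current targets are single-core, so the actual definition above remains a compiler barrier):

#if defined(MBED_SMP_HYPOTHETICAL)
#define MBED_BARRIER() __DMB()                    // CMSIS data memory barrier; also acts as a compiler barrier
#else
#define MBED_BARRIER() MBED_COMPILER_BARRIER()
#endif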

/** MBED_PURE
* Hint to the compiler that a function depends only on parameters
*