Skip to content

Bug #79487: Provide non-Windows implementations for LF_BACKOFF #40

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 0 additions & 26 deletions include/atomic/generic-msvc.h
Original file line number Diff line number Diff line change
Expand Up @@ -106,30 +106,4 @@ static inline void my_atomic_storeptr(void * volatile *a, void *v)
(void)InterlockedExchangePointer(a, v);
}


/*
  my_yield_processor(): the equivalent of issuing the x86 PAUSE instruction;
  intended for spin loops to improve performance on hyperthreaded CPUs.
  Intel recommends pausing in spin loops even on non-HT machines to reduce
  power consumption (see e.g.
  http://softwarecommunity.intel.com/articles/eng/2004.htm)

  Benchmarks of spinlocks built on InterlockedCompareExchange and
  YieldProcessor show that much better performance comes from calling
  YieldProcessor in a loop - i.e. yielding for longer. On Intel boxes a
  loop count in the range 200-300 brought the best results.
*/
#define YIELD_LOOPS 200

static inline int my_yield_processor()
{
  int loop;

  for (loop= 0; loop < YIELD_LOOPS; loop++)
    YieldProcessor();

  /* Constant result; kept so the macro below is usable as an expression. */
  return 1;
}

#define LF_BACKOFF my_yield_processor()

#endif /* ATOMIC_MSC_INCLUDED */
2 changes: 2 additions & 0 deletions include/lf.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@

C_MODE_START

#define LF_BACKOFF my_yield_processor()

/*
wait-free dynamic array, see lf_dynarray.c

Expand Down
67 changes: 62 additions & 5 deletions include/my_atomic.h
Original file line number Diff line number Diff line change
Expand Up @@ -63,12 +63,69 @@
#endif

/*
the macro below defines (as an expression) the code that
will be run in spin-loops. Intel manuals recummend to have PAUSE there.
It is expected to be defined in include/atomic/ *.h files
the macro below defines (as an expression) the code that will be run in
spin-loops. Intel manuals recommend to have PAUSE there.
*/
#ifndef LF_BACKOFF
#define LF_BACKOFF (1)
#ifdef HAVE_PAUSE_INSTRUCTION
/*
  According to the gcc info page, asm volatile means that the instruction
  has important side-effects and must not be removed. Also asm volatile may
  trigger a memory barrier (spilling all registers to memory).
*/
# define MY_PAUSE() __asm__ __volatile__ ("pause")
#elif defined(HAVE_FAKE_PAUSE_INSTRUCTION)
/*
  "rep; nop" assembles to the same encoding as PAUSE; used when the
  assembler does not recognize the PAUSE mnemonic.
  BUG FIX: this branch previously defined UT_PAUSE(), leaving MY_PAUSE()
  undefined on such platforms (which would trip the #error guard in
  storage/innobase/include/ut0ut.h).
*/
# define MY_PAUSE() __asm__ __volatile__ ("rep; nop")
#elif defined(_MSC_VER)
/*
  In the Win32 API the x86 PAUSE instruction is issued by calling the
  YieldProcessor macro defined in WinNT.h; YieldProcessor is a CPU
  architecture-independent way of doing so.
*/
# define MY_PAUSE() YieldProcessor()
#else
/* No known pause facility: expand to a no-op. */
# define MY_PAUSE() ((void) 0)
#endif

/*
  POWER-specific macros to relax CPU threads to give more core resources to
  other threads executing in the core. The "or N,N,N" forms are no-op
  instructions that POWER CPUs interpret as SMT thread-priority hints
  (Power ISA program-priority nops).
*/
#if defined(HAVE_HMT_PRIORITY_INSTRUCTION)
/* or 1,1,1: lower this hardware thread's priority */
# define MY_LOW_PRIORITY_CPU() __asm__ __volatile__ ("or 1,1,1")
/* or 2,2,2: restore normal (medium) priority */
# define MY_RESUME_PRIORITY_CPU() __asm__ __volatile__ ("or 2,2,2")
#else
/* Non-POWER (or unsupported toolchain): no-ops. */
# define MY_LOW_PRIORITY_CPU() ((void)0)
# define MY_RESUME_PRIORITY_CPU() ((void)0)
#endif

/*
  my_yield_processor(): equivalent of the x86 PAUSE instruction; use it in
  spin loops to improve performance on hyperthreaded CPUs. Intel recommends
  pausing in spin loops even on non-HT machines to reduce power consumption
  (see e.g. http://softwarecommunity.intel.com/articles/eng/2004.htm)

  Benchmarks of spinlocks implemented with InterlockedCompareExchange and
  YieldProcessor show that much better performance is achieved by yielding
  in a loop - that is, yielding longer. On Intel boxes a loop count in the
  range 200-300 brought the best results.
*/
#define MY_YIELD_LOOPS 200

static inline int my_yield_processor()
{
  int loop= MY_YIELD_LOOPS;

  /* On POWER, hint the core to give resources to sibling threads. */
  MY_LOW_PRIORITY_CPU();

  while (loop-- > 0)
  {
    /* Compiler barrier: keeps the compiler from reordering the loop body
       at compile time (see my_compiler.h). */
    MY_COMPILER_BARRIER();
    MY_PAUSE();
  }

  MY_RESUME_PRIORITY_CPU();

  return 1;
}

#endif /* MY_ATOMIC_INCLUDED */
12 changes: 12 additions & 0 deletions include/my_compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,18 @@ inline bool unlikely(bool expr)
# define MY_ALIGNED(size)
#endif

/*
the macro below defines a compiler barrier, i.e. compiler-specific code to
prevent instructions reordering during compile time.
*/
#if defined __GNUC__ || defined __SUNPRO_C || defined __SUNPRO_CC
# define MY_COMPILER_BARRIER() __asm__ __volatile__ ("" ::: "memory")
#elif defined _MSC_VER
# define MY_COMPILER_BARRIER() _ReadWriteBarrier()
#else
# error No MY_COMPILER_BARRIER() implementation for this compiler!
#endif

/* Visual Studio requires '__inline' for C code */
#if !defined(__cplusplus) && defined(_MSC_VER)
# define inline __inline
Expand Down
46 changes: 17 additions & 29 deletions storage/innobase/include/ut0ut.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,39 +54,27 @@ Created 1/20/1994 Heikki Tuuri
typedef time_t ib_time_t;

#ifndef UNIV_HOTBACKUP
# if defined(HAVE_PAUSE_INSTRUCTION)
/* According to the gcc info page, asm volatile means that the
instruction has important side-effects and must not be removed.
Also asm volatile may trigger a memory barrier (spilling all registers
to memory). */
# ifdef __SUNPRO_CC
# define UT_RELAX_CPU() asm ("pause" )
# else
# define UT_RELAX_CPU() __asm__ __volatile__ ("pause")
# endif /* __SUNPRO_CC */

# elif defined(HAVE_FAKE_PAUSE_INSTRUCTION)
/* "rep; nop": same encoding as PAUSE, for assemblers that lack the
PAUSE mnemonic. */
# define UT_RELAX_CPU() __asm__ __volatile__ ("rep; nop")
# elif defined _WIN32
/* In the Win32 API, the x86 PAUSE instruction is executed by calling
the YieldProcessor macro defined in WinNT.h. It is a CPU architecture-
independent way by using YieldProcessor. */
# define UT_RELAX_CPU() YieldProcessor()
# else
/* NOTE(review): the dummy CAS on a local presumably stands in as a
memory barrier on platforms without PAUSE - confirm before relying. */
# define UT_RELAX_CPU() do { \
volatile lint volatile_var; \
os_compare_and_swap_lint(&volatile_var, 0, 1); \
} while (0)
# endif

# if defined(HAVE_HMT_PRIORITY_INSTRUCTION)
# define UT_LOW_PRIORITY_CPU() __asm__ __volatile__ ("or 1,1,1")
# define UT_RESUME_PRIORITY_CPU() __asm__ __volatile__ ("or 2,2,2")
#include <my_atomic.h>

/*
  Map InnoDB's CPU-relaxation macros onto the server-wide ones provided by
  my_atomic.h; fail the build loudly if any of them is missing. (The span
  previously had stray old-version fallback defines spliced into the
  MY_PAUSE group; this reconstructs the coherent mapping, with consistent
  "# ifdef" nesting and uniform #error messages.)
*/
# ifdef MY_PAUSE
# define UT_RELAX_CPU() MY_PAUSE()
# else
# error MY_PAUSE() is undefined!
# endif

# ifdef MY_LOW_PRIORITY_CPU
# define UT_LOW_PRIORITY_CPU() MY_LOW_PRIORITY_CPU()
# else
# error MY_LOW_PRIORITY_CPU() is undefined!
# endif

# ifdef MY_RESUME_PRIORITY_CPU
# define UT_RESUME_PRIORITY_CPU() MY_RESUME_PRIORITY_CPU()
# else
# error MY_RESUME_PRIORITY_CPU() is undefined!
# endif

/*********************************************************************//**
Delays execution for at most max_wait_us microseconds or returns earlier
if cond becomes true.
Expand Down