Skip to content

Commit 3521c48

Browse files
committed
Bug #79487: Provide non-Windows implementations for LF_BACKOFF
Add LF_BACKOFF implementations for GCC and Sun Studio + some minor code unification with InnoDB.
1 parent d04d7a8 commit 3521c48

File tree

5 files changed

+93
-60
lines changed

5 files changed

+93
-60
lines changed

include/atomic/generic-msvc.h

Lines changed: 0 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -106,30 +106,4 @@ static inline void my_atomic_storeptr(void * volatile *a, void *v)
106106
(void)InterlockedExchangePointer(a, v);
107107
}
108108

109-
110-
/*
111-
my_yield_processor (equivalent of x86 PAUSE instruction) should be used
112-
to improve performance on hyperthreaded CPUs. Intel recommends to use it in
113-
spin loops also on non-HT machines to reduce power consumption (see e.g
114-
http://softwarecommunity.intel.com/articles/eng/2004.htm)
115-
116-
Running benchmarks for spinlocks implemented with InterlockedCompareExchange
117-
and YieldProcessor shows that much better performance is achieved by calling
118-
YieldProcessor in a loop - that is, yielding longer. On Intel boxes setting
119-
loop count in the range 200-300 brought best results.
120-
*/
121-
#define YIELD_LOOPS 200
122-
123-
static inline int my_yield_processor()
124-
{
125-
int i;
126-
for (i=0; i<YIELD_LOOPS; i++)
127-
{
128-
YieldProcessor();
129-
}
130-
return 1;
131-
}
132-
133-
#define LF_BACKOFF my_yield_processor()
134-
135109
#endif /* ATOMIC_MSC_INCLUDED */

include/lf.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@
2323

2424
C_MODE_START
2525

26+
#define LF_BACKOFF my_yield_processor()
27+
2628
/*
2729
wait-free dynamic array, see lf_dynarray.c
2830

include/my_atomic.h

Lines changed: 62 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -63,12 +63,69 @@
6363
#endif
6464

6565
/*
66-
the macro below defines (as an expression) the code that
67-
will be run in spin-loops. Intel manuals recummend to have PAUSE there.
68-
It is expected to be defined in include/atomic/ *.h files
66+
the macro below defines (as an expression) the code that will be run in
67+
spin-loops. Intel manuals recommend to have PAUSE there.
6968
*/
70-
#ifndef LF_BACKOFF
71-
#define LF_BACKOFF (1)
69+
#ifdef HAVE_PAUSE_INSTRUCTION
70+
/*
71+
According to the gcc info page, asm volatile means that the instruction
72+
has important side-effects and must not be removed. Also asm volatile may
73+
trigger a memory barrier (spilling all registers to memory).
74+
*/
75+
# define MY_PAUSE() __asm__ __volatile__ ("pause")
76+
# elif defined(HAVE_FAKE_PAUSE_INSTRUCTION)
77+
# define MY_PAUSE() __asm__ __volatile__ ("rep; nop") /* HAVE_FAKE_PAUSE_INSTRUCTION variant; must be named MY_PAUSE like the other branches */
78+
# elif defined(_MSC_VER)
79+
/*
80+
In the Win32 API, the x86 PAUSE instruction is executed by calling the
81+
YieldProcessor macro defined in WinNT.h. Using YieldProcessor keeps the
82+
call independent of the CPU architecture.
83+
*/
84+
# define MY_PAUSE() YieldProcessor()
85+
# else
86+
# define MY_PAUSE() ((void) 0)
7287
#endif
7388

89+
/*
90+
POWER-specific macros to relax CPU threads to give more core resources to
91+
other threads executing in the core.
92+
*/
93+
#if defined(HAVE_HMT_PRIORITY_INSTRUCTION)
94+
# define MY_LOW_PRIORITY_CPU() __asm__ __volatile__ ("or 1,1,1")
95+
# define MY_RESUME_PRIORITY_CPU() __asm__ __volatile__ ("or 2,2,2")
96+
#else
97+
# define MY_LOW_PRIORITY_CPU() ((void)0)
98+
# define MY_RESUME_PRIORITY_CPU() ((void)0)
99+
#endif
100+
101+
/*
102+
my_yield_processor (equivalent of x86 PAUSE instruction) should be used to
103+
improve performance on hyperthreaded CPUs. Intel recommends to use it in spin
104+
loops also on non-HT machines to reduce power consumption (see e.g.
105+
http://softwarecommunity.intel.com/articles/eng/2004.htm)
106+
107+
Running benchmarks for spinlocks implemented with InterlockedCompareExchange
108+
and YieldProcessor shows that much better performance is achieved by calling
109+
YieldProcessor in a loop - that is, yielding longer. On Intel boxes setting
110+
loop count in the range 200-300 brought best results.
111+
*/
112+
#define MY_YIELD_LOOPS 200
113+
114+
static inline int my_yield_processor(void)
115+
{
116+
  int i;
117+
118+
  MY_LOW_PRIORITY_CPU();    /* expands to ((void)0) unless HAVE_HMT_PRIORITY_INSTRUCTION is defined */
119+
120+
  for (i= 0; i < MY_YIELD_LOOPS; i++)
121+
  {
122+
    MY_COMPILER_BARRIER();  /* compile-time barrier before each pause */
123+
    MY_PAUSE();
124+
  }
125+
126+
  MY_RESUME_PRIORITY_CPU(); /* counterpart of MY_LOW_PRIORITY_CPU() above */
127+
128+
  return 1;                 /* always 1; LF_BACKOFF uses this call as an expression */
129+
}
130+
74131
#endif /* MY_ATOMIC_INCLUDED */

include/my_compiler.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,18 @@ inline bool unlikely(bool expr)
104104
# define MY_ALIGNED(size)
105105
#endif
106106

107+
/*
108+
the macro below defines a compiler barrier, i.e. compiler-specific code to
109+
prevent instruction reordering at compile time.
110+
*/
111+
#if defined __GNUC__ || defined __SUNPRO_C || defined __SUNPRO_CC
112+
# define MY_COMPILER_BARRIER() __asm__ __volatile__ ("" ::: "memory")
113+
#elif defined _MSC_VER
114+
# define MY_COMPILER_BARRIER() _ReadWriteBarrier()
115+
#else
116+
# error No MY_COMPILER_BARRIER() implementation for this compiler!
117+
#endif
118+
107119
/* Visual Studio requires '__inline' for C code */
108120
#if !defined(__cplusplus) && defined(_MSC_VER)
109121
# define inline __inline

storage/innobase/include/ut0ut.h

Lines changed: 17 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -54,39 +54,27 @@ Created 1/20/1994 Heikki Tuuri
5454
typedef time_t ib_time_t;
5555

5656
#ifndef UNIV_HOTBACKUP
57-
# if defined(HAVE_PAUSE_INSTRUCTION)
58-
/* According to the gcc info page, asm volatile means that the
59-
instruction has important side-effects and must not be removed.
60-
Also asm volatile may trigger a memory barrier (spilling all registers
61-
to memory). */
62-
# ifdef __SUNPRO_CC
63-
# define UT_RELAX_CPU() asm ("pause" )
64-
# else
65-
# define UT_RELAX_CPU() __asm__ __volatile__ ("pause")
66-
# endif /* __SUNPRO_CC */
67-
68-
# elif defined(HAVE_FAKE_PAUSE_INSTRUCTION)
69-
# define UT_RELAX_CPU() __asm__ __volatile__ ("rep; nop")
70-
# elif defined _WIN32
71-
/* In the Win32 API, the x86 PAUSE instruction is executed by calling
72-
the YieldProcessor macro defined in WinNT.h. It is a CPU architecture-
73-
independent way by using YieldProcessor. */
74-
# define UT_RELAX_CPU() YieldProcessor()
75-
# else
76-
# define UT_RELAX_CPU() do { \
77-
volatile lint volatile_var; \
78-
os_compare_and_swap_lint(&volatile_var, 0, 1); \
79-
} while (0)
80-
# endif
8157

82-
# if defined(HAVE_HMT_PRIORITY_INSTRUCTION)
83-
# define UT_LOW_PRIORITY_CPU() __asm__ __volatile__ ("or 1,1,1")
84-
# define UT_RESUME_PRIORITY_CPU() __asm__ __volatile__ ("or 2,2,2")
58+
#include <my_atomic.h>
59+
60+
# ifdef MY_PAUSE
61+
# define UT_RELAX_CPU() MY_PAUSE()
8562
# else
86-
# define UT_LOW_PRIORITY_CPU() ((void)0)
87-
# define UT_RESUME_PRIORITY_CPU() ((void)0)
63+
# error MY_PAUSE() is undefined
8864
# endif
8965

66+
#ifdef MY_LOW_PRIORITY_CPU
67+
# define UT_LOW_PRIORITY_CPU() MY_LOW_PRIORITY_CPU()
68+
#else
69+
# error MY_LOW_PRIORITY_CPU() is undefined!
70+
#endif
71+
72+
#ifdef MY_RESUME_PRIORITY_CPU
73+
# define UT_RESUME_PRIORITY_CPU() MY_RESUME_PRIORITY_CPU()
74+
#else
75+
# error MY_RESUME_PRIORITY_CPU() is undefined!
76+
#endif
77+
9078
/*********************************************************************//**
9179
Delays execution for at most max_wait_us microseconds or returns earlier
9280
if cond becomes true.

0 commit comments

Comments
 (0)