Skip to content

Commit cde307e

Browse files
committed
[scudo] Fine tune busy-waiting in HybridMutex
Instead of using hardware-specific instructions, using a simple loop over a volatile variable gives a similar and more predictable waiting time. Also fine-tune the waiting time to fit the average time spent in malloc/free operations. Reviewed By: cferris; Differential Revision: https://reviews.llvm.org/D156951
1 parent 846eb76 commit cde307e

File tree

2 files changed

+14
-18
lines changed

2 files changed

+14
-18
lines changed

compiler-rt/lib/scudo/standalone/common.h

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -112,21 +112,6 @@ template <typename T> inline void shuffle(T *A, u32 N, u32 *RandState) {
112112
*RandState = State;
113113
}
114114

115-
// Hardware specific inlinable functions.
116-
117-
inline void yieldProcessor(UNUSED u8 Count) {
118-
#if defined(__i386__) || defined(__x86_64__)
119-
__asm__ __volatile__("" ::: "memory");
120-
for (u8 I = 0; I < Count; I++)
121-
__asm__ __volatile__("pause");
122-
#elif defined(__aarch64__) || defined(__arm__)
123-
__asm__ __volatile__("" ::: "memory");
124-
for (u8 I = 0; I < Count; I++)
125-
__asm__ __volatile__("yield");
126-
#endif
127-
__asm__ __volatile__("" ::: "memory");
128-
}
129-
130115
// Platform specific functions.
131116

132117
extern uptr PageSizeCached;

compiler-rt/lib/scudo/standalone/mutex.h

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ class CAPABILITY("mutex") HybridMutex {
3535
#pragma nounroll
3636
#endif
3737
for (u8 I = 0U; I < NumberOfTries; I++) {
38-
yieldProcessor(NumberOfYields);
38+
delayLoop();
3939
if (tryLock())
4040
return;
4141
}
@@ -53,10 +53,21 @@ class CAPABILITY("mutex") HybridMutex {
5353
}
5454

5555
private:
56+
void delayLoop() {
57+
// The value comes from the average time spent in accessing caches (which
58+
// are the fastest operations) so that we are unlikely to wait too long for
59+
// fast operations.
60+
constexpr u32 SpinTimes = 16;
61+
volatile u32 V = 0;
62+
for (u32 I = 0; I < SpinTimes; ++I)
63+
++V;
64+
}
65+
5666
void assertHeldImpl();
5767

58-
static constexpr u8 NumberOfTries = 8U;
59-
static constexpr u8 NumberOfYields = 8U;
68+
// TODO(chiahungduan): Adapt this value based on scenarios. E.g., primary and
69+
// secondary allocator have different allocation times.
70+
static constexpr u8 NumberOfTries = 32U;
6071

6172
#if SCUDO_LINUX
6273
atomic_u32 M = {};

0 commit comments

Comments
 (0)