
Commit 28db61e

npiggin authored and mpe committed
powerpc/qspinlock: allow propagation of yield CPU down the queue
Having all CPUs poll the lock word for the owner CPU that should be yielded to defeats most of the purpose of using MCS queueing for scalability. Yet it may be desirable for queued waiters to yield to a preempted owner.

With this change, queued waiters never sample the owner CPU directly from the lock word. The queue head (which is spinning on the lock) propagates the owner CPU back to the next waiter if it finds the owner has been preempted. That waiter then propagates the owner CPU back to the next waiter, and so on.

s390 addresses this problem differently, by having queued waiters sample the lock word to find the owner at a low frequency. That has the advantage of being simpler; the advantage of propagation is that the lock word never has to be accessed by queued waiters, and the transfer of cache lines to transmit the owner data is only required when lock holder vCPU preemption occurs.

Signed-off-by: Nicholas Piggin <[email protected]>
Signed-off-by: Michael Ellerman <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
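To make the handoff concrete, here is a minimal, self-contained C sketch of the propagation idea described above. The names (struct waiter, vcpu_preempted(), yield_to(), head_pass_owner(), waiter_relay()) are illustrative stand-ins, not the kernel's types or helpers, and details such as refreshing the owner value and clearing stale entries are omitted; the actual implementation is the propagate_yield_cpu()/yield_to_prev() code in the diff below.

/* Illustrative sketch only; see the real code in the diff below. */
struct waiter {
        struct waiter *next;
        int yield_cpu;                          /* -1: no preempted owner known */
};

/* Hypothetical stand-ins for the paravirt helpers. */
static int vcpu_preempted(int cpu) { (void)cpu; return 1; }
static void yield_to(int cpu) { (void)cpu; }

/* Queue head: the only waiter that reads the owner CPU from the lock word. */
static void head_pass_owner(struct waiter *head, int owner_cpu)
{
        if (head->next && vcpu_preempted(owner_cpu))
                head->next->yield_cpu = owner_cpu;      /* hand it down the queue */
}

/* Queued waiter: relays the owner CPU to the next waiter, then yields to it. */
static void waiter_relay(struct waiter *me)
{
        int cpu = me->yield_cpu;

        if (me->next && me->next->yield_cpu != cpu)
                me->next->yield_cpu = cpu;              /* keep passing it back */
        if (cpu != -1)
                yield_to(cpu);                          /* directed yield to preempted owner */
}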
1 parent b4c3cdc commit 28db61e

File tree

1 file changed: +79 -0 lines changed

arch/powerpc/lib/qspinlock.c

Lines changed: 79 additions & 0 deletions
@@ -12,6 +12,7 @@
 struct qnode {
 	struct qnode *next;
 	struct qspinlock *lock;
+	int yield_cpu;
 	u8 locked; /* 1 if lock acquired */
 };
 
@@ -28,6 +29,7 @@ static int head_spins __read_mostly = (1 << 8);
 static bool pv_yield_owner __read_mostly = true;
 static bool pv_yield_allow_steal __read_mostly = false;
 static bool pv_yield_prev __read_mostly = true;
+static bool pv_yield_propagate_owner __read_mostly = true;
 
 static DEFINE_PER_CPU_ALIGNED(struct qnodes, qnodes);
 
@@ -232,14 +234,67 @@ static __always_inline void yield_head_to_locked_owner(struct qspinlock *lock, u
 	__yield_to_locked_owner(lock, val, paravirt, mustq);
 }
 
+static __always_inline void propagate_yield_cpu(struct qnode *node, u32 val, int *set_yield_cpu, bool paravirt)
+{
+	struct qnode *next;
+	int owner;
+
+	if (!paravirt)
+		return;
+	if (!pv_yield_propagate_owner)
+		return;
+
+	owner = get_owner_cpu(val);
+	if (*set_yield_cpu == owner)
+		return;
+
+	next = READ_ONCE(node->next);
+	if (!next)
+		return;
+
+	if (vcpu_is_preempted(owner)) {
+		next->yield_cpu = owner;
+		*set_yield_cpu = owner;
+	} else if (*set_yield_cpu != -1) {
+		next->yield_cpu = owner;
+		*set_yield_cpu = owner;
+	}
+}
+
 static __always_inline void yield_to_prev(struct qspinlock *lock, struct qnode *node, u32 val, bool paravirt)
 {
 	int prev_cpu = decode_tail_cpu(val);
 	u32 yield_count;
+	int yield_cpu;
 
 	if (!paravirt)
 		goto relax;
 
+	if (!pv_yield_propagate_owner)
+		goto yield_prev;
+
+	yield_cpu = READ_ONCE(node->yield_cpu);
+	if (yield_cpu == -1) {
+		/* Propagate back the -1 CPU */
+		if (node->next && node->next->yield_cpu != -1)
+			node->next->yield_cpu = yield_cpu;
+		goto yield_prev;
+	}
+
+	yield_count = yield_count_of(yield_cpu);
+	if ((yield_count & 1) == 0)
+		goto yield_prev; /* owner vcpu is running */
+
+	smp_rmb();
+
+	if (yield_cpu == node->yield_cpu) {
+		if (node->next && node->next->yield_cpu != yield_cpu)
+			node->next->yield_cpu = yield_cpu;
+		yield_to_preempted(yield_cpu, yield_count);
+		return;
+	}
+
+yield_prev:
 	if (!pv_yield_prev)
 		goto relax;
 
@@ -293,6 +348,7 @@ static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, b
 	u32 val, old, tail;
 	bool mustq = false;
 	int idx;
+	int set_yield_cpu = -1;
 	int iters = 0;
 
 	BUILD_BUG_ON(CONFIG_NR_CPUS >= (1U << _Q_TAIL_CPU_BITS));
@@ -314,6 +370,7 @@ static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, b
 	node = &qnodesp->nodes[idx];
 	node->next = NULL;
 	node->lock = lock;
+	node->yield_cpu = -1;
 	node->locked = 0;
 
 	tail = encode_tail_cpu(smp_processor_id());
@@ -334,6 +391,10 @@ static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, b
 		while (!node->locked)
 			yield_to_prev(lock, node, old, paravirt);
 
+		/* Clear out stale propagated yield_cpu */
+		if (paravirt && pv_yield_propagate_owner && node->yield_cpu != -1)
+			node->yield_cpu = -1;
+
 		smp_rmb(); /* acquire barrier for the mcs lock */
 	}
 
@@ -344,6 +405,7 @@ static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, b
 		if (!(val & _Q_LOCKED_VAL))
 			break;
 
+		propagate_yield_cpu(node, val, &set_yield_cpu, paravirt);
 		yield_head_to_locked_owner(lock, val, paravirt);
 		if (!maybe_stealers)
 			continue;
@@ -512,6 +574,22 @@ static int pv_yield_prev_get(void *data, u64 *val)
 
 DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_prev, pv_yield_prev_get, pv_yield_prev_set, "%llu\n");
 
+static int pv_yield_propagate_owner_set(void *data, u64 val)
+{
+	pv_yield_propagate_owner = !!val;
+
+	return 0;
+}
+
+static int pv_yield_propagate_owner_get(void *data, u64 *val)
+{
+	*val = pv_yield_propagate_owner;
+
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_propagate_owner, pv_yield_propagate_owner_get, pv_yield_propagate_owner_set, "%llu\n");
+
 static __init int spinlock_debugfs_init(void)
 {
 	debugfs_create_file("qspl_steal_spins", 0600, arch_debugfs_dir, NULL, &fops_steal_spins);
@@ -520,6 +598,7 @@ static __init int spinlock_debugfs_init(void)
 		debugfs_create_file("qspl_pv_yield_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_owner);
 		debugfs_create_file("qspl_pv_yield_allow_steal", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_allow_steal);
 		debugfs_create_file("qspl_pv_yield_prev", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_prev);
+		debugfs_create_file("qspl_pv_yield_propagate_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_propagate_owner);
 	}
 
 	return 0;
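For reference, the new knob lands next to the existing qspinlock debugfs files. Assuming debugfs is mounted at the usual /sys/kernel/debug and arch_debugfs_dir is the powerpc directory, it could be inspected and toggled at runtime roughly like this (paths are an assumption about the typical setup, not part of this commit):

# read the current setting (1 = propagate owner CPU, the default)
cat /sys/kernel/debug/powerpc/qspl_pv_yield_propagate_owner
# disable propagation
echo 0 > /sys/kernel/debug/powerpc/qspl_pv_yield_propagate_owner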
