  * atomic_long_cmpxchg() will be used to obtain writer lock.
  *
  * There are three places where the lock handoff bit may be set or cleared.
- * 1) rwsem_mark_wake() for readers.
- * 2) rwsem_try_write_lock() for writers.
- * 3) Error path of rwsem_down_write_slowpath().
+ * 1) rwsem_mark_wake() for readers          -- set, clear
+ * 2) rwsem_try_write_lock() for writers     -- set, clear
+ * 3) rwsem_del_waiter()                     -- clear
  *
  * For all the above cases, wait_lock will be held. A writer must also
  * be the first one in the wait_list to be eligible for setting the handoff
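The protocol described above can be modelled in a few lines of user-space C. This is an illustration only, not kernel code: the flag values, atomic type and helper names are invented, and the real count word also carries reader counts. It shows the point the comment makes: once the handoff bit is set on behalf of the first waiter, opportunistic writers must stop stealing the lock.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define FLAG_WRITER   0x1UL   /* stand-in for RWSEM_WRITER_LOCKED */
#define FLAG_HANDOFF  0x4UL   /* stand-in for RWSEM_FLAG_HANDOFF */

static atomic_ulong count;

/* Fast-path steal attempt: must back off once HANDOFF is set. */
static bool try_steal_write(void)
{
        unsigned long old = atomic_load(&count);

        do {
                if (old & (FLAG_WRITER | FLAG_HANDOFF))
                        return false;   /* locked or reserved for first waiter */
        } while (!atomic_compare_exchange_weak(&count, &old, old | FLAG_WRITER));

        return true;
}

/* First waiter marks the lock for handoff after waiting too long. */
static void set_handoff(void)
{
        atomic_fetch_or(&count, FLAG_HANDOFF);
}

int main(void)
{
        printf("steal before handoff: %d\n", try_steal_write());  /* 1 */
        atomic_fetch_and(&count, ~FLAG_WRITER);                    /* release */
        set_handoff();
        printf("steal after handoff:  %d\n", try_steal_write());  /* 0 */
        return 0;
}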
@@ -334,6 +334,9 @@ struct rwsem_waiter {
         struct task_struct *task;
         enum rwsem_waiter_type type;
         unsigned long timeout;
+
+        /* Writer only, not initialized in reader */
+        bool handoff_set;
 };
 #define rwsem_first_waiter(sem) \
         list_first_entry(&sem->wait_list, struct rwsem_waiter, list)
@@ -344,12 +347,6 @@ enum rwsem_wake_type {
         RWSEM_WAKE_READ_OWNED   /* Waker thread holds the read lock */
 };
 
-enum writer_wait_state {
-        WRITER_NOT_FIRST,       /* Writer is not first in wait list */
-        WRITER_FIRST,           /* Writer is first in wait list */
-        WRITER_HANDOFF          /* Writer is first & handoff needed */
-};
-
 /*
  * The typical HZ value is either 250 or 1000. So set the minimum waiting
  * time to at least 4ms or 1 jiffy (if it is higher than 4ms) in the wait
@@ -365,6 +362,31 @@ enum writer_wait_state {
  */
 #define MAX_READERS_WAKEUP      0x100
 
+static inline void
+rwsem_add_waiter(struct rw_semaphore *sem, struct rwsem_waiter *waiter)
+{
+        lockdep_assert_held(&sem->wait_lock);
+        list_add_tail(&waiter->list, &sem->wait_list);
+        /* caller will set RWSEM_FLAG_WAITERS */
+}
+
+/*
+ * Remove a waiter from the wait_list and clear flags.
+ *
+ * Both rwsem_mark_wake() and rwsem_try_write_lock() contain a full 'copy' of
+ * this function. Modify with care.
+ */
+static inline void
+rwsem_del_waiter(struct rw_semaphore *sem, struct rwsem_waiter *waiter)
+{
+        lockdep_assert_held(&sem->wait_lock);
+        list_del(&waiter->list);
+        if (likely(!list_empty(&sem->wait_list)))
+                return;
+
+        atomic_long_andnot(RWSEM_FLAG_HANDOFF | RWSEM_FLAG_WAITERS, &sem->count);
+}
+
 /*
  * handle the lock release when processes blocked on it that can now run
  * - if we come here from up_xxxx(), then the RWSEM_FLAG_WAITERS bit must
@@ -376,6 +398,8 @@ enum writer_wait_state {
  *   preferably when the wait_lock is released
  * - woken process blocks are discarded from the list after having task zeroed
  * - writers are only marked woken if downgrading is false
+ *
+ * Implies rwsem_del_waiter() for all woken readers.
  */
 static void rwsem_mark_wake(struct rw_semaphore *sem,
                             enum rwsem_wake_type wake_type,
@@ -490,18 +514,25 @@ static void rwsem_mark_wake(struct rw_semaphore *sem,
 
         adjustment = woken * RWSEM_READER_BIAS - adjustment;
         lockevent_cond_inc(rwsem_wake_reader, woken);
+
+        oldcount = atomic_long_read(&sem->count);
         if (list_empty(&sem->wait_list)) {
-                /* hit end of list above */
+                /*
+                 * Combined with list_move_tail() above, this implies
+                 * rwsem_del_waiter().
+                 */
                 adjustment -= RWSEM_FLAG_WAITERS;
+                if (oldcount & RWSEM_FLAG_HANDOFF)
+                        adjustment -= RWSEM_FLAG_HANDOFF;
+        } else if (woken) {
+                /*
+                 * When we've woken a reader, we no longer need to force
+                 * writers to give up the lock and we can clear HANDOFF.
+                 */
+                if (oldcount & RWSEM_FLAG_HANDOFF)
+                        adjustment -= RWSEM_FLAG_HANDOFF;
         }
 
-        /*
-         * When we've woken a reader, we no longer need to force writers
-         * to give up the lock and we can clear HANDOFF.
-         */
-        if (woken && (atomic_long_read(&sem->count) & RWSEM_FLAG_HANDOFF))
-                adjustment -= RWSEM_FLAG_HANDOFF;
-
         if (adjustment)
                 atomic_long_add(adjustment, &sem->count);
 
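The hunk above folds the HANDOFF clearing into the same adjustment that grants the lock to the woken readers, so sem->count is updated with one atomic add instead of a separate read-modify-write. A rough user-space sketch of that batching pattern follows; it is an illustration only, with invented flag values, and the real count word also carries other state.

#include <stdatomic.h>
#include <stdio.h>

#define READER_BIAS   0x100L
#define FLAG_WAITERS  0x2L
#define FLAG_HANDOFF  0x4L

int main(void)
{
        atomic_long count = FLAG_WAITERS | FLAG_HANDOFF;
        long woken = 3, adjustment;

        /* Grant the lock to the woken readers... */
        adjustment = woken * READER_BIAS;

        /* ...and, because a reader was woken, drop HANDOFF in the same step. */
        long oldcount = atomic_load(&count);
        if (woken && (oldcount & FLAG_HANDOFF))
                adjustment -= FLAG_HANDOFF;

        /* One atomic update instead of two separate read-modify-writes. */
        atomic_fetch_add(&count, adjustment);
        printf("count = %#lx\n", (unsigned long)atomic_load(&count)); /* 0x302 */
        return 0;
}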
@@ -532,12 +563,12 @@ static void rwsem_mark_wake(struct rw_semaphore *sem,
  * race conditions between checking the rwsem wait list and setting the
  * sem->count accordingly.
  *
- * If wstate is WRITER_HANDOFF, it will make sure that either the handoff
- * bit is set or the lock is acquired with handoff bit cleared.
+ * Implies rwsem_del_waiter() on success.
  */
 static inline bool rwsem_try_write_lock(struct rw_semaphore *sem,
-                                        enum writer_wait_state wstate)
+                                        struct rwsem_waiter *waiter)
 {
+        bool first = rwsem_first_waiter(sem) == waiter;
         long count, new;
 
         lockdep_assert_held(&sem->wait_lock);
@@ -546,13 +577,19 @@ static inline bool rwsem_try_write_lock(struct rw_semaphore *sem,
         do {
                 bool has_handoff = !!(count & RWSEM_FLAG_HANDOFF);
 
-                if (has_handoff && wstate == WRITER_NOT_FIRST)
-                        return false;
+                if (has_handoff) {
+                        if (!first)
+                                return false;
+
+                        /* First waiter inherits a previously set handoff bit */
+                        waiter->handoff_set = true;
+                }
 
                 new = count;
 
                 if (count & RWSEM_LOCK_MASK) {
-                        if (has_handoff || (wstate != WRITER_HANDOFF))
+                        if (has_handoff || (!rt_task(waiter->task) &&
+                                            !time_after(jiffies, waiter->timeout)))
                                 return false;
 
                         new |= RWSEM_FLAG_HANDOFF;
@@ -569,9 +606,17 @@ static inline bool rwsem_try_write_lock(struct rw_semaphore *sem,
          * We have either acquired the lock with handoff bit cleared or
          * set the handoff bit.
          */
-        if (new & RWSEM_FLAG_HANDOFF)
+        if (new & RWSEM_FLAG_HANDOFF) {
+                waiter->handoff_set = true;
+                lockevent_inc(rwsem_wlock_handoff);
                 return false;
+        }
 
+        /*
+         * Have rwsem_try_write_lock() fully imply rwsem_del_waiter() on
+         * success.
+         */
+        list_del(&waiter->list);
         rwsem_set_owner(sem);
         return true;
 }
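Taken together, the three hunks above move the whole handoff decision into rwsem_try_write_lock(): the first waiter inherits an already-set bit, and a new bit is only set while the lock is busy, for an RT writer or once the wait has timed out. Below is a distilled, non-kernel sketch of that decision, following the file's documented rule that only the first waiter is eligible; the struct and function are invented for illustration.

#include <stdbool.h>
#include <stdio.h>

/* Invented, simplified stand-ins for the waiter's state. */
struct waiter_state {
        bool first;       /* first entry on sem->wait_list */
        bool rt_task;     /* realtime writer */
        bool timed_out;   /* waited longer than RWSEM_WAIT_TIMEOUT */
};

/* May this blocked writer rely on the handoff bit during a trylock attempt? */
static bool waiter_can_use_handoff(const struct waiter_state *w,
                                   bool lock_busy, bool handoff_already_set)
{
        if (handoff_already_set)
                return w->first;   /* first waiter inherits the existing bit */
        if (!lock_busy)
                return false;      /* no need: the trylock will just succeed */
        /* Set a new bit only for an RT writer or once the wait timed out. */
        return w->first && (w->rt_task || w->timed_out);
}

int main(void)
{
        struct waiter_state w = { .first = true, .rt_task = false,
                                  .timed_out = true };

        printf("%d\n", waiter_can_use_handoff(&w, true, false));   /* 1 */
        w.first = false;
        printf("%d\n", waiter_can_use_handoff(&w, true, true));    /* 0 */
        return 0;
}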
@@ -956,7 +1001,7 @@ rwsem_down_read_slowpath(struct rw_semaphore *sem, long count, unsigned int stat
                 }
                 adjustment += RWSEM_FLAG_WAITERS;
         }
-        list_add_tail(&waiter.list, &sem->wait_list);
+        rwsem_add_waiter(sem, &waiter);
 
         /* we're now waiting on the lock, but no longer actively locking */
         count = atomic_long_add_return(adjustment, &sem->count);
@@ -1002,11 +1047,7 @@ rwsem_down_read_slowpath(struct rw_semaphore *sem, long count, unsigned int stat
         return sem;
 
 out_nolock:
-        list_del(&waiter.list);
-        if (list_empty(&sem->wait_list)) {
-                atomic_long_andnot(RWSEM_FLAG_WAITERS|RWSEM_FLAG_HANDOFF,
-                                   &sem->count);
-        }
+        rwsem_del_waiter(sem, &waiter);
         raw_spin_unlock_irq(&sem->wait_lock);
         __set_current_state(TASK_RUNNING);
         lockevent_inc(rwsem_rlock_fail);
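The reader's error path now funnels through rwsem_del_waiter() (the writer's path follows further below), whose contract is that removing the last waiter also clears RWSEM_FLAG_WAITERS and RWSEM_FLAG_HANDOFF in a single atomic step. A toy model of that contract, for illustration only: the flag values are invented and a plain counter stands in for the wait list.

#include <stdatomic.h>
#include <stdio.h>

#define FLAG_WAITERS  0x2L
#define FLAG_HANDOFF  0x4L

static atomic_long count = FLAG_WAITERS | FLAG_HANDOFF;
static int nr_waiters = 2;

/* Removing the last waiter must also clear both flags, in one atomic op. */
static void del_waiter(void)
{
        if (--nr_waiters == 0)
                atomic_fetch_and(&count, ~(FLAG_HANDOFF | FLAG_WAITERS));
}

int main(void)
{
        del_waiter();   /* one waiter left: flags stay */
        printf("count = %ld\n", (long)atomic_load(&count));   /* 6 */
        del_waiter();   /* last waiter gone: flags cleared */
        printf("count = %ld\n", (long)atomic_load(&count));   /* 0 */
        return 0;
}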
@@ -1020,9 +1061,7 @@ static struct rw_semaphore *
 rwsem_down_write_slowpath(struct rw_semaphore *sem, int state)
 {
         long count;
-        enum writer_wait_state wstate;
         struct rwsem_waiter waiter;
-        struct rw_semaphore *ret = sem;
         DEFINE_WAKE_Q(wake_q);
 
         /* do optimistic spinning and steal lock if possible */
@@ -1038,16 +1077,13 @@ rwsem_down_write_slowpath(struct rw_semaphore *sem, int state)
         waiter.task = current;
         waiter.type = RWSEM_WAITING_FOR_WRITE;
         waiter.timeout = jiffies + RWSEM_WAIT_TIMEOUT;
+        waiter.handoff_set = false;
 
         raw_spin_lock_irq(&sem->wait_lock);
-
-        /* account for this before adding a new element to the list */
-        wstate = list_empty(&sem->wait_list) ? WRITER_FIRST : WRITER_NOT_FIRST;
-
-        list_add_tail(&waiter.list, &sem->wait_list);
+        rwsem_add_waiter(sem, &waiter);
 
         /* we're now waiting on the lock */
-        if (wstate == WRITER_NOT_FIRST) {
+        if (rwsem_first_waiter(sem) != &waiter) {
                 count = atomic_long_read(&sem->count);
 
                 /*
@@ -1083,13 +1119,16 @@ rwsem_down_write_slowpath(struct rw_semaphore *sem, int state)
         /* wait until we successfully acquire the lock */
         set_current_state(state);
         for (;;) {
-                if (rwsem_try_write_lock(sem, wstate)) {
+                if (rwsem_try_write_lock(sem, &waiter)) {
                         /* rwsem_try_write_lock() implies ACQUIRE on success */
                         break;
                 }
 
                 raw_spin_unlock_irq(&sem->wait_lock);
 
+                if (signal_pending_state(state, current))
+                        goto out_nolock;
+
                 /*
                  * After setting the handoff bit and failing to acquire
                  * the lock, attempt to spin on owner to accelerate lock
@@ -1098,7 +1137,7 @@ rwsem_down_write_slowpath(struct rw_semaphore *sem, int state)
                  * In this case, we attempt to acquire the lock again
                  * without sleeping.
                  */
-                if (wstate == WRITER_HANDOFF) {
+                if (waiter.handoff_set) {
                         enum owner_state owner_state;
 
                         preempt_disable();
@@ -1109,66 +1148,26 @@ rwsem_down_write_slowpath(struct rw_semaphore *sem, int state)
                         goto trylock_again;
                 }
 
-                /* Block until there are no active lockers. */
-                for (;;) {
-                        if (signal_pending_state(state, current))
-                                goto out_nolock;
-
-                        schedule();
-                        lockevent_inc(rwsem_sleep_writer);
-                        set_current_state(state);
-                        /*
-                         * If HANDOFF bit is set, unconditionally do
-                         * a trylock.
-                         */
-                        if (wstate == WRITER_HANDOFF)
-                                break;
-
-                        if ((wstate == WRITER_NOT_FIRST) &&
-                            (rwsem_first_waiter(sem) == &waiter))
-                                wstate = WRITER_FIRST;
-
-                        count = atomic_long_read(&sem->count);
-                        if (!(count & RWSEM_LOCK_MASK))
-                                break;
-
-                        /*
-                         * The setting of the handoff bit is deferred
-                         * until rwsem_try_write_lock() is called.
-                         */
-                        if ((wstate == WRITER_FIRST) && (rt_task(current) ||
-                            time_after(jiffies, waiter.timeout))) {
-                                wstate = WRITER_HANDOFF;
-                                lockevent_inc(rwsem_wlock_handoff);
-                                break;
-                        }
-                }
+                schedule();
+                lockevent_inc(rwsem_sleep_writer);
+                set_current_state(state);
 trylock_again:
                 raw_spin_lock_irq(&sem->wait_lock);
         }
         __set_current_state(TASK_RUNNING);
-        list_del(&waiter.list);
         raw_spin_unlock_irq(&sem->wait_lock);
         lockevent_inc(rwsem_wlock);
-
-        return ret;
+        return sem;
 
 out_nolock:
         __set_current_state(TASK_RUNNING);
         raw_spin_lock_irq(&sem->wait_lock);
-        list_del(&waiter.list);
-
-        if (unlikely(wstate == WRITER_HANDOFF))
-                atomic_long_add(-RWSEM_FLAG_HANDOFF, &sem->count);
-
-        if (list_empty(&sem->wait_list))
-                atomic_long_andnot(RWSEM_FLAG_WAITERS, &sem->count);
-        else
+        rwsem_del_waiter(sem, &waiter);
+        if (!list_empty(&sem->wait_list))
                 rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);
         raw_spin_unlock_irq(&sem->wait_lock);
         wake_up_q(&wake_q);
         lockevent_inc(rwsem_wlock_fail);
-
         return ERR_PTR(-EINTR);
 }
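With the inner "block until there are no active lockers" loop removed above, the writer slowpath is a single loop: try the lock, bail out on a signal, spin on the owner when the handoff bit is ours, otherwise sleep. Below is a rough stand-alone sketch of that shape, for illustration only; every helper is a stub, not the kernel function of a similar name, and the real code re-tries without sleeping after a successful owner spin.

#include <stdbool.h>
#include <stdio.h>

static int attempts;
static bool handoff_set;

static bool try_write_lock(void)    { return ++attempts >= 3; }  /* 3rd try wins */
static bool signal_pending(void)    { return false; }
static void spin_on_owner(void)     { puts("spinning on owner"); }
static void sleep_until_woken(void) { puts("sleeping"); }

int main(void)
{
        for (;;) {
                if (try_write_lock())
                        break;                  /* lock acquired */
                if (signal_pending())
                        return 1;               /* -EINTR path */
                if (handoff_set)
                        spin_on_owner();        /* accelerate the handoff */
                else
                        sleep_until_woken();    /* plain schedule() */
                handoff_set = true;             /* pretend a later trylock set the bit */
        }
        puts("write lock taken");
        return 0;
}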