Skip to content

Commit 2005f5b

Browse files
jtlaytonchucklever
authored andcommitted
lockd: fix races in client GRANTED_MSG wait logic
After the wait for a grant is done (for whatever reason), nlmclnt_block updates the status of the nlm_rqst with the status of the block. At the point it does this, however, the block is still queued its status could change at any time. This is particularly a problem when the waiting task is signaled during the wait. We can end up giving up on the lock just before the GRANTED_MSG callback comes in, and accept it even though the lock request gets back an error, leaving a dangling lock on the server. Since the nlm_wait never lives beyond the end of nlmclnt_lock, put it on the stack and add functions to allow us to enqueue and dequeue the block. Enqueue it just before the lock/wait loop, and dequeue it just after we exit the loop instead of waiting until the end of the function. Also, scrape the status at the time that we dequeue it to ensure that it's final. Reported-by: Yongcheng Yang <[email protected]> Link: https://bugzilla.redhat.com/show_bug.cgi?id=2063818 Signed-off-by: Jeff Layton <[email protected]> Signed-off-by: Chuck Lever <[email protected]>
1 parent f0aa485 commit 2005f5b

File tree

3 files changed

+44
-34
lines changed

3 files changed

+44
-34
lines changed

fs/lockd/clntlock.c

Lines changed: 21 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -82,41 +82,42 @@ void nlmclnt_done(struct nlm_host *host)
8282
}
8383
EXPORT_SYMBOL_GPL(nlmclnt_done);
8484

85+
void nlmclnt_prepare_block(struct nlm_wait *block, struct nlm_host *host, struct file_lock *fl)
86+
{
87+
block->b_host = host;
88+
block->b_lock = fl;
89+
init_waitqueue_head(&block->b_wait);
90+
block->b_status = nlm_lck_blocked;
91+
}
92+
8593
/*
8694
* Queue up a lock for blocking so that the GRANTED request can see it
8795
*/
88-
struct nlm_wait *nlmclnt_prepare_block(struct nlm_host *host, struct file_lock *fl)
96+
void nlmclnt_queue_block(struct nlm_wait *block)
8997
{
90-
struct nlm_wait *block;
91-
92-
block = kmalloc(sizeof(*block), GFP_KERNEL);
93-
if (block != NULL) {
94-
block->b_host = host;
95-
block->b_lock = fl;
96-
init_waitqueue_head(&block->b_wait);
97-
block->b_status = nlm_lck_blocked;
98-
99-
spin_lock(&nlm_blocked_lock);
100-
list_add(&block->b_list, &nlm_blocked);
101-
spin_unlock(&nlm_blocked_lock);
102-
}
103-
return block;
98+
spin_lock(&nlm_blocked_lock);
99+
list_add(&block->b_list, &nlm_blocked);
100+
spin_unlock(&nlm_blocked_lock);
104101
}
105102

106-
void nlmclnt_finish_block(struct nlm_wait *block)
103+
/*
104+
* Dequeue the block and return its final status
105+
*/
106+
__be32 nlmclnt_dequeue_block(struct nlm_wait *block)
107107
{
108-
if (block == NULL)
109-
return;
108+
__be32 status;
109+
110110
spin_lock(&nlm_blocked_lock);
111111
list_del(&block->b_list);
112+
status = block->b_status;
112113
spin_unlock(&nlm_blocked_lock);
113-
kfree(block);
114+
return status;
114115
}
115116

116117
/*
117118
* Block on a lock
118119
*/
119-
int nlmclnt_block(struct nlm_wait *block, struct nlm_rqst *req, long timeout)
120+
int nlmclnt_wait(struct nlm_wait *block, struct nlm_rqst *req, long timeout)
120121
{
121122
long ret;
122123

@@ -142,7 +143,6 @@ int nlmclnt_block(struct nlm_wait *block, struct nlm_rqst *req, long timeout)
142143
/* Reset the lock status after a server reboot so we resend */
143144
if (block->b_status == nlm_lck_denied_grace_period)
144145
block->b_status = nlm_lck_blocked;
145-
req->a_res.status = block->b_status;
146146
return 0;
147147
}
148148

fs/lockd/clntproc.c

Lines changed: 18 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -516,9 +516,10 @@ nlmclnt_lock(struct nlm_rqst *req, struct file_lock *fl)
516516
const struct cred *cred = nfs_file_cred(fl->fl_file);
517517
struct nlm_host *host = req->a_host;
518518
struct nlm_res *resp = &req->a_res;
519-
struct nlm_wait *block = NULL;
519+
struct nlm_wait block;
520520
unsigned char fl_flags = fl->fl_flags;
521521
unsigned char fl_type;
522+
__be32 b_status;
522523
int status = -ENOLCK;
523524

524525
if (nsm_monitor(host) < 0)
@@ -531,31 +532,41 @@ nlmclnt_lock(struct nlm_rqst *req, struct file_lock *fl)
531532
if (status < 0)
532533
goto out;
533534

534-
block = nlmclnt_prepare_block(host, fl);
535+
nlmclnt_prepare_block(&block, host, fl);
535536
again:
536537
/*
537538
* Initialise resp->status to a valid non-zero value,
538539
* since 0 == nlm_lck_granted
539540
*/
540541
resp->status = nlm_lck_blocked;
541-
for(;;) {
542+
543+
/*
544+
* A GRANTED callback can come at any time -- even before the reply
545+
* to the LOCK request arrives, so we queue the wait before
546+
* requesting the lock.
547+
*/
548+
nlmclnt_queue_block(&block);
549+
for (;;) {
542550
/* Reboot protection */
543551
fl->fl_u.nfs_fl.state = host->h_state;
544552
status = nlmclnt_call(cred, req, NLMPROC_LOCK);
545553
if (status < 0)
546554
break;
547555
/* Did a reclaimer thread notify us of a server reboot? */
548-
if (resp->status == nlm_lck_denied_grace_period)
556+
if (resp->status == nlm_lck_denied_grace_period)
549557
continue;
550558
if (resp->status != nlm_lck_blocked)
551559
break;
552560
/* Wait on an NLM blocking lock */
553-
status = nlmclnt_block(block, req, NLMCLNT_POLL_TIMEOUT);
561+
status = nlmclnt_wait(&block, req, NLMCLNT_POLL_TIMEOUT);
554562
if (status < 0)
555563
break;
556-
if (resp->status != nlm_lck_blocked)
564+
if (block.b_status != nlm_lck_blocked)
557565
break;
558566
}
567+
b_status = nlmclnt_dequeue_block(&block);
568+
if (resp->status == nlm_lck_blocked)
569+
resp->status = b_status;
559570

560571
/* if we were interrupted while blocking, then cancel the lock request
561572
* and exit
@@ -564,7 +575,7 @@ nlmclnt_lock(struct nlm_rqst *req, struct file_lock *fl)
564575
if (!req->a_args.block)
565576
goto out_unlock;
566577
if (nlmclnt_cancel(host, req->a_args.block, fl) == 0)
567-
goto out_unblock;
578+
goto out;
568579
}
569580

570581
if (resp->status == nlm_granted) {
@@ -593,16 +604,13 @@ nlmclnt_lock(struct nlm_rqst *req, struct file_lock *fl)
593604
status = -ENOLCK;
594605
else
595606
status = nlm_stat_to_errno(resp->status);
596-
out_unblock:
597-
nlmclnt_finish_block(block);
598607
out:
599608
nlmclnt_release_call(req);
600609
return status;
601610
out_unlock:
602611
/* Fatal error: ensure that we remove the lock altogether */
603612
dprintk("lockd: lock attempt ended in fatal error.\n"
604613
" Attempting to unlock.\n");
605-
nlmclnt_finish_block(block);
606614
fl_type = fl->fl_type;
607615
fl->fl_type = F_UNLCK;
608616
down_read(&host->h_rwsem);

include/linux/lockd/lockd.h

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -211,9 +211,11 @@ struct nlm_rqst * nlm_alloc_call(struct nlm_host *host);
211211
int nlm_async_call(struct nlm_rqst *, u32, const struct rpc_call_ops *);
212212
int nlm_async_reply(struct nlm_rqst *, u32, const struct rpc_call_ops *);
213213
void nlmclnt_release_call(struct nlm_rqst *);
214-
struct nlm_wait * nlmclnt_prepare_block(struct nlm_host *host, struct file_lock *fl);
215-
void nlmclnt_finish_block(struct nlm_wait *block);
216-
int nlmclnt_block(struct nlm_wait *block, struct nlm_rqst *req, long timeout);
214+
void nlmclnt_prepare_block(struct nlm_wait *block, struct nlm_host *host,
215+
struct file_lock *fl);
216+
void nlmclnt_queue_block(struct nlm_wait *block);
217+
__be32 nlmclnt_dequeue_block(struct nlm_wait *block);
218+
int nlmclnt_wait(struct nlm_wait *block, struct nlm_rqst *req, long timeout);
217219
__be32 nlmclnt_grant(const struct sockaddr *addr,
218220
const struct nlm_lock *lock);
219221
void nlmclnt_recovery(struct nlm_host *);

0 commit comments

Comments
 (0)