Skip to content

Commit ba16b28

Browse files
shligit authored and axboe committed
kernfs: add an API to get kernfs node from inode number
Add an API to get a kernfs node from an inode number. We will need this to implement exportfs operations. This API will also be used in blktrace later, so it should be as fast as possible. To make the API lock-free, kernfs nodes are freed in RCU context, and we rely on the kernfs_node reference count and inode number to filter out stale kernfs nodes. Acked-by: Tejun Heo <[email protected]> Acked-by: Greg Kroah-Hartman <[email protected]> Signed-off-by: Shaohua Li <[email protected]> Signed-off-by: Jens Axboe <[email protected]>
1 parent 4a3ef68 commit ba16b28

File tree

3 files changed

+69
-1
lines changed

3 files changed

+69
-1
lines changed

fs/kernfs/dir.c

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -508,6 +508,10 @@ void kernfs_put(struct kernfs_node *kn)
508508
struct kernfs_node *parent;
509509
struct kernfs_root *root;
510510

511+
/*
512+
* kernfs_node is freed with ->count 0, kernfs_find_and_get_node_by_ino
513+
* depends on this to filter reused stale node
514+
*/
511515
if (!kn || !atomic_dec_and_test(&kn->count))
512516
return;
513517
root = kernfs_root(kn);
@@ -649,6 +653,11 @@ static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root,
649653
kn->ino = ret;
650654
kn->generation = gen;
651655

656+
/*
657+
* set ino first. This barrier is paired with atomic_inc_not_zero in
658+
* kernfs_find_and_get_node_by_ino
659+
*/
660+
smp_mb__before_atomic();
652661
atomic_set(&kn->count, 1);
653662
atomic_set(&kn->active, KN_DEACTIVATED_BIAS);
654663
RB_CLEAR_NODE(&kn->rb);
@@ -680,6 +689,54 @@ struct kernfs_node *kernfs_new_node(struct kernfs_node *parent,
680689
return kn;
681690
}
682691

692+
/*
693+
* kernfs_find_and_get_node_by_ino - get kernfs_node from inode number
694+
* @root: the kernfs root
695+
* @ino: inode number
696+
*
697+
* RETURNS:
698+
* NULL on failure. Return a kernfs node with reference counter incremented
699+
*/
700+
struct kernfs_node *kernfs_find_and_get_node_by_ino(struct kernfs_root *root,
701+
unsigned int ino)
702+
{
703+
struct kernfs_node *kn;
704+
705+
rcu_read_lock();
706+
kn = idr_find(&root->ino_idr, ino);
707+
if (!kn)
708+
goto out;
709+
710+
/*
711+
* Since kernfs_node is freed in RCU, it's possible an old node for ino
712+
* is freed, but reused before RCU grace period. But a freed node (see
713+
* kernfs_put) or an incompletedly initialized node (see
714+
* __kernfs_new_node) should have 'count' 0. We can use this fact to
715+
* filter out such node.
716+
*/
717+
if (!atomic_inc_not_zero(&kn->count)) {
718+
kn = NULL;
719+
goto out;
720+
}
721+
722+
/*
723+
* The node could be a new node or a reused node. If it's a new node,
724+
* we are ok. If it's reused because of RCU (because of
725+
* SLAB_TYPESAFE_BY_RCU), the __kernfs_new_node always sets its 'ino'
726+
* before 'count'. So if 'count' is uptodate, 'ino' should be uptodate,
727+
* hence we can use 'ino' to filter stale node.
728+
*/
729+
if (kn->ino != ino)
730+
goto out;
731+
rcu_read_unlock();
732+
733+
return kn;
734+
out:
735+
rcu_read_unlock();
736+
kernfs_put(kn);
737+
return NULL;
738+
}
739+
683740
/**
684741
* kernfs_add_one - add kernfs_node to parent without warning
685742
* @kn: kernfs_node to be added

fs/kernfs/kernfs-internal.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,8 @@ int kernfs_add_one(struct kernfs_node *kn);
9898
struct kernfs_node *kernfs_new_node(struct kernfs_node *parent,
9999
const char *name, umode_t mode,
100100
unsigned flags);
101+
struct kernfs_node *kernfs_find_and_get_node_by_ino(struct kernfs_root *root,
102+
unsigned int ino);
101103

102104
/*
103105
* file.c

fs/kernfs/mount.c

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -330,7 +330,16 @@ struct super_block *kernfs_pin_sb(struct kernfs_root *root, const void *ns)
330330

331331
void __init kernfs_init(void)
332332
{
333+
334+
/*
335+
* the slab is freed in RCU context, so kernfs_find_and_get_node_by_ino
336+
* can access the slab lock free. This could introduce stale nodes,
337+
* please see how kernfs_find_and_get_node_by_ino filters out stale
338+
* nodes.
339+
*/
333340
kernfs_node_cache = kmem_cache_create("kernfs_node_cache",
334341
sizeof(struct kernfs_node),
335-
0, SLAB_PANIC, NULL);
342+
0,
343+
SLAB_PANIC | SLAB_TYPESAFE_BY_RCU,
344+
NULL);
336345
}

0 commit comments

Comments
 (0)