Skip to content

Commit de1b794

Browse files
jankaratytso
authored andcommitted
jbd2: Fix oops in jbd2_journal_remove_journal_head()
jbd2_journal_remove_journal_head() can oops when trying to access journal_head returned by bh2jh(). This is caused for example by the following race: TASK1 TASK2 jbd2_journal_commit_transaction() ... processing t_forget list __jbd2_journal_refile_buffer(jh); if (!jh->b_transaction) { jbd_unlock_bh_state(bh); jbd2_journal_try_to_free_buffers() jbd2_journal_grab_journal_head(bh) jbd_lock_bh_state(bh) __journal_try_to_free_buffer() jbd2_journal_put_journal_head(jh) jbd2_journal_remove_journal_head(bh); jbd2_journal_put_journal_head() in TASK2 sees that b_jcount == 0 and buffer is not part of any transaction and thus frees journal_head before TASK1 gets to doing so. Note that even buffer_head can be released by try_to_free_buffers() after jbd2_journal_put_journal_head() which adds even larger opportunity for oops (but I didn't see this happen in reality). Fix the problem by making transactions hold their own journal_head reference (in b_jcount). That way we don't have to remove journal_head explicitely via jbd2_journal_remove_journal_head() and instead just remove journal_head when b_jcount drops to zero. The result of this is that [__]jbd2_journal_refile_buffer(), [__]jbd2_journal_unfile_buffer(), and __jdb2_journal_remove_checkpoint() can free journal_head which needs modification of a few callers. Also we have to be careful because once journal_head is removed, buffer_head might be freed as well. So we have to get our own buffer_head reference where it matters. Signed-off-by: Jan Kara <[email protected]> Signed-off-by: "Theodore Ts'o" <[email protected]>
1 parent 1fb74cd commit de1b794

File tree

5 files changed

+99
-122
lines changed

5 files changed

+99
-122
lines changed

fs/jbd2/checkpoint.c

Lines changed: 16 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -97,10 +97,14 @@ static int __try_to_free_cp_buf(struct journal_head *jh)
9797

9898
if (jh->b_jlist == BJ_None && !buffer_locked(bh) &&
9999
!buffer_dirty(bh) && !buffer_write_io_error(bh)) {
100+
/*
101+
* Get our reference so that bh cannot be freed before
102+
* we unlock it
103+
*/
104+
get_bh(bh);
100105
JBUFFER_TRACE(jh, "remove from checkpoint list");
101106
ret = __jbd2_journal_remove_checkpoint(jh) + 1;
102107
jbd_unlock_bh_state(bh);
103-
jbd2_journal_remove_journal_head(bh);
104108
BUFFER_TRACE(bh, "release");
105109
__brelse(bh);
106110
} else {
@@ -223,8 +227,8 @@ static int __wait_cp_io(journal_t *journal, transaction_t *transaction)
223227
spin_lock(&journal->j_list_lock);
224228
goto restart;
225229
}
230+
get_bh(bh);
226231
if (buffer_locked(bh)) {
227-
atomic_inc(&bh->b_count);
228232
spin_unlock(&journal->j_list_lock);
229233
jbd_unlock_bh_state(bh);
230234
wait_on_buffer(bh);
@@ -243,7 +247,6 @@ static int __wait_cp_io(journal_t *journal, transaction_t *transaction)
243247
*/
244248
released = __jbd2_journal_remove_checkpoint(jh);
245249
jbd_unlock_bh_state(bh);
246-
jbd2_journal_remove_journal_head(bh);
247250
__brelse(bh);
248251
}
249252

@@ -284,7 +287,7 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
284287
int ret = 0;
285288

286289
if (buffer_locked(bh)) {
287-
atomic_inc(&bh->b_count);
290+
get_bh(bh);
288291
spin_unlock(&journal->j_list_lock);
289292
jbd_unlock_bh_state(bh);
290293
wait_on_buffer(bh);
@@ -316,12 +319,12 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
316319
ret = 1;
317320
if (unlikely(buffer_write_io_error(bh)))
318321
ret = -EIO;
322+
get_bh(bh);
319323
J_ASSERT_JH(jh, !buffer_jbddirty(bh));
320324
BUFFER_TRACE(bh, "remove from checkpoint");
321325
__jbd2_journal_remove_checkpoint(jh);
322326
spin_unlock(&journal->j_list_lock);
323327
jbd_unlock_bh_state(bh);
324-
jbd2_journal_remove_journal_head(bh);
325328
__brelse(bh);
326329
} else {
327330
/*
@@ -554,7 +557,8 @@ int jbd2_cleanup_journal_tail(journal_t *journal)
554557
/*
555558
* journal_clean_one_cp_list
556559
*
557-
* Find all the written-back checkpoint buffers in the given list and release them.
560+
* Find all the written-back checkpoint buffers in the given list and
561+
* release them.
558562
*
559563
* Called with the journal locked.
560564
* Called with j_list_lock held.
@@ -663,8 +667,8 @@ int __jbd2_journal_clean_checkpoint_list(journal_t *journal)
663667
* checkpoint lists.
664668
*
665669
* The function returns 1 if it frees the transaction, 0 otherwise.
670+
* The function can free jh and bh.
666671
*
667-
* This function is called with the journal locked.
668672
* This function is called with j_list_lock held.
669673
* This function is called with jbd_lock_bh_state(jh2bh(jh))
670674
*/
@@ -684,13 +688,14 @@ int __jbd2_journal_remove_checkpoint(struct journal_head *jh)
684688
}
685689
journal = transaction->t_journal;
686690

691+
JBUFFER_TRACE(jh, "removing from transaction");
687692
__buffer_unlink(jh);
688693
jh->b_cp_transaction = NULL;
694+
jbd2_journal_put_journal_head(jh);
689695

690696
if (transaction->t_checkpoint_list != NULL ||
691697
transaction->t_checkpoint_io_list != NULL)
692698
goto out;
693-
JBUFFER_TRACE(jh, "transaction has no more buffers");
694699

695700
/*
696701
* There is one special case to worry about: if we have just pulled the
@@ -701,10 +706,8 @@ int __jbd2_journal_remove_checkpoint(struct journal_head *jh)
701706
* The locking here around t_state is a bit sleazy.
702707
* See the comment at the end of jbd2_journal_commit_transaction().
703708
*/
704-
if (transaction->t_state != T_FINISHED) {
705-
JBUFFER_TRACE(jh, "belongs to running/committing transaction");
709+
if (transaction->t_state != T_FINISHED)
706710
goto out;
707-
}
708711

709712
/* OK, that was the last buffer for the transaction: we can now
710713
safely remove this transaction from the log */
@@ -723,7 +726,6 @@ int __jbd2_journal_remove_checkpoint(struct journal_head *jh)
723726
wake_up(&journal->j_wait_logspace);
724727
ret = 1;
725728
out:
726-
JBUFFER_TRACE(jh, "exit");
727729
return ret;
728730
}
729731

@@ -742,6 +744,8 @@ void __jbd2_journal_insert_checkpoint(struct journal_head *jh,
742744
J_ASSERT_JH(jh, buffer_dirty(jh2bh(jh)) || buffer_jbddirty(jh2bh(jh)));
743745
J_ASSERT_JH(jh, jh->b_cp_transaction == NULL);
744746

747+
/* Get reference for checkpointing transaction */
748+
jbd2_journal_grab_journal_head(jh2bh(jh));
745749
jh->b_cp_transaction = transaction;
746750

747751
if (!transaction->t_checkpoint_list) {

fs/jbd2/commit.c

Lines changed: 19 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -848,10 +848,16 @@ void jbd2_journal_commit_transaction(journal_t *journal)
848848
while (commit_transaction->t_forget) {
849849
transaction_t *cp_transaction;
850850
struct buffer_head *bh;
851+
int try_to_free = 0;
851852

852853
jh = commit_transaction->t_forget;
853854
spin_unlock(&journal->j_list_lock);
854855
bh = jh2bh(jh);
856+
/*
857+
* Get a reference so that bh cannot be freed before we are
858+
* done with it.
859+
*/
860+
get_bh(bh);
855861
jbd_lock_bh_state(bh);
856862
J_ASSERT_JH(jh, jh->b_transaction == commit_transaction);
857863

@@ -914,28 +920,27 @@ void jbd2_journal_commit_transaction(journal_t *journal)
914920
__jbd2_journal_insert_checkpoint(jh, commit_transaction);
915921
if (is_journal_aborted(journal))
916922
clear_buffer_jbddirty(bh);
917-
JBUFFER_TRACE(jh, "refile for checkpoint writeback");
918-
__jbd2_journal_refile_buffer(jh);
919-
jbd_unlock_bh_state(bh);
920923
} else {
921924
J_ASSERT_BH(bh, !buffer_dirty(bh));
922-
/* The buffer on BJ_Forget list and not jbddirty means
925+
/*
926+
* The buffer on BJ_Forget list and not jbddirty means
923927
* it has been freed by this transaction and hence it
924928
* could not have been reallocated until this
925929
* transaction has committed. *BUT* it could be
926930
* reallocated once we have written all the data to
927931
* disk and before we process the buffer on BJ_Forget
928-
* list. */
929-
JBUFFER_TRACE(jh, "refile or unfile freed buffer");
930-
__jbd2_journal_refile_buffer(jh);
931-
if (!jh->b_transaction) {
932-
jbd_unlock_bh_state(bh);
933-
/* needs a brelse */
934-
jbd2_journal_remove_journal_head(bh);
935-
release_buffer_page(bh);
936-
} else
937-
jbd_unlock_bh_state(bh);
932+
* list.
933+
*/
934+
if (!jh->b_next_transaction)
935+
try_to_free = 1;
938936
}
937+
JBUFFER_TRACE(jh, "refile or unfile buffer");
938+
__jbd2_journal_refile_buffer(jh);
939+
jbd_unlock_bh_state(bh);
940+
if (try_to_free)
941+
release_buffer_page(bh); /* Drops bh reference */
942+
else
943+
__brelse(bh);
939944
cond_resched_lock(&journal->j_list_lock);
940945
}
941946
spin_unlock(&journal->j_list_lock);

fs/jbd2/journal.c

Lines changed: 29 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -2078,10 +2078,9 @@ static void journal_free_journal_head(struct journal_head *jh)
20782078
* When a buffer has its BH_JBD bit set it is immune from being released by
20792079
* core kernel code, mainly via ->b_count.
20802080
*
2081-
* A journal_head may be detached from its buffer_head when the journal_head's
2082-
* b_transaction, b_cp_transaction and b_next_transaction pointers are NULL.
2083-
* Various places in JBD call jbd2_journal_remove_journal_head() to indicate that the
2084-
* journal_head can be dropped if needed.
2081+
* A journal_head is detached from its buffer_head when the journal_head's
2082+
* b_jcount reaches zero. Running transaction (b_transaction) and checkpoint
2083+
* transaction (b_cp_transaction) hold their references to b_jcount.
20852084
*
20862085
* Various places in the kernel want to attach a journal_head to a buffer_head
20872086
* _before_ attaching the journal_head to a transaction. To protect the
@@ -2094,17 +2093,16 @@ static void journal_free_journal_head(struct journal_head *jh)
20942093
* (Attach a journal_head if needed. Increments b_jcount)
20952094
* struct journal_head *jh = jbd2_journal_add_journal_head(bh);
20962095
* ...
2096+
* (Get another reference for transaction)
2097+
* jbd2_journal_grab_journal_head(bh);
20972098
* jh->b_transaction = xxx;
2099+
* (Put original reference)
20982100
* jbd2_journal_put_journal_head(jh);
2099-
*
2100-
* Now, the journal_head's b_jcount is zero, but it is safe from being released
2101-
* because it has a non-zero b_transaction.
21022101
*/
21032102

21042103
/*
21052104
* Give a buffer_head a journal_head.
21062105
*
2107-
* Doesn't need the journal lock.
21082106
* May sleep.
21092107
*/
21102108
struct journal_head *jbd2_journal_add_journal_head(struct buffer_head *bh)
@@ -2168,61 +2166,29 @@ static void __journal_remove_journal_head(struct buffer_head *bh)
21682166
struct journal_head *jh = bh2jh(bh);
21692167

21702168
J_ASSERT_JH(jh, jh->b_jcount >= 0);
2171-
2172-
get_bh(bh);
2173-
if (jh->b_jcount == 0) {
2174-
if (jh->b_transaction == NULL &&
2175-
jh->b_next_transaction == NULL &&
2176-
jh->b_cp_transaction == NULL) {
2177-
J_ASSERT_JH(jh, jh->b_jlist == BJ_None);
2178-
J_ASSERT_BH(bh, buffer_jbd(bh));
2179-
J_ASSERT_BH(bh, jh2bh(jh) == bh);
2180-
BUFFER_TRACE(bh, "remove journal_head");
2181-
if (jh->b_frozen_data) {
2182-
printk(KERN_WARNING "%s: freeing "
2183-
"b_frozen_data\n",
2184-
__func__);
2185-
jbd2_free(jh->b_frozen_data, bh->b_size);
2186-
}
2187-
if (jh->b_committed_data) {
2188-
printk(KERN_WARNING "%s: freeing "
2189-
"b_committed_data\n",
2190-
__func__);
2191-
jbd2_free(jh->b_committed_data, bh->b_size);
2192-
}
2193-
bh->b_private = NULL;
2194-
jh->b_bh = NULL; /* debug, really */
2195-
clear_buffer_jbd(bh);
2196-
__brelse(bh);
2197-
journal_free_journal_head(jh);
2198-
} else {
2199-
BUFFER_TRACE(bh, "journal_head was locked");
2200-
}
2169+
J_ASSERT_JH(jh, jh->b_transaction == NULL);
2170+
J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
2171+
J_ASSERT_JH(jh, jh->b_cp_transaction == NULL);
2172+
J_ASSERT_JH(jh, jh->b_jlist == BJ_None);
2173+
J_ASSERT_BH(bh, buffer_jbd(bh));
2174+
J_ASSERT_BH(bh, jh2bh(jh) == bh);
2175+
BUFFER_TRACE(bh, "remove journal_head");
2176+
if (jh->b_frozen_data) {
2177+
printk(KERN_WARNING "%s: freeing b_frozen_data\n", __func__);
2178+
jbd2_free(jh->b_frozen_data, bh->b_size);
22012179
}
2180+
if (jh->b_committed_data) {
2181+
printk(KERN_WARNING "%s: freeing b_committed_data\n", __func__);
2182+
jbd2_free(jh->b_committed_data, bh->b_size);
2183+
}
2184+
bh->b_private = NULL;
2185+
jh->b_bh = NULL; /* debug, really */
2186+
clear_buffer_jbd(bh);
2187+
journal_free_journal_head(jh);
22022188
}
22032189

22042190
/*
2205-
* jbd2_journal_remove_journal_head(): if the buffer isn't attached to a transaction
2206-
* and has a zero b_jcount then remove and release its journal_head. If we did
2207-
* see that the buffer is not used by any transaction we also "logically"
2208-
* decrement ->b_count.
2209-
*
2210-
* We in fact take an additional increment on ->b_count as a convenience,
2211-
* because the caller usually wants to do additional things with the bh
2212-
* after calling here.
2213-
* The caller of jbd2_journal_remove_journal_head() *must* run __brelse(bh) at some
2214-
* time. Once the caller has run __brelse(), the buffer is eligible for
2215-
* reaping by try_to_free_buffers().
2216-
*/
2217-
void jbd2_journal_remove_journal_head(struct buffer_head *bh)
2218-
{
2219-
jbd_lock_bh_journal_head(bh);
2220-
__journal_remove_journal_head(bh);
2221-
jbd_unlock_bh_journal_head(bh);
2222-
}
2223-
2224-
/*
2225-
* Drop a reference on the passed journal_head. If it fell to zero then try to
2191+
* Drop a reference on the passed journal_head. If it fell to zero then
22262192
* release the journal_head from the buffer_head.
22272193
*/
22282194
void jbd2_journal_put_journal_head(struct journal_head *jh)
@@ -2232,11 +2198,12 @@ void jbd2_journal_put_journal_head(struct journal_head *jh)
22322198
jbd_lock_bh_journal_head(bh);
22332199
J_ASSERT_JH(jh, jh->b_jcount > 0);
22342200
--jh->b_jcount;
2235-
if (!jh->b_jcount && !jh->b_transaction) {
2201+
if (!jh->b_jcount) {
22362202
__journal_remove_journal_head(bh);
2203+
jbd_unlock_bh_journal_head(bh);
22372204
__brelse(bh);
2238-
}
2239-
jbd_unlock_bh_journal_head(bh);
2205+
} else
2206+
jbd_unlock_bh_journal_head(bh);
22402207
}
22412208

22422209
/*

0 commit comments

Comments
 (0)