Skip to content

Commit b6e5131

Browse files
author
Jens Axboe
committed
writeback: separate starting of sync vs opportunistic writeback
bdi_start_writeback() is currently split into two paths, one for WB_SYNC_NONE and one for WB_SYNC_ALL. Add bdi_sync_writeback() for WB_SYNC_ALL writeback and let bdi_start_writeback() handle only WB_SYNC_NONE. Push down the writeback_control allocation and only accept the parameters that make sense for each function. This cleans up the API considerably. Signed-off-by: Jens Axboe <[email protected]>
1 parent bcddc3f commit b6e5131

File tree

5 files changed

+75
-95
lines changed

5 files changed

+75
-95
lines changed

fs/fs-writeback.c

Lines changed: 67 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -74,14 +74,10 @@ static inline bool bdi_work_on_stack(struct bdi_work *work)
7474
}
7575

7676
static inline void bdi_work_init(struct bdi_work *work,
77-
struct writeback_control *wbc)
77+
struct wb_writeback_args *args)
7878
{
7979
INIT_RCU_HEAD(&work->rcu_head);
80-
work->args.sb = wbc->sb;
81-
work->args.nr_pages = wbc->nr_to_write;
82-
work->args.sync_mode = wbc->sync_mode;
83-
work->args.range_cyclic = wbc->range_cyclic;
84-
work->args.for_kupdate = 0;
80+
work->args = *args;
8581
work->state = WS_USED;
8682
}
8783

@@ -194,7 +190,7 @@ static void bdi_wait_on_work_clear(struct bdi_work *work)
194190
}
195191

196192
static void bdi_alloc_queue_work(struct backing_dev_info *bdi,
197-
struct writeback_control *wbc)
193+
struct wb_writeback_args *args)
198194
{
199195
struct bdi_work *work;
200196

@@ -204,7 +200,7 @@ static void bdi_alloc_queue_work(struct backing_dev_info *bdi,
204200
*/
205201
work = kmalloc(sizeof(*work), GFP_ATOMIC);
206202
if (work) {
207-
bdi_work_init(work, wbc);
203+
bdi_work_init(work, args);
208204
bdi_queue_work(bdi, work);
209205
} else {
210206
struct bdi_writeback *wb = &bdi->wb;
@@ -214,24 +210,54 @@ static void bdi_alloc_queue_work(struct backing_dev_info *bdi,
214210
}
215211
}
216212

217-
void bdi_start_writeback(struct writeback_control *wbc)
213+
/**
214+
* bdi_sync_writeback - start and wait for writeback
215+
* @bdi: the backing device to write from
216+
* @sb: write inodes from this super_block
217+
*
218+
* Description:
219+
* This does WB_SYNC_ALL data integrity writeback and waits for the
220+
* IO to complete. Callers must hold the sb s_umount semaphore for
221+
* reading, to avoid having the super disappear before we are done.
222+
*/
223+
static void bdi_sync_writeback(struct backing_dev_info *bdi,
224+
struct super_block *sb)
218225
{
219-
/*
220-
* WB_SYNC_NONE is opportunistic writeback. If this allocation fails,
221-
* bdi_queue_work() will wake up the thread and flush old data. This
222-
* should ensure some amount of progress in freeing memory.
223-
*/
224-
if (wbc->sync_mode != WB_SYNC_ALL)
225-
bdi_alloc_queue_work(wbc->bdi, wbc);
226-
else {
227-
struct bdi_work work;
226+
struct wb_writeback_args args = {
227+
.sb = sb,
228+
.sync_mode = WB_SYNC_ALL,
229+
.nr_pages = LONG_MAX,
230+
.range_cyclic = 0,
231+
};
232+
struct bdi_work work;
228233

229-
bdi_work_init(&work, wbc);
230-
work.state |= WS_ONSTACK;
234+
bdi_work_init(&work, &args);
235+
work.state |= WS_ONSTACK;
231236

232-
bdi_queue_work(wbc->bdi, &work);
233-
bdi_wait_on_work_clear(&work);
234-
}
237+
bdi_queue_work(bdi, &work);
238+
bdi_wait_on_work_clear(&work);
239+
}
240+
241+
/**
242+
* bdi_start_writeback - start writeback
243+
* @bdi: the backing device to write from
244+
* @nr_pages: the number of pages to write
245+
*
246+
* Description:
247+
* This does WB_SYNC_NONE opportunistic writeback. The IO is only
248+
* started when this function returns, we make no guarantees on
249+
* completion. Caller need not hold sb s_umount semaphore.
250+
*
251+
*/
252+
void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages)
253+
{
254+
struct wb_writeback_args args = {
255+
.sync_mode = WB_SYNC_NONE,
256+
.nr_pages = nr_pages,
257+
.range_cyclic = 1,
258+
};
259+
260+
bdi_alloc_queue_work(bdi, &args);
235261
}
236262

237263
/*
@@ -863,23 +889,25 @@ int bdi_writeback_task(struct bdi_writeback *wb)
863889
}
864890

865891
/*
866-
* Schedule writeback for all backing devices. Can only be used for
867-
* WB_SYNC_NONE writeback, WB_SYNC_ALL should use bdi_start_writeback()
868-
* and pass in the superblock.
892+
* Schedule writeback for all backing devices. This does WB_SYNC_NONE
893+
* writeback, for integrity writeback see bdi_sync_writeback().
869894
*/
870-
static void bdi_writeback_all(struct writeback_control *wbc)
895+
static void bdi_writeback_all(struct super_block *sb, long nr_pages)
871896
{
897+
struct wb_writeback_args args = {
898+
.sb = sb,
899+
.nr_pages = nr_pages,
900+
.sync_mode = WB_SYNC_NONE,
901+
};
872902
struct backing_dev_info *bdi;
873903

874-
WARN_ON(wbc->sync_mode == WB_SYNC_ALL);
875-
876904
rcu_read_lock();
877905

878906
list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) {
879907
if (!bdi_has_dirty_io(bdi))
880908
continue;
881909

882-
bdi_alloc_queue_work(bdi, wbc);
910+
bdi_alloc_queue_work(bdi, &args);
883911
}
884912

885913
rcu_read_unlock();
@@ -891,17 +919,10 @@ static void bdi_writeback_all(struct writeback_control *wbc)
891919
*/
892920
void wakeup_flusher_threads(long nr_pages)
893921
{
894-
struct writeback_control wbc = {
895-
.sync_mode = WB_SYNC_NONE,
896-
.older_than_this = NULL,
897-
.range_cyclic = 1,
898-
};
899-
900922
if (nr_pages == 0)
901923
nr_pages = global_page_state(NR_FILE_DIRTY) +
902924
global_page_state(NR_UNSTABLE_NFS);
903-
wbc.nr_to_write = nr_pages;
904-
bdi_writeback_all(&wbc);
925+
bdi_writeback_all(NULL, nr_pages);
905926
}
906927

907928
static noinline void block_dump___mark_inode_dirty(struct inode *inode)
@@ -1048,15 +1069,15 @@ EXPORT_SYMBOL(__mark_inode_dirty);
10481069
* on the writer throttling path, and we get decent balancing between many
10491070
* throttled threads: we don't want them all piling up on inode_sync_wait.
10501071
*/
1051-
static void wait_sb_inodes(struct writeback_control *wbc)
1072+
static void wait_sb_inodes(struct super_block *sb)
10521073
{
10531074
struct inode *inode, *old_inode = NULL;
10541075

10551076
/*
10561077
* We need to be protected against the filesystem going from
10571078
* r/o to r/w or vice versa.
10581079
*/
1059-
WARN_ON(!rwsem_is_locked(&wbc->sb->s_umount));
1080+
WARN_ON(!rwsem_is_locked(&sb->s_umount));
10601081

10611082
spin_lock(&inode_lock);
10621083

@@ -1067,7 +1088,7 @@ static void wait_sb_inodes(struct writeback_control *wbc)
10671088
* In which case, the inode may not be on the dirty list, but
10681089
* we still have to wait for that writeout.
10691090
*/
1070-
list_for_each_entry(inode, &wbc->sb->s_inodes, i_sb_list) {
1091+
list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
10711092
struct address_space *mapping;
10721093

10731094
if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE|I_NEW))
@@ -1107,24 +1128,16 @@ static void wait_sb_inodes(struct writeback_control *wbc)
11071128
* for IO completion of submitted IO. The number of pages submitted is
11081129
* returned.
11091130
*/
1110-
long writeback_inodes_sb(struct super_block *sb)
1131+
void writeback_inodes_sb(struct super_block *sb)
11111132
{
1112-
struct writeback_control wbc = {
1113-
.sb = sb,
1114-
.sync_mode = WB_SYNC_NONE,
1115-
.range_start = 0,
1116-
.range_end = LLONG_MAX,
1117-
};
11181133
unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY);
11191134
unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS);
11201135
long nr_to_write;
11211136

11221137
nr_to_write = nr_dirty + nr_unstable +
11231138
(inodes_stat.nr_inodes - inodes_stat.nr_unused);
11241139

1125-
wbc.nr_to_write = nr_to_write;
1126-
bdi_writeback_all(&wbc);
1127-
return nr_to_write - wbc.nr_to_write;
1140+
bdi_writeback_all(sb, nr_to_write);
11281141
}
11291142
EXPORT_SYMBOL(writeback_inodes_sb);
11301143

@@ -1135,21 +1148,10 @@ EXPORT_SYMBOL(writeback_inodes_sb);
11351148
* This function writes and waits on any dirty inode belonging to this
11361149
* super_block. The number of pages synced is returned.
11371150
*/
1138-
long sync_inodes_sb(struct super_block *sb)
1151+
void sync_inodes_sb(struct super_block *sb)
11391152
{
1140-
struct writeback_control wbc = {
1141-
.sb = sb,
1142-
.bdi = sb->s_bdi,
1143-
.sync_mode = WB_SYNC_ALL,
1144-
.range_start = 0,
1145-
.range_end = LLONG_MAX,
1146-
};
1147-
long nr_to_write = LONG_MAX; /* doesn't actually matter */
1148-
1149-
wbc.nr_to_write = nr_to_write;
1150-
bdi_start_writeback(&wbc);
1151-
wait_sb_inodes(&wbc);
1152-
return nr_to_write - wbc.nr_to_write;
1153+
bdi_sync_writeback(sb->s_bdi, sb);
1154+
wait_sb_inodes(sb);
11531155
}
11541156
EXPORT_SYMBOL(sync_inodes_sb);
11551157

fs/ubifs/budget.c

Lines changed: 3 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -54,29 +54,15 @@
5454
* @nr_to_write: how many dirty pages to write-back
5555
*
5656
* This function shrinks UBIFS liability by means of writing back some amount
57-
* of dirty inodes and their pages. Returns the amount of pages which were
58-
* written back. The returned value does not include dirty inodes which were
59-
* synchronized.
57+
* of dirty inodes and their pages.
6058
*
6159
* Note, this function synchronizes even VFS inodes which are locked
6260
* (@i_mutex) by the caller of the budgeting function, because write-back does
6361
* not touch @i_mutex.
6462
*/
65-
static int shrink_liability(struct ubifs_info *c, int nr_to_write)
63+
static void shrink_liability(struct ubifs_info *c, int nr_to_write)
6664
{
67-
int nr_written;
68-
69-
nr_written = writeback_inodes_sb(c->vfs_sb);
70-
if (!nr_written) {
71-
/*
72-
* Re-try again but wait on pages/inodes which are being
73-
* written-back concurrently (e.g., by pdflush).
74-
*/
75-
nr_written = sync_inodes_sb(c->vfs_sb);
76-
}
77-
78-
dbg_budg("%d pages were written back", nr_written);
79-
return nr_written;
65+
writeback_inodes_sb(c->vfs_sb);
8066
}
8167

8268
/**

include/linux/backing-dev.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,7 @@ int bdi_register(struct backing_dev_info *bdi, struct device *parent,
101101
const char *fmt, ...);
102102
int bdi_register_dev(struct backing_dev_info *bdi, dev_t dev);
103103
void bdi_unregister(struct backing_dev_info *bdi);
104-
void bdi_start_writeback(struct writeback_control *wbc);
104+
void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages);
105105
int bdi_writeback_task(struct bdi_writeback *wb);
106106
int bdi_has_dirty_io(struct backing_dev_info *bdi);
107107

include/linux/writeback.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -68,8 +68,8 @@ struct writeback_control {
6868
*/
6969
struct bdi_writeback;
7070
int inode_wait(void *);
71-
long writeback_inodes_sb(struct super_block *);
72-
long sync_inodes_sb(struct super_block *);
71+
void writeback_inodes_sb(struct super_block *);
72+
void sync_inodes_sb(struct super_block *);
7373
void writeback_inodes_wbc(struct writeback_control *wbc);
7474
long wb_do_writeback(struct bdi_writeback *wb, int force_wait);
7575
void wakeup_flusher_threads(long nr_pages);

mm/page-writeback.c

Lines changed: 2 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -582,16 +582,8 @@ static void balance_dirty_pages(struct address_space *mapping)
582582
if ((laptop_mode && pages_written) ||
583583
(!laptop_mode && ((nr_writeback = global_page_state(NR_FILE_DIRTY)
584584
+ global_page_state(NR_UNSTABLE_NFS))
585-
> background_thresh))) {
586-
struct writeback_control wbc = {
587-
.bdi = bdi,
588-
.sync_mode = WB_SYNC_NONE,
589-
.nr_to_write = nr_writeback,
590-
};
591-
592-
593-
bdi_start_writeback(&wbc);
594-
}
585+
> background_thresh)))
586+
bdi_start_writeback(bdi, nr_writeback);
595587
}
596588

597589
void set_page_dirty_balance(struct page *page, int page_mkwrite)

0 commit comments

Comments
 (0)