Skip to content

Commit 9ccce09

Browse files
committed
Merge tag 'for-linus-5.13-ofs-1' of git://git.kernel.org/pub/scm/linux/kernel/git/hubcap/linux
Pull orangefs updates from Mike Marshall:

"orangefs: implement orangefs_readahead

mm/readahead.c/read_pages was quite a bit different back when I put my open-coded readahead logic into orangefs_readpage. That logic seemed to work as designed back then, but it is a trainwreck now.

This implements orangefs_readahead using the new xarray and readahead_expand features and removes all my open-coded readahead logic.

This results in an extreme read performance improvement. These sample numbers are from my test VM.

Here's an example of what's upstream in 5.11.8-200.fc33.x86_64:

30+0 records in
30+0 records out
125829120 bytes (126 MB, 120 MiB) copied, 5.77943 s, 21.8 MB/s

And here's this version of orangefs_readahead on top of 5.12.0-rc4:

30+0 records in
30+0 records out
125829120 bytes (126 MB, 120 MiB) copied, 0.325919 s, 386 MB/s

There are four xfstest regressions with this patch. David Howells and Matthew Wilcox have been helping me work with this code."

* tag 'for-linus-5.13-ofs-1' of git://git.kernel.org/pub/scm/linux/kernel/git/hubcap/linux:
orangefs: leave files in the page cache for a few microseconds at least
orangefs: implement orangefs_readahead
2 parents 27787ba + 211f9f2 commit 9ccce09

File tree

3 files changed

+54
-104
lines changed

3 files changed

+54
-104
lines changed

fs/orangefs/file.c

Lines changed: 6 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -248,21 +248,7 @@ ssize_t wait_for_direct_io(enum ORANGEFS_io_type type, struct inode *inode,
248248
* or it can pointers to struct page's
249249
*/
250250

251-
/*
252-
* When reading, readahead_size will only be zero when
253-
* we're doing O_DIRECT, otherwise we got here from
254-
* orangefs_readpage.
255-
*
256-
* If we got here from orangefs_readpage we want to
257-
* copy either a page or the whole file into the io
258-
* vector, whichever is smaller.
259-
*/
260-
if (readahead_size)
261-
copy_amount =
262-
min(new_op->downcall.resp.io.amt_complete,
263-
(__s64)PAGE_SIZE);
264-
else
265-
copy_amount = new_op->downcall.resp.io.amt_complete;
251+
copy_amount = new_op->downcall.resp.io.amt_complete;
266252

267253
ret = orangefs_bufmap_copy_to_iovec(iter, buffer_index,
268254
copy_amount);
@@ -283,19 +269,11 @@ ssize_t wait_for_direct_io(enum ORANGEFS_io_type type, struct inode *inode,
283269

284270
out:
285271
if (buffer_index >= 0) {
286-
if ((readahead_size) && (type == ORANGEFS_IO_READ)) {
287-
/* readpage */
288-
*index_return = buffer_index;
289-
gossip_debug(GOSSIP_FILE_DEBUG,
290-
"%s: hold on to buffer_index :%d:\n",
291-
__func__, buffer_index);
292-
} else {
293-
/* O_DIRECT */
294-
orangefs_bufmap_put(buffer_index);
295-
gossip_debug(GOSSIP_FILE_DEBUG,
296-
"%s(%pU): PUT buffer_index %d\n",
297-
__func__, handle, buffer_index);
298-
}
272+
orangefs_bufmap_put(buffer_index);
273+
gossip_debug(GOSSIP_FILE_DEBUG,
274+
"%s(%pU): PUT buffer_index %d\n",
275+
__func__, handle, buffer_index);
276+
buffer_index = -1;
299277
}
300278
op_release(new_op);
301279
return ret;

fs/orangefs/inode.c

Lines changed: 47 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -245,51 +245,75 @@ static int orangefs_writepages(struct address_space *mapping,
245245

246246
static int orangefs_launder_page(struct page *);
247247

248+
static void orangefs_readahead(struct readahead_control *rac)
249+
{
250+
loff_t offset;
251+
struct iov_iter iter;
252+
struct file *file = rac->file;
253+
struct inode *inode = file->f_mapping->host;
254+
struct xarray *i_pages;
255+
struct page *page;
256+
loff_t new_start = readahead_pos(rac);
257+
int ret;
258+
size_t new_len = 0;
259+
260+
loff_t bytes_remaining = inode->i_size - readahead_pos(rac);
261+
loff_t pages_remaining = bytes_remaining / PAGE_SIZE;
262+
263+
if (pages_remaining >= 1024)
264+
new_len = 4194304;
265+
else if (pages_remaining > readahead_count(rac))
266+
new_len = bytes_remaining;
267+
268+
if (new_len)
269+
readahead_expand(rac, new_start, new_len);
270+
271+
offset = readahead_pos(rac);
272+
i_pages = &file->f_mapping->i_pages;
273+
274+
iov_iter_xarray(&iter, READ, i_pages, offset, readahead_length(rac));
275+
276+
/* read in the pages. */
277+
if ((ret = wait_for_direct_io(ORANGEFS_IO_READ, inode,
278+
&offset, &iter, readahead_length(rac),
279+
inode->i_size, NULL, NULL, file)) < 0)
280+
gossip_debug(GOSSIP_FILE_DEBUG,
281+
"%s: wait_for_direct_io failed. \n", __func__);
282+
else
283+
ret = 0;
284+
285+
/* clean up. */
286+
while ((page = readahead_page(rac))) {
287+
page_endio(page, false, ret);
288+
put_page(page);
289+
}
290+
}
291+
248292
static int orangefs_readpage(struct file *file, struct page *page)
249293
{
250294
struct inode *inode = page->mapping->host;
251295
struct iov_iter iter;
252296
struct bio_vec bv;
253297
ssize_t ret;
254298
loff_t off; /* offset into this page */
255-
pgoff_t index; /* which page */
256-
struct page *next_page;
257-
char *kaddr;
258-
loff_t read_size;
259-
int buffer_index = -1; /* orangefs shared memory slot */
260-
int slot_index; /* index into slot */
261-
int remaining;
262-
263-
/*
264-
* Get up to this many bytes from Orangefs at a time and try
265-
* to fill them into the page cache at once. Tests with dd made
266-
* this seem like a reasonable static number, if there was
267-
* interest perhaps this number could be made setable through
268-
* sysfs...
269-
*/
270-
read_size = 524288;
271299

272300
if (PageDirty(page))
273301
orangefs_launder_page(page);
274302

275303
off = page_offset(page);
276-
index = off >> PAGE_SHIFT;
277304
bv.bv_page = page;
278305
bv.bv_len = PAGE_SIZE;
279306
bv.bv_offset = 0;
280307
iov_iter_bvec(&iter, READ, &bv, 1, PAGE_SIZE);
281308

282309
ret = wait_for_direct_io(ORANGEFS_IO_READ, inode, &off, &iter,
283-
read_size, inode->i_size, NULL, &buffer_index, file);
284-
remaining = ret;
310+
PAGE_SIZE, inode->i_size, NULL, NULL, file);
285311
/* this will only zero remaining unread portions of the page data */
286312
iov_iter_zero(~0U, &iter);
287313
/* takes care of potential aliasing */
288314
flush_dcache_page(page);
289315
if (ret < 0) {
290316
SetPageError(page);
291-
unlock_page(page);
292-
goto out;
293317
} else {
294318
SetPageUptodate(page);
295319
if (PageError(page))
@@ -298,60 +322,7 @@ static int orangefs_readpage(struct file *file, struct page *page)
298322
}
299323
/* unlock the page after the ->readpage() routine completes */
300324
unlock_page(page);
301-
302-
if (remaining > PAGE_SIZE) {
303-
slot_index = 0;
304-
while ((remaining - PAGE_SIZE) >= PAGE_SIZE) {
305-
remaining -= PAGE_SIZE;
306-
/*
307-
* It is an optimization to try and fill more than one
308-
* page... by now we've already gotten the single
309-
* page we were after, if stuff doesn't seem to
310-
* be going our way at this point just return
311-
* and hope for the best.
312-
*
313-
* If we look for pages and they're already there is
314-
* one reason to give up, and if they're not there
315-
* and we can't create them is another reason.
316-
*/
317-
318-
index++;
319-
slot_index++;
320-
next_page = find_get_page(inode->i_mapping, index);
321-
if (next_page) {
322-
gossip_debug(GOSSIP_FILE_DEBUG,
323-
"%s: found next page, quitting\n",
324-
__func__);
325-
put_page(next_page);
326-
goto out;
327-
}
328-
next_page = find_or_create_page(inode->i_mapping,
329-
index,
330-
GFP_KERNEL);
331-
/*
332-
* I've never hit this, leave it as a printk for
333-
* now so it will be obvious.
334-
*/
335-
if (!next_page) {
336-
printk("%s: can't create next page, quitting\n",
337-
__func__);
338-
goto out;
339-
}
340-
kaddr = kmap_atomic(next_page);
341-
orangefs_bufmap_page_fill(kaddr,
342-
buffer_index,
343-
slot_index);
344-
kunmap_atomic(kaddr);
345-
SetPageUptodate(next_page);
346-
unlock_page(next_page);
347-
put_page(next_page);
348-
}
349-
}
350-
351-
out:
352-
if (buffer_index != -1)
353-
orangefs_bufmap_put(buffer_index);
354-
return ret;
325+
return ret;
355326
}
356327

357328
static int orangefs_write_begin(struct file *file,
@@ -660,6 +631,7 @@ static ssize_t orangefs_direct_IO(struct kiocb *iocb,
660631
/** ORANGEFS2 implementation of address space operations */
661632
static const struct address_space_operations orangefs_address_operations = {
662633
.writepage = orangefs_writepage,
634+
.readahead = orangefs_readahead,
663635
.readpage = orangefs_readpage,
664636
.writepages = orangefs_writepages,
665637
.set_page_dirty = __set_page_dirty_nobuffers,

fs/orangefs/orangefs-mod.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ static ulong module_parm_debug_mask;
3131
__u64 orangefs_gossip_debug_mask;
3232
int op_timeout_secs = ORANGEFS_DEFAULT_OP_TIMEOUT_SECS;
3333
int slot_timeout_secs = ORANGEFS_DEFAULT_SLOT_TIMEOUT_SECS;
34-
int orangefs_cache_timeout_msecs = 50;
34+
int orangefs_cache_timeout_msecs = 500;
3535
int orangefs_dcache_timeout_msecs = 50;
3636
int orangefs_getattr_timeout_msecs = 50;
3737

0 commit comments

Comments
 (0)