Skip to content

Commit ec95f1d

Browse files
committed
orangefs: get rid of knob code...
Christoph Hellwig sent in a reversion of "orangefs: remember count when reading." because: ->read_iter calls can race with each other and one or more ->flush calls. Remove the scheme to store the read count in the file private data as it is completely racy and can cause use after free or double free conditions. Christoph's reversion caused Orangefs not to work or to compile. I added a patch that fixed that, but Intel's kbuild test robot pointed out that sending Christoph's patch followed by my patch upstream would break bisection because of the failure to compile. So I have combined the reversion plus my patch... here's the commit message that was in my patch: Logically, optimal Orangefs "pages" are 4 megabytes. Reading large Orangefs files 4096 bytes at a time is like trying to kick a dead whale down the beach. Before Christoph's "Revert orangefs: remember count when reading." I tried to give users a knob whereby they could, for example, use "count" in read(2) or bs with dd(1) to get whatever they considered an appropriate amount of bytes at a time from Orangefs and fill as many page cache pages as they could at once. Without the racy code that Christoph reverted, Orangefs won't even compile, much less work. So this replaces the logic that used the private file data that Christoph reverted with a static number of bytes to read from Orangefs. I ran tests like the following to determine what a reasonable static number of bytes might be: dd if=/pvfsmnt/asdf of=/dev/null count=128 bs=4194304 dd if=/pvfsmnt/asdf of=/dev/null count=256 bs=2097152 dd if=/pvfsmnt/asdf of=/dev/null count=512 bs=1048576 . . . dd if=/pvfsmnt/asdf of=/dev/null count=4194304 bs=128 Reads seem faster using the static number, so my "knob code" wasn't just racy, it wasn't even a good idea... Signed-off-by: Mike Marshall <[email protected]> Reported-by: kbuild test robot <[email protected]>
1 parent 7111951 commit ec95f1d

File tree

3 files changed

+7
-62
lines changed

3 files changed

+7
-62
lines changed

fs/orangefs/file.c

Lines changed: 1 addition & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -346,23 +346,8 @@ static ssize_t orangefs_file_read_iter(struct kiocb *iocb,
346346
struct iov_iter *iter)
347347
{
348348
int ret;
349-
struct orangefs_read_options *ro;
350-
351349
orangefs_stats.reads++;
352350

353-
/*
354-
* Remember how they set "count" in read(2) or pread(2) or whatever -
355-
* users can use count as a knob to control orangefs io size and later
356-
* we can try to help them fill as many pages as possible in readpage.
357-
*/
358-
if (!iocb->ki_filp->private_data) {
359-
iocb->ki_filp->private_data = kmalloc(sizeof *ro, GFP_KERNEL);
360-
if (!iocb->ki_filp->private_data)
361-
return(ENOMEM);
362-
ro = iocb->ki_filp->private_data;
363-
ro->blksiz = iter->count;
364-
}
365-
366351
down_read(&file_inode(iocb->ki_filp)->i_rwsem);
367352
ret = orangefs_revalidate_mapping(file_inode(iocb->ki_filp));
368353
if (ret)
@@ -650,12 +635,6 @@ static int orangefs_lock(struct file *filp, int cmd, struct file_lock *fl)
650635
return rc;
651636
}
652637

653-
static int orangefs_file_open(struct inode * inode, struct file *file)
654-
{
655-
file->private_data = NULL;
656-
return generic_file_open(inode, file);
657-
}
658-
659638
static int orangefs_flush(struct file *file, fl_owner_t id)
660639
{
661640
/*
@@ -669,9 +648,6 @@ static int orangefs_flush(struct file *file, fl_owner_t id)
669648
struct inode *inode = file->f_mapping->host;
670649
int r;
671650

672-
kfree(file->private_data);
673-
file->private_data = NULL;
674-
675651
if (inode->i_state & I_DIRTY_TIME) {
676652
spin_lock(&inode->i_lock);
677653
inode->i_state &= ~I_DIRTY_TIME;
@@ -694,7 +670,7 @@ const struct file_operations orangefs_file_operations = {
694670
.lock = orangefs_lock,
695671
.unlocked_ioctl = orangefs_ioctl,
696672
.mmap = orangefs_file_mmap,
697-
.open = orangefs_file_open,
673+
.open = generic_file_open,
698674
.flush = orangefs_flush,
699675
.release = orangefs_file_release,
700676
.fsync = orangefs_fsync,

fs/orangefs/inode.c

Lines changed: 6 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -259,46 +259,19 @@ static int orangefs_readpage(struct file *file, struct page *page)
259259
pgoff_t index; /* which page */
260260
struct page *next_page;
261261
char *kaddr;
262-
struct orangefs_read_options *ro = file->private_data;
263262
loff_t read_size;
264-
loff_t roundedup;
265263
int buffer_index = -1; /* orangefs shared memory slot */
266264
int slot_index; /* index into slot */
267265
int remaining;
268266

269267
/*
270-
* If they set some miniscule size for "count" in read(2)
271-
* (for example) then let's try to read a page, or the whole file
272-
* if it is smaller than a page. Once "count" goes over a page
273-
* then lets round up to the highest page size multiple that is
274-
* less than or equal to "count" and do that much orangefs IO and
275-
* try to fill as many pages as we can from it.
276-
*
277-
* "count" should be represented in ro->blksiz.
278-
*
279-
* inode->i_size = file size.
268+
* Get up to this many bytes from Orangefs at a time and try
269+
* to fill them into the page cache at once. Tests with dd made
270+
* this seem like a reasonable static number, if there was
271+
* interest perhaps this number could be made setable through
272+
* sysfs...
280273
*/
281-
if (ro) {
282-
if (ro->blksiz < PAGE_SIZE) {
283-
if (inode->i_size < PAGE_SIZE)
284-
read_size = inode->i_size;
285-
else
286-
read_size = PAGE_SIZE;
287-
} else {
288-
roundedup = ((PAGE_SIZE - 1) & ro->blksiz) ?
289-
((ro->blksiz + PAGE_SIZE) & ~(PAGE_SIZE -1)) :
290-
ro->blksiz;
291-
if (roundedup > inode->i_size)
292-
read_size = inode->i_size;
293-
else
294-
read_size = roundedup;
295-
296-
}
297-
} else {
298-
read_size = PAGE_SIZE;
299-
}
300-
if (!read_size)
301-
read_size = PAGE_SIZE;
274+
read_size = 524288;
302275

303276
if (PageDirty(page))
304277
orangefs_launder_page(page);

fs/orangefs/orangefs-kernel.h

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -239,10 +239,6 @@ struct orangefs_write_range {
239239
kgid_t gid;
240240
};
241241

242-
struct orangefs_read_options {
243-
ssize_t blksiz;
244-
};
245-
246242
extern struct orangefs_stats orangefs_stats;
247243

248244
/*

0 commit comments

Comments
 (0)