Skip to content

Commit 56244ef

Browse files
committed
Btrfs: fix handling of faults from btrfs_copy_from_user
When btrfs_copy_from_user isn't able to copy all of the pages, we need to adjust our accounting to reflect the work that was actually done. Commit 2e78c92 changed around the decisions a little and we ended up skipping the accounting adjustments some of the time. This commit makes sure that when we don't copy anything at all, we still hop into the adjustments, and switches to release_bytes instead of write_bytes, since write_bytes isn't aligned. The accounting errors led to warnings during btrfs_destroy_inode: [ 70.847532] WARNING: CPU: 10 PID: 514 at fs/btrfs/inode.c:9350 btrfs_destroy_inode+0x2b3/0x2c0 [ 70.847536] Modules linked in: i2c_piix4 virtio_net i2c_core input_leds button led_class serio_raw acpi_cpufreq sch_fq_codel autofs4 virtio_blk [ 70.847538] CPU: 10 PID: 514 Comm: umount Tainted: G W 4.6.0-rc6_00062_g2997da1-dirty #23 [ 70.847539] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.9.0-1.fc24 04/01/2014 [ 70.847542] 0000000000000000 ffff880ff5cafab8 ffffffff8149d5e9 0000000000000202 [ 70.847543] 0000000000000000 0000000000000000 0000000000000000 ffff880ff5cafb08 [ 70.847547] ffffffff8107bdfd ffff880ff5cafaf8 000024868120013d ffff880ff5cafb28 [ 70.847547] Call Trace: [ 70.847550] [<ffffffff8149d5e9>] dump_stack+0x51/0x78 [ 70.847551] [<ffffffff8107bdfd>] __warn+0xfd/0x120 [ 70.847553] [<ffffffff8107be3d>] warn_slowpath_null+0x1d/0x20 [ 70.847555] [<ffffffff8139c9e3>] btrfs_destroy_inode+0x2b3/0x2c0 [ 70.847556] [<ffffffff812003a1>] ? __destroy_inode+0x71/0x140 [ 70.847558] [<ffffffff812004b3>] destroy_inode+0x43/0x70 [ 70.847559] [<ffffffff810b7b5f>] ? wake_up_bit+0x2f/0x40 [ 70.847560] [<ffffffff81200c68>] evict+0x148/0x1d0 [ 70.847562] [<ffffffff81398ade>] ? start_transaction+0x3de/0x460 [ 70.847564] [<ffffffff81200d49>] dispose_list+0x59/0x80 [ 70.847565] [<ffffffff81201ba0>] evict_inodes+0x180/0x190 [ 70.847566] [<ffffffff812191ff>] ? __sync_filesystem+0x3f/0x50 [ 70.847568] [<ffffffff811e95f8>] generic_shutdown_super+0x48/0x100 [ 70.847569] [<ffffffff810b75c0>] ? woken_wake_function+0x20/0x20 [ 70.847571] [<ffffffff811e9796>] kill_anon_super+0x16/0x30 [ 70.847573] [<ffffffff81365cde>] btrfs_kill_super+0x1e/0x130 [ 70.847574] [<ffffffff811e99be>] deactivate_locked_super+0x4e/0x90 [ 70.847576] [<ffffffff811e9e61>] deactivate_super+0x51/0x70 [ 70.847577] [<ffffffff8120536f>] cleanup_mnt+0x3f/0x80 [ 70.847579] [<ffffffff81205402>] __cleanup_mnt+0x12/0x20 [ 70.847581] [<ffffffff81098358>] task_work_run+0x68/0xa0 [ 70.847582] [<ffffffff810022b6>] exit_to_usermode_loop+0xd6/0xe0 [ 70.847583] [<ffffffff81002e1d>] do_syscall_64+0xbd/0x170 [ 70.847586] [<ffffffff817d4dbc>] entry_SYSCALL64_slow_path+0x25/0x25 This is the test program I used to force short returns from btrfs_copy_from_user void *dontneed(void *arg) { char *p = arg; int ret; while(1) { ret = madvise(p, BUFSIZE/4, MADV_DONTNEED); if (ret) { perror("madvise"); exit(1); } } } int main(int ac, char **av) { int ret; int fd; char *filename; unsigned long offset; char *buf; int i; pthread_t tid; if (ac != 2) { fprintf(stderr, "usage: dammitdave filename\n"); exit(1); } buf = mmap(NULL, BUFSIZE, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); if (buf == MAP_FAILED) { perror("mmap"); exit(1); } memset(buf, 'a', BUFSIZE); filename = av[1]; ret = pthread_create(&tid, NULL, dontneed, buf); if (ret) { fprintf(stderr, "error %d from pthread_create\n", ret); exit(1); } ret = pthread_detach(tid); if (ret) { fprintf(stderr, "pthread detach failed %d\n", ret); exit(1); } while (1) { fd = open(filename, O_RDWR | O_CREAT, 0600); if (fd < 0) { perror("open"); exit(1); } for (i = 0; i < ROUNDS; i++) { int this_write = BUFSIZE; offset = rand() % MAXSIZE; ret = pwrite(fd, buf, this_write, offset); if (ret < 0) { perror("pwrite"); exit(1); } else if (ret != this_write) { fprintf(stderr, "short write to %s offset %lu ret %d\n", filename, offset, ret); exit(1); } if (i == ROUNDS - 1) { ret = sync_file_range(fd, offset, 4096, SYNC_FILE_RANGE_WRITE); if (ret < 0) { perror("sync_file_range"); exit(1); } } } ret = ftruncate(fd, 0); if (ret < 0) { perror("ftruncate"); exit(1); } ret = close(fd); if (ret) { perror("close"); exit(1); } ret = unlink(filename); if (ret) { perror("unlink"); exit(1); } } return 0; } Signed-off-by: Chris Mason <[email protected]> Reported-by: Dave Jones <[email protected]> Fixes: 2e78c92 cc: [email protected] # v4.6 Signed-off-by: Chris Mason <[email protected]>
1 parent 9257b4c commit 56244ef

File tree

1 file changed

+17
-10
lines changed

1 file changed

+17
-10
lines changed

fs/btrfs/file.c

Lines changed: 17 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1596,6 +1596,13 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
15961596

15971597
copied = btrfs_copy_from_user(pos, write_bytes, pages, i);
15981598

1599+
num_sectors = BTRFS_BYTES_TO_BLKS(root->fs_info,
1600+
reserve_bytes);
1601+
dirty_sectors = round_up(copied + sector_offset,
1602+
root->sectorsize);
1603+
dirty_sectors = BTRFS_BYTES_TO_BLKS(root->fs_info,
1604+
dirty_sectors);
1605+
15991606
/*
16001607
* if we have trouble faulting in the pages, fall
16011608
* back to one page at a time
@@ -1605,6 +1612,7 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
16051612

16061613
if (copied == 0) {
16071614
force_page_uptodate = true;
1615+
dirty_sectors = 0;
16081616
dirty_pages = 0;
16091617
} else {
16101618
force_page_uptodate = false;
@@ -1615,20 +1623,19 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
16151623
/*
16161624
* If we had a short copy we need to release the excess delaloc
16171625
* bytes we reserved. We need to increment outstanding_extents
1618-
* because btrfs_delalloc_release_space will decrement it, but
1626+
* because btrfs_delalloc_release_space and
1627+
* btrfs_delalloc_release_metadata will decrement it, but
16191628
* we still have an outstanding extent for the chunk we actually
16201629
* managed to copy.
16211630
*/
1622-
num_sectors = BTRFS_BYTES_TO_BLKS(root->fs_info,
1623-
reserve_bytes);
1624-
dirty_sectors = round_up(copied + sector_offset,
1625-
root->sectorsize);
1626-
dirty_sectors = BTRFS_BYTES_TO_BLKS(root->fs_info,
1627-
dirty_sectors);
1628-
16291631
if (num_sectors > dirty_sectors) {
1630-
release_bytes = (write_bytes - copied)
1631-
& ~((u64)root->sectorsize - 1);
1632+
/*
1633+
* we round down because we don't want to count
1634+
* any partial blocks actually sent through the
1635+
* IO machines
1636+
*/
1637+
release_bytes = round_down(release_bytes - copied,
1638+
root->sectorsize);
16321639
if (copied > 0) {
16331640
spin_lock(&BTRFS_I(inode)->lock);
16341641
BTRFS_I(inode)->outstanding_extents++;

0 commit comments

Comments
 (0)