Skip to content

Commit f441460

Browse files
Mark Fashehmasoncl
authored andcommitted
btrfs: fix deadlock with extent-same and readpage
->readpage() does page_lock() before extent_lock(), we do the opposite in extent-same. We want to reverse the order in btrfs_extent_same() but it's not quite straightforward since the page locks are taken inside btrfs_cmp_data(). So I split btrfs_cmp_data() into 3 parts with a small context structure that is passed between them. The first, btrfs_cmp_data_prepare() gathers up the pages needed (taking page lock as required) and puts them on our context structure. At this point, we are safe to lock the extent range. Afterwards, we use btrfs_cmp_data() to do the data compare as usual and btrfs_cmp_data_free() to clean up our context. Signed-off-by: Mark Fasheh <[email protected]> Reviewed-by: David Sterba <[email protected]> Signed-off-by: Chris Mason <[email protected]>
1 parent 207910d commit f441460

File tree

1 file changed

+117
-31
lines changed

1 file changed

+117
-31
lines changed

fs/btrfs/ioctl.c

Lines changed: 117 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -2765,14 +2765,11 @@ static long btrfs_ioctl_dev_info(struct btrfs_root *root, void __user *arg)
27652765
return ret;
27662766
}
27672767

2768-
static struct page *extent_same_get_page(struct inode *inode, u64 off)
2768+
static struct page *extent_same_get_page(struct inode *inode, pgoff_t index)
27692769
{
27702770
struct page *page;
2771-
pgoff_t index;
27722771
struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
27732772

2774-
index = off >> PAGE_CACHE_SHIFT;
2775-
27762773
page = grab_cache_page(inode->i_mapping, index);
27772774
if (!page)
27782775
return NULL;
@@ -2793,6 +2790,20 @@ static struct page *extent_same_get_page(struct inode *inode, u64 off)
27932790
return page;
27942791
}
27952792

2793+
static int gather_extent_pages(struct inode *inode, struct page **pages,
2794+
int num_pages, u64 off)
2795+
{
2796+
int i;
2797+
pgoff_t index = off >> PAGE_CACHE_SHIFT;
2798+
2799+
for (i = 0; i < num_pages; i++) {
2800+
pages[i] = extent_same_get_page(inode, index + i);
2801+
if (!pages[i])
2802+
return -ENOMEM;
2803+
}
2804+
return 0;
2805+
}
2806+
27962807
static inline void lock_extent_range(struct inode *inode, u64 off, u64 len)
27972808
{
27982809
/* do any pending delalloc/csum calc on src, one way or
@@ -2818,52 +2829,120 @@ static inline void lock_extent_range(struct inode *inode, u64 off, u64 len)
28182829
}
28192830
}
28202831

2821-
static void btrfs_double_unlock(struct inode *inode1, u64 loff1,
2822-
struct inode *inode2, u64 loff2, u64 len)
2832+
static void btrfs_double_inode_unlock(struct inode *inode1, struct inode *inode2)
28232833
{
2824-
unlock_extent(&BTRFS_I(inode1)->io_tree, loff1, loff1 + len - 1);
2825-
unlock_extent(&BTRFS_I(inode2)->io_tree, loff2, loff2 + len - 1);
2826-
28272834
mutex_unlock(&inode1->i_mutex);
28282835
mutex_unlock(&inode2->i_mutex);
28292836
}
28302837

2831-
static void btrfs_double_lock(struct inode *inode1, u64 loff1,
2832-
struct inode *inode2, u64 loff2, u64 len)
2838+
static void btrfs_double_inode_lock(struct inode *inode1, struct inode *inode2)
2839+
{
2840+
if (inode1 < inode2)
2841+
swap(inode1, inode2);
2842+
2843+
mutex_lock_nested(&inode1->i_mutex, I_MUTEX_PARENT);
2844+
if (inode1 != inode2)
2845+
mutex_lock_nested(&inode2->i_mutex, I_MUTEX_CHILD);
2846+
}
2847+
2848+
static void btrfs_double_extent_unlock(struct inode *inode1, u64 loff1,
2849+
struct inode *inode2, u64 loff2, u64 len)
2850+
{
2851+
unlock_extent(&BTRFS_I(inode1)->io_tree, loff1, loff1 + len - 1);
2852+
unlock_extent(&BTRFS_I(inode2)->io_tree, loff2, loff2 + len - 1);
2853+
}
2854+
2855+
static void btrfs_double_extent_lock(struct inode *inode1, u64 loff1,
2856+
struct inode *inode2, u64 loff2, u64 len)
28332857
{
28342858
if (inode1 < inode2) {
28352859
swap(inode1, inode2);
28362860
swap(loff1, loff2);
28372861
}
2838-
2839-
mutex_lock_nested(&inode1->i_mutex, I_MUTEX_PARENT);
28402862
lock_extent_range(inode1, loff1, len);
2841-
if (inode1 != inode2) {
2842-
mutex_lock_nested(&inode2->i_mutex, I_MUTEX_CHILD);
2863+
if (inode1 != inode2)
28432864
lock_extent_range(inode2, loff2, len);
2865+
}
2866+
2867+
struct cmp_pages {
2868+
int num_pages;
2869+
struct page **src_pages;
2870+
struct page **dst_pages;
2871+
};
2872+
2873+
static void btrfs_cmp_data_free(struct cmp_pages *cmp)
2874+
{
2875+
int i;
2876+
struct page *pg;
2877+
2878+
for (i = 0; i < cmp->num_pages; i++) {
2879+
pg = cmp->src_pages[i];
2880+
if (pg)
2881+
page_cache_release(pg);
2882+
pg = cmp->dst_pages[i];
2883+
if (pg)
2884+
page_cache_release(pg);
2885+
}
2886+
kfree(cmp->src_pages);
2887+
kfree(cmp->dst_pages);
2888+
}
2889+
2890+
static int btrfs_cmp_data_prepare(struct inode *src, u64 loff,
2891+
struct inode *dst, u64 dst_loff,
2892+
u64 len, struct cmp_pages *cmp)
2893+
{
2894+
int ret;
2895+
int num_pages = PAGE_CACHE_ALIGN(len) >> PAGE_CACHE_SHIFT;
2896+
struct page **src_pgarr, **dst_pgarr;
2897+
2898+
/*
2899+
* We must gather up all the pages before we initiate our
2900+
* extent locking. We use an array for the page pointers. Size
2901+
* of the array is bounded by len, which is in turn bounded by
2902+
* BTRFS_MAX_DEDUPE_LEN.
2903+
*/
2904+
src_pgarr = kzalloc(num_pages * sizeof(struct page *), GFP_NOFS);
2905+
dst_pgarr = kzalloc(num_pages * sizeof(struct page *), GFP_NOFS);
2906+
if (!src_pgarr || !dst_pgarr) {
2907+
kfree(src_pgarr);
2908+
kfree(dst_pgarr);
2909+
return -ENOMEM;
28442910
}
2911+
cmp->num_pages = num_pages;
2912+
cmp->src_pages = src_pgarr;
2913+
cmp->dst_pages = dst_pgarr;
2914+
2915+
ret = gather_extent_pages(src, cmp->src_pages, cmp->num_pages, loff);
2916+
if (ret)
2917+
goto out;
2918+
2919+
ret = gather_extent_pages(dst, cmp->dst_pages, cmp->num_pages, dst_loff);
2920+
2921+
out:
2922+
if (ret)
2923+
btrfs_cmp_data_free(cmp);
2924+
return 0;
28452925
}
28462926

28472927
static int btrfs_cmp_data(struct inode *src, u64 loff, struct inode *dst,
2848-
u64 dst_loff, u64 len)
2928+
u64 dst_loff, u64 len, struct cmp_pages *cmp)
28492929
{
28502930
int ret = 0;
2931+
int i;
28512932
struct page *src_page, *dst_page;
28522933
unsigned int cmp_len = PAGE_CACHE_SIZE;
28532934
void *addr, *dst_addr;
28542935

2936+
i = 0;
28552937
while (len) {
28562938
if (len < PAGE_CACHE_SIZE)
28572939
cmp_len = len;
28582940

2859-
src_page = extent_same_get_page(src, loff);
2860-
if (!src_page)
2861-
return -EINVAL;
2862-
dst_page = extent_same_get_page(dst, dst_loff);
2863-
if (!dst_page) {
2864-
page_cache_release(src_page);
2865-
return -EINVAL;
2866-
}
2941+
BUG_ON(i >= cmp->num_pages);
2942+
2943+
src_page = cmp->src_pages[i];
2944+
dst_page = cmp->dst_pages[i];
2945+
28672946
addr = kmap_atomic(src_page);
28682947
dst_addr = kmap_atomic(dst_page);
28692948

@@ -2875,15 +2954,12 @@ static int btrfs_cmp_data(struct inode *src, u64 loff, struct inode *dst,
28752954

28762955
kunmap_atomic(addr);
28772956
kunmap_atomic(dst_addr);
2878-
page_cache_release(src_page);
2879-
page_cache_release(dst_page);
28802957

28812958
if (ret)
28822959
break;
28832960

2884-
loff += cmp_len;
2885-
dst_loff += cmp_len;
28862961
len -= cmp_len;
2962+
i++;
28872963
}
28882964

28892965
return ret;
@@ -2914,6 +2990,7 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen,
29142990
{
29152991
int ret;
29162992
u64 len = olen;
2993+
struct cmp_pages cmp;
29172994

29182995
/*
29192996
* btrfs_clone() can't handle extents in the same file
@@ -2926,7 +3003,7 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen,
29263003
if (len == 0)
29273004
return 0;
29283005

2929-
btrfs_double_lock(src, loff, dst, dst_loff, len);
3006+
btrfs_double_inode_lock(src, dst);
29303007

29313008
ret = extent_same_check_offsets(src, loff, &len, olen);
29323009
if (ret)
@@ -2943,13 +3020,22 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen,
29433020
goto out_unlock;
29443021
}
29453022

3023+
ret = btrfs_cmp_data_prepare(src, loff, dst, dst_loff, olen, &cmp);
3024+
if (ret)
3025+
goto out_unlock;
3026+
3027+
btrfs_double_extent_lock(src, loff, dst, dst_loff, len);
3028+
29463029
/* pass original length for comparison so we stay within i_size */
2947-
ret = btrfs_cmp_data(src, loff, dst, dst_loff, olen);
3030+
ret = btrfs_cmp_data(src, loff, dst, dst_loff, olen, &cmp);
29483031
if (ret == 0)
29493032
ret = btrfs_clone(src, dst, loff, olen, len, dst_loff);
29503033

3034+
btrfs_double_extent_unlock(src, loff, dst, dst_loff, len);
3035+
3036+
btrfs_cmp_data_free(&cmp);
29513037
out_unlock:
2952-
btrfs_double_unlock(src, loff, dst, dst_loff, len);
3038+
btrfs_double_inode_unlock(src, dst);
29533039

29543040
return ret;
29553041
}

0 commit comments

Comments
 (0)