
Commit 4c27fe4

rppt authored and torvalds committed
userfaultfd: shmem: add shmem_mcopy_atomic_pte for userfaultfd support
shmem_mcopy_atomic_pte is the low level routine that implements the userfaultfd UFFDIO_COPY command. It is based on the existing mcopy_atomic_pte routine with modifications for shared memory pages.

Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Mike Rapoport <[email protected]>
Signed-off-by: Andrea Arcangeli <[email protected]>
Cc: "Dr. David Alan Gilbert" <[email protected]>
Cc: Hillf Danton <[email protected]>
Cc: Michael Rapoport <[email protected]>
Cc: Mike Kravetz <[email protected]>
Cc: Pavel Emelyanov <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
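For context, this routine is the kernel-side worker behind the UFFDIO_COPY ioctl: userspace resolves a fault in a userfaultfd-registered range by asking the kernel to atomically install a filled page at the faulting address. A minimal sketch of that userspace side, with illustrative names (`uffd` is assumed to be a userfaultfd file descriptor already registered over the destination range):

	#include <linux/userfaultfd.h>
	#include <sys/ioctl.h>

	/*
	 * Resolve a fault at `fault_addr` by copying one page from `src_page`.
	 * `uffd`, `resolve_fault`, and the parameter names are illustrative,
	 * not part of this commit.
	 */
	static int resolve_fault(int uffd, void *fault_addr, void *src_page,
				 long page_size)
	{
		struct uffdio_copy copy;

		copy.dst = (unsigned long)fault_addr & ~(page_size - 1);
		copy.src = (unsigned long)src_page;
		copy.len = page_size;
		copy.mode = 0;	/* 0 = wake the faulting thread on completion */
		copy.copy = 0;

		if (ioctl(uffd, UFFDIO_COPY, &copy) == -1)
			return -1;	/* e.g. errno == EEXIST if already mapped */
		return 0;
	}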
1 parent ba6907d commit 4c27fe4

File tree

2 files changed: +121 -0 lines changed

include/linux/shmem_fs.h

Lines changed: 11 additions & 0 deletions
@@ -124,4 +124,15 @@ static inline bool shmem_huge_enabled(struct vm_area_struct *vma)
 }
 #endif
 
+#ifdef CONFIG_SHMEM
+extern int shmem_mcopy_atomic_pte(struct mm_struct *dst_mm, pmd_t *dst_pmd,
+				  struct vm_area_struct *dst_vma,
+				  unsigned long dst_addr,
+				  unsigned long src_addr,
+				  struct page **pagep);
+#else
+#define shmem_mcopy_atomic_pte(dst_mm, dst_pte, dst_vma, dst_addr, \
+			       src_addr, pagep) ({ BUG(); 0; })
+#endif
+
 #endif
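The `#else` stub keeps !CONFIG_SHMEM kernels building unchanged while asserting the shmem path is unreachable. The dispatch itself is not part of this commit; a plausible call-site shape (an assumption, modeled on the anonymous-memory path in mm/userfaultfd.c) would be:

	/* Assumed call-site shape, not part of this commit: private
	 * anonymous VMAs keep the existing mcopy_atomic_pte(), shared
	 * shmem-backed VMAs take the helper declared above. */
	if (!(dst_vma->vm_flags & VM_SHARED))
		err = mcopy_atomic_pte(dst_mm, dst_pmd, dst_vma,
				       dst_addr, src_addr, &page);
	else
		err = shmem_mcopy_atomic_pte(dst_mm, dst_pmd, dst_vma,
					     dst_addr, src_addr, &page);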

mm/shmem.c

Lines changed: 110 additions & 0 deletions
@@ -70,6 +70,7 @@ static struct vfsmount *shm_mnt;
 #include <linux/syscalls.h>
 #include <linux/fcntl.h>
 #include <uapi/linux/memfd.h>
+#include <linux/rmap.h>
 
 #include <linux/uaccess.h>
 #include <asm/pgtable.h>
@@ -2178,6 +2179,115 @@ bool shmem_mapping(struct address_space *mapping)
 	return mapping->a_ops == &shmem_aops;
 }
 
+int shmem_mcopy_atomic_pte(struct mm_struct *dst_mm,
+			   pmd_t *dst_pmd,
+			   struct vm_area_struct *dst_vma,
+			   unsigned long dst_addr,
+			   unsigned long src_addr,
+			   struct page **pagep)
+{
+	struct inode *inode = file_inode(dst_vma->vm_file);
+	struct shmem_inode_info *info = SHMEM_I(inode);
+	struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
+	struct address_space *mapping = inode->i_mapping;
+	gfp_t gfp = mapping_gfp_mask(mapping);
+	pgoff_t pgoff = linear_page_index(dst_vma, dst_addr);
+	struct mem_cgroup *memcg;
+	spinlock_t *ptl;
+	void *page_kaddr;
+	struct page *page;
+	pte_t _dst_pte, *dst_pte;
+	int ret;
+
+	if (!*pagep) {
+		ret = -ENOMEM;
+		if (shmem_acct_block(info->flags, 1))
+			goto out;
+		if (sbinfo->max_blocks) {
+			if (percpu_counter_compare(&sbinfo->used_blocks,
+						   sbinfo->max_blocks) >= 0)
+				goto out_unacct_blocks;
+			percpu_counter_inc(&sbinfo->used_blocks);
+		}
+
+		page = shmem_alloc_page(gfp, info, pgoff);
+		if (!page)
+			goto out_dec_used_blocks;
+
+		page_kaddr = kmap_atomic(page);
+		ret = copy_from_user(page_kaddr, (const void __user *)src_addr,
+				     PAGE_SIZE);
+		kunmap_atomic(page_kaddr);
+
+		/* fallback to copy_from_user outside mmap_sem */
+		if (unlikely(ret)) {
+			*pagep = page;
+			/* don't free the page */
+			return -EFAULT;
+		}
+	} else {
+		page = *pagep;
+		*pagep = NULL;
+	}
+
+	ret = mem_cgroup_try_charge(page, dst_mm, gfp, &memcg, false);
+	if (ret)
+		goto out_release;
+
+	ret = radix_tree_maybe_preload(gfp & GFP_RECLAIM_MASK);
+	if (!ret) {
+		ret = shmem_add_to_page_cache(page, mapping, pgoff, NULL);
+		radix_tree_preload_end();
+	}
+	if (ret)
+		goto out_release_uncharge;
+
+	mem_cgroup_commit_charge(page, memcg, false, false);
+
+	_dst_pte = mk_pte(page, dst_vma->vm_page_prot);
+	if (dst_vma->vm_flags & VM_WRITE)
+		_dst_pte = pte_mkwrite(pte_mkdirty(_dst_pte));
+
+	ret = -EEXIST;
+	dst_pte = pte_offset_map_lock(dst_mm, dst_pmd, dst_addr, &ptl);
+	if (!pte_none(*dst_pte))
+		goto out_release_uncharge_unlock;
+
+	__SetPageUptodate(page);
+
+	lru_cache_add_anon(page);
+
+	spin_lock(&info->lock);
+	info->alloced++;
+	inode->i_blocks += BLOCKS_PER_PAGE;
+	shmem_recalc_inode(inode);
+	spin_unlock(&info->lock);
+
+	inc_mm_counter(dst_mm, mm_counter_file(page));
+	page_add_file_rmap(page, false);
+	set_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte);
+
+	/* No need to invalidate - it was non-present before */
+	update_mmu_cache(dst_vma, dst_addr, dst_pte);
+	unlock_page(page);
+	pte_unmap_unlock(dst_pte, ptl);
+	ret = 0;
+out:
+	return ret;
+out_release_uncharge_unlock:
+	pte_unmap_unlock(dst_pte, ptl);
+out_release_uncharge:
+	mem_cgroup_cancel_charge(page, memcg, false);
+out_release:
+	put_page(page);
+out_dec_used_blocks:
+	if (sbinfo->max_blocks)
+		percpu_counter_add(&sbinfo->used_blocks, -1);
+out_unacct_blocks:
+	shmem_unacct_blocks(info->flags, 1);
+	goto out;
+}
+
 #ifdef CONFIG_TMPFS
 static const struct inode_operations shmem_symlink_inode_operations;
 static const struct inode_operations shmem_short_symlink_operations;
