Commit c01d5b3

Hugh Dickins authored and torvalds committed

shmem: get_unmapped_area align huge page

Provide a shmem_get_unmapped_area method in file_operations, called at
mmap time to decide the mapping address.  It could be conditional on
CONFIG_TRANSPARENT_HUGEPAGE, but save #ifdefs in other places by making
it unconditional.

shmem_get_unmapped_area() first calls the usual mm->get_unmapped_area
(which we treat as a black box, highly dependent on architecture and
config and executable layout).  Lots of conditions, and in most cases
it just goes with the address it chose; but when our huge stars are
rightly aligned, yet that did not provide a suitable address, go back
to ask for a larger arena, within which to align the mapping suitably.

There have to be some direct calls to shmem_get_unmapped_area(), not
via the file_operations: because of the way shmem_zero_setup() is
called to create a shmem object late in the mmap sequence, when
MAP_SHARED is requested with MAP_ANONYMOUS or /dev/zero.  Though this
only matters when /proc/sys/vm/shmem_huge has been set.

Link: http://lkml.kernel.org/r/1466021202-61880-29-git-send-email-kirill.shutemov@linux.intel.com
Signed-off-by: Hugh Dickins <[email protected]>
Signed-off-by: Kirill A. Shutemov <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>

1 parent 5a6e75f commit c01d5b3
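
For illustration only (none of this is in the patch): the "larger arena" trick described above can be sketched from userspace, where the only available tool is to over-map and then trim; 2MB stands in for HPAGE_PMD_SIZE. The kernel code in mm/shmem.c below instead simply computes a suitably aligned address inside the inflated range and returns it.

#define _DEFAULT_SOURCE
#include <stdint.h>
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

#define ALIGN_2M	(2UL << 20)

/* Map len bytes at a 2MB-aligned address by inflating the request. */
static void *mmap_aligned(size_t len)
{
	size_t inflated_len = len + ALIGN_2M - (size_t)sysconf(_SC_PAGESIZE);
	char *raw, *aligned;

	raw = mmap(NULL, inflated_len, PROT_READ | PROT_WRITE,
		   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (raw == MAP_FAILED)
		return NULL;

	/* Round up to the next 2MB boundary inside the inflated arena. */
	aligned = (char *)(((uintptr_t)raw + ALIGN_2M - 1) & ~(ALIGN_2M - 1));

	/* Trim the unused head and tail; only [aligned, aligned + len) remains. */
	if (aligned > raw)
		munmap(raw, aligned - raw);
	if (aligned + len < raw + inflated_len)
		munmap(aligned + len, (raw + inflated_len) - (aligned + len));

	return aligned;
}

int main(void)
{
	printf("2MB-aligned mapping at %p\n", mmap_aligned(4UL << 20));
	return 0;
}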

File tree

5 files changed: +142 -4 lines changed

drivers/char/mem.c
include/linux/shmem_fs.h
ipc/shm.c
mm/mmap.c
mm/shmem.c


drivers/char/mem.c

Lines changed: 24 additions & 0 deletions
@@ -22,6 +22,7 @@
 #include <linux/device.h>
 #include <linux/highmem.h>
 #include <linux/backing-dev.h>
+#include <linux/shmem_fs.h>
 #include <linux/splice.h>
 #include <linux/pfn.h>
 #include <linux/export.h>
@@ -657,6 +658,28 @@ static int mmap_zero(struct file *file, struct vm_area_struct *vma)
 	return 0;
 }
 
+static unsigned long get_unmapped_area_zero(struct file *file,
+				unsigned long addr, unsigned long len,
+				unsigned long pgoff, unsigned long flags)
+{
+#ifdef CONFIG_MMU
+	if (flags & MAP_SHARED) {
+		/*
+		 * mmap_zero() will call shmem_zero_setup() to create a file,
+		 * so use shmem's get_unmapped_area in case it can be huge;
+		 * and pass NULL for file as in mmap.c's get_unmapped_area(),
+		 * so as not to confuse shmem with our handle on "/dev/zero".
+		 */
+		return shmem_get_unmapped_area(NULL, addr, len, pgoff, flags);
+	}
+
+	/* Otherwise flags & MAP_PRIVATE: with no shmem object beneath it */
+	return current->mm->get_unmapped_area(file, addr, len, pgoff, flags);
+#else
+	return -ENOSYS;
+#endif
+}
+
 static ssize_t write_full(struct file *file, const char __user *buf,
 			  size_t count, loff_t *ppos)
 {
@@ -764,6 +787,7 @@ static const struct file_operations zero_fops = {
 	.read_iter	= read_iter_zero,
 	.write_iter	= write_iter_zero,
 	.mmap		= mmap_zero,
+	.get_unmapped_area = get_unmapped_area_zero,
 #ifndef CONFIG_MMU
 	.mmap_capabilities = zero_mmap_capabilities,
 #endif
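
A hypothetical way to exercise this path from userspace (illustrative, not part of the patch): a MAP_SHARED mapping of /dev/zero goes through mmap_zero() and shmem_zero_setup(), so its address is now chosen by get_unmapped_area_zero() calling into shmem_get_unmapped_area(), and may come back PMD-aligned when huge shmem pages are enabled (e.g. via the /proc/sys/vm/shmem_huge knob named in the commit message).

#include <fcntl.h>
#include <stdio.h>
#include <sys/mman.h>

int main(void)
{
	size_t len = 4UL << 20;	/* comfortably larger than a 2MB PMD */
	int fd = open("/dev/zero", O_RDWR);
	void *p;

	if (fd < 0)
		return 1;
	p = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (p == MAP_FAILED)
		return 1;
	printf("shared /dev/zero mapping at %p\n", p);
	return 0;
}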

include/linux/shmem_fs.h

Lines changed: 2 additions & 0 deletions
@@ -50,6 +50,8 @@ extern struct file *shmem_file_setup(const char *name,
 extern struct file *shmem_kernel_file_setup(const char *name, loff_t size,
 					    unsigned long flags);
 extern int shmem_zero_setup(struct vm_area_struct *);
+extern unsigned long shmem_get_unmapped_area(struct file *, unsigned long addr,
+		unsigned long len, unsigned long pgoff, unsigned long flags);
 extern int shmem_lock(struct file *file, int lock, struct user_struct *user);
 extern bool shmem_mapping(struct address_space *mapping);
 extern void shmem_unlock_mapping(struct address_space *mapping);

ipc/shm.c

Lines changed: 4 additions & 2 deletions
@@ -476,13 +476,15 @@ static const struct file_operations shm_file_operations = {
 	.mmap		= shm_mmap,
 	.fsync		= shm_fsync,
 	.release	= shm_release,
-#ifndef CONFIG_MMU
 	.get_unmapped_area	= shm_get_unmapped_area,
-#endif
 	.llseek		= noop_llseek,
 	.fallocate	= shm_fallocate,
 };
 
+/*
+ * shm_file_operations_huge is now identical to shm_file_operations,
+ * but we keep it distinct for the sake of is_file_shm_hugepages().
+ */
 static const struct file_operations shm_file_operations_huge = {
 	.mmap		= shm_mmap,
 	.fsync		= shm_fsync,

mm/mmap.c

Lines changed: 14 additions & 2 deletions
@@ -25,6 +25,7 @@
 #include <linux/personality.h>
 #include <linux/security.h>
 #include <linux/hugetlb.h>
+#include <linux/shmem_fs.h>
 #include <linux/profile.h>
 #include <linux/export.h>
 #include <linux/mount.h>
@@ -1897,8 +1898,19 @@ get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
 		return -ENOMEM;
 
 	get_area = current->mm->get_unmapped_area;
-	if (file && file->f_op->get_unmapped_area)
-		get_area = file->f_op->get_unmapped_area;
+	if (file) {
+		if (file->f_op->get_unmapped_area)
+			get_area = file->f_op->get_unmapped_area;
+	} else if (flags & MAP_SHARED) {
+		/*
+		 * mmap_region() will call shmem_zero_setup() to create a file,
+		 * so use shmem's get_unmapped_area in case it can be huge.
+		 * do_mmap_pgoff() will clear pgoff, so match alignment.
+		 */
+		pgoff = 0;
+		get_area = shmem_get_unmapped_area;
+	}
+
 	addr = get_area(file, addr, len, pgoff, flags);
 	if (IS_ERR_VALUE(addr))
 		return addr;
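
The other direct route is an anonymous shared mapping: with no file and MAP_SHARED set, get_unmapped_area() now hands the request to shmem_get_unmapped_area() (with pgoff forced to 0), since mmap_region() will attach a shmem object later via shmem_zero_setup(). A hypothetical alignment check (illustrative only; whether the result is 2MB-aligned depends on the shmem huge-page settings):

#define _DEFAULT_SOURCE
#include <stdio.h>
#include <sys/mman.h>

int main(void)
{
	unsigned long mask = (2UL << 20) - 1;	/* assumes 2MB HPAGE_PMD_SIZE */
	void *p = mmap(NULL, 4UL << 20, PROT_READ | PROT_WRITE,
		       MAP_SHARED | MAP_ANONYMOUS, -1, 0);

	if (p == MAP_FAILED)
		return 1;
	printf("%p %s\n", p,
	       ((unsigned long)p & mask) ? "not PMD-aligned" : "PMD-aligned");
	return 0;
}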

mm/shmem.c

Lines changed: 98 additions & 0 deletions
@@ -1513,6 +1513,94 @@ static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	return ret;
 }
 
+unsigned long shmem_get_unmapped_area(struct file *file,
+				      unsigned long uaddr, unsigned long len,
+				      unsigned long pgoff, unsigned long flags)
+{
+	unsigned long (*get_area)(struct file *,
+		unsigned long, unsigned long, unsigned long, unsigned long);
+	unsigned long addr;
+	unsigned long offset;
+	unsigned long inflated_len;
+	unsigned long inflated_addr;
+	unsigned long inflated_offset;
+
+	if (len > TASK_SIZE)
+		return -ENOMEM;
+
+	get_area = current->mm->get_unmapped_area;
+	addr = get_area(file, uaddr, len, pgoff, flags);
+
+	if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
+		return addr;
+	if (IS_ERR_VALUE(addr))
+		return addr;
+	if (addr & ~PAGE_MASK)
+		return addr;
+	if (addr > TASK_SIZE - len)
+		return addr;
+
+	if (shmem_huge == SHMEM_HUGE_DENY)
+		return addr;
+	if (len < HPAGE_PMD_SIZE)
+		return addr;
+	if (flags & MAP_FIXED)
+		return addr;
+	/*
+	 * Our priority is to support MAP_SHARED mapped hugely;
+	 * and support MAP_PRIVATE mapped hugely too, until it is COWed.
+	 * But if caller specified an address hint, respect that as before.
+	 */
+	if (uaddr)
+		return addr;
+
+	if (shmem_huge != SHMEM_HUGE_FORCE) {
+		struct super_block *sb;
+
+		if (file) {
+			VM_BUG_ON(file->f_op != &shmem_file_operations);
+			sb = file_inode(file)->i_sb;
+		} else {
+			/*
+			 * Called directly from mm/mmap.c, or drivers/char/mem.c
+			 * for "/dev/zero", to create a shared anonymous object.
+			 */
+			if (IS_ERR(shm_mnt))
+				return addr;
+			sb = shm_mnt->mnt_sb;
+		}
+		if (SHMEM_SB(sb)->huge != SHMEM_HUGE_NEVER)
+			return addr;
+	}
+
+	offset = (pgoff << PAGE_SHIFT) & (HPAGE_PMD_SIZE-1);
+	if (offset && offset + len < 2 * HPAGE_PMD_SIZE)
+		return addr;
+	if ((addr & (HPAGE_PMD_SIZE-1)) == offset)
+		return addr;
+
+	inflated_len = len + HPAGE_PMD_SIZE - PAGE_SIZE;
+	if (inflated_len > TASK_SIZE)
+		return addr;
+	if (inflated_len < len)
+		return addr;
+
+	inflated_addr = get_area(NULL, 0, inflated_len, 0, flags);
+	if (IS_ERR_VALUE(inflated_addr))
+		return addr;
+	if (inflated_addr & ~PAGE_MASK)
+		return addr;
+
+	inflated_offset = inflated_addr & (HPAGE_PMD_SIZE-1);
+	inflated_addr += offset - inflated_offset;
+	if (inflated_offset > offset)
+		inflated_addr += HPAGE_PMD_SIZE;
+
+	if (inflated_addr > TASK_SIZE - len)
+		return addr;
+	return inflated_addr;
+}
+
 #ifdef CONFIG_NUMA
 static int shmem_set_policy(struct vm_area_struct *vma, struct mempolicy *mpol)
 {
@@ -3261,6 +3349,7 @@ static const struct address_space_operations shmem_aops = {
 
 static const struct file_operations shmem_file_operations = {
 	.mmap		= shmem_mmap,
+	.get_unmapped_area = shmem_get_unmapped_area,
 #ifdef CONFIG_TMPFS
 	.llseek		= shmem_file_llseek,
 	.read_iter	= shmem_file_read_iter,
@@ -3496,6 +3585,15 @@ void shmem_unlock_mapping(struct address_space *mapping)
 {
 }
 
+#ifdef CONFIG_MMU
+unsigned long shmem_get_unmapped_area(struct file *file,
+				      unsigned long addr, unsigned long len,
+				      unsigned long pgoff, unsigned long flags)
+{
+	return current->mm->get_unmapped_area(file, addr, len, pgoff, flags);
+}
+#endif
+
 void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
 {
 	truncate_inode_pages_range(inode->i_mapping, lstart, lend);
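
To make the final arithmetic concrete, here is a standalone walk-through with assumed sample numbers (2MB HPAGE_PMD_SIZE, pgoff 0, and a made-up address returned by the inflated get_area() call); it mirrors the last few lines of shmem_get_unmapped_area() above and is illustration only.

#include <stdio.h>

int main(void)
{
	unsigned long hpage = 2UL << 20;		/* HPAGE_PMD_SIZE */
	unsigned long offset = 0;			/* (pgoff << PAGE_SHIFT) & (hpage - 1) for pgoff 0 */
	unsigned long inflated_addr = 0x7f1233f6b000;	/* sample return for the inflated request */
	unsigned long inflated_offset = inflated_addr & (hpage - 1);	/* 0x16b000 */

	inflated_addr += offset - inflated_offset;	/* round down: 0x7f1233e00000 */
	if (inflated_offset > offset)
		inflated_addr += hpage;			/* step back inside the arena: 0x7f1234000000 */

	/* Prints 0x7f1234000000: 2MB-aligned and still within the inflated range. */
	printf("aligned address: 0x%lx\n", inflated_addr);
	return 0;
}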
