Commit 5a6e75f

kiryl authored and torvalds committed
shmem: prepare huge= mount option and sysfs knob
This patch adds a new mount option, "huge=". It can have the following values:

- "always": Attempt to allocate huge pages every time we need a new page;
- "never": Do not allocate huge pages;
- "within_size": Only allocate a huge page if it will be fully within i_size. Also respect fadvise()/madvise() hints;
- "advise": Only allocate huge pages if requested with fadvise()/madvise();

The default is "never" for now.

"mount -o remount,huge= /mountpoint" works fine after mount: remounting with huge=never will not attempt to break up huge pages at all, it just stops more from being allocated.

No new config option: put this under CONFIG_TRANSPARENT_HUGEPAGE, which is the appropriate option to protect those who don't want the new bloat, and with which we shall share some pmd code.

Prohibit the option when !CONFIG_TRANSPARENT_HUGEPAGE, just as mpol is invalid without CONFIG_NUMA (it was hidden in mpol_parse_str(): make it explicit). Allow enabling THP only if the machine has_transparent_hugepage().

But what about shmem with no user-visible mount? SysV SHM, memfds, shared anonymous mmaps (of /dev/zero or MAP_ANONYMOUS), GPU drivers' DRM objects, Ashmem. Though unlikely to suit all usages, provide the sysfs knob /sys/kernel/mm/transparent_hugepage/shmem_enabled to experiment with huge on those.

Also allow shmem_enabled two further values:

- "deny": For use in emergencies, to force the huge option off from all mounts;
- "force": Force the huge option on for all - very useful for testing;

Based on a patch by Hugh Dickins.

Link: http://lkml.kernel.org/r/1466021202-61880-28-git-send-email-kirill.shutemov@linux.intel.com
Signed-off-by: Kirill A. Shutemov <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
1 parent 65c4537 commit 5a6e75f
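
As a usage illustration (not part of the patch itself), the new option can be exercised through the ordinary mount(2) data string. A minimal sketch, assuming a hypothetical mount point /mnt/hugetmp, root privileges, and a kernel built with CONFIG_TRANSPARENT_HUGEPAGE:

/* Hedged example: mount a tmpfs with huge=within_size, then remount it
 * with huge=never. The mount point and size are illustrative only. */
#include <stdio.h>
#include <sys/mount.h>

int main(void)
{
	/* Try huge pages only when the page would sit fully within i_size. */
	if (mount("tmpfs", "/mnt/hugetmp", "tmpfs", 0,
		  "size=1G,huge=within_size") != 0) {
		perror("mount");
		return 1;
	}

	/* Per the commit message, remounting with huge=never only stops new
	 * huge allocations; it does not break up huge pages already there. */
	if (mount("tmpfs", "/mnt/hugetmp", "tmpfs", MS_REMOUNT,
		  "size=1G,huge=never") != 0) {
		perror("remount");
		return 1;
	}
	return 0;
}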

File tree

include/linux/huge_mm.h
include/linux/shmem_fs.h
mm/huge_memory.c
mm/shmem.c

4 files changed: +168 -1 lines changed

include/linux/huge_mm.h

Lines changed: 2 additions & 0 deletions
@@ -41,6 +41,8 @@ enum transparent_hugepage_flag {
 #endif
 };
 
+extern struct kobj_attribute shmem_enabled_attr;
+
 #define HPAGE_PMD_ORDER (HPAGE_PMD_SHIFT-PAGE_SHIFT)
 #define HPAGE_PMD_NR (1<<HPAGE_PMD_ORDER)

include/linux/shmem_fs.h

Lines changed: 2 additions & 1 deletion
@@ -28,9 +28,10 @@ struct shmem_sb_info {
 	unsigned long max_inodes;   /* How many inodes are allowed */
 	unsigned long free_inodes;  /* How many are left for allocation */
 	spinlock_t stat_lock;	    /* Serialize shmem_sb_info changes */
+	umode_t mode;		    /* Mount mode for root directory */
+	unsigned char huge;	    /* Whether to try for hugepages */
 	kuid_t uid;		    /* Mount uid for root directory */
 	kgid_t gid;		    /* Mount gid for root directory */
-	umode_t mode;		    /* Mount mode for root directory */
 	struct mempolicy *mpol;     /* default memory policy for mappings */
 };

mm/huge_memory.c

Lines changed: 3 additions & 0 deletions
@@ -442,6 +442,9 @@ static struct attribute *hugepage_attr[] = {
 	&enabled_attr.attr,
 	&defrag_attr.attr,
 	&use_zero_page_attr.attr,
+#ifdef CONFIG_SHMEM
+	&shmem_enabled_attr.attr,
+#endif
 #ifdef CONFIG_DEBUG_VM
 	&debug_cow_attr.attr,
 #endif

mm/shmem.c

Lines changed: 161 additions & 0 deletions
@@ -288,6 +288,87 @@ static bool shmem_confirm_swap(struct address_space *mapping,
 	return item == swp_to_radix_entry(swap);
 }
 
+/*
+ * Definitions for "huge tmpfs": tmpfs mounted with the huge= option
+ *
+ * SHMEM_HUGE_NEVER:
+ *	disables huge pages for the mount;
+ * SHMEM_HUGE_ALWAYS:
+ *	enables huge pages for the mount;
+ * SHMEM_HUGE_WITHIN_SIZE:
+ *	only allocate huge pages if the page will be fully within i_size,
+ *	also respect fadvise()/madvise() hints;
+ * SHMEM_HUGE_ADVISE:
+ *	only allocate huge pages if requested with fadvise()/madvise();
+ */
+
+#define SHMEM_HUGE_NEVER	0
+#define SHMEM_HUGE_ALWAYS	1
+#define SHMEM_HUGE_WITHIN_SIZE	2
+#define SHMEM_HUGE_ADVISE	3
+
+/*
+ * Special values.
+ * Only can be set via /sys/kernel/mm/transparent_hugepage/shmem_enabled:
+ *
+ * SHMEM_HUGE_DENY:
+ *	disables huge on shm_mnt and all mounts, for emergency use;
+ * SHMEM_HUGE_FORCE:
+ *	enables huge on shm_mnt and all mounts, w/o needing option, for testing;
+ *
+ */
+#define SHMEM_HUGE_DENY		(-1)
+#define SHMEM_HUGE_FORCE	(-2)
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+/* ifdef here to avoid bloating shmem.o when not necessary */
+
+int shmem_huge __read_mostly;
+
+static int shmem_parse_huge(const char *str)
+{
+	if (!strcmp(str, "never"))
+		return SHMEM_HUGE_NEVER;
+	if (!strcmp(str, "always"))
+		return SHMEM_HUGE_ALWAYS;
+	if (!strcmp(str, "within_size"))
+		return SHMEM_HUGE_WITHIN_SIZE;
+	if (!strcmp(str, "advise"))
+		return SHMEM_HUGE_ADVISE;
+	if (!strcmp(str, "deny"))
+		return SHMEM_HUGE_DENY;
+	if (!strcmp(str, "force"))
+		return SHMEM_HUGE_FORCE;
+	return -EINVAL;
+}
+
+static const char *shmem_format_huge(int huge)
+{
+	switch (huge) {
+	case SHMEM_HUGE_NEVER:
+		return "never";
+	case SHMEM_HUGE_ALWAYS:
+		return "always";
+	case SHMEM_HUGE_WITHIN_SIZE:
+		return "within_size";
+	case SHMEM_HUGE_ADVISE:
+		return "advise";
+	case SHMEM_HUGE_DENY:
+		return "deny";
+	case SHMEM_HUGE_FORCE:
+		return "force";
+	default:
+		VM_BUG_ON(1);
+		return "bad_val";
+	}
+}
+
+#else /* !CONFIG_TRANSPARENT_HUGEPAGE */
+
+#define shmem_huge SHMEM_HUGE_DENY
+
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+
 /*
  * Like add_to_page_cache_locked, but error if expected item has gone.
  */
@@ -2860,11 +2941,24 @@ static int shmem_parse_options(char *options, struct shmem_sb_info *sbinfo,
 			sbinfo->gid = make_kgid(current_user_ns(), gid);
 			if (!gid_valid(sbinfo->gid))
 				goto bad_val;
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+		} else if (!strcmp(this_char, "huge")) {
+			int huge;
+			huge = shmem_parse_huge(value);
+			if (huge < 0)
+				goto bad_val;
+			if (!has_transparent_hugepage() &&
+					huge != SHMEM_HUGE_NEVER)
+				goto bad_val;
+			sbinfo->huge = huge;
+#endif
+#ifdef CONFIG_NUMA
 		} else if (!strcmp(this_char,"mpol")) {
 			mpol_put(mpol);
 			mpol = NULL;
 			if (mpol_parse_str(value, &mpol))
 				goto bad_val;
+#endif
 		} else {
 			pr_err("tmpfs: Bad mount option %s\n", this_char);
 			goto error;
@@ -2910,6 +3004,7 @@ static int shmem_remount_fs(struct super_block *sb, int *flags, char *data)
 		goto out;
 
 	error = 0;
+	sbinfo->huge = config.huge;
 	sbinfo->max_blocks = config.max_blocks;
 	sbinfo->max_inodes = config.max_inodes;
 	sbinfo->free_inodes = config.max_inodes - inodes;
@@ -2943,6 +3038,11 @@ static int shmem_show_options(struct seq_file *seq, struct dentry *root)
 	if (!gid_eq(sbinfo->gid, GLOBAL_ROOT_GID))
 		seq_printf(seq, ",gid=%u",
 				from_kgid_munged(&init_user_ns, sbinfo->gid));
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+	/* Rightly or wrongly, show huge mount option unmasked by shmem_huge */
+	if (sbinfo->huge)
+		seq_printf(seq, ",huge=%s", shmem_format_huge(sbinfo->huge));
+#endif
 	shmem_show_mpol(seq, sbinfo->mpol);
 	return 0;
 }
@@ -3282,6 +3382,13 @@ int __init shmem_init(void)
 		pr_err("Could not kern_mount tmpfs\n");
 		goto out1;
 	}
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+	if (has_transparent_hugepage() && shmem_huge < SHMEM_HUGE_DENY)
+		SHMEM_SB(shm_mnt->mnt_sb)->huge = shmem_huge;
+	else
+		shmem_huge = 0; /* just in case it was patched */
+#endif
 	return 0;
 
 out1:
@@ -3293,6 +3400,60 @@ int __init shmem_init(void)
 	return error;
 }
 
+#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && defined(CONFIG_SYSFS)
+static ssize_t shmem_enabled_show(struct kobject *kobj,
+		struct kobj_attribute *attr, char *buf)
+{
+	int values[] = {
+		SHMEM_HUGE_ALWAYS,
+		SHMEM_HUGE_WITHIN_SIZE,
+		SHMEM_HUGE_ADVISE,
+		SHMEM_HUGE_NEVER,
+		SHMEM_HUGE_DENY,
+		SHMEM_HUGE_FORCE,
+	};
+	int i, count;
+
+	for (i = 0, count = 0; i < ARRAY_SIZE(values); i++) {
+		const char *fmt = shmem_huge == values[i] ? "[%s] " : "%s ";
+
+		count += sprintf(buf + count, fmt,
+				shmem_format_huge(values[i]));
+	}
+	buf[count - 1] = '\n';
+	return count;
+}
+
+static ssize_t shmem_enabled_store(struct kobject *kobj,
+		struct kobj_attribute *attr, const char *buf, size_t count)
+{
+	char tmp[16];
+	int huge;
+
+	if (count + 1 > sizeof(tmp))
+		return -EINVAL;
+	memcpy(tmp, buf, count);
+	tmp[count] = '\0';
+	if (count && tmp[count - 1] == '\n')
+		tmp[count - 1] = '\0';
+
+	huge = shmem_parse_huge(tmp);
+	if (huge == -EINVAL)
+		return -EINVAL;
+	if (!has_transparent_hugepage() &&
+			huge != SHMEM_HUGE_NEVER && huge != SHMEM_HUGE_DENY)
+		return -EINVAL;
+
+	shmem_huge = huge;
+	if (shmem_huge < SHMEM_HUGE_DENY)
+		SHMEM_SB(shm_mnt->mnt_sb)->huge = shmem_huge;
+	return count;
+}
+
+struct kobj_attribute shmem_enabled_attr =
+	__ATTR(shmem_enabled, 0644, shmem_enabled_show, shmem_enabled_store);
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE && CONFIG_SYSFS */
+
 #else /* !CONFIG_SHMEM */
 
 /*
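
To experiment with the shmem_enabled knob added above, a hedged userspace sketch (knob path as given in the commit message; requires root and a kernel with CONFIG_TRANSPARENT_HUGEPAGE and CONFIG_SYSFS; the sample output is only an assumption about the format produced by shmem_enabled_show()):

/* Hedged example: print the current shmem_enabled setting, then switch it
 * to "force" (huge on for all mounts), which the commit recommends only
 * for testing. */
#include <stdio.h>

#define KNOB "/sys/kernel/mm/transparent_hugepage/shmem_enabled"

int main(void)
{
	char line[128];
	FILE *f = fopen(KNOB, "r");

	if (!f) {
		perror(KNOB);
		return 1;
	}
	/* Expected to list all values with the active one bracketed, e.g.
	 * "always within_size advise [never] deny force". */
	if (fgets(line, sizeof(line), f))
		printf("shmem_enabled: %s", line);
	fclose(f);

	f = fopen(KNOB, "w");
	if (!f) {
		perror(KNOB);
		return 1;
	}
	/* "deny" would instead force the huge option off everywhere. */
	fputs("force\n", f);
	if (fclose(f) != 0) {
		perror(KNOB);
		return 1;
	}
	return 0;
}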
