Skip to content

Commit f4ea6dc

Browse files
kvaneeshmpe
authored and committed
powerpc/mm: Enable mappings above 128TB
Not all user space application is ready to handle wide addresses. It's known that at least some JIT compilers use higher bits in pointers to encode their information. It collides with valid pointers with 512TB addresses and leads to crashes. To mitigate this, we are not going to allocate virtual address space above 128TB by default. But userspace can ask for allocation from full address space by specifying hint address (with or without MAP_FIXED) above 128TB. If hint address set above 128TB, but MAP_FIXED is not specified, we try to look for unmapped area by specified address. If it's already occupied, we look for unmapped area in *full* address space, rather than from 128TB window. This approach helps to easily make application's memory allocator aware about large address space without manually tracking allocated virtual address space. This is going to be a per mmap decision. ie, we can have some mmaps with larger addresses and other that do not. A sample memory layout looks like: 10000000-10010000 r-xp 00000000 fc:00 9057045 /home/max_addr_512TB 10010000-10020000 r--p 00000000 fc:00 9057045 /home/max_addr_512TB 10020000-10030000 rw-p 00010000 fc:00 9057045 /home/max_addr_512TB 10029630000-10029660000 rw-p 00000000 00:00 0 [heap] 7fff834a0000-7fff834b0000 rw-p 00000000 00:00 0 7fff834b0000-7fff83670000 r-xp 00000000 fc:00 9177190 /lib/powerpc64le-linux-gnu/libc-2.23.so 7fff83670000-7fff83680000 r--p 001b0000 fc:00 9177190 /lib/powerpc64le-linux-gnu/libc-2.23.so 7fff83680000-7fff83690000 rw-p 001c0000 fc:00 9177190 /lib/powerpc64le-linux-gnu/libc-2.23.so 7fff83690000-7fff836a0000 rw-p 00000000 00:00 0 7fff836a0000-7fff836c0000 r-xp 00000000 00:00 0 [vdso] 7fff836c0000-7fff83700000 r-xp 00000000 fc:00 9177193 /lib/powerpc64le-linux-gnu/ld-2.23.so 7fff83700000-7fff83710000 r--p 00030000 fc:00 9177193 /lib/powerpc64le-linux-gnu/ld-2.23.so 7fff83710000-7fff83720000 rw-p 00040000 fc:00 9177193 /lib/powerpc64le-linux-gnu/ld-2.23.so 7fffdccf0000-7fffdcd20000 rw-p 00000000 
00:00 0 [stack] 1000000000000-1000000010000 rw-p 00000000 00:00 0 1ffff83710000-1ffff83720000 rw-p 00000000 00:00 0 Signed-off-by: Aneesh Kumar K.V <[email protected]> Signed-off-by: Michael Ellerman <[email protected]>
1 parent fbfef90 commit f4ea6dc

File tree

6 files changed

+89
-31
lines changed

6 files changed

+89
-31
lines changed

arch/powerpc/include/asm/processor.h

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -114,9 +114,9 @@ void release_thread(struct task_struct *);
114114
/*
115115
* Max value currently used:
116116
*/
117-
#define TASK_SIZE_USER64 TASK_SIZE_128TB
117+
#define TASK_SIZE_USER64 TASK_SIZE_512TB
118118
#else
119-
#define TASK_SIZE_USER64 TASK_SIZE_64TB
119+
#define TASK_SIZE_USER64 TASK_SIZE_64TB
120120
#endif
121121

122122
/*
@@ -128,26 +128,37 @@ void release_thread(struct task_struct *);
128128
#define TASK_SIZE_OF(tsk) (test_tsk_thread_flag(tsk, TIF_32BIT) ? \
129129
TASK_SIZE_USER32 : TASK_SIZE_USER64)
130130
#define TASK_SIZE TASK_SIZE_OF(current)
131-
132131
/* This decides where the kernel will search for a free chunk of vm
133132
* space during mmap's.
134133
*/
135134
#define TASK_UNMAPPED_BASE_USER32 (PAGE_ALIGN(TASK_SIZE_USER32 / 4))
136-
#define TASK_UNMAPPED_BASE_USER64 (PAGE_ALIGN(TASK_SIZE_USER64 / 4))
135+
#define TASK_UNMAPPED_BASE_USER64 (PAGE_ALIGN(TASK_SIZE_128TB / 4))
137136

138137
#define TASK_UNMAPPED_BASE ((is_32bit_task()) ? \
139138
TASK_UNMAPPED_BASE_USER32 : TASK_UNMAPPED_BASE_USER64 )
140139
#endif
141140

141+
/*
142+
* Initial task size value for user applications. For book3s 64 we start
143+
* with 128TB and conditionally enable up to 512TB
144+
*/
145+
#ifdef CONFIG_PPC_BOOK3S_64
146+
#define DEFAULT_MAP_WINDOW ((is_32bit_task()) ? \
147+
TASK_SIZE_USER32 : TASK_SIZE_128TB)
148+
#else
149+
#define DEFAULT_MAP_WINDOW TASK_SIZE
150+
#endif
151+
142152
#ifdef __powerpc64__
143153

144-
#define STACK_TOP_USER64 TASK_SIZE_USER64
154+
/* Limit stack to 128TB */
155+
#define STACK_TOP_USER64 TASK_SIZE_128TB
145156
#define STACK_TOP_USER32 TASK_SIZE_USER32
146157

147158
#define STACK_TOP (is_32bit_task() ? \
148159
STACK_TOP_USER32 : STACK_TOP_USER64)
149160

150-
#define STACK_TOP_MAX STACK_TOP_USER64
161+
#define STACK_TOP_MAX TASK_SIZE_USER64
151162

152163
#else /* __powerpc64__ */
153164

arch/powerpc/kernel/setup-common.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -923,7 +923,7 @@ void __init setup_arch(char **cmdline_p)
923923

924924
#ifdef CONFIG_PPC_MM_SLICES
925925
#ifdef CONFIG_PPC64
926-
init_mm.context.addr_limit = TASK_SIZE_USER64;
926+
init_mm.context.addr_limit = TASK_SIZE_128TB;
927927
#else
928928
#error "context.addr_limit not initialized."
929929
#endif

arch/powerpc/mm/hugetlbpage-radix.c

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,9 @@ radix__hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
5050
struct hstate *h = hstate_file(file);
5151
struct vm_unmapped_area_info info;
5252

53+
if (unlikely(addr > mm->context.addr_limit && addr < TASK_SIZE))
54+
mm->context.addr_limit = TASK_SIZE;
55+
5356
if (len & ~huge_page_mask(h))
5457
return -EINVAL;
5558
if (len > mm->context.addr_limit)
@@ -78,5 +81,9 @@ radix__hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
7881
info.high_limit = current->mm->mmap_base;
7982
info.align_mask = PAGE_MASK & ~huge_page_mask(h);
8083
info.align_offset = 0;
84+
85+
if (addr > DEFAULT_MAP_WINDOW)
86+
info.high_limit += mm->context.addr_limit - DEFAULT_MAP_WINDOW;
87+
8188
return vm_unmapped_area(&info);
8289
}

arch/powerpc/mm/mmap.c

Lines changed: 21 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ static inline unsigned long mmap_base(unsigned long rnd)
7979
else if (gap > MAX_GAP)
8080
gap = MAX_GAP;
8181

82-
return PAGE_ALIGN(TASK_SIZE - gap - rnd);
82+
return PAGE_ALIGN(DEFAULT_MAP_WINDOW - gap - rnd);
8383
}
8484

8585
#ifdef CONFIG_PPC_RADIX_MMU
@@ -97,6 +97,9 @@ radix__arch_get_unmapped_area(struct file *filp, unsigned long addr,
9797
struct vm_area_struct *vma;
9898
struct vm_unmapped_area_info info;
9999

100+
if (unlikely(addr > mm->context.addr_limit && addr < TASK_SIZE))
101+
mm->context.addr_limit = TASK_SIZE;
102+
100103
if (len > mm->context.addr_limit - mmap_min_addr)
101104
return -ENOMEM;
102105

@@ -114,8 +117,13 @@ radix__arch_get_unmapped_area(struct file *filp, unsigned long addr,
114117
info.flags = 0;
115118
info.length = len;
116119
info.low_limit = mm->mmap_base;
117-
info.high_limit = mm->context.addr_limit;
118120
info.align_mask = 0;
121+
122+
if (unlikely(addr > DEFAULT_MAP_WINDOW))
123+
info.high_limit = mm->context.addr_limit;
124+
else
125+
info.high_limit = DEFAULT_MAP_WINDOW;
126+
119127
return vm_unmapped_area(&info);
120128
}
121129

@@ -131,6 +139,9 @@ radix__arch_get_unmapped_area_topdown(struct file *filp,
131139
unsigned long addr = addr0;
132140
struct vm_unmapped_area_info info;
133141

142+
if (unlikely(addr > mm->context.addr_limit && addr < TASK_SIZE))
143+
mm->context.addr_limit = TASK_SIZE;
144+
134145
/* requested length too big for entire address space */
135146
if (len > mm->context.addr_limit - mmap_min_addr)
136147
return -ENOMEM;
@@ -152,23 +163,22 @@ radix__arch_get_unmapped_area_topdown(struct file *filp,
152163
info.low_limit = max(PAGE_SIZE, mmap_min_addr);
153164
info.high_limit = mm->mmap_base;
154165
info.align_mask = 0;
166+
167+
if (addr > DEFAULT_MAP_WINDOW)
168+
info.high_limit += mm->context.addr_limit - DEFAULT_MAP_WINDOW;
169+
155170
addr = vm_unmapped_area(&info);
171+
if (!(addr & ~PAGE_MASK))
172+
return addr;
173+
VM_BUG_ON(addr != -ENOMEM);
156174

157175
/*
158176
* A failed mmap() very likely causes application failure,
159177
* so fall back to the bottom-up function here. This scenario
160178
* can happen with large stack limits and large mmap()
161179
* allocations.
162180
*/
163-
if (addr & ~PAGE_MASK) {
164-
VM_BUG_ON(addr != -ENOMEM);
165-
info.flags = 0;
166-
info.low_limit = TASK_UNMAPPED_BASE;
167-
info.high_limit = mm->context.addr_limit;
168-
addr = vm_unmapped_area(&info);
169-
}
170-
171-
return addr;
181+
return radix__arch_get_unmapped_area(filp, addr0, len, pgoff, flags);
172182
}
173183

174184
static void radix__arch_pick_mmap_layout(struct mm_struct *mm,

arch/powerpc/mm/mmu_context_book3s64.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,7 @@ static int hash__init_new_context(struct mm_struct *mm)
9999
* mm->context.addr_limit. Default to max task size so that we copy the
100100
* default values to paca which will help us to handle slb miss early.
101101
*/
102-
mm->context.addr_limit = TASK_SIZE_USER64;
102+
mm->context.addr_limit = TASK_SIZE_128TB;
103103

104104
/*
105105
* The old code would re-promote on fork, we don't do that when using

arch/powerpc/mm/slice.c

Lines changed: 42 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -265,7 +265,7 @@ static bool slice_scan_available(unsigned long addr,
265265
static unsigned long slice_find_area_bottomup(struct mm_struct *mm,
266266
unsigned long len,
267267
struct slice_mask available,
268-
int psize)
268+
int psize, unsigned long high_limit)
269269
{
270270
int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT);
271271
unsigned long addr, found, next_end;
@@ -277,7 +277,10 @@ static unsigned long slice_find_area_bottomup(struct mm_struct *mm,
277277
info.align_offset = 0;
278278

279279
addr = TASK_UNMAPPED_BASE;
280-
while (addr < mm->context.addr_limit) {
280+
/*
281+
* Check up to the allowed max value for this mmap request
282+
*/
283+
while (addr < high_limit) {
281284
info.low_limit = addr;
282285
if (!slice_scan_available(addr, available, 1, &addr))
283286
continue;
@@ -308,7 +311,7 @@ static unsigned long slice_find_area_bottomup(struct mm_struct *mm,
308311
static unsigned long slice_find_area_topdown(struct mm_struct *mm,
309312
unsigned long len,
310313
struct slice_mask available,
311-
int psize)
314+
int psize, unsigned long high_limit)
312315
{
313316
int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT);
314317
unsigned long addr, found, prev;
@@ -320,6 +323,15 @@ static unsigned long slice_find_area_topdown(struct mm_struct *mm,
320323
info.align_offset = 0;
321324

322325
addr = mm->mmap_base;
326+
/*
327+
* If we are trying to allocate above DEFAULT_MAP_WINDOW
328+
* Add the difference to mmap_base.
329+
* Only for that request for which high_limit is above
330+
* DEFAULT_MAP_WINDOW we should apply this.
331+
*/
332+
if (high_limit > DEFAULT_MAP_WINDOW)
333+
addr += mm->context.addr_limit - DEFAULT_MAP_WINDOW;
334+
323335
while (addr > PAGE_SIZE) {
324336
info.high_limit = addr;
325337
if (!slice_scan_available(addr - 1, available, 0, &addr))
@@ -351,18 +363,18 @@ static unsigned long slice_find_area_topdown(struct mm_struct *mm,
351363
* can happen with large stack limits and large mmap()
352364
* allocations.
353365
*/
354-
return slice_find_area_bottomup(mm, len, available, psize);
366+
return slice_find_area_bottomup(mm, len, available, psize, high_limit);
355367
}
356368

357369

358370
static unsigned long slice_find_area(struct mm_struct *mm, unsigned long len,
359371
struct slice_mask mask, int psize,
360-
int topdown)
372+
int topdown, unsigned long high_limit)
361373
{
362374
if (topdown)
363-
return slice_find_area_topdown(mm, len, mask, psize);
375+
return slice_find_area_topdown(mm, len, mask, psize, high_limit);
364376
else
365-
return slice_find_area_bottomup(mm, len, mask, psize);
377+
return slice_find_area_bottomup(mm, len, mask, psize, high_limit);
366378
}
367379

368380
static inline void slice_or_mask(struct slice_mask *dst, struct slice_mask *src)
@@ -402,7 +414,22 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
402414
int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT);
403415
struct mm_struct *mm = current->mm;
404416
unsigned long newaddr;
417+
unsigned long high_limit;
405418

419+
/*
420+
* Check if we need to expand the slice area.
421+
*/
422+
if (unlikely(addr > mm->context.addr_limit && addr < TASK_SIZE)) {
423+
mm->context.addr_limit = TASK_SIZE;
424+
on_each_cpu(slice_flush_segments, mm, 1);
425+
}
426+
/*
427+
* This mmap request can allocate up to 512TB
428+
*/
429+
if (addr > DEFAULT_MAP_WINDOW)
430+
high_limit = mm->context.addr_limit;
431+
else
432+
high_limit = DEFAULT_MAP_WINDOW;
406433
/*
407434
* init different masks
408435
*/
@@ -494,7 +521,8 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
494521
/* Now let's see if we can find something in the existing
495522
* slices for that size
496523
*/
497-
newaddr = slice_find_area(mm, len, good_mask, psize, topdown);
524+
newaddr = slice_find_area(mm, len, good_mask,
525+
psize, topdown, high_limit);
498526
if (newaddr != -ENOMEM) {
499527
/* Found within the good mask, we don't have to setup,
500528
* we thus return directly
@@ -526,7 +554,8 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
526554
* anywhere in the good area.
527555
*/
528556
if (addr) {
529-
addr = slice_find_area(mm, len, good_mask, psize, topdown);
557+
addr = slice_find_area(mm, len, good_mask,
558+
psize, topdown, high_limit);
530559
if (addr != -ENOMEM) {
531560
slice_dbg(" found area at 0x%lx\n", addr);
532561
return addr;
@@ -536,14 +565,15 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
536565
/* Now let's see if we can find something in the existing slices
537566
* for that size plus free slices
538567
*/
539-
addr = slice_find_area(mm, len, potential_mask, psize, topdown);
568+
addr = slice_find_area(mm, len, potential_mask,
569+
psize, topdown, high_limit);
540570

541571
#ifdef CONFIG_PPC_64K_PAGES
542572
if (addr == -ENOMEM && psize == MMU_PAGE_64K) {
543573
/* retry the search with 4k-page slices included */
544574
slice_or_mask(&potential_mask, &compat_mask);
545-
addr = slice_find_area(mm, len, potential_mask, psize,
546-
topdown);
575+
addr = slice_find_area(mm, len, potential_mask,
576+
psize, topdown, high_limit);
547577
}
548578
#endif
549579

0 commit comments

Comments
 (0)