@@ -147,17 +147,40 @@ static struct vm_area_struct *remove_vma(struct vm_area_struct *vma)
         return next;
 }
 
-static int do_brk_flags(unsigned long addr, unsigned long request, unsigned long flags,
-                struct list_head *uf);
+/*
+ * check_brk_limits() - Use platform specific check of range & verify mlock
+ * limits.
+ * @addr: The address to check
+ * @len: The size of increase.
+ *
+ * Return: 0 on success.
+ */
+static int check_brk_limits(unsigned long addr, unsigned long len)
+{
+        unsigned long mapped_addr;
+
+        mapped_addr = get_unmapped_area(NULL, addr, len, 0, MAP_FIXED);
+        if (IS_ERR_VALUE(mapped_addr))
+                return mapped_addr;
+
+        return mlock_future_check(current->mm, current->mm->def_flags, len);
+}
+static int do_brk_munmap(struct ma_state *mas, struct vm_area_struct *vma,
+                         unsigned long newbrk, unsigned long oldbrk,
+                         struct list_head *uf);
+static int do_brk_flags(struct ma_state *mas, struct vm_area_struct *brkvma,
+                        unsigned long addr, unsigned long request,
+                        unsigned long flags);
 
 SYSCALL_DEFINE1(brk, unsigned long, brk)
 {
         unsigned long newbrk, oldbrk, origbrk;
         struct mm_struct *mm = current->mm;
-        struct vm_area_struct *next;
+        struct vm_area_struct *brkvma, *next = NULL;
         unsigned long min_brk;
         bool populate;
         bool downgraded = false;
         LIST_HEAD(uf);
+        MA_STATE(mas, &mm->mm_mt, 0, 0);
 
         if (mmap_write_lock_killable(mm))
                 return -EINTR;
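The new check_brk_limits() returns get_unmapped_area()'s result directly when that result encodes an error: the kernel packs small negative errno values into the top few thousand values of an unsigned long, so one return value can carry either an address or an error. Below is a small user-space sketch of that IS_ERR_VALUE() idiom; MAX_ERRNO and the threshold test mirror the kernel's definitions, while toy_get_unmapped_area() is an invented stand-in used only for illustration.

#include <stdio.h>

#define MAX_ERRNO       4095
/* Mirrors the kernel's IS_ERR_VALUE(): errno values in [-4095, -1], cast to
 * unsigned long, land in the last 4095 values of the address range. */
#define IS_ERR_VALUE(x) ((unsigned long)(x) >= (unsigned long)-MAX_ERRNO)

/* Toy stand-in for get_unmapped_area(): returns an address or an encoded -ENOMEM. */
static unsigned long toy_get_unmapped_area(unsigned long addr, int fail)
{
        if (fail)
                return (unsigned long)-12;      /* -ENOMEM packed into the return value */
        return addr;
}

int main(void)
{
        unsigned long ret = toy_get_unmapped_area(0x7f0000000000UL, 0);

        if (IS_ERR_VALUE(ret))
                printf("error %ld\n", (long)ret);
        else
                printf("address %#lx\n", ret);          /* prints the address */

        ret = toy_get_unmapped_area(0x7f0000000000UL, 1);
        if (IS_ERR_VALUE(ret))
                printf("error %ld\n", (long)ret);       /* prints error -12 */
        return 0;
}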
@@ -199,35 +222,52 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
 
         /*
          * Always allow shrinking brk.
-         * __do_munmap() may downgrade mmap_lock to read.
+         * do_brk_munmap() may downgrade mmap_lock to read.
          */
         if (brk <= mm->brk) {
                 int ret;
 
+                /* Search one past newbrk */
+                mas_set(&mas, newbrk);
+                brkvma = mas_find(&mas, oldbrk);
+                BUG_ON(brkvma == NULL);
+                if (brkvma->vm_start >= oldbrk)
+                        goto out; /* mapping intersects with an existing non-brk vma. */
                 /*
-                 * mm->brk must to be protected by write mmap_lock so update it
-                 * before downgrading mmap_lock. When __do_munmap() fails,
-                 * mm->brk will be restored from origbrk.
+                 * mm->brk must be protected by write mmap_lock.
+                 * do_brk_munmap() may downgrade the lock, so update it
+                 * before calling do_brk_munmap().
                  */
                 mm->brk = brk;
-                ret = __do_munmap(mm, newbrk, oldbrk - newbrk, &uf, true);
-                if (ret < 0) {
-                        mm->brk = origbrk;
-                        goto out;
-                } else if (ret == 1) {
+                mas.last = oldbrk - 1;
+                ret = do_brk_munmap(&mas, brkvma, newbrk, oldbrk, &uf);
+                if (ret == 1) {
                         downgraded = true;
-                }
-                goto success;
+                        goto success;
+                } else if (!ret)
+                        goto success;
+
+                mm->brk = origbrk;
+                goto out;
         }
 
-        /* Check against existing mmap mappings. */
-        next = find_vma(mm, oldbrk);
+        if (check_brk_limits(oldbrk, newbrk - oldbrk))
+                goto out;
+
+        /*
+         * Only check if the next VMA is within the stack_guard_gap of the
+         * expansion area
+         */
+        mas_set(&mas, oldbrk);
+        next = mas_find(&mas, newbrk - 1 + PAGE_SIZE + stack_guard_gap);
         if (next && newbrk + PAGE_SIZE > vm_start_gap(next))
                 goto out;
 
+        brkvma = mas_prev(&mas, mm->start_brk);
         /* Ok, looks good - let it rip. */
-        if (do_brk_flags(oldbrk, newbrk - oldbrk, 0, &uf) < 0)
+        if (do_brk_flags(&mas, brkvma, oldbrk, newbrk - oldbrk, 0) < 0)
                 goto out;
+
         mm->brk = brk;
 
 success:
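The reworked shrink path hinges on do_brk_munmap()'s three-way return: a negative errno means failure (and brk is rolled back to origbrk), 0 means success with the write lock still held, and 1 means success with the mmap lock already downgraded to read. A minimal user-space sketch of that calling convention follows; the toy_* names are invented for illustration and only model the control flow, not the locking itself.

#include <stdbool.h>
#include <stdio.h>

/* Toy model of do_brk_munmap()'s contract:
 *   < 0 -> failure, caller must roll back its state
 *     0 -> success, caller still holds the write lock
 *     1 -> success, the lock was downgraded inside the callee
 */
static int toy_brk_munmap(bool fail, bool downgrade)
{
        if (fail)
                return -12;             /* -ENOMEM */
        return downgrade ? 1 : 0;
}

static void toy_brk_shrink(bool fail, bool downgrade)
{
        bool downgraded = false;
        int ret = toy_brk_munmap(fail, downgrade);

        if (ret == 1)
                downgraded = true;      /* success; remember to read-unlock later */
        else if (ret < 0) {
                printf("failed (%d), restore old brk\n", ret);
                return;
        }
        printf("shrunk brk, downgraded=%d\n", downgraded);
}

int main(void)
{
        toy_brk_shrink(false, false);
        toy_brk_shrink(false, true);
        toy_brk_shrink(true, false);
        return 0;
}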
@@ -2762,38 +2802,55 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
 }
 
 /*
- * this is really a simplified "do_mmap". it only handles
- * anonymous maps. eventually we may be able to do some
- * brk-specific accounting here.
+ * brk_munmap() - Unmap a parital vma.
+ * @mas: The maple tree state.
+ * @vma: The vma to be modified
+ * @newbrk: the start of the address to unmap
+ * @oldbrk: The end of the address to unmap
+ * @uf: The userfaultfd list_head
+ *
+ * Returns: 1 on success.
+ * unmaps a partial VMA mapping.  Does not handle alignment, downgrades lock if
+ * possible.
  */
-static int do_brk_flags(unsigned long addr, unsigned long len,
-                        unsigned long flags, struct list_head *uf)
+static int do_brk_munmap(struct ma_state *mas, struct vm_area_struct *vma,
+                         unsigned long newbrk, unsigned long oldbrk,
+                         struct list_head *uf)
 {
-        struct mm_struct *mm = current->mm;
-        struct vm_area_struct *vma, *prev;
-        pgoff_t pgoff = addr >> PAGE_SHIFT;
-        int error;
-        unsigned long mapped_addr;
-        validate_mm_mt(mm);
-
-        /* Until we need other flags, refuse anything except VM_EXEC. */
-        if ((flags & (~VM_EXEC)) != 0)
-                return -EINVAL;
-        flags |= VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags;
-
-        mapped_addr = get_unmapped_area(NULL, addr, len, 0, MAP_FIXED);
-        if (IS_ERR_VALUE(mapped_addr))
-                return mapped_addr;
+        struct mm_struct *mm = vma->vm_mm;
+        int ret;
 
-        error = mlock_future_check(mm, mm->def_flags, len);
-        if (error)
-                return error;
+        arch_unmap(mm, newbrk, oldbrk);
+        ret = __do_munmap(mm, newbrk, oldbrk - newbrk, uf, true);
+        validate_mm_mt(mm);
+        return ret;
+}
 
-        /* Clear old maps, set up prev and uf */
-        if (munmap_vma_range(mm, addr, len, &prev, uf))
-                return -ENOMEM;
+/*
+ * do_brk_flags() - Increase the brk vma if the flags match.
+ * @mas: The maple tree state.
+ * @addr: The start address
+ * @len: The length of the increase
+ * @vma: The vma,
+ * @flags: The VMA Flags
+ *
+ * Extend the brk VMA from addr to addr + len.  If the VMA is NULL or the flags
+ * do not match then create a new anonymous VMA.  Eventually we may be able to
+ * do some brk-specific accounting here.
+ */
+static int do_brk_flags(struct ma_state *mas, struct vm_area_struct *vma,
+                        unsigned long addr, unsigned long len,
+                        unsigned long flags)
+{
+        struct mm_struct *mm = current->mm;
+        struct vm_area_struct *prev = NULL;
 
-        /* Check against address space limits *after* clearing old maps... */
+        validate_mm_mt(mm);
+        /*
+         * Check against address space limits by the changed size
+         * Note: This happens *after* clearing old mappings in some code paths.
+         */
+        flags |= VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags;
         if (!may_expand_vm(mm, flags, len >> PAGE_SHIFT))
                 return -ENOMEM;
 
@@ -2803,30 +2860,54 @@ static int do_brk_flags(unsigned long addr, unsigned long len,
         if (security_vm_enough_memory_mm(mm, len >> PAGE_SHIFT))
                 return -ENOMEM;
 
-        /* Can we just expand an old private anonymous mapping? */
-        vma = vma_merge(mm, prev, addr, addr + len, flags,
-                        NULL, NULL, pgoff, NULL, NULL_VM_UFFD_CTX, NULL);
-        if (vma)
-                goto out;
-
         /*
-         * create a vma struct for an anonymous mapping
+         * Expand the existing vma if possible; Note that singular lists do not
+         * occur after forking, so the expand will only happen on new VMAs.
          */
-        vma = vm_area_alloc(mm);
-        if (!vma) {
-                vm_unacct_memory(len >> PAGE_SHIFT);
-                return -ENOMEM;
+        if (vma &&
+            (!vma->anon_vma || list_is_singular(&vma->anon_vma_chain)) &&
+            ((vma->vm_flags & ~VM_SOFTDIRTY) == flags)) {
+                mas->index = vma->vm_start;
+                mas->last = addr + len - 1;
+                vma_adjust_trans_huge(vma, addr, addr + len, 0);
+                if (vma->anon_vma) {
+                        anon_vma_lock_write(vma->anon_vma);
+                        anon_vma_interval_tree_pre_update_vma(vma);
+                }
+                vma->vm_end = addr + len;
+                vma->vm_flags |= VM_SOFTDIRTY;
+                if (mas_store_gfp(mas, vma, GFP_KERNEL))
+                        goto mas_expand_failed;
+
+                if (vma->anon_vma) {
+                        anon_vma_interval_tree_post_update_vma(vma);
+                        anon_vma_unlock_write(vma->anon_vma);
+                }
+                khugepaged_enter_vma(vma, flags);
+                goto out;
         }
+        prev = vma;
+
+        /* create a vma struct for an anonymous mapping */
+        vma = vm_area_alloc(mm);
+        if (!vma)
+                goto vma_alloc_fail;
 
         vma_set_anonymous(vma);
         vma->vm_start = addr;
         vma->vm_end = addr + len;
-        vma->vm_pgoff = pgoff;
+        vma->vm_pgoff = addr >> PAGE_SHIFT;
         vma->vm_flags = flags;
         vma->vm_page_prot = vm_get_page_prot(flags);
-        if (vma_link(mm, vma, prev))
-                goto no_vma_link;
+        mas_set_range(mas, vma->vm_start, addr + len - 1);
+        if (mas_store_gfp(mas, vma, GFP_KERNEL))
+                goto mas_store_fail;
+
+        if (!prev)
+                prev = mas_prev(mas, 0);
 
+        __vma_link_list(mm, vma, prev);
+        mm->map_count++;
 out:
         perf_event_mmap(vma);
         mm->total_vm += len >> PAGE_SHIFT;
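Whether do_brk_flags() may simply grow the existing brk VMA comes down to a flag comparison that masks out VM_SOFTDIRTY, since soft-dirty tracking may have set that bit on the old mapping while the requested flags never carry it. A small sketch of that mask-and-compare check follows; the flag bit values are made up for illustration, and only the ~VM_SOFTDIRTY masking mirrors the patch.

#include <stdbool.h>
#include <stdio.h>

/* Illustrative bit values only; the real constants live in the kernel headers. */
#define VM_READ         0x1UL
#define VM_WRITE        0x2UL
#define VM_ACCOUNT      0x8UL
#define VM_SOFTDIRTY    0x100UL

/* Mirrors the expand test in do_brk_flags(): the existing VMA may carry
 * VM_SOFTDIRTY, so mask it out before comparing against the wanted flags. */
static bool can_expand(unsigned long vma_flags, unsigned long want)
{
        return (vma_flags & ~VM_SOFTDIRTY) == want;
}

int main(void)
{
        unsigned long want = VM_READ | VM_WRITE | VM_ACCOUNT;

        printf("%d\n", can_expand(want, want));                 /* 1: identical flags */
        printf("%d\n", can_expand(want | VM_SOFTDIRTY, want));  /* 1: soft-dirty bit ignored */
        printf("%d\n", can_expand(VM_READ, want));              /* 0: flags differ */
        return 0;
}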
@@ -2837,18 +2918,29 @@ static int do_brk_flags(unsigned long addr, unsigned long len,
         validate_mm_mt(mm);
         return 0;
 
-no_vma_link:
+mas_store_fail:
         vm_area_free(vma);
+vma_alloc_fail:
+        vm_unacct_memory(len >> PAGE_SHIFT);
+        return -ENOMEM;
+
+mas_expand_failed:
+        if (vma->anon_vma) {
+                anon_vma_interval_tree_post_update_vma(vma);
+                anon_vma_unlock_write(vma->anon_vma);
+        }
         return -ENOMEM;
 }
 
 int vm_brk_flags(unsigned long addr, unsigned long request, unsigned long flags)
 {
         struct mm_struct *mm = current->mm;
+        struct vm_area_struct *vma = NULL;
         unsigned long len;
         int ret;
         bool populate;
         LIST_HEAD(uf);
+        MA_STATE(mas, &mm->mm_mt, addr, addr);
 
         len = PAGE_ALIGN(request);
         if (len < request)
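The error labels added above unwind in reverse order of setup: a failed maple-tree store frees the just-allocated VMA and then falls through to undo the memory accounting charged earlier, while the expand-failure path only releases the anon_vma lock. A generic user-space sketch of this goto-unwind pattern follows; the steps and names are invented, and only the fall-through structure is the point.

#include <stdio.h>
#include <stdlib.h>

/* Kernel-style error unwinding: each label undoes one earlier step,
 * and control falls through to undo everything set up before it. */
static int toy_setup(int fail_at)
{
        char *buf;

        printf("charge accounting\n");                  /* step 1 */

        buf = (fail_at == 2) ? NULL : malloc(64);       /* step 2 */
        if (!buf)
                goto alloc_fail;

        if (fail_at == 3)                               /* step 3: pretend the insert fails */
                goto insert_fail;

        printf("success\n");
        free(buf);
        return 0;

insert_fail:
        free(buf);                              /* undo step 2 */
alloc_fail:
        printf("uncharge accounting\n");        /* undo step 1 */
        return -1;
}

int main(void)
{
        toy_setup(0);   /* success path */
        toy_setup(3);   /* insert failure: frees buffer, then uncharges */
        return 0;
}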
@@ -2859,13 +2951,38 @@ int vm_brk_flags(unsigned long addr, unsigned long request, unsigned long flags)
         if (mmap_write_lock_killable(mm))
                 return -EINTR;
 
-        ret = do_brk_flags(addr, len, flags, &uf);
+        /* Until we need other flags, refuse anything except VM_EXEC. */
+        if ((flags & (~VM_EXEC)) != 0)
+                return -EINVAL;
+
+        ret = check_brk_limits(addr, len);
+        if (ret)
+                goto limits_failed;
+
+        if (find_vma_intersection(mm, addr, addr + len))
+                ret = do_munmap(mm, addr, len, &uf);
+
+        if (ret)
+                goto munmap_failed;
+
+        vma = mas_prev(&mas, 0);
+        if (!vma || vma->vm_end != addr || vma_policy(vma) ||
+            !can_vma_merge_after(vma, flags, NULL, NULL,
+                                 addr >> PAGE_SHIFT, NULL_VM_UFFD_CTX, NULL))
+                vma = NULL;
+
+        ret = do_brk_flags(&mas, vma, addr, len, flags);
         populate = ((mm->def_flags & VM_LOCKED) != 0);
         mmap_write_unlock(mm);
         userfaultfd_unmap_complete(mm, &uf);
         if (populate && !ret)
                 mm_populate(addr, len);
         return ret;
+
+munmap_failed:
+limits_failed:
+        mmap_write_unlock(mm);
+        return ret;
 }
 EXPORT_SYMBOL(vm_brk_flags);
 
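vm_brk_flags() still begins with len = PAGE_ALIGN(request) followed by an if (len < request) check: rounding up near the top of the address space wraps around to a smaller value, which is how the overflow is caught before any mapping work happens. A user-space illustration of that check, with PAGE_SIZE hard-coded to 4096 for the sketch:

#include <stdio.h>

#define PAGE_SIZE       4096UL
#define PAGE_ALIGN(x)   (((x) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))

int main(void)
{
        unsigned long request = 5000;
        unsigned long len = PAGE_ALIGN(request);

        printf("%lu -> %lu\n", request, len);   /* 5000 -> 8192 */

        /* Near ULONG_MAX the round-up wraps around, so len < request
         * flags the overflow, as in vm_brk_flags(). */
        request = (unsigned long)-123;
        len = PAGE_ALIGN(request);
        if (len < request)
                printf("overflow detected (len=%lu)\n", len);
        return 0;
}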