@@ -470,28 +470,6 @@ static inline struct vm_area_struct *__vma_next(struct mm_struct *mm,
 	return vma->vm_next;
 }
 
-/*
- * munmap_vma_range() - munmap VMAs that overlap a range.
- * @mm: The mm struct
- * @start: The start of the range.
- * @len: The length of the range.
- * @pprev: pointer to the pointer that will be set to previous vm_area_struct
- *
- * Find all the vm_area_struct that overlap from @start to
- * @end and munmap them.  Set @pprev to the previous vm_area_struct.
- *
- * Returns: -ENOMEM on munmap failure or 0 on success.
- */
-static inline int
-munmap_vma_range(struct mm_struct *mm, unsigned long start, unsigned long len,
-		 struct vm_area_struct **pprev, struct list_head *uf)
-{
-	while (range_has_overlap(mm, start, start + len, pprev))
-		if (do_munmap(mm, start, len, uf))
-			return -ENOMEM;
-	return 0;
-}
-
 static unsigned long count_vma_pages_range(struct mm_struct *mm,
 		unsigned long addr, unsigned long end)
 {
@@ -618,6 +596,129 @@ static void __insert_vm_struct(struct mm_struct *mm, struct ma_state *mas,
 	mm->map_count++;
 }
 
+/*
+ * vma_expand - Expand an existing VMA
+ *
+ * @mas: The maple state
+ * @vma: The vma to expand
+ * @start: The start of the vma
+ * @end: The exclusive end of the vma
+ * @pgoff: The page offset of vma
+ * @next: The current of next vma.
+ *
+ * Expand @vma to @start and @end.  Can expand off the start and end.  Will
+ * expand over @next if it's different from @vma and @end == @next->vm_end.
+ * Checking if the @vma can expand and merge with @next needs to be handled by
+ * the caller.
+ *
+ * Returns: 0 on success
+ */
+inline int vma_expand(struct ma_state *mas, struct vm_area_struct *vma,
+		      unsigned long start, unsigned long end, pgoff_t pgoff,
+		      struct vm_area_struct *next)
+{
+	struct mm_struct *mm = vma->vm_mm;
+	struct address_space *mapping = NULL;
+	struct rb_root_cached *root = NULL;
+	struct anon_vma *anon_vma = vma->anon_vma;
+	struct file *file = vma->vm_file;
+	bool remove_next = false;
+
+	if (next && (vma != next) && (end == next->vm_end)) {
+		remove_next = true;
+		if (next->anon_vma && !vma->anon_vma) {
+			int error;
+
+			anon_vma = next->anon_vma;
+			vma->anon_vma = anon_vma;
+			error = anon_vma_clone(vma, next);
+			if (error)
+				return error;
+		}
+	}
+
+	/* Not merging but overwriting any part of next is not handled. */
+	VM_BUG_ON(next && !remove_next && next != vma && end > next->vm_start);
+	/* Only handles expanding */
+	VM_BUG_ON(vma->vm_start < start || vma->vm_end > end);
+
+	if (mas_preallocate(mas, vma, GFP_KERNEL))
+		goto nomem;
+
+	vma_adjust_trans_huge(vma, start, end, 0);
+
+	if (file) {
+		mapping = file->f_mapping;
+		root = &mapping->i_mmap;
+		uprobe_munmap(vma, vma->vm_start, vma->vm_end);
+		i_mmap_lock_write(mapping);
+	}
+
+	if (anon_vma) {
+		anon_vma_lock_write(anon_vma);
+		anon_vma_interval_tree_pre_update_vma(vma);
+	}
+
+	if (file) {
+		flush_dcache_mmap_lock(mapping);
+		vma_interval_tree_remove(vma, root);
+	}
+
+	vma->vm_start = start;
+	vma->vm_end = end;
+	vma->vm_pgoff = pgoff;
+	/* Note: mas must be pointing to the expanding VMA */
+	vma_mas_store(vma, mas);
+
+	if (file) {
+		vma_interval_tree_insert(vma, root);
+		flush_dcache_mmap_unlock(mapping);
+	}
+
+	/* Expanding over the next vma */
+	if (remove_next) {
+		/* Remove from mm linked list - also updates highest_vm_end */
+		__vma_unlink_list(mm, next);
+
+		/* Kill the cache */
+		vmacache_invalidate(mm);
+
+		if (file)
+			__remove_shared_vm_struct(next, file, mapping);
+
+	} else if (!next) {
+		mm->highest_vm_end = vm_end_gap(vma);
+	}
+
+	if (anon_vma) {
+		anon_vma_interval_tree_post_update_vma(vma);
+		anon_vma_unlock_write(anon_vma);
+	}
+
+	if (file) {
+		i_mmap_unlock_write(mapping);
+		uprobe_mmap(vma);
+	}
+
+	if (remove_next) {
+		if (file) {
+			uprobe_munmap(next, next->vm_start, next->vm_end);
+			fput(file);
+		}
+		if (next->anon_vma)
+			anon_vma_merge(vma, next);
+		mm->map_count--;
+		mpol_put(vma_policy(next));
+		vm_area_free(next);
+	}
+
+	validate_mm(mm);
+	return 0;
+
+nomem:
+	return -ENOMEM;
+}
+
 /*
  * We cannot adjust vm_start, vm_end, vm_pgoff fields of a vma that
  * is already present in an i_mmap tree without adjusting the tree.
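The kernel-doc above leaves the "can these two VMAs merge?" check to the caller of vma_expand(). As a rough illustration only (not part of the patch; expand_over_next() is a hypothetical helper name, while the other identifiers are the ones used in this file), a caller growing a VMA forward over its neighbour might look roughly like this:

/*
 * Hedged sketch: grow @vma forward over the adjacent @next, performing the
 * mergeability check that vma_expand() itself does not do.
 */
static int expand_over_next(struct mm_struct *mm, struct vm_area_struct *vma,
			    struct vm_area_struct *next)
{
	pgoff_t pglen = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
	MA_STATE(mas, &mm->mm_mt, vma->vm_start, next->vm_end - 1);

	if (vma->vm_end != next->vm_start)	/* must be directly adjacent */
		return -EINVAL;

	/* The caller, not vma_expand(), decides whether merging is legal. */
	if (!can_vma_merge_before(next, vma->vm_flags, vma->anon_vma,
				  vma->vm_file, vma->vm_pgoff + pglen,
				  vma->vm_userfaultfd_ctx, NULL))
		return -EINVAL;

	/* end == next->vm_end here, so vma_expand() also removes @next. */
	return vma_expand(&mas, vma, vma->vm_start, next->vm_end,
			  vma->vm_pgoff, next);
}

mmap_region() below does essentially this, except that it also considers expanding backwards over prev and picks merge_start/merge_end accordingly.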
@@ -1630,9 +1731,15 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
 		struct list_head *uf)
 {
 	struct mm_struct *mm = current->mm;
-	struct vm_area_struct *vma, *prev, *merge;
-	int error;
+	struct vm_area_struct *vma = NULL;
+	struct vm_area_struct *next, *prev, *merge;
+	pgoff_t pglen = len >> PAGE_SHIFT;
 	unsigned long charged = 0;
+	unsigned long end = addr + len;
+	unsigned long merge_start = addr, merge_end = end;
+	pgoff_t vm_pgoff;
+	int error;
+	MA_STATE(mas, &mm->mm_mt, addr, end - 1);
 
 	/* Check against address space limit. */
 	if (!may_expand_vm(mm, vm_flags, len >> PAGE_SHIFT)) {
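The new MA_STATE() line declares an on-stack maple tree iterator over mm->mm_mt spanning [addr, end - 1]. The hunks below use it with mas_next()/mas_prev() to find the VMAs on either side of the (by then unmapped) range, replacing the old rbtree/linked-list walk. A minimal sketch of that lookup pattern, under the assumption that the range itself is empty (find_neighbours() is an illustrative name; the mas_* calls are the ones used below):

/* Illustrative sketch: locate the VMAs bracketing the empty range [addr, end). */
static void find_neighbours(struct mm_struct *mm, unsigned long addr,
			    unsigned long end, struct vm_area_struct **pprev,
			    struct vm_area_struct **pnext)
{
	MA_STATE(mas, &mm->mm_mt, addr, end - 1);

	*pnext = mas_next(&mas, ULONG_MAX);	/* first VMA after the range */
	*pprev = mas_prev(&mas, 0);		/* last VMA before the range */
}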
@@ -1642,16 +1749,17 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
 		 * MAP_FIXED may remove pages of mappings that intersects with
 		 * requested mapping. Account for the pages it would unmap.
 		 */
-		nr_pages = count_vma_pages_range(mm, addr, addr + len);
+		nr_pages = count_vma_pages_range(mm, addr, end);
 
 		if (!may_expand_vm(mm, vm_flags,
 					(len >> PAGE_SHIFT) - nr_pages))
 			return -ENOMEM;
 	}
 
-	/* Clear old maps, set up prev and uf */
-	if (munmap_vma_range(mm, addr, len, &prev, uf))
+	/* Unmap any existing mapping in the area */
+	if (do_munmap(mm, addr, len, uf))
 		return -ENOMEM;
+
 	/*
 	 * Private writable mapping: check memory availability
 	 */
@@ -1662,14 +1770,43 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
 		vm_flags |= VM_ACCOUNT;
 	}
 
-	/*
-	 * Can we just expand an old mapping?
-	 */
-	vma = vma_merge(mm, prev, addr, addr + len, vm_flags,
-			NULL, file, pgoff, NULL, NULL_VM_UFFD_CTX, NULL);
-	if (vma)
-		goto out;
+	next = mas_next(&mas, ULONG_MAX);
+	prev = mas_prev(&mas, 0);
+	if (vm_flags & VM_SPECIAL)
+		goto cannot_expand;
+
+	/* Attempt to expand an old mapping */
+	/* Check next */
+	if (next && next->vm_start == end && !vma_policy(next) &&
+	    can_vma_merge_before(next, vm_flags, NULL, file, pgoff + pglen,
+				 NULL_VM_UFFD_CTX, NULL)) {
+		merge_end = next->vm_end;
+		vma = next;
+		vm_pgoff = next->vm_pgoff - pglen;
+	}
+
+	/* Check prev */
+	if (prev && prev->vm_end == addr && !vma_policy(prev) &&
+	    (vma ? can_vma_merge_after(prev, vm_flags, vma->anon_vma, file,
+				       pgoff, vma->vm_userfaultfd_ctx, NULL) :
+		   can_vma_merge_after(prev, vm_flags, NULL, file, pgoff,
+				       NULL_VM_UFFD_CTX, NULL))) {
+		merge_start = prev->vm_start;
+		vma = prev;
+		vm_pgoff = prev->vm_pgoff;
+	}
+
+
+	/* Actually expand, if possible */
+	if (vma &&
+	    !vma_expand(&mas, vma, merge_start, merge_end, vm_pgoff, next)) {
+		khugepaged_enter_vma(vma, vm_flags);
+		goto expanded;
+	}
 
+	mas.index = addr;
+	mas.last = end - 1;
+cannot_expand:
 	/*
 	 * Determine the object being mapped and call the appropriate
 	 * specific mapper. the address has already been validated, but
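To make the two checks above concrete, an illustrative walk-through (addresses made up): the request is addr = 0x1000, end = 0x3000, prev maps [0x0, 0x1000), next maps [0x3000, 0x5000), and both neighbours pass their can_vma_merge_*() checks. The "Check next" branch sets vma = next, merge_end = 0x5000 and vm_pgoff = next->vm_pgoff - pglen; the "Check prev" branch then switches to vma = prev, merge_start = 0x0 and vm_pgoff = prev->vm_pgoff, so the expansion becomes a three-way merge:

	/* Illustrative values only, not from the patch. */
	vma_expand(&mas, prev, /* merge_start */ 0x0, /* merge_end */ 0x5000,
		   prev->vm_pgoff, next);
	/* merge_end == next->vm_end, so vma_expand() removes next entirely. */

If only one neighbour qualifies, only the matching bound moves; if neither does, vma stays NULL and control falls through to the cannot_expand label.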
@@ -1682,7 +1819,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
 	}
 
 	vma->vm_start = addr;
-	vma->vm_end = addr + len;
+	vma->vm_end = end;
 	vma->vm_flags = vm_flags;
 	vma->vm_page_prot = vm_get_page_prot(vm_flags);
 	vma->vm_pgoff = pgoff;
@@ -1703,28 +1840,32 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
 		 *
 		 * Answer: Yes, several device drivers can do it in their
 		 *         f_op->mmap method. -DaveM
-		 * Bug: If addr is changed, prev, rb_link, rb_parent should
-		 *      be updated for vma_link()
 		 */
 		WARN_ON_ONCE(addr != vma->vm_start);
 
 		addr = vma->vm_start;
+		mas_reset(&mas);
 
-		/* If vm_flags changed after call_mmap(), we should try merge vma again
-		 * as we may succeed this time.
+		/*
+		 * If vm_flags changed after call_mmap(), we should try merge
+		 * vma again as we may succeed this time.
 		 */
 		if (unlikely(vm_flags != vma->vm_flags && prev)) {
 			merge = vma_merge(mm, prev, vma->vm_start, vma->vm_end, vma->vm_flags,
 				NULL, vma->vm_file, vma->vm_pgoff, NULL, NULL_VM_UFFD_CTX, NULL);
 			if (merge) {
-				/* ->mmap() can change vma->vm_file and fput the original file. So
-				 * fput the vma->vm_file here or we would add an extra fput for file
-				 * and cause general protection fault ultimately.
+				/*
+				 * ->mmap() can change vma->vm_file and fput
+				 * the original file. So fput the vma->vm_file
+				 * here or we would add an extra fput for file
+				 * and cause general protection fault
+				 * ultimately.
 				 */
 				fput(vma->vm_file);
 				vm_area_free(vma);
 				vma = merge;
 				/* Update vm_flags to pick up the change. */
+				addr = vma->vm_start;
 				vm_flags = vma->vm_flags;
 				goto unmap_writable;
 			}
@@ -1748,14 +1889,30 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
 			goto free_vma;
 	}
 
-	if (vma_link(mm, vma, prev)) {
+	if (mas_preallocate(&mas, vma, GFP_KERNEL)) {
 		error = -ENOMEM;
 		if (file)
 			goto unmap_and_free_vma;
 		else
 			goto free_vma;
 	}
 
+	if (vma->vm_file)
+		i_mmap_lock_write(vma->vm_file->f_mapping);
+
+	vma_mas_store(vma, &mas);
+	__vma_link_list(mm, vma, prev);
+	mm->map_count++;
+	if (vma->vm_file) {
+		if (vma->vm_flags & VM_SHARED)
+			mapping_allow_writable(vma->vm_file->f_mapping);
+
+		flush_dcache_mmap_lock(vma->vm_file->f_mapping);
+		vma_interval_tree_insert(vma, &vma->vm_file->f_mapping->i_mmap);
+		flush_dcache_mmap_unlock(vma->vm_file->f_mapping);
+		i_mmap_unlock_write(vma->vm_file->f_mapping);
+	}
+
 	/*
 	 * vma_merge() calls khugepaged_enter_vma() either, the below
 	 * call covers the non-merge case.
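The block above open-codes what the removed vma_link(mm, vma, prev) call used to do, with vma_mas_store() taking the place of the rbtree insertion. The ordering is deliberate: mas_preallocate() reserves maple tree nodes while failure is still trivial to unwind, so the later store cannot fail while i_mmap_lock_write() is held. A hedged, simplified restatement of that pattern (illustrative only, condensed from the code above):

	if (mas_preallocate(&mas, vma, GFP_KERNEL))
		return -ENOMEM;			/* fail early; nothing to undo yet */

	if (vma->vm_file)
		i_mmap_lock_write(vma->vm_file->f_mapping);

	vma_mas_store(vma, &mas);		/* uses the preallocated nodes */
	__vma_link_list(mm, vma, prev);		/* keep the legacy list in sync */

	if (vma->vm_file)
		i_mmap_unlock_write(vma->vm_file->f_mapping);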
@@ -1767,7 +1924,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
 	if (file && vm_flags & VM_SHARED)
 		mapping_unmap_writable(file->f_mapping);
 	file = vma->vm_file;
-out:
+expanded:
 	perf_event_mmap(vma);
 
 	vm_stat_account(mm, vm_flags, len >> PAGE_SHIFT);
@@ -1794,6 +1951,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
 
 	vma_set_page_prot(vma);
 
+	validate_mm(mm);
 	return addr;
 
 unmap_and_free_vma:
@@ -1809,6 +1967,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
 unacct_error:
 	if (charged)
 		vm_unacct_memory(charged);
+	validate_mm(mm);
 	return error;
 }
 
@@ -2632,10 +2791,6 @@ int __do_munmap(struct mm_struct *mm, unsigned long start, size_t len,
 	prev = vma->vm_prev;
 	/* we have start < vma->vm_end  */
 
-	/* if it doesn't overlap, we have nothing.. */
-	if (vma->vm_start >= end)
-		return 0;
-
 	/*
 	 * If we need to split any vma, do it now to save pain later.
 	 *