@@ -59,7 +59,7 @@ static int walk_pmd_range(pud_t *pud, unsigned long addr, unsigned long end,
 			continue;
 
 		split_huge_page_pmd_mm(walk->mm, addr, pmd);
-		if (pmd_none_or_trans_huge_or_clear_bad(pmd))
+		if (pmd_trans_unstable(pmd))
 			goto again;
 		err = walk_pte_range(pmd, addr, next, walk);
 		if (err)
@@ -95,6 +95,32 @@ static int walk_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end,
 	return err;
 }
 
+static int walk_pgd_range(unsigned long addr, unsigned long end,
+			  struct mm_walk *walk)
+{
+	pgd_t *pgd;
+	unsigned long next;
+	int err = 0;
+
+	pgd = pgd_offset(walk->mm, addr);
+	do {
+		next = pgd_addr_end(addr, end);
+		if (pgd_none_or_clear_bad(pgd)) {
+			if (walk->pte_hole)
+				err = walk->pte_hole(addr, next, walk);
+			if (err)
+				break;
+			continue;
+		}
+		if (walk->pmd_entry || walk->pte_entry)
+			err = walk_pud_range(pgd, addr, next, walk);
+		if (err)
+			break;
+	} while (pgd++, addr = next, addr != end);
+
+	return err;
+}
+
 #ifdef CONFIG_HUGETLB_PAGE
 static unsigned long hugetlb_entry_end(struct hstate *h, unsigned long addr,
 				       unsigned long end)
@@ -103,10 +129,10 @@ static unsigned long hugetlb_entry_end(struct hstate *h, unsigned long addr,
 	return boundary < end ? boundary : end;
 }
 
-static int walk_hugetlb_range(struct vm_area_struct *vma,
-			      unsigned long addr, unsigned long end,
+static int walk_hugetlb_range(unsigned long addr, unsigned long end,
 			      struct mm_walk *walk)
 {
+	struct vm_area_struct *vma = walk->vma;
 	struct hstate *h = hstate_vma(vma);
 	unsigned long next;
 	unsigned long hmask = huge_page_mask(h);
@@ -119,128 +145,130 @@ static int walk_hugetlb_range(struct vm_area_struct *vma,
 		if (pte && walk->hugetlb_entry)
 			err = walk->hugetlb_entry(pte, hmask, addr, next, walk);
 		if (err)
-			return err;
+			break;
 	} while (addr = next, addr != end);
 
-	return 0;
+	return err;
 }
 
 #else /* CONFIG_HUGETLB_PAGE */
-static int walk_hugetlb_range(struct vm_area_struct *vma,
-			      unsigned long addr, unsigned long end,
+static int walk_hugetlb_range(unsigned long addr, unsigned long end,
 			      struct mm_walk *walk)
 {
 	return 0;
 }
 
 #endif /* CONFIG_HUGETLB_PAGE */
 
+/*
+ * Decide whether we really walk over the current vma on [@start, @end)
+ * or skip it via the returned value. Return 0 if we do walk over the
+ * current vma, and return 1 if we skip the vma. Negative values means
+ * error, where we abort the current walk.
+ *
+ * Default check (only VM_PFNMAP check for now) is used when the caller
+ * doesn't define test_walk() callback.
+ */
+static int walk_page_test(unsigned long start, unsigned long end,
+			struct mm_walk *walk)
+{
+	struct vm_area_struct *vma = walk->vma;
 
+	if (walk->test_walk)
+		return walk->test_walk(start, end, walk);
+
+	/*
+	 * Do not walk over vma(VM_PFNMAP), because we have no valid struct
+	 * page backing a VM_PFNMAP range. See also commit a9ff785e4437.
+	 */
+	if (vma->vm_flags & VM_PFNMAP)
+		return 1;
+	return 0;
+}
+
+static int __walk_page_range(unsigned long start, unsigned long end,
+			struct mm_walk *walk)
+{
+	int err = 0;
+	struct vm_area_struct *vma = walk->vma;
+
+	if (vma && is_vm_hugetlb_page(vma)) {
+		if (walk->hugetlb_entry)
+			err = walk_hugetlb_range(start, end, walk);
+	} else
+		err = walk_pgd_range(start, end, walk);
+
+	return err;
+}
 
 /**
- * walk_page_range - walk a memory map's page tables with a callback
- * @addr: starting address
- * @end: ending address
- * @walk: set of callbacks to invoke for each level of the tree
- *
- * Recursively walk the page table for the memory area in a VMA,
- * calling supplied callbacks. Callbacks are called in-order (first
- * PGD, first PUD, first PMD, first PTE, second PTE... second PMD,
- * etc.). If lower-level callbacks are omitted, walking depth is reduced.
+ * walk_page_range - walk page table with caller specific callbacks
  *
- * Each callback receives an entry pointer and the start and end of the
- * associated range, and a copy of the original mm_walk for access to
- * the ->private or ->mm fields.
+ * Recursively walk the page table tree of the process represented by @walk->mm
+ * within the virtual address range [@start, @end). During walking, we can do
+ * some caller-specific works for each entry, by setting up pmd_entry(),
+ * pte_entry(), and/or hugetlb_entry(). If you don't set up for some of these
+ * callbacks, the associated entries/pages are just ignored.
+ * The return values of these callbacks are commonly defined like below:
+ *  - 0  : succeeded to handle the current entry, and if you don't reach the
+ *         end address yet, continue to walk.
+ *  - >0 : succeeded to handle the current entry, and return to the caller
+ *         with caller specific value.
+ *  - <0 : failed to handle the current entry, and return to the caller
+ *         with error code.
  *
- * Usually no locks are taken, but splitting transparent huge page may
- * take page table lock. And the bottom level iterator will map PTE
- * directories from highmem if necessary.
+ * Before starting to walk page table, some callers want to check whether
+ * they really want to walk over the current vma, typically by checking
+ * its vm_flags. walk_page_test() and @walk->test_walk() are used for this
+ * purpose.
  *
- * If any callback returns a non-zero value, the walk is aborted and
- * the return value is propagated back to the caller. Otherwise 0 is returned.
+ * struct mm_walk keeps current values of some common data like vma and pmd,
+ * which are useful for the access from callbacks. If you want to pass some
+ * caller-specific data to callbacks, @walk->private should be helpful.
  *
- * walk->mm->mmap_sem must be held for at least read if walk->hugetlb_entry
- * is !NULL.
+ * Locking:
+ *   Callers of walk_page_range() and walk_page_vma() should hold
+ *   @walk->mm->mmap_sem, because these function traverse vma list and/or
+ *   access to vma's data.
  */
-int walk_page_range(unsigned long addr, unsigned long end,
+int walk_page_range(unsigned long start, unsigned long end,
 		    struct mm_walk *walk)
 {
-	pgd_t *pgd;
-	unsigned long next;
 	int err = 0;
+	unsigned long next;
+	struct vm_area_struct *vma;
 
-	if (addr >= end)
-		return err;
+	if (start >= end)
+		return -EINVAL;
 
 	if (!walk->mm)
 		return -EINVAL;
 
 	VM_BUG_ON_MM(!rwsem_is_locked(&walk->mm->mmap_sem), walk->mm);
 
-	pgd = pgd_offset(walk->mm, addr);
+	vma = find_vma(walk->mm, start);
 	do {
-		struct vm_area_struct *vma = NULL;
+		if (!vma) { /* after the last vma */
+			walk->vma = NULL;
+			next = end;
+		} else if (start < vma->vm_start) { /* outside vma */
+			walk->vma = NULL;
+			next = min(end, vma->vm_start);
+		} else { /* inside vma */
+			walk->vma = vma;
+			next = min(end, vma->vm_end);
+			vma = vma->vm_next;
 
-		next = pgd_addr_end(addr, end);
-
-		/*
-		 * This function was not intended to be vma based.
-		 * But there are vma special cases to be handled:
-		 * - hugetlb vma's
-		 * - VM_PFNMAP vma's
-		 */
-		vma = find_vma(walk->mm, addr);
-		if (vma) {
-			/*
-			 * There are no page structures backing a VM_PFNMAP
-			 * range, so do not allow split_huge_page_pmd().
-			 */
-			if ((vma->vm_start <= addr) &&
-			    (vma->vm_flags & VM_PFNMAP)) {
-				if (walk->pte_hole)
-					err = walk->pte_hole(addr, next, walk);
-				if (err)
-					break;
-				pgd = pgd_offset(walk->mm, next);
+			err = walk_page_test(start, next, walk);
+			if (err > 0)
 				continue;
-			}
-			/*
-			 * Handle hugetlb vma individually because pagetable
-			 * walk for the hugetlb page is dependent on the
-			 * architecture and we can't handled it in the same
-			 * manner as non-huge pages.
-			 */
-			if (walk->hugetlb_entry && (vma->vm_start <= addr) &&
-			    is_vm_hugetlb_page(vma)) {
-				if (vma->vm_end < next)
-					next = vma->vm_end;
-				/*
-				 * Hugepage is very tightly coupled with vma,
-				 * so walk through hugetlb entries within a
-				 * given vma.
-				 */
-				err = walk_hugetlb_range(vma, addr, next, walk);
-				if (err)
-					break;
-				pgd = pgd_offset(walk->mm, next);
-				continue;
-			}
-		}
-
-		if (pgd_none_or_clear_bad(pgd)) {
-			if (walk->pte_hole)
-				err = walk->pte_hole(addr, next, walk);
-			if (err)
+			if (err < 0)
 				break;
-			pgd++;
-			continue;
 		}
-		if (walk->pmd_entry || walk->pte_entry)
-			err = walk_pud_range(pgd, addr, next, walk);
+		if (walk->vma || walk->pte_hole)
+			err = __walk_page_range(start, next, walk);
 		if (err)
 			break;
-		pgd++;
-	} while (addr = next, addr < end);
-
+	} while (start = next, start < end);
 	return err;
 }
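
For illustration only (not part of this diff): a minimal sketch of a caller-supplied ->test_walk() callback that follows the 0 / 1 / <0 convention documented at walk_page_test() above. The callback name and the "anonymous memory only" policy are invented for the example.

/* Hypothetical ->test_walk(): walk only anonymous, non-PFNMAP vmas. */
static int anon_only_test_walk(unsigned long start, unsigned long end,
			       struct mm_walk *walk)
{
	/* walk->vma is set by walk_page_range() before this is called */
	struct vm_area_struct *vma = walk->vma;

	if (vma->vm_file || (vma->vm_flags & VM_PFNMAP))
		return 1;	/* skip this vma, continue with the next one */
	return 0;		/* walk over this vma */
}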
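Also for illustration (hypothetical helper, not part of this diff): a sketch of how a caller might wire up struct mm_walk and invoke walk_page_range() under mmap_sem, as the updated kernel-doc describes. count_present_ptes() and count_pte_entry() are invented names; walk->private carries the caller-specific counter.

/* Hypothetical ->pte_entry(): count present ptes in the walked range. */
static int count_pte_entry(pte_t *pte, unsigned long addr,
			   unsigned long next, struct mm_walk *walk)
{
	unsigned long *count = walk->private;

	if (pte_present(*pte))
		(*count)++;
	return 0;	/* 0: keep walking */
}

static unsigned long count_present_ptes(struct mm_struct *mm,
					unsigned long start, unsigned long end)
{
	unsigned long count = 0;
	struct mm_walk walk = {
		.pte_entry	= count_pte_entry,
		.mm		= mm,
		.private	= &count,
	};

	down_read(&mm->mmap_sem);	/* required: see "Locking" above */
	walk_page_range(start, end, &walk);
	up_read(&mm->mmap_sem);
	return count;
}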