...
 // SPDX-License-Identifier: GPL-2.0
-#include <linux/mm.h>
+#include <linux/pagewalk.h>
 #include <linux/highmem.h>
 #include <linux/sched.h>
 #include <linux/hugetlb.h>
 
-static int walk_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
-                          struct mm_walk *walk)
+/*
+ * We want to know the real level where an entry is located, ignoring any
+ * folding of levels which may be happening. For example, if p4d is folded then
+ * a missing entry found at level 1 (p4d) is actually at level 0 (pgd).
+ */
+static int real_depth(int depth)
 {
-        pte_t *pte;
+        if (depth == 3 && PTRS_PER_PMD == 1)
+                depth = 2;
+        if (depth == 2 && PTRS_PER_PUD == 1)
+                depth = 1;
+        if (depth == 1 && PTRS_PER_P4D == 1)
+                depth = 0;
+        return depth;
+}
+
+static int walk_pte_range_inner(pte_t *pte, unsigned long addr,
+                                unsigned long end, struct mm_walk *walk)
+{
+        const struct mm_walk_ops *ops = walk->ops;
         int err = 0;
 
-        pte = pte_offset_map(pmd, addr);
         for (;;) {
-                err = walk->pte_entry(pte, addr, addr + PAGE_SIZE, walk);
+                err = ops->pte_entry(pte, addr, addr + PAGE_SIZE, walk);
                 if (err)
                         break;
                 if (addr >= end - PAGE_SIZE)
...
                 addr += PAGE_SIZE;
                 pte++;
         }
+        return err;
+}
 
-        pte_unmap(pte);
+static int walk_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
+                          struct mm_walk *walk)
+{
+        pte_t *pte;
+        int err = 0;
+        spinlock_t *ptl;
+
+        if (walk->no_vma) {
+                pte = pte_offset_map(pmd, addr);
+                err = walk_pte_range_inner(pte, addr, end, walk);
+                pte_unmap(pte);
+        } else {
+                pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
+                err = walk_pte_range_inner(pte, addr, end, walk);
+                pte_unmap_unlock(pte, ptl);
+        }
+
         return err;
 }
 
...
 {
         pmd_t *pmd;
         unsigned long next;
+        const struct mm_walk_ops *ops = walk->ops;
         int err = 0;
+        int depth = real_depth(3);
 
         pmd = pmd_offset(pud, addr);
         do {
 again:
                 next = pmd_addr_end(addr, end);
-                if (pmd_none(*pmd) || !walk->vma) {
-                        if (walk->pte_hole)
-                                err = walk->pte_hole(addr, next, walk);
+                if (pmd_none(*pmd)) {
+                        if (ops->pte_hole)
+                                err = ops->pte_hole(addr, next, depth, walk);
                         if (err)
                                 break;
                         continue;
                 }
+
+                walk->action = ACTION_SUBTREE;
+
                 /*
                  * This implies that each ->pmd_entry() handler
                  * needs to know about pmd_trans_huge() pmds
                  */
-                if (walk->pmd_entry)
-                        err = walk->pmd_entry(pmd, addr, next, walk);
+                if (ops->pmd_entry)
+                        err = ops->pmd_entry(pmd, addr, next, walk);
                 if (err)
                         break;
+
+                if (walk->action == ACTION_AGAIN)
+                        goto again;
 
                 /*
                  * Check this here so we only break down trans_huge
                  * pages when we _need_ to
                  */
-                if (!walk->pte_entry)
+                if ((!walk->vma && (pmd_leaf(*pmd) || !pmd_present(*pmd))) ||
+                    walk->action == ACTION_CONTINUE ||
+                    !(ops->pte_entry))
                         continue;
 
-                split_huge_pmd(walk->vma, pmd, addr);
-                if (pmd_trans_unstable(pmd))
-                        goto again;
+                if (walk->vma) {
+                        split_huge_pmd(walk->vma, pmd, addr);
+                        if (pmd_trans_unstable(pmd))
+                                goto again;
+                }
+
                 err = walk_pte_range(pmd, addr, next, walk);
                 if (err)
                         break;
...
 {
         pud_t *pud;
         unsigned long next;
+        const struct mm_walk_ops *ops = walk->ops;
         int err = 0;
+        int depth = real_depth(2);
 
         pud = pud_offset(p4d, addr);
         do {
 again:
                 next = pud_addr_end(addr, end);
-                if (pud_none(*pud) || !walk->vma) {
-                        if (walk->pte_hole)
-                                err = walk->pte_hole(addr, next, walk);
+                if (pud_none(*pud)) {
+                        if (ops->pte_hole)
+                                err = ops->pte_hole(addr, next, depth, walk);
                         if (err)
                                 break;
                         continue;
                 }
 
-                if (walk->pud_entry) {
-                        spinlock_t *ptl = pud_trans_huge_lock(pud, walk->vma);
+                walk->action = ACTION_SUBTREE;
 
-                        if (ptl) {
-                                err = walk->pud_entry(pud, addr, next, walk);
-                                spin_unlock(ptl);
-                                if (err)
-                                        break;
-                                continue;
-                        }
-                }
+                if (ops->pud_entry)
+                        err = ops->pud_entry(pud, addr, next, walk);
+                if (err)
+                        break;
 
-                split_huge_pud(walk->vma, pud, addr);
+                if (walk->action == ACTION_AGAIN)
+                        goto again;
+
+                if ((!walk->vma && (pud_leaf(*pud) || !pud_present(*pud))) ||
+                    walk->action == ACTION_CONTINUE ||
+                    !(ops->pmd_entry || ops->pte_entry))
+                        continue;
+
+                if (walk->vma)
+                        split_huge_pud(walk->vma, pud, addr);
                 if (pud_none(*pud))
                         goto again;
 
-                if (walk->pmd_entry || walk->pte_entry)
-                        err = walk_pmd_range(pud, addr, next, walk);
+                err = walk_pmd_range(pud, addr, next, walk);
                 if (err)
                         break;
         } while (pud++, addr = next, addr != end);
...
 {
         p4d_t *p4d;
         unsigned long next;
+        const struct mm_walk_ops *ops = walk->ops;
         int err = 0;
+        int depth = real_depth(1);
 
         p4d = p4d_offset(pgd, addr);
         do {
                 next = p4d_addr_end(addr, end);
                 if (p4d_none_or_clear_bad(p4d)) {
-                        if (walk->pte_hole)
-                                err = walk->pte_hole(addr, next, walk);
+                        if (ops->pte_hole)
+                                err = ops->pte_hole(addr, next, depth, walk);
                         if (err)
                                 break;
                         continue;
                 }
-                if (walk->pmd_entry || walk->pte_entry)
+                if (ops->p4d_entry) {
+                        err = ops->p4d_entry(p4d, addr, next, walk);
+                        if (err)
+                                break;
+                }
+                if (ops->pud_entry || ops->pmd_entry || ops->pte_entry)
                         err = walk_pud_range(p4d, addr, next, walk);
                 if (err)
                         break;
...
 {
         pgd_t *pgd;
         unsigned long next;
+        const struct mm_walk_ops *ops = walk->ops;
         int err = 0;
 
-        pgd = pgd_offset(walk->mm, addr);
+        if (walk->pgd)
+                pgd = walk->pgd + pgd_index(addr);
+        else
+                pgd = pgd_offset(walk->mm, addr);
         do {
                 next = pgd_addr_end(addr, end);
                 if (pgd_none_or_clear_bad(pgd)) {
-                        if (walk->pte_hole)
-                                err = walk->pte_hole(addr, next, walk);
+                        if (ops->pte_hole)
+                                err = ops->pte_hole(addr, next, 0, walk);
                         if (err)
                                 break;
                         continue;
                 }
-                if (walk->pmd_entry || walk->pte_entry)
+                if (ops->pgd_entry) {
+                        err = ops->pgd_entry(pgd, addr, next, walk);
+                        if (err)
+                                break;
+                }
+                if (ops->p4d_entry || ops->pud_entry || ops->pmd_entry ||
+                    ops->pte_entry)
                         err = walk_p4d_range(pgd, addr, next, walk);
                 if (err)
                         break;
...
         unsigned long hmask = huge_page_mask(h);
         unsigned long sz = huge_page_size(h);
         pte_t *pte;
+        const struct mm_walk_ops *ops = walk->ops;
         int err = 0;
 
         do {
...
                 pte = huge_pte_offset(walk->mm, addr & hmask, sz);
 
                 if (pte)
-                        err = walk->hugetlb_entry(pte, hmask, addr, next, walk);
-                else if (walk->pte_hole)
-                        err = walk->pte_hole(addr, next, walk);
+                        err = ops->hugetlb_entry(pte, hmask, addr, next, walk);
+                else if (ops->pte_hole)
+                        err = ops->pte_hole(addr, next, -1, walk);
 
                 if (err)
                         break;
...
                           struct mm_walk *walk)
 {
         struct vm_area_struct *vma = walk->vma;
+        const struct mm_walk_ops *ops = walk->ops;
 
-        if (walk->test_walk)
-                return walk->test_walk(start, end, walk);
+        if (ops->test_walk)
+                return ops->test_walk(start, end, walk);
 
         /*
          * vma(VM_PFNMAP) doesn't have any valid struct pages behind VM_PFNMAP
...
          */
         if (vma->vm_flags & VM_PFNMAP) {
                 int err = 1;
-                if (walk->pte_hole)
-                        err = walk->pte_hole(start, end, walk);
+                if (ops->pte_hole)
+                        err = ops->pte_hole(start, end, -1, walk);
                 return err ? err : 1;
         }
         return 0;
...
 {
         int err = 0;
         struct vm_area_struct *vma = walk->vma;
+        const struct mm_walk_ops *ops = walk->ops;
 
-        if (vma && is_vm_hugetlb_page(vma)) {
-                if (walk->hugetlb_entry)
+        if (ops->pre_vma) {
+                err = ops->pre_vma(start, end, walk);
+                if (err)
+                        return err;
+        }
+
+        if (is_vm_hugetlb_page(vma)) {
+                if (ops->hugetlb_entry)
                         err = walk_hugetlb_range(start, end, walk);
         } else
                 err = walk_pgd_range(start, end, walk);
+
+        if (ops->post_vma)
+                ops->post_vma(walk);
 
         return err;
 }
 
 /**
  * walk_page_range - walk page table with caller specific callbacks
- * @start: start address of the virtual address range
- * @end: end address of the virtual address range
- * @walk: mm_walk structure defining the callbacks and the target address space
+ * @mm: mm_struct representing the target process of page table walk
+ * @start: start address of the virtual address range
+ * @end: end address of the virtual address range
+ * @ops: operations to call during the walk
+ * @private: private data for callbacks' usage
  *
- * Recursively walk the page table tree of the process represented by @walk->mm
+ * Recursively walk the page table tree of the process represented by @mm
  * within the virtual address range [@start, @end). During walking, we can do
  * some caller-specific works for each entry, by setting up pmd_entry(),
  * pte_entry(), and/or hugetlb_entry(). If you don't set up for some of these
...
  *
  * Before starting to walk page table, some callers want to check whether
  * they really want to walk over the current vma, typically by checking
- * its vm_flags. walk_page_test() and @walk->test_walk() are used for this
+ * its vm_flags. walk_page_test() and @ops->test_walk() are used for this
  * purpose.
+ *
+ * If operations need to be staged before and committed after a vma is walked,
+ * there are two callbacks, pre_vma() and post_vma(). Note that post_vma(),
+ * since it is intended to handle commit-type operations, can't return any
+ * errors.
  *
  * struct mm_walk keeps current values of some common data like vma and pmd,
  * which are useful for the access from callbacks. If you want to pass some
- * caller-specific data to callbacks, @walk->private should be helpful.
+ * caller-specific data to callbacks, @private should be helpful.
  *
  * Locking:
- * Callers of walk_page_range() and walk_page_vma() should hold
- * @walk->mm->mmap_sem, because these function traverse vma list and/or
- * access to vma's data.
+ * Callers of walk_page_range() and walk_page_vma() should hold @mm->mmap_lock,
+ * because these functions traverse the vma list and/or access vma data.
  */
-int walk_page_range(unsigned long start, unsigned long end,
-                    struct mm_walk *walk)
+int walk_page_range(struct mm_struct *mm, unsigned long start,
+                    unsigned long end, const struct mm_walk_ops *ops,
+                    void *private)
 {
         int err = 0;
         unsigned long next;
         struct vm_area_struct *vma;
+        struct mm_walk walk = {
+                .ops = ops,
+                .mm = mm,
+                .private = private,
+        };
 
         if (start >= end)
                 return -EINVAL;
 
-        if (!walk->mm)
+        if (!walk.mm)
                 return -EINVAL;
 
-        VM_BUG_ON_MM(!rwsem_is_locked(&walk->mm->mmap_sem), walk->mm);
+        mmap_assert_locked(walk.mm);
 
-        vma = find_vma(walk->mm, start);
+        vma = find_vma(walk.mm, start);
         do {
                 if (!vma) { /* after the last vma */
-                        walk->vma = NULL;
+                        walk.vma = NULL;
                         next = end;
+                        if (ops->pte_hole)
+                                err = ops->pte_hole(start, next, -1, &walk);
                 } else if (start < vma->vm_start) { /* outside vma */
-                        walk->vma = NULL;
+                        walk.vma = NULL;
                         next = min(end, vma->vm_start);
+                        if (ops->pte_hole)
+                                err = ops->pte_hole(start, next, -1, &walk);
                 } else { /* inside vma */
-                        walk->vma = vma;
+                        walk.vma = vma;
                         next = min(end, vma->vm_end);
                         vma = vma->vm_next;
 
-                        err = walk_page_test(start, next, walk);
+                        err = walk_page_test(start, next, &walk);
                         if (err > 0) {
                                 /*
                                  * positive return values are purely for
...
                         }
                         if (err < 0)
                                 break;
+                        err = __walk_page_range(start, next, &walk);
                 }
-                if (walk->vma || walk->pte_hole)
-                        err = __walk_page_range(start, next, walk);
                 if (err)
                         break;
         } while (start = next, start < end);
         return err;
 }
+EXPORT_SYMBOL_GPL(walk_page_range);
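
The reworked entry point above takes the target mm, a caller-supplied const struct mm_walk_ops and an opaque private pointer, and asserts that the mmap lock is held. As a rough usage sketch (not part of the patch; count_pte_entry, count_ops and count_present are invented names, and the counting logic is purely illustrative), a caller counting present PTEs in a range might look like this:

#include <linux/mm.h>
#include <linux/pagewalk.h>

static int count_pte_entry(pte_t *pte, unsigned long addr,
                           unsigned long next, struct mm_walk *walk)
{
        unsigned long *count = walk->private;   /* caller data via walk->private */

        if (pte_present(*pte))
                (*count)++;
        return 0;                               /* non-zero would stop the walk */
}

static const struct mm_walk_ops count_ops = {
        .pte_entry = count_pte_entry,
};

static unsigned long count_present(struct mm_struct *mm,
                                   unsigned long start, unsigned long end)
{
        unsigned long count = 0;

        mmap_read_lock(mm);                     /* satisfies mmap_assert_locked() */
        walk_page_range(mm, start, end, &count_ops, &count);
        mmap_read_unlock(mm);
        return count;
}
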
 
-int walk_page_vma(struct vm_area_struct *vma, struct mm_walk *walk)
+/*
+ * Similar to walk_page_range() but can walk any page tables even if they are
+ * not backed by VMAs. Because 'unusual' entries may be walked, this function
+ * will also not lock the PTEs for the pte_entry() callback. This is useful for
+ * walking the kernel page tables or page tables for firmware.
+ */
+int walk_page_range_novma(struct mm_struct *mm, unsigned long start,
+                          unsigned long end, const struct mm_walk_ops *ops,
+                          pgd_t *pgd,
+                          void *private)
 {
-        int err;
+        struct mm_walk walk = {
+                .ops = ops,
+                .mm = mm,
+                .pgd = pgd,
+                .private = private,
+                .no_vma = true
+        };
 
-        if (!walk->mm)
+        if (start >= end || !walk.mm)
                 return -EINVAL;
 
-        VM_BUG_ON(!rwsem_is_locked(&walk->mm->mmap_sem));
-        VM_BUG_ON(!vma);
-        walk->vma = vma;
-        err = walk_page_test(vma->vm_start, vma->vm_end, walk);
+        mmap_assert_write_locked(walk.mm);
+
+        return walk_pgd_range(start, end, &walk);
+}
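
Since walk_page_range_novma() asserts the mmap lock is held for writing and walks the supplied pgd (or the mm's own page tables when pgd is NULL) without consulting VMAs, a debugging-style sketch of walking the kernel's page tables could look as follows. This is illustrative only: dump_pte_entry, dump_ops and dump_kernel_range are made-up names, and a real page-table dumper would typically install callbacks at several levels rather than just pte_entry:

static int dump_pte_entry(pte_t *pte, unsigned long addr,
                          unsigned long next, struct mm_walk *walk)
{
        pr_info("pte @ %#lx: %#llx\n", addr,
                (unsigned long long)pte_val(*pte));
        return 0;
}

static const struct mm_walk_ops dump_ops = {
        .pte_entry = dump_pte_entry,
};

static void dump_kernel_range(unsigned long start, unsigned long end)
{
        mmap_write_lock(&init_mm);              /* satisfies mmap_assert_write_locked() */
        walk_page_range_novma(&init_mm, start, end, &dump_ops,
                              NULL /* use init_mm's own pgd */, NULL);
        mmap_write_unlock(&init_mm);
}
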
+
+int walk_page_vma(struct vm_area_struct *vma, const struct mm_walk_ops *ops,
+                  void *private)
+{
+        struct mm_walk walk = {
+                .ops = ops,
+                .mm = vma->vm_mm,
+                .vma = vma,
+                .private = private,
+        };
+        int err;
+
+        if (!walk.mm)
+                return -EINVAL;
+
+        mmap_assert_locked(walk.mm);
+
+        err = walk_page_test(vma->vm_start, vma->vm_end, &walk);
         if (err > 0)
                 return 0;
         if (err < 0)
                 return err;
-        return __walk_page_range(vma->vm_start, vma->vm_end, walk);
+        return __walk_page_range(vma->vm_start, vma->vm_end, &walk);
+}
+
+/**
+ * walk_page_mapping - walk all memory areas mapped into a struct address_space.
+ * @mapping: Pointer to the struct address_space
+ * @first_index: First page offset in the address_space
+ * @nr: Number of incremental page offsets to cover
+ * @ops: operations to call during the walk
+ * @private: private data for callbacks' usage
+ *
+ * This function walks all memory areas mapped into a struct address_space.
+ * The walk is limited to only the given page-size index range, but if
+ * the index boundaries cross a huge page-table entry, that entry will be
+ * included.
+ *
+ * Also see walk_page_range() for additional information.
+ *
+ * Locking:
+ * This function can't require that the struct mm_struct::mmap_lock is held,
+ * since @mapping may be mapped by multiple processes. Instead
+ * @mapping->i_mmap_rwsem must be held. This might have implications in the
+ * callbacks, and it's up to the caller to ensure that the
+ * struct mm_struct::mmap_lock is not needed.
+ *
+ * This also means that a caller can't rely on the struct
+ * vm_area_struct::vm_flags to be constant across a call,
+ * except for immutable flags. Callers requiring this shouldn't use
+ * this function.
+ *
+ * Return: 0 on success, negative error code on failure, positive number on
+ * caller-defined premature termination.
+ */
+int walk_page_mapping(struct address_space *mapping, pgoff_t first_index,
+                      pgoff_t nr, const struct mm_walk_ops *ops,
+                      void *private)
+{
+        struct mm_walk walk = {
+                .ops = ops,
+                .private = private,
+        };
+        struct vm_area_struct *vma;
+        pgoff_t vba, vea, cba, cea;
+        unsigned long start_addr, end_addr;
+        int err = 0;
+
+        lockdep_assert_held(&mapping->i_mmap_rwsem);
+        vma_interval_tree_foreach(vma, &mapping->i_mmap, first_index,
+                                  first_index + nr - 1) {
+                /* Clip to the vma */
+                vba = vma->vm_pgoff;
+                vea = vba + vma_pages(vma);
+                cba = first_index;
+                cba = max(cba, vba);
+                cea = first_index + nr;
+                cea = min(cea, vea);
+
+                start_addr = ((cba - vba) << PAGE_SHIFT) + vma->vm_start;
+                end_addr = ((cea - vba) << PAGE_SHIFT) + vma->vm_start;
+                if (start_addr >= end_addr)
+                        continue;
+
+                walk.vma = vma;
+                walk.mm = vma->vm_mm;
+
+                err = walk_page_test(vma->vm_start, vma->vm_end, &walk);
+                if (err > 0) {
+                        err = 0;
+                        break;
+                } else if (err < 0)
+                        break;
+
+                err = __walk_page_range(start_addr, end_addr, &walk);
+                if (err)
+                        break;
+        }
+
+        return err;
 }
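
Finally, a sketch of driving walk_page_mapping(), again with invented names and reusing the count_ops example from above. Per the locking comment, the caller serializes against the mapping's reverse map via i_mmap_rwsem rather than any single mm's mmap_lock; taking it for read is assumed to be enough for read-only callbacks, which is something a real caller would need to verify for its own ops:

static unsigned long count_mapping_range(struct address_space *mapping,
                                         pgoff_t first_index, pgoff_t nr)
{
        unsigned long count = 0;

        i_mmap_lock_read(mapping);              /* satisfies lockdep_assert_held() */
        walk_page_mapping(mapping, first_index, nr, &count_ops, &count);
        i_mmap_unlock_read(mapping);
        return count;
}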