@@ -17,8 +17,6 @@
 
 #ifdef CONFIG_PGSTE
 
-static int page_table_allocate_pgste_min = 0;
-static int page_table_allocate_pgste_max = 1;
 int page_table_allocate_pgste = 0;
 EXPORT_SYMBOL(page_table_allocate_pgste);
 
@@ -29,8 +27,8 @@
                 .maxlen         = sizeof(int),
                 .mode           = S_IRUGO | S_IWUSR,
                 .proc_handler   = proc_dointvec_minmax,
-                .extra1         = &page_table_allocate_pgste_min,
-                .extra2         = &page_table_allocate_pgste_max,
+                .extra1         = SYSCTL_ZERO,
+                .extra2         = SYSCTL_ONE,
         },
         { }
 };
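Note on the hunk above: the two file-local clamp variables are dropped in favour of SYSCTL_ZERO and SYSCTL_ONE, the shared boolean bounds provided by the core sysctl code, so callers no longer carry their own static min/max ints. A minimal sketch of the resulting pattern; the table and variable names below are illustrative, not from this file:

#include <linux/sysctl.h>

static int my_feature_enabled;

/* Hypothetical 0..1 tunable using the shared clamp constants. */
static struct ctl_table my_feature_sysctl[] = {
        {
                .procname       = "my_feature_enabled",
                .data           = &my_feature_enabled,
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = proc_dointvec_minmax,
                .extra1         = SYSCTL_ZERO,  /* shared const, no local min */
                .extra2         = SYSCTL_ONE,   /* shared const, no local max */
        },
        { }
};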
@@ -91,66 +89,65 @@
 
 int crst_table_upgrade(struct mm_struct *mm, unsigned long end)
 {
-        unsigned long *table, *pgd;
-        int rc, notify;
+        unsigned long *pgd = NULL, *p4d = NULL, *__pgd;
+        unsigned long asce_limit = mm->context.asce_limit;
 
         /* upgrade should only happen from 3 to 4, 3 to 5, or 4 to 5 levels */
-        VM_BUG_ON(mm->context.asce_limit < _REGION2_SIZE);
-        rc = 0;
-        notify = 0;
-        while (mm->context.asce_limit < end) {
-                table = crst_table_alloc(mm);
-                if (!table) {
-                        rc = -ENOMEM;
-                        break;
-                }
-                spin_lock_bh(&mm->page_table_lock);
-                pgd = (unsigned long *) mm->pgd;
-                if (mm->context.asce_limit == _REGION2_SIZE) {
-                        crst_table_init(table, _REGION2_ENTRY_EMPTY);
-                        p4d_populate(mm, (p4d_t *) table, (pud_t *) pgd);
-                        mm->pgd = (pgd_t *) table;
-                        mm->context.asce_limit = _REGION1_SIZE;
-                        mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
-                                _ASCE_USER_BITS | _ASCE_TYPE_REGION2;
-                        mm_inc_nr_puds(mm);
-                } else {
-                        crst_table_init(table, _REGION1_ENTRY_EMPTY);
-                        pgd_populate(mm, (pgd_t *) table, (p4d_t *) pgd);
-                        mm->pgd = (pgd_t *) table;
-                        mm->context.asce_limit = -PAGE_SIZE;
-                        mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
-                                _ASCE_USER_BITS | _ASCE_TYPE_REGION1;
-                }
-                notify = 1;
-                spin_unlock_bh(&mm->page_table_lock);
+        VM_BUG_ON(asce_limit < _REGION2_SIZE);
+
+        if (end <= asce_limit)
+                return 0;
+
+        if (asce_limit == _REGION2_SIZE) {
+                p4d = crst_table_alloc(mm);
+                if (unlikely(!p4d))
+                        goto err_p4d;
+                crst_table_init(p4d, _REGION2_ENTRY_EMPTY);
         }
-        if (notify)
-                on_each_cpu(__crst_table_upgrade, mm, 0);
-        return rc;
-}
-
-void crst_table_downgrade(struct mm_struct *mm)
-{
-        pgd_t *pgd;
-
-        /* downgrade should only happen from 3 to 2 levels (compat only) */
-        VM_BUG_ON(mm->context.asce_limit != _REGION2_SIZE);
-
-        if (current->active_mm == mm) {
-                clear_user_asce();
-                __tlb_flush_mm(mm);
+        if (end > _REGION1_SIZE) {
+                pgd = crst_table_alloc(mm);
+                if (unlikely(!pgd))
+                        goto err_pgd;
+                crst_table_init(pgd, _REGION1_ENTRY_EMPTY);
         }
 
-        pgd = mm->pgd;
-        mm->pgd = (pgd_t *) (pgd_val(*pgd) & _REGION_ENTRY_ORIGIN);
-        mm->context.asce_limit = _REGION3_SIZE;
-        mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
-                           _ASCE_USER_BITS | _ASCE_TYPE_SEGMENT;
-        crst_table_free(mm, (unsigned long *) pgd);
+        spin_lock_bh(&mm->page_table_lock);
 
-        if (current->active_mm == mm)
-                set_user_asce(mm);
+        /*
+         * This routine gets called with mmap_lock lock held and there is
+         * no reason to optimize for the case of otherwise. However, if
+         * that would ever change, the below check will let us know.
+         */
+        VM_BUG_ON(asce_limit != mm->context.asce_limit);
+
+        if (p4d) {
+                __pgd = (unsigned long *) mm->pgd;
+                p4d_populate(mm, (p4d_t *) p4d, (pud_t *) __pgd);
+                mm->pgd = (pgd_t *) p4d;
+                mm->context.asce_limit = _REGION1_SIZE;
+                mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
+                        _ASCE_USER_BITS | _ASCE_TYPE_REGION2;
+                mm_inc_nr_puds(mm);
+        }
+        if (pgd) {
+                __pgd = (unsigned long *) mm->pgd;
+                pgd_populate(mm, (pgd_t *) pgd, (p4d_t *) __pgd);
+                mm->pgd = (pgd_t *) pgd;
+                mm->context.asce_limit = TASK_SIZE_MAX;
+                mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
+                        _ASCE_USER_BITS | _ASCE_TYPE_REGION1;
+        }
+
+        spin_unlock_bh(&mm->page_table_lock);
+
+        on_each_cpu(__crst_table_upgrade, mm, 0);
+
+        return 0;
+
+err_pgd:
+        crst_table_free(mm, p4d);
+err_p4d:
+        return -ENOMEM;
 }
 
 static inline unsigned int atomic_xor_bits(atomic_t *v, unsigned int bits)
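Note on the crst_table_upgrade() rewrite above: the old loop allocated one table per iteration and took page_table_lock around each splice; the new shape does every fallible allocation up front with no lock held, then takes the lock once for a commit phase that cannot fail, unwinding through goto labels on allocation failure. A minimal userspace sketch of that two-phase pattern, with placeholder names and limits standing in for mm->pgd, mm->context.asce_limit, _REGION2_SIZE and _REGION1_SIZE:

#include <pthread.h>
#include <stdlib.h>

struct addr_ctx {
        pthread_mutex_t lock;   /* stands in for mm->page_table_lock */
        void *top_table;        /* stands in for mm->pgd */
        unsigned long limit;    /* stands in for mm->context.asce_limit */
};

#define FOUR_LEVEL_LIMIT (1UL << 53)    /* placeholder for _REGION2_SIZE */
#define FIVE_LEVEL_LIMIT (1UL << 62)    /* placeholder for _REGION1_SIZE */

static int table_upgrade(struct addr_ctx *c, unsigned long end)
{
        void *p4d = NULL, *pgd = NULL;

        if (end <= c->limit)
                return 0;

        /* Phase 1: everything that can fail, with no lock held. */
        if (c->limit == FOUR_LEVEL_LIMIT) {
                p4d = calloc(512, sizeof(void *));
                if (!p4d)
                        goto err_p4d;
        }
        if (end > FIVE_LEVEL_LIMIT) {
                pgd = calloc(512, sizeof(void *));
                if (!pgd)
                        goto err_pgd;
        }

        /* Phase 2: commit under the lock; no failure paths in here. */
        pthread_mutex_lock(&c->lock);
        if (p4d) {
                ((void **)p4d)[0] = c->top_table; /* new level points at old top */
                c->top_table = p4d;
                c->limit = FIVE_LEVEL_LIMIT;
        }
        if (pgd) {
                ((void **)pgd)[0] = c->top_table;
                c->top_table = pgd;
                c->limit = ~0UL;
        }
        pthread_mutex_unlock(&c->lock);
        return 0;

err_pgd:
        free(p4d);      /* free(NULL) is a no-op, like the kernel unwind */
err_p4d:
        return -1;      /* the kernel returns -ENOMEM here */
}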
@@ -223,7 +220,7 @@
         page = alloc_page(GFP_KERNEL);
         if (!page)
                 return NULL;
-        if (!pgtable_page_ctor(page)) {
+        if (!pgtable_pte_page_ctor(page)) {
                 __free_page(page);
                 return NULL;
         }
@@ -271,7 +268,7 @@
                 atomic_xor_bits(&page->_refcount, 3U << 24);
         }
 
-        pgtable_page_dtor(page);
+        pgtable_pte_page_dtor(page);
         __free_page(page);
 }
 
@@ -303,7 +300,7 @@
         tlb_remove_table(tlb, table);
 }
 
-static void __tlb_remove_table(void *_table)
+void __tlb_remove_table(void *_table)
 {
         unsigned int mask = (unsigned long) _table & 3;
         void *table = (void *)((unsigned long) _table ^ mask);
@@ -319,75 +316,14 @@
                 mask >>= 24;
                 if (mask != 0)
                         break;
-                /* fallthrough */
+                fallthrough;
         case 3:         /* 4K page table with pgstes */
                 if (mask & 3)
                         atomic_xor_bits(&page->_refcount, 3 << 24);
-                pgtable_page_dtor(page);
+                pgtable_pte_page_dtor(page);
                 __free_page(page);
                 break;
         }
-}
-
-static void tlb_remove_table_smp_sync(void *arg)
-{
-        /* Simply deliver the interrupt */
-}
-
-static void tlb_remove_table_one(void *table)
-{
-        /*
-         * This isn't an RCU grace period and hence the page-tables cannot be
-         * assumed to be actually RCU-freed.
-         *
-         * It is however sufficient for software page-table walkers that rely
-         * on IRQ disabling. See the comment near struct mmu_table_batch.
-         */
-        smp_call_function(tlb_remove_table_smp_sync, NULL, 1);
-        __tlb_remove_table(table);
-}
-
-static void tlb_remove_table_rcu(struct rcu_head *head)
-{
-        struct mmu_table_batch *batch;
-        int i;
-
-        batch = container_of(head, struct mmu_table_batch, rcu);
-
-        for (i = 0; i < batch->nr; i++)
-                __tlb_remove_table(batch->tables[i]);
-
-        free_page((unsigned long)batch);
-}
-
-void tlb_table_flush(struct mmu_gather *tlb)
-{
-        struct mmu_table_batch **batch = &tlb->batch;
-
-        if (*batch) {
-                call_rcu_sched(&(*batch)->rcu, tlb_remove_table_rcu);
-                *batch = NULL;
-        }
-}
-
-void tlb_remove_table(struct mmu_gather *tlb, void *table)
-{
-        struct mmu_table_batch **batch = &tlb->batch;
-
-        tlb->mm->context.flush_mm = 1;
-        if (*batch == NULL) {
-                *batch = (struct mmu_table_batch *)
-                        __get_free_page(GFP_NOWAIT | __GFP_NOWARN);
-                if (*batch == NULL) {
-                        __tlb_flush_mm_lazy(tlb->mm);
-                        tlb_remove_table_one(table);
-                        return;
-                }
-                (*batch)->nr = 0;
-        }
-        (*batch)->tables[(*batch)->nr++] = table;
-        if ((*batch)->nr == MAX_TABLE_BATCH)
-                tlb_flush_mmu(tlb);
 }
 
 /*
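Note on the deletions above: the batching, RCU-deferred freeing and IPI fallback removed here match, almost line for line, what the generic mmu_gather code provides once an architecture opts into RCU table freeing; after this change s390 only supplies the final free hook, which is why __tlb_remove_table loses its static in the previous hunk. A sketch of that split as I read it; the batch layout mirrors the deleted code, and the generic-side details should be treated as assumptions about the common code of this era:

#include <linux/mm.h>
#include <linux/rcupdate.h>

/* Assumed generic-side batch, along the lines of asm-generic/tlb.h. */
struct mmu_table_batch {
        struct rcu_head rcu;
        unsigned int    nr;
        void            *tables[];
};

/* The one piece the arch still defines; generic code calls it for
 * each queued table once the grace period has elapsed.
 */
extern void __tlb_remove_table(void *table);

static void tlb_remove_table_rcu(struct rcu_head *head)
{
        struct mmu_table_batch *batch =
                container_of(head, struct mmu_table_batch, rcu);
        unsigned int i;

        for (i = 0; i < batch->nr; i++)
                __tlb_remove_table(batch->tables[i]);
        free_page((unsigned long)batch);
}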
@@ -605,7 +541,7 @@
                 base_region2_walk(table, 0, _REGION1_SIZE, 0);
                 break;
         case _ASCE_TYPE_REGION1:
-                base_region1_walk(table, 0, -_PAGE_SIZE, 0);
+                base_region1_walk(table, 0, TASK_SIZE_MAX, 0);
                 break;
         }
         base_crst_free(table);
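Note on the hunk above: here and in crst_table_upgrade() earlier, the literal -_PAGE_SIZE / -PAGE_SIZE becomes TASK_SIZE_MAX, which states the intent (walk or map up to the top of the user address space) instead of relying on unsigned wrap-around. Assuming 4K pages and a 64-bit unsigned long, the negated page size wraps to the highest page-aligned address, presumably the same value TASK_SIZE_MAX denotes here; a tiny standalone check:

#include <stdio.h>

#define PAGE_SIZE 4096UL        /* s390 uses 4K pages */

int main(void)
{
        /* -PAGE_SIZE in unsigned arithmetic wraps to the top of the
         * 64-bit address space minus one page.
         */
        unsigned long limit = -PAGE_SIZE;

        printf("%#lx\n", limit);        /* prints 0xfffffffffffff000 */
        return 0;
}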