| .. | .. |
|---|
| 17 | 17 | |
|---|
| 18 | 18 | #ifdef CONFIG_PGSTE |
|---|
| 19 | 19 | |
|---|
| 20 | | -static int page_table_allocate_pgste_min = 0; |
|---|
| 21 | | -static int page_table_allocate_pgste_max = 1; |
|---|
| 22 | 20 | int page_table_allocate_pgste = 0; |
|---|
| 23 | 21 | EXPORT_SYMBOL(page_table_allocate_pgste); |
|---|
| 24 | 22 | |
|---|
| .. | .. |
|---|
| 29 | 27 | .maxlen = sizeof(int), |
|---|
| 30 | 28 | .mode = S_IRUGO | S_IWUSR, |
|---|
| 31 | 29 | .proc_handler = proc_dointvec_minmax, |
|---|
| 32 | | - .extra1 = &page_table_allocate_pgste_min, |
|---|
| 33 | | - .extra2 = &page_table_allocate_pgste_max, |
|---|
| 30 | + .extra1 = SYSCTL_ZERO, |
|---|
| 31 | + .extra2 = SYSCTL_ONE, |
|---|
| 34 | 32 | }, |
|---|
| 35 | 33 | { } |
|---|
| 36 | 34 | }; |
|---|
| .. | .. |
|---|
| 91 | 89 | |
|---|
| 92 | 90 | int crst_table_upgrade(struct mm_struct *mm, unsigned long end) |
|---|
| 93 | 91 | { |
|---|
| 94 | | - unsigned long *table, *pgd; |
|---|
| 95 | | - int rc, notify; |
|---|
| 92 | + unsigned long *pgd = NULL, *p4d = NULL, *__pgd; |
|---|
| 93 | + unsigned long asce_limit = mm->context.asce_limit; |
|---|
| 96 | 94 | |
|---|
| 97 | 95 | /* upgrade should only happen from 3 to 4, 3 to 5, or 4 to 5 levels */ |
|---|
| 98 | | - VM_BUG_ON(mm->context.asce_limit < _REGION2_SIZE); |
|---|
| 99 | | - rc = 0; |
|---|
| 100 | | - notify = 0; |
|---|
| 101 | | - while (mm->context.asce_limit < end) { |
|---|
| 102 | | - table = crst_table_alloc(mm); |
|---|
| 103 | | - if (!table) { |
|---|
| 104 | | - rc = -ENOMEM; |
|---|
| 105 | | - break; |
|---|
| 106 | | - } |
|---|
| 107 | | - spin_lock_bh(&mm->page_table_lock); |
|---|
| 108 | | - pgd = (unsigned long *) mm->pgd; |
|---|
| 109 | | - if (mm->context.asce_limit == _REGION2_SIZE) { |
|---|
| 110 | | - crst_table_init(table, _REGION2_ENTRY_EMPTY); |
|---|
| 111 | | - p4d_populate(mm, (p4d_t *) table, (pud_t *) pgd); |
|---|
| 112 | | - mm->pgd = (pgd_t *) table; |
|---|
| 113 | | - mm->context.asce_limit = _REGION1_SIZE; |
|---|
| 114 | | - mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH | |
|---|
| 115 | | - _ASCE_USER_BITS | _ASCE_TYPE_REGION2; |
|---|
| 116 | | - mm_inc_nr_puds(mm); |
|---|
| 117 | | - } else { |
|---|
| 118 | | - crst_table_init(table, _REGION1_ENTRY_EMPTY); |
|---|
| 119 | | - pgd_populate(mm, (pgd_t *) table, (p4d_t *) pgd); |
|---|
| 120 | | - mm->pgd = (pgd_t *) table; |
|---|
| 121 | | - mm->context.asce_limit = -PAGE_SIZE; |
|---|
| 122 | | - mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH | |
|---|
| 123 | | - _ASCE_USER_BITS | _ASCE_TYPE_REGION1; |
|---|
| 124 | | - } |
|---|
| 125 | | - notify = 1; |
|---|
| 126 | | - spin_unlock_bh(&mm->page_table_lock); |
|---|
| 96 | + VM_BUG_ON(asce_limit < _REGION2_SIZE); |
|---|
| 97 | + |
|---|
| 98 | + if (end <= asce_limit) |
|---|
| 99 | + return 0; |
|---|
| 100 | + |
|---|
| 101 | + if (asce_limit == _REGION2_SIZE) { |
|---|
| 102 | + p4d = crst_table_alloc(mm); |
|---|
| 103 | + if (unlikely(!p4d)) |
|---|
| 104 | + goto err_p4d; |
|---|
| 105 | + crst_table_init(p4d, _REGION2_ENTRY_EMPTY); |
|---|
| 127 | 106 | } |
|---|
| 128 | | - if (notify) |
|---|
| 129 | | - on_each_cpu(__crst_table_upgrade, mm, 0); |
|---|
| 130 | | - return rc; |
|---|
| 131 | | -} |
|---|
| 132 | | - |
|---|
| 133 | | -void crst_table_downgrade(struct mm_struct *mm) |
|---|
| 134 | | -{ |
|---|
| 135 | | - pgd_t *pgd; |
|---|
| 136 | | - |
|---|
| 137 | | - /* downgrade should only happen from 3 to 2 levels (compat only) */ |
|---|
| 138 | | - VM_BUG_ON(mm->context.asce_limit != _REGION2_SIZE); |
|---|
| 139 | | - |
|---|
| 140 | | - if (current->active_mm == mm) { |
|---|
| 141 | | - clear_user_asce(); |
|---|
| 142 | | - __tlb_flush_mm(mm); |
|---|
| 107 | + if (end > _REGION1_SIZE) { |
|---|
| 108 | + pgd = crst_table_alloc(mm); |
|---|
| 109 | + if (unlikely(!pgd)) |
|---|
| 110 | + goto err_pgd; |
|---|
| 111 | + crst_table_init(pgd, _REGION1_ENTRY_EMPTY); |
|---|
| 143 | 112 | } |
|---|
| 144 | 113 | |
|---|
| 145 | | - pgd = mm->pgd; |
|---|
| 146 | | - mm->pgd = (pgd_t *) (pgd_val(*pgd) & _REGION_ENTRY_ORIGIN); |
|---|
| 147 | | - mm->context.asce_limit = _REGION3_SIZE; |
|---|
| 148 | | - mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH | |
|---|
| 149 | | - _ASCE_USER_BITS | _ASCE_TYPE_SEGMENT; |
|---|
| 150 | | - crst_table_free(mm, (unsigned long *) pgd); |
|---|
| 114 | + spin_lock_bh(&mm->page_table_lock); |
|---|
| 151 | 115 | |
|---|
| 152 | | - if (current->active_mm == mm) |
|---|
| 153 | | - set_user_asce(mm); |
|---|
| 116 | + /* |
|---|
| 117 | + * This routine gets called with mmap_lock lock held and there is |
|---|
| 118 | + * no reason to optimize for the case of otherwise. However, if |
|---|
| 119 | + * that would ever change, the below check will let us know. |
|---|
| 120 | + */ |
|---|
| 121 | + VM_BUG_ON(asce_limit != mm->context.asce_limit); |
|---|
| 122 | + |
|---|
| 123 | + if (p4d) { |
|---|
| 124 | + __pgd = (unsigned long *) mm->pgd; |
|---|
| 125 | + p4d_populate(mm, (p4d_t *) p4d, (pud_t *) __pgd); |
|---|
| 126 | + mm->pgd = (pgd_t *) p4d; |
|---|
| 127 | + mm->context.asce_limit = _REGION1_SIZE; |
|---|
| 128 | + mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH | |
|---|
| 129 | + _ASCE_USER_BITS | _ASCE_TYPE_REGION2; |
|---|
| 130 | + mm_inc_nr_puds(mm); |
|---|
| 131 | + } |
|---|
| 132 | + if (pgd) { |
|---|
| 133 | + __pgd = (unsigned long *) mm->pgd; |
|---|
| 134 | + pgd_populate(mm, (pgd_t *) pgd, (p4d_t *) __pgd); |
|---|
| 135 | + mm->pgd = (pgd_t *) pgd; |
|---|
| 136 | + mm->context.asce_limit = TASK_SIZE_MAX; |
|---|
| 137 | + mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH | |
|---|
| 138 | + _ASCE_USER_BITS | _ASCE_TYPE_REGION1; |
|---|
| 139 | + } |
|---|
| 140 | + |
|---|
| 141 | + spin_unlock_bh(&mm->page_table_lock); |
|---|
| 142 | + |
|---|
| 143 | + on_each_cpu(__crst_table_upgrade, mm, 0); |
|---|
| 144 | + |
|---|
| 145 | + return 0; |
|---|
| 146 | + |
|---|
| 147 | +err_pgd: |
|---|
| 148 | + crst_table_free(mm, p4d); |
|---|
| 149 | +err_p4d: |
|---|
| 150 | + return -ENOMEM; |
|---|
| 154 | 151 | } |
|---|
| 155 | 152 | |
|---|
| 156 | 153 | static inline unsigned int atomic_xor_bits(atomic_t *v, unsigned int bits) |
|---|
| .. | .. |
|---|
| 223 | 220 | page = alloc_page(GFP_KERNEL); |
|---|
| 224 | 221 | if (!page) |
|---|
| 225 | 222 | return NULL; |
|---|
| 226 | | - if (!pgtable_page_ctor(page)) { |
|---|
| 223 | + if (!pgtable_pte_page_ctor(page)) { |
|---|
| 227 | 224 | __free_page(page); |
|---|
| 228 | 225 | return NULL; |
|---|
| 229 | 226 | } |
|---|
| .. | .. |
|---|
| 271 | 268 | atomic_xor_bits(&page->_refcount, 3U << 24); |
|---|
| 272 | 269 | } |
|---|
| 273 | 270 | |
|---|
| 274 | | - pgtable_page_dtor(page); |
|---|
| 271 | + pgtable_pte_page_dtor(page); |
|---|
| 275 | 272 | __free_page(page); |
|---|
| 276 | 273 | } |
|---|
| 277 | 274 | |
|---|
| .. | .. |
|---|
| 303 | 300 | tlb_remove_table(tlb, table); |
|---|
| 304 | 301 | } |
|---|
| 305 | 302 | |
|---|
| 306 | | -static void __tlb_remove_table(void *_table) |
|---|
| 303 | +void __tlb_remove_table(void *_table) |
|---|
| 307 | 304 | { |
|---|
| 308 | 305 | unsigned int mask = (unsigned long) _table & 3; |
|---|
| 309 | 306 | void *table = (void *)((unsigned long) _table ^ mask); |
|---|
| .. | .. |
|---|
| 319 | 316 | mask >>= 24; |
|---|
| 320 | 317 | if (mask != 0) |
|---|
| 321 | 318 | break; |
|---|
| 322 | | - /* fallthrough */ |
|---|
| 319 | + fallthrough; |
|---|
| 323 | 320 | case 3: /* 4K page table with pgstes */ |
|---|
| 324 | 321 | if (mask & 3) |
|---|
| 325 | 322 | atomic_xor_bits(&page->_refcount, 3 << 24); |
|---|
| 326 | | - pgtable_page_dtor(page); |
|---|
| 323 | + pgtable_pte_page_dtor(page); |
|---|
| 327 | 324 | __free_page(page); |
|---|
| 328 | 325 | break; |
|---|
| 329 | 326 | } |
|---|
| 330 | | -} |
|---|
| 331 | | - |
|---|
| 332 | | -static void tlb_remove_table_smp_sync(void *arg) |
|---|
| 333 | | -{ |
|---|
| 334 | | - /* Simply deliver the interrupt */ |
|---|
| 335 | | -} |
|---|
| 336 | | - |
|---|
| 337 | | -static void tlb_remove_table_one(void *table) |
|---|
| 338 | | -{ |
|---|
| 339 | | - /* |
|---|
| 340 | | - * This isn't an RCU grace period and hence the page-tables cannot be |
|---|
| 341 | | - * assumed to be actually RCU-freed. |
|---|
| 342 | | - * |
|---|
| 343 | | - * It is however sufficient for software page-table walkers that rely |
|---|
| 344 | | - * on IRQ disabling. See the comment near struct mmu_table_batch. |
|---|
| 345 | | - */ |
|---|
| 346 | | - smp_call_function(tlb_remove_table_smp_sync, NULL, 1); |
|---|
| 347 | | - __tlb_remove_table(table); |
|---|
| 348 | | -} |
|---|
| 349 | | - |
|---|
| 350 | | -static void tlb_remove_table_rcu(struct rcu_head *head) |
|---|
| 351 | | -{ |
|---|
| 352 | | - struct mmu_table_batch *batch; |
|---|
| 353 | | - int i; |
|---|
| 354 | | - |
|---|
| 355 | | - batch = container_of(head, struct mmu_table_batch, rcu); |
|---|
| 356 | | - |
|---|
| 357 | | - for (i = 0; i < batch->nr; i++) |
|---|
| 358 | | - __tlb_remove_table(batch->tables[i]); |
|---|
| 359 | | - |
|---|
| 360 | | - free_page((unsigned long)batch); |
|---|
| 361 | | -} |
|---|
| 362 | | - |
|---|
| 363 | | -void tlb_table_flush(struct mmu_gather *tlb) |
|---|
| 364 | | -{ |
|---|
| 365 | | - struct mmu_table_batch **batch = &tlb->batch; |
|---|
| 366 | | - |
|---|
| 367 | | - if (*batch) { |
|---|
| 368 | | - call_rcu_sched(&(*batch)->rcu, tlb_remove_table_rcu); |
|---|
| 369 | | - *batch = NULL; |
|---|
| 370 | | - } |
|---|
| 371 | | -} |
|---|
| 372 | | - |
|---|
| 373 | | -void tlb_remove_table(struct mmu_gather *tlb, void *table) |
|---|
| 374 | | -{ |
|---|
| 375 | | - struct mmu_table_batch **batch = &tlb->batch; |
|---|
| 376 | | - |
|---|
| 377 | | - tlb->mm->context.flush_mm = 1; |
|---|
| 378 | | - if (*batch == NULL) { |
|---|
| 379 | | - *batch = (struct mmu_table_batch *) |
|---|
| 380 | | - __get_free_page(GFP_NOWAIT | __GFP_NOWARN); |
|---|
| 381 | | - if (*batch == NULL) { |
|---|
| 382 | | - __tlb_flush_mm_lazy(tlb->mm); |
|---|
| 383 | | - tlb_remove_table_one(table); |
|---|
| 384 | | - return; |
|---|
| 385 | | - } |
|---|
| 386 | | - (*batch)->nr = 0; |
|---|
| 387 | | - } |
|---|
| 388 | | - (*batch)->tables[(*batch)->nr++] = table; |
|---|
| 389 | | - if ((*batch)->nr == MAX_TABLE_BATCH) |
|---|
| 390 | | - tlb_flush_mmu(tlb); |
|---|
| 391 | 327 | } |
|---|
| 392 | 328 | |
|---|
| 393 | 329 | /* |
|---|
| .. | .. |
|---|
| 605 | 541 | base_region2_walk(table, 0, _REGION1_SIZE, 0); |
|---|
| 606 | 542 | break; |
|---|
| 607 | 543 | case _ASCE_TYPE_REGION1: |
|---|
| 608 | | - base_region1_walk(table, 0, -_PAGE_SIZE, 0); |
|---|
| 544 | + base_region1_walk(table, 0, TASK_SIZE_MAX, 0); |
|---|
| 609 | 545 | break; |
|---|
| 610 | 546 | } |
|---|
| 611 | 547 | base_crst_free(table); |
|---|