2024-12-19 9370bb92b2d16684ee45cf24e879c93c509162da
kernel/arch/s390/mm/pgalloc.c
@@ -17,8 +17,6 @@
 
 #ifdef CONFIG_PGSTE
 
-static int page_table_allocate_pgste_min = 0;
-static int page_table_allocate_pgste_max = 1;
 int page_table_allocate_pgste = 0;
 EXPORT_SYMBOL(page_table_allocate_pgste);
 
@@ -29,8 +27,8 @@
 		.maxlen = sizeof(int),
 		.mode = S_IRUGO | S_IWUSR,
 		.proc_handler = proc_dointvec_minmax,
-		.extra1 = &page_table_allocate_pgste_min,
-		.extra2 = &page_table_allocate_pgste_max,
+		.extra1 = SYSCTL_ZERO,
+		.extra2 = SYSCTL_ONE,
 	},
 	{ }
 };
@@ -91,66 +89,65 @@
 
 int crst_table_upgrade(struct mm_struct *mm, unsigned long end)
 {
-	unsigned long *table, *pgd;
-	int rc, notify;
+	unsigned long *pgd = NULL, *p4d = NULL, *__pgd;
+	unsigned long asce_limit = mm->context.asce_limit;
 
 	/* upgrade should only happen from 3 to 4, 3 to 5, or 4 to 5 levels */
-	VM_BUG_ON(mm->context.asce_limit < _REGION2_SIZE);
-	rc = 0;
-	notify = 0;
-	while (mm->context.asce_limit < end) {
-		table = crst_table_alloc(mm);
-		if (!table) {
-			rc = -ENOMEM;
-			break;
-		}
-		spin_lock_bh(&mm->page_table_lock);
-		pgd = (unsigned long *) mm->pgd;
-		if (mm->context.asce_limit == _REGION2_SIZE) {
-			crst_table_init(table, _REGION2_ENTRY_EMPTY);
-			p4d_populate(mm, (p4d_t *) table, (pud_t *) pgd);
-			mm->pgd = (pgd_t *) table;
-			mm->context.asce_limit = _REGION1_SIZE;
-			mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
-				_ASCE_USER_BITS | _ASCE_TYPE_REGION2;
-			mm_inc_nr_puds(mm);
-		} else {
-			crst_table_init(table, _REGION1_ENTRY_EMPTY);
-			pgd_populate(mm, (pgd_t *) table, (p4d_t *) pgd);
-			mm->pgd = (pgd_t *) table;
-			mm->context.asce_limit = -PAGE_SIZE;
-			mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
-				_ASCE_USER_BITS | _ASCE_TYPE_REGION1;
-		}
-		notify = 1;
-		spin_unlock_bh(&mm->page_table_lock);
+	VM_BUG_ON(asce_limit < _REGION2_SIZE);
+
+	if (end <= asce_limit)
+		return 0;
+
+	if (asce_limit == _REGION2_SIZE) {
+		p4d = crst_table_alloc(mm);
+		if (unlikely(!p4d))
+			goto err_p4d;
+		crst_table_init(p4d, _REGION2_ENTRY_EMPTY);
 	}
-	if (notify)
-		on_each_cpu(__crst_table_upgrade, mm, 0);
-	return rc;
-}
-
-void crst_table_downgrade(struct mm_struct *mm)
-{
-	pgd_t *pgd;
-
-	/* downgrade should only happen from 3 to 2 levels (compat only) */
-	VM_BUG_ON(mm->context.asce_limit != _REGION2_SIZE);
-
-	if (current->active_mm == mm) {
-		clear_user_asce();
-		__tlb_flush_mm(mm);
+	if (end > _REGION1_SIZE) {
+		pgd = crst_table_alloc(mm);
+		if (unlikely(!pgd))
+			goto err_pgd;
+		crst_table_init(pgd, _REGION1_ENTRY_EMPTY);
 	}
 
-	pgd = mm->pgd;
-	mm->pgd = (pgd_t *) (pgd_val(*pgd) & _REGION_ENTRY_ORIGIN);
-	mm->context.asce_limit = _REGION3_SIZE;
-	mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
-		_ASCE_USER_BITS | _ASCE_TYPE_SEGMENT;
-	crst_table_free(mm, (unsigned long *) pgd);
+	spin_lock_bh(&mm->page_table_lock);
 
-	if (current->active_mm == mm)
-		set_user_asce(mm);
+	/*
+	 * This routine gets called with mmap_lock lock held and there is
+	 * no reason to optimize for the case of otherwise. However, if
+	 * that would ever change, the below check will let us know.
+	 */
+	VM_BUG_ON(asce_limit != mm->context.asce_limit);
+
+	if (p4d) {
+		__pgd = (unsigned long *) mm->pgd;
+		p4d_populate(mm, (p4d_t *) p4d, (pud_t *) __pgd);
+		mm->pgd = (pgd_t *) p4d;
+		mm->context.asce_limit = _REGION1_SIZE;
+		mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
+			_ASCE_USER_BITS | _ASCE_TYPE_REGION2;
+		mm_inc_nr_puds(mm);
+	}
+	if (pgd) {
+		__pgd = (unsigned long *) mm->pgd;
+		pgd_populate(mm, (pgd_t *) pgd, (p4d_t *) __pgd);
+		mm->pgd = (pgd_t *) pgd;
+		mm->context.asce_limit = TASK_SIZE_MAX;
+		mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
+			_ASCE_USER_BITS | _ASCE_TYPE_REGION1;
+	}
+
+	spin_unlock_bh(&mm->page_table_lock);
+
+	on_each_cpu(__crst_table_upgrade, mm, 0);
+
+	return 0;
+
+err_pgd:
+	crst_table_free(mm, p4d);
+err_p4d:
+	return -ENOMEM;
 }
 
 static inline unsigned int atomic_xor_bits(atomic_t *v, unsigned int bits)
@@ -223,7 +220,7 @@
 	page = alloc_page(GFP_KERNEL);
 	if (!page)
 		return NULL;
-	if (!pgtable_page_ctor(page)) {
+	if (!pgtable_pte_page_ctor(page)) {
 		__free_page(page);
 		return NULL;
 	}
@@ -271,7 +268,7 @@
 		atomic_xor_bits(&page->_refcount, 3U << 24);
 	}
 
-	pgtable_page_dtor(page);
+	pgtable_pte_page_dtor(page);
 	__free_page(page);
 }
 
@@ -303,7 +300,7 @@
 	tlb_remove_table(tlb, table);
 }
 
-static void __tlb_remove_table(void *_table)
+void __tlb_remove_table(void *_table)
 {
 	unsigned int mask = (unsigned long) _table & 3;
 	void *table = (void *)((unsigned long) _table ^ mask);
@@ -319,75 +316,14 @@
 		mask >>= 24;
 		if (mask != 0)
 			break;
-		/* fallthrough */
+		fallthrough;
 	case 3:		/* 4K page table with pgstes */
 		if (mask & 3)
 			atomic_xor_bits(&page->_refcount, 3 << 24);
-		pgtable_page_dtor(page);
+		pgtable_pte_page_dtor(page);
 		__free_page(page);
 		break;
 	}
-}
-
-static void tlb_remove_table_smp_sync(void *arg)
-{
-	/* Simply deliver the interrupt */
-}
-
-static void tlb_remove_table_one(void *table)
-{
-	/*
-	 * This isn't an RCU grace period and hence the page-tables cannot be
-	 * assumed to be actually RCU-freed.
-	 *
-	 * It is however sufficient for software page-table walkers that rely
-	 * on IRQ disabling. See the comment near struct mmu_table_batch.
-	 */
-	smp_call_function(tlb_remove_table_smp_sync, NULL, 1);
-	__tlb_remove_table(table);
-}
-
-static void tlb_remove_table_rcu(struct rcu_head *head)
-{
-	struct mmu_table_batch *batch;
-	int i;
-
-	batch = container_of(head, struct mmu_table_batch, rcu);
-
-	for (i = 0; i < batch->nr; i++)
-		__tlb_remove_table(batch->tables[i]);
-
-	free_page((unsigned long)batch);
-}
-
-void tlb_table_flush(struct mmu_gather *tlb)
-{
-	struct mmu_table_batch **batch = &tlb->batch;
-
-	if (*batch) {
-		call_rcu_sched(&(*batch)->rcu, tlb_remove_table_rcu);
-		*batch = NULL;
-	}
-}
-
-void tlb_remove_table(struct mmu_gather *tlb, void *table)
-{
-	struct mmu_table_batch **batch = &tlb->batch;
-
-	tlb->mm->context.flush_mm = 1;
-	if (*batch == NULL) {
-		*batch = (struct mmu_table_batch *)
-			__get_free_page(GFP_NOWAIT | __GFP_NOWARN);
-		if (*batch == NULL) {
-			__tlb_flush_mm_lazy(tlb->mm);
-			tlb_remove_table_one(table);
-			return;
-		}
-		(*batch)->nr = 0;
-	}
-	(*batch)->tables[(*batch)->nr++] = table;
-	if ((*batch)->nr == MAX_TABLE_BATCH)
-		tlb_flush_mmu(tlb);
 }
 
 /*
@@ -605,7 +541,7 @@
 		base_region2_walk(table, 0, _REGION1_SIZE, 0);
 		break;
 	case _ASCE_TYPE_REGION1:
-		base_region1_walk(table, 0, -_PAGE_SIZE, 0);
+		base_region1_walk(table, 0, TASK_SIZE_MAX, 0);
 		break;
 	}
 	base_crst_free(table);