hc
2024-02-20 102a0743326a03cd1a1202ceda21e175b7d3575c
--- a/kernel/arch/s390/mm/vmem.c
+++ b/kernel/arch/s390/mm/vmem.c
@@ -4,17 +4,15 @@
  * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>
  */
 
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/pfn.h>
 #include <linux/mm.h>
 #include <linux/init.h>
 #include <linux/list.h>
 #include <linux/hugetlb.h>
 #include <linux/slab.h>
-#include <linux/memblock.h>
 #include <asm/cacheflush.h>
 #include <asm/pgalloc.h>
-#include <asm/pgtable.h>
 #include <asm/setup.h>
 #include <asm/tlbflush.h>
 #include <asm/sections.h>
@@ -22,21 +20,22 @@
 
 static DEFINE_MUTEX(vmem_mutex);
 
-struct memory_segment {
-	struct list_head list;
-	unsigned long start;
-	unsigned long size;
-};
-
-static LIST_HEAD(mem_segs);
-
 static void __ref *vmem_alloc_pages(unsigned int order)
 {
 	unsigned long size = PAGE_SIZE << order;
 
 	if (slab_is_available())
 		return (void *)__get_free_pages(GFP_KERNEL, order);
-	return (void *) memblock_alloc(size, size);
+	return (void *) memblock_phys_alloc(size, size);
+}
+
+static void vmem_free_pages(unsigned long addr, int order)
+{
+	/* We don't expect boot memory to be removed ever. */
+	if (!slab_is_available() ||
+	    WARN_ON_ONCE(PageReserved(phys_to_page(addr))))
+		return;
+	free_pages(addr, order);
 }
 
 void *vmem_crst_alloc(unsigned long val)
@@ -57,339 +56,494 @@
 	if (slab_is_available())
 		pte = (pte_t *) page_table_alloc(&init_mm);
 	else
-		pte = (pte_t *) memblock_alloc(size, size);
+		pte = (pte_t *) memblock_phys_alloc(size, size);
 	if (!pte)
 		return NULL;
 	memset64((u64 *)pte, _PAGE_INVALID, PTRS_PER_PTE);
 	return pte;
 }
 
-/*
- * Add a physical memory range to the 1:1 mapping.
- */
-static int vmem_add_mem(unsigned long start, unsigned long size)
+static void vmem_pte_free(unsigned long *table)
 {
-	unsigned long pgt_prot, sgt_prot, r3_prot;
-	unsigned long pages4k, pages1m, pages2g;
-	unsigned long end = start + size;
-	unsigned long address = start;
-	pgd_t *pg_dir;
-	p4d_t *p4_dir;
-	pud_t *pu_dir;
-	pmd_t *pm_dir;
-	pte_t *pt_dir;
-	int ret = -ENOMEM;
+	/* We don't expect boot memory to be removed ever. */
+	if (!slab_is_available() ||
+	    WARN_ON_ONCE(PageReserved(virt_to_page(table))))
+		return;
+	page_table_free(&init_mm, table);
+}
 
-	pgt_prot = pgprot_val(PAGE_KERNEL);
-	sgt_prot = pgprot_val(SEGMENT_KERNEL);
-	r3_prot = pgprot_val(REGION3_KERNEL);
-	if (!MACHINE_HAS_NX) {
-		pgt_prot &= ~_PAGE_NOEXEC;
-		sgt_prot &= ~_SEGMENT_ENTRY_NOEXEC;
-		r3_prot &= ~_REGION_ENTRY_NOEXEC;
+#define PAGE_UNUSED 0xFD
+
+/*
+ * The unused vmemmap range, which was not yet memset(PAGE_UNUSED) ranges
+ * from unused_pmd_start to next PMD_SIZE boundary.
+ */
+static unsigned long unused_pmd_start;
+
+static void vmemmap_flush_unused_pmd(void)
+{
+	if (!unused_pmd_start)
+		return;
+	memset(__va(unused_pmd_start), PAGE_UNUSED,
+	       ALIGN(unused_pmd_start, PMD_SIZE) - unused_pmd_start);
+	unused_pmd_start = 0;
+}
+
+static void __vmemmap_use_sub_pmd(unsigned long start, unsigned long end)
+{
+	/*
+	 * As we expect to add in the same granularity as we remove, it's
+	 * sufficient to mark only some piece used to block the memmap page from
+	 * getting removed (just in case the memmap never gets initialized,
+	 * e.g., because the memory block never gets onlined).
+	 */
+	memset(__va(start), 0, sizeof(struct page));
+}
+
+static void vmemmap_use_sub_pmd(unsigned long start, unsigned long end)
+{
+	/*
+	 * We only optimize if the new used range directly follows the
+	 * previously unused range (esp., when populating consecutive sections).
+	 */
+	if (unused_pmd_start == start) {
+		unused_pmd_start = end;
+		if (likely(IS_ALIGNED(unused_pmd_start, PMD_SIZE)))
+			unused_pmd_start = 0;
+		return;
 	}
-	pages4k = pages1m = pages2g = 0;
-	while (address < end) {
-		pg_dir = pgd_offset_k(address);
-		if (pgd_none(*pg_dir)) {
-			p4_dir = vmem_crst_alloc(_REGION2_ENTRY_EMPTY);
-			if (!p4_dir)
-				goto out;
-			pgd_populate(&init_mm, pg_dir, p4_dir);
-		}
-		p4_dir = p4d_offset(pg_dir, address);
-		if (p4d_none(*p4_dir)) {
-			pu_dir = vmem_crst_alloc(_REGION3_ENTRY_EMPTY);
-			if (!pu_dir)
-				goto out;
-			p4d_populate(&init_mm, p4_dir, pu_dir);
-		}
-		pu_dir = pud_offset(p4_dir, address);
-		if (MACHINE_HAS_EDAT2 && pud_none(*pu_dir) && address &&
-		    !(address & ~PUD_MASK) && (address + PUD_SIZE <= end) &&
-		    !debug_pagealloc_enabled()) {
-			pud_val(*pu_dir) = address | r3_prot;
-			address += PUD_SIZE;
-			pages2g++;
-			continue;
-		}
-		if (pud_none(*pu_dir)) {
-			pm_dir = vmem_crst_alloc(_SEGMENT_ENTRY_EMPTY);
-			if (!pm_dir)
-				goto out;
-			pud_populate(&init_mm, pu_dir, pm_dir);
-		}
-		pm_dir = pmd_offset(pu_dir, address);
-		if (MACHINE_HAS_EDAT1 && pmd_none(*pm_dir) && address &&
-		    !(address & ~PMD_MASK) && (address + PMD_SIZE <= end) &&
-		    !debug_pagealloc_enabled()) {
-			pmd_val(*pm_dir) = address | sgt_prot;
-			address += PMD_SIZE;
-			pages1m++;
-			continue;
-		}
-		if (pmd_none(*pm_dir)) {
-			pt_dir = vmem_pte_alloc();
-			if (!pt_dir)
-				goto out;
-			pmd_populate(&init_mm, pm_dir, pt_dir);
-		}
+	vmemmap_flush_unused_pmd();
+	__vmemmap_use_sub_pmd(start, end);
+}
 
-		pt_dir = pte_offset_kernel(pm_dir, address);
-		pte_val(*pt_dir) = address | pgt_prot;
-		address += PAGE_SIZE;
-		pages4k++;
+static void vmemmap_use_new_sub_pmd(unsigned long start, unsigned long end)
+{
+	void *page = __va(ALIGN_DOWN(start, PMD_SIZE));
+
+	vmemmap_flush_unused_pmd();
+
+	/* Could be our memmap page is filled with PAGE_UNUSED already ... */
+	__vmemmap_use_sub_pmd(start, end);
+
+	/* Mark the unused parts of the new memmap page PAGE_UNUSED. */
+	if (!IS_ALIGNED(start, PMD_SIZE))
+		memset(page, PAGE_UNUSED, start - __pa(page));
+	/*
+	 * We want to avoid memset(PAGE_UNUSED) when populating the vmemmap of
+	 * consecutive sections. Remember for the last added PMD the last
+	 * unused range in the populated PMD.
+	 */
+	if (!IS_ALIGNED(end, PMD_SIZE))
+		unused_pmd_start = end;
+}
+
+/* Returns true if the PMD is completely unused and can be freed. */
+static bool vmemmap_unuse_sub_pmd(unsigned long start, unsigned long end)
+{
+	void *page = __va(ALIGN_DOWN(start, PMD_SIZE));
+
+	vmemmap_flush_unused_pmd();
+	memset(__va(start), PAGE_UNUSED, end - start);
+	return !memchr_inv(page, PAGE_UNUSED, PMD_SIZE);
+}
+
+/* __ref: we'll only call vmemmap_alloc_block() via vmemmap_populate() */
+static int __ref modify_pte_table(pmd_t *pmd, unsigned long addr,
+				  unsigned long end, bool add, bool direct)
+{
+	unsigned long prot, pages = 0;
+	int ret = -ENOMEM;
+	pte_t *pte;
+
+	prot = pgprot_val(PAGE_KERNEL);
+	if (!MACHINE_HAS_NX)
+		prot &= ~_PAGE_NOEXEC;
+
+	pte = pte_offset_kernel(pmd, addr);
+	for (; addr < end; addr += PAGE_SIZE, pte++) {
+		if (!add) {
+			if (pte_none(*pte))
+				continue;
+			if (!direct)
+				vmem_free_pages(pfn_to_phys(pte_pfn(*pte)), 0);
+			pte_clear(&init_mm, addr, pte);
+		} else if (pte_none(*pte)) {
+			if (!direct) {
+				void *new_page = vmemmap_alloc_block(PAGE_SIZE, NUMA_NO_NODE);
+
+				if (!new_page)
+					goto out;
+				pte_val(*pte) = __pa(new_page) | prot;
+			} else {
+				pte_val(*pte) = addr | prot;
+			}
+		} else {
+			continue;
+		}
+		pages++;
 	}
 	ret = 0;
 out:
-	update_page_count(PG_DIRECT_MAP_4K, pages4k);
-	update_page_count(PG_DIRECT_MAP_1M, pages1m);
-	update_page_count(PG_DIRECT_MAP_2G, pages2g);
+	if (direct)
+		update_page_count(PG_DIRECT_MAP_4K, add ? pages : -pages);
 	return ret;
+}
+
+static void try_free_pte_table(pmd_t *pmd, unsigned long start)
+{
+	pte_t *pte;
+	int i;
+
+	/* We can safely assume this is fully in 1:1 mapping & vmemmap area */
+	pte = pte_offset_kernel(pmd, start);
+	for (i = 0; i < PTRS_PER_PTE; i++, pte++) {
+		if (!pte_none(*pte))
+			return;
+	}
+	vmem_pte_free(__va(pmd_deref(*pmd)));
+	pmd_clear(pmd);
+}
+
+/* __ref: we'll only call vmemmap_alloc_block() via vmemmap_populate() */
+static int __ref modify_pmd_table(pud_t *pud, unsigned long addr,
+				  unsigned long end, bool add, bool direct)
+{
+	unsigned long next, prot, pages = 0;
+	int ret = -ENOMEM;
+	pmd_t *pmd;
+	pte_t *pte;
+
+	prot = pgprot_val(SEGMENT_KERNEL);
+	if (!MACHINE_HAS_NX)
+		prot &= ~_SEGMENT_ENTRY_NOEXEC;
+
+	pmd = pmd_offset(pud, addr);
+	for (; addr < end; addr = next, pmd++) {
+		next = pmd_addr_end(addr, end);
+		if (!add) {
+			if (pmd_none(*pmd))
+				continue;
+			if (pmd_large(*pmd) && !add) {
+				if (IS_ALIGNED(addr, PMD_SIZE) &&
+				    IS_ALIGNED(next, PMD_SIZE)) {
+					if (!direct)
+						vmem_free_pages(pmd_deref(*pmd), get_order(PMD_SIZE));
+					pmd_clear(pmd);
+					pages++;
+				} else if (!direct && vmemmap_unuse_sub_pmd(addr, next)) {
+					vmem_free_pages(pmd_deref(*pmd), get_order(PMD_SIZE));
+					pmd_clear(pmd);
+				}
+				continue;
+			}
+		} else if (pmd_none(*pmd)) {
+			if (IS_ALIGNED(addr, PMD_SIZE) &&
+			    IS_ALIGNED(next, PMD_SIZE) &&
+			    MACHINE_HAS_EDAT1 && addr && direct &&
+			    !debug_pagealloc_enabled()) {
+				pmd_val(*pmd) = addr | prot;
+				pages++;
+				continue;
+			} else if (!direct && MACHINE_HAS_EDAT1) {
+				void *new_page;
+
+				/*
+				 * Use 1MB frames for vmemmap if available. We
+				 * always use large frames even if they are only
+				 * partially used. Otherwise we would have also
+				 * page tables since vmemmap_populate gets
+				 * called for each section separately.
+				 */
+				new_page = vmemmap_alloc_block(PMD_SIZE, NUMA_NO_NODE);
+				if (new_page) {
+					pmd_val(*pmd) = __pa(new_page) | prot;
+					if (!IS_ALIGNED(addr, PMD_SIZE) ||
+					    !IS_ALIGNED(next, PMD_SIZE)) {
+						vmemmap_use_new_sub_pmd(addr, next);
+					}
+					continue;
+				}
+			}
+			pte = vmem_pte_alloc();
+			if (!pte)
+				goto out;
+			pmd_populate(&init_mm, pmd, pte);
+		} else if (pmd_large(*pmd)) {
+			if (!direct)
+				vmemmap_use_sub_pmd(addr, next);
+			continue;
+		}
+		ret = modify_pte_table(pmd, addr, next, add, direct);
+		if (ret)
+			goto out;
+		if (!add)
+			try_free_pte_table(pmd, addr & PMD_MASK);
+	}
+	ret = 0;
+out:
+	if (direct)
+		update_page_count(PG_DIRECT_MAP_1M, add ? pages : -pages);
+	return ret;
+}
+
+static void try_free_pmd_table(pud_t *pud, unsigned long start)
+{
+	const unsigned long end = start + PUD_SIZE;
+	pmd_t *pmd;
+	int i;
+
+	/* Don't mess with any tables not fully in 1:1 mapping & vmemmap area */
+	if (end > VMALLOC_START)
+		return;
+#ifdef CONFIG_KASAN
+	if (start < KASAN_SHADOW_END && end > KASAN_SHADOW_START)
+		return;
+#endif
+	pmd = pmd_offset(pud, start);
+	for (i = 0; i < PTRS_PER_PMD; i++, pmd++)
+		if (!pmd_none(*pmd))
+			return;
+	vmem_free_pages(pud_deref(*pud), CRST_ALLOC_ORDER);
+	pud_clear(pud);
+}
+
+static int modify_pud_table(p4d_t *p4d, unsigned long addr, unsigned long end,
+			    bool add, bool direct)
+{
+	unsigned long next, prot, pages = 0;
+	int ret = -ENOMEM;
+	pud_t *pud;
+	pmd_t *pmd;
+
+	prot = pgprot_val(REGION3_KERNEL);
+	if (!MACHINE_HAS_NX)
+		prot &= ~_REGION_ENTRY_NOEXEC;
+	pud = pud_offset(p4d, addr);
+	for (; addr < end; addr = next, pud++) {
+		next = pud_addr_end(addr, end);
+		if (!add) {
+			if (pud_none(*pud))
+				continue;
+			if (pud_large(*pud)) {
+				if (IS_ALIGNED(addr, PUD_SIZE) &&
+				    IS_ALIGNED(next, PUD_SIZE)) {
+					pud_clear(pud);
+					pages++;
+				}
+				continue;
+			}
+		} else if (pud_none(*pud)) {
+			if (IS_ALIGNED(addr, PUD_SIZE) &&
+			    IS_ALIGNED(next, PUD_SIZE) &&
+			    MACHINE_HAS_EDAT2 && addr && direct &&
+			    !debug_pagealloc_enabled()) {
+				pud_val(*pud) = addr | prot;
+				pages++;
+				continue;
+			}
+			pmd = vmem_crst_alloc(_SEGMENT_ENTRY_EMPTY);
+			if (!pmd)
+				goto out;
+			pud_populate(&init_mm, pud, pmd);
+		} else if (pud_large(*pud)) {
+			continue;
+		}
+		ret = modify_pmd_table(pud, addr, next, add, direct);
+		if (ret)
+			goto out;
+		if (!add)
+			try_free_pmd_table(pud, addr & PUD_MASK);
+	}
+	ret = 0;
+out:
+	if (direct)
+		update_page_count(PG_DIRECT_MAP_2G, add ? pages : -pages);
+	return ret;
+}
+
+static void try_free_pud_table(p4d_t *p4d, unsigned long start)
+{
+	const unsigned long end = start + P4D_SIZE;
+	pud_t *pud;
+	int i;
+
+	/* Don't mess with any tables not fully in 1:1 mapping & vmemmap area */
+	if (end > VMALLOC_START)
+		return;
+#ifdef CONFIG_KASAN
+	if (start < KASAN_SHADOW_END && end > KASAN_SHADOW_START)
+		return;
+#endif
+
+	pud = pud_offset(p4d, start);
+	for (i = 0; i < PTRS_PER_PUD; i++, pud++) {
+		if (!pud_none(*pud))
+			return;
+	}
+	vmem_free_pages(p4d_deref(*p4d), CRST_ALLOC_ORDER);
+	p4d_clear(p4d);
+}
+
+static int modify_p4d_table(pgd_t *pgd, unsigned long addr, unsigned long end,
+			    bool add, bool direct)
+{
+	unsigned long next;
+	int ret = -ENOMEM;
+	p4d_t *p4d;
+	pud_t *pud;
+
+	p4d = p4d_offset(pgd, addr);
+	for (; addr < end; addr = next, p4d++) {
+		next = p4d_addr_end(addr, end);
+		if (!add) {
+			if (p4d_none(*p4d))
+				continue;
+		} else if (p4d_none(*p4d)) {
+			pud = vmem_crst_alloc(_REGION3_ENTRY_EMPTY);
+			if (!pud)
+				goto out;
+			p4d_populate(&init_mm, p4d, pud);
+		}
+		ret = modify_pud_table(p4d, addr, next, add, direct);
+		if (ret)
+			goto out;
+		if (!add)
+			try_free_pud_table(p4d, addr & P4D_MASK);
+	}
+	ret = 0;
+out:
+	return ret;
+}
+
+static void try_free_p4d_table(pgd_t *pgd, unsigned long start)
+{
+	const unsigned long end = start + PGDIR_SIZE;
+	p4d_t *p4d;
+	int i;
+
+	/* Don't mess with any tables not fully in 1:1 mapping & vmemmap area */
+	if (end > VMALLOC_START)
+		return;
+#ifdef CONFIG_KASAN
+	if (start < KASAN_SHADOW_END && end > KASAN_SHADOW_START)
+		return;
+#endif
+
+	p4d = p4d_offset(pgd, start);
+	for (i = 0; i < PTRS_PER_P4D; i++, p4d++) {
+		if (!p4d_none(*p4d))
+			return;
+	}
+	vmem_free_pages(pgd_deref(*pgd), CRST_ALLOC_ORDER);
+	pgd_clear(pgd);
+}
+
+static int modify_pagetable(unsigned long start, unsigned long end, bool add,
+			    bool direct)
+{
+	unsigned long addr, next;
+	int ret = -ENOMEM;
+	pgd_t *pgd;
+	p4d_t *p4d;
+
+	if (WARN_ON_ONCE(!PAGE_ALIGNED(start | end)))
+		return -EINVAL;
+	for (addr = start; addr < end; addr = next) {
+		next = pgd_addr_end(addr, end);
+		pgd = pgd_offset_k(addr);
+
+		if (!add) {
+			if (pgd_none(*pgd))
+				continue;
+		} else if (pgd_none(*pgd)) {
+			p4d = vmem_crst_alloc(_REGION2_ENTRY_EMPTY);
+			if (!p4d)
+				goto out;
+			pgd_populate(&init_mm, pgd, p4d);
+		}
+		ret = modify_p4d_table(pgd, addr, next, add, direct);
+		if (ret)
+			goto out;
+		if (!add)
+			try_free_p4d_table(pgd, addr & PGDIR_MASK);
+	}
+	ret = 0;
+out:
+	if (!add)
+		flush_tlb_kernel_range(start, end);
+	return ret;
+}
+
+static int add_pagetable(unsigned long start, unsigned long end, bool direct)
+{
+	return modify_pagetable(start, end, true, direct);
+}
+
+static int remove_pagetable(unsigned long start, unsigned long end, bool direct)
+{
+	return modify_pagetable(start, end, false, direct);
+}
+
+/*
+ * Add a physical memory range to the 1:1 mapping.
+ */
+static int vmem_add_range(unsigned long start, unsigned long size)
+{
+	return add_pagetable(start, start + size, true);
 }
 
 /*
  * Remove a physical memory range from the 1:1 mapping.
- * Currently only invalidates page table entries.
  */
 static void vmem_remove_range(unsigned long start, unsigned long size)
 {
-	unsigned long pages4k, pages1m, pages2g;
-	unsigned long end = start + size;
-	unsigned long address = start;
-	pgd_t *pg_dir;
-	p4d_t *p4_dir;
-	pud_t *pu_dir;
-	pmd_t *pm_dir;
-	pte_t *pt_dir;
-
-	pages4k = pages1m = pages2g = 0;
-	while (address < end) {
-		pg_dir = pgd_offset_k(address);
-		if (pgd_none(*pg_dir)) {
-			address += PGDIR_SIZE;
-			continue;
-		}
-		p4_dir = p4d_offset(pg_dir, address);
-		if (p4d_none(*p4_dir)) {
-			address += P4D_SIZE;
-			continue;
-		}
-		pu_dir = pud_offset(p4_dir, address);
-		if (pud_none(*pu_dir)) {
-			address += PUD_SIZE;
-			continue;
-		}
-		if (pud_large(*pu_dir)) {
-			pud_clear(pu_dir);
-			address += PUD_SIZE;
-			pages2g++;
-			continue;
-		}
-		pm_dir = pmd_offset(pu_dir, address);
-		if (pmd_none(*pm_dir)) {
-			address += PMD_SIZE;
-			continue;
-		}
-		if (pmd_large(*pm_dir)) {
-			pmd_clear(pm_dir);
-			address += PMD_SIZE;
-			pages1m++;
-			continue;
-		}
-		pt_dir = pte_offset_kernel(pm_dir, address);
-		pte_clear(&init_mm, address, pt_dir);
-		address += PAGE_SIZE;
-		pages4k++;
-	}
-	flush_tlb_kernel_range(start, end);
-	update_page_count(PG_DIRECT_MAP_4K, -pages4k);
-	update_page_count(PG_DIRECT_MAP_1M, -pages1m);
-	update_page_count(PG_DIRECT_MAP_2G, -pages2g);
+	remove_pagetable(start, start + size, true);
 }
 
 /*
  * Add a backed mem_map array to the virtual mem_map array.
  */
 int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
-		struct vmem_altmap *altmap)
+			       struct vmem_altmap *altmap)
 {
-	unsigned long pgt_prot, sgt_prot;
-	unsigned long address = start;
-	pgd_t *pg_dir;
-	p4d_t *p4_dir;
-	pud_t *pu_dir;
-	pmd_t *pm_dir;
-	pte_t *pt_dir;
-	int ret = -ENOMEM;
-
-	pgt_prot = pgprot_val(PAGE_KERNEL);
-	sgt_prot = pgprot_val(SEGMENT_KERNEL);
-	if (!MACHINE_HAS_NX) {
-		pgt_prot &= ~_PAGE_NOEXEC;
-		sgt_prot &= ~_SEGMENT_ENTRY_NOEXEC;
-	}
-	for (address = start; address < end;) {
-		pg_dir = pgd_offset_k(address);
-		if (pgd_none(*pg_dir)) {
-			p4_dir = vmem_crst_alloc(_REGION2_ENTRY_EMPTY);
-			if (!p4_dir)
-				goto out;
-			pgd_populate(&init_mm, pg_dir, p4_dir);
-		}
-
-		p4_dir = p4d_offset(pg_dir, address);
-		if (p4d_none(*p4_dir)) {
-			pu_dir = vmem_crst_alloc(_REGION3_ENTRY_EMPTY);
-			if (!pu_dir)
-				goto out;
-			p4d_populate(&init_mm, p4_dir, pu_dir);
-		}
-
-		pu_dir = pud_offset(p4_dir, address);
-		if (pud_none(*pu_dir)) {
-			pm_dir = vmem_crst_alloc(_SEGMENT_ENTRY_EMPTY);
-			if (!pm_dir)
-				goto out;
-			pud_populate(&init_mm, pu_dir, pm_dir);
-		}
-
-		pm_dir = pmd_offset(pu_dir, address);
-		if (pmd_none(*pm_dir)) {
-			/* Use 1MB frames for vmemmap if available. We always
-			 * use large frames even if they are only partially
-			 * used.
-			 * Otherwise we would have also page tables since
-			 * vmemmap_populate gets called for each section
-			 * separately. */
-			if (MACHINE_HAS_EDAT1) {
-				void *new_page;
-
-				new_page = vmemmap_alloc_block(PMD_SIZE, node);
-				if (!new_page)
-					goto out;
-				pmd_val(*pm_dir) = __pa(new_page) | sgt_prot;
-				address = (address + PMD_SIZE) & PMD_MASK;
-				continue;
-			}
-			pt_dir = vmem_pte_alloc();
-			if (!pt_dir)
-				goto out;
-			pmd_populate(&init_mm, pm_dir, pt_dir);
-		} else if (pmd_large(*pm_dir)) {
-			address = (address + PMD_SIZE) & PMD_MASK;
-			continue;
-		}
-
-		pt_dir = pte_offset_kernel(pm_dir, address);
-		if (pte_none(*pt_dir)) {
-			void *new_page;
-
-			new_page = vmemmap_alloc_block(PAGE_SIZE, node);
-			if (!new_page)
-				goto out;
-			pte_val(*pt_dir) = __pa(new_page) | pgt_prot;
-		}
-		address += PAGE_SIZE;
-	}
-	ret = 0;
-out:
-	return ret;
-}
-
-void vmemmap_free(unsigned long start, unsigned long end,
-		struct vmem_altmap *altmap)
-{
-}
-
-/*
- * Add memory segment to the segment list if it doesn't overlap with
- * an already present segment.
- */
-static int insert_memory_segment(struct memory_segment *seg)
-{
-	struct memory_segment *tmp;
-
-	if (seg->start + seg->size > VMEM_MAX_PHYS ||
-	    seg->start + seg->size < seg->start)
-		return -ERANGE;
-
-	list_for_each_entry(tmp, &mem_segs, list) {
-		if (seg->start >= tmp->start + tmp->size)
-			continue;
-		if (seg->start + seg->size <= tmp->start)
-			continue;
-		return -ENOSPC;
-	}
-	list_add(&seg->list, &mem_segs);
-	return 0;
-}
-
-/*
- * Remove memory segment from the segment list.
- */
-static void remove_memory_segment(struct memory_segment *seg)
-{
-	list_del(&seg->list);
-}
-
-static void __remove_shared_memory(struct memory_segment *seg)
-{
-	remove_memory_segment(seg);
-	vmem_remove_range(seg->start, seg->size);
-}
-
-int vmem_remove_mapping(unsigned long start, unsigned long size)
-{
-	struct memory_segment *seg;
 	int ret;
 
 	mutex_lock(&vmem_mutex);
-
-	ret = -ENOENT;
-	list_for_each_entry(seg, &mem_segs, list) {
-		if (seg->start == start && seg->size == size)
-			break;
-	}
-
-	if (seg->start != start || seg->size != size)
-		goto out;
-
-	ret = 0;
-	__remove_shared_memory(seg);
-	kfree(seg);
-out:
+	/* We don't care about the node, just use NUMA_NO_NODE on allocations */
+	ret = add_pagetable(start, end, false);
+	if (ret)
+		remove_pagetable(start, end, false);
 	mutex_unlock(&vmem_mutex);
 	return ret;
 }
 
+void vmemmap_free(unsigned long start, unsigned long end,
+		  struct vmem_altmap *altmap)
+{
+	mutex_lock(&vmem_mutex);
+	remove_pagetable(start, end, false);
+	mutex_unlock(&vmem_mutex);
+}
+
+void vmem_remove_mapping(unsigned long start, unsigned long size)
+{
+	mutex_lock(&vmem_mutex);
+	vmem_remove_range(start, size);
+	mutex_unlock(&vmem_mutex);
+}
+
 int vmem_add_mapping(unsigned long start, unsigned long size)
 {
-	struct memory_segment *seg;
 	int ret;
 
+	if (start + size > VMEM_MAX_PHYS ||
+	    start + size < start)
+		return -ERANGE;
+
 	mutex_lock(&vmem_mutex);
-	ret = -ENOMEM;
-	seg = kzalloc(sizeof(*seg), GFP_KERNEL);
-	if (!seg)
-		goto out;
-	seg->start = start;
-	seg->size = size;
-
-	ret = insert_memory_segment(seg);
+	ret = vmem_add_range(start, size);
 	if (ret)
-		goto out_free;
-
-	ret = vmem_add_mem(start, size);
-	if (ret)
-		goto out_remove;
-	goto out;
-
-out_remove:
-	__remove_shared_memory(seg);
-out_free:
-	kfree(seg);
-out:
+		vmem_remove_range(start, size);
 	mutex_unlock(&vmem_mutex);
 	return ret;
 }
@@ -401,10 +555,11 @@
  */
 void __init vmem_map_init(void)
 {
-	struct memblock_region *reg;
+	phys_addr_t base, end;
+	u64 i;
 
-	for_each_memblock(memory, reg)
-		vmem_add_mem(reg->base, reg->size);
+	for_each_mem_range(i, &base, &end)
+		vmem_add_range(base, end - base);
 	__set_memory((unsigned long)_stext,
 		     (unsigned long)(_etext - _stext) >> PAGE_SHIFT,
 		     SET_MEMORY_RO | SET_MEMORY_X);
@@ -414,30 +569,12 @@
 	__set_memory((unsigned long)_sinittext,
 		     (unsigned long)(_einittext - _sinittext) >> PAGE_SHIFT,
 		     SET_MEMORY_RO | SET_MEMORY_X);
+	__set_memory(__stext_dma, (__etext_dma - __stext_dma) >> PAGE_SHIFT,
+		     SET_MEMORY_RO | SET_MEMORY_X);
+
+	/* we need lowcore executable for our LPSWE instructions */
+	set_memory_x(0, 1);
+
 	pr_info("Write protected kernel read-only data: %luk\n",
 		(unsigned long)(__end_rodata - _stext) >> 10);
 }
-
-/*
- * Convert memblock.memory to a memory segment list so there is a single
- * list that contains all memory segments.
- */
-static int __init vmem_convert_memory_chunk(void)
-{
-	struct memblock_region *reg;
-	struct memory_segment *seg;
-
-	mutex_lock(&vmem_mutex);
-	for_each_memblock(memory, reg) {
-		seg = kzalloc(sizeof(*seg), GFP_KERNEL);
-		if (!seg)
-			panic("Out of memory...\n");
-		seg->start = reg->base;
-		seg->size = reg->size;
-		insert_memory_segment(seg);
-	}
-	mutex_unlock(&vmem_mutex);
-	return 0;
-}
-
-core_initcall(vmem_convert_memory_chunk);