@@ -4,17 +4,15 @@
  * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>
  */
 
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/pfn.h>
 #include <linux/mm.h>
 #include <linux/init.h>
 #include <linux/list.h>
 #include <linux/hugetlb.h>
 #include <linux/slab.h>
-#include <linux/memblock.h>
 #include <asm/cacheflush.h>
 #include <asm/pgalloc.h>
-#include <asm/pgtable.h>
 #include <asm/setup.h>
 #include <asm/tlbflush.h>
 #include <asm/sections.h>
@@ -22,21 +20,22 @@
 
 static DEFINE_MUTEX(vmem_mutex);
 
-struct memory_segment {
-	struct list_head list;
-	unsigned long start;
-	unsigned long size;
-};
-
-static LIST_HEAD(mem_segs);
-
 static void __ref *vmem_alloc_pages(unsigned int order)
 {
 	unsigned long size = PAGE_SIZE << order;
 
 	if (slab_is_available())
 		return (void *)__get_free_pages(GFP_KERNEL, order);
-	return (void *) memblock_alloc(size, size);
+	return (void *) memblock_phys_alloc(size, size);
+}
+
+static void vmem_free_pages(unsigned long addr, int order)
+{
+	/* We don't expect boot memory to be removed ever. */
+	if (!slab_is_available() ||
+	    WARN_ON_ONCE(PageReserved(phys_to_page(addr))))
+		return;
+	free_pages(addr, order);
 }
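
The PageReserved() check above is what keeps early allocations permanent: pages handed out by memblock before the buddy allocator exists are marked reserved, so vmem_free_pages() must refuse them rather than feed them to free_pages(). A rough userspace model of that guard (the bitmap and function names here are illustrative, not kernel API):

```c
#include <stdbool.h>
#include <stdio.h>

#define NR_PAGES 16

static bool page_reserved[NR_PAGES];	/* stands in for PG_reserved */
static bool slab_available;		/* stands in for slab_is_available() */

static void model_free_pages(unsigned long pfn)
{
	/* Mirror the kernel check: bail out during boot or for
	 * memblock-owned (reserved) pages instead of handing them
	 * to the page allocator's free path. */
	if (!slab_available || page_reserved[pfn]) {
		fprintf(stderr, "refusing to free pfn %lu\n", pfn);
		return;
	}
	printf("freed pfn %lu\n", pfn);
}

int main(void)
{
	page_reserved[3] = true;	/* came from memblock at boot */
	slab_available = true;
	model_free_pages(3);		/* refused: reserved boot memory */
	model_free_pages(5);		/* fine: ordinary runtime allocation */
	return 0;
}
```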
 
 void *vmem_crst_alloc(unsigned long val)
@@ -57,339 +56,494 @@
 	if (slab_is_available())
 		pte = (pte_t *) page_table_alloc(&init_mm);
 	else
-		pte = (pte_t *) memblock_alloc(size, size);
+		pte = (pte_t *) memblock_phys_alloc(size, size);
 	if (!pte)
 		return NULL;
 	memset64((u64 *)pte, _PAGE_INVALID, PTRS_PER_PTE);
 	return pte;
 }
 
-/*
- * Add a physical memory range to the 1:1 mapping.
- */
-static int vmem_add_mem(unsigned long start, unsigned long size)
+static void vmem_pte_free(unsigned long *table)
 {
-	unsigned long pgt_prot, sgt_prot, r3_prot;
-	unsigned long pages4k, pages1m, pages2g;
-	unsigned long end = start + size;
-	unsigned long address = start;
-	pgd_t *pg_dir;
-	p4d_t *p4_dir;
-	pud_t *pu_dir;
-	pmd_t *pm_dir;
-	pte_t *pt_dir;
-	int ret = -ENOMEM;
+	/* We don't expect boot memory to be removed ever. */
+	if (!slab_is_available() ||
+	    WARN_ON_ONCE(PageReserved(virt_to_page(table))))
+		return;
+	page_table_free(&init_mm, table);
+}
 
-	pgt_prot = pgprot_val(PAGE_KERNEL);
-	sgt_prot = pgprot_val(SEGMENT_KERNEL);
-	r3_prot = pgprot_val(REGION3_KERNEL);
-	if (!MACHINE_HAS_NX) {
-		pgt_prot &= ~_PAGE_NOEXEC;
-		sgt_prot &= ~_SEGMENT_ENTRY_NOEXEC;
-		r3_prot &= ~_REGION_ENTRY_NOEXEC;
+#define PAGE_UNUSED 0xFD
+
+/*
+ * The unused vmemmap range, which was not yet memset(PAGE_UNUSED), ranges
+ * from unused_pmd_start to the next PMD_SIZE boundary.
+ */
+static unsigned long unused_pmd_start;
+
+static void vmemmap_flush_unused_pmd(void)
+{
+	if (!unused_pmd_start)
+		return;
+	memset(__va(unused_pmd_start), PAGE_UNUSED,
+	       ALIGN(unused_pmd_start, PMD_SIZE) - unused_pmd_start);
+	unused_pmd_start = 0;
+}
+
+static void __vmemmap_use_sub_pmd(unsigned long start, unsigned long end)
+{
+	/*
+	 * As we expect to add in the same granularity as we remove, it's
+	 * sufficient to mark only some piece used to block the memmap page from
+	 * getting removed (just in case the memmap never gets initialized,
+	 * e.g., because the memory block never gets onlined).
+	 */
+	memset(__va(start), 0, sizeof(struct page));
+}
+
+static void vmemmap_use_sub_pmd(unsigned long start, unsigned long end)
+{
+	/*
+	 * We only optimize if the new used range directly follows the
+	 * previously unused range (esp., when populating consecutive sections).
+	 */
+	if (unused_pmd_start == start) {
+		unused_pmd_start = end;
+		if (likely(IS_ALIGNED(unused_pmd_start, PMD_SIZE)))
+			unused_pmd_start = 0;
+		return;
 	}
-	pages4k = pages1m = pages2g = 0;
-	while (address < end) {
-		pg_dir = pgd_offset_k(address);
-		if (pgd_none(*pg_dir)) {
-			p4_dir = vmem_crst_alloc(_REGION2_ENTRY_EMPTY);
-			if (!p4_dir)
-				goto out;
-			pgd_populate(&init_mm, pg_dir, p4_dir);
-		}
-		p4_dir = p4d_offset(pg_dir, address);
-		if (p4d_none(*p4_dir)) {
-			pu_dir = vmem_crst_alloc(_REGION3_ENTRY_EMPTY);
-			if (!pu_dir)
-				goto out;
-			p4d_populate(&init_mm, p4_dir, pu_dir);
-		}
-		pu_dir = pud_offset(p4_dir, address);
-		if (MACHINE_HAS_EDAT2 && pud_none(*pu_dir) && address &&
-		    !(address & ~PUD_MASK) && (address + PUD_SIZE <= end) &&
-		    !debug_pagealloc_enabled()) {
-			pud_val(*pu_dir) = address | r3_prot;
-			address += PUD_SIZE;
-			pages2g++;
-			continue;
-		}
-		if (pud_none(*pu_dir)) {
-			pm_dir = vmem_crst_alloc(_SEGMENT_ENTRY_EMPTY);
-			if (!pm_dir)
-				goto out;
-			pud_populate(&init_mm, pu_dir, pm_dir);
-		}
-		pm_dir = pmd_offset(pu_dir, address);
-		if (MACHINE_HAS_EDAT1 && pmd_none(*pm_dir) && address &&
-		    !(address & ~PMD_MASK) && (address + PMD_SIZE <= end) &&
-		    !debug_pagealloc_enabled()) {
-			pmd_val(*pm_dir) = address | sgt_prot;
-			address += PMD_SIZE;
-			pages1m++;
-			continue;
-		}
-		if (pmd_none(*pm_dir)) {
-			pt_dir = vmem_pte_alloc();
-			if (!pt_dir)
-				goto out;
-			pmd_populate(&init_mm, pm_dir, pt_dir);
-		}
+	vmemmap_flush_unused_pmd();
+	__vmemmap_use_sub_pmd(start, end);
+}
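
vmemmap_use_sub_pmd() defers the PAGE_UNUSED memset while sections arrive back to back: only a gap (or a completely filled PMD) settles the bookkeeping. A small standalone sketch of that deferral, assuming 256-byte "PMDs" and a counter in place of the real memset:

```c
#include <stdio.h>

#define PMD_SIZE 256UL

static unsigned long unused_pmd_start;
static unsigned long memsets;

static void flush_unused_pmd(void)
{
	if (!unused_pmd_start)
		return;
	memsets++;	/* stands in for memset(PAGE_UNUSED, ...) */
	unused_pmd_start = 0;
}

static void use_sub_pmd(unsigned long start, unsigned long end)
{
	if (unused_pmd_start == start) {	/* directly follows previous range */
		unused_pmd_start = end;
		if (!(unused_pmd_start & (PMD_SIZE - 1)))
			unused_pmd_start = 0;	/* PMD filled completely */
		return;
	}
	flush_unused_pmd();
	/* (the real code then marks the new range used) */
}

int main(void)
{
	unused_pmd_start = 64;	/* as left behind by the first populated section */
	use_sub_pmd(64, 128);	/* consecutive: no memset */
	use_sub_pmd(128, 192);	/* consecutive: no memset */
	use_sub_pmd(200, 256);	/* gap: the deferred memset finally runs */
	printf("memsets issued: %lu\n", memsets);	/* prints 1 */
	return 0;
}
```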
 
-		pt_dir = pte_offset_kernel(pm_dir, address);
-		pte_val(*pt_dir) = address | pgt_prot;
-		address += PAGE_SIZE;
-		pages4k++;
+static void vmemmap_use_new_sub_pmd(unsigned long start, unsigned long end)
+{
+	void *page = __va(ALIGN_DOWN(start, PMD_SIZE));
+
+	vmemmap_flush_unused_pmd();
+
+	/* Could be our memmap page is filled with PAGE_UNUSED already ... */
+	__vmemmap_use_sub_pmd(start, end);
+
+	/* Mark the unused parts of the new memmap page PAGE_UNUSED. */
+	if (!IS_ALIGNED(start, PMD_SIZE))
+		memset(page, PAGE_UNUSED, start - __pa(page));
+	/*
+	 * We want to avoid memset(PAGE_UNUSED) when populating the vmemmap of
+	 * consecutive sections. Remember for the last added PMD the last
+	 * unused range in the populated PMD.
+	 */
+	if (!IS_ALIGNED(end, PMD_SIZE))
+		unused_pmd_start = end;
+}
+
+/* Returns true if the PMD is completely unused and can be freed. */
+static bool vmemmap_unuse_sub_pmd(unsigned long start, unsigned long end)
+{
+	void *page = __va(ALIGN_DOWN(start, PMD_SIZE));
+
+	vmemmap_flush_unused_pmd();
+	memset(__va(start), PAGE_UNUSED, end - start);
+	return !memchr_inv(page, PAGE_UNUSED, PMD_SIZE);
+}
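
vmemmap_unuse_sub_pmd() can declare the whole PMD free only when every byte of the backing page carries the 0xFD pattern. The sketch below models that with a plain buffer; memchr_inv() is reimplemented because it is a kernel helper with no libc equivalent:

```c
#include <assert.h>
#include <stdio.h>
#include <string.h>

#define PMD_BYTES   4096
#define PAGE_UNUSED 0xFD

static unsigned char pmd_page[PMD_BYTES];

/* Return NULL if all 'n' bytes equal 'c', else a pointer to the mismatch. */
static const void *memchr_inv(const void *s, int c, size_t n)
{
	const unsigned char *p = s;
	size_t i;

	for (i = 0; i < n; i++)
		if (p[i] != (unsigned char)c)
			return p + i;
	return NULL;
}

static void unuse_range(size_t start, size_t end)
{
	memset(pmd_page + start, PAGE_UNUSED, end - start);
}

/* Mirrors vmemmap_unuse_sub_pmd(): the PMD may be freed only once every
 * byte carries the PAGE_UNUSED pattern. */
static int pmd_fully_unused(void)
{
	return !memchr_inv(pmd_page, PAGE_UNUSED, PMD_BYTES);
}

int main(void)
{
	unuse_range(0, 2048);
	assert(!pmd_fully_unused());	/* second half still in use */
	unuse_range(2048, PMD_BYTES);
	assert(pmd_fully_unused());	/* now the PMD can be freed */
	printf("PMD reclaimable\n");
	return 0;
}
```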
+
+/* __ref: we'll only call vmemmap_alloc_block() via vmemmap_populate() */
+static int __ref modify_pte_table(pmd_t *pmd, unsigned long addr,
+				  unsigned long end, bool add, bool direct)
+{
+	unsigned long prot, pages = 0;
+	int ret = -ENOMEM;
+	pte_t *pte;
+
+	prot = pgprot_val(PAGE_KERNEL);
+	if (!MACHINE_HAS_NX)
+		prot &= ~_PAGE_NOEXEC;
+
+	pte = pte_offset_kernel(pmd, addr);
+	for (; addr < end; addr += PAGE_SIZE, pte++) {
+		if (!add) {
+			if (pte_none(*pte))
+				continue;
+			if (!direct)
+				vmem_free_pages(pfn_to_phys(pte_pfn(*pte)), 0);
+			pte_clear(&init_mm, addr, pte);
+		} else if (pte_none(*pte)) {
+			if (!direct) {
+				void *new_page = vmemmap_alloc_block(PAGE_SIZE, NUMA_NO_NODE);
+
+				if (!new_page)
+					goto out;
+				pte_val(*pte) = __pa(new_page) | prot;
+			} else {
+				pte_val(*pte) = addr | prot;
+			}
+		} else {
+			continue;
+		}
+		pages++;
 	}
 	ret = 0;
 out:
-	update_page_count(PG_DIRECT_MAP_4K, pages4k);
-	update_page_count(PG_DIRECT_MAP_1M, pages1m);
-	update_page_count(PG_DIRECT_MAP_2G, pages2g);
+	if (direct)
+		update_page_count(PG_DIRECT_MAP_4K, add ? pages : -pages);
 	return ret;
+}
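
The PTE loop treats the same walk as either a mapper or an unmapper depending on `add`, and counts touched entries so the caller can update the direct-map statistics with the right sign. A toy model of those loop semantics over an 8-slot table (the names and fake values are made up for illustration):

```c
#include <stdio.h>

#define PTRS 8

static unsigned long table[PTRS];	/* 0 plays the role of pte_none() */

static long modify(unsigned long from, unsigned long to, int add)
{
	long pages = 0;
	unsigned long i;

	for (i = from; i < to; i++) {
		if (!add) {
			if (!table[i])
				continue;	/* nothing mapped: skip */
			table[i] = 0;		/* pte_clear() */
		} else if (!table[i]) {
			table[i] = 0x1000 * (i + 1);	/* fake mapping */
		} else {
			continue;	/* already mapped: leave untouched */
		}
		pages++;
	}
	return add ? pages : -pages;	/* signed delta for update_page_count() */
}

int main(void)
{
	printf("added %ld\n", modify(0, 8, 1));		/* +8 */
	printf("re-added %ld\n", modify(2, 6, 1));	/* +0, all present */
	printf("removed %ld\n", modify(0, 4, 0));	/* -4 */
	return 0;
}
```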
+
+static void try_free_pte_table(pmd_t *pmd, unsigned long start)
+{
+	pte_t *pte;
+	int i;
+
+	/* We can safely assume this is fully in 1:1 mapping & vmemmap area */
+	pte = pte_offset_kernel(pmd, start);
+	for (i = 0; i < PTRS_PER_PTE; i++, pte++) {
+		if (!pte_none(*pte))
+			return;
+	}
+	vmem_pte_free(__va(pmd_deref(*pmd)));
+	pmd_clear(pmd);
+}
+
+/* __ref: we'll only call vmemmap_alloc_block() via vmemmap_populate() */
+static int __ref modify_pmd_table(pud_t *pud, unsigned long addr,
+				  unsigned long end, bool add, bool direct)
+{
+	unsigned long next, prot, pages = 0;
+	int ret = -ENOMEM;
+	pmd_t *pmd;
+	pte_t *pte;
+
+	prot = pgprot_val(SEGMENT_KERNEL);
+	if (!MACHINE_HAS_NX)
+		prot &= ~_SEGMENT_ENTRY_NOEXEC;
+
+	pmd = pmd_offset(pud, addr);
+	for (; addr < end; addr = next, pmd++) {
+		next = pmd_addr_end(addr, end);
+		if (!add) {
+			if (pmd_none(*pmd))
+				continue;
+			if (pmd_large(*pmd) && !add) {
+				if (IS_ALIGNED(addr, PMD_SIZE) &&
+				    IS_ALIGNED(next, PMD_SIZE)) {
+					if (!direct)
+						vmem_free_pages(pmd_deref(*pmd), get_order(PMD_SIZE));
+					pmd_clear(pmd);
+					pages++;
+				} else if (!direct && vmemmap_unuse_sub_pmd(addr, next)) {
+					vmem_free_pages(pmd_deref(*pmd), get_order(PMD_SIZE));
+					pmd_clear(pmd);
+				}
+				continue;
+			}
+		} else if (pmd_none(*pmd)) {
+			if (IS_ALIGNED(addr, PMD_SIZE) &&
+			    IS_ALIGNED(next, PMD_SIZE) &&
+			    MACHINE_HAS_EDAT1 && addr && direct &&
+			    !debug_pagealloc_enabled()) {
+				pmd_val(*pmd) = addr | prot;
+				pages++;
+				continue;
+			} else if (!direct && MACHINE_HAS_EDAT1) {
+				void *new_page;
+
+				/*
+				 * Use 1MB frames for vmemmap if available. We
+				 * always use large frames even if they are only
+				 * partially used. Otherwise we would have also
+				 * page tables since vmemmap_populate gets
+				 * called for each section separately.
+				 */
+				new_page = vmemmap_alloc_block(PMD_SIZE, NUMA_NO_NODE);
+				if (new_page) {
+					pmd_val(*pmd) = __pa(new_page) | prot;
+					if (!IS_ALIGNED(addr, PMD_SIZE) ||
+					    !IS_ALIGNED(next, PMD_SIZE)) {
+						vmemmap_use_new_sub_pmd(addr, next);
+					}
+					continue;
+				}
+			}
+			pte = vmem_pte_alloc();
+			if (!pte)
+				goto out;
+			pmd_populate(&init_mm, pmd, pte);
+		} else if (pmd_large(*pmd)) {
+			if (!direct)
+				vmemmap_use_sub_pmd(addr, next);
+			continue;
+		}
+		ret = modify_pte_table(pmd, addr, next, add, direct);
+		if (ret)
+			goto out;
+		if (!add)
+			try_free_pte_table(pmd, addr & PMD_MASK);
+	}
+	ret = 0;
+out:
+	if (direct)
+		update_page_count(PG_DIRECT_MAP_1M, add ? pages : -pages);
+	return ret;
+}
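
A sub-range may use a large 1 MiB segment entry only when it covers one full, aligned PMD; pmd_addr_end() clips each step, so the test reduces to IS_ALIGNED on both bounds. A standalone walk over an assumed [1 MiB, 3.5 MiB) range shows the decision:

```c
#include <stdio.h>

#define PMD_SIZE (1UL << 20)	/* 1 MiB segments, as on s390 with EDAT1 */

static int is_aligned(unsigned long x, unsigned long a)
{
	return (x & (a - 1)) == 0;
}

/* Like pmd_addr_end(): stop at the next PMD boundary or at 'end'. */
static unsigned long pmd_addr_end(unsigned long addr, unsigned long end)
{
	unsigned long next = (addr + PMD_SIZE) & ~(PMD_SIZE - 1);

	return next < end ? next : end;
}

int main(void)
{
	unsigned long addr = 0x100000, end = 0x380000, next;

	for (; addr < end; addr = next) {
		next = pmd_addr_end(addr, end);
		printf("[%#lx, %#lx) -> %s\n", addr, next,
		       is_aligned(addr, PMD_SIZE) && is_aligned(next, PMD_SIZE) ?
		       "large PMD" : "PTE table");
	}
	return 0;
}
```

The last, partial megabyte falls through to a PTE table on the direct map; on the vmemmap path the code instead keeps the large frame and tracks the partial use with PAGE_UNUSED, as seen above.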
+
+static void try_free_pmd_table(pud_t *pud, unsigned long start)
+{
+	const unsigned long end = start + PUD_SIZE;
+	pmd_t *pmd;
+	int i;
+
+	/* Don't mess with any tables not fully in 1:1 mapping & vmemmap area */
+	if (end > VMALLOC_START)
+		return;
+#ifdef CONFIG_KASAN
+	if (start < KASAN_SHADOW_END && end > KASAN_SHADOW_START)
+		return;
+#endif
+	pmd = pmd_offset(pud, start);
+	for (i = 0; i < PTRS_PER_PMD; i++, pmd++)
+		if (!pmd_none(*pmd))
+			return;
+	vmem_free_pages(pud_deref(*pud), CRST_ALLOC_ORDER);
+	pud_clear(pud);
+}
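
The KASAN guard above is the usual half-open interval overlap test: [start, end) intersects the shadow region exactly when start < KASAN_SHADOW_END and end > KASAN_SHADOW_START. A quick check with made-up shadow bounds:

```c
#include <assert.h>

#define SHADOW_START 0x4000UL	/* illustrative, not the real layout */
#define SHADOW_END   0x8000UL

static int overlaps(unsigned long start, unsigned long end)
{
	return start < SHADOW_END && end > SHADOW_START;
}

int main(void)
{
	assert(!overlaps(0x0000, 0x4000));	/* touches only; half-open: no overlap */
	assert(overlaps(0x3000, 0x5000));	/* straddles the start */
	assert(!overlaps(0x8000, 0x9000));	/* entirely above */
	return 0;
}
```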
+
+static int modify_pud_table(p4d_t *p4d, unsigned long addr, unsigned long end,
+			    bool add, bool direct)
+{
+	unsigned long next, prot, pages = 0;
+	int ret = -ENOMEM;
+	pud_t *pud;
+	pmd_t *pmd;
+
+	prot = pgprot_val(REGION3_KERNEL);
+	if (!MACHINE_HAS_NX)
+		prot &= ~_REGION_ENTRY_NOEXEC;
+	pud = pud_offset(p4d, addr);
+	for (; addr < end; addr = next, pud++) {
+		next = pud_addr_end(addr, end);
+		if (!add) {
+			if (pud_none(*pud))
+				continue;
+			if (pud_large(*pud)) {
+				if (IS_ALIGNED(addr, PUD_SIZE) &&
+				    IS_ALIGNED(next, PUD_SIZE)) {
+					pud_clear(pud);
+					pages++;
+				}
+				continue;
+			}
+		} else if (pud_none(*pud)) {
+			if (IS_ALIGNED(addr, PUD_SIZE) &&
+			    IS_ALIGNED(next, PUD_SIZE) &&
+			    MACHINE_HAS_EDAT2 && addr && direct &&
+			    !debug_pagealloc_enabled()) {
+				pud_val(*pud) = addr | prot;
+				pages++;
+				continue;
+			}
+			pmd = vmem_crst_alloc(_SEGMENT_ENTRY_EMPTY);
+			if (!pmd)
+				goto out;
+			pud_populate(&init_mm, pud, pmd);
+		} else if (pud_large(*pud)) {
+			continue;
+		}
+		ret = modify_pmd_table(pud, addr, next, add, direct);
+		if (ret)
+			goto out;
+		if (!add)
+			try_free_pmd_table(pud, addr & PUD_MASK);
+	}
+	ret = 0;
+out:
+	if (direct)
+		update_page_count(PG_DIRECT_MAP_2G, add ? pages : -pages);
+	return ret;
+}
+
+static void try_free_pud_table(p4d_t *p4d, unsigned long start)
+{
+	const unsigned long end = start + P4D_SIZE;
+	pud_t *pud;
+	int i;
+
+	/* Don't mess with any tables not fully in 1:1 mapping & vmemmap area */
+	if (end > VMALLOC_START)
+		return;
+#ifdef CONFIG_KASAN
+	if (start < KASAN_SHADOW_END && end > KASAN_SHADOW_START)
+		return;
+#endif
+
+	pud = pud_offset(p4d, start);
+	for (i = 0; i < PTRS_PER_PUD; i++, pud++) {
+		if (!pud_none(*pud))
+			return;
+	}
+	vmem_free_pages(p4d_deref(*p4d), CRST_ALLOC_ORDER);
+	p4d_clear(p4d);
+}
+
+static int modify_p4d_table(pgd_t *pgd, unsigned long addr, unsigned long end,
+			    bool add, bool direct)
+{
+	unsigned long next;
+	int ret = -ENOMEM;
+	p4d_t *p4d;
+	pud_t *pud;
+
+	p4d = p4d_offset(pgd, addr);
+	for (; addr < end; addr = next, p4d++) {
+		next = p4d_addr_end(addr, end);
+		if (!add) {
+			if (p4d_none(*p4d))
+				continue;
+		} else if (p4d_none(*p4d)) {
+			pud = vmem_crst_alloc(_REGION3_ENTRY_EMPTY);
+			if (!pud)
+				goto out;
+			p4d_populate(&init_mm, p4d, pud);
+		}
+		ret = modify_pud_table(p4d, addr, next, add, direct);
+		if (ret)
+			goto out;
+		if (!add)
+			try_free_pud_table(p4d, addr & P4D_MASK);
+	}
+	ret = 0;
+out:
+	return ret;
+}
+
+static void try_free_p4d_table(pgd_t *pgd, unsigned long start)
+{
+	const unsigned long end = start + PGDIR_SIZE;
+	p4d_t *p4d;
+	int i;
+
+	/* Don't mess with any tables not fully in 1:1 mapping & vmemmap area */
+	if (end > VMALLOC_START)
+		return;
+#ifdef CONFIG_KASAN
+	if (start < KASAN_SHADOW_END && end > KASAN_SHADOW_START)
+		return;
+#endif
+
+	p4d = p4d_offset(pgd, start);
+	for (i = 0; i < PTRS_PER_P4D; i++, p4d++) {
+		if (!p4d_none(*p4d))
+			return;
+	}
+	vmem_free_pages(pgd_deref(*pgd), CRST_ALLOC_ORDER);
+	pgd_clear(pgd);
+}
+
+static int modify_pagetable(unsigned long start, unsigned long end, bool add,
+			    bool direct)
+{
+	unsigned long addr, next;
+	int ret = -ENOMEM;
+	pgd_t *pgd;
+	p4d_t *p4d;
+
+	if (WARN_ON_ONCE(!PAGE_ALIGNED(start | end)))
+		return -EINVAL;
+	for (addr = start; addr < end; addr = next) {
+		next = pgd_addr_end(addr, end);
+		pgd = pgd_offset_k(addr);
+
+		if (!add) {
+			if (pgd_none(*pgd))
+				continue;
+		} else if (pgd_none(*pgd)) {
+			p4d = vmem_crst_alloc(_REGION2_ENTRY_EMPTY);
+			if (!p4d)
+				goto out;
+			pgd_populate(&init_mm, pgd, p4d);
+		}
+		ret = modify_p4d_table(pgd, addr, next, add, direct);
+		if (ret)
+			goto out;
+		if (!add)
+			try_free_p4d_table(pgd, addr & PGDIR_MASK);
+	}
+	ret = 0;
+out:
+	if (!add)
+		flush_tlb_kernel_range(start, end);
+	return ret;
+}
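
PAGE_ALIGNED(start | end) validates both addresses with a single mask: OR-ing them merges their low bits, so the result is page aligned exactly when each operand is. A two-assert demonstration:

```c
#include <assert.h>

#define PAGE_SIZE 4096UL

static int page_aligned(unsigned long x)
{
	return (x & (PAGE_SIZE - 1)) == 0;
}

int main(void)
{
	assert(page_aligned(0x1000 | 0x3000));	/* both aligned */
	assert(!page_aligned(0x1000 | 0x3001));	/* one misaligned is enough */
	return 0;
}
```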
+
+static int add_pagetable(unsigned long start, unsigned long end, bool direct)
+{
+	return modify_pagetable(start, end, true, direct);
+}
+
+static int remove_pagetable(unsigned long start, unsigned long end, bool direct)
+{
+	return modify_pagetable(start, end, false, direct);
+}
+
+/*
+ * Add a physical memory range to the 1:1 mapping.
+ */
+static int vmem_add_range(unsigned long start, unsigned long size)
+{
+	return add_pagetable(start, start + size, true);
 }
 
 /*
  * Remove a physical memory range from the 1:1 mapping.
- * Currently only invalidates page table entries.
  */
 static void vmem_remove_range(unsigned long start, unsigned long size)
 {
-	unsigned long pages4k, pages1m, pages2g;
-	unsigned long end = start + size;
-	unsigned long address = start;
-	pgd_t *pg_dir;
-	p4d_t *p4_dir;
-	pud_t *pu_dir;
-	pmd_t *pm_dir;
-	pte_t *pt_dir;
-
-	pages4k = pages1m = pages2g = 0;
-	while (address < end) {
-		pg_dir = pgd_offset_k(address);
-		if (pgd_none(*pg_dir)) {
-			address += PGDIR_SIZE;
-			continue;
-		}
-		p4_dir = p4d_offset(pg_dir, address);
-		if (p4d_none(*p4_dir)) {
-			address += P4D_SIZE;
-			continue;
-		}
-		pu_dir = pud_offset(p4_dir, address);
-		if (pud_none(*pu_dir)) {
-			address += PUD_SIZE;
-			continue;
-		}
-		if (pud_large(*pu_dir)) {
-			pud_clear(pu_dir);
-			address += PUD_SIZE;
-			pages2g++;
-			continue;
-		}
-		pm_dir = pmd_offset(pu_dir, address);
-		if (pmd_none(*pm_dir)) {
-			address += PMD_SIZE;
-			continue;
-		}
-		if (pmd_large(*pm_dir)) {
-			pmd_clear(pm_dir);
-			address += PMD_SIZE;
-			pages1m++;
-			continue;
-		}
-		pt_dir = pte_offset_kernel(pm_dir, address);
-		pte_clear(&init_mm, address, pt_dir);
-		address += PAGE_SIZE;
-		pages4k++;
-	}
-	flush_tlb_kernel_range(start, end);
-	update_page_count(PG_DIRECT_MAP_4K, -pages4k);
-	update_page_count(PG_DIRECT_MAP_1M, -pages1m);
-	update_page_count(PG_DIRECT_MAP_2G, -pages2g);
+	remove_pagetable(start, start + size, true);
 }
 
 /*
  * Add a backed mem_map array to the virtual mem_map array.
  */
 int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
-		struct vmem_altmap *altmap)
+			       struct vmem_altmap *altmap)
 {
-	unsigned long pgt_prot, sgt_prot;
-	unsigned long address = start;
-	pgd_t *pg_dir;
-	p4d_t *p4_dir;
-	pud_t *pu_dir;
-	pmd_t *pm_dir;
-	pte_t *pt_dir;
-	int ret = -ENOMEM;
-
-	pgt_prot = pgprot_val(PAGE_KERNEL);
-	sgt_prot = pgprot_val(SEGMENT_KERNEL);
-	if (!MACHINE_HAS_NX) {
-		pgt_prot &= ~_PAGE_NOEXEC;
-		sgt_prot &= ~_SEGMENT_ENTRY_NOEXEC;
-	}
-	for (address = start; address < end;) {
-		pg_dir = pgd_offset_k(address);
-		if (pgd_none(*pg_dir)) {
-			p4_dir = vmem_crst_alloc(_REGION2_ENTRY_EMPTY);
-			if (!p4_dir)
-				goto out;
-			pgd_populate(&init_mm, pg_dir, p4_dir);
-		}
-
-		p4_dir = p4d_offset(pg_dir, address);
-		if (p4d_none(*p4_dir)) {
-			pu_dir = vmem_crst_alloc(_REGION3_ENTRY_EMPTY);
-			if (!pu_dir)
-				goto out;
-			p4d_populate(&init_mm, p4_dir, pu_dir);
-		}
-
-		pu_dir = pud_offset(p4_dir, address);
-		if (pud_none(*pu_dir)) {
-			pm_dir = vmem_crst_alloc(_SEGMENT_ENTRY_EMPTY);
-			if (!pm_dir)
-				goto out;
-			pud_populate(&init_mm, pu_dir, pm_dir);
-		}
-
-		pm_dir = pmd_offset(pu_dir, address);
-		if (pmd_none(*pm_dir)) {
-			/* Use 1MB frames for vmemmap if available. We always
-			 * use large frames even if they are only partially
-			 * used.
-			 * Otherwise we would have also page tables since
-			 * vmemmap_populate gets called for each section
-			 * separately. */
-			if (MACHINE_HAS_EDAT1) {
-				void *new_page;
-
-				new_page = vmemmap_alloc_block(PMD_SIZE, node);
-				if (!new_page)
-					goto out;
-				pmd_val(*pm_dir) = __pa(new_page) | sgt_prot;
-				address = (address + PMD_SIZE) & PMD_MASK;
-				continue;
-			}
-			pt_dir = vmem_pte_alloc();
-			if (!pt_dir)
-				goto out;
-			pmd_populate(&init_mm, pm_dir, pt_dir);
-		} else if (pmd_large(*pm_dir)) {
-			address = (address + PMD_SIZE) & PMD_MASK;
-			continue;
-		}
-
-		pt_dir = pte_offset_kernel(pm_dir, address);
-		if (pte_none(*pt_dir)) {
-			void *new_page;
-
-			new_page = vmemmap_alloc_block(PAGE_SIZE, node);
-			if (!new_page)
-				goto out;
-			pte_val(*pt_dir) = __pa(new_page) | pgt_prot;
-		}
-		address += PAGE_SIZE;
-	}
-	ret = 0;
-out:
-	return ret;
-}
-
-void vmemmap_free(unsigned long start, unsigned long end,
-		struct vmem_altmap *altmap)
-{
-}
-
-/*
- * Add memory segment to the segment list if it doesn't overlap with
- * an already present segment.
- */
-static int insert_memory_segment(struct memory_segment *seg)
-{
-	struct memory_segment *tmp;
-
-	if (seg->start + seg->size > VMEM_MAX_PHYS ||
-	    seg->start + seg->size < seg->start)
-		return -ERANGE;
-
-	list_for_each_entry(tmp, &mem_segs, list) {
-		if (seg->start >= tmp->start + tmp->size)
-			continue;
-		if (seg->start + seg->size <= tmp->start)
-			continue;
-		return -ENOSPC;
-	}
-	list_add(&seg->list, &mem_segs);
-	return 0;
-}
-
-/*
- * Remove memory segment from the segment list.
- */
-static void remove_memory_segment(struct memory_segment *seg)
-{
-	list_del(&seg->list);
-}
-
-static void __remove_shared_memory(struct memory_segment *seg)
-{
-	remove_memory_segment(seg);
-	vmem_remove_range(seg->start, seg->size);
-}
-
-int vmem_remove_mapping(unsigned long start, unsigned long size)
-{
-	struct memory_segment *seg;
 	int ret;
 
 	mutex_lock(&vmem_mutex);
-
-	ret = -ENOENT;
-	list_for_each_entry(seg, &mem_segs, list) {
-		if (seg->start == start && seg->size == size)
-			break;
-	}
-
-	if (seg->start != start || seg->size != size)
-		goto out;
-
-	ret = 0;
-	__remove_shared_memory(seg);
-	kfree(seg);
-out:
+	/* We don't care about the node, just use NUMA_NO_NODE on allocations */
+	ret = add_pagetable(start, end, false);
+	if (ret)
+		remove_pagetable(start, end, false);
 	mutex_unlock(&vmem_mutex);
 	return ret;
 }
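
Note the error path: add_pagetable() can fail after mapping part of the range, so vmemmap_populate() tears the whole range down again; removal tolerates holes, which is what makes the rollback safe. A minimal model of that undo-on-failure idiom (fail_at is an artificial injection point, not part of the real code):

```c
#include <stdio.h>

static int add_range(unsigned long start, unsigned long end,
		     unsigned long fail_at)
{
	unsigned long a;

	for (a = start; a < end; a++) {
		if (a == fail_at)
			return -12;	/* -ENOMEM partway through */
		printf("mapped %lu\n", a);
	}
	return 0;
}

static void remove_range(unsigned long start, unsigned long end)
{
	unsigned long a;

	/* Removal skips unmapped entries, so it is safe after a partial add. */
	for (a = start; a < end; a++)
		printf("unmapped %lu (if present)\n", a);
}

int main(void)
{
	if (add_range(0, 4, 2))		/* fails after mapping 0 and 1 */
		remove_range(0, 4);	/* roll back the whole range */
	return 0;
}
```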
 
+void vmemmap_free(unsigned long start, unsigned long end,
+		  struct vmem_altmap *altmap)
+{
+	mutex_lock(&vmem_mutex);
+	remove_pagetable(start, end, false);
+	mutex_unlock(&vmem_mutex);
+}
+
+void vmem_remove_mapping(unsigned long start, unsigned long size)
+{
+	mutex_lock(&vmem_mutex);
+	vmem_remove_range(start, size);
+	mutex_unlock(&vmem_mutex);
+}
+
 int vmem_add_mapping(unsigned long start, unsigned long size)
 {
-	struct memory_segment *seg;
 	int ret;
 
+	if (start + size > VMEM_MAX_PHYS ||
+	    start + size < start)
+		return -ERANGE;
+
 	mutex_lock(&vmem_mutex);
-	ret = -ENOMEM;
-	seg = kzalloc(sizeof(*seg), GFP_KERNEL);
-	if (!seg)
-		goto out;
-	seg->start = start;
-	seg->size = size;
-
-	ret = insert_memory_segment(seg);
+	ret = vmem_add_range(start, size);
 	if (ret)
-		goto out_free;
-
-	ret = vmem_add_mem(start, size);
-	if (ret)
-		goto out_remove;
-	goto out;
-
-out_remove:
-	__remove_shared_memory(seg);
-out_free:
-	kfree(seg);
-out:
+		vmem_remove_range(start, size);
 	mutex_unlock(&vmem_mutex);
 	return ret;
 }
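
The new range check guards against unsigned wraparound as well as the VMEM_MAX_PHYS limit: for a huge size, start + size can wrap past zero and slip under the limit, which the second clause catches. A quick check with an assumed 16 MiB limit:

```c
#include <assert.h>

#define VMEM_MAX_PHYS (16UL << 20)	/* illustrative limit */

static int range_ok(unsigned long start, unsigned long size)
{
	return !(start + size > VMEM_MAX_PHYS || start + size < start);
}

int main(void)
{
	assert(range_ok(0, VMEM_MAX_PHYS));		/* exactly at the limit */
	assert(!range_ok(1UL << 20, VMEM_MAX_PHYS));	/* past the limit */
	assert(!range_ok(8UL << 20, ~0UL));		/* wraps around zero */
	return 0;
}
```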
@@ -401,10 +555,11 @@
  */
 void __init vmem_map_init(void)
 {
-	struct memblock_region *reg;
+	phys_addr_t base, end;
+	u64 i;
 
-	for_each_memblock(memory, reg)
-		vmem_add_mem(reg->base, reg->size);
+	for_each_mem_range(i, &base, &end)
+		vmem_add_range(base, end - base);
 	__set_memory((unsigned long)_stext,
 		     (unsigned long)(_etext - _stext) >> PAGE_SHIFT,
 		     SET_MEMORY_RO | SET_MEMORY_X);
@@ -414,30 +569,12 @@
 	__set_memory((unsigned long)_sinittext,
 		     (unsigned long)(_einittext - _sinittext) >> PAGE_SHIFT,
 		     SET_MEMORY_RO | SET_MEMORY_X);
+	__set_memory(__stext_dma, (__etext_dma - __stext_dma) >> PAGE_SHIFT,
+		     SET_MEMORY_RO | SET_MEMORY_X);
+
+	/* we need lowcore executable for our LPSWE instructions */
+	set_memory_x(0, 1);
+
 	pr_info("Write protected kernel read-only data: %luk\n",
 		(unsigned long)(__end_rodata - _stext) >> 10);
 }
-
-/*
- * Convert memblock.memory to a memory segment list so there is a single
- * list that contains all memory segments.
- */
-static int __init vmem_convert_memory_chunk(void)
-{
-	struct memblock_region *reg;
-	struct memory_segment *seg;
-
-	mutex_lock(&vmem_mutex);
-	for_each_memblock(memory, reg) {
-		seg = kzalloc(sizeof(*seg), GFP_KERNEL);
-		if (!seg)
-			panic("Out of memory...\n");
-		seg->start = reg->base;
-		seg->size = reg->size;
-		insert_memory_segment(seg);
-	}
-	mutex_unlock(&vmem_mutex);
-	return 0;
-}
-
-core_initcall(vmem_convert_memory_chunk);