@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * PowerPC version
  * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
@@ -11,12 +12,6 @@
  *
  * Dave Engebretsen <engebret@us.ibm.com>
  *      Rework for PPC64 port.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
  */

 #undef DEBUG
@@ -52,7 +47,6 @@
 #include <asm/rtas.h>
 #include <asm/io.h>
 #include <asm/mmu_context.h>
-#include <asm/pgtable.h>
 #include <asm/mmu.h>
 #include <linux/uaccess.h>
 #include <asm/smp.h>
@@ -66,40 +60,50 @@
 #include <asm/iommu.h>
 #include <asm/vdso.h>

-#include "mmu_decl.h"
-
-phys_addr_t memstart_addr = ~0;
-EXPORT_SYMBOL_GPL(memstart_addr);
-phys_addr_t kernstart_addr;
-EXPORT_SYMBOL_GPL(kernstart_addr);
+#include <mm/mmu_decl.h>

 #ifdef CONFIG_SPARSEMEM_VMEMMAP
 /*
- * Given an address within the vmemmap, determine the pfn of the page that
- * represents the start of the section it is within. Note that we have to
+ * Given an address within the vmemmap, determine the page that
+ * represents the start of the subsection it is within. Note that we have to
  * do this by hand as the proffered address may not be correctly aligned.
  * Subtraction of non-aligned pointers produces undefined results.
  */
-static unsigned long __meminit vmemmap_section_start(unsigned long page)
+static struct page * __meminit vmemmap_subsection_start(unsigned long vmemmap_addr)
 {
-	unsigned long offset = page - ((unsigned long)(vmemmap));
+	unsigned long start_pfn;
+	unsigned long offset = vmemmap_addr - ((unsigned long)(vmemmap));

 	/* Return the pfn of the start of the section. */
-	return (offset / sizeof(struct page)) & PAGE_SECTION_MASK;
+	start_pfn = (offset / sizeof(struct page)) & PAGE_SUBSECTION_MASK;
+	return pfn_to_page(start_pfn);
 }

 /*
- * Check if this vmemmap page is already initialised. If any section
- * which overlaps this vmemmap page is initialised then this page is
- * initialised already.
+ * Since memory is added in sub-section chunks, before creating a new vmemmap
+ * mapping, the kernel should check whether there is an existing memmap mapping
+ * covering the new subsection added. This is needed because kernel can map
+ * vmemmap area using 16MB pages which will cover a memory range of 16G. Such
+ * a range covers multiple subsections (2M)
+ *
+ * If any subsection in the 16G range mapped by vmemmap is valid we consider the
+ * vmemmap populated (There is a page table entry already present). We can't do
+ * a page table lookup here because with the hash translation we don't keep
+ * vmemmap details in linux page table.
  */
-static int __meminit vmemmap_populated(unsigned long start, int page_size)
+static int __meminit vmemmap_populated(unsigned long vmemmap_addr, int vmemmap_map_size)
 {
-	unsigned long end = start + page_size;
-	start = (unsigned long)(pfn_to_page(vmemmap_section_start(start)));
+	struct page *start;
+	unsigned long vmemmap_end = vmemmap_addr + vmemmap_map_size;
+	start = vmemmap_subsection_start(vmemmap_addr);

-	for (; start < end; start += (PAGES_PER_SECTION * sizeof(struct page)))
-		if (pfn_valid(page_to_pfn((struct page *)start)))
+	for (; (unsigned long)start < vmemmap_end; start += PAGES_PER_SUBSECTION)
+		/*
+		 * pfn valid check here is intended to really check
+		 * whether we have any subsection already initialized
+		 * in this range.
+		 */
+		if (pfn_valid(page_to_pfn(start)))
 			return 1;

 	return 0;
@@ -158,16 +162,16 @@
 	return next++;
 }

-static __meminit void vmemmap_list_populate(unsigned long phys,
-					    unsigned long start,
-					    int node)
+static __meminit int vmemmap_list_populate(unsigned long phys,
+					   unsigned long start,
+					   int node)
 {
 	struct vmemmap_backing *vmem_back;

 	vmem_back = vmemmap_list_alloc(node);
 	if (unlikely(!vmem_back)) {
-		WARN_ON(1);
-		return;
+		pr_debug("vmemap list allocation failed\n");
+		return -ENOMEM;
 	}

 	vmem_back->phys = phys;
@@ -175,15 +179,32 @@
 	vmem_back->list = vmemmap_list;

 	vmemmap_list = vmem_back;
+	return 0;
+}
+
+static bool altmap_cross_boundary(struct vmem_altmap *altmap, unsigned long start,
+				  unsigned long page_size)
+{
+	unsigned long nr_pfn = page_size / sizeof(struct page);
+	unsigned long start_pfn = page_to_pfn((struct page *)start);
+
+	if ((start_pfn + nr_pfn - 1) > altmap->end_pfn)
+		return true;
+
+	if (start_pfn < altmap->base_pfn)
+		return true;
+
+	return false;
 }

 int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
 		struct vmem_altmap *altmap)
 {
+	bool altmap_alloc;
 	unsigned long page_size = 1 << mmu_psize_defs[mmu_vmemmap_psize].shift;

 	/* Align to the page size of the linear mapping. */
-	start = _ALIGN_DOWN(start, page_size);
+	start = ALIGN_DOWN(start, page_size);

 	pr_debug("vmemmap_populate %lx..%lx, node %d\n", start, end, node);

@@ -191,6 +212,12 @@
 		void *p = NULL;
 		int rc;

+		/*
+		 * This vmemmap range is backing different subsections. If any
+		 * of that subsection is marked valid, that means we already
+		 * have initialized a page table covering this range and hence
+		 * the vmemmap range is populated.
+		 */
 		if (vmemmap_populated(start, page_size))
 			continue;

@@ -199,14 +226,36 @@
 		 * fail due to alignment issues when using 16MB hugepages, so
 		 * fall back to system memory if the altmap allocation fail.
 		 */
-		if (altmap)
-			p = altmap_alloc_block_buf(page_size, altmap);
-		if (!p)
-			p = vmemmap_alloc_block_buf(page_size, node);
+		if (altmap && !altmap_cross_boundary(altmap, start, page_size)) {
+			p = vmemmap_alloc_block_buf(page_size, node, altmap);
+			if (!p)
+				pr_debug("altmap block allocation failed, falling back to system memory");
+			else
+				altmap_alloc = true;
+		}
+		if (!p) {
+			p = vmemmap_alloc_block_buf(page_size, node, NULL);
+			altmap_alloc = false;
+		}
 		if (!p)
 			return -ENOMEM;

-		vmemmap_list_populate(__pa(p), start, node);
+		if (vmemmap_list_populate(__pa(p), start, node)) {
+			/*
+			 * If we don't populate vmemap list, we don't have
+			 * the ability to free the allocated vmemmap
+			 * pages in section_deactivate. Hence free them
+			 * here.
+			 */
+			int nr_pfns = page_size >> PAGE_SHIFT;
+			unsigned long page_order = get_order(page_size);
+
+			if (altmap_alloc)
+				vmem_altmap_free(altmap, nr_pfns);
+			else
+				free_pages((unsigned long)p, page_order);
+			return -ENOMEM;
+		}

 		pr_debug(" * %016lx..%016lx allocated at %p\n",
 			 start, start + page_size, p);
@@ -236,10 +285,8 @@
 		vmem_back_prev = vmem_back;
 	}

-	if (unlikely(!vmem_back)) {
-		WARN_ON(1);
+	if (unlikely(!vmem_back))
 		return 0;
-	}

 	/* remove it from vmemmap_list */
 	if (vmem_back == vmemmap_list) /* remove head */
@@ -263,24 +310,23 @@
 	unsigned long alt_start = ~0, alt_end = ~0;
 	unsigned long base_pfn;

-	start = _ALIGN_DOWN(start, page_size);
+	start = ALIGN_DOWN(start, page_size);
 	if (altmap) {
 		alt_start = altmap->base_pfn;
-		alt_end = altmap->base_pfn + altmap->reserve +
-			  altmap->free + altmap->alloc + altmap->align;
+		alt_end = altmap->base_pfn + altmap->reserve + altmap->free;
 	}

 	pr_debug("vmemmap_free %lx...%lx\n", start, end);

 	for (; start < end; start += page_size) {
 		unsigned long nr_pages, addr;
-		struct page *section_base;
 		struct page *page;

 		/*
-		 * the section has already be marked as invalid, so
-		 * vmemmap_populated() true means some other sections still
-		 * in this page, so skip it.
+		 * We have already marked the subsection we are trying to remove
+		 * invalid. So if we want to remove the vmemmap range, we
+		 * need to make sure there is no subsection marked valid
+		 * in this range.
 		 */
 		if (vmemmap_populated(start, page_size))
 			continue;
@@ -290,6 +336,5 @@
 			continue;

 		page = pfn_to_page(addr >> PAGE_SHIFT);
-		section_base = pfn_to_page(vmemmap_section_start(start));
 		nr_pages = 1 << page_order;
 		base_pfn = PHYS_PFN(addr);
@@ -379,13 +424,15 @@
 		}
 		if (!(vec5[OV5_INDX(OV5_RADIX_GTSE)] &
 						OV5_FEAT(OV5_RADIX_GTSE))) {
-			pr_warn("WARNING: Hypervisor doesn't support RADIX with GTSE\n");
-		}
+			cur_cpu_spec->mmu_features &= ~MMU_FTR_GTSE;
+		} else
+			cur_cpu_spec->mmu_features |= MMU_FTR_GTSE;
 		/* Do radix anyway - the hypervisor said we had to */
 		cur_cpu_spec->mmu_features |= MMU_FTR_TYPE_RADIX;
 	} else if (mmu_supported == OV5_FEAT(OV5_MMU_HASH)) {
 		/* Hypervisor only supports hash - disable radix */
 		cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX;
+		cur_cpu_spec->mmu_features &= ~MMU_FTR_GTSE;
 	}
 }

@@ -404,9 +451,16 @@
 	if (!(mfmsr() & MSR_HV))
 		early_check_vec5();

-	if (early_radix_enabled())
+	if (early_radix_enabled()) {
 		radix__early_init_devtree();
-	else
+		/*
+		 * We have finalized the translation we are going to use by now.
+		 * Radix mode is not limited by RMA / VRMA addressing.
+		 * Hence don't limit memblock allocations.
+		 */
+		ppc64_rma_size = ULONG_MAX;
+		memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE);
+	} else
 		hash__early_init_devtree();
 }
 #endif /* CONFIG_PPC_BOOK3S_64 */
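
The hunk that introduces altmap_cross_boundary() decides whether a vmemmap chunk can be carved out of the device-provided altmap or must fall back to system memory. The standalone sketch below only illustrates that bounds check outside the kernel; the fake_altmap struct, the cross_boundary() name and the sample pfn values are illustrative stand-ins, not the kernel's actual vmem_altmap definition or API.

/* Minimal user-space sketch of the altmap boundary check (illustrative only). */
#include <stdbool.h>
#include <stdio.h>

struct fake_altmap {
	unsigned long base_pfn;	/* first pfn the device-backed area covers */
	unsigned long end_pfn;	/* last pfn the device-backed area covers */
};

/* Return true when the pfn range [start_pfn, start_pfn + nr_pfn) would
 * spill outside the range the altmap device covers. */
static bool cross_boundary(const struct fake_altmap *altmap,
			   unsigned long start_pfn, unsigned long nr_pfn)
{
	if (start_pfn + nr_pfn - 1 > altmap->end_pfn)
		return true;
	if (start_pfn < altmap->base_pfn)
		return true;
	return false;
}

int main(void)
{
	struct fake_altmap altmap = { .base_pfn = 0x10000, .end_pfn = 0x1ffff };

	/* Fits inside the device range: the altmap allocation can be used. */
	printf("%d\n", cross_boundary(&altmap, 0x10000, 0x200));	/* prints 0 */
	/* Starts below base_pfn: crosses, so fall back to system memory. */
	printf("%d\n", cross_boundary(&altmap, 0xf000, 0x200));	/* prints 1 */
	return 0;
}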
---|