```diff
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * PowerPC version
  * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
@@ -11,12 +12,6 @@
  *
  * Dave Engebretsen <engebret@us.ibm.com>
  *      Rework for PPC64 port.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
  */
 
 #undef DEBUG
@@ -52,7 +47,6 @@
 #include <asm/rtas.h>
 #include <asm/io.h>
 #include <asm/mmu_context.h>
-#include <asm/pgtable.h>
 #include <asm/mmu.h>
 #include <linux/uaccess.h>
 #include <asm/smp.h>
```
```diff
@@ -66,40 +60,50 @@
 #include <asm/iommu.h>
 #include <asm/vdso.h>
 
-#include "mmu_decl.h"
-
-phys_addr_t memstart_addr = ~0;
-EXPORT_SYMBOL_GPL(memstart_addr);
-phys_addr_t kernstart_addr;
-EXPORT_SYMBOL_GPL(kernstart_addr);
+#include <mm/mmu_decl.h>
 
 #ifdef CONFIG_SPARSEMEM_VMEMMAP
 /*
- * Given an address within the vmemmap, determine the pfn of the page that
- * represents the start of the section it is within. Note that we have to
+ * Given an address within the vmemmap, determine the page that
+ * represents the start of the subsection it is within. Note that we have to
  * do this by hand as the proffered address may not be correctly aligned.
  * Subtraction of non-aligned pointers produces undefined results.
  */
-static unsigned long __meminit vmemmap_section_start(unsigned long page)
+static struct page * __meminit vmemmap_subsection_start(unsigned long vmemmap_addr)
 {
-        unsigned long offset = page - ((unsigned long)(vmemmap));
+        unsigned long start_pfn;
+        unsigned long offset = vmemmap_addr - ((unsigned long)(vmemmap));
 
         /* Return the pfn of the start of the section. */
-        return (offset / sizeof(struct page)) & PAGE_SECTION_MASK;
+        start_pfn = (offset / sizeof(struct page)) & PAGE_SUBSECTION_MASK;
+        return pfn_to_page(start_pfn);
 }
 
 /*
- * Check if this vmemmap page is already initialised. If any section
- * which overlaps this vmemmap page is initialised then this page is
- * initialised already.
+ * Since memory is added in sub-section chunks, before creating a new vmemmap
+ * mapping, the kernel should check whether there is an existing memmap mapping
+ * covering the new subsection added. This is needed because the kernel can map
+ * the vmemmap area using 16MB pages, each of which covers a memory range of
+ * 16G. Such a range covers multiple subsections (2M).
+ *
+ * If any subsection in the 16G range mapped by vmemmap is valid we consider the
+ * vmemmap populated (there is a page table entry already present). We can't do
+ * a page table lookup here because with hash translation we don't keep
+ * vmemmap details in the linux page table.
  */
-static int __meminit vmemmap_populated(unsigned long start, int page_size)
+static int __meminit vmemmap_populated(unsigned long vmemmap_addr, int vmemmap_map_size)
 {
-        unsigned long end = start + page_size;
-        start = (unsigned long)(pfn_to_page(vmemmap_section_start(start)));
+        struct page *start;
+        unsigned long vmemmap_end = vmemmap_addr + vmemmap_map_size;
+        start = vmemmap_subsection_start(vmemmap_addr);
 
-        for (; start < end; start += (PAGES_PER_SECTION * sizeof(struct page)))
-                if (pfn_valid(page_to_pfn((struct page *)start)))
+        for (; (unsigned long)start < vmemmap_end; start += PAGES_PER_SUBSECTION)
+                /*
+                 * pfn valid check here is intended to really check
+                 * whether we have any subsection already initialized
+                 * in this range.
+                 */
+                if (pfn_valid(page_to_pfn(start)))
                         return 1;
 
         return 0;
```
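The arithmetic in `vmemmap_subsection_start()` is easiest to see with concrete numbers. Below is a minimal standalone sketch of the same computation, not kernel code: the constants (64K base pages, a 64-byte `struct page`, 2M subsections, and the vmemmap base address) are illustrative assumptions, while the mask logic mirrors the helper above. It also works out the 16MB-mapping-covers-16G ratio that the new `vmemmap_populated()` comment relies on.

```c
#include <stdio.h>

#define PAGE_SHIFT           16              /* assumed: 64K base pages */
#define SIZEOF_STRUCT_PAGE   64UL            /* assumed: 64-byte struct page */
#define SUBSECTION_SIZE      (2UL << 20)     /* 2M of memory per subsection */
#define PAGES_PER_SUBSECTION (SUBSECTION_SIZE >> PAGE_SHIFT)
#define PAGE_SUBSECTION_MASK (~(PAGES_PER_SUBSECTION - 1))

#define VMEMMAP_BASE 0xc00c000000000000UL    /* hypothetical vmemmap start */

/* pfn of the first page frame described by the subsection backing vmemmap_addr */
static unsigned long subsection_start_pfn(unsigned long vmemmap_addr)
{
        unsigned long offset = vmemmap_addr - VMEMMAP_BASE;

        return (offset / SIZEOF_STRUCT_PAGE) & PAGE_SUBSECTION_MASK;
}

int main(void)
{
        /* One 16M vmemmap block holds 16M / 64 = 262144 struct pages. */
        unsigned long map_size = 16UL << 20;
        unsigned long nr_struct_pages = map_size / SIZEOF_STRUCT_PAGE;

        /* 262144 frames of 64K each is 16G, i.e. 8192 subsections of 2M. */
        printf("one 16M mapping describes %lu frames = %lu G of memory\n",
               nr_struct_pages, (nr_struct_pages << PAGE_SHIFT) >> 30);
        printf("subsection start pfn for vmemmap base + 1M: %lu\n",
               subsection_start_pfn(VMEMMAP_BASE + (1UL << 20)));
        return 0;
}
```

Under these assumed constants, a single 16M vmemmap mapping describes struct pages for 16G of memory, which is why the population check has to walk subsection by subsection rather than trust a single lookup.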
```diff
@@ -158,16 +162,16 @@
         return next++;
 }
 
-static __meminit void vmemmap_list_populate(unsigned long phys,
-                                            unsigned long start,
-                                            int node)
+static __meminit int vmemmap_list_populate(unsigned long phys,
+                                           unsigned long start,
+                                           int node)
 {
         struct vmemmap_backing *vmem_back;
 
         vmem_back = vmemmap_list_alloc(node);
         if (unlikely(!vmem_back)) {
-                WARN_ON(1);
-                return;
+                pr_debug("vmemmap list allocation failed\n");
+                return -ENOMEM;
         }
 
         vmem_back->phys = phys;
```
```diff
@@ -175,5 +179,21 @@
         vmem_back->list = vmemmap_list;
 
         vmemmap_list = vmem_back;
+        return 0;
+}
+
+static bool altmap_cross_boundary(struct vmem_altmap *altmap, unsigned long start,
+                                  unsigned long page_size)
+{
+        unsigned long nr_pfn = page_size / sizeof(struct page);
+        unsigned long start_pfn = page_to_pfn((struct page *)start);
+
+        if ((start_pfn + nr_pfn) > altmap->end_pfn)
+                return true;
+
+        if (start_pfn < altmap->base_pfn)
+                return true;
+
+        return false;
 }
 
```
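Before `vmemmap_populate()` uses this helper, it is worth spelling out what the check protects against: a single vmemmap block describes `page_size / sizeof(struct page)` page frames, and if that pfn span leaks outside the altmap's `[base_pfn, end_pfn)` window, backing it from device memory would also back struct pages for memory outside the device. The sketch below models the predicate in userspace; the simplified `struct vmem_altmap` is a stand-in, and it takes a pfn directly, whereas the kernel version derives the pfn by casting the vmemmap address to `struct page *` and using `page_to_pfn()`.

```c
#include <stdbool.h>
#include <stdio.h>

struct vmem_altmap {
        unsigned long base_pfn;         /* first pfn of the device range */
        unsigned long end_pfn;          /* one past the last device pfn */
};

#define SIZEOF_STRUCT_PAGE 64UL         /* assumed struct page size */

/*
 * One vmemmap block of map_size bytes describes map_size / sizeof(struct
 * page) pfns. If that pfn span is not fully inside [base_pfn, end_pfn),
 * the block must not come from the altmap.
 */
static bool altmap_cross_boundary(struct vmem_altmap *altmap,
                                  unsigned long start_pfn,
                                  unsigned long map_size)
{
        unsigned long nr_pfn = map_size / SIZEOF_STRUCT_PAGE;

        if (start_pfn + nr_pfn > altmap->end_pfn)
                return true;
        if (start_pfn < altmap->base_pfn)
                return true;
        return false;
}

int main(void)
{
        struct vmem_altmap altmap = { .base_pfn = 0x100000, .end_pfn = 0x140000 };

        /* 16M block => 262144 pfns; 0x100000 + 0x40000 == end_pfn: fits */
        printf("fits:    %d\n", altmap_cross_boundary(&altmap, 0x100000, 16UL << 20));
        /* starting one subsection later spills past end_pfn */
        printf("crosses: %d\n", altmap_cross_boundary(&altmap, 0x100020, 16UL << 20));
        return 0;
}
```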
```diff
@@ -180,10 +200,11 @@
 int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
                 struct vmem_altmap *altmap)
 {
+        bool altmap_alloc;
         unsigned long page_size = 1 << mmu_psize_defs[mmu_vmemmap_psize].shift;
 
         /* Align to the page size of the linear mapping. */
-        start = _ALIGN_DOWN(start, page_size);
+        start = ALIGN_DOWN(start, page_size);
 
         pr_debug("vmemmap_populate %lx..%lx, node %d\n", start, end, node);
 
@@ -191,6 +212,12 @@
                 void *p = NULL;
                 int rc;
 
+                /*
+                 * This vmemmap range is backing different subsections. If any
+                 * of those subsections is marked valid, that means we already
+                 * have initialized a page table covering this range and hence
+                 * the vmemmap range is populated.
+                 */
                 if (vmemmap_populated(start, page_size))
                         continue;
 
@@ -199,14 +226,36 @@
                  * fail due to alignment issues when using 16MB hugepages, so
                  * fall back to system memory if the altmap allocation fails.
                  */
-                if (altmap)
-                        p = altmap_alloc_block_buf(page_size, altmap);
-                if (!p)
-                        p = vmemmap_alloc_block_buf(page_size, node);
+                if (altmap && !altmap_cross_boundary(altmap, start, page_size)) {
+                        p = vmemmap_alloc_block_buf(page_size, node, altmap);
+                        if (!p)
+                                pr_debug("altmap block allocation failed, falling back to system memory");
+                        else
+                                altmap_alloc = true;
+                }
+                if (!p) {
+                        p = vmemmap_alloc_block_buf(page_size, node, NULL);
+                        altmap_alloc = false;
+                }
                 if (!p)
                         return -ENOMEM;
 
-                vmemmap_list_populate(__pa(p), start, node);
+                if (vmemmap_list_populate(__pa(p), start, node)) {
+                        /*
+                         * If we don't populate the vmemmap list, we don't have
+                         * the ability to free the allocated vmemmap
+                         * pages in section_deactivate. Hence free them
+                         * here.
+                         */
+                        int nr_pfns = page_size >> PAGE_SHIFT;
+                        unsigned long page_order = get_order(page_size);
+
+                        if (altmap_alloc)
+                                vmem_altmap_free(altmap, nr_pfns);
+                        else
+                                free_pages((unsigned long)p, page_order);
+                        return -ENOMEM;
+                }
 
                 pr_debug(" * %016lx..%016lx allocated at %p\n",
                          start, start + page_size, p);
```
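The reason `vmemmap_list_populate()` now returns an error is visible here: without a list entry, `vmemmap_free()` could never find the backing block again, so the allocation has to be unwound immediately, through whichever allocator produced it. Tracking `altmap_alloc` per iteration is what keeps the free path symmetric with the allocation path. A reduced sketch of that pattern follows, with illustrative stand-in names rather than the kernel helpers:

```c
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

/* Stand-ins for altmap-backed allocation and the vmemmap list (illustrative). */
static void *device_alloc(size_t size) { (void)size; return NULL; }
static void device_free(void *p)       { (void)p; }
static int record_mapping(void *p)     { (void)p; return -1; /* simulate list-alloc failure */ }

static int populate_one(size_t size, bool have_device)
{
        bool from_device = false;
        void *p = NULL;

        if (have_device) {
                p = device_alloc(size);
                if (p)
                        from_device = true;
        }
        if (!p) {
                p = malloc(size);       /* fall back to the general pool */
                from_device = false;
        }
        if (!p)
                return -1;

        if (record_mapping(p)) {
                /*
                 * With no record of the block we could never find it again
                 * at teardown, so undo the allocation now, through the
                 * allocator that actually produced it.
                 */
                if (from_device)
                        device_free(p);
                else
                        free(p);
                return -1;
        }
        return 0;
}

int main(void)
{
        printf("populate_one: %d\n", populate_one(4096, true));
        return 0;
}
```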
```diff
@@ -236,10 +285,8 @@
                 vmem_back_prev = vmem_back;
         }
 
-        if (unlikely(!vmem_back)) {
-                WARN_ON(1);
+        if (unlikely(!vmem_back))
                 return 0;
-        }
 
         /* remove it from vmemmap_list */
         if (vmem_back == vmemmap_list) /* remove head */
```
```diff
@@ -263,7 +310,7 @@
         unsigned long alt_start = ~0, alt_end = ~0;
         unsigned long base_pfn;
 
-        start = _ALIGN_DOWN(start, page_size);
+        start = ALIGN_DOWN(start, page_size);
         if (altmap) {
                 alt_start = altmap->base_pfn;
                 alt_end = altmap->base_pfn + altmap->reserve +
```
```diff
@@ -274,13 +321,13 @@
 
         for (; start < end; start += page_size) {
                 unsigned long nr_pages, addr;
-                struct page *section_base;
                 struct page *page;
 
                 /*
-                 * the section has already be marked as invalid, so
-                 * vmemmap_populated() true means some other sections still
-                 * in this page, so skip it.
+                 * We have already marked the subsection we are trying to remove
+                 * invalid. So if we want to remove the vmemmap range, we
+                 * need to make sure there is no subsection marked valid
+                 * in this range.
                  */
                 if (vmemmap_populated(start, page_size))
                         continue;
```
```diff
@@ -290,7 +337,6 @@
                         continue;
 
                 page = pfn_to_page(addr >> PAGE_SHIFT);
-                section_base = pfn_to_page(vmemmap_section_start(start));
                 nr_pages = 1 << page_order;
                 base_pfn = PHYS_PFN(addr);
 
```
```diff
@@ -379,13 +425,15 @@
                 }
                 if (!(vec5[OV5_INDX(OV5_RADIX_GTSE)] &
                                                 OV5_FEAT(OV5_RADIX_GTSE))) {
-                        pr_warn("WARNING: Hypervisor doesn't support RADIX with GTSE\n");
-                }
+                        cur_cpu_spec->mmu_features &= ~MMU_FTR_GTSE;
+                } else
+                        cur_cpu_spec->mmu_features |= MMU_FTR_GTSE;
                 /* Do radix anyway - the hypervisor said we had to */
                 cur_cpu_spec->mmu_features |= MMU_FTR_TYPE_RADIX;
         } else if (mmu_supported == OV5_FEAT(OV5_MMU_HASH)) {
                 /* Hypervisor only supports hash - disable radix */
                 cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX;
+                cur_cpu_spec->mmu_features &= ~MMU_FTR_GTSE;
         }
 }
 
```
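`MMU_FTR_GTSE` records whether the hypervisor granted Guest Translation Shootdown Enable, that is, whether the guest may issue global TLB invalidations (`tlbie`) itself; turning the old warning into a feature bit lets later code choose an invalidation mechanism. A minimal model of the negotiation, assuming simplified OV5 bit positions (the real values come from the `ibm,architecture-vec-5` layout):

```c
#include <stdio.h>

#define MMU_FTR_TYPE_RADIX  (1u << 0)
#define MMU_FTR_GTSE        (1u << 1)   /* guest may use tlbie directly */

#define OV5_MMU_RADIX       (1u << 7)   /* illustrative bit positions */
#define OV5_RADIX_GTSE      (1u << 6)

static unsigned int mmu_features;

static void check_vec5(unsigned char mmu_byte)
{
        if (mmu_byte & OV5_MMU_RADIX) {
                /* Track GTSE explicitly instead of only warning. */
                if (mmu_byte & OV5_RADIX_GTSE)
                        mmu_features |= MMU_FTR_GTSE;
                else
                        mmu_features &= ~MMU_FTR_GTSE;
                mmu_features |= MMU_FTR_TYPE_RADIX;
        } else {
                /* Hash only: neither radix nor guest-issued tlbie. */
                mmu_features &= ~(MMU_FTR_TYPE_RADIX | MMU_FTR_GTSE);
        }
}

int main(void)
{
        check_vec5(OV5_MMU_RADIX);                      /* radix without GTSE */
        printf("features: %#x\n", mmu_features);
        check_vec5(OV5_MMU_RADIX | OV5_RADIX_GTSE);     /* radix with GTSE */
        printf("features: %#x\n", mmu_features);
        return 0;
}
```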
```diff
@@ -404,9 +452,16 @@
         if (!(mfmsr() & MSR_HV))
                 early_check_vec5();
 
-        if (early_radix_enabled())
+        if (early_radix_enabled()) {
                 radix__early_init_devtree();
-        else
+                /*
+                 * We have finalized the translation we are going to use by now.
+                 * Radix mode is not limited by RMA / VRMA addressing.
+                 * Hence don't limit memblock allocations.
+                 */
+                ppc64_rma_size = ULONG_MAX;
+                memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE);
+        } else
                 hash__early_init_devtree();
 }
 #endif /* CONFIG_PPC_BOOK3S_64 */
```
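The memblock change at the end follows from how early boot works under the two MMU modes: with hash, very early code runs with translation off and can only reach the real mode area (RMA), so early memblock allocations are capped below `ppc64_rma_size`; radix has no such restriction, so once the translation mode is final the cap can be lifted. Below is a toy model of a boot-time allocator with such a limit, entirely illustrative (the real memblock API is much richer, and 768M is just an example RMA size):

```c
#include <stdint.h>
#include <stdio.h>

#define ALLOC_ANYWHERE UINT64_MAX

static uint64_t current_limit = 768 << 20;      /* e.g. a 768M RMA */
static uint64_t next_free = 16 << 20;           /* simple bump pointer */

static uint64_t boot_alloc(uint64_t size)
{
        if (next_free + size > current_limit)
                return 0;                       /* would fall outside the RMA */
        uint64_t addr = next_free;
        next_free += size;
        return addr;
}

int main(void)
{
        printf("below limit: %#llx\n", (unsigned long long)boot_alloc(1 << 20));

        /* radix: translation mode finalized, no RMA constraint remains */
        current_limit = ALLOC_ANYWHERE;
        printf("after lift:  %#llx\n", (unsigned long long)boot_alloc(1ULL << 32));
        return 0;
}
```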