| .. | .. |
|---|
| 1 | +// SPDX-License-Identifier: GPL-2.0-only |
|---|
| 1 | 2 | /* |
|---|
| 2 | 3 | * Copyright (C) 1995 Linus Torvalds |
|---|
| 3 | 4 | * |
|---|
| 4 | | - * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999 |
|---|
| 5 | | - * |
|---|
| 6 | | - * Memory region support |
|---|
| 7 | | - * David Parsons <orc@pell.chi.il.us>, July-August 1999 |
|---|
| 8 | | - * |
|---|
| 9 | | - * Added E820 sanitization routine (removes overlapping memory regions); |
|---|
| 10 | | - * Brian Moyle <bmoyle@mvista.com>, February 2001 |
|---|
| 11 | | - * |
|---|
| 12 | | - * Moved CPU detection code to cpu/${cpu}.c |
|---|
| 13 | | - * Patrick Mochel <mochel@osdl.org>, March 2002 |
|---|
| 14 | | - * |
|---|
| 15 | | - * Provisions for empty E820 memory regions (reported by certain BIOSes). |
|---|
| 16 | | - * Alex Achenbach <xela@slit.de>, December 2002. |
|---|
| 17 | | - * |
|---|
| 5 | + * This file contains the setup_arch() code, which handles the architecture-dependent |
|---|
| 6 | + * parts of early kernel initialization. |
|---|
| 18 | 7 | */ |
|---|
| 19 | | - |
|---|
| 20 | | -/* |
|---|
| 21 | | - * This file handles the architecture-dependent parts of initialization |
|---|
| 22 | | - */ |
|---|
| 23 | | - |
|---|
| 24 | | -#include <linux/sched.h> |
|---|
| 25 | | -#include <linux/mm.h> |
|---|
| 26 | | -#include <linux/mmzone.h> |
|---|
| 27 | | -#include <linux/screen_info.h> |
|---|
| 28 | | -#include <linux/ioport.h> |
|---|
| 29 | | -#include <linux/acpi.h> |
|---|
| 30 | | -#include <linux/sfi.h> |
|---|
| 31 | | -#include <linux/apm_bios.h> |
|---|
| 32 | | -#include <linux/initrd.h> |
|---|
| 33 | | -#include <linux/bootmem.h> |
|---|
| 34 | | -#include <linux/memblock.h> |
|---|
| 35 | | -#include <linux/seq_file.h> |
|---|
| 36 | 8 | #include <linux/console.h> |
|---|
| 37 | | -#include <linux/root_dev.h> |
|---|
| 38 | | -#include <linux/highmem.h> |
|---|
| 39 | | -#include <linux/export.h> |
|---|
| 40 | | -#include <linux/efi.h> |
|---|
| 41 | | -#include <linux/init.h> |
|---|
| 42 | | -#include <linux/edd.h> |
|---|
| 43 | | -#include <linux/iscsi_ibft.h> |
|---|
| 44 | | -#include <linux/nodemask.h> |
|---|
| 45 | | -#include <linux/kexec.h> |
|---|
| 9 | +#include <linux/crash_dump.h> |
|---|
| 10 | +#include <linux/dma-map-ops.h> |
|---|
| 46 | 11 | #include <linux/dmi.h> |
|---|
| 47 | | -#include <linux/pfn.h> |
|---|
| 48 | | -#include <linux/pci.h> |
|---|
| 49 | | -#include <asm/pci-direct.h> |
|---|
| 12 | +#include <linux/efi.h> |
|---|
| 50 | 13 | #include <linux/init_ohci1394_dma.h> |
|---|
| 51 | | -#include <linux/kvm_para.h> |
|---|
| 52 | | -#include <linux/dma-contiguous.h> |
|---|
| 14 | +#include <linux/initrd.h> |
|---|
| 15 | +#include <linux/iscsi_ibft.h> |
|---|
| 16 | +#include <linux/memblock.h> |
|---|
| 17 | +#include <linux/pci.h> |
|---|
| 18 | +#include <linux/root_dev.h> |
|---|
| 19 | +#include <linux/sfi.h> |
|---|
| 20 | +#include <linux/hugetlb.h> |
|---|
| 21 | +#include <linux/tboot.h> |
|---|
| 22 | +#include <linux/usb/xhci-dbgp.h> |
|---|
| 23 | +#include <linux/static_call.h> |
|---|
| 24 | +#include <linux/swiotlb.h> |
|---|
| 25 | + |
|---|
| 26 | +#include <uapi/linux/mount.h> |
|---|
| 27 | + |
|---|
| 53 | 28 | #include <xen/xen.h> |
|---|
| 54 | 29 | |
|---|
| 55 | | -#include <linux/errno.h> |
|---|
| 56 | | -#include <linux/kernel.h> |
|---|
| 57 | | -#include <linux/stddef.h> |
|---|
| 58 | | -#include <linux/unistd.h> |
|---|
| 59 | | -#include <linux/ptrace.h> |
|---|
| 60 | | -#include <linux/user.h> |
|---|
| 61 | | -#include <linux/delay.h> |
|---|
| 62 | | - |
|---|
| 63 | | -#include <linux/kallsyms.h> |
|---|
| 64 | | -#include <linux/cpufreq.h> |
|---|
| 65 | | -#include <linux/dma-mapping.h> |
|---|
| 66 | | -#include <linux/ctype.h> |
|---|
| 67 | | -#include <linux/uaccess.h> |
|---|
| 68 | | - |
|---|
| 69 | | -#include <linux/percpu.h> |
|---|
| 70 | | -#include <linux/crash_dump.h> |
|---|
| 71 | | -#include <linux/tboot.h> |
|---|
| 72 | | -#include <linux/jiffies.h> |
|---|
| 73 | | -#include <linux/mem_encrypt.h> |
|---|
| 74 | | - |
|---|
| 75 | | -#include <linux/usb/xhci-dbgp.h> |
|---|
| 76 | | -#include <video/edid.h> |
|---|
| 77 | | - |
|---|
| 78 | | -#include <asm/mtrr.h> |
|---|
| 79 | 30 | #include <asm/apic.h> |
|---|
| 80 | | -#include <asm/realmode.h> |
|---|
| 81 | | -#include <asm/e820/api.h> |
|---|
| 82 | | -#include <asm/mpspec.h> |
|---|
| 83 | | -#include <asm/setup.h> |
|---|
| 84 | | -#include <asm/efi.h> |
|---|
| 85 | | -#include <asm/timer.h> |
|---|
| 86 | | -#include <asm/i8259.h> |
|---|
| 87 | | -#include <asm/sections.h> |
|---|
| 88 | | -#include <asm/io_apic.h> |
|---|
| 89 | | -#include <asm/ist.h> |
|---|
| 90 | | -#include <asm/setup_arch.h> |
|---|
| 31 | +#include <asm/numa.h> |
|---|
| 91 | 32 | #include <asm/bios_ebda.h> |
|---|
| 92 | | -#include <asm/cacheflush.h> |
|---|
| 93 | | -#include <asm/processor.h> |
|---|
| 94 | 33 | #include <asm/bugs.h> |
|---|
| 95 | | -#include <asm/kasan.h> |
|---|
| 96 | | - |
|---|
| 97 | | -#include <asm/vsyscall.h> |
|---|
| 98 | 34 | #include <asm/cpu.h> |
|---|
| 99 | | -#include <asm/desc.h> |
|---|
| 100 | | -#include <asm/dma.h> |
|---|
| 101 | | -#include <asm/iommu.h> |
|---|
| 35 | +#include <asm/efi.h> |
|---|
| 102 | 36 | #include <asm/gart.h> |
|---|
| 103 | | -#include <asm/mmu_context.h> |
|---|
| 104 | | -#include <asm/proto.h> |
|---|
| 105 | | - |
|---|
| 106 | | -#include <asm/paravirt.h> |
|---|
| 107 | 37 | #include <asm/hypervisor.h> |
|---|
| 108 | | -#include <asm/olpc_ofw.h> |
|---|
| 109 | | - |
|---|
| 110 | | -#include <asm/percpu.h> |
|---|
| 111 | | -#include <asm/topology.h> |
|---|
| 112 | | -#include <asm/apicdef.h> |
|---|
| 113 | | -#include <asm/amd_nb.h> |
|---|
| 114 | | -#include <asm/mce.h> |
|---|
| 115 | | -#include <asm/alternative.h> |
|---|
| 116 | | -#include <asm/prom.h> |
|---|
| 117 | | -#include <asm/microcode.h> |
|---|
| 38 | +#include <asm/io_apic.h> |
|---|
| 39 | +#include <asm/kasan.h> |
|---|
| 118 | 40 | #include <asm/kaslr.h> |
|---|
| 41 | +#include <asm/mce.h> |
|---|
| 42 | +#include <asm/mtrr.h> |
|---|
| 43 | +#include <asm/realmode.h> |
|---|
| 44 | +#include <asm/olpc_ofw.h> |
|---|
| 45 | +#include <asm/pci-direct.h> |
|---|
| 46 | +#include <asm/prom.h> |
|---|
| 47 | +#include <asm/proto.h> |
|---|
| 119 | 48 | #include <asm/unwind.h> |
|---|
| 49 | +#include <asm/vsyscall.h> |
|---|
| 50 | +#include <linux/vmalloc.h> |
|---|
| 120 | 51 | |
|---|
| 121 | 52 | /* |
|---|
| 122 | | - * max_low_pfn_mapped: highest direct mapped pfn under 4GB |
|---|
| 123 | | - * max_pfn_mapped: highest direct mapped pfn over 4GB |
|---|
| 53 | + * max_low_pfn_mapped: highest directly mapped pfn < 4 GB |
|---|
| 54 | + * max_pfn_mapped: highest directly mapped pfn > 4 GB |
|---|
| 124 | 55 | * |
|---|
| 125 | 56 | * The direct mapping only covers E820_TYPE_RAM regions, so the ranges and gaps are |
|---|
| 126 | | - * represented by pfn_mapped |
|---|
| 57 | + * represented by pfn_mapped[]. |
|---|
| 127 | 58 | */ |
|---|
| 128 | 59 | unsigned long max_low_pfn_mapped; |
|---|
| 129 | 60 | unsigned long max_pfn_mapped; |
|---|
| .. | .. |
|---|
| 133 | 64 | #endif |
|---|
| 134 | 65 | |
|---|
| 135 | 66 | |
|---|
| 136 | | -static __initdata unsigned long _brk_start = (unsigned long)__brk_base; |
|---|
| 137 | | -unsigned long _brk_end = (unsigned long)__brk_base; |
|---|
| 67 | +/* |
|---|
| 68 | + * Range of the BSS area. The size of the BSS area is determined |
|---|
| 69 | + * at link time, with the RESERVE_BRK*() facility reserving additional |
|---|
| 70 | + * chunks. |
|---|
| 71 | + */ |
|---|
| 72 | +unsigned long _brk_start = (unsigned long)__brk_base; |
|---|
| 73 | +unsigned long _brk_end = (unsigned long)__brk_base; |
|---|
| 138 | 74 | |
|---|
| 139 | 75 | struct boot_params boot_params; |
|---|
| 140 | 76 | |
|---|
| 141 | 77 | /* |
|---|
| 142 | | - * Machine setup.. |
|---|
| 78 | + * These are the four main kernel memory regions, we put them into |
|---|
| 79 | + * the resource tree so that kdump tools and other debugging tools |
|---|
| 80 | + * can recover them: |
|---|
| 143 | 81 | */ |
|---|
| 82 | + |
|---|
| 83 | +static struct resource rodata_resource = { |
|---|
| 84 | + .name = "Kernel rodata", |
|---|
| 85 | + .start = 0, |
|---|
| 86 | + .end = 0, |
|---|
| 87 | + .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM |
|---|
| 88 | +}; |
|---|
| 89 | + |
|---|
| 144 | 90 | static struct resource data_resource = { |
|---|
| 145 | 91 | .name = "Kernel data", |
|---|
| 146 | 92 | .start = 0, |
|---|
| .. | .. |
|---|
| 164 | 110 | |
|---|
| 165 | 111 | |
|---|
| 166 | 112 | #ifdef CONFIG_X86_32 |
|---|
| 167 | | -/* cpu data as detected by the assembly code in head_32.S */ |
|---|
| 113 | +/* CPU data as detected by the assembly code in head_32.S */ |
|---|
| 168 | 114 | struct cpuinfo_x86 new_cpu_data; |
|---|
| 169 | 115 | |
|---|
| 170 | | -/* common cpu data for all cpus */ |
|---|
| 116 | +/* Common CPU data for all CPUs */ |
|---|
| 171 | 117 | struct cpuinfo_x86 boot_cpu_data __read_mostly; |
|---|
| 172 | 118 | EXPORT_SYMBOL(boot_cpu_data); |
|---|
| 173 | 119 | |
|---|
| 174 | 120 | unsigned int def_to_bigsmp; |
|---|
| 175 | 121 | |
|---|
| 176 | | -/* for MCA, but anyone else can use it if they want */ |
|---|
| 122 | +/* For MCA, but anyone else can use it if they want */ |
|---|
| 177 | 123 | unsigned int machine_id; |
|---|
| 178 | 124 | unsigned int machine_submodel_id; |
|---|
| 179 | 125 | unsigned int BIOS_revision; |
|---|
| .. | .. |
|---|
| 295 | 241 | |
|---|
| 296 | 242 | ramdisk_image |= (u64)boot_params.ext_ramdisk_image << 32; |
|---|
| 297 | 243 | |
|---|
| 244 | + if (ramdisk_image == 0) |
|---|
| 245 | + ramdisk_image = phys_initrd_start; |
|---|
| 246 | + |
|---|
| 298 | 247 | return ramdisk_image; |
|---|
| 299 | 248 | } |
|---|
| 300 | 249 | static u64 __init get_ramdisk_size(void) |
|---|
| .. | .. |
|---|
| 302 | 251 | u64 ramdisk_size = boot_params.hdr.ramdisk_size; |
|---|
| 303 | 252 | |
|---|
| 304 | 253 | ramdisk_size |= (u64)boot_params.ext_ramdisk_size << 32; |
|---|
| 254 | + |
|---|
| 255 | + if (ramdisk_size == 0) |
|---|
| 256 | + ramdisk_size = phys_initrd_size; |
|---|
| 305 | 257 | |
|---|
| 306 | 258 | return ramdisk_size; |
|---|
| 307 | 259 | } |
|---|
| .. | .. |
|---|
| 314 | 266 | u64 area_size = PAGE_ALIGN(ramdisk_size); |
|---|
| 315 | 267 | |
|---|
| 316 | 268 | /* We need to move the initrd down into directly mapped mem */ |
|---|
| 317 | | - relocated_ramdisk = memblock_find_in_range(0, PFN_PHYS(max_pfn_mapped), |
|---|
| 318 | | - area_size, PAGE_SIZE); |
|---|
| 319 | | - |
|---|
| 269 | + relocated_ramdisk = memblock_phys_alloc_range(area_size, PAGE_SIZE, 0, |
|---|
| 270 | + PFN_PHYS(max_pfn_mapped)); |
|---|
| 320 | 271 | if (!relocated_ramdisk) |
|---|
| 321 | 272 | panic("Cannot find place for new RAMDISK of size %lld\n", |
|---|
| 322 | 273 | ramdisk_size); |
|---|
| 323 | 274 | |
|---|
| 324 | | - /* Note: this includes all the mem currently occupied by |
|---|
| 325 | | - the initrd, we rely on that fact to keep the data intact. */ |
|---|
| 326 | | - memblock_reserve(relocated_ramdisk, area_size); |
|---|
| 327 | 275 | initrd_start = relocated_ramdisk + PAGE_OFFSET; |
|---|
| 328 | 276 | initrd_end = initrd_start + ramdisk_size; |
|---|
| 329 | 277 | printk(KERN_INFO "Allocated new RAMDISK: [mem %#010llx-%#010llx]\n", |
|---|
| .. | .. |
|---|
| 350 | 298 | |
|---|
| 351 | 299 | memblock_reserve(ramdisk_image, ramdisk_end - ramdisk_image); |
|---|
| 352 | 300 | } |
|---|
| 301 | + |
|---|
| 353 | 302 | static void __init reserve_initrd(void) |
|---|
| 354 | 303 | { |
|---|
| 355 | 304 | /* Assume only end is not page aligned */ |
|---|
| 356 | 305 | u64 ramdisk_image = get_ramdisk_image(); |
|---|
| 357 | 306 | u64 ramdisk_size = get_ramdisk_size(); |
|---|
| 358 | 307 | u64 ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size); |
|---|
| 359 | | - u64 mapped_size; |
|---|
| 360 | 308 | |
|---|
| 361 | 309 | if (!boot_params.hdr.type_of_loader || |
|---|
| 362 | 310 | !ramdisk_image || !ramdisk_size) |
|---|
| 363 | 311 | return; /* No initrd provided by bootloader */ |
|---|
| 364 | 312 | |
|---|
| 365 | 313 | initrd_start = 0; |
|---|
| 366 | | - |
|---|
| 367 | | - mapped_size = memblock_mem_size(max_pfn_mapped); |
|---|
| 368 | | - if (ramdisk_size >= (mapped_size>>1)) |
|---|
| 369 | | - panic("initrd too large to handle, " |
|---|
| 370 | | - "disabling initrd (%lld needed, %lld available)\n", |
|---|
| 371 | | - ramdisk_size, mapped_size>>1); |
|---|
| 372 | 314 | |
|---|
| 373 | 315 | printk(KERN_INFO "RAMDISK: [mem %#010llx-%#010llx]\n", ramdisk_image, |
|---|
| 374 | 316 | ramdisk_end - 1); |
|---|
| .. | .. |
|---|
| 429 | 371 | |
|---|
| 430 | 372 | static void __init memblock_x86_reserve_range_setup_data(void) |
|---|
| 431 | 373 | { |
|---|
| 374 | + struct setup_indirect *indirect; |
|---|
| 432 | 375 | struct setup_data *data; |
|---|
| 433 | | - u64 pa_data; |
|---|
| 376 | + u64 pa_data, pa_next; |
|---|
| 377 | + u32 len; |
|---|
| 434 | 378 | |
|---|
| 435 | 379 | pa_data = boot_params.hdr.setup_data; |
|---|
| 436 | 380 | while (pa_data) { |
|---|
| 437 | 381 | data = early_memremap(pa_data, sizeof(*data)); |
|---|
| 382 | + if (!data) { |
|---|
| 383 | + pr_warn("setup: failed to memremap setup_data entry\n"); |
|---|
| 384 | + return; |
|---|
| 385 | + } |
|---|
| 386 | + |
|---|
| 387 | + len = sizeof(*data); |
|---|
| 388 | + pa_next = data->next; |
|---|
| 389 | + |
|---|
| 438 | 390 | memblock_reserve(pa_data, sizeof(*data) + data->len); |
|---|
| 439 | | - pa_data = data->next; |
|---|
| 440 | | - early_memunmap(data, sizeof(*data)); |
|---|
| 391 | + |
|---|
| 392 | + if (data->type == SETUP_INDIRECT) { |
|---|
| 393 | + len += data->len; |
|---|
| 394 | + early_memunmap(data, sizeof(*data)); |
|---|
| 395 | + data = early_memremap(pa_data, len); |
|---|
| 396 | + if (!data) { |
|---|
| 397 | + pr_warn("setup: failed to memremap indirect setup_data\n"); |
|---|
| 398 | + return; |
|---|
| 399 | + } |
|---|
| 400 | + |
|---|
| 401 | + indirect = (struct setup_indirect *)data->data; |
|---|
| 402 | + |
|---|
| 403 | + if (indirect->type != SETUP_INDIRECT) |
|---|
| 404 | + memblock_reserve(indirect->addr, indirect->len); |
|---|
| 405 | + } |
|---|
| 406 | + |
|---|
| 407 | + pa_data = pa_next; |
|---|
| 408 | + early_memunmap(data, len); |
|---|
| 441 | 409 | } |
|---|
| 442 | 410 | } |
|---|
| 443 | 411 | |
|---|
| .. | .. |
|---|
| 448 | 416 | #ifdef CONFIG_KEXEC_CORE |
|---|
| 449 | 417 | |
|---|
| 450 | 418 | /* 16M alignment for crash kernel regions */ |
|---|
| 451 | | -#define CRASH_ALIGN (16 << 20) |
|---|
| 419 | +#define CRASH_ALIGN SZ_16M |
|---|
| 452 | 420 | |
|---|
| 453 | 421 | /* |
|---|
| 454 | | - * Keep the crash kernel below this limit. On 32 bits earlier kernels |
|---|
| 455 | | - * would limit the kernel to the low 512 MiB due to mapping restrictions. |
|---|
| 456 | | - * On 64bit, old kexec-tools need to under 896MiB. |
|---|
| 422 | + * Keep the crash kernel below this limit. |
|---|
| 423 | + * |
|---|
| 424 | + * Earlier 32-bit kernels would limit the kernel to the low 512 MB range |
|---|
| 425 | + * due to mapping restrictions. |
|---|
| 426 | + * |
|---|
| 427 | + * 64-bit kdump kernels need to be restricted to be under 64 TB, which is |
|---|
| 428 | + * the upper limit of system RAM in 4-level paging mode. Since the kdump |
|---|
| 429 | + * jump could be from 5-level paging to 4-level paging, the jump will fail if |
|---|
| 430 | + * the kernel is put above 64 TB, and during the 1st kernel bootup there's |
|---|
| 431 | + * no good way to detect the paging mode of the target kernel which will be |
|---|
| 432 | + * loaded for dumping. |
|---|
| 457 | 433 | */ |
|---|
| 458 | 434 | #ifdef CONFIG_X86_32 |
|---|
| 459 | | -# define CRASH_ADDR_LOW_MAX (512 << 20) |
|---|
| 460 | | -# define CRASH_ADDR_HIGH_MAX (512 << 20) |
|---|
| 435 | +# define CRASH_ADDR_LOW_MAX SZ_512M |
|---|
| 436 | +# define CRASH_ADDR_HIGH_MAX SZ_512M |
|---|
| 461 | 437 | #else |
|---|
| 462 | | -# define CRASH_ADDR_LOW_MAX (896UL << 20) |
|---|
| 463 | | -# define CRASH_ADDR_HIGH_MAX MAXMEM |
|---|
| 438 | +# define CRASH_ADDR_LOW_MAX SZ_4G |
|---|
| 439 | +# define CRASH_ADDR_HIGH_MAX SZ_64T |
|---|
| 464 | 440 | #endif |
|---|
| 465 | 441 | |
|---|
| 466 | 442 | static int __init reserve_crashkernel_low(void) |
|---|
| 467 | 443 | { |
|---|
| 468 | 444 | #ifdef CONFIG_X86_64 |
|---|
| 469 | 445 | unsigned long long base, low_base = 0, low_size = 0; |
|---|
| 470 | | - unsigned long total_low_mem; |
|---|
| 446 | + unsigned long low_mem_limit; |
|---|
| 471 | 447 | int ret; |
|---|
| 472 | 448 | |
|---|
| 473 | | - total_low_mem = memblock_mem_size(1UL << (32 - PAGE_SHIFT)); |
|---|
| 449 | + low_mem_limit = min(memblock_phys_mem_size(), CRASH_ADDR_LOW_MAX); |
|---|
| 474 | 450 | |
|---|
| 475 | 451 | /* crashkernel=Y,low */ |
|---|
| 476 | | - ret = parse_crashkernel_low(boot_command_line, total_low_mem, &low_size, &base); |
|---|
| 452 | + ret = parse_crashkernel_low(boot_command_line, low_mem_limit, &low_size, &base); |
|---|
| 477 | 453 | if (ret) { |
|---|
| 478 | 454 | /* |
|---|
| 479 | | - * two parts from lib/swiotlb.c: |
|---|
| 455 | + * two parts from kernel/dma/swiotlb.c: |
|---|
| 480 | 456 | * -swiotlb size: user-specified with swiotlb= or default. |
|---|
| 481 | 457 | * |
|---|
| 482 | 458 | * -swiotlb overflow buffer: now hardcoded to 32k. We round it |
|---|
| .. | .. |
|---|
| 491 | 467 | return 0; |
|---|
| 492 | 468 | } |
|---|
| 493 | 469 | |
|---|
| 494 | | - low_base = memblock_find_in_range(0, 1ULL << 32, low_size, CRASH_ALIGN); |
|---|
| 470 | + low_base = memblock_phys_alloc_range(low_size, CRASH_ALIGN, 0, CRASH_ADDR_LOW_MAX); |
|---|
| 495 | 471 | if (!low_base) { |
|---|
| 496 | 472 | pr_err("Cannot reserve %ldMB crashkernel low memory, please try smaller size.\n", |
|---|
| 497 | 473 | (unsigned long)(low_size >> 20)); |
|---|
| 498 | 474 | return -ENOMEM; |
|---|
| 499 | 475 | } |
|---|
| 500 | 476 | |
|---|
| 501 | | - ret = memblock_reserve(low_base, low_size); |
|---|
| 502 | | - if (ret) { |
|---|
| 503 | | - pr_err("%s: Error reserving crashkernel low memblock.\n", __func__); |
|---|
| 504 | | - return ret; |
|---|
| 505 | | - } |
|---|
| 506 | | - |
|---|
| 507 | | - pr_info("Reserving %ldMB of low memory at %ldMB for crashkernel (System low RAM: %ldMB)\n", |
|---|
| 477 | + pr_info("Reserving %ldMB of low memory at %ldMB for crashkernel (low RAM limit: %ldMB)\n", |
|---|
| 508 | 478 | (unsigned long)(low_size >> 20), |
|---|
| 509 | 479 | (unsigned long)(low_base >> 20), |
|---|
| 510 | | - (unsigned long)(total_low_mem >> 20)); |
|---|
| 480 | + (unsigned long)(low_mem_limit >> 20)); |
|---|
| 511 | 481 | |
|---|
| 512 | 482 | crashk_low_res.start = low_base; |
|---|
| 513 | 483 | crashk_low_res.end = low_base + low_size - 1; |
|---|
| .. | .. |
|---|
| 541 | 511 | } |
|---|
| 542 | 512 | |
|---|
| 543 | 513 | /* 0 means: find the address automatically */ |
|---|
| 544 | | - if (crash_base <= 0) { |
|---|
| 514 | + if (!crash_base) { |
|---|
| 545 | 515 | /* |
|---|
| 546 | 516 | * Set CRASH_ADDR_LOW_MAX upper bound for crash memory, |
|---|
| 547 | | - * as old kexec-tools loads bzImage below that, unless |
|---|
| 548 | | - * "crashkernel=size[KMG],high" is specified. |
|---|
| 517 | + * crashkernel=x,high reserves memory over 4G, also allocates |
|---|
| 518 | + * 256M extra low memory for DMA buffers and swiotlb. |
|---|
| 519 | + * But the extra memory is not required for all machines. |
|---|
| 520 | + * So try low memory first and fall back to high memory |
|---|
| 521 | + * unless "crashkernel=size[KMG],high" is specified. |
|---|
| 549 | 522 | */ |
|---|
| 550 | | - crash_base = memblock_find_in_range(CRASH_ALIGN, |
|---|
| 551 | | - high ? CRASH_ADDR_HIGH_MAX |
|---|
| 552 | | - : CRASH_ADDR_LOW_MAX, |
|---|
| 553 | | - crash_size, CRASH_ALIGN); |
|---|
| 523 | + if (!high) |
|---|
| 524 | + crash_base = memblock_phys_alloc_range(crash_size, |
|---|
| 525 | + CRASH_ALIGN, CRASH_ALIGN, |
|---|
| 526 | + CRASH_ADDR_LOW_MAX); |
|---|
| 527 | + if (!crash_base) |
|---|
| 528 | + crash_base = memblock_phys_alloc_range(crash_size, |
|---|
| 529 | + CRASH_ALIGN, CRASH_ALIGN, |
|---|
| 530 | + CRASH_ADDR_HIGH_MAX); |
|---|
| 554 | 531 | if (!crash_base) { |
|---|
| 555 | 532 | pr_info("crashkernel reservation failed - No suitable area found.\n"); |
|---|
| 556 | 533 | return; |
|---|
| 557 | 534 | } |
|---|
| 558 | | - |
|---|
| 559 | 535 | } else { |
|---|
| 560 | 536 | unsigned long long start; |
|---|
| 561 | 537 | |
|---|
| 562 | | - start = memblock_find_in_range(crash_base, |
|---|
| 563 | | - crash_base + crash_size, |
|---|
| 564 | | - crash_size, 1 << 20); |
|---|
| 538 | + start = memblock_phys_alloc_range(crash_size, SZ_1M, crash_base, |
|---|
| 539 | + crash_base + crash_size); |
|---|
| 565 | 540 | if (start != crash_base) { |
|---|
| 566 | 541 | pr_info("crashkernel reservation failed - memory is in use.\n"); |
|---|
| 567 | 542 | return; |
|---|
| 568 | 543 | } |
|---|
| 569 | | - } |
|---|
| 570 | | - ret = memblock_reserve(crash_base, crash_size); |
|---|
| 571 | | - if (ret) { |
|---|
| 572 | | - pr_err("%s: Error reserving crashkernel memblock.\n", __func__); |
|---|
| 573 | | - return; |
|---|
| 574 | 544 | } |
|---|
| 575 | 545 | |
|---|
| 576 | 546 | if (crash_base >= (1ULL << 32) && reserve_crashkernel_low()) { |
|---|
| .. | .. |
|---|
| 727 | 697 | e820__range_update(0, PAGE_SIZE, E820_TYPE_RAM, E820_TYPE_RESERVED); |
|---|
| 728 | 698 | |
|---|
| 729 | 699 | /* |
|---|
| 730 | | - * special case: Some BIOSen report the PC BIOS |
|---|
| 731 | | - * area (640->1Mb) as ram even though it is not. |
|---|
| 700 | + * Special case: Some BIOSes report the PC BIOS |
|---|
| 701 | + * area (640KB -> 1MB) as RAM even though it is not. |
|---|
| 732 | 702 | * Take it out. |
|---|
| 733 | 703 | */ |
|---|
| 734 | 704 | e820__range_remove(BIOS_BEGIN, BIOS_END - BIOS_BEGIN, E820_TYPE_RAM, 1); |
|---|
| .. | .. |
|---|
| 820 | 790 | |
|---|
| 821 | 791 | void __init setup_arch(char **cmdline_p) |
|---|
| 822 | 792 | { |
|---|
| 793 | + /* |
|---|
| 794 | + * Reserve the memory occupied by the kernel between _text and |
|---|
| 795 | + * __end_of_kernel_reserve symbols. Any kernel sections after the |
|---|
| 796 | + * __end_of_kernel_reserve symbol must be explicitly reserved with a |
|---|
| 797 | + * separate memblock_reserve() or they will be discarded. |
|---|
| 798 | + */ |
|---|
| 823 | 799 | memblock_reserve(__pa_symbol(_text), |
|---|
| 824 | | - (unsigned long)__bss_stop - (unsigned long)_text); |
|---|
| 800 | + (unsigned long)__end_of_kernel_reserve - (unsigned long)_text); |
|---|
| 825 | 801 | |
|---|
| 826 | 802 | /* |
|---|
| 827 | 803 | * Make sure page 0 is always reserved because on systems with |
|---|
| .. | .. |
|---|
| 852 | 828 | /* |
|---|
| 853 | 829 | * Note: Quark X1000 CPUs advertise PGE incorrectly and require |
|---|
| 854 | 830 | * a cr3 based tlb flush, so the following __flush_tlb_all() |
|---|
| 855 | | - * will not flush anything because the cpu quirk which clears |
|---|
| 831 | + * will not flush anything because the CPU quirk which clears |
|---|
| 856 | 832 | * X86_FEATURE_PGE has not been invoked yet. Though due to the |
|---|
| 857 | 833 | * load_cr3() above the TLB has been flushed already. The |
|---|
| 858 | 834 | * quirk is invoked before subsequent calls to __flush_tlb_all() |
|---|
| .. | .. |
|---|
| 874 | 850 | early_cpu_init(); |
|---|
| 875 | 851 | arch_init_ideal_nops(); |
|---|
| 876 | 852 | jump_label_init(); |
|---|
| 853 | + static_call_init(); |
|---|
| 877 | 854 | early_ioremap_init(); |
|---|
| 878 | 855 | |
|---|
| 879 | 856 | setup_olpc_ofw_pgd(); |
|---|
| .. | .. |
|---|
| 896 | 873 | |
|---|
| 897 | 874 | #ifdef CONFIG_BLK_DEV_RAM |
|---|
| 898 | 875 | rd_image_start = boot_params.hdr.ram_size & RAMDISK_IMAGE_START_MASK; |
|---|
| 899 | | - rd_prompt = ((boot_params.hdr.ram_size & RAMDISK_PROMPT_FLAG) != 0); |
|---|
| 900 | | - rd_doload = ((boot_params.hdr.ram_size & RAMDISK_LOAD_FLAG) != 0); |
|---|
| 901 | 876 | #endif |
|---|
| 902 | 877 | #ifdef CONFIG_EFI |
|---|
| 903 | 878 | if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature, |
|---|
| .. | .. |
|---|
| 925 | 900 | init_mm.end_data = (unsigned long) _edata; |
|---|
| 926 | 901 | init_mm.brk = _brk_end; |
|---|
| 927 | 902 | |
|---|
| 928 | | - mpx_mm_init(&init_mm); |
|---|
| 929 | | - |
|---|
| 930 | 903 | code_resource.start = __pa_symbol(_text); |
|---|
| 931 | 904 | code_resource.end = __pa_symbol(_etext)-1; |
|---|
| 932 | | - data_resource.start = __pa_symbol(_etext); |
|---|
| 905 | + rodata_resource.start = __pa_symbol(__start_rodata); |
|---|
| 906 | + rodata_resource.end = __pa_symbol(__end_rodata)-1; |
|---|
| 907 | + data_resource.start = __pa_symbol(_sdata); |
|---|
| 933 | 908 | data_resource.end = __pa_symbol(_edata)-1; |
|---|
| 934 | 909 | bss_resource.start = __pa_symbol(__bss_start); |
|---|
| 935 | 910 | bss_resource.end = __pa_symbol(__bss_stop)-1; |
|---|
| .. | .. |
|---|
| 1005 | 980 | if (efi_enabled(EFI_BOOT)) |
|---|
| 1006 | 981 | efi_init(); |
|---|
| 1007 | 982 | |
|---|
| 1008 | | - dmi_scan_machine(); |
|---|
| 1009 | | - dmi_memdev_walk(); |
|---|
| 1010 | | - dmi_set_dump_stack_arch_desc(); |
|---|
| 983 | + dmi_setup(); |
|---|
| 1011 | 984 | |
|---|
| 1012 | 985 | /* |
|---|
| 1013 | 986 | * VMware detection requires dmi to be available, so this |
|---|
| 1014 | | - * needs to be done after dmi_scan_machine(), for the boot CPU. |
|---|
| 987 | + * needs to be done after dmi_setup(), for the boot CPU. |
|---|
| 1015 | 988 | */ |
|---|
| 1016 | 989 | init_hypervisor_platform(); |
|---|
| 1017 | 990 | |
|---|
| .. | .. |
|---|
| 1020 | 993 | |
|---|
| 1021 | 994 | /* after parse_early_param, so could debug it */ |
|---|
| 1022 | 995 | insert_resource(&iomem_resource, &code_resource); |
|---|
| 996 | + insert_resource(&iomem_resource, &rodata_resource); |
|---|
| 1023 | 997 | insert_resource(&iomem_resource, &data_resource); |
|---|
| 1024 | 998 | insert_resource(&iomem_resource, &bss_resource); |
|---|
| 1025 | 999 | |
|---|
| .. | .. |
|---|
| 1102 | 1076 | |
|---|
| 1103 | 1077 | reserve_bios_regions(); |
|---|
| 1104 | 1078 | |
|---|
| 1105 | | - if (efi_enabled(EFI_MEMMAP)) { |
|---|
| 1106 | | - efi_fake_memmap(); |
|---|
| 1107 | | - efi_find_mirror(); |
|---|
| 1108 | | - efi_esrt_init(); |
|---|
| 1079 | + efi_fake_memmap(); |
|---|
| 1080 | + efi_find_mirror(); |
|---|
| 1081 | + efi_esrt_init(); |
|---|
| 1082 | + efi_mokvar_table_init(); |
|---|
| 1109 | 1083 | |
|---|
| 1110 | | - /* |
|---|
| 1111 | | - * The EFI specification says that boot service code won't be |
|---|
| 1112 | | - * called after ExitBootServices(). This is, in fact, a lie. |
|---|
| 1113 | | - */ |
|---|
| 1114 | | - efi_reserve_boot_services(); |
|---|
| 1115 | | - } |
|---|
| 1084 | + /* |
|---|
| 1085 | + * The EFI specification says that boot service code won't be |
|---|
| 1086 | + * called after ExitBootServices(). This is, in fact, a lie. |
|---|
| 1087 | + */ |
|---|
| 1088 | + efi_reserve_boot_services(); |
|---|
| 1116 | 1089 | |
|---|
| 1117 | 1090 | /* preallocate 4k for mptable mpc */ |
|---|
| 1118 | 1091 | e820__memblock_alloc_reserved_mpc_new(); |
|---|
| .. | .. |
|---|
| 1190 | 1163 | initmem_init(); |
|---|
| 1191 | 1164 | dma_contiguous_reserve(max_pfn_mapped << PAGE_SHIFT); |
|---|
| 1192 | 1165 | |
|---|
| 1166 | + if (boot_cpu_has(X86_FEATURE_GBPAGES)) |
|---|
| 1167 | + hugetlb_cma_reserve(PUD_SHIFT - PAGE_SHIFT); |
|---|
| 1168 | + |
|---|
| 1193 | 1169 | /* |
|---|
| 1194 | 1170 | * Reserve memory for crash kernel after SRAT is parsed so that it |
|---|
| 1195 | 1171 | * won't consume hotpluggable memory. |
|---|
| .. | .. |
|---|
| 1242 | 1218 | prefill_possible_map(); |
|---|
| 1243 | 1219 | |
|---|
| 1244 | 1220 | init_cpu_to_node(); |
|---|
| 1221 | + init_gi_nodes(); |
|---|
| 1245 | 1222 | |
|---|
| 1246 | 1223 | io_apic_init_mappings(); |
|---|
| 1247 | 1224 | |
|---|
| .. | .. |
|---|
| 1258 | 1235 | #if defined(CONFIG_VGA_CONSOLE) |
|---|
| 1259 | 1236 | if (!efi_enabled(EFI_BOOT) || (efi_mem_type(0xa0000) != EFI_CONVENTIONAL_MEMORY)) |
|---|
| 1260 | 1237 | conswitchp = &vga_con; |
|---|
| 1261 | | -#elif defined(CONFIG_DUMMY_CONSOLE) |
|---|
| 1262 | | - conswitchp = &dummy_con; |
|---|
| 1263 | 1238 | #endif |
|---|
| 1264 | 1239 | #endif |
|---|
| 1265 | 1240 | x86_init.oem.banner(); |
|---|