.. | .. |
---|
19 | 19 | */ |
---|
20 | 20 | #define BOOT_CTYPE_H |
---|
21 | 21 | |
---|
22 | | -/* |
---|
23 | | - * _ctype[] in lib/ctype.c is needed by isspace() of linux/ctype.h. |
---|
24 | | - * While both lib/ctype.c and lib/cmdline.c will bring EXPORT_SYMBOL |
---|
25 | | - * which is meaningless and will cause compiling error in some cases. |
---|
26 | | - */ |
---|
27 | | -#define __DISABLE_EXPORTS |
---|
28 | | - |
---|
29 | 22 | #include "misc.h" |
---|
30 | 23 | #include "error.h" |
---|
31 | 24 | #include "../string.h" |
---|
.. | .. |
---|
43 | 36 | #define STATIC |
---|
44 | 37 | #include <linux/decompress/mm.h> |
---|
45 | 38 | |
---|
46 | | -#ifdef CONFIG_X86_5LEVEL |
---|
47 | | -unsigned int __pgtable_l5_enabled; |
---|
48 | | -unsigned int pgdir_shift __ro_after_init = 39; |
---|
49 | | -unsigned int ptrs_per_p4d __ro_after_init = 1; |
---|
50 | | -#endif |
---|
| 39 | +#define _SETUP |
---|
| 40 | +#include <asm/setup.h> /* For COMMAND_LINE_SIZE */ |
---|
| 41 | +#undef _SETUP |
---|
51 | 42 | |
---|
52 | 43 | extern unsigned long get_cmd_line_ptr(void); |
---|
53 | | - |
---|
54 | | -/* Used by PAGE_KERN* macros: */ |
---|
55 | | -pteval_t __default_kernel_pte_mask __read_mostly = ~0; |
---|
56 | 44 | |
---|
57 | 45 | /* Simplified build-specific string for starting entropy. */ |
---|
58 | 46 | static const char build_str[] = UTS_RELEASE " (" LINUX_COMPILE_BY "@" |
---|
.. | .. |
---|
87 | 75 | #define KASLR_COMPRESSED_BOOT |
---|
88 | 76 | #include "../../lib/kaslr.c" |
---|
89 | 77 | |
---|
90 | | -struct mem_vector { |
---|
91 | | - unsigned long long start; |
---|
92 | | - unsigned long long size; |
---|
93 | | -}; |
---|
94 | 78 | |
---|
95 | 79 | /* Only supporting at most 4 unusable memmap regions with kaslr */ |
---|
96 | 80 | #define MAX_MEMMAP_REGIONS 4 |
---|
.. | .. |
---|
98 | 82 | static bool memmap_too_large; |
---|
99 | 83 | |
---|
100 | 84 | |
---|
101 | | -/* Store memory limit specified by "mem=nn[KMG]" or "memmap=nn[KMG]" */ |
---|
102 | | -static unsigned long long mem_limit = ULLONG_MAX; |
---|
| 85 | +/* |
---|
| 86 | + * Store memory limit: MAXMEM on 64-bit and KERNEL_IMAGE_SIZE on 32-bit. |
---|
| 87 | + * It may be reduced by "mem=nn[KMG]" or "memmap=nn[KMG]" command line options. |
---|
| 88 | + */ |
---|
| 89 | +static u64 mem_limit; |
---|
103 | 90 | |
---|
| 91 | +/* Number of immovable memory regions */ |
---|
| 92 | +static int num_immovable_mem; |
---|
104 | 93 | |
---|
105 | 94 | enum mem_avoid_index { |
---|
106 | 95 | MEM_AVOID_ZO_RANGE = 0, |
---|
.. | .. |
---|
134 | 123 | #include "../../../../lib/ctype.c" |
---|
135 | 124 | #include "../../../../lib/cmdline.c" |
---|
136 | 125 | |
---|
| 126 | +enum parse_mode { |
---|
| 127 | + PARSE_MEMMAP, |
---|
| 128 | + PARSE_EFI, |
---|
| 129 | +}; |
---|
| 130 | + |
---|
137 | 131 | static int |
---|
138 | | -parse_memmap(char *p, unsigned long long *start, unsigned long long *size) |
---|
| 132 | +parse_memmap(char *p, u64 *start, u64 *size, enum parse_mode mode) |
---|
139 | 133 | { |
---|
140 | 134 | char *oldp; |
---|
141 | 135 | |
---|
.. | .. |
---|
158 | 152 | *start = memparse(p + 1, &p); |
---|
159 | 153 | return 0; |
---|
160 | 154 | case '@': |
---|
161 | | - /* memmap=nn@ss specifies usable region, should be skipped */ |
---|
162 | | - *size = 0; |
---|
163 | | - /* Fall through */ |
---|
| 155 | + if (mode == PARSE_MEMMAP) { |
---|
| 156 | + /* |
---|
| 157 | + * memmap=nn@ss specifies usable region, should |
---|
| 158 | + * be skipped |
---|
| 159 | + */ |
---|
| 160 | + *size = 0; |
---|
| 161 | + } else { |
---|
| 162 | + u64 flags; |
---|
| 163 | + |
---|
| 164 | + /* |
---|
| 165 | + * efi_fake_mem=nn@ss:attr the attr specifies |
---|
| 166 | + * flags that might imply a soft-reservation. |
---|
| 167 | + */ |
---|
| 168 | + *start = memparse(p + 1, &p); |
---|
| 169 | + if (p && *p == ':') { |
---|
| 170 | + p++; |
---|
| 171 | + if (kstrtoull(p, 0, &flags) < 0) |
---|
| 172 | + *size = 0; |
---|
| 173 | + else if (flags & EFI_MEMORY_SP) |
---|
| 174 | + return 0; |
---|
| 175 | + } |
---|
| 176 | + *size = 0; |
---|
| 177 | + } |
---|
| 178 | + fallthrough; |
---|
164 | 179 | default: |
---|
165 | 180 | /* |
---|
166 | 181 | * If w/o offset, only size specified, memmap=nn[KMG] has the |
---|
.. | .. |
---|
174 | 189 | return -EINVAL; |
---|
175 | 190 | } |
---|
176 | 191 | |
---|
177 | | -static void mem_avoid_memmap(char *str) |
---|
| 192 | +static void mem_avoid_memmap(enum parse_mode mode, char *str) |
---|
178 | 193 | { |
---|
179 | 194 | static int i; |
---|
180 | 195 | |
---|
.. | .. |
---|
183 | 198 | |
---|
184 | 199 | while (str && (i < MAX_MEMMAP_REGIONS)) { |
---|
185 | 200 | int rc; |
---|
186 | | - unsigned long long start, size; |
---|
| 201 | + u64 start, size; |
---|
187 | 202 | char *k = strchr(str, ','); |
---|
188 | 203 | |
---|
189 | 204 | if (k) |
---|
190 | 205 | *k++ = 0; |
---|
191 | 206 | |
---|
192 | | - rc = parse_memmap(str, &start, &size); |
---|
| 207 | + rc = parse_memmap(str, &start, &size, mode); |
---|
193 | 208 | if (rc < 0) |
---|
194 | 209 | break; |
---|
195 | 210 | str = k; |
---|
196 | 211 | |
---|
197 | 212 | if (start == 0) { |
---|
198 | 213 | /* Store the specified memory limit if size > 0 */ |
---|
199 | | - if (size > 0) |
---|
| 214 | + if (size > 0 && size < mem_limit) |
---|
200 | 215 | mem_limit = size; |
---|
201 | 216 | |
---|
202 | 217 | continue; |
---|
.. | .. |
---|
240 | 255 | } |
---|
241 | 256 | } |
---|
242 | 257 | |
---|
243 | | - |
---|
244 | | -static int handle_mem_options(void) |
---|
| 258 | +static void handle_mem_options(void) |
---|
245 | 259 | { |
---|
246 | 260 | char *args = (char *)get_cmd_line_ptr(); |
---|
247 | | - size_t len = strlen((char *)args); |
---|
| 261 | + size_t len; |
---|
248 | 262 | char *tmp_cmdline; |
---|
249 | 263 | char *param, *val; |
---|
250 | 264 | u64 mem_size; |
---|
251 | 265 | |
---|
252 | | - if (!strstr(args, "memmap=") && !strstr(args, "mem=") && |
---|
253 | | - !strstr(args, "hugepages")) |
---|
254 | | - return 0; |
---|
| 266 | + if (!args) |
---|
| 267 | + return; |
---|
255 | 268 | |
---|
| 269 | + len = strnlen(args, COMMAND_LINE_SIZE-1); |
---|
256 | 270 | tmp_cmdline = malloc(len + 1); |
---|
257 | 271 | if (!tmp_cmdline) |
---|
258 | 272 | error("Failed to allocate space for tmp_cmdline"); |
---|
.. | .. |
---|
267 | 281 | while (*args) { |
---|
268 | 282 | args = next_arg(args, &param, &val); |
---|
269 | 283 | /* Stop at -- */ |
---|
270 | | - if (!val && strcmp(param, "--") == 0) { |
---|
271 | | - warn("Only '--' specified in cmdline"); |
---|
272 | | - free(tmp_cmdline); |
---|
273 | | - return -1; |
---|
274 | | - } |
---|
| 284 | + if (!val && strcmp(param, "--") == 0) |
---|
| 285 | + break; |
---|
275 | 286 | |
---|
276 | 287 | if (!strcmp(param, "memmap")) { |
---|
277 | | - mem_avoid_memmap(val); |
---|
278 | | - } else if (strstr(param, "hugepages")) { |
---|
| 288 | + mem_avoid_memmap(PARSE_MEMMAP, val); |
---|
| 289 | + } else if (IS_ENABLED(CONFIG_X86_64) && strstr(param, "hugepages")) { |
---|
279 | 290 | parse_gb_huge_pages(param, val); |
---|
280 | 291 | } else if (!strcmp(param, "mem")) { |
---|
281 | 292 | char *p = val; |
---|
.. | .. |
---|
283 | 294 | if (!strcmp(p, "nopentium")) |
---|
284 | 295 | continue; |
---|
285 | 296 | mem_size = memparse(p, &p); |
---|
286 | | - if (mem_size == 0) { |
---|
287 | | - free(tmp_cmdline); |
---|
288 | | - return -EINVAL; |
---|
289 | | - } |
---|
290 | | - mem_limit = mem_size; |
---|
| 297 | + if (mem_size == 0) |
---|
| 298 | + break; |
---|
| 299 | + |
---|
| 300 | + if (mem_size < mem_limit) |
---|
| 301 | + mem_limit = mem_size; |
---|
| 302 | + } else if (!strcmp(param, "efi_fake_mem")) { |
---|
| 303 | + mem_avoid_memmap(PARSE_EFI, val); |
---|
291 | 304 | } |
---|
292 | 305 | } |
---|
293 | 306 | |
---|
294 | 307 | free(tmp_cmdline); |
---|
295 | | - return 0; |
---|
| 308 | + return; |
---|
296 | 309 | } |
---|
297 | 310 | |
---|
298 | 311 | /* |
---|
299 | | - * In theory, KASLR can put the kernel anywhere in the range of [16M, 64T). |
---|
| 312 | + * In theory, KASLR can put the kernel anywhere in the range of [16M, MAXMEM) |
---|
| 313 | + * on 64-bit, and [16M, KERNEL_IMAGE_SIZE) on 32-bit. |
---|
| 314 | + * |
---|
300 | 315 | * The mem_avoid array is used to store the ranges that need to be avoided |
---|
301 | 316 | * when KASLR searches for an appropriate random address. We must avoid any |
---|
302 | 317 | * regions that are unsafe to overlap with during decompression, and other |
---|
.. | .. |
---|
374 | 389 | { |
---|
375 | 390 | unsigned long init_size = boot_params->hdr.init_size; |
---|
376 | 391 | u64 initrd_start, initrd_size; |
---|
377 | | - u64 cmd_line, cmd_line_size; |
---|
378 | | - char *ptr; |
---|
| 392 | + unsigned long cmd_line, cmd_line_size; |
---|
379 | 393 | |
---|
380 | 394 | /* |
---|
381 | 395 | * Avoid the region that is unsafe to overlap during |
---|
.. | .. |
---|
383 | 397 | */ |
---|
384 | 398 | mem_avoid[MEM_AVOID_ZO_RANGE].start = input; |
---|
385 | 399 | mem_avoid[MEM_AVOID_ZO_RANGE].size = (output + init_size) - input; |
---|
386 | | - add_identity_map(mem_avoid[MEM_AVOID_ZO_RANGE].start, |
---|
387 | | - mem_avoid[MEM_AVOID_ZO_RANGE].size); |
---|
388 | 400 | |
---|
389 | 401 | /* Avoid initrd. */ |
---|
390 | 402 | initrd_start = (u64)boot_params->ext_ramdisk_image << 32; |
---|
.. | .. |
---|
396 | 408 | /* No need to set mapping for initrd, it will be handled in VO. */ |
---|
397 | 409 | |
---|
398 | 410 | /* Avoid kernel command line. */ |
---|
399 | | - cmd_line = (u64)boot_params->ext_cmd_line_ptr << 32; |
---|
400 | | - cmd_line |= boot_params->hdr.cmd_line_ptr; |
---|
| 411 | + cmd_line = get_cmd_line_ptr(); |
---|
401 | 412 | /* Calculate size of cmd_line. */ |
---|
402 | | - ptr = (char *)(unsigned long)cmd_line; |
---|
403 | | - for (cmd_line_size = 0; ptr[cmd_line_size++];) |
---|
404 | | - ; |
---|
405 | | - mem_avoid[MEM_AVOID_CMDLINE].start = cmd_line; |
---|
406 | | - mem_avoid[MEM_AVOID_CMDLINE].size = cmd_line_size; |
---|
407 | | - add_identity_map(mem_avoid[MEM_AVOID_CMDLINE].start, |
---|
408 | | - mem_avoid[MEM_AVOID_CMDLINE].size); |
---|
| 413 | + if (cmd_line) { |
---|
| 414 | + cmd_line_size = strnlen((char *)cmd_line, COMMAND_LINE_SIZE-1) + 1; |
---|
| 415 | + mem_avoid[MEM_AVOID_CMDLINE].start = cmd_line; |
---|
| 416 | + mem_avoid[MEM_AVOID_CMDLINE].size = cmd_line_size; |
---|
| 417 | + } |
---|
409 | 418 | |
---|
410 | 419 | /* Avoid boot parameters. */ |
---|
411 | 420 | mem_avoid[MEM_AVOID_BOOTPARAMS].start = (unsigned long)boot_params; |
---|
412 | 421 | mem_avoid[MEM_AVOID_BOOTPARAMS].size = sizeof(*boot_params); |
---|
413 | | - add_identity_map(mem_avoid[MEM_AVOID_BOOTPARAMS].start, |
---|
414 | | - mem_avoid[MEM_AVOID_BOOTPARAMS].size); |
---|
415 | 422 | |
---|
416 | 423 | /* We don't need to set a mapping for setup_data. */ |
---|
417 | 424 | |
---|
418 | 425 | /* Mark the memmap regions we need to avoid */ |
---|
419 | 426 | handle_mem_options(); |
---|
420 | 427 | |
---|
421 | | -#ifdef CONFIG_X86_VERBOSE_BOOTUP |
---|
422 | | - /* Make sure video RAM can be used. */ |
---|
423 | | - add_identity_map(0, PMD_SIZE); |
---|
424 | | -#endif |
---|
| 428 | + /* Enumerate the immovable memory regions */ |
---|
| 429 | + num_immovable_mem = count_immovable_mem_regions(); |
---|
425 | 430 | } |
---|
426 | 431 | |
---|
427 | 432 | /* |
---|
.. | .. |
---|
433 | 438 | { |
---|
434 | 439 | int i; |
---|
435 | 440 | struct setup_data *ptr; |
---|
436 | | - unsigned long earliest = img->start + img->size; |
---|
| 441 | + u64 earliest = img->start + img->size; |
---|
437 | 442 | bool is_overlapping = false; |
---|
438 | 443 | |
---|
439 | 444 | for (i = 0; i < MEM_AVOID_MAX; i++) { |
---|
.. | .. |
---|
459 | 464 | is_overlapping = true; |
---|
460 | 465 | } |
---|
461 | 466 | |
---|
| 467 | + if (ptr->type == SETUP_INDIRECT && |
---|
| 468 | + ((struct setup_indirect *)ptr->data)->type != SETUP_INDIRECT) { |
---|
| 469 | + avoid.start = ((struct setup_indirect *)ptr->data)->addr; |
---|
| 470 | + avoid.size = ((struct setup_indirect *)ptr->data)->len; |
---|
| 471 | + |
---|
| 472 | + if (mem_overlaps(img, &avoid) && (avoid.start < earliest)) { |
---|
| 473 | + *overlap = avoid; |
---|
| 474 | + earliest = overlap->start; |
---|
| 475 | + is_overlapping = true; |
---|
| 476 | + } |
---|
| 477 | + } |
---|
| 478 | + |
---|
462 | 479 | ptr = (struct setup_data *)(unsigned long)ptr->next; |
---|
463 | 480 | } |
---|
464 | 481 | |
---|
.. | .. |
---|
466 | 483 | } |
---|
467 | 484 | |
---|
468 | 485 | struct slot_area { |
---|
469 | | - unsigned long addr; |
---|
470 | | - int num; |
---|
| 486 | + u64 addr; |
---|
| 487 | + unsigned long num; |
---|
471 | 488 | }; |
---|
472 | 489 | |
---|
473 | 490 | #define MAX_SLOT_AREA 100 |
---|
474 | 491 | |
---|
475 | 492 | static struct slot_area slot_areas[MAX_SLOT_AREA]; |
---|
476 | | - |
---|
| 493 | +static unsigned int slot_area_index; |
---|
477 | 494 | static unsigned long slot_max; |
---|
478 | | - |
---|
479 | | -static unsigned long slot_area_index; |
---|
480 | 495 | |
---|
481 | 496 | static void store_slot_info(struct mem_vector *region, unsigned long image_size) |
---|
482 | 497 | { |
---|
.. | .. |
---|
486 | 501 | return; |
---|
487 | 502 | |
---|
488 | 503 | slot_area.addr = region->start; |
---|
489 | | - slot_area.num = (region->size - image_size) / |
---|
490 | | - CONFIG_PHYSICAL_ALIGN + 1; |
---|
| 504 | + slot_area.num = 1 + (region->size - image_size) / CONFIG_PHYSICAL_ALIGN; |
---|
491 | 505 | |
---|
492 | | - if (slot_area.num > 0) { |
---|
493 | | - slot_areas[slot_area_index++] = slot_area; |
---|
494 | | - slot_max += slot_area.num; |
---|
495 | | - } |
---|
| 506 | + slot_areas[slot_area_index++] = slot_area; |
---|
| 507 | + slot_max += slot_area.num; |
---|
496 | 508 | } |
---|
497 | 509 | |
---|
498 | 510 | /* |
---|
.. | .. |
---|
502 | 514 | static void |
---|
503 | 515 | process_gb_huge_pages(struct mem_vector *region, unsigned long image_size) |
---|
504 | 516 | { |
---|
505 | | - unsigned long addr, size = 0; |
---|
| 517 | + u64 pud_start, pud_end; |
---|
| 518 | + unsigned long gb_huge_pages; |
---|
506 | 519 | struct mem_vector tmp; |
---|
507 | | - int i = 0; |
---|
508 | 520 | |
---|
509 | | - if (!max_gb_huge_pages) { |
---|
| 521 | + if (!IS_ENABLED(CONFIG_X86_64) || !max_gb_huge_pages) { |
---|
510 | 522 | store_slot_info(region, image_size); |
---|
511 | 523 | return; |
---|
512 | 524 | } |
---|
513 | 525 | |
---|
514 | | - addr = ALIGN(region->start, PUD_SIZE); |
---|
515 | | - /* Did we raise the address above the passed in memory entry? */ |
---|
516 | | - if (addr < region->start + region->size) |
---|
517 | | - size = region->size - (addr - region->start); |
---|
518 | | - |
---|
519 | | - /* Check how many 1GB huge pages can be filtered out: */ |
---|
520 | | - while (size > PUD_SIZE && max_gb_huge_pages) { |
---|
521 | | - size -= PUD_SIZE; |
---|
522 | | - max_gb_huge_pages--; |
---|
523 | | - i++; |
---|
524 | | - } |
---|
| 526 | + /* Are there any 1GB pages in the region? */ |
---|
| 527 | + pud_start = ALIGN(region->start, PUD_SIZE); |
---|
| 528 | + pud_end = ALIGN_DOWN(region->start + region->size, PUD_SIZE); |
---|
525 | 529 | |
---|
526 | 530 | /* No good 1GB huge pages found: */ |
---|
527 | | - if (!i) { |
---|
| 531 | + if (pud_start >= pud_end) { |
---|
528 | 532 | store_slot_info(region, image_size); |
---|
529 | 533 | return; |
---|
530 | 534 | } |
---|
531 | 535 | |
---|
532 | | - /* |
---|
533 | | - * Skip those 'i'*1GB good huge pages, and continue checking and |
---|
534 | | - * processing the remaining head or tail part of the passed region |
---|
535 | | - * if available. |
---|
536 | | - */ |
---|
537 | | - |
---|
538 | | - if (addr >= region->start + image_size) { |
---|
| 536 | + /* Check if the head part of the region is usable. */ |
---|
| 537 | + if (pud_start >= region->start + image_size) { |
---|
539 | 538 | tmp.start = region->start; |
---|
540 | | - tmp.size = addr - region->start; |
---|
| 539 | + tmp.size = pud_start - region->start; |
---|
541 | 540 | store_slot_info(&tmp, image_size); |
---|
542 | 541 | } |
---|
543 | 542 | |
---|
544 | | - size = region->size - (addr - region->start) - i * PUD_SIZE; |
---|
545 | | - if (size >= image_size) { |
---|
546 | | - tmp.start = addr + i * PUD_SIZE; |
---|
547 | | - tmp.size = size; |
---|
| 543 | + /* Skip the good 1GB pages. */ |
---|
| 544 | + gb_huge_pages = (pud_end - pud_start) >> PUD_SHIFT; |
---|
| 545 | + if (gb_huge_pages > max_gb_huge_pages) { |
---|
| 546 | + pud_end = pud_start + (max_gb_huge_pages << PUD_SHIFT); |
---|
| 547 | + max_gb_huge_pages = 0; |
---|
| 548 | + } else { |
---|
| 549 | + max_gb_huge_pages -= gb_huge_pages; |
---|
| 550 | + } |
---|
| 551 | + |
---|
| 552 | + /* Check if the tail part of the region is usable. */ |
---|
| 553 | + if (region->start + region->size >= pud_end + image_size) { |
---|
| 554 | + tmp.start = pud_end; |
---|
| 555 | + tmp.size = region->start + region->size - pud_end; |
---|
548 | 556 | store_slot_info(&tmp, image_size); |
---|
549 | 557 | } |
---|
550 | 558 | } |
---|
551 | 559 | |
---|
552 | | -static unsigned long slots_fetch_random(void) |
---|
| 560 | +static u64 slots_fetch_random(void) |
---|
553 | 561 | { |
---|
554 | 562 | unsigned long slot; |
---|
555 | | - int i; |
---|
| 563 | + unsigned int i; |
---|
556 | 564 | |
---|
557 | 565 | /* Handle case of no slots stored. */ |
---|
558 | 566 | if (slot_max == 0) |
---|
.. | .. |
---|
565 | 573 | slot -= slot_areas[i].num; |
---|
566 | 574 | continue; |
---|
567 | 575 | } |
---|
568 | | - return slot_areas[i].addr + slot * CONFIG_PHYSICAL_ALIGN; |
---|
| 576 | + return slot_areas[i].addr + ((u64)slot * CONFIG_PHYSICAL_ALIGN); |
---|
569 | 577 | } |
---|
570 | 578 | |
---|
571 | 579 | if (i == slot_area_index) |
---|
.. | .. |
---|
573 | 581 | return 0; |
---|
574 | 582 | } |
---|
575 | 583 | |
---|
576 | | -static void process_mem_region(struct mem_vector *entry, |
---|
577 | | - unsigned long minimum, |
---|
578 | | - unsigned long image_size) |
---|
| 584 | +static void __process_mem_region(struct mem_vector *entry, |
---|
| 585 | + unsigned long minimum, |
---|
| 586 | + unsigned long image_size) |
---|
579 | 587 | { |
---|
580 | 588 | struct mem_vector region, overlap; |
---|
581 | | - struct slot_area slot_area; |
---|
582 | | - unsigned long start_orig, end; |
---|
583 | | - struct mem_vector cur_entry; |
---|
| 589 | + u64 region_end; |
---|
584 | 590 | |
---|
585 | | - /* On 32-bit, ignore entries entirely above our maximum. */ |
---|
586 | | - if (IS_ENABLED(CONFIG_X86_32) && entry->start >= KERNEL_IMAGE_SIZE) |
---|
587 | | - return; |
---|
588 | | - |
---|
589 | | - /* Ignore entries entirely below our minimum. */ |
---|
590 | | - if (entry->start + entry->size < minimum) |
---|
591 | | - return; |
---|
592 | | - |
---|
593 | | - /* Ignore entries above memory limit */ |
---|
594 | | - end = min(entry->size + entry->start, mem_limit); |
---|
595 | | - if (entry->start >= end) |
---|
596 | | - return; |
---|
597 | | - cur_entry.start = entry->start; |
---|
598 | | - cur_entry.size = end - entry->start; |
---|
599 | | - |
---|
600 | | - region.start = cur_entry.start; |
---|
601 | | - region.size = cur_entry.size; |
---|
| 591 | + /* Enforce minimum and memory limit. */ |
---|
| 592 | + region.start = max_t(u64, entry->start, minimum); |
---|
| 593 | + region_end = min(entry->start + entry->size, mem_limit); |
---|
602 | 594 | |
---|
603 | 595 | /* Give up if slot area array is full. */ |
---|
604 | 596 | while (slot_area_index < MAX_SLOT_AREA) { |
---|
605 | | - start_orig = region.start; |
---|
606 | | - |
---|
607 | | - /* Potentially raise address to minimum location. */ |
---|
608 | | - if (region.start < minimum) |
---|
609 | | - region.start = minimum; |
---|
610 | | - |
---|
611 | 597 | /* Potentially raise address to meet alignment needs. */ |
---|
612 | 598 | region.start = ALIGN(region.start, CONFIG_PHYSICAL_ALIGN); |
---|
613 | 599 | |
---|
614 | 600 | /* Did we raise the address above the passed in memory entry? */ |
---|
615 | | - if (region.start > cur_entry.start + cur_entry.size) |
---|
| 601 | + if (region.start > region_end) |
---|
616 | 602 | return; |
---|
617 | 603 | |
---|
618 | 604 | /* Reduce size by any delta from the original address. */ |
---|
619 | | - region.size -= region.start - start_orig; |
---|
620 | | - |
---|
621 | | - /* On 32-bit, reduce region size to fit within max size. */ |
---|
622 | | - if (IS_ENABLED(CONFIG_X86_32) && |
---|
623 | | - region.start + region.size > KERNEL_IMAGE_SIZE) |
---|
624 | | - region.size = KERNEL_IMAGE_SIZE - region.start; |
---|
| 605 | + region.size = region_end - region.start; |
---|
625 | 606 | |
---|
626 | 607 | /* Return if region can't contain decompressed kernel */ |
---|
627 | 608 | if (region.size < image_size) |
---|
.. | .. |
---|
634 | 615 | } |
---|
635 | 616 | |
---|
636 | 617 | /* Store beginning of region if holds at least image_size. */ |
---|
637 | | - if (overlap.start > region.start + image_size) { |
---|
638 | | - struct mem_vector beginning; |
---|
639 | | - |
---|
640 | | - beginning.start = region.start; |
---|
641 | | - beginning.size = overlap.start - region.start; |
---|
642 | | - process_gb_huge_pages(&beginning, image_size); |
---|
| 618 | + if (overlap.start >= region.start + image_size) { |
---|
| 619 | + region.size = overlap.start - region.start; |
---|
| 620 | + process_gb_huge_pages(&region, image_size); |
---|
643 | 621 | } |
---|
644 | 622 | |
---|
645 | | - /* Return if overlap extends to or past end of region. */ |
---|
646 | | - if (overlap.start + overlap.size >= region.start + region.size) |
---|
647 | | - return; |
---|
648 | | - |
---|
649 | 623 | /* Clip off the overlapping region and start over. */ |
---|
650 | | - region.size -= overlap.start - region.start + overlap.size; |
---|
651 | 624 | region.start = overlap.start + overlap.size; |
---|
652 | 625 | } |
---|
653 | 626 | } |
---|
654 | 627 | |
---|
| 628 | +static bool process_mem_region(struct mem_vector *region, |
---|
| 629 | + unsigned long minimum, |
---|
| 630 | + unsigned long image_size) |
---|
| 631 | +{ |
---|
| 632 | + int i; |
---|
| 633 | + /* |
---|
| 634 | + * If no immovable memory found, or MEMORY_HOTREMOVE disabled, |
---|
| 635 | + * use @region directly. |
---|
| 636 | + */ |
---|
| 637 | + if (!num_immovable_mem) { |
---|
| 638 | + __process_mem_region(region, minimum, image_size); |
---|
| 639 | + |
---|
| 640 | + if (slot_area_index == MAX_SLOT_AREA) { |
---|
| 641 | + debug_putstr("Aborted e820/efi memmap scan (slot_areas full)!\n"); |
---|
| 642 | + return 1; |
---|
| 643 | + } |
---|
| 644 | + return 0; |
---|
| 645 | + } |
---|
| 646 | + |
---|
| 647 | +#if defined(CONFIG_MEMORY_HOTREMOVE) && defined(CONFIG_ACPI) |
---|
| 648 | + /* |
---|
| 649 | + * If immovable memory found, filter the intersection between |
---|
| 650 | + * immovable memory and @region. |
---|
| 651 | + */ |
---|
| 652 | + for (i = 0; i < num_immovable_mem; i++) { |
---|
| 653 | + u64 start, end, entry_end, region_end; |
---|
| 654 | + struct mem_vector entry; |
---|
| 655 | + |
---|
| 656 | + if (!mem_overlaps(region, &immovable_mem[i])) |
---|
| 657 | + continue; |
---|
| 658 | + |
---|
| 659 | + start = immovable_mem[i].start; |
---|
| 660 | + end = start + immovable_mem[i].size; |
---|
| 661 | + region_end = region->start + region->size; |
---|
| 662 | + |
---|
| 663 | + entry.start = clamp(region->start, start, end); |
---|
| 664 | + entry_end = clamp(region_end, start, end); |
---|
| 665 | + entry.size = entry_end - entry.start; |
---|
| 666 | + |
---|
| 667 | + __process_mem_region(&entry, minimum, image_size); |
---|
| 668 | + |
---|
| 669 | + if (slot_area_index == MAX_SLOT_AREA) { |
---|
| 670 | + debug_putstr("Aborted e820/efi memmap scan when walking immovable regions(slot_areas full)!\n"); |
---|
| 671 | + return 1; |
---|
| 672 | + } |
---|
| 673 | + } |
---|
| 674 | +#endif |
---|
| 675 | + return 0; |
---|
| 676 | +} |
---|
| 677 | + |
---|
655 | 678 | #ifdef CONFIG_EFI |
---|
656 | 679 | /* |
---|
657 | | - * Returns true if mirror region found (and must have been processed |
---|
658 | | - * for slots adding) |
---|
| 680 | + * Returns true if we processed the EFI memmap, which we prefer over the E820 |
---|
| 681 | + * table if it is available. |
---|
659 | 682 | */ |
---|
660 | 683 | static bool |
---|
661 | 684 | process_efi_entries(unsigned long minimum, unsigned long image_size) |
---|
.. | .. |
---|
711 | 734 | if (md->type != EFI_CONVENTIONAL_MEMORY) |
---|
712 | 735 | continue; |
---|
713 | 736 | |
---|
| 737 | + if (efi_soft_reserve_enabled() && |
---|
| 738 | + (md->attribute & EFI_MEMORY_SP)) |
---|
| 739 | + continue; |
---|
| 740 | + |
---|
714 | 741 | if (efi_mirror_found && |
---|
715 | 742 | !(md->attribute & EFI_MEMORY_MORE_RELIABLE)) |
---|
716 | 743 | continue; |
---|
717 | 744 | |
---|
718 | 745 | region.start = md->phys_addr; |
---|
719 | 746 | region.size = md->num_pages << EFI_PAGE_SHIFT; |
---|
720 | | - process_mem_region(&region, minimum, image_size); |
---|
721 | | - if (slot_area_index == MAX_SLOT_AREA) { |
---|
722 | | - debug_putstr("Aborted EFI scan (slot_areas full)!\n"); |
---|
| 747 | + if (process_mem_region(&region, minimum, image_size)) |
---|
723 | 748 | break; |
---|
724 | | - } |
---|
725 | 749 | } |
---|
726 | 750 | return true; |
---|
727 | 751 | } |
---|
.. | .. |
---|
748 | 772 | continue; |
---|
749 | 773 | region.start = entry->addr; |
---|
750 | 774 | region.size = entry->size; |
---|
751 | | - process_mem_region(&region, minimum, image_size); |
---|
752 | | - if (slot_area_index == MAX_SLOT_AREA) { |
---|
753 | | - debug_putstr("Aborted e820 scan (slot_areas full)!\n"); |
---|
| 775 | + if (process_mem_region(&region, minimum, image_size)) |
---|
754 | 776 | break; |
---|
755 | | - } |
---|
756 | 777 | } |
---|
757 | 778 | } |
---|
758 | 779 | |
---|
759 | 780 | static unsigned long find_random_phys_addr(unsigned long minimum, |
---|
760 | 781 | unsigned long image_size) |
---|
761 | 782 | { |
---|
| 783 | + u64 phys_addr; |
---|
| 784 | + |
---|
| 785 | + /* Bail out early if it's impossible to succeed. */ |
---|
| 786 | + if (minimum + image_size > mem_limit) |
---|
| 787 | + return 0; |
---|
| 788 | + |
---|
762 | 789 | /* Check if we had too many memmaps. */ |
---|
763 | 790 | if (memmap_too_large) { |
---|
764 | 791 | debug_putstr("Aborted memory entries scan (more than 4 memmap= args)!\n"); |
---|
765 | 792 | return 0; |
---|
766 | 793 | } |
---|
767 | 794 | |
---|
768 | | - /* Make sure minimum is aligned. */ |
---|
769 | | - minimum = ALIGN(minimum, CONFIG_PHYSICAL_ALIGN); |
---|
| 795 | + if (!process_efi_entries(minimum, image_size)) |
---|
| 796 | + process_e820_entries(minimum, image_size); |
---|
770 | 797 | |
---|
771 | | - if (process_efi_entries(minimum, image_size)) |
---|
772 | | - return slots_fetch_random(); |
---|
| 798 | + phys_addr = slots_fetch_random(); |
---|
773 | 799 | |
---|
774 | | - process_e820_entries(minimum, image_size); |
---|
775 | | - return slots_fetch_random(); |
---|
| 800 | + /* Perform a final check to make sure the address is in range. */ |
---|
| 801 | + if (phys_addr < minimum || phys_addr + image_size > mem_limit) { |
---|
| 802 | + warn("Invalid physical address chosen!\n"); |
---|
| 803 | + return 0; |
---|
| 804 | + } |
---|
| 805 | + |
---|
| 806 | + return (unsigned long)phys_addr; |
---|
776 | 807 | } |
---|
777 | 808 | |
---|
778 | 809 | static unsigned long find_random_virt_addr(unsigned long minimum, |
---|
.. | .. |
---|
780 | 811 | { |
---|
781 | 812 | unsigned long slots, random_addr; |
---|
782 | 813 | |
---|
783 | | - /* Make sure minimum is aligned. */ |
---|
784 | | - minimum = ALIGN(minimum, CONFIG_PHYSICAL_ALIGN); |
---|
785 | | - /* Align image_size for easy slot calculations. */ |
---|
786 | | - image_size = ALIGN(image_size, CONFIG_PHYSICAL_ALIGN); |
---|
787 | | - |
---|
788 | 814 | /* |
---|
789 | 815 | * There are how many CONFIG_PHYSICAL_ALIGN-sized slots |
---|
790 | 816 | * that can hold image_size within the range of minimum to |
---|
791 | 817 | * KERNEL_IMAGE_SIZE? |
---|
792 | 818 | */ |
---|
793 | | - slots = (KERNEL_IMAGE_SIZE - minimum - image_size) / |
---|
794 | | - CONFIG_PHYSICAL_ALIGN + 1; |
---|
| 819 | + slots = 1 + (KERNEL_IMAGE_SIZE - minimum - image_size) / CONFIG_PHYSICAL_ALIGN; |
---|
795 | 820 | |
---|
796 | 821 | random_addr = kaslr_get_random_long("Virtual") % slots; |
---|
797 | 822 | |
---|
.. | .. |
---|
815 | 840 | return; |
---|
816 | 841 | } |
---|
817 | 842 | |
---|
818 | | -#ifdef CONFIG_X86_5LEVEL |
---|
819 | | - if (__read_cr4() & X86_CR4_LA57) { |
---|
820 | | - __pgtable_l5_enabled = 1; |
---|
821 | | - pgdir_shift = 48; |
---|
822 | | - ptrs_per_p4d = 512; |
---|
823 | | - } |
---|
824 | | -#endif |
---|
825 | | - |
---|
826 | 843 | boot_params->hdr.loadflags |= KASLR_FLAG; |
---|
827 | 844 | |
---|
828 | | - /* Prepare to add new identity pagetables on demand. */ |
---|
829 | | - initialize_identity_maps(); |
---|
| 845 | + if (IS_ENABLED(CONFIG_X86_32)) |
---|
| 846 | + mem_limit = KERNEL_IMAGE_SIZE; |
---|
| 847 | + else |
---|
| 848 | + mem_limit = MAXMEM; |
---|
830 | 849 | |
---|
831 | 850 | /* Record the various known unsafe memory ranges. */ |
---|
832 | 851 | mem_avoid_init(input, input_size, *output); |
---|
.. | .. |
---|
837 | 856 | * location: |
---|
838 | 857 | */ |
---|
839 | 858 | min_addr = min(*output, 512UL << 20); |
---|
| 859 | + /* Make sure minimum is aligned. */ |
---|
| 860 | + min_addr = ALIGN(min_addr, CONFIG_PHYSICAL_ALIGN); |
---|
840 | 861 | |
---|
841 | 862 | /* Walk available memory entries to find a random address. */ |
---|
842 | 863 | random_addr = find_random_phys_addr(min_addr, output_size); |
---|
.. | .. |
---|
844 | 865 | warn("Physical KASLR disabled: no suitable memory region!"); |
---|
845 | 866 | } else { |
---|
846 | 867 | /* Update the new physical address location. */ |
---|
847 | | - if (*output != random_addr) { |
---|
848 | | - add_identity_map(random_addr, output_size); |
---|
| 868 | + if (*output != random_addr) |
---|
849 | 869 | *output = random_addr; |
---|
850 | | - } |
---|
851 | | - |
---|
852 | | - /* |
---|
853 | | - * This loads the identity mapping page table. |
---|
854 | | - * This should only be done if a new physical address |
---|
855 | | - * is found for the kernel, otherwise we should keep |
---|
856 | | - * the old page table to make it be like the "nokaslr" |
---|
857 | | - * case. |
---|
858 | | - */ |
---|
859 | | - finalize_identity_maps(); |
---|
860 | 870 | } |
---|
861 | 871 | |
---|
862 | 872 | |
---|