hc
2023-12-11 d2ccde1c8e90d38cee87a1b0309ad2827f3fd30d
kernel/kernel/kexec_file.c
....@@ -1,12 +1,10 @@
1
+// SPDX-License-Identifier: GPL-2.0-only
12 /*
23 * kexec: kexec_file_load system call
34 *
45 * Copyright (C) 2014 Red Hat Inc.
56 * Authors:
67 * Vivek Goyal <vgoyal@redhat.com>
7
- *
8
- * This source code is licensed under the GNU General Public License,
9
- * Version 2. See the file COPYING for more details.
108 */
119
1210 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
....@@ -16,6 +14,7 @@
1614 #include <linux/file.h>
1715 #include <linux/slab.h>
1816 #include <linux/kexec.h>
17
+#include <linux/memblock.h>
1918 #include <linux/mutex.h>
2019 #include <linux/list.h>
2120 #include <linux/fs.h>
....@@ -25,11 +24,19 @@
2524 #include <linux/elf.h>
2625 #include <linux/elfcore.h>
2726 #include <linux/kernel.h>
28
-#include <linux/kexec.h>
29
-#include <linux/slab.h>
27
+#include <linux/kernel_read_file.h>
3028 #include <linux/syscalls.h>
3129 #include <linux/vmalloc.h>
3230 #include "kexec_internal.h"
31
+
32
+#ifdef CONFIG_KEXEC_SIG
33
+static bool sig_enforce = IS_ENABLED(CONFIG_KEXEC_SIG_FORCE);
34
+
35
+void set_kexec_sig_enforced(void)
36
+{
37
+ sig_enforce = true;
38
+}
39
+#endif
3340
3441 static int kexec_calculate_store_digests(struct kimage *image);
3542
....@@ -78,7 +85,7 @@
7885 return kexec_image_load_default(image);
7986 }
8087
81
-static int kexec_image_post_load_cleanup_default(struct kimage *image)
88
+int kexec_image_post_load_cleanup_default(struct kimage *image)
8289 {
8390 if (!image->fops || !image->fops->cleanup)
8491 return 0;
....@@ -91,7 +98,7 @@
9198 return kexec_image_post_load_cleanup_default(image);
9299 }
93100
94
-#ifdef CONFIG_KEXEC_VERIFY_SIG
101
+#ifdef CONFIG_KEXEC_SIG
95102 static int kexec_image_verify_sig_default(struct kimage *image, void *buf,
96103 unsigned long buf_len)
97104 {
....@@ -109,40 +116,6 @@
109116 return kexec_image_verify_sig_default(image, buf, buf_len);
110117 }
111118 #endif
112
-
113
-/*
114
- * arch_kexec_apply_relocations_add - apply relocations of type RELA
115
- * @pi: Purgatory to be relocated.
116
- * @section: Section relocations applying to.
117
- * @relsec: Section containing RELAs.
118
- * @symtab: Corresponding symtab.
119
- *
120
- * Return: 0 on success, negative errno on error.
121
- */
122
-int __weak
123
-arch_kexec_apply_relocations_add(struct purgatory_info *pi, Elf_Shdr *section,
124
- const Elf_Shdr *relsec, const Elf_Shdr *symtab)
125
-{
126
- pr_err("RELA relocation unsupported.\n");
127
- return -ENOEXEC;
128
-}
129
-
130
-/*
131
- * arch_kexec_apply_relocations - apply relocations of type REL
132
- * @pi: Purgatory to be relocated.
133
- * @section: Section relocations applying to.
134
- * @relsec: Section containing RELs.
135
- * @symtab: Corresponding symtab.
136
- *
137
- * Return: 0 on success, negative errno on error.
138
- */
139
-int __weak
140
-arch_kexec_apply_relocations(struct purgatory_info *pi, Elf_Shdr *section,
141
- const Elf_Shdr *relsec, const Elf_Shdr *symtab)
142
-{
143
- pr_err("REL relocation unsupported.\n");
144
- return -ENOEXEC;
145
-}
146119
147120 /*
148121 * Free up memory used by kernel, initrd, and command line. This is temporary
....@@ -185,6 +158,37 @@
185158 image->image_loader_data = NULL;
186159 }
187160
161
+#ifdef CONFIG_KEXEC_SIG
162
+static int
163
+kimage_validate_signature(struct kimage *image)
164
+{
165
+ int ret;
166
+
167
+ ret = arch_kexec_kernel_verify_sig(image, image->kernel_buf,
168
+ image->kernel_buf_len);
169
+ if (ret) {
170
+
171
+ if (sig_enforce) {
172
+ pr_notice("Enforced kernel signature verification failed (%d).\n", ret);
173
+ return ret;
174
+ }
175
+
176
+ /*
177
+ * If IMA is guaranteed to appraise a signature on the kexec
178
+ * image, permit it even if the kernel is otherwise locked
179
+ * down.
180
+ */
181
+ if (!ima_appraise_signature(READING_KEXEC_IMAGE) &&
182
+ security_locked_down(LOCKDOWN_KEXEC))
183
+ return -EPERM;
184
+
185
+ pr_debug("kernel signature verification failed (%d).\n", ret);
186
+ }
187
+
188
+ return 0;
189
+}
190
+#endif
191
+
188192 /*
189193 * In file mode list of segments is prepared by kernel. Copy relevant
190194 * data from user space, do error checking, prepare segment list
....@@ -194,18 +198,14 @@
194198 const char __user *cmdline_ptr,
195199 unsigned long cmdline_len, unsigned flags)
196200 {
197
- int ret = 0;
201
+ int ret;
198202 void *ldata;
199
- loff_t size;
200203
201
- ret = kernel_read_file_from_fd(kernel_fd, &image->kernel_buf,
202
- &size, INT_MAX, READING_KEXEC_IMAGE);
203
- if (ret)
204
+ ret = kernel_read_file_from_fd(kernel_fd, 0, &image->kernel_buf,
205
+ INT_MAX, NULL, READING_KEXEC_IMAGE);
206
+ if (ret < 0)
204207 return ret;
205
- image->kernel_buf_len = size;
206
-
207
- /* IMA needs to pass the measurement list to the next kernel. */
208
- ima_add_kexec_buffer(image);
208
+ image->kernel_buf_len = ret;
209209
210210 /* Call arch image probe handlers */
211211 ret = arch_kexec_kernel_image_probe(image, image->kernel_buf,
....@@ -213,23 +213,21 @@
213213 if (ret)
214214 goto out;
215215
216
-#ifdef CONFIG_KEXEC_VERIFY_SIG
217
- ret = arch_kexec_kernel_verify_sig(image, image->kernel_buf,
218
- image->kernel_buf_len);
219
- if (ret) {
220
- pr_debug("kernel signature verification failed.\n");
216
+#ifdef CONFIG_KEXEC_SIG
217
+ ret = kimage_validate_signature(image);
218
+
219
+ if (ret)
221220 goto out;
222
- }
223
- pr_debug("kernel signature verification successful.\n");
224221 #endif
225222 /* It is possible that there no initramfs is being loaded */
226223 if (!(flags & KEXEC_FILE_NO_INITRAMFS)) {
227
- ret = kernel_read_file_from_fd(initrd_fd, &image->initrd_buf,
228
- &size, INT_MAX,
224
+ ret = kernel_read_file_from_fd(initrd_fd, 0, &image->initrd_buf,
225
+ INT_MAX, NULL,
229226 READING_KEXEC_INITRAMFS);
230
- if (ret)
227
+ if (ret < 0)
231228 goto out;
232
- image->initrd_buf_len = size;
229
+ image->initrd_buf_len = ret;
230
+ ret = 0;
233231 }
234232
235233 if (cmdline_len) {
....@@ -247,7 +245,13 @@
247245 ret = -EINVAL;
248246 goto out;
249247 }
248
+
249
+ ima_kexec_cmdline(kernel_fd, image->cmdline_buf,
250
+ image->cmdline_buf_len - 1);
250251 }
252
+
253
+ /* IMA needs to pass the measurement list to the next kernel. */
254
+ ima_add_kexec_buffer(image);
251255
252256 /* Call arch image load handlers */
253257 ldata = arch_kexec_kernel_image_load(image);
....@@ -396,6 +400,10 @@
396400
397401 kimage_terminate(image);
398402
403
+ ret = machine_kexec_post_load(image);
404
+ if (ret)
405
+ goto out;
406
+
399407 /*
400408 * Free up any temporary buffers allocated which are not needed
401409 * after image has been loaded
....@@ -491,6 +499,11 @@
491499 unsigned long sz = end - start + 1;
492500
493501 /* Returning 0 moves on to the next memory range */
502
+
503
+ /* Don't use memory that will be detected and handled by a driver. */
504
+ if (res->flags & IORESOURCE_SYSRAM_DRIVER_MANAGED)
505
+ return 0;
506
+
494507 if (sz < kbuf->memsz)
495508 return 0;
496509
....@@ -506,8 +519,60 @@
506519 return locate_mem_hole_bottom_up(start, end, kbuf);
507520 }
508521
522
+#ifdef CONFIG_ARCH_KEEP_MEMBLOCK
523
+static int kexec_walk_memblock(struct kexec_buf *kbuf,
524
+ int (*func)(struct resource *, void *))
525
+{
526
+ int ret = 0;
527
+ u64 i;
528
+ phys_addr_t mstart, mend;
529
+ struct resource res = { };
530
+
531
+ if (kbuf->image->type == KEXEC_TYPE_CRASH)
532
+ return func(&crashk_res, kbuf);
533
+
534
+ if (kbuf->top_down) {
535
+ for_each_free_mem_range_reverse(i, NUMA_NO_NODE, MEMBLOCK_NONE,
536
+ &mstart, &mend, NULL) {
537
+ /*
538
+ * In memblock, end points to the first byte after the
539
+ * range while in kexec, end points to the last byte
540
+ * in the range.
541
+ */
542
+ res.start = mstart;
543
+ res.end = mend - 1;
544
+ ret = func(&res, kbuf);
545
+ if (ret)
546
+ break;
547
+ }
548
+ } else {
549
+ for_each_free_mem_range(i, NUMA_NO_NODE, MEMBLOCK_NONE,
550
+ &mstart, &mend, NULL) {
551
+ /*
552
+ * In memblock, end points to the first byte after the
553
+ * range while in kexec, end points to the last byte
554
+ * in the range.
555
+ */
556
+ res.start = mstart;
557
+ res.end = mend - 1;
558
+ ret = func(&res, kbuf);
559
+ if (ret)
560
+ break;
561
+ }
562
+ }
563
+
564
+ return ret;
565
+}
566
+#else
567
+static int kexec_walk_memblock(struct kexec_buf *kbuf,
568
+ int (*func)(struct resource *, void *))
569
+{
570
+ return 0;
571
+}
572
+#endif
573
+
509574 /**
510
- * arch_kexec_walk_mem - call func(data) on free memory regions
575
+ * kexec_walk_resources - call func(data) on free memory regions
511576 * @kbuf: Context info for the search. Also passed to @func.
512577 * @func: Function to call for each memory region.
513578 *
....@@ -515,8 +580,8 @@
515580 * and that value will be returned. If all free regions are visited without
516581 * func returning non-zero, then zero will be returned.
517582 */
518
-int __weak arch_kexec_walk_mem(struct kexec_buf *kbuf,
519
- int (*func)(struct resource *, void *))
583
+static int kexec_walk_resources(struct kexec_buf *kbuf,
584
+ int (*func)(struct resource *, void *))
520585 {
521586 if (kbuf->image->type == KEXEC_TYPE_CRASH)
522587 return walk_iomem_res_desc(crashk_res.desc,
....@@ -539,9 +604,29 @@
539604 {
540605 int ret;
541606
542
- ret = arch_kexec_walk_mem(kbuf, locate_mem_hole_callback);
607
+ /* Arch knows where to place */
608
+ if (kbuf->mem != KEXEC_BUF_MEM_UNKNOWN)
609
+ return 0;
610
+
611
+ if (!IS_ENABLED(CONFIG_ARCH_KEEP_MEMBLOCK))
612
+ ret = kexec_walk_resources(kbuf, locate_mem_hole_callback);
613
+ else
614
+ ret = kexec_walk_memblock(kbuf, locate_mem_hole_callback);
543615
544616 return ret == 1 ? 0 : -EADDRNOTAVAIL;
617
+}
618
+
619
+/**
620
+ * arch_kexec_locate_mem_hole - Find free memory to place the segments.
621
+ * @kbuf: Parameters for the memory search.
622
+ *
623
+ * On success, kbuf->mem will have the start address of the memory region found.
624
+ *
625
+ * Return: 0 on success, negative errno on error.
626
+ */
627
+int __weak arch_kexec_locate_mem_hole(struct kexec_buf *kbuf)
628
+{
629
+ return kexec_locate_mem_hole(kbuf);
545630 }
546631
547632 /**
....@@ -556,7 +641,6 @@
556641 */
557642 int kexec_add_buffer(struct kexec_buf *kbuf)
558643 {
559
-
560644 struct kexec_segment *ksegment;
561645 int ret;
562646
....@@ -584,7 +668,7 @@
584668 kbuf->buf_align = max(kbuf->buf_align, PAGE_SIZE);
585669
586670 /* Walk the RAM ranges and allocate a suitable range for the buffer */
587
- ret = kexec_locate_mem_hole(kbuf);
671
+ ret = arch_kexec_locate_mem_hole(kbuf);
588672 if (ret)
589673 return ret;
590674
....@@ -637,7 +721,6 @@
637721 }
638722
639723 desc->tfm = tfm;
640
- desc->flags = 0;
641724
642725 ret = crypto_shash_init(desc);
643726 if (ret < 0)
....@@ -1069,24 +1152,26 @@
10691152 unsigned long long mstart, unsigned long long mend)
10701153 {
10711154 int i, j;
1072
- unsigned long long start, end;
1155
+ unsigned long long start, end, p_start, p_end;
10731156 struct crash_mem_range temp_range = {0, 0};
10741157
10751158 for (i = 0; i < mem->nr_ranges; i++) {
10761159 start = mem->ranges[i].start;
10771160 end = mem->ranges[i].end;
1161
+ p_start = mstart;
1162
+ p_end = mend;
10781163
10791164 if (mstart > end || mend < start)
10801165 continue;
10811166
10821167 /* Truncate any area outside of range */
10831168 if (mstart < start)
1084
- mstart = start;
1169
+ p_start = start;
10851170 if (mend > end)
1086
- mend = end;
1171
+ p_end = end;
10871172
10881173 /* Found completely overlapping range */
1089
- if (mstart == start && mend == end) {
1174
+ if (p_start == start && p_end == end) {
10901175 mem->ranges[i].start = 0;
10911176 mem->ranges[i].end = 0;
10921177 if (i < mem->nr_ranges - 1) {
....@@ -1097,20 +1182,29 @@
10971182 mem->ranges[j].end =
10981183 mem->ranges[j+1].end;
10991184 }
1185
+
1186
+ /*
1187
+ * Continue to check whether there are other overlapping ranges
1188
+ * from the current position because of shifting the above
1189
+ * mem ranges.
1190
+ */
1191
+ i--;
1192
+ mem->nr_ranges--;
1193
+ continue;
11001194 }
11011195 mem->nr_ranges--;
11021196 return 0;
11031197 }
11041198
1105
- if (mstart > start && mend < end) {
1199
+ if (p_start > start && p_end < end) {
11061200 /* Split original range */
1107
- mem->ranges[i].end = mstart - 1;
1108
- temp_range.start = mend + 1;
1201
+ mem->ranges[i].end = p_start - 1;
1202
+ temp_range.start = p_end + 1;
11091203 temp_range.end = end;
1110
- } else if (mstart != start)
1111
- mem->ranges[i].end = mstart - 1;
1204
+ } else if (p_start != start)
1205
+ mem->ranges[i].end = p_start - 1;
11121206 else
1113
- mem->ranges[i].start = mend + 1;
1207
+ mem->ranges[i].start = p_end + 1;
11141208 break;
11151209 }
11161210
....@@ -1147,7 +1241,7 @@
11471241 unsigned long long notes_addr;
11481242 unsigned long mstart, mend;
11491243
1150
- /* extra phdr for vmcoreinfo elf note */
1244
+ /* extra phdr for vmcoreinfo ELF note */
11511245 nr_phdr = nr_cpus + 1;
11521246 nr_phdr += mem->nr_ranges;
11531247
....@@ -1155,7 +1249,7 @@
11551249 * kexec-tools creates an extra PT_LOAD phdr for kernel text mapping
11561250 * area (for example, ffffffff80000000 - ffffffffa0000000 on x86_64).
11571251 * I think this is required by tools like gdb. So same physical
1158
- * memory will be mapped in two elf headers. One will contain kernel
1252
+ * memory will be mapped in two ELF headers. One will contain kernel
11591253 * text virtual addresses and other will have __va(physical) addresses.
11601254 */
11611255
....@@ -1182,7 +1276,7 @@
11821276 ehdr->e_ehsize = sizeof(Elf64_Ehdr);
11831277 ehdr->e_phentsize = sizeof(Elf64_Phdr);
11841278
1185
- /* Prepare one phdr of type PT_NOTE for each present cpu */
1279
+ /* Prepare one phdr of type PT_NOTE for each present CPU */
11861280 for_each_present_cpu(cpu) {
11871281 phdr->p_type = PT_NOTE;
11881282 notes_addr = per_cpu_ptr_to_phys(per_cpu_ptr(crash_notes, cpu));
....@@ -1203,7 +1297,7 @@
12031297 if (kernel_map) {
12041298 phdr->p_type = PT_LOAD;
12051299 phdr->p_flags = PF_R|PF_W|PF_X;
1206
- phdr->p_vaddr = (Elf64_Addr)_text;
1300
+ phdr->p_vaddr = (unsigned long) _text;
12071301 phdr->p_filesz = phdr->p_memsz = _end - _text;
12081302 phdr->p_offset = phdr->p_paddr = __pa_symbol(_text);
12091303 ehdr->e_phnum++;
....@@ -1220,14 +1314,14 @@
12201314 phdr->p_offset = mstart;
12211315
12221316 phdr->p_paddr = mstart;
1223
- phdr->p_vaddr = (unsigned long long) __va(mstart);
1317
+ phdr->p_vaddr = (unsigned long) __va(mstart);
12241318 phdr->p_filesz = phdr->p_memsz = mend - mstart + 1;
12251319 phdr->p_align = 0;
12261320 ehdr->e_phnum++;
1227
- phdr++;
1228
- pr_debug("Crash PT_LOAD elf header. phdr=%p vaddr=0x%llx, paddr=0x%llx, sz=0x%llx e_phnum=%d p_offset=0x%llx\n",
1321
+ pr_debug("Crash PT_LOAD ELF header. phdr=%p vaddr=0x%llx, paddr=0x%llx, sz=0x%llx e_phnum=%d p_offset=0x%llx\n",
12291322 phdr, phdr->p_vaddr, phdr->p_paddr, phdr->p_filesz,
12301323 ehdr->e_phnum, phdr->p_offset);
1324
+ phdr++;
12311325 }
12321326
12331327 *addr = buf;