hc
2024-05-10 61598093bbdd283a7edc367d900f223070ead8d2
kernel/kernel/kexec_file.c
....@@ -1,12 +1,10 @@
1
+// SPDX-License-Identifier: GPL-2.0-only
12 /*
23 * kexec: kexec_file_load system call
34 *
45 * Copyright (C) 2014 Red Hat Inc.
56 * Authors:
67 * Vivek Goyal <vgoyal@redhat.com>
7
- *
8
- * This source code is licensed under the GNU General Public License,
9
- * Version 2. See the file COPYING for more details.
108 */
119
1210 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
....@@ -16,6 +14,7 @@
1614 #include <linux/file.h>
1715 #include <linux/slab.h>
1816 #include <linux/kexec.h>
17
+#include <linux/memblock.h>
1918 #include <linux/mutex.h>
2019 #include <linux/list.h>
2120 #include <linux/fs.h>
....@@ -25,11 +24,19 @@
2524 #include <linux/elf.h>
2625 #include <linux/elfcore.h>
2726 #include <linux/kernel.h>
28
-#include <linux/kexec.h>
29
-#include <linux/slab.h>
27
+#include <linux/kernel_read_file.h>
3028 #include <linux/syscalls.h>
3129 #include <linux/vmalloc.h>
3230 #include "kexec_internal.h"
31
+
32
#ifdef CONFIG_KEXEC_SIG
/*
 * When true, a failed kernel image signature check aborts the load.
 * Starts out enabled only if CONFIG_KEXEC_SIG_FORCE is set.
 */
static bool sig_enforce = IS_ENABLED(CONFIG_KEXEC_SIG_FORCE);

/*
 * set_kexec_sig_enforced - turn on signature enforcement at runtime
 *
 * Sets the file-local enforcement flag. Nothing in this file clears it
 * again.
 */
void set_kexec_sig_enforced(void)
{
	sig_enforce = true;
}
#endif
3340
3441 static int kexec_calculate_store_digests(struct kimage *image);
3542
....@@ -78,7 +85,7 @@
7885 return kexec_image_load_default(image);
7986 }
8087
81
-static int kexec_image_post_load_cleanup_default(struct kimage *image)
88
+int kexec_image_post_load_cleanup_default(struct kimage *image)
8289 {
8390 if (!image->fops || !image->fops->cleanup)
8491 return 0;
....@@ -91,7 +98,7 @@
9198 return kexec_image_post_load_cleanup_default(image);
9299 }
93100
94
-#ifdef CONFIG_KEXEC_VERIFY_SIG
101
+#ifdef CONFIG_KEXEC_SIG
95102 static int kexec_image_verify_sig_default(struct kimage *image, void *buf,
96103 unsigned long buf_len)
97104 {
....@@ -109,40 +116,6 @@
109116 return kexec_image_verify_sig_default(image, buf, buf_len);
110117 }
111118 #endif
112
-
113
-/*
114
- * arch_kexec_apply_relocations_add - apply relocations of type RELA
115
- * @pi: Purgatory to be relocated.
116
- * @section: Section relocations applying to.
117
- * @relsec: Section containing RELAs.
118
- * @symtab: Corresponding symtab.
119
- *
120
- * Return: 0 on success, negative errno on error.
121
- */
122
-int __weak
123
-arch_kexec_apply_relocations_add(struct purgatory_info *pi, Elf_Shdr *section,
124
- const Elf_Shdr *relsec, const Elf_Shdr *symtab)
125
-{
126
- pr_err("RELA relocation unsupported.\n");
127
- return -ENOEXEC;
128
-}
129
-
130
-/*
131
- * arch_kexec_apply_relocations - apply relocations of type REL
132
- * @pi: Purgatory to be relocated.
133
- * @section: Section relocations applying to.
134
- * @relsec: Section containing RELs.
135
- * @symtab: Corresponding symtab.
136
- *
137
- * Return: 0 on success, negative errno on error.
138
- */
139
-int __weak
140
-arch_kexec_apply_relocations(struct purgatory_info *pi, Elf_Shdr *section,
141
- const Elf_Shdr *relsec, const Elf_Shdr *symtab)
142
-{
143
- pr_err("REL relocation unsupported.\n");
144
- return -ENOEXEC;
145
-}
146119
147120 /*
148121 * Free up memory used by kernel, initrd, and command line. This is temporary
....@@ -185,6 +158,37 @@
185158 image->image_loader_data = NULL;
186159 }
187160
161
#ifdef CONFIG_KEXEC_SIG
/*
 * kimage_validate_signature - check the signature of the loaded kernel image
 * @image:	Image whose kernel_buf / kernel_buf_len are verified.
 *
 * The buffer is handed to arch_kexec_kernel_verify_sig(). If verification
 * fails, the failure is fatal when sig_enforce is set, or when
 * security_locked_down(LOCKDOWN_KEXEC) objects and IMA will not appraise the
 * image signature. In every other case the failure is logged at debug level
 * and the load is allowed to continue.
 *
 * Return: 0 if loading may proceed, the verifier's error code when
 * enforcement is on, or -EPERM when lockdown rejects the image.
 */
static int kimage_validate_signature(struct kimage *image)
{
	int err = arch_kexec_kernel_verify_sig(image, image->kernel_buf,
					       image->kernel_buf_len);

	/* Signature verified: nothing more to decide. */
	if (!err)
		return 0;

	if (sig_enforce) {
		pr_notice("Enforced kernel signature verification failed (%d).\n", err);
		return err;
	}

	/*
	 * If IMA is guaranteed to appraise a signature on the kexec
	 * image, permit it even if the kernel is otherwise locked down.
	 */
	if (!ima_appraise_signature(READING_KEXEC_IMAGE) &&
	    security_locked_down(LOCKDOWN_KEXEC))
		return -EPERM;

	pr_debug("kernel signature verification failed (%d).\n", err);
	return 0;
}
#endif
191
+
188192 /*
189193 * In file mode list of segments is prepared by kernel. Copy relevant
190194 * data from user space, do error checking, prepare segment list
....@@ -194,18 +198,14 @@
194198 const char __user *cmdline_ptr,
195199 unsigned long cmdline_len, unsigned flags)
196200 {
197
- int ret = 0;
201
+ int ret;
198202 void *ldata;
199
- loff_t size;
200203
201
- ret = kernel_read_file_from_fd(kernel_fd, &image->kernel_buf,
202
- &size, INT_MAX, READING_KEXEC_IMAGE);
203
- if (ret)
204
+ ret = kernel_read_file_from_fd(kernel_fd, 0, &image->kernel_buf,
205
+ INT_MAX, NULL, READING_KEXEC_IMAGE);
206
+ if (ret < 0)
204207 return ret;
205
- image->kernel_buf_len = size;
206
-
207
- /* IMA needs to pass the measurement list to the next kernel. */
208
- ima_add_kexec_buffer(image);
208
+ image->kernel_buf_len = ret;
209209
210210 /* Call arch image probe handlers */
211211 ret = arch_kexec_kernel_image_probe(image, image->kernel_buf,
....@@ -213,23 +213,21 @@
213213 if (ret)
214214 goto out;
215215
216
-#ifdef CONFIG_KEXEC_VERIFY_SIG
217
- ret = arch_kexec_kernel_verify_sig(image, image->kernel_buf,
218
- image->kernel_buf_len);
219
- if (ret) {
220
- pr_debug("kernel signature verification failed.\n");
216
+#ifdef CONFIG_KEXEC_SIG
217
+ ret = kimage_validate_signature(image);
218
+
219
+ if (ret)
221220 goto out;
222
- }
223
- pr_debug("kernel signature verification successful.\n");
224221 #endif
225222 /* It is possible that no initramfs is being loaded */
226223 if (!(flags & KEXEC_FILE_NO_INITRAMFS)) {
227
- ret = kernel_read_file_from_fd(initrd_fd, &image->initrd_buf,
228
- &size, INT_MAX,
224
+ ret = kernel_read_file_from_fd(initrd_fd, 0, &image->initrd_buf,
225
+ INT_MAX, NULL,
229226 READING_KEXEC_INITRAMFS);
230
- if (ret)
227
+ if (ret < 0)
231228 goto out;
232
- image->initrd_buf_len = size;
229
+ image->initrd_buf_len = ret;
230
+ ret = 0;
233231 }
234232
235233 if (cmdline_len) {
....@@ -247,7 +245,13 @@
247245 ret = -EINVAL;
248246 goto out;
249247 }
248
+
249
+ ima_kexec_cmdline(kernel_fd, image->cmdline_buf,
250
+ image->cmdline_buf_len - 1);
250251 }
252
+
253
+ /* IMA needs to pass the measurement list to the next kernel. */
254
+ ima_add_kexec_buffer(image);
251255
252256 /* Call arch image load handlers */
253257 ldata = arch_kexec_kernel_image_load(image);
....@@ -339,7 +343,7 @@
339343
340344 image = NULL;
341345
342
- if (!mutex_trylock(&kexec_mutex))
346
+ if (!kexec_trylock())
343347 return -EBUSY;
344348
345349 dest_image = &kexec_image;
....@@ -396,6 +400,10 @@
396400
397401 kimage_terminate(image);
398402
403
+ ret = machine_kexec_post_load(image);
404
+ if (ret)
405
+ goto out;
406
+
399407 /*
400408 * Free up any temporary buffers allocated which are not needed
401409 * after image has been loaded
....@@ -407,7 +415,7 @@
407415 if ((flags & KEXEC_FILE_ON_CRASH) && kexec_crash_image)
408416 arch_kexec_protect_crashkres();
409417
410
- mutex_unlock(&kexec_mutex);
418
+ kexec_unlock();
411419 kimage_free(image);
412420 return ret;
413421 }
....@@ -491,6 +499,11 @@
491499 unsigned long sz = end - start + 1;
492500
493501 /* Returning 0 will take to next memory range */
502
+
503
+ /* Don't use memory that will be detected and handled by a driver. */
504
+ if (res->flags & IORESOURCE_SYSRAM_DRIVER_MANAGED)
505
+ return 0;
506
+
494507 if (sz < kbuf->memsz)
495508 return 0;
496509
....@@ -506,8 +519,60 @@
506519 return locate_mem_hole_bottom_up(start, end, kbuf);
507520 }
508521
522
#ifdef CONFIG_ARCH_KEEP_MEMBLOCK
/*
 * kexec_walk_memblock - call func(&res, kbuf) on free memblock ranges
 * @kbuf:	Context info for the search. Also passed to @func.
 * @func:	Function to call for each memory range.
 *
 * For a crash image @func is called exactly once, on crashk_res. Otherwise
 * the free memblock ranges are visited with for_each_free_mem_range_reverse()
 * when @kbuf->top_down is set and with for_each_free_mem_range() when it is
 * not. The walk stops at the first non-zero value returned by @func.
 *
 * Return: the first non-zero value returned by @func, or 0 if every visited
 * range returned 0 (including when no range was visited).
 */
static int kexec_walk_memblock(struct kexec_buf *kbuf,
			       int (*func)(struct resource *, void *))
{
	struct resource range = { };
	phys_addr_t base, limit;
	u64 idx;
	int rc;

	if (kbuf->image->type == KEXEC_TYPE_CRASH)
		return func(&crashk_res, kbuf);

	/*
	 * In memblock, the end of a range is the first byte after it, while
	 * kexec expects the end to be the last byte inside the range; hence
	 * the "- 1" in both loops below.
	 */
	if (!kbuf->top_down) {
		for_each_free_mem_range(idx, NUMA_NO_NODE, MEMBLOCK_NONE,
					&base, &limit, NULL) {
			range.start = base;
			range.end = limit - 1;
			rc = func(&range, kbuf);
			if (rc)
				return rc;
		}

		return 0;
	}

	for_each_free_mem_range_reverse(idx, NUMA_NO_NODE, MEMBLOCK_NONE,
					&base, &limit, NULL) {
		range.start = base;
		range.end = limit - 1;
		rc = func(&range, kbuf);
		if (rc)
			return rc;
	}

	return 0;
}
#else
/* Without CONFIG_ARCH_KEEP_MEMBLOCK there is nothing to walk: report 0. */
static int kexec_walk_memblock(struct kexec_buf *kbuf,
			       int (*func)(struct resource *, void *))
{
	return 0;
}
#endif
573
+
509574 /**
510
- * arch_kexec_walk_mem - call func(data) on free memory regions
575
+ * kexec_walk_resources - call func(data) on free memory regions
511576 * @kbuf: Context info for the search. Also passed to @func.
512577 * @func: Function to call for each memory region.
513578 *
....@@ -515,8 +580,8 @@
515580 * and that value will be returned. If all free regions are visited without
516581 * func returning non-zero, then zero will be returned.
517582 */
518
-int __weak arch_kexec_walk_mem(struct kexec_buf *kbuf,
519
- int (*func)(struct resource *, void *))
583
+static int kexec_walk_resources(struct kexec_buf *kbuf,
584
+ int (*func)(struct resource *, void *))
520585 {
521586 if (kbuf->image->type == KEXEC_TYPE_CRASH)
522587 return walk_iomem_res_desc(crashk_res.desc,
....@@ -539,9 +604,29 @@
539604 {
540605 int ret;
541606
542
- ret = arch_kexec_walk_mem(kbuf, locate_mem_hole_callback);
607
+ /* Arch knows where to place */
608
+ if (kbuf->mem != KEXEC_BUF_MEM_UNKNOWN)
609
+ return 0;
610
+
611
+ if (!IS_ENABLED(CONFIG_ARCH_KEEP_MEMBLOCK))
612
+ ret = kexec_walk_resources(kbuf, locate_mem_hole_callback);
613
+ else
614
+ ret = kexec_walk_memblock(kbuf, locate_mem_hole_callback);
543615
544616 return ret == 1 ? 0 : -EADDRNOTAVAIL;
617
+}
618
+
619
+/**
620
+ * arch_kexec_locate_mem_hole - Find free memory to place the segments.
621
+ * @kbuf: Parameters for the memory search.
622
+ *
623
+ * On success, kbuf->mem will have the start address of the memory region found.
624
+ *
625
+ * Return: 0 on success, negative errno on error.
626
+ */
627
+int __weak arch_kexec_locate_mem_hole(struct kexec_buf *kbuf)
628
+{
629
+ return kexec_locate_mem_hole(kbuf);
545630 }
546631
547632 /**
....@@ -556,7 +641,6 @@
556641 */
557642 int kexec_add_buffer(struct kexec_buf *kbuf)
558643 {
559
-
560644 struct kexec_segment *ksegment;
561645 int ret;
562646
....@@ -584,7 +668,7 @@
584668 kbuf->buf_align = max(kbuf->buf_align, PAGE_SIZE);
585669
586670 /* Walk the RAM ranges and allocate a suitable range for the buffer */
587
- ret = kexec_locate_mem_hole(kbuf);
671
+ ret = arch_kexec_locate_mem_hole(kbuf);
588672 if (ret)
589673 return ret;
590674
....@@ -637,7 +721,6 @@
637721 }
638722
639723 desc->tfm = tfm;
640
- desc->flags = 0;
641724
642725 ret = crypto_shash_init(desc);
643726 if (ret < 0)
....@@ -827,10 +910,22 @@
827910 }
828911
829912 offset = ALIGN(offset, align);
913
+
914
+ /*
915
+ * Check if the segment contains the entry point, if so,
916
+ * calculate the value of image->start based on it.
917
+ * If the compiler has produced more than one .text section
918
+ * (Eg: .text.hot), they are generally after the main .text
919
+ * section, and they shall not be used to calculate
920
+ * image->start. So do not re-calculate image->start if it
921
+ * is not set to the initial value, and warn the user so they
922
+ * have a chance to fix their purgatory's linker script.
923
+ */
830924 if (sechdrs[i].sh_flags & SHF_EXECINSTR &&
831925 pi->ehdr->e_entry >= sechdrs[i].sh_addr &&
832926 pi->ehdr->e_entry < (sechdrs[i].sh_addr
833
- + sechdrs[i].sh_size)) {
927
+ + sechdrs[i].sh_size) &&
928
+ !WARN_ON(kbuf->image->start != pi->ehdr->e_entry)) {
834929 kbuf->image->start -= sechdrs[i].sh_addr;
835930 kbuf->image->start += kbuf->mem + offset;
836931 }
....@@ -1069,24 +1164,26 @@
10691164 unsigned long long mstart, unsigned long long mend)
10701165 {
10711166 int i, j;
1072
- unsigned long long start, end;
1167
+ unsigned long long start, end, p_start, p_end;
10731168 struct crash_mem_range temp_range = {0, 0};
10741169
10751170 for (i = 0; i < mem->nr_ranges; i++) {
10761171 start = mem->ranges[i].start;
10771172 end = mem->ranges[i].end;
1173
+ p_start = mstart;
1174
+ p_end = mend;
10781175
10791176 if (mstart > end || mend < start)
10801177 continue;
10811178
10821179 /* Truncate any area outside of range */
10831180 if (mstart < start)
1084
- mstart = start;
1181
+ p_start = start;
10851182 if (mend > end)
1086
- mend = end;
1183
+ p_end = end;
10871184
10881185 /* Found completely overlapping range */
1089
- if (mstart == start && mend == end) {
1186
+ if (p_start == start && p_end == end) {
10901187 mem->ranges[i].start = 0;
10911188 mem->ranges[i].end = 0;
10921189 if (i < mem->nr_ranges - 1) {
....@@ -1097,20 +1194,29 @@
10971194 mem->ranges[j].end =
10981195 mem->ranges[j+1].end;
10991196 }
1197
+
1198
+ /*
1199
+ * Continue to check if there are other overlapping ranges
1200
+ * from the current position because of shifting the above
1201
+ * mem ranges.
1202
+ */
1203
+ i--;
1204
+ mem->nr_ranges--;
1205
+ continue;
11001206 }
11011207 mem->nr_ranges--;
11021208 return 0;
11031209 }
11041210
1105
- if (mstart > start && mend < end) {
1211
+ if (p_start > start && p_end < end) {
11061212 /* Split original range */
1107
- mem->ranges[i].end = mstart - 1;
1108
- temp_range.start = mend + 1;
1213
+ mem->ranges[i].end = p_start - 1;
1214
+ temp_range.start = p_end + 1;
11091215 temp_range.end = end;
1110
- } else if (mstart != start)
1111
- mem->ranges[i].end = mstart - 1;
1216
+ } else if (p_start != start)
1217
+ mem->ranges[i].end = p_start - 1;
11121218 else
1113
- mem->ranges[i].start = mend + 1;
1219
+ mem->ranges[i].start = p_end + 1;
11141220 break;
11151221 }
11161222
....@@ -1147,7 +1253,7 @@
11471253 unsigned long long notes_addr;
11481254 unsigned long mstart, mend;
11491255
1150
- /* extra phdr for vmcoreinfo elf note */
1256
+ /* extra phdr for vmcoreinfo ELF note */
11511257 nr_phdr = nr_cpus + 1;
11521258 nr_phdr += mem->nr_ranges;
11531259
....@@ -1155,7 +1261,7 @@
11551261 * kexec-tools creates an extra PT_LOAD phdr for kernel text mapping
11561262 * area (for example, ffffffff80000000 - ffffffffa0000000 on x86_64).
11571263 * I think this is required by tools like gdb. So same physical
1158
- * memory will be mapped in two elf headers. One will contain kernel
1264
+ * memory will be mapped in two ELF headers. One will contain kernel
11591265 * text virtual addresses and other will have __va(physical) addresses.
11601266 */
11611267
....@@ -1182,7 +1288,7 @@
11821288 ehdr->e_ehsize = sizeof(Elf64_Ehdr);
11831289 ehdr->e_phentsize = sizeof(Elf64_Phdr);
11841290
1185
- /* Prepare one phdr of type PT_NOTE for each present cpu */
1291
+ /* Prepare one phdr of type PT_NOTE for each present CPU */
11861292 for_each_present_cpu(cpu) {
11871293 phdr->p_type = PT_NOTE;
11881294 notes_addr = per_cpu_ptr_to_phys(per_cpu_ptr(crash_notes, cpu));
....@@ -1203,7 +1309,7 @@
12031309 if (kernel_map) {
12041310 phdr->p_type = PT_LOAD;
12051311 phdr->p_flags = PF_R|PF_W|PF_X;
1206
- phdr->p_vaddr = (Elf64_Addr)_text;
1312
+ phdr->p_vaddr = (unsigned long) _text;
12071313 phdr->p_filesz = phdr->p_memsz = _end - _text;
12081314 phdr->p_offset = phdr->p_paddr = __pa_symbol(_text);
12091315 ehdr->e_phnum++;
....@@ -1220,14 +1326,14 @@
12201326 phdr->p_offset = mstart;
12211327
12221328 phdr->p_paddr = mstart;
1223
- phdr->p_vaddr = (unsigned long long) __va(mstart);
1329
+ phdr->p_vaddr = (unsigned long) __va(mstart);
12241330 phdr->p_filesz = phdr->p_memsz = mend - mstart + 1;
12251331 phdr->p_align = 0;
12261332 ehdr->e_phnum++;
1227
- phdr++;
1228
- pr_debug("Crash PT_LOAD elf header. phdr=%p vaddr=0x%llx, paddr=0x%llx, sz=0x%llx e_phnum=%d p_offset=0x%llx\n",
1333
+ pr_debug("Crash PT_LOAD ELF header. phdr=%p vaddr=0x%llx, paddr=0x%llx, sz=0x%llx e_phnum=%d p_offset=0x%llx\n",
12291334 phdr, phdr->p_vaddr, phdr->p_paddr, phdr->p_filesz,
12301335 ehdr->e_phnum, phdr->p_offset);
1336
+ phdr++;
12311337 }
12321338
12331339 *addr = buf;