hc
2024-05-10 23fa18eaa71266feff7ba8d83022d9e1cc83c65a
kernel/arch/powerpc/kernel/fadump.c
....@@ -1,3 +1,4 @@
1
+// SPDX-License-Identifier: GPL-2.0-or-later
12 /*
23 * Firmware Assisted dump: A robust mechanism to get reliable kernel crash
34 * dump with assistance from firmware. This approach does not use kexec,
....@@ -5,20 +6,6 @@
56 * memory contents. The most of the code implementation has been adapted
67 * from phyp assisted dump implementation written by Linas Vepstas and
78 * Manish Ahuja
8
- *
9
- * This program is free software; you can redistribute it and/or modify
10
- * it under the terms of the GNU General Public License as published by
11
- * the Free Software Foundation; either version 2 of the License, or
12
- * (at your option) any later version.
13
- *
14
- * This program is distributed in the hope that it will be useful,
15
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
16
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17
- * GNU General Public License for more details.
18
- *
19
- * You should have received a copy of the GNU General Public License
20
- * along with this program; if not, write to the Free Software
21
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
229 *
2310 * Copyright 2011 IBM Corporation
2411 * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
....@@ -35,103 +22,155 @@
3522 #include <linux/kobject.h>
3623 #include <linux/sysfs.h>
3724 #include <linux/slab.h>
25
+#include <linux/cma.h>
26
+#include <linux/hugetlb.h>
3827
3928 #include <asm/debugfs.h>
4029 #include <asm/page.h>
4130 #include <asm/prom.h>
42
-#include <asm/rtas.h>
4331 #include <asm/fadump.h>
32
+#include <asm/fadump-internal.h>
4433 #include <asm/setup.h>
4534
35
+/*
36
+ * The CPU who acquired the lock to trigger the fadump crash should
37
+ * wait for other CPUs to enter.
38
+ *
39
+ * The timeout is in milliseconds.
40
+ */
41
+#define CRASH_TIMEOUT 500
42
+
4643 static struct fw_dump fw_dump;
47
-static struct fadump_mem_struct fdm;
48
-static const struct fadump_mem_struct *fdm_active;
4944
45
+static void __init fadump_reserve_crash_area(u64 base);
46
+
47
+struct kobject *fadump_kobj;
48
+
49
+#ifndef CONFIG_PRESERVE_FA_DUMP
50
+
51
+static atomic_t cpus_in_fadump;
5052 static DEFINE_MUTEX(fadump_mutex);
51
-struct fad_crash_memory_ranges *crash_memory_ranges;
52
-int crash_memory_ranges_size;
53
-int crash_mem_ranges;
54
-int max_crash_mem_ranges;
5553
56
-/* Scan the Firmware Assisted dump configuration details. */
57
-int __init early_init_dt_scan_fw_dump(unsigned long node,
58
- const char *uname, int depth, void *data)
54
+struct fadump_mrange_info crash_mrange_info = { "crash", NULL, 0, 0, 0, false };
55
+
56
+#define RESERVED_RNGS_SZ 16384 /* 16K - 128 entries */
57
+#define RESERVED_RNGS_CNT (RESERVED_RNGS_SZ / \
58
+ sizeof(struct fadump_memory_range))
59
+static struct fadump_memory_range rngs[RESERVED_RNGS_CNT];
60
+struct fadump_mrange_info reserved_mrange_info = { "reserved", rngs,
61
+ RESERVED_RNGS_SZ, 0,
62
+ RESERVED_RNGS_CNT, true };
63
+
64
+static void __init early_init_dt_scan_reserved_ranges(unsigned long node);
65
+
66
+#ifdef CONFIG_CMA
67
+static struct cma *fadump_cma;
68
+
69
+/*
70
+ * fadump_cma_init() - Initialize CMA area from a fadump reserved memory
71
+ *
72
+ * This function initializes CMA area from fadump reserved memory.
73
+ * The total size of fadump reserved memory covers for boot memory size
74
+ * + cpu data size + hpte size and metadata.
75
+ * Initialize only the area equivalent to boot memory size for CMA use.
76
+ * The remaining portion of fadump reserved memory will not be given
77
+ * to CMA and pages for those will stay reserved. Boot memory size is
78
+ * aligned per CMA requirement to satisfy cma_init_reserved_mem() call.
79
+ * But for some reason even if it fails we still have the memory reservation
80
+ * with us and we can still continue doing fadump.
81
+ */
82
+int __init fadump_cma_init(void)
5983 {
60
- const __be32 *sections;
61
- int i, num_sections;
62
- int size;
63
- const __be32 *token;
84
+ unsigned long long base, size;
85
+ int rc;
6486
65
- if (depth != 1 || strcmp(uname, "rtas") != 0)
87
+ if (!fw_dump.fadump_enabled)
6688 return 0;
6789
6890 /*
69
- * Check if Firmware Assisted dump is supported. if yes, check
70
- * if dump has been initiated on last reboot.
91
+ * Do not use CMA if user has provided fadump=nocma kernel parameter.
92
+ * Return 1 to continue with fadump old behaviour.
7193 */
72
- token = of_get_flat_dt_prop(node, "ibm,configure-kernel-dump", NULL);
73
- if (!token)
94
+ if (fw_dump.nocma)
7495 return 1;
7596
76
- fw_dump.fadump_supported = 1;
77
- fw_dump.ibm_configure_kernel_dump = be32_to_cpu(*token);
97
+ base = fw_dump.reserve_dump_area_start;
98
+ size = fw_dump.boot_memory_size;
7899
79
- /*
80
- * The 'ibm,kernel-dump' rtas node is present only if there is
81
- * dump data waiting for us.
82
- */
83
- fdm_active = of_get_flat_dt_prop(node, "ibm,kernel-dump", NULL);
84
- if (fdm_active)
85
- fw_dump.dump_active = 1;
100
+ if (!size)
101
+ return 0;
86102
87
- /* Get the sizes required to store dump data for the firmware provided
88
- * dump sections.
89
- * For each dump section type supported, a 32bit cell which defines
90
- * the ID of a supported section followed by two 32 bit cells which
91
- * gives teh size of the section in bytes.
92
- */
93
- sections = of_get_flat_dt_prop(node, "ibm,configure-kernel-dump-sizes",
94
- &size);
95
-
96
- if (!sections)
103
+ rc = cma_init_reserved_mem(base, size, 0, "fadump_cma", &fadump_cma);
104
+ if (rc) {
105
+ pr_err("Failed to init cma area for firmware-assisted dump,%d\n", rc);
106
+ /*
107
+ * Though the CMA init has failed we still have memory
108
+ * reservation with us. The reserved memory will be
109
+ * blocked from production system usage. Hence return 1,
110
+ * so that we can continue with fadump.
111
+ */
97112 return 1;
98
-
99
- num_sections = size / (3 * sizeof(u32));
100
-
101
- for (i = 0; i < num_sections; i++, sections += 3) {
102
- u32 type = (u32)of_read_number(sections, 1);
103
-
104
- switch (type) {
105
- case FADUMP_CPU_STATE_DATA:
106
- fw_dump.cpu_state_data_size =
107
- of_read_ulong(&sections[1], 2);
108
- break;
109
- case FADUMP_HPTE_REGION:
110
- fw_dump.hpte_region_size =
111
- of_read_ulong(&sections[1], 2);
112
- break;
113
- }
114113 }
115114
115
+ /*
116
+ * So we now have successfully initialized cma area for fadump.
117
+ */
118
+ pr_info("Initialized 0x%lx bytes cma area at %ldMB from 0x%lx "
119
+ "bytes of memory reserved for firmware-assisted dump\n",
120
+ cma_get_size(fadump_cma),
121
+ (unsigned long)cma_get_base(fadump_cma) >> 20,
122
+ fw_dump.reserve_dump_area_size);
116123 return 1;
124
+}
125
+#else
126
+static int __init fadump_cma_init(void) { return 1; }
127
+#endif /* CONFIG_CMA */
128
+
129
+/* Scan the Firmware Assisted dump configuration details. */
130
+int __init early_init_dt_scan_fw_dump(unsigned long node, const char *uname,
131
+ int depth, void *data)
132
+{
133
+ if (depth == 0) {
134
+ early_init_dt_scan_reserved_ranges(node);
135
+ return 0;
136
+ }
137
+
138
+ if (depth != 1)
139
+ return 0;
140
+
141
+ if (strcmp(uname, "rtas") == 0) {
142
+ rtas_fadump_dt_scan(&fw_dump, node);
143
+ return 1;
144
+ }
145
+
146
+ if (strcmp(uname, "ibm,opal") == 0) {
147
+ opal_fadump_dt_scan(&fw_dump, node);
148
+ return 1;
149
+ }
150
+
151
+ return 0;
117152 }
118153
119154 /*
120155 * If fadump is registered, check if the memory provided
121156 * falls within boot memory area and reserved memory area.
122157 */
123
-int is_fadump_memory_area(u64 addr, ulong size)
158
+int is_fadump_memory_area(u64 addr, unsigned long size)
124159 {
125
- u64 d_start = fw_dump.reserve_dump_area_start;
126
- u64 d_end = d_start + fw_dump.reserve_dump_area_size;
160
+ u64 d_start, d_end;
127161
128162 if (!fw_dump.dump_registered)
129163 return 0;
130164
165
+ if (!size)
166
+ return 0;
167
+
168
+ d_start = fw_dump.reserve_dump_area_start;
169
+ d_end = d_start + fw_dump.reserve_dump_area_size;
131170 if (((addr + size) > d_start) && (addr <= d_end))
132171 return 1;
133172
134
- return (addr + size) > RMA_START && addr <= fw_dump.boot_memory_size;
173
+ return (addr <= fw_dump.boot_mem_top);
135174 }
136175
137176 int should_fadump_crash(void)
....@@ -147,40 +186,75 @@
147186 }
148187
149188 /*
150
- * Returns 1, if there are no holes in boot memory area,
151
- * 0 otherwise.
189
+ * Returns true, if there are no holes in memory area between d_start to d_end,
190
+ * false otherwise.
152191 */
153
-static int is_boot_memory_area_contiguous(void)
192
+static bool is_fadump_mem_area_contiguous(u64 d_start, u64 d_end)
154193 {
155
- struct memblock_region *reg;
156
- unsigned long tstart, tend;
157
- unsigned long start_pfn = PHYS_PFN(RMA_START);
158
- unsigned long end_pfn = PHYS_PFN(RMA_START + fw_dump.boot_memory_size);
159
- unsigned int ret = 0;
194
+ phys_addr_t reg_start, reg_end;
195
+ bool ret = false;
196
+ u64 i, start, end;
160197
161
- for_each_memblock(memory, reg) {
162
- tstart = max(start_pfn, memblock_region_memory_base_pfn(reg));
163
- tend = min(end_pfn, memblock_region_memory_end_pfn(reg));
164
- if (tstart < tend) {
165
- /* Memory hole from start_pfn to tstart */
166
- if (tstart > start_pfn)
198
+ for_each_mem_range(i, &reg_start, &reg_end) {
199
+ start = max_t(u64, d_start, reg_start);
200
+ end = min_t(u64, d_end, reg_end);
201
+ if (d_start < end) {
202
+ /* Memory hole from d_start to start */
203
+ if (start > d_start)
167204 break;
168205
169
- if (tend == end_pfn) {
170
- ret = 1;
206
+ if (end == d_end) {
207
+ ret = true;
171208 break;
172209 }
173210
174
- start_pfn = tend + 1;
211
+ d_start = end + 1;
175212 }
176213 }
177214
178215 return ret;
179216 }
180217
218
+/*
219
+ * Returns true, if there are no holes in boot memory area,
220
+ * false otherwise.
221
+ */
222
+bool is_fadump_boot_mem_contiguous(void)
223
+{
224
+ unsigned long d_start, d_end;
225
+ bool ret = false;
226
+ int i;
227
+
228
+ for (i = 0; i < fw_dump.boot_mem_regs_cnt; i++) {
229
+ d_start = fw_dump.boot_mem_addr[i];
230
+ d_end = d_start + fw_dump.boot_mem_sz[i];
231
+
232
+ ret = is_fadump_mem_area_contiguous(d_start, d_end);
233
+ if (!ret)
234
+ break;
235
+ }
236
+
237
+ return ret;
238
+}
239
+
240
+/*
241
+ * Returns true, if there are no holes in reserved memory area,
242
+ * false otherwise.
243
+ */
244
+bool is_fadump_reserved_mem_contiguous(void)
245
+{
246
+ u64 d_start, d_end;
247
+
248
+ d_start = fw_dump.reserve_dump_area_start;
249
+ d_end = d_start + fw_dump.reserve_dump_area_size;
250
+ return is_fadump_mem_area_contiguous(d_start, d_end);
251
+}
252
+
181253 /* Print firmware assisted dump configurations for debugging purpose. */
182254 static void fadump_show_config(void)
183255 {
256
+ int i;
257
+
184258 pr_debug("Support for firmware-assisted dump (fadump): %s\n",
185259 (fw_dump.fadump_supported ? "present" : "no support"));
186260
....@@ -194,62 +268,13 @@
194268 pr_debug("Dump section sizes:\n");
195269 pr_debug(" CPU state data size: %lx\n", fw_dump.cpu_state_data_size);
196270 pr_debug(" HPTE region size : %lx\n", fw_dump.hpte_region_size);
197
- pr_debug("Boot memory size : %lx\n", fw_dump.boot_memory_size);
198
-}
199
-
200
-static unsigned long init_fadump_mem_struct(struct fadump_mem_struct *fdm,
201
- unsigned long addr)
202
-{
203
- if (!fdm)
204
- return 0;
205
-
206
- memset(fdm, 0, sizeof(struct fadump_mem_struct));
207
- addr = addr & PAGE_MASK;
208
-
209
- fdm->header.dump_format_version = cpu_to_be32(0x00000001);
210
- fdm->header.dump_num_sections = cpu_to_be16(3);
211
- fdm->header.dump_status_flag = 0;
212
- fdm->header.offset_first_dump_section =
213
- cpu_to_be32((u32)offsetof(struct fadump_mem_struct, cpu_state_data));
214
-
215
- /*
216
- * Fields for disk dump option.
217
- * We are not using disk dump option, hence set these fields to 0.
218
- */
219
- fdm->header.dd_block_size = 0;
220
- fdm->header.dd_block_offset = 0;
221
- fdm->header.dd_num_blocks = 0;
222
- fdm->header.dd_offset_disk_path = 0;
223
-
224
- /* set 0 to disable an automatic dump-reboot. */
225
- fdm->header.max_time_auto = 0;
226
-
227
- /* Kernel dump sections */
228
- /* cpu state data section. */
229
- fdm->cpu_state_data.request_flag = cpu_to_be32(FADUMP_REQUEST_FLAG);
230
- fdm->cpu_state_data.source_data_type = cpu_to_be16(FADUMP_CPU_STATE_DATA);
231
- fdm->cpu_state_data.source_address = 0;
232
- fdm->cpu_state_data.source_len = cpu_to_be64(fw_dump.cpu_state_data_size);
233
- fdm->cpu_state_data.destination_address = cpu_to_be64(addr);
234
- addr += fw_dump.cpu_state_data_size;
235
-
236
- /* hpte region section */
237
- fdm->hpte_region.request_flag = cpu_to_be32(FADUMP_REQUEST_FLAG);
238
- fdm->hpte_region.source_data_type = cpu_to_be16(FADUMP_HPTE_REGION);
239
- fdm->hpte_region.source_address = 0;
240
- fdm->hpte_region.source_len = cpu_to_be64(fw_dump.hpte_region_size);
241
- fdm->hpte_region.destination_address = cpu_to_be64(addr);
242
- addr += fw_dump.hpte_region_size;
243
-
244
- /* RMA region section */
245
- fdm->rmr_region.request_flag = cpu_to_be32(FADUMP_REQUEST_FLAG);
246
- fdm->rmr_region.source_data_type = cpu_to_be16(FADUMP_REAL_MODE_REGION);
247
- fdm->rmr_region.source_address = cpu_to_be64(RMA_START);
248
- fdm->rmr_region.source_len = cpu_to_be64(fw_dump.boot_memory_size);
249
- fdm->rmr_region.destination_address = cpu_to_be64(addr);
250
- addr += fw_dump.boot_memory_size;
251
-
252
- return addr;
271
+ pr_debug(" Boot memory size : %lx\n", fw_dump.boot_memory_size);
272
+ pr_debug(" Boot memory top : %llx\n", fw_dump.boot_mem_top);
273
+ pr_debug("Boot memory regions cnt: %llx\n", fw_dump.boot_mem_regs_cnt);
274
+ for (i = 0; i < fw_dump.boot_mem_regs_cnt; i++) {
275
+ pr_debug("[%03d] base = %llx, size = %llx\n", i,
276
+ fw_dump.boot_mem_addr[i], fw_dump.boot_mem_sz[i]);
277
+ }
253278 }
254279
255280 /**
....@@ -267,10 +292,10 @@
267292 * that is required for a kernel to boot successfully.
268293 *
269294 */
270
-static inline unsigned long fadump_calculate_reserve_size(void)
295
+static __init u64 fadump_calculate_reserve_size(void)
271296 {
297
+ u64 base, size, bootmem_min;
272298 int ret;
273
- unsigned long long base, size;
274299
275300 if (fw_dump.reserve_bootvar)
276301 pr_warn("'fadump_reserve_mem=' parameter is deprecated in favor of 'crashkernel=' parameter.\n");
....@@ -320,7 +345,8 @@
320345 if (memory_limit && size > memory_limit)
321346 size = memory_limit;
322347
323
- return (size > MIN_BOOT_MEM ? size : MIN_BOOT_MEM);
348
+ bootmem_min = fw_dump.ops->fadump_get_bootmem_min();
349
+ return (size > bootmem_min ? size : bootmem_min);
324350 }
325351
326352 /*
....@@ -341,51 +367,199 @@
341367 size += sizeof(struct elf_phdr) * (memblock_num_regions(memory) + 2);
342368
343369 size = PAGE_ALIGN(size);
370
+
371
+ /* This is to hold kernel metadata on platforms that support it */
372
+ size += (fw_dump.ops->fadump_get_metadata_size ?
373
+ fw_dump.ops->fadump_get_metadata_size() : 0);
344374 return size;
345375 }
346376
347
-static void __init fadump_reserve_crash_area(unsigned long base,
348
- unsigned long size)
377
+static int __init add_boot_mem_region(unsigned long rstart,
378
+ unsigned long rsize)
349379 {
350
- struct memblock_region *reg;
351
- unsigned long mstart, mend, msize;
380
+ int i = fw_dump.boot_mem_regs_cnt++;
352381
353
- for_each_memblock(memory, reg) {
354
- mstart = max_t(unsigned long, base, reg->base);
355
- mend = reg->base + reg->size;
356
- mend = min(base + size, mend);
382
+ if (fw_dump.boot_mem_regs_cnt > FADUMP_MAX_MEM_REGS) {
383
+ fw_dump.boot_mem_regs_cnt = FADUMP_MAX_MEM_REGS;
384
+ return 0;
385
+ }
357386
358
- if (mstart < mend) {
359
- msize = mend - mstart;
360
- memblock_reserve(mstart, msize);
361
- pr_info("Reserved %ldMB of memory at %#016lx for saving crash dump\n",
362
- (msize >> 20), mstart);
387
+ pr_debug("Added boot memory range[%d] [%#016lx-%#016lx)\n",
388
+ i, rstart, (rstart + rsize));
389
+ fw_dump.boot_mem_addr[i] = rstart;
390
+ fw_dump.boot_mem_sz[i] = rsize;
391
+ return 1;
392
+}
393
+
394
+/*
395
+ * Firmware usually has a hard limit on the data it can copy per region.
396
+ * Honour that by splitting a memory range into multiple regions.
397
+ */
398
+static int __init add_boot_mem_regions(unsigned long mstart,
399
+ unsigned long msize)
400
+{
401
+ unsigned long rstart, rsize, max_size;
402
+ int ret = 1;
403
+
404
+ rstart = mstart;
405
+ max_size = fw_dump.max_copy_size ? fw_dump.max_copy_size : msize;
406
+ while (msize) {
407
+ if (msize > max_size)
408
+ rsize = max_size;
409
+ else
410
+ rsize = msize;
411
+
412
+ ret = add_boot_mem_region(rstart, rsize);
413
+ if (!ret)
414
+ break;
415
+
416
+ msize -= rsize;
417
+ rstart += rsize;
418
+ }
419
+
420
+ return ret;
421
+}
422
+
423
+static int __init fadump_get_boot_mem_regions(void)
424
+{
425
+ unsigned long size, cur_size, hole_size, last_end;
426
+ unsigned long mem_size = fw_dump.boot_memory_size;
427
+ phys_addr_t reg_start, reg_end;
428
+ int ret = 1;
429
+ u64 i;
430
+
431
+ fw_dump.boot_mem_regs_cnt = 0;
432
+
433
+ last_end = 0;
434
+ hole_size = 0;
435
+ cur_size = 0;
436
+ for_each_mem_range(i, &reg_start, &reg_end) {
437
+ size = reg_end - reg_start;
438
+ hole_size += (reg_start - last_end);
439
+
440
+ if ((cur_size + size) >= mem_size) {
441
+ size = (mem_size - cur_size);
442
+ ret = add_boot_mem_regions(reg_start, size);
443
+ break;
444
+ }
445
+
446
+ mem_size -= size;
447
+ cur_size += size;
448
+ ret = add_boot_mem_regions(reg_start, size);
449
+ if (!ret)
450
+ break;
451
+
452
+ last_end = reg_end;
453
+ }
454
+ fw_dump.boot_mem_top = PAGE_ALIGN(fw_dump.boot_memory_size + hole_size);
455
+
456
+ return ret;
457
+}
458
+
459
+/*
460
+ * Returns true, if the given range overlaps with reserved memory ranges
461
+ * starting at idx. Also, updates idx to index of overlapping memory range
462
+ * with the given memory range.
463
+ * False, otherwise.
464
+ */
465
+static bool overlaps_reserved_ranges(u64 base, u64 end, int *idx)
466
+{
467
+ bool ret = false;
468
+ int i;
469
+
470
+ for (i = *idx; i < reserved_mrange_info.mem_range_cnt; i++) {
471
+ u64 rbase = reserved_mrange_info.mem_ranges[i].base;
472
+ u64 rend = rbase + reserved_mrange_info.mem_ranges[i].size;
473
+
474
+ if (end <= rbase)
475
+ break;
476
+
477
+ if ((end > rbase) && (base < rend)) {
478
+ *idx = i;
479
+ ret = true;
480
+ break;
363481 }
364482 }
483
+
484
+ return ret;
485
+}
486
+
487
+/*
488
+ * Locate a suitable memory area to reserve memory for FADump. While at it,
489
+ * lookup reserved-ranges & avoid overlap with them, as they are used by F/W.
490
+ */
491
+static u64 __init fadump_locate_reserve_mem(u64 base, u64 size)
492
+{
493
+ struct fadump_memory_range *mrngs;
494
+ phys_addr_t mstart, mend;
495
+ int idx = 0;
496
+ u64 i, ret = 0;
497
+
498
+ mrngs = reserved_mrange_info.mem_ranges;
499
+ for_each_free_mem_range(i, NUMA_NO_NODE, MEMBLOCK_NONE,
500
+ &mstart, &mend, NULL) {
501
+ pr_debug("%llu) mstart: %llx, mend: %llx, base: %llx\n",
502
+ i, mstart, mend, base);
503
+
504
+ if (mstart > base)
505
+ base = PAGE_ALIGN(mstart);
506
+
507
+ while ((mend > base) && ((mend - base) >= size)) {
508
+ if (!overlaps_reserved_ranges(base, base+size, &idx)) {
509
+ ret = base;
510
+ goto out;
511
+ }
512
+
513
+ base = mrngs[idx].base + mrngs[idx].size;
514
+ base = PAGE_ALIGN(base);
515
+ }
516
+ }
517
+
518
+out:
519
+ return ret;
365520 }
366521
367522 int __init fadump_reserve_mem(void)
368523 {
369
- unsigned long base, size, memory_boundary;
524
+ u64 base, size, mem_boundary, bootmem_min;
525
+ int ret = 1;
370526
371527 if (!fw_dump.fadump_enabled)
372528 return 0;
373529
374530 if (!fw_dump.fadump_supported) {
375
- printk(KERN_INFO "Firmware-assisted dump is not supported on"
376
- " this hardware\n");
377
- fw_dump.fadump_enabled = 0;
378
- return 0;
531
+ pr_info("Firmware-Assisted Dump is not supported on this hardware\n");
532
+ goto error_out;
379533 }
534
+
380535 /*
381536 * Initialize boot memory size
382537 * If dump is active then we have already calculated the size during
383538 * first kernel.
384539 */
385
- if (fdm_active)
386
- fw_dump.boot_memory_size = be64_to_cpu(fdm_active->rmr_region.source_len);
387
- else
388
- fw_dump.boot_memory_size = fadump_calculate_reserve_size();
540
+ if (!fw_dump.dump_active) {
541
+ fw_dump.boot_memory_size =
542
+ PAGE_ALIGN(fadump_calculate_reserve_size());
543
+#ifdef CONFIG_CMA
544
+ if (!fw_dump.nocma) {
545
+ fw_dump.boot_memory_size =
546
+ ALIGN(fw_dump.boot_memory_size,
547
+ FADUMP_CMA_ALIGNMENT);
548
+ }
549
+#endif
550
+
551
+ bootmem_min = fw_dump.ops->fadump_get_bootmem_min();
552
+ if (fw_dump.boot_memory_size < bootmem_min) {
553
+ pr_err("Can't enable fadump with boot memory size (0x%lx) less than 0x%llx\n",
554
+ fw_dump.boot_memory_size, bootmem_min);
555
+ goto error_out;
556
+ }
557
+
558
+ if (!fadump_get_boot_mem_regions()) {
559
+ pr_err("Too many holes in boot memory area to enable fadump\n");
560
+ goto error_out;
561
+ }
562
+ }
389563
390564 /*
391565 * Calculate the memory boundary.
....@@ -404,10 +578,13 @@
404578 " dump, now %#016llx\n", memory_limit);
405579 }
406580 if (memory_limit)
407
- memory_boundary = memory_limit;
581
+ mem_boundary = memory_limit;
408582 else
409
- memory_boundary = memblock_end_of_DRAM();
583
+ mem_boundary = memblock_end_of_DRAM();
410584
585
+ base = fw_dump.boot_mem_top;
586
+ size = get_fadump_area_size();
587
+ fw_dump.reserve_dump_area_size = size;
411588 if (fw_dump.dump_active) {
412589 pr_info("Firmware-assisted dump is active.\n");
413590
....@@ -421,56 +598,52 @@
421598 #endif
422599 /*
423600 * If last boot has crashed then reserve all the memory
424
- * above boot_memory_size so that we don't touch it until
601
+ * above boot memory size so that we don't touch it until
425602 * dump is written to disk by userspace tool. This memory
426
- * will be released for general use once the dump is saved.
603
+ * can be released for general use by invalidating fadump.
427604 */
428
- base = fw_dump.boot_memory_size;
429
- size = memory_boundary - base;
430
- fadump_reserve_crash_area(base, size);
605
+ fadump_reserve_crash_area(base);
431606
432
- fw_dump.fadumphdr_addr =
433
- be64_to_cpu(fdm_active->rmr_region.destination_address) +
434
- be64_to_cpu(fdm_active->rmr_region.source_len);
435
- pr_debug("fadumphdr_addr = %p\n",
436
- (void *) fw_dump.fadumphdr_addr);
607
+ pr_debug("fadumphdr_addr = %#016lx\n", fw_dump.fadumphdr_addr);
608
+ pr_debug("Reserve dump area start address: 0x%lx\n",
609
+ fw_dump.reserve_dump_area_start);
437610 } else {
438
- size = get_fadump_area_size();
439
-
440611 /*
441612 * Reserve memory at an offset closer to bottom of the RAM to
442
- * minimize the impact of memory hot-remove operation. We can't
443
- * use memblock_find_in_range() here since it doesn't allocate
444
- * from bottom to top.
613
+ * minimize the impact of memory hot-remove operation.
445614 */
446
- for (base = fw_dump.boot_memory_size;
447
- base <= (memory_boundary - size);
448
- base += size) {
449
- if (memblock_is_region_memory(base, size) &&
450
- !memblock_is_region_reserved(base, size))
451
- break;
615
+ base = fadump_locate_reserve_mem(base, size);
616
+
617
+ if (!base || (base + size > mem_boundary)) {
618
+ pr_err("Failed to find memory chunk for reservation!\n");
619
+ goto error_out;
452620 }
453
- if ((base > (memory_boundary - size)) ||
454
- memblock_reserve(base, size)) {
455
- pr_err("Failed to reserve memory\n");
456
- return 0;
621
+ fw_dump.reserve_dump_area_start = base;
622
+
623
+ /*
624
+ * Calculate the kernel metadata address and register it with
625
+ * f/w if the platform supports.
626
+ */
627
+ if (fw_dump.ops->fadump_setup_metadata &&
628
+ (fw_dump.ops->fadump_setup_metadata(&fw_dump) < 0))
629
+ goto error_out;
630
+
631
+ if (memblock_reserve(base, size)) {
632
+ pr_err("Failed to reserve memory!\n");
633
+ goto error_out;
457634 }
458635
459
- pr_info("Reserved %ldMB of memory at %ldMB for firmware-"
460
- "assisted dump (System RAM: %ldMB)\n",
461
- (unsigned long)(size >> 20),
462
- (unsigned long)(base >> 20),
463
- (unsigned long)(memblock_phys_mem_size() >> 20));
636
+ pr_info("Reserved %lldMB of memory at %#016llx (System RAM: %lldMB)\n",
637
+ (size >> 20), base, (memblock_phys_mem_size() >> 20));
638
+
639
+ ret = fadump_cma_init();
464640 }
465641
466
- fw_dump.reserve_dump_area_start = base;
467
- fw_dump.reserve_dump_area_size = size;
468
- return 1;
469
-}
470
-
471
-unsigned long __init arch_reserved_kernel_pages(void)
472
-{
473
- return memblock_reserved_size() / PAGE_SIZE;
642
+ return ret;
643
+error_out:
644
+ fw_dump.fadump_enabled = 0;
645
+ fw_dump.reserve_dump_area_size = 0;
646
+ return 0;
474647 }
475648
476649 /* Look for fadump= cmdline option. */
....@@ -483,6 +656,10 @@
483656 fw_dump.fadump_enabled = 1;
484657 else if (strncmp(p, "off", 3) == 0)
485658 fw_dump.fadump_enabled = 0;
659
+ else if (strncmp(p, "nocma", 5) == 0) {
660
+ fw_dump.fadump_enabled = 1;
661
+ fw_dump.nocma = 1;
662
+ }
486663
487664 return 0;
488665 }
....@@ -501,63 +678,13 @@
501678 }
502679 early_param("fadump_reserve_mem", early_fadump_reserve_mem);
503680
504
-static int register_fw_dump(struct fadump_mem_struct *fdm)
505
-{
506
- int rc, err;
507
- unsigned int wait_time;
508
-
509
- pr_debug("Registering for firmware-assisted kernel dump...\n");
510
-
511
- /* TODO: Add upper time limit for the delay */
512
- do {
513
- rc = rtas_call(fw_dump.ibm_configure_kernel_dump, 3, 1, NULL,
514
- FADUMP_REGISTER, fdm,
515
- sizeof(struct fadump_mem_struct));
516
-
517
- wait_time = rtas_busy_delay_time(rc);
518
- if (wait_time)
519
- mdelay(wait_time);
520
-
521
- } while (wait_time);
522
-
523
- err = -EIO;
524
- switch (rc) {
525
- default:
526
- pr_err("Failed to register. Unknown Error(%d).\n", rc);
527
- break;
528
- case -1:
529
- printk(KERN_ERR "Failed to register firmware-assisted kernel"
530
- " dump. Hardware Error(%d).\n", rc);
531
- break;
532
- case -3:
533
- if (!is_boot_memory_area_contiguous())
534
- pr_err("Can't have holes in boot memory area while "
535
- "registering fadump\n");
536
-
537
- printk(KERN_ERR "Failed to register firmware-assisted kernel"
538
- " dump. Parameter Error(%d).\n", rc);
539
- err = -EINVAL;
540
- break;
541
- case -9:
542
- printk(KERN_ERR "firmware-assisted kernel dump is already "
543
- " registered.");
544
- fw_dump.dump_registered = 1;
545
- err = -EEXIST;
546
- break;
547
- case 0:
548
- printk(KERN_INFO "firmware-assisted kernel dump registration"
549
- " is successful\n");
550
- fw_dump.dump_registered = 1;
551
- err = 0;
552
- break;
553
- }
554
- return err;
555
-}
556
-
557681 void crash_fadump(struct pt_regs *regs, const char *str)
558682 {
683
+ unsigned int msecs;
559684 struct fadump_crash_info_header *fdh = NULL;
560685 int old_cpu, this_cpu;
686
+ /* Do not include first CPU */
687
+ unsigned int ncpus = num_online_cpus() - 1;
561688
562689 if (!should_fadump_crash())
563690 return;
....@@ -573,6 +700,8 @@
573700 old_cpu = cmpxchg(&crashing_cpu, -1, this_cpu);
574701
575702 if (old_cpu != -1) {
703
+ atomic_inc(&cpus_in_fadump);
704
+
576705 /*
577706 * We can't loop here indefinitely. Wait as long as fadump
578707 * is in force. If we race with fadump un-registration this
....@@ -596,71 +725,20 @@
596725
597726 fdh->online_mask = *cpu_online_mask;
598727
599
- /* Call ibm,os-term rtas call to trigger firmware assisted dump */
600
- rtas_os_term((char *)str);
601
-}
602
-
603
-#define GPR_MASK 0xffffff0000000000
604
-static inline int fadump_gpr_index(u64 id)
605
-{
606
- int i = -1;
607
- char str[3];
608
-
609
- if ((id & GPR_MASK) == REG_ID("GPR")) {
610
- /* get the digits at the end */
611
- id &= ~GPR_MASK;
612
- id >>= 24;
613
- str[2] = '\0';
614
- str[1] = id & 0xff;
615
- str[0] = (id >> 8) & 0xff;
616
- sscanf(str, "%d", &i);
617
- if (i > 31)
618
- i = -1;
728
+ /*
729
+ * If we came in via system reset, wait a while for the secondary
730
+ * CPUs to enter.
731
+ */
732
+ if (TRAP(&(fdh->regs)) == 0x100) {
733
+ msecs = CRASH_TIMEOUT;
734
+ while ((atomic_read(&cpus_in_fadump) < ncpus) && (--msecs > 0))
735
+ mdelay(1);
619736 }
620
- return i;
737
+
738
+ fw_dump.ops->fadump_trigger(fdh, str);
621739 }
622740
623
-static inline void fadump_set_regval(struct pt_regs *regs, u64 reg_id,
624
- u64 reg_val)
625
-{
626
- int i;
627
-
628
- i = fadump_gpr_index(reg_id);
629
- if (i >= 0)
630
- regs->gpr[i] = (unsigned long)reg_val;
631
- else if (reg_id == REG_ID("NIA"))
632
- regs->nip = (unsigned long)reg_val;
633
- else if (reg_id == REG_ID("MSR"))
634
- regs->msr = (unsigned long)reg_val;
635
- else if (reg_id == REG_ID("CTR"))
636
- regs->ctr = (unsigned long)reg_val;
637
- else if (reg_id == REG_ID("LR"))
638
- regs->link = (unsigned long)reg_val;
639
- else if (reg_id == REG_ID("XER"))
640
- regs->xer = (unsigned long)reg_val;
641
- else if (reg_id == REG_ID("CR"))
642
- regs->ccr = (unsigned long)reg_val;
643
- else if (reg_id == REG_ID("DAR"))
644
- regs->dar = (unsigned long)reg_val;
645
- else if (reg_id == REG_ID("DSISR"))
646
- regs->dsisr = (unsigned long)reg_val;
647
-}
648
-
649
-static struct fadump_reg_entry*
650
-fadump_read_registers(struct fadump_reg_entry *reg_entry, struct pt_regs *regs)
651
-{
652
- memset(regs, 0, sizeof(struct pt_regs));
653
-
654
- while (be64_to_cpu(reg_entry->reg_id) != REG_ID("CPUEND")) {
655
- fadump_set_regval(regs, be64_to_cpu(reg_entry->reg_id),
656
- be64_to_cpu(reg_entry->reg_value));
657
- reg_entry++;
658
- }
659
- reg_entry++;
660
- return reg_entry;
661
-}
662
-
663
-static u32 *fadump_regs_to_elf_notes(u32 *buf, struct pt_regs *regs)
741
+u32 *fadump_regs_to_elf_notes(u32 *buf, struct pt_regs *regs)
664742 {
665743 struct elf_prstatus prstatus;
666744
....@@ -675,19 +753,17 @@
675753 return buf;
676754 }
677755
678
-static void fadump_update_elfcore_header(char *bufp)
756
+void fadump_update_elfcore_header(char *bufp)
679757 {
680
- struct elfhdr *elf;
681758 struct elf_phdr *phdr;
682759
683
- elf = (struct elfhdr *)bufp;
684760 bufp += sizeof(struct elfhdr);
685761
686762 /* First note is a place holder for cpu notes info. */
687763 phdr = (struct elf_phdr *)bufp;
688764
689765 if (phdr->p_type == PT_NOTE) {
690
- phdr->p_paddr = fw_dump.cpu_notes_buf;
766
+ phdr->p_paddr = __pa(fw_dump.cpu_notes_buf_vaddr);
691767 phdr->p_offset = phdr->p_paddr;
692768 phdr->p_filesz = fw_dump.cpu_notes_buf_size;
693769 phdr->p_memsz = fw_dump.cpu_notes_buf_size;
....@@ -695,228 +771,103 @@
695771 return;
696772 }
697773
698
-static void *fadump_cpu_notes_buf_alloc(unsigned long size)
774
+static void *fadump_alloc_buffer(unsigned long size)
699775 {
700
- void *vaddr;
776
+ unsigned long count, i;
701777 struct page *page;
702
- unsigned long order, count, i;
778
+ void *vaddr;
703779
704
- order = get_order(size);
705
- vaddr = (void *)__get_free_pages(GFP_KERNEL|__GFP_ZERO, order);
780
+ vaddr = alloc_pages_exact(size, GFP_KERNEL | __GFP_ZERO);
706781 if (!vaddr)
707782 return NULL;
708783
709
- count = 1 << order;
784
+ count = PAGE_ALIGN(size) / PAGE_SIZE;
710785 page = virt_to_page(vaddr);
711786 for (i = 0; i < count; i++)
712
- SetPageReserved(page + i);
787
+ mark_page_reserved(page + i);
713788 return vaddr;
714789 }
715790
716
-static void fadump_cpu_notes_buf_free(unsigned long vaddr, unsigned long size)
791
+static void fadump_free_buffer(unsigned long vaddr, unsigned long size)
717792 {
718
- struct page *page;
719
- unsigned long order, count, i;
720
-
721
- order = get_order(size);
722
- count = 1 << order;
723
- page = virt_to_page(vaddr);
724
- for (i = 0; i < count; i++)
725
- ClearPageReserved(page + i);
726
- __free_pages(page, order);
793
+ free_reserved_area((void *)vaddr, (void *)(vaddr + size), -1, NULL);
727794 }
728795
729
-/*
730
- * Read CPU state dump data and convert it into ELF notes.
731
- * The CPU dump starts with magic number "REGSAVE". NumCpusOffset should be
732
- * used to access the data to allow for additional fields to be added without
733
- * affecting compatibility. Each list of registers for a CPU starts with
734
- * "CPUSTRT" and ends with "CPUEND". Each register entry is of 16 bytes,
735
- * 8 Byte ASCII identifier and 8 Byte register value. The register entry
736
- * with identifier "CPUSTRT" and "CPUEND" contains 4 byte cpu id as part
737
- * of register value. For more details refer to PAPR document.
738
- *
739
- * Only for the crashing cpu we ignore the CPU dump data and get exact
740
- * state from fadump crash info structure populated by first kernel at the
741
- * time of crash.
742
- */
743
-static int __init fadump_build_cpu_notes(const struct fadump_mem_struct *fdm)
796
+s32 fadump_setup_cpu_notes_buf(u32 num_cpus)
744797 {
745
- struct fadump_reg_save_area_header *reg_header;
746
- struct fadump_reg_entry *reg_entry;
747
- struct fadump_crash_info_header *fdh = NULL;
748
- void *vaddr;
749
- unsigned long addr;
750
- u32 num_cpus, *note_buf;
751
- struct pt_regs regs;
752
- int i, rc = 0, cpu = 0;
753
-
754
- if (!fdm->cpu_state_data.bytes_dumped)
755
- return -EINVAL;
756
-
757
- addr = be64_to_cpu(fdm->cpu_state_data.destination_address);
758
- vaddr = __va(addr);
759
-
760
- reg_header = vaddr;
761
- if (be64_to_cpu(reg_header->magic_number) != REGSAVE_AREA_MAGIC) {
762
- printk(KERN_ERR "Unable to read register save area.\n");
763
- return -ENOENT;
764
- }
765
- pr_debug("--------CPU State Data------------\n");
766
- pr_debug("Magic Number: %llx\n", be64_to_cpu(reg_header->magic_number));
767
- pr_debug("NumCpuOffset: %x\n", be32_to_cpu(reg_header->num_cpu_offset));
768
-
769
- vaddr += be32_to_cpu(reg_header->num_cpu_offset);
770
- num_cpus = be32_to_cpu(*((__be32 *)(vaddr)));
771
- pr_debug("NumCpus : %u\n", num_cpus);
772
- vaddr += sizeof(u32);
773
- reg_entry = (struct fadump_reg_entry *)vaddr;
774
-
775798 /* Allocate buffer to hold cpu crash notes. */
776799 fw_dump.cpu_notes_buf_size = num_cpus * sizeof(note_buf_t);
777800 fw_dump.cpu_notes_buf_size = PAGE_ALIGN(fw_dump.cpu_notes_buf_size);
778
- note_buf = fadump_cpu_notes_buf_alloc(fw_dump.cpu_notes_buf_size);
779
- if (!note_buf) {
780
- printk(KERN_ERR "Failed to allocate 0x%lx bytes for "
781
- "cpu notes buffer\n", fw_dump.cpu_notes_buf_size);
801
+ fw_dump.cpu_notes_buf_vaddr =
802
+ (unsigned long)fadump_alloc_buffer(fw_dump.cpu_notes_buf_size);
803
+ if (!fw_dump.cpu_notes_buf_vaddr) {
804
+ pr_err("Failed to allocate %ld bytes for CPU notes buffer\n",
805
+ fw_dump.cpu_notes_buf_size);
782806 return -ENOMEM;
783807 }
784
- fw_dump.cpu_notes_buf = __pa(note_buf);
785808
786
- pr_debug("Allocated buffer for cpu notes of size %ld at %p\n",
787
- (num_cpus * sizeof(note_buf_t)), note_buf);
788
-
789
- if (fw_dump.fadumphdr_addr)
790
- fdh = __va(fw_dump.fadumphdr_addr);
791
-
792
- for (i = 0; i < num_cpus; i++) {
793
- if (be64_to_cpu(reg_entry->reg_id) != REG_ID("CPUSTRT")) {
794
- printk(KERN_ERR "Unable to read CPU state data\n");
795
- rc = -ENOENT;
796
- goto error_out;
797
- }
798
- /* Lower 4 bytes of reg_value contains logical cpu id */
799
- cpu = be64_to_cpu(reg_entry->reg_value) & FADUMP_CPU_ID_MASK;
800
- if (fdh && !cpumask_test_cpu(cpu, &fdh->online_mask)) {
801
- SKIP_TO_NEXT_CPU(reg_entry);
802
- continue;
803
- }
804
- pr_debug("Reading register data for cpu %d...\n", cpu);
805
- if (fdh && fdh->crashing_cpu == cpu) {
806
- regs = fdh->regs;
807
- note_buf = fadump_regs_to_elf_notes(note_buf, &regs);
808
- SKIP_TO_NEXT_CPU(reg_entry);
809
- } else {
810
- reg_entry++;
811
- reg_entry = fadump_read_registers(reg_entry, &regs);
812
- note_buf = fadump_regs_to_elf_notes(note_buf, &regs);
813
- }
814
- }
815
- final_note(note_buf);
816
-
817
- if (fdh) {
818
- pr_debug("Updating elfcore header (%llx) with cpu notes\n",
819
- fdh->elfcorehdr_addr);
820
- fadump_update_elfcore_header((char *)__va(fdh->elfcorehdr_addr));
821
- }
809
+ pr_debug("Allocated buffer for cpu notes of size %ld at 0x%lx\n",
810
+ fw_dump.cpu_notes_buf_size,
811
+ fw_dump.cpu_notes_buf_vaddr);
822812 return 0;
813
+}
823814
824
-error_out:
825
- fadump_cpu_notes_buf_free((unsigned long)__va(fw_dump.cpu_notes_buf),
826
- fw_dump.cpu_notes_buf_size);
827
- fw_dump.cpu_notes_buf = 0;
815
+void fadump_free_cpu_notes_buf(void)
816
+{
817
+ if (!fw_dump.cpu_notes_buf_vaddr)
818
+ return;
819
+
820
+ fadump_free_buffer(fw_dump.cpu_notes_buf_vaddr,
821
+ fw_dump.cpu_notes_buf_size);
822
+ fw_dump.cpu_notes_buf_vaddr = 0;
828823 fw_dump.cpu_notes_buf_size = 0;
829
- return rc;
824
+}
830825
826
+static void fadump_free_mem_ranges(struct fadump_mrange_info *mrange_info)
827
+{
828
+ if (mrange_info->is_static) {
829
+ mrange_info->mem_range_cnt = 0;
830
+ return;
831
+ }
832
+
833
+ kfree(mrange_info->mem_ranges);
834
+ memset((void *)((u64)mrange_info + RNG_NAME_SZ), 0,
835
+ (sizeof(struct fadump_mrange_info) - RNG_NAME_SZ));
831836 }
832837
833838 /*
834
- * Validate and process the dump data stored by firmware before exporting
835
- * it through '/proc/vmcore'.
836
- */
837
-static int __init process_fadump(const struct fadump_mem_struct *fdm_active)
838
-{
839
- struct fadump_crash_info_header *fdh;
840
- int rc = 0;
841
-
842
- if (!fdm_active || !fw_dump.fadumphdr_addr)
843
- return -EINVAL;
844
-
845
- /* Check if the dump data is valid. */
846
- if ((be16_to_cpu(fdm_active->header.dump_status_flag) == FADUMP_ERROR_FLAG) ||
847
- (fdm_active->cpu_state_data.error_flags != 0) ||
848
- (fdm_active->rmr_region.error_flags != 0)) {
849
- printk(KERN_ERR "Dump taken by platform is not valid\n");
850
- return -EINVAL;
851
- }
852
- if ((fdm_active->rmr_region.bytes_dumped !=
853
- fdm_active->rmr_region.source_len) ||
854
- !fdm_active->cpu_state_data.bytes_dumped) {
855
- printk(KERN_ERR "Dump taken by platform is incomplete\n");
856
- return -EINVAL;
857
- }
858
-
859
- /* Validate the fadump crash info header */
860
- fdh = __va(fw_dump.fadumphdr_addr);
861
- if (fdh->magic_number != FADUMP_CRASH_INFO_MAGIC) {
862
- printk(KERN_ERR "Crash info header is not valid.\n");
863
- return -EINVAL;
864
- }
865
-
866
- rc = fadump_build_cpu_notes(fdm_active);
867
- if (rc)
868
- return rc;
869
-
870
- /*
871
- * We are done validating dump info and elfcore header is now ready
872
- * to be exported. set elfcorehdr_addr so that vmcore module will
873
- * export the elfcore header through '/proc/vmcore'.
874
- */
875
- elfcorehdr_addr = fdh->elfcorehdr_addr;
876
-
877
- return 0;
878
-}
879
-
880
-static void free_crash_memory_ranges(void)
881
-{
882
- kfree(crash_memory_ranges);
883
- crash_memory_ranges = NULL;
884
- crash_memory_ranges_size = 0;
885
- max_crash_mem_ranges = 0;
886
-}
887
-
888
-/*
889
- * Allocate or reallocate crash memory ranges array in incremental units
839
+ * Allocate or reallocate mem_ranges array in incremental units
890840 * of PAGE_SIZE.
891841 */
892
-static int allocate_crash_memory_ranges(void)
842
+static int fadump_alloc_mem_ranges(struct fadump_mrange_info *mrange_info)
893843 {
894
- struct fad_crash_memory_ranges *new_array;
844
+ struct fadump_memory_range *new_array;
895845 u64 new_size;
896846
897
- new_size = crash_memory_ranges_size + PAGE_SIZE;
898
- pr_debug("Allocating %llu bytes of memory for crash memory ranges\n",
899
- new_size);
847
+ new_size = mrange_info->mem_ranges_sz + PAGE_SIZE;
848
+ pr_debug("Allocating %llu bytes of memory for %s memory ranges\n",
849
+ new_size, mrange_info->name);
900850
901
- new_array = krealloc(crash_memory_ranges, new_size, GFP_KERNEL);
851
+ new_array = krealloc(mrange_info->mem_ranges, new_size, GFP_KERNEL);
902852 if (new_array == NULL) {
903
- pr_err("Insufficient memory for setting up crash memory ranges\n");
904
- free_crash_memory_ranges();
853
+ pr_err("Insufficient memory for setting up %s memory ranges\n",
854
+ mrange_info->name);
855
+ fadump_free_mem_ranges(mrange_info);
905856 return -ENOMEM;
906857 }
907858
908
- crash_memory_ranges = new_array;
909
- crash_memory_ranges_size = new_size;
910
- max_crash_mem_ranges = (new_size /
911
- sizeof(struct fad_crash_memory_ranges));
859
+ mrange_info->mem_ranges = new_array;
860
+ mrange_info->mem_ranges_sz = new_size;
861
+ mrange_info->max_mem_ranges = (new_size /
862
+ sizeof(struct fadump_memory_range));
912863 return 0;
913864 }
914
-
915
-static inline int fadump_add_crash_memory(unsigned long long base,
916
- unsigned long long end)
865
+static inline int fadump_add_mem_range(struct fadump_mrange_info *mrange_info,
866
+ u64 base, u64 end)
917867 {
918
- u64 start, size;
868
+ struct fadump_memory_range *mem_ranges = mrange_info->mem_ranges;
919869 bool is_adjacent = false;
870
+ u64 start, size;
920871
921872 if (base == end)
922873 return 0;
....@@ -925,38 +876,52 @@
925876 * Fold adjacent memory ranges to bring down the memory ranges/
926877 * PT_LOAD segments count.
927878 */
928
- if (crash_mem_ranges) {
929
- start = crash_memory_ranges[crash_mem_ranges - 1].base;
930
- size = crash_memory_ranges[crash_mem_ranges - 1].size;
879
+ if (mrange_info->mem_range_cnt) {
880
+ start = mem_ranges[mrange_info->mem_range_cnt - 1].base;
881
+ size = mem_ranges[mrange_info->mem_range_cnt - 1].size;
931882
932
- if ((start + size) == base)
883
+ /*
884
+ * Boot memory area needs separate PT_LOAD segment(s) as it
885
+ * is moved to a different location at the time of crash.
886
+ * So, fold only if the region is not boot memory area.
887
+ */
888
+ if ((start + size) == base && start >= fw_dump.boot_mem_top)
933889 is_adjacent = true;
934890 }
935891 if (!is_adjacent) {
936892 /* resize the array on reaching the limit */
937
- if (crash_mem_ranges == max_crash_mem_ranges) {
893
+ if (mrange_info->mem_range_cnt == mrange_info->max_mem_ranges) {
938894 int ret;
939895
940
- ret = allocate_crash_memory_ranges();
896
+ if (mrange_info->is_static) {
897
+ pr_err("Reached array size limit for %s memory ranges\n",
898
+ mrange_info->name);
899
+ return -ENOSPC;
900
+ }
901
+
902
+ ret = fadump_alloc_mem_ranges(mrange_info);
941903 if (ret)
942904 return ret;
905
+
906
+ /* Update to the new resized array */
907
+ mem_ranges = mrange_info->mem_ranges;
943908 }
944909
945910 start = base;
946
- crash_memory_ranges[crash_mem_ranges].base = start;
947
- crash_mem_ranges++;
911
+ mem_ranges[mrange_info->mem_range_cnt].base = start;
912
+ mrange_info->mem_range_cnt++;
948913 }
949914
950
- crash_memory_ranges[crash_mem_ranges - 1].size = (end - start);
951
- pr_debug("crash_memory_range[%d] [%#016llx-%#016llx], %#llx bytes\n",
952
- (crash_mem_ranges - 1), start, end - 1, (end - start));
915
+ mem_ranges[mrange_info->mem_range_cnt - 1].size = (end - start);
916
+ pr_debug("%s_memory_range[%d] [%#016llx-%#016llx], %#llx bytes\n",
917
+ mrange_info->name, (mrange_info->mem_range_cnt - 1),
918
+ start, end - 1, (end - start));
953919 return 0;
954920 }
955921
956
-static int fadump_exclude_reserved_area(unsigned long long start,
957
- unsigned long long end)
922
+static int fadump_exclude_reserved_area(u64 start, u64 end)
958923 {
959
- unsigned long long ra_start, ra_end;
924
+ u64 ra_start, ra_end;
960925 int ret = 0;
961926
962927 ra_start = fw_dump.reserve_dump_area_start;
....@@ -964,18 +929,22 @@
964929
965930 if ((ra_start < end) && (ra_end > start)) {
966931 if ((start < ra_start) && (end > ra_end)) {
967
- ret = fadump_add_crash_memory(start, ra_start);
932
+ ret = fadump_add_mem_range(&crash_mrange_info,
933
+ start, ra_start);
968934 if (ret)
969935 return ret;
970936
971
- ret = fadump_add_crash_memory(ra_end, end);
937
+ ret = fadump_add_mem_range(&crash_mrange_info,
938
+ ra_end, end);
972939 } else if (start < ra_start) {
973
- ret = fadump_add_crash_memory(start, ra_start);
940
+ ret = fadump_add_mem_range(&crash_mrange_info,
941
+ start, ra_start);
974942 } else if (ra_end < end) {
975
- ret = fadump_add_crash_memory(ra_end, end);
943
+ ret = fadump_add_mem_range(&crash_mrange_info,
944
+ ra_end, end);
976945 }
977946 } else
978
- ret = fadump_add_crash_memory(start, end);
947
+ ret = fadump_add_mem_range(&crash_mrange_info, start, end);
979948
980949 return ret;
981950 }
....@@ -1019,37 +988,33 @@
1019988 */
1020989 static int fadump_setup_crash_memory_ranges(void)
1021990 {
1022
- struct memblock_region *reg;
1023
- unsigned long long start, end;
991
+ u64 i, start, end;
1024992 int ret;
1025993
1026994 pr_debug("Setup crash memory ranges.\n");
1027
- crash_mem_ranges = 0;
995
+ crash_mrange_info.mem_range_cnt = 0;
1028996
1029997 /*
1030
- * add the first memory chunk (RMA_START through boot_memory_size) as
1031
- * a separate memory chunk. The reason is, at the time crash firmware
1032
- * will move the content of this memory chunk to different location
1033
- * specified during fadump registration. We need to create a separate
1034
- * program header for this chunk with the correct offset.
998
+ * Boot memory region(s) registered with firmware are moved to
999
+ * different location at the time of crash. Create separate program
1000
+ * header(s) for these memory chunk(s) with the correct offset.
10351001 */
1036
- ret = fadump_add_crash_memory(RMA_START, fw_dump.boot_memory_size);
1037
- if (ret)
1038
- return ret;
1002
+ for (i = 0; i < fw_dump.boot_mem_regs_cnt; i++) {
1003
+ start = fw_dump.boot_mem_addr[i];
1004
+ end = start + fw_dump.boot_mem_sz[i];
1005
+ ret = fadump_add_mem_range(&crash_mrange_info, start, end);
1006
+ if (ret)
1007
+ return ret;
1008
+ }
10391009
1040
- for_each_memblock(memory, reg) {
1041
- start = (unsigned long long)reg->base;
1042
- end = start + (unsigned long long)reg->size;
1043
-
1010
+ for_each_mem_range(i, &start, &end) {
10441011 /*
1045
- * skip the first memory chunk that is already added (RMA_START
1046
- * through boot_memory_size). This logic needs a relook if and
1047
- * when RMA_START changes to a non-zero value.
1012
+ * skip the memory chunk that is already added
1013
+ * (0 through boot_mem_top).
10481014 */
1049
- BUILD_BUG_ON(RMA_START != 0);
1050
- if (start < fw_dump.boot_memory_size) {
1051
- if (end > fw_dump.boot_memory_size)
1052
- start = fw_dump.boot_memory_size;
1015
+ if (start < fw_dump.boot_mem_top) {
1016
+ if (end > fw_dump.boot_mem_top)
1017
+ start = fw_dump.boot_mem_top;
10531018 else
10541019 continue;
10551020 }
....@@ -1070,17 +1035,35 @@
10701035 */
10711036 static inline unsigned long fadump_relocate(unsigned long paddr)
10721037 {
1073
- if (paddr > RMA_START && paddr < fw_dump.boot_memory_size)
1074
- return be64_to_cpu(fdm.rmr_region.destination_address) + paddr;
1075
- else
1076
- return paddr;
1038
+ unsigned long raddr, rstart, rend, rlast, hole_size;
1039
+ int i;
1040
+
1041
+ hole_size = 0;
1042
+ rlast = 0;
1043
+ raddr = paddr;
1044
+ for (i = 0; i < fw_dump.boot_mem_regs_cnt; i++) {
1045
+ rstart = fw_dump.boot_mem_addr[i];
1046
+ rend = rstart + fw_dump.boot_mem_sz[i];
1047
+ hole_size += (rstart - rlast);
1048
+
1049
+ if (paddr >= rstart && paddr < rend) {
1050
+ raddr += fw_dump.boot_mem_dest_addr - hole_size;
1051
+ break;
1052
+ }
1053
+
1054
+ rlast = rend;
1055
+ }
1056
+
1057
+ pr_debug("vmcoreinfo: paddr = 0x%lx, raddr = 0x%lx\n", paddr, raddr);
1058
+ return raddr;
10771059 }
10781060
10791061 static int fadump_create_elfcore_headers(char *bufp)
10801062 {
1081
- struct elfhdr *elf;
1063
+ unsigned long long raddr, offset;
10821064 struct elf_phdr *phdr;
1083
- int i;
1065
+ struct elfhdr *elf;
1066
+ int i, j;
10841067
10851068 fadump_init_elfcore_header(bufp);
10861069 elf = (struct elfhdr *)bufp;
....@@ -1123,12 +1106,14 @@
11231106 (elf->e_phnum)++;
11241107
11251108 /* setup PT_LOAD sections. */
1109
+ j = 0;
1110
+ offset = 0;
1111
+ raddr = fw_dump.boot_mem_addr[0];
1112
+ for (i = 0; i < crash_mrange_info.mem_range_cnt; i++) {
1113
+ u64 mbase, msize;
11261114
1127
- for (i = 0; i < crash_mem_ranges; i++) {
1128
- unsigned long long mbase, msize;
1129
- mbase = crash_memory_ranges[i].base;
1130
- msize = crash_memory_ranges[i].size;
1131
-
1115
+ mbase = crash_mrange_info.mem_ranges[i].base;
1116
+ msize = crash_mrange_info.mem_ranges[i].size;
11321117 if (!msize)
11331118 continue;
11341119
....@@ -1138,13 +1123,17 @@
11381123 phdr->p_flags = PF_R|PF_W|PF_X;
11391124 phdr->p_offset = mbase;
11401125
1141
- if (mbase == RMA_START) {
1126
+ if (mbase == raddr) {
11421127 /*
1143
- * The entire RMA region will be moved by firmware
1144
- * to the specified destination_address. Hence set
1145
- * the correct offset.
1128
+ * The entire real memory region will be moved by
1129
+ * firmware to the specified destination_address.
1130
+ * Hence set the correct offset.
11461131 */
1147
- phdr->p_offset = be64_to_cpu(fdm.rmr_region.destination_address);
1132
+ phdr->p_offset = fw_dump.boot_mem_dest_addr + offset;
1133
+ if (j < (fw_dump.boot_mem_regs_cnt - 1)) {
1134
+ offset += fw_dump.boot_mem_sz[j];
1135
+ raddr = fw_dump.boot_mem_addr[++j];
1136
+ }
11481137 }
11491138
11501139 phdr->p_paddr = mbase;
....@@ -1166,7 +1155,6 @@
11661155 if (!addr)
11671156 return 0;
11681157
1169
- fw_dump.fadumphdr_addr = addr;
11701158 fdh = __va(addr);
11711159 addr += sizeof(struct fadump_crash_info_header);
11721160
....@@ -1174,7 +1162,7 @@
11741162 fdh->magic_number = FADUMP_CRASH_INFO_MAGIC;
11751163 fdh->elfcorehdr_addr = addr;
11761164 /* We will set the crashing cpu id in crash_fadump() during crash. */
1177
- fdh->crashing_cpu = CPU_UNKNOWN;
1165
+ fdh->crashing_cpu = FADUMP_CPU_UNKNOWN;
11781166
11791167 return addr;
11801168 }
....@@ -1196,7 +1184,8 @@
11961184 if (ret)
11971185 return ret;
11981186
1199
- addr = be64_to_cpu(fdm.rmr_region.destination_address) + be64_to_cpu(fdm.rmr_region.source_len);
1187
+ addr = fw_dump.fadumphdr_addr;
1188
+
12001189 /* Initialize fadump crash info header. */
12011190 addr = init_fadump_header(addr);
12021191 vaddr = __va(addr);
....@@ -1205,75 +1194,27 @@
12051194 fadump_create_elfcore_headers(vaddr);
12061195
12071196 /* register the future kernel dump with firmware. */
1208
- return register_fw_dump(&fdm);
1209
-}
1210
-
1211
-static int fadump_unregister_dump(struct fadump_mem_struct *fdm)
1212
-{
1213
- int rc = 0;
1214
- unsigned int wait_time;
1215
-
1216
- pr_debug("Un-register firmware-assisted dump\n");
1217
-
1218
- /* TODO: Add upper time limit for the delay */
1219
- do {
1220
- rc = rtas_call(fw_dump.ibm_configure_kernel_dump, 3, 1, NULL,
1221
- FADUMP_UNREGISTER, fdm,
1222
- sizeof(struct fadump_mem_struct));
1223
-
1224
- wait_time = rtas_busy_delay_time(rc);
1225
- if (wait_time)
1226
- mdelay(wait_time);
1227
- } while (wait_time);
1228
-
1229
- if (rc) {
1230
- printk(KERN_ERR "Failed to un-register firmware-assisted dump."
1231
- " unexpected error(%d).\n", rc);
1232
- return rc;
1233
- }
1234
- fw_dump.dump_registered = 0;
1235
- return 0;
1236
-}
1237
-
1238
-static int fadump_invalidate_dump(struct fadump_mem_struct *fdm)
1239
-{
1240
- int rc = 0;
1241
- unsigned int wait_time;
1242
-
1243
- pr_debug("Invalidating firmware-assisted dump registration\n");
1244
-
1245
- /* TODO: Add upper time limit for the delay */
1246
- do {
1247
- rc = rtas_call(fw_dump.ibm_configure_kernel_dump, 3, 1, NULL,
1248
- FADUMP_INVALIDATE, fdm,
1249
- sizeof(struct fadump_mem_struct));
1250
-
1251
- wait_time = rtas_busy_delay_time(rc);
1252
- if (wait_time)
1253
- mdelay(wait_time);
1254
- } while (wait_time);
1255
-
1256
- if (rc) {
1257
- pr_err("Failed to invalidate firmware-assisted dump registration. Unexpected error (%d).\n", rc);
1258
- return rc;
1259
- }
1260
- fw_dump.dump_active = 0;
1261
- fdm_active = NULL;
1262
- return 0;
1197
+ pr_debug("Registering for firmware-assisted kernel dump...\n");
1198
+ return fw_dump.ops->fadump_register(&fw_dump);
12631199 }
12641200
12651201 void fadump_cleanup(void)
12661202 {
1203
+ if (!fw_dump.fadump_supported)
1204
+ return;
1205
+
12671206 /* Invalidate the registration only if dump is active. */
12681207 if (fw_dump.dump_active) {
1269
- init_fadump_mem_struct(&fdm,
1270
- be64_to_cpu(fdm_active->cpu_state_data.destination_address));
1271
- fadump_invalidate_dump(&fdm);
1208
+ pr_debug("Invalidating firmware-assisted dump registration\n");
1209
+ fw_dump.ops->fadump_invalidate(&fw_dump);
12721210 } else if (fw_dump.dump_registered) {
12731211 /* Un-register Firmware-assisted dump if it was registered. */
1274
- fadump_unregister_dump(&fdm);
1275
- free_crash_memory_ranges();
1212
+ fw_dump.ops->fadump_unregister(&fw_dump);
1213
+ fadump_free_mem_ranges(&crash_mrange_info);
12761214 }
1215
+
1216
+ if (fw_dump.ops->fadump_cleanup)
1217
+ fw_dump.ops->fadump_cleanup(&fw_dump);
12771218 }
12781219
12791220 static void fadump_free_reserved_memory(unsigned long start_pfn,
....@@ -1298,95 +1239,194 @@
12981239 /*
12991240 * Skip memory holes and free memory that was actually reserved.
13001241 */
1301
-static void fadump_release_reserved_area(unsigned long start, unsigned long end)
1242
+static void fadump_release_reserved_area(u64 start, u64 end)
13021243 {
1303
- struct memblock_region *reg;
1304
- unsigned long tstart, tend;
1305
- unsigned long start_pfn = PHYS_PFN(start);
1306
- unsigned long end_pfn = PHYS_PFN(end);
1244
+ unsigned long reg_spfn, reg_epfn;
1245
+ u64 tstart, tend, spfn, epfn;
1246
+ int i;
13071247
1308
- for_each_memblock(memory, reg) {
1309
- tstart = max(start_pfn, memblock_region_memory_base_pfn(reg));
1310
- tend = min(end_pfn, memblock_region_memory_end_pfn(reg));
1248
+ spfn = PHYS_PFN(start);
1249
+ epfn = PHYS_PFN(end);
1250
+
1251
+ for_each_mem_pfn_range(i, MAX_NUMNODES, &reg_spfn, &reg_epfn, NULL) {
1252
+ tstart = max_t(u64, spfn, reg_spfn);
1253
+ tend = min_t(u64, epfn, reg_epfn);
1254
+
13111255 if (tstart < tend) {
13121256 fadump_free_reserved_memory(tstart, tend);
13131257
1314
- if (tend == end_pfn)
1258
+ if (tend == epfn)
13151259 break;
13161260
1317
- start_pfn = tend + 1;
1261
+ spfn = tend;
13181262 }
13191263 }
13201264 }
13211265
13221266 /*
1323
- * Release the memory that was reserved in early boot to preserve the memory
1324
- * contents. The released memory will be available for general use.
1267
+ * Sort the mem ranges in-place and merge adjacent ranges
1268
+ * to minimize the memory ranges count.
13251269 */
1326
-static void fadump_release_memory(unsigned long begin, unsigned long end)
1270
+static void sort_and_merge_mem_ranges(struct fadump_mrange_info *mrange_info)
13271271 {
1328
- unsigned long ra_start, ra_end;
1272
+ struct fadump_memory_range *mem_ranges;
1273
+ struct fadump_memory_range tmp_range;
1274
+ u64 base, size;
1275
+ int i, j, idx;
1276
+
1277
+ if (!reserved_mrange_info.mem_range_cnt)
1278
+ return;
1279
+
1280
+ /* Sort the memory ranges */
1281
+ mem_ranges = mrange_info->mem_ranges;
1282
+ for (i = 0; i < mrange_info->mem_range_cnt; i++) {
1283
+ idx = i;
1284
+ for (j = (i + 1); j < mrange_info->mem_range_cnt; j++) {
1285
+ if (mem_ranges[idx].base > mem_ranges[j].base)
1286
+ idx = j;
1287
+ }
1288
+ if (idx != i) {
1289
+ tmp_range = mem_ranges[idx];
1290
+ mem_ranges[idx] = mem_ranges[i];
1291
+ mem_ranges[i] = tmp_range;
1292
+ }
1293
+ }
1294
+
1295
+ /* Merge adjacent reserved ranges */
1296
+ idx = 0;
1297
+ for (i = 1; i < mrange_info->mem_range_cnt; i++) {
1298
+ base = mem_ranges[i-1].base;
1299
+ size = mem_ranges[i-1].size;
1300
+ if (mem_ranges[i].base == (base + size))
1301
+ mem_ranges[idx].size += mem_ranges[i].size;
1302
+ else {
1303
+ idx++;
1304
+ if (i == idx)
1305
+ continue;
1306
+
1307
+ mem_ranges[idx] = mem_ranges[i];
1308
+ }
1309
+ }
1310
+ mrange_info->mem_range_cnt = idx + 1;
1311
+}
1312
+
1313
+/*
1314
+ * Scan reserved-ranges to consider them while reserving/releasing
1315
+ * memory for FADump.
1316
+ */
1317
+static void __init early_init_dt_scan_reserved_ranges(unsigned long node)
1318
+{
1319
+ const __be32 *prop;
1320
+ int len, ret = -1;
1321
+ unsigned long i;
1322
+
1323
+ /* reserved-ranges already scanned */
1324
+ if (reserved_mrange_info.mem_range_cnt != 0)
1325
+ return;
1326
+
1327
+ prop = of_get_flat_dt_prop(node, "reserved-ranges", &len);
1328
+ if (!prop)
1329
+ return;
1330
+
1331
+ /*
1332
+ * Each reserved range is an (address,size) pair, 2 cells each,
1333
+ * totalling 4 cells per range.
1334
+ */
1335
+ for (i = 0; i < len / (sizeof(*prop) * 4); i++) {
1336
+ u64 base, size;
1337
+
1338
+ base = of_read_number(prop + (i * 4) + 0, 2);
1339
+ size = of_read_number(prop + (i * 4) + 2, 2);
1340
+
1341
+ if (size) {
1342
+ ret = fadump_add_mem_range(&reserved_mrange_info,
1343
+ base, base + size);
1344
+ if (ret < 0) {
1345
+ pr_warn("some reserved ranges are ignored!\n");
1346
+ break;
1347
+ }
1348
+ }
1349
+ }
1350
+
1351
+ /* Compact reserved ranges */
1352
+ sort_and_merge_mem_ranges(&reserved_mrange_info);
1353
+}
1354
+
1355
+/*
1356
+ * Release the memory that was reserved during early boot to preserve the
1357
+ * crashed kernel's memory contents except reserved dump area (permanent
1358
+ * reservation) and reserved ranges used by F/W. The released memory will
1359
+ * be available for general use.
1360
+ */
1361
+static void fadump_release_memory(u64 begin, u64 end)
1362
+{
1363
+ u64 ra_start, ra_end, tstart;
1364
+ int i, ret;
13291365
13301366 ra_start = fw_dump.reserve_dump_area_start;
13311367 ra_end = ra_start + fw_dump.reserve_dump_area_size;
13321368
13331369 /*
1334
- * exclude the dump reserve area. Will reuse it for next
1335
- * fadump registration.
1370
+ * If reserved ranges array limit is hit, overwrite the last reserved
1371
+ * memory range with reserved dump area to ensure it is excluded from
1372
+ * the memory being released (reused for next FADump registration).
13361373 */
1337
- if (begin < ra_end && end > ra_start) {
1338
- if (begin < ra_start)
1339
- fadump_release_reserved_area(begin, ra_start);
1340
- if (end > ra_end)
1341
- fadump_release_reserved_area(ra_end, end);
1342
- } else
1343
- fadump_release_reserved_area(begin, end);
1374
+ if (reserved_mrange_info.mem_range_cnt ==
1375
+ reserved_mrange_info.max_mem_ranges)
1376
+ reserved_mrange_info.mem_range_cnt--;
1377
+
1378
+ ret = fadump_add_mem_range(&reserved_mrange_info, ra_start, ra_end);
1379
+ if (ret != 0)
1380
+ return;
1381
+
1382
+ /* Get the reserved ranges list in order first. */
1383
+ sort_and_merge_mem_ranges(&reserved_mrange_info);
1384
+
1385
+ /* Exclude reserved ranges and release remaining memory */
1386
+ tstart = begin;
1387
+ for (i = 0; i < reserved_mrange_info.mem_range_cnt; i++) {
1388
+ ra_start = reserved_mrange_info.mem_ranges[i].base;
1389
+ ra_end = ra_start + reserved_mrange_info.mem_ranges[i].size;
1390
+
1391
+ if (tstart >= ra_end)
1392
+ continue;
1393
+
1394
+ if (tstart < ra_start)
1395
+ fadump_release_reserved_area(tstart, ra_start);
1396
+ tstart = ra_end;
1397
+ }
1398
+
1399
+ if (tstart < end)
1400
+ fadump_release_reserved_area(tstart, end);
13441401 }
13451402
13461403 static void fadump_invalidate_release_mem(void)
13471404 {
1348
- unsigned long reserved_area_start, reserved_area_end;
1349
- unsigned long destination_address;
1350
-
13511405 mutex_lock(&fadump_mutex);
13521406 if (!fw_dump.dump_active) {
13531407 mutex_unlock(&fadump_mutex);
13541408 return;
13551409 }
13561410
1357
- destination_address = be64_to_cpu(fdm_active->cpu_state_data.destination_address);
13581411 fadump_cleanup();
13591412 mutex_unlock(&fadump_mutex);
13601413
1361
- /*
1362
- * Save the current reserved memory bounds we will require them
1363
- * later for releasing the memory for general use.
1364
- */
1365
- reserved_area_start = fw_dump.reserve_dump_area_start;
1366
- reserved_area_end = reserved_area_start +
1367
- fw_dump.reserve_dump_area_size;
1368
- /*
1369
- * Setup reserve_dump_area_start and its size so that we can
1370
- * reuse this reserved memory for Re-registration.
1371
- */
1372
- fw_dump.reserve_dump_area_start = destination_address;
1373
- fw_dump.reserve_dump_area_size = get_fadump_area_size();
1414
+ fadump_release_memory(fw_dump.boot_mem_top, memblock_end_of_DRAM());
1415
+ fadump_free_cpu_notes_buf();
13741416
1375
- fadump_release_memory(reserved_area_start, reserved_area_end);
1376
- if (fw_dump.cpu_notes_buf) {
1377
- fadump_cpu_notes_buf_free(
1378
- (unsigned long)__va(fw_dump.cpu_notes_buf),
1379
- fw_dump.cpu_notes_buf_size);
1380
- fw_dump.cpu_notes_buf = 0;
1381
- fw_dump.cpu_notes_buf_size = 0;
1382
- }
1383
- /* Initialize the kernel dump memory structure for FAD registration. */
1384
- init_fadump_mem_struct(&fdm, fw_dump.reserve_dump_area_start);
1417
+ /*
1418
+ * Setup kernel metadata and initialize the kernel dump
1419
+ * memory structure for FADump re-registration.
1420
+ */
1421
+ if (fw_dump.ops->fadump_setup_metadata &&
1422
+ (fw_dump.ops->fadump_setup_metadata(&fw_dump) < 0))
1423
+ pr_warn("Failed to setup kernel metadata!\n");
1424
+ fw_dump.ops->fadump_init_mem_struct(&fw_dump);
13851425 }
13861426
1387
-static ssize_t fadump_release_memory_store(struct kobject *kobj,
1388
- struct kobj_attribute *attr,
1389
- const char *buf, size_t count)
1427
+static ssize_t release_mem_store(struct kobject *kobj,
1428
+ struct kobj_attribute *attr,
1429
+ const char *buf, size_t count)
13901430 {
13911431 int input = -1;
13921432
....@@ -1411,28 +1451,45 @@
14111451 return count;
14121452 }
14131453
1414
-static ssize_t fadump_enabled_show(struct kobject *kobj,
1415
- struct kobj_attribute *attr,
1416
- char *buf)
1454
+/* Release the reserved memory and disable the FADump */
1455
+static void unregister_fadump(void)
1456
+{
1457
+ fadump_cleanup();
1458
+ fadump_release_memory(fw_dump.reserve_dump_area_start,
1459
+ fw_dump.reserve_dump_area_size);
1460
+ fw_dump.fadump_enabled = 0;
1461
+ kobject_put(fadump_kobj);
1462
+}
1463
+
1464
+static ssize_t enabled_show(struct kobject *kobj,
1465
+ struct kobj_attribute *attr,
1466
+ char *buf)
14171467 {
14181468 return sprintf(buf, "%d\n", fw_dump.fadump_enabled);
14191469 }
14201470
1421
-static ssize_t fadump_register_show(struct kobject *kobj,
1422
- struct kobj_attribute *attr,
1423
- char *buf)
1471
+static ssize_t mem_reserved_show(struct kobject *kobj,
1472
+ struct kobj_attribute *attr,
1473
+ char *buf)
1474
+{
1475
+ return sprintf(buf, "%ld\n", fw_dump.reserve_dump_area_size);
1476
+}
1477
+
1478
+static ssize_t registered_show(struct kobject *kobj,
1479
+ struct kobj_attribute *attr,
1480
+ char *buf)
14241481 {
14251482 return sprintf(buf, "%d\n", fw_dump.dump_registered);
14261483 }
14271484
1428
-static ssize_t fadump_register_store(struct kobject *kobj,
1429
- struct kobj_attribute *attr,
1430
- const char *buf, size_t count)
1485
+static ssize_t registered_store(struct kobject *kobj,
1486
+ struct kobj_attribute *attr,
1487
+ const char *buf, size_t count)
14311488 {
14321489 int ret = 0;
14331490 int input = -1;
14341491
1435
- if (!fw_dump.fadump_enabled || fdm_active)
1492
+ if (!fw_dump.fadump_enabled || fw_dump.dump_active)
14361493 return -EPERM;
14371494
14381495 if (kstrtoint(buf, 0, &input))
....@@ -1445,13 +1502,15 @@
14451502 if (fw_dump.dump_registered == 0) {
14461503 goto unlock_out;
14471504 }
1505
+
14481506 /* Un-register Firmware-assisted dump */
1449
- fadump_unregister_dump(&fdm);
1507
+ pr_debug("Un-register firmware-assisted dump\n");
1508
+ fw_dump.ops->fadump_unregister(&fw_dump);
14501509 break;
14511510 case 1:
14521511 if (fw_dump.dump_registered == 1) {
1453
- ret = -EEXIST;
1454
- goto unlock_out;
1512
+ /* Un-register Firmware-assisted dump */
1513
+ fw_dump.ops->fadump_unregister(&fw_dump);
14551514 }
14561515 /* Register Firmware-assisted dump */
14571516 ret = register_fadump();
....@@ -1468,114 +1527,91 @@
14681527
14691528 static int fadump_region_show(struct seq_file *m, void *private)
14701529 {
1471
- const struct fadump_mem_struct *fdm_ptr;
1472
-
14731530 if (!fw_dump.fadump_enabled)
14741531 return 0;
14751532
14761533 mutex_lock(&fadump_mutex);
1477
- if (fdm_active)
1478
- fdm_ptr = fdm_active;
1479
- else {
1480
- mutex_unlock(&fadump_mutex);
1481
- fdm_ptr = &fdm;
1482
- }
1483
-
1484
- seq_printf(m,
1485
- "CPU : [%#016llx-%#016llx] %#llx bytes, "
1486
- "Dumped: %#llx\n",
1487
- be64_to_cpu(fdm_ptr->cpu_state_data.destination_address),
1488
- be64_to_cpu(fdm_ptr->cpu_state_data.destination_address) +
1489
- be64_to_cpu(fdm_ptr->cpu_state_data.source_len) - 1,
1490
- be64_to_cpu(fdm_ptr->cpu_state_data.source_len),
1491
- be64_to_cpu(fdm_ptr->cpu_state_data.bytes_dumped));
1492
- seq_printf(m,
1493
- "HPTE: [%#016llx-%#016llx] %#llx bytes, "
1494
- "Dumped: %#llx\n",
1495
- be64_to_cpu(fdm_ptr->hpte_region.destination_address),
1496
- be64_to_cpu(fdm_ptr->hpte_region.destination_address) +
1497
- be64_to_cpu(fdm_ptr->hpte_region.source_len) - 1,
1498
- be64_to_cpu(fdm_ptr->hpte_region.source_len),
1499
- be64_to_cpu(fdm_ptr->hpte_region.bytes_dumped));
1500
- seq_printf(m,
1501
- "DUMP: [%#016llx-%#016llx] %#llx bytes, "
1502
- "Dumped: %#llx\n",
1503
- be64_to_cpu(fdm_ptr->rmr_region.destination_address),
1504
- be64_to_cpu(fdm_ptr->rmr_region.destination_address) +
1505
- be64_to_cpu(fdm_ptr->rmr_region.source_len) - 1,
1506
- be64_to_cpu(fdm_ptr->rmr_region.source_len),
1507
- be64_to_cpu(fdm_ptr->rmr_region.bytes_dumped));
1508
-
1509
- if (!fdm_active ||
1510
- (fw_dump.reserve_dump_area_start ==
1511
- be64_to_cpu(fdm_ptr->cpu_state_data.destination_address)))
1512
- goto out;
1513
-
1514
- /* Dump is active. Show reserved memory region. */
1515
- seq_printf(m,
1516
- " : [%#016llx-%#016llx] %#llx bytes, "
1517
- "Dumped: %#llx\n",
1518
- (unsigned long long)fw_dump.reserve_dump_area_start,
1519
- be64_to_cpu(fdm_ptr->cpu_state_data.destination_address) - 1,
1520
- be64_to_cpu(fdm_ptr->cpu_state_data.destination_address) -
1521
- fw_dump.reserve_dump_area_start,
1522
- be64_to_cpu(fdm_ptr->cpu_state_data.destination_address) -
1523
- fw_dump.reserve_dump_area_start);
1524
-out:
1525
- if (fdm_active)
1526
- mutex_unlock(&fadump_mutex);
1534
+ fw_dump.ops->fadump_region_show(&fw_dump, m);
1535
+ mutex_unlock(&fadump_mutex);
15271536 return 0;
15281537 }
15291538
1530
-static struct kobj_attribute fadump_release_attr = __ATTR(fadump_release_mem,
1531
- 0200, NULL,
1532
- fadump_release_memory_store);
1533
-static struct kobj_attribute fadump_attr = __ATTR(fadump_enabled,
1534
- 0444, fadump_enabled_show,
1535
- NULL);
1536
-static struct kobj_attribute fadump_register_attr = __ATTR(fadump_registered,
1537
- 0644, fadump_register_show,
1538
- fadump_register_store);
1539
+static struct kobj_attribute release_attr = __ATTR_WO(release_mem);
1540
+static struct kobj_attribute enable_attr = __ATTR_RO(enabled);
1541
+static struct kobj_attribute register_attr = __ATTR_RW(registered);
1542
+static struct kobj_attribute mem_reserved_attr = __ATTR_RO(mem_reserved);
15391543
1540
-static int fadump_region_open(struct inode *inode, struct file *file)
1541
-{
1542
- return single_open(file, fadump_region_show, inode->i_private);
1543
-}
1544
-
1545
-static const struct file_operations fadump_region_fops = {
1546
- .open = fadump_region_open,
1547
- .read = seq_read,
1548
- .llseek = seq_lseek,
1549
- .release = single_release,
1544
+static struct attribute *fadump_attrs[] = {
1545
+ &enable_attr.attr,
1546
+ &register_attr.attr,
1547
+ &mem_reserved_attr.attr,
1548
+ NULL,
15501549 };
1550
+
1551
+ATTRIBUTE_GROUPS(fadump);
1552
+
1553
+DEFINE_SHOW_ATTRIBUTE(fadump_region);
15511554
15521555 static void fadump_init_files(void)
15531556 {
1554
- struct dentry *debugfs_file;
15551557 int rc = 0;
15561558
1557
- rc = sysfs_create_file(kernel_kobj, &fadump_attr.attr);
1558
- if (rc)
1559
- printk(KERN_ERR "fadump: unable to create sysfs file"
1560
- " fadump_enabled (%d)\n", rc);
1559
+ fadump_kobj = kobject_create_and_add("fadump", kernel_kobj);
1560
+ if (!fadump_kobj) {
1561
+ pr_err("failed to create fadump kobject\n");
1562
+ return;
1563
+ }
15611564
1562
- rc = sysfs_create_file(kernel_kobj, &fadump_register_attr.attr);
1563
- if (rc)
1564
- printk(KERN_ERR "fadump: unable to create sysfs file"
1565
- " fadump_registered (%d)\n", rc);
1566
-
1567
- debugfs_file = debugfs_create_file("fadump_region", 0444,
1568
- powerpc_debugfs_root, NULL,
1569
- &fadump_region_fops);
1570
- if (!debugfs_file)
1571
- printk(KERN_ERR "fadump: unable to create debugfs file"
1572
- " fadump_region\n");
1565
+ debugfs_create_file("fadump_region", 0444, powerpc_debugfs_root, NULL,
1566
+ &fadump_region_fops);
15731567
15741568 if (fw_dump.dump_active) {
1575
- rc = sysfs_create_file(kernel_kobj, &fadump_release_attr.attr);
1569
+ rc = sysfs_create_file(fadump_kobj, &release_attr.attr);
15761570 if (rc)
1577
- printk(KERN_ERR "fadump: unable to create sysfs file"
1578
- " fadump_release_mem (%d)\n", rc);
1571
+ pr_err("unable to create release_mem sysfs file (%d)\n",
1572
+ rc);
1573
+ }
1574
+
1575
+ rc = sysfs_create_groups(fadump_kobj, fadump_groups);
1576
+ if (rc) {
1577
+ pr_err("sysfs group creation failed (%d), unregistering FADump",
1578
+ rc);
1579
+ unregister_fadump();
1580
+ return;
1581
+ }
1582
+
1583
+ /*
1584
+ * The FADump sysfs files have moved from kernel_kobj to fadump_kobj, so
1585
+ * create symlinks at the old locations to maintain backward compatibility.
1586
+ *
1587
+ * - fadump_enabled -> fadump/enabled
1588
+ * - fadump_registered -> fadump/registered
1589
+ * - fadump_release_mem -> fadump/release_mem
1590
+ */
1591
+ rc = compat_only_sysfs_link_entry_to_kobj(kernel_kobj, fadump_kobj,
1592
+ "enabled", "fadump_enabled");
1593
+ if (rc) {
1594
+ pr_err("unable to create fadump_enabled symlink (%d)", rc);
1595
+ return;
1596
+ }
1597
+
1598
+ rc = compat_only_sysfs_link_entry_to_kobj(kernel_kobj, fadump_kobj,
1599
+ "registered",
1600
+ "fadump_registered");
1601
+ if (rc) {
1602
+ pr_err("unable to create fadump_registered symlink (%d)", rc);
1603
+ sysfs_remove_link(kernel_kobj, "fadump_enabled");
1604
+ return;
1605
+ }
1606
+
1607
+ if (fw_dump.dump_active) {
1608
+ rc = compat_only_sysfs_link_entry_to_kobj(kernel_kobj,
1609
+ fadump_kobj,
1610
+ "release_mem",
1611
+ "fadump_release_mem");
1612
+ if (rc)
1613
+ pr_err("unable to create fadump_release_mem symlink (%d)",
1614
+ rc);
15791615 }
15801616 return;
15811617 }
....@@ -1585,16 +1621,15 @@
15851621 */
15861622 int __init setup_fadump(void)
15871623 {
1588
- if (!fw_dump.fadump_enabled)
1624
+ if (!fw_dump.fadump_supported)
15891625 return 0;
15901626
1591
- if (!fw_dump.fadump_supported) {
1592
- printk(KERN_ERR "Firmware-assisted dump is not supported on"
1593
- " this hardware\n");
1594
- return 0;
1595
- }
1596
-
1627
+ fadump_init_files();
15971628 fadump_show_config();
1629
+
1630
+ if (!fw_dump.fadump_enabled)
1631
+ return 1;
1632
+
15981633 /*
15991634 * If dump data is available then see if it is valid and prepare for
16001635 * saving it to the disk.
....@@ -1604,14 +1639,81 @@
16041639 * if dump process fails then invalidate the registration
16051640 * and release memory before proceeding for re-registration.
16061641 */
1607
- if (process_fadump(fdm_active) < 0)
1642
+ if (fw_dump.ops->fadump_process(&fw_dump) < 0)
16081643 fadump_invalidate_release_mem();
16091644 }
16101645 /* Initialize the kernel dump memory structure for FAD registration. */
16111646 else if (fw_dump.reserve_dump_area_size)
1612
- init_fadump_mem_struct(&fdm, fw_dump.reserve_dump_area_start);
1613
- fadump_init_files();
1647
+ fw_dump.ops->fadump_init_mem_struct(&fw_dump);
1648
+
1649
+ /*
1650
+ * In case of panic, fadump is triggered via ppc_panic_event()
1651
+ * panic notifier. Setting crash_kexec_post_notifiers to 'true'
1652
+ * lets panic() function take crash friendly path before panic
1653
+ * notifiers are invoked.
1654
+ */
1655
+ crash_kexec_post_notifiers = true;
16141656
16151657 return 1;
16161658 }
16171659 subsys_initcall(setup_fadump);
1660
+#else /* !CONFIG_PRESERVE_FA_DUMP */
1661
+
1662
+/* Scan the Firmware Assisted dump configuration details. */
1663
+int __init early_init_dt_scan_fw_dump(unsigned long node, const char *uname,
1664
+ int depth, void *data)
1665
+{
1666
+ if ((depth != 1) || (strcmp(uname, "ibm,opal") != 0))
1667
+ return 0;
1668
+
1669
+ opal_fadump_dt_scan(&fw_dump, node);
1670
+ return 1;
1671
+}
1672
+
1673
+/*
1674
+ * When dump is active but PRESERVE_FA_DUMP is enabled on the kernel,
1675
+ * preserve crash data. The subsequent memory preserving kernel boot
1676
+ * is likely to process this crash data.
1677
+ */
1678
+int __init fadump_reserve_mem(void)
1679
+{
1680
+ if (fw_dump.dump_active) {
1681
+ /*
1682
+ * If last boot has crashed then reserve all the memory
1683
+ * above boot memory to preserve crash data.
1684
+ */
1685
+ pr_info("Preserving crash data for processing in next boot.\n");
1686
+ fadump_reserve_crash_area(fw_dump.boot_mem_top);
1687
+ } else
1688
+ pr_debug("FADump-aware kernel..\n");
1689
+
1690
+ return 1;
1691
+}
1692
+#endif /* CONFIG_PRESERVE_FA_DUMP */
1693
+
1694
+/* Preserve everything above the base address */
1695
+static void __init fadump_reserve_crash_area(u64 base)
1696
+{
1697
+ u64 i, mstart, mend, msize;
1698
+
1699
+ for_each_mem_range(i, &mstart, &mend) {
1700
+ msize = mend - mstart;
1701
+
1702
+ if ((mstart + msize) < base)
1703
+ continue;
1704
+
1705
+ if (mstart < base) {
1706
+ msize -= (base - mstart);
1707
+ mstart = base;
1708
+ }
1709
+
1710
+ pr_info("Reserving %lluMB of memory at %#016llx for preserving crash data",
1711
+ (msize >> 20), mstart);
1712
+ memblock_reserve(mstart, msize);
1713
+ }
1714
+}
1715
+
1716
+unsigned long __init arch_reserved_kernel_pages(void)
1717
+{
1718
+ return memblock_reserved_size() / PAGE_SIZE;
1719
+}