forked from ~ljy/RK356X_SDK_RELEASE

hc
2024-01-04 1543e317f1da31b75942316931e8f491a8920811
kernel/drivers/acpi/apei/ghes.c
....@@ -1,3 +1,4 @@
1
+// SPDX-License-Identifier: GPL-2.0-only
12 /*
23 * APEI Generic Hardware Error Source support
34 *
....@@ -14,17 +15,9 @@
1415 *
1516 * Copyright 2010,2011 Intel Corp.
1617 * Author: Huang Ying <ying.huang@intel.com>
17
- *
18
- * This program is free software; you can redistribute it and/or
19
- * modify it under the terms of the GNU General Public License version
20
- * 2 as published by the Free Software Foundation;
21
- *
22
- * This program is distributed in the hope that it will be useful,
23
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
24
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25
- * GNU General Public License for more details.
2618 */
2719
20
+#include <linux/arm_sdei.h>
2821 #include <linux/kernel.h>
2922 #include <linux/moduleparam.h>
3023 #include <linux/init.h>
....@@ -41,11 +34,13 @@
4134 #include <linux/llist.h>
4235 #include <linux/genalloc.h>
4336 #include <linux/pci.h>
37
+#include <linux/pfn.h>
4438 #include <linux/aer.h>
4539 #include <linux/nmi.h>
4640 #include <linux/sched/clock.h>
4741 #include <linux/uuid.h>
4842 #include <linux/ras.h>
43
+#include <linux/task_work.h>
4944
5045 #include <acpi/actbl1.h>
5146 #include <acpi/ghes.h>
....@@ -84,6 +79,21 @@
8479 ((struct acpi_hest_generic_status *) \
8580 ((struct ghes_estatus_node *)(estatus_node) + 1))
8681
82
+#define GHES_VENDOR_ENTRY_LEN(gdata_len) \
83
+ (sizeof(struct ghes_vendor_record_entry) + (gdata_len))
84
+#define GHES_GDATA_FROM_VENDOR_ENTRY(vendor_entry) \
85
+ ((struct acpi_hest_generic_data *) \
86
+ ((struct ghes_vendor_record_entry *)(vendor_entry) + 1))
87
+
88
+/*
89
+ * NMI-like notifications vary by architecture, before the compiler can prune
90
+ * unused static functions it needs a value for these enums.
91
+ */
92
+#ifndef CONFIG_ARM_SDE_INTERFACE
93
+#define FIX_APEI_GHES_SDEI_NORMAL __end_of_fixed_addresses
94
+#define FIX_APEI_GHES_SDEI_CRITICAL __end_of_fixed_addresses
95
+#endif
96
+
8797 static inline bool is_hest_type_generic_v2(struct ghes *ghes)
8898 {
8999 return ghes->generic->header.type == ACPI_HEST_TYPE_GENERIC_ERROR_V2;
....@@ -114,11 +124,16 @@
114124 * handler, but general ioremap can not be used in atomic context, so
115125 * the fixmap is used instead.
116126 *
117
- * These 2 spinlocks are used to prevent the fixmap entries from being used
127
+ * This spinlock is used to prevent the fixmap entry from being used
118128 * simultaneously.
119129 */
120
-static DEFINE_RAW_SPINLOCK(ghes_ioremap_lock_nmi);
121
-static DEFINE_SPINLOCK(ghes_ioremap_lock_irq);
130
+static DEFINE_SPINLOCK(ghes_notify_lock_irq);
131
+
132
+struct ghes_vendor_record_entry {
133
+ struct work_struct work;
134
+ int error_severity;
135
+ char vendor_record[];
136
+};
122137
123138 static struct gen_pool *ghes_estatus_pool;
124139 static unsigned long ghes_estatus_pool_size_request;
....@@ -128,82 +143,56 @@
128143
129144 static int ghes_panic_timeout __read_mostly = 30;
130145
131
-static void __iomem *ghes_ioremap_pfn_nmi(u64 pfn)
146
+static void __iomem *ghes_map(u64 pfn, enum fixed_addresses fixmap_idx)
132147 {
133148 phys_addr_t paddr;
134149 pgprot_t prot;
135150
136
- paddr = pfn << PAGE_SHIFT;
151
+ paddr = PFN_PHYS(pfn);
137152 prot = arch_apei_get_mem_attribute(paddr);
138
- __set_fixmap(FIX_APEI_GHES_NMI, paddr, prot);
153
+ __set_fixmap(fixmap_idx, paddr, prot);
139154
140
- return (void __iomem *) fix_to_virt(FIX_APEI_GHES_NMI);
155
+ return (void __iomem *) __fix_to_virt(fixmap_idx);
141156 }
142157
143
-static void __iomem *ghes_ioremap_pfn_irq(u64 pfn)
158
+static void ghes_unmap(void __iomem *vaddr, enum fixed_addresses fixmap_idx)
144159 {
145
- phys_addr_t paddr;
146
- pgprot_t prot;
160
+ int _idx = virt_to_fix((unsigned long)vaddr);
147161
148
- paddr = pfn << PAGE_SHIFT;
149
- prot = arch_apei_get_mem_attribute(paddr);
150
- __set_fixmap(FIX_APEI_GHES_IRQ, paddr, prot);
151
-
152
- return (void __iomem *) fix_to_virt(FIX_APEI_GHES_IRQ);
162
+ WARN_ON_ONCE(fixmap_idx != _idx);
163
+ clear_fixmap(fixmap_idx);
153164 }
154165
155
-static void ghes_iounmap_nmi(void)
166
+int ghes_estatus_pool_init(unsigned int num_ghes)
156167 {
157
- clear_fixmap(FIX_APEI_GHES_NMI);
158
-}
168
+ unsigned long addr, len;
169
+ int rc;
159170
160
-static void ghes_iounmap_irq(void)
161
-{
162
- clear_fixmap(FIX_APEI_GHES_IRQ);
163
-}
164
-
165
-static int ghes_estatus_pool_init(void)
166
-{
167171 ghes_estatus_pool = gen_pool_create(GHES_ESTATUS_POOL_MIN_ALLOC_ORDER, -1);
168172 if (!ghes_estatus_pool)
169173 return -ENOMEM;
170
- return 0;
171
-}
172174
173
-static void ghes_estatus_pool_free_chunk(struct gen_pool *pool,
174
- struct gen_pool_chunk *chunk,
175
- void *data)
176
-{
177
- vfree((void *)chunk->start_addr);
178
-}
175
+ len = GHES_ESTATUS_CACHE_AVG_SIZE * GHES_ESTATUS_CACHE_ALLOCED_MAX;
176
+ len += (num_ghes * GHES_ESOURCE_PREALLOC_MAX_SIZE);
179177
180
-static void ghes_estatus_pool_exit(void)
181
-{
182
- gen_pool_for_each_chunk(ghes_estatus_pool,
183
- ghes_estatus_pool_free_chunk, NULL);
184
- gen_pool_destroy(ghes_estatus_pool);
185
-}
186
-
187
-static int ghes_estatus_pool_expand(unsigned long len)
188
-{
189
- unsigned long size, addr;
190
-
191
- ghes_estatus_pool_size_request += PAGE_ALIGN(len);
192
- size = gen_pool_size(ghes_estatus_pool);
193
- if (size >= ghes_estatus_pool_size_request)
194
- return 0;
195
-
178
+ ghes_estatus_pool_size_request = PAGE_ALIGN(len);
196179 addr = (unsigned long)vmalloc(PAGE_ALIGN(len));
197180 if (!addr)
198
- return -ENOMEM;
181
+ goto err_pool_alloc;
199182
200
- /*
201
- * New allocation must be visible in all pgd before it can be found by
202
- * an NMI allocating from the pool.
203
- */
204
- vmalloc_sync_mappings();
183
+ rc = gen_pool_add(ghes_estatus_pool, addr, PAGE_ALIGN(len), -1);
184
+ if (rc)
185
+ goto err_pool_add;
205186
206
- return gen_pool_add(ghes_estatus_pool, addr, PAGE_ALIGN(len), -1);
187
+ return 0;
188
+
189
+err_pool_add:
190
+ vfree((void *)addr);
191
+
192
+err_pool_alloc:
193
+ gen_pool_destroy(ghes_estatus_pool);
194
+
195
+ return -ENOMEM;
207196 }
208197
209198 static int map_gen_v2(struct ghes *ghes)
....@@ -214,6 +203,21 @@
214203 static void unmap_gen_v2(struct ghes *ghes)
215204 {
216205 apei_unmap_generic_address(&ghes->generic_v2->read_ack_register);
206
+}
207
+
208
+static void ghes_ack_error(struct acpi_hest_generic_v2 *gv2)
209
+{
210
+ int rc;
211
+ u64 val = 0;
212
+
213
+ rc = apei_read(&val, &gv2->read_ack_register);
214
+ if (rc)
215
+ return;
216
+
217
+ val &= gv2->read_ack_preserve << gv2->read_ack_register.bit_offset;
218
+ val |= gv2->read_ack_write << gv2->read_ack_register.bit_offset;
219
+
220
+ apei_write(val, &gv2->read_ack_register);
217221 }
218222
219223 static struct ghes *ghes_new(struct acpi_hest_generic *generic)
....@@ -238,10 +242,10 @@
238242 goto err_unmap_read_ack_addr;
239243 error_block_length = generic->error_block_length;
240244 if (error_block_length > GHES_ESTATUS_MAX_SIZE) {
241
- pr_warning(FW_WARN GHES_PFX
242
- "Error status block length is too long: %u for "
243
- "generic hardware error source: %d.\n",
244
- error_block_length, generic->header.source_id);
245
+ pr_warn(FW_WARN GHES_PFX
246
+ "Error status block length is too long: %u for "
247
+ "generic hardware error source: %d.\n",
248
+ error_block_length, generic->header.source_id);
245249 error_block_length = GHES_ESTATUS_MAX_SIZE;
246250 }
247251 ghes->estatus = kmalloc(error_block_length, GFP_KERNEL);
....@@ -288,23 +292,16 @@
288292 }
289293
290294 static void ghes_copy_tofrom_phys(void *buffer, u64 paddr, u32 len,
291
- int from_phys)
295
+ int from_phys,
296
+ enum fixed_addresses fixmap_idx)
292297 {
293298 void __iomem *vaddr;
294
- unsigned long flags = 0;
295
- int in_nmi = in_nmi();
296299 u64 offset;
297300 u32 trunk;
298301
299302 while (len > 0) {
300303 offset = paddr - (paddr & PAGE_MASK);
301
- if (in_nmi) {
302
- raw_spin_lock(&ghes_ioremap_lock_nmi);
303
- vaddr = ghes_ioremap_pfn_nmi(paddr >> PAGE_SHIFT);
304
- } else {
305
- spin_lock_irqsave(&ghes_ioremap_lock_irq, flags);
306
- vaddr = ghes_ioremap_pfn_irq(paddr >> PAGE_SHIFT);
307
- }
304
+ vaddr = ghes_map(PHYS_PFN(paddr), fixmap_idx);
308305 trunk = PAGE_SIZE - offset;
309306 trunk = min(trunk, len);
310307 if (from_phys)
....@@ -314,92 +311,164 @@
314311 len -= trunk;
315312 paddr += trunk;
316313 buffer += trunk;
317
- if (in_nmi) {
318
- ghes_iounmap_nmi();
319
- raw_spin_unlock(&ghes_ioremap_lock_nmi);
320
- } else {
321
- ghes_iounmap_irq();
322
- spin_unlock_irqrestore(&ghes_ioremap_lock_irq, flags);
323
- }
314
+ ghes_unmap(vaddr, fixmap_idx);
324315 }
325316 }
326317
327
-static int ghes_read_estatus(struct ghes *ghes, int silent)
318
+/* Check the top-level record header has an appropriate size. */
319
+static int __ghes_check_estatus(struct ghes *ghes,
320
+ struct acpi_hest_generic_status *estatus)
321
+{
322
+ u32 len = cper_estatus_len(estatus);
323
+
324
+ if (len < sizeof(*estatus)) {
325
+ pr_warn_ratelimited(FW_WARN GHES_PFX "Truncated error status block!\n");
326
+ return -EIO;
327
+ }
328
+
329
+ if (len > ghes->generic->error_block_length) {
330
+ pr_warn_ratelimited(FW_WARN GHES_PFX "Invalid error status block length!\n");
331
+ return -EIO;
332
+ }
333
+
334
+ if (cper_estatus_check_header(estatus)) {
335
+ pr_warn_ratelimited(FW_WARN GHES_PFX "Invalid CPER header!\n");
336
+ return -EIO;
337
+ }
338
+
339
+ return 0;
340
+}
341
+
342
+/* Read the CPER block, returning its address, and header in estatus. */
343
+static int __ghes_peek_estatus(struct ghes *ghes,
344
+ struct acpi_hest_generic_status *estatus,
345
+ u64 *buf_paddr, enum fixed_addresses fixmap_idx)
328346 {
329347 struct acpi_hest_generic *g = ghes->generic;
330
- u64 buf_paddr;
331
- u32 len;
332348 int rc;
333349
334
- rc = apei_read(&buf_paddr, &g->error_status_address);
350
+ rc = apei_read(buf_paddr, &g->error_status_address);
335351 if (rc) {
336
- if (!silent && printk_ratelimit())
337
- pr_warning(FW_WARN GHES_PFX
352
+ *buf_paddr = 0;
353
+ pr_warn_ratelimited(FW_WARN GHES_PFX
338354 "Failed to read error status block address for hardware error source: %d.\n",
339355 g->header.source_id);
340356 return -EIO;
341357 }
358
+ if (!*buf_paddr)
359
+ return -ENOENT;
360
+
361
+ ghes_copy_tofrom_phys(estatus, *buf_paddr, sizeof(*estatus), 1,
362
+ fixmap_idx);
363
+ if (!estatus->block_status) {
364
+ *buf_paddr = 0;
365
+ return -ENOENT;
366
+ }
367
+
368
+ return 0;
369
+}
370
+
371
+static int __ghes_read_estatus(struct acpi_hest_generic_status *estatus,
372
+ u64 buf_paddr, enum fixed_addresses fixmap_idx,
373
+ size_t buf_len)
374
+{
375
+ ghes_copy_tofrom_phys(estatus, buf_paddr, buf_len, 1, fixmap_idx);
376
+ if (cper_estatus_check(estatus)) {
377
+ pr_warn_ratelimited(FW_WARN GHES_PFX
378
+ "Failed to read error status block!\n");
379
+ return -EIO;
380
+ }
381
+
382
+ return 0;
383
+}
384
+
385
+static int ghes_read_estatus(struct ghes *ghes,
386
+ struct acpi_hest_generic_status *estatus,
387
+ u64 *buf_paddr, enum fixed_addresses fixmap_idx)
388
+{
389
+ int rc;
390
+
391
+ rc = __ghes_peek_estatus(ghes, estatus, buf_paddr, fixmap_idx);
392
+ if (rc)
393
+ return rc;
394
+
395
+ rc = __ghes_check_estatus(ghes, estatus);
396
+ if (rc)
397
+ return rc;
398
+
399
+ return __ghes_read_estatus(estatus, *buf_paddr, fixmap_idx,
400
+ cper_estatus_len(estatus));
401
+}
402
+
403
+static void ghes_clear_estatus(struct ghes *ghes,
404
+ struct acpi_hest_generic_status *estatus,
405
+ u64 buf_paddr, enum fixed_addresses fixmap_idx)
406
+{
407
+ estatus->block_status = 0;
408
+
342409 if (!buf_paddr)
343
- return -ENOENT;
344
-
345
- ghes_copy_tofrom_phys(ghes->estatus, buf_paddr,
346
- sizeof(*ghes->estatus), 1);
347
- if (!ghes->estatus->block_status)
348
- return -ENOENT;
349
-
350
- ghes->buffer_paddr = buf_paddr;
351
- ghes->flags |= GHES_TO_CLEAR;
352
-
353
- rc = -EIO;
354
- len = cper_estatus_len(ghes->estatus);
355
- if (len < sizeof(*ghes->estatus))
356
- goto err_read_block;
357
- if (len > ghes->generic->error_block_length)
358
- goto err_read_block;
359
- if (cper_estatus_check_header(ghes->estatus))
360
- goto err_read_block;
361
- ghes_copy_tofrom_phys(ghes->estatus + 1,
362
- buf_paddr + sizeof(*ghes->estatus),
363
- len - sizeof(*ghes->estatus), 1);
364
- if (cper_estatus_check(ghes->estatus))
365
- goto err_read_block;
366
- rc = 0;
367
-
368
-err_read_block:
369
- if (rc && !silent && printk_ratelimit())
370
- pr_warning(FW_WARN GHES_PFX
371
- "Failed to read error status block!\n");
372
- return rc;
373
-}
374
-
375
-static void ghes_clear_estatus(struct ghes *ghes)
376
-{
377
- ghes->estatus->block_status = 0;
378
- if (!(ghes->flags & GHES_TO_CLEAR))
379410 return;
380
- ghes_copy_tofrom_phys(ghes->estatus, ghes->buffer_paddr,
381
- sizeof(ghes->estatus->block_status), 0);
382
- ghes->flags &= ~GHES_TO_CLEAR;
411
+
412
+ ghes_copy_tofrom_phys(estatus, buf_paddr,
413
+ sizeof(estatus->block_status), 0,
414
+ fixmap_idx);
415
+
416
+ /*
417
+ * GHESv2 type HEST entries introduce support for error acknowledgment,
418
+ * so only acknowledge the error if this support is present.
419
+ */
420
+ if (is_hest_type_generic_v2(ghes))
421
+ ghes_ack_error(ghes->generic_v2);
383422 }
384423
385
-static void ghes_handle_memory_failure(struct acpi_hest_generic_data *gdata, int sev)
424
+/*
425
+ * Called as task_work before returning to user-space.
426
+ * Ensure any queued work has been done before we return to the context that
427
+ * triggered the notification.
428
+ */
429
+static void ghes_kick_task_work(struct callback_head *head)
386430 {
387
-#ifdef CONFIG_ACPI_APEI_MEMORY_FAILURE
431
+ struct acpi_hest_generic_status *estatus;
432
+ struct ghes_estatus_node *estatus_node;
433
+ u32 node_len;
434
+
435
+ estatus_node = container_of(head, struct ghes_estatus_node, task_work);
436
+ if (IS_ENABLED(CONFIG_ACPI_APEI_MEMORY_FAILURE))
437
+ memory_failure_queue_kick(estatus_node->task_work_cpu);
438
+
439
+ estatus = GHES_ESTATUS_FROM_NODE(estatus_node);
440
+ node_len = GHES_ESTATUS_NODE_LEN(cper_estatus_len(estatus));
441
+ gen_pool_free(ghes_estatus_pool, (unsigned long)estatus_node, node_len);
442
+}
443
+
444
+static bool ghes_do_memory_failure(u64 physical_addr, int flags)
445
+{
388446 unsigned long pfn;
447
+
448
+ if (!IS_ENABLED(CONFIG_ACPI_APEI_MEMORY_FAILURE))
449
+ return false;
450
+
451
+ pfn = PHYS_PFN(physical_addr);
452
+ if (!pfn_valid(pfn)) {
453
+ pr_warn_ratelimited(FW_WARN GHES_PFX
454
+ "Invalid address in generic error data: %#llx\n",
455
+ physical_addr);
456
+ return false;
457
+ }
458
+
459
+ memory_failure_queue(pfn, flags);
460
+ return true;
461
+}
462
+
463
+static bool ghes_handle_memory_failure(struct acpi_hest_generic_data *gdata,
464
+ int sev)
465
+{
389466 int flags = -1;
390467 int sec_sev = ghes_severity(gdata->error_severity);
391468 struct cper_sec_mem_err *mem_err = acpi_hest_get_payload(gdata);
392469
393470 if (!(mem_err->validation_bits & CPER_MEM_VALID_PA))
394
- return;
395
-
396
- pfn = mem_err->physical_addr >> PAGE_SHIFT;
397
- if (!pfn_valid(pfn)) {
398
- pr_warn_ratelimited(FW_WARN GHES_PFX
399
- "Invalid address in generic error data: %#llx\n",
400
- mem_err->physical_addr);
401
- return;
402
- }
471
+ return false;
403472
404473 /* iff following two events can be handled properly by now */
405474 if (sec_sev == GHES_SEV_CORRECTED &&
....@@ -409,8 +478,53 @@
409478 flags = 0;
410479
411480 if (flags != -1)
412
- memory_failure_queue(pfn, flags);
413
-#endif
481
+ return ghes_do_memory_failure(mem_err->physical_addr, flags);
482
+
483
+ return false;
484
+}
485
+
486
+static bool ghes_handle_arm_hw_error(struct acpi_hest_generic_data *gdata, int sev)
487
+{
488
+ struct cper_sec_proc_arm *err = acpi_hest_get_payload(gdata);
489
+ bool queued = false;
490
+ int sec_sev, i;
491
+ char *p;
492
+
493
+ log_arm_hw_error(err);
494
+
495
+ sec_sev = ghes_severity(gdata->error_severity);
496
+ if (sev != GHES_SEV_RECOVERABLE || sec_sev != GHES_SEV_RECOVERABLE)
497
+ return false;
498
+
499
+ p = (char *)(err + 1);
500
+ for (i = 0; i < err->err_info_num; i++) {
501
+ struct cper_arm_err_info *err_info = (struct cper_arm_err_info *)p;
502
+ bool is_cache = (err_info->type == CPER_ARM_CACHE_ERROR);
503
+ bool has_pa = (err_info->validation_bits & CPER_ARM_INFO_VALID_PHYSICAL_ADDR);
504
+ const char *error_type = "unknown error";
505
+
506
+ /*
507
+ * The field (err_info->error_info & BIT(26)) is fixed to set to
508
+ * 1 in some old firmware of HiSilicon Kunpeng920. We assume that
509
+ * firmware won't mix corrected errors in an uncorrected section,
510
+ * and don't filter out 'corrected' error here.
511
+ */
512
+ if (is_cache && has_pa) {
513
+ queued = ghes_do_memory_failure(err_info->physical_fault_addr, 0);
514
+ p += err_info->length;
515
+ continue;
516
+ }
517
+
518
+ if (err_info->type < ARRAY_SIZE(cper_proc_error_type_strs))
519
+ error_type = cper_proc_error_type_strs[err_info->type];
520
+
521
+ pr_warn_ratelimited(FW_WARN GHES_PFX
522
+ "Unhandled processor error type: %s\n",
523
+ error_type);
524
+ p += err_info->length;
525
+ }
526
+
527
+ return queued;
414528 }
415529
416530 /*
....@@ -458,14 +572,65 @@
458572 #endif
459573 }
460574
461
-static void ghes_do_proc(struct ghes *ghes,
575
+static BLOCKING_NOTIFIER_HEAD(vendor_record_notify_list);
576
+
577
+int ghes_register_vendor_record_notifier(struct notifier_block *nb)
578
+{
579
+ return blocking_notifier_chain_register(&vendor_record_notify_list, nb);
580
+}
581
+EXPORT_SYMBOL_GPL(ghes_register_vendor_record_notifier);
582
+
583
+void ghes_unregister_vendor_record_notifier(struct notifier_block *nb)
584
+{
585
+ blocking_notifier_chain_unregister(&vendor_record_notify_list, nb);
586
+}
587
+EXPORT_SYMBOL_GPL(ghes_unregister_vendor_record_notifier);
588
+
589
+static void ghes_vendor_record_work_func(struct work_struct *work)
590
+{
591
+ struct ghes_vendor_record_entry *entry;
592
+ struct acpi_hest_generic_data *gdata;
593
+ u32 len;
594
+
595
+ entry = container_of(work, struct ghes_vendor_record_entry, work);
596
+ gdata = GHES_GDATA_FROM_VENDOR_ENTRY(entry);
597
+
598
+ blocking_notifier_call_chain(&vendor_record_notify_list,
599
+ entry->error_severity, gdata);
600
+
601
+ len = GHES_VENDOR_ENTRY_LEN(acpi_hest_get_record_size(gdata));
602
+ gen_pool_free(ghes_estatus_pool, (unsigned long)entry, len);
603
+}
604
+
605
+static void ghes_defer_non_standard_event(struct acpi_hest_generic_data *gdata,
606
+ int sev)
607
+{
608
+ struct acpi_hest_generic_data *copied_gdata;
609
+ struct ghes_vendor_record_entry *entry;
610
+ u32 len;
611
+
612
+ len = GHES_VENDOR_ENTRY_LEN(acpi_hest_get_record_size(gdata));
613
+ entry = (void *)gen_pool_alloc(ghes_estatus_pool, len);
614
+ if (!entry)
615
+ return;
616
+
617
+ copied_gdata = GHES_GDATA_FROM_VENDOR_ENTRY(entry);
618
+ memcpy(copied_gdata, gdata, acpi_hest_get_record_size(gdata));
619
+ entry->error_severity = sev;
620
+
621
+ INIT_WORK(&entry->work, ghes_vendor_record_work_func);
622
+ schedule_work(&entry->work);
623
+}
624
+
625
+static bool ghes_do_proc(struct ghes *ghes,
462626 const struct acpi_hest_generic_status *estatus)
463627 {
464628 int sev, sec_sev;
465629 struct acpi_hest_generic_data *gdata;
466630 guid_t *sec_type;
467
- guid_t *fru_id = &NULL_UUID_LE;
631
+ const guid_t *fru_id = &guid_null;
468632 char *fru_text = "";
633
+ bool queued = false;
469634
470635 sev = ghes_severity(estatus->error_severity);
471636 apei_estatus_for_each_section(estatus, gdata) {
....@@ -483,23 +648,24 @@
483648 ghes_edac_report_mem_error(sev, mem_err);
484649
485650 arch_apei_report_mem_error(sev, mem_err);
486
- ghes_handle_memory_failure(gdata, sev);
651
+ queued = ghes_handle_memory_failure(gdata, sev);
487652 }
488653 else if (guid_equal(sec_type, &CPER_SEC_PCIE)) {
489654 ghes_handle_aer(gdata);
490655 }
491656 else if (guid_equal(sec_type, &CPER_SEC_PROC_ARM)) {
492
- struct cper_sec_proc_arm *err = acpi_hest_get_payload(gdata);
493
-
494
- log_arm_hw_error(err);
657
+ queued = ghes_handle_arm_hw_error(gdata, sev);
495658 } else {
496659 void *err = acpi_hest_get_payload(gdata);
497660
661
+ ghes_defer_non_standard_event(gdata, sev);
498662 log_non_standard_event(sec_type, fru_id, fru_text,
499663 sec_sev, err,
500664 gdata->error_data_length);
501665 }
502666 }
667
+
668
+ return queued;
503669 }
504670
505671 static void __ghes_print_estatus(const char *pfx,
....@@ -671,26 +837,13 @@
671837 rcu_read_unlock();
672838 }
673839
674
-static int ghes_ack_error(struct acpi_hest_generic_v2 *gv2)
840
+static void __ghes_panic(struct ghes *ghes,
841
+ struct acpi_hest_generic_status *estatus,
842
+ u64 buf_paddr, enum fixed_addresses fixmap_idx)
675843 {
676
- int rc;
677
- u64 val = 0;
844
+ __ghes_print_estatus(KERN_EMERG, ghes->generic, estatus);
678845
679
- rc = apei_read(&val, &gv2->read_ack_register);
680
- if (rc)
681
- return rc;
682
-
683
- val &= gv2->read_ack_preserve << gv2->read_ack_register.bit_offset;
684
- val |= gv2->read_ack_write << gv2->read_ack_register.bit_offset;
685
-
686
- return apei_write(val, &gv2->read_ack_register);
687
-}
688
-
689
-static void __ghes_panic(struct ghes *ghes)
690
-{
691
- __ghes_print_estatus(KERN_EMERG, ghes->generic, ghes->estatus);
692
-
693
- ghes_clear_estatus(ghes);
846
+ ghes_clear_estatus(ghes, estatus, buf_paddr, fixmap_idx);
694847
695848 /* reboot to log the error! */
696849 if (!panic_timeout)
....@@ -700,34 +853,25 @@
700853
701854 static int ghes_proc(struct ghes *ghes)
702855 {
856
+ struct acpi_hest_generic_status *estatus = ghes->estatus;
857
+ u64 buf_paddr;
703858 int rc;
704859
705
- rc = ghes_read_estatus(ghes, 0);
860
+ rc = ghes_read_estatus(ghes, estatus, &buf_paddr, FIX_APEI_GHES_IRQ);
706861 if (rc)
707862 goto out;
708863
709
- if (ghes_severity(ghes->estatus->error_severity) >= GHES_SEV_PANIC) {
710
- __ghes_panic(ghes);
711
- }
864
+ if (ghes_severity(estatus->error_severity) >= GHES_SEV_PANIC)
865
+ __ghes_panic(ghes, estatus, buf_paddr, FIX_APEI_GHES_IRQ);
712866
713
- if (!ghes_estatus_cached(ghes->estatus)) {
714
- if (ghes_print_estatus(NULL, ghes->generic, ghes->estatus))
715
- ghes_estatus_cache_add(ghes->generic, ghes->estatus);
867
+ if (!ghes_estatus_cached(estatus)) {
868
+ if (ghes_print_estatus(NULL, ghes->generic, estatus))
869
+ ghes_estatus_cache_add(ghes->generic, estatus);
716870 }
717
- ghes_do_proc(ghes, ghes->estatus);
871
+ ghes_do_proc(ghes, estatus);
718872
719873 out:
720
- ghes_clear_estatus(ghes);
721
-
722
- if (rc == -ENOENT)
723
- return rc;
724
-
725
- /*
726
- * GHESv2 type HEST entries introduce support for error acknowledgment,
727
- * so only acknowledge the error if this support is present.
728
- */
729
- if (is_hest_type_generic_v2(ghes))
730
- return ghes_ack_error(ghes->generic_v2);
874
+ ghes_clear_estatus(ghes, estatus, buf_paddr, FIX_APEI_GHES_IRQ);
731875
732876 return rc;
733877 }
....@@ -738,8 +882,8 @@
738882 unsigned long expire;
739883
740884 if (!g->notify.poll_interval) {
741
- pr_warning(FW_WARN GHES_PFX "Poll interval is 0 for generic hardware error source: %d, disabled.\n",
742
- g->header.source_id);
885
+ pr_warn(FW_WARN GHES_PFX "Poll interval is 0 for generic hardware error source: %d, disabled.\n",
886
+ g->header.source_id);
743887 return;
744888 }
745889 expire = jiffies + msecs_to_jiffies(g->notify.poll_interval);
....@@ -750,8 +894,11 @@
750894 static void ghes_poll_func(struct timer_list *t)
751895 {
752896 struct ghes *ghes = from_timer(ghes, t, timer);
897
+ unsigned long flags;
753898
899
+ spin_lock_irqsave(&ghes_notify_lock_irq, flags);
754900 ghes_proc(ghes);
901
+ spin_unlock_irqrestore(&ghes_notify_lock_irq, flags);
755902 if (!(ghes->flags & GHES_EXITING))
756903 ghes_add_timer(ghes);
757904 }
....@@ -759,9 +906,12 @@
759906 static irqreturn_t ghes_irq_func(int irq, void *data)
760907 {
761908 struct ghes *ghes = data;
909
+ unsigned long flags;
762910 int rc;
763911
912
+ spin_lock_irqsave(&ghes_notify_lock_irq, flags);
764913 rc = ghes_proc(ghes);
914
+ spin_unlock_irqrestore(&ghes_notify_lock_irq, flags);
765915 if (rc)
766916 return IRQ_NONE;
767917
....@@ -772,14 +922,17 @@
772922 void *data)
773923 {
774924 struct ghes *ghes;
925
+ unsigned long flags;
775926 int ret = NOTIFY_DONE;
776927
928
+ spin_lock_irqsave(&ghes_notify_lock_irq, flags);
777929 rcu_read_lock();
778930 list_for_each_entry_rcu(ghes, &ghes_hed, list) {
779931 if (!ghes_proc(ghes))
780932 ret = NOTIFY_OK;
781933 }
782934 rcu_read_unlock();
935
+ spin_unlock_irqrestore(&ghes_notify_lock_irq, flags);
783936
784937 return ret;
785938 }
....@@ -788,65 +941,19 @@
788941 .notifier_call = ghes_notify_hed,
789942 };
790943
791
-#ifdef CONFIG_ACPI_APEI_SEA
792
-static LIST_HEAD(ghes_sea);
793
-
794944 /*
795
- * Return 0 only if one of the SEA error sources successfully reported an error
796
- * record sent from the firmware.
797
- */
798
-int ghes_notify_sea(void)
799
-{
800
- struct ghes *ghes;
801
- int ret = -ENOENT;
802
-
803
- rcu_read_lock();
804
- list_for_each_entry_rcu(ghes, &ghes_sea, list) {
805
- if (!ghes_proc(ghes))
806
- ret = 0;
807
- }
808
- rcu_read_unlock();
809
- return ret;
810
-}
811
-
812
-static void ghes_sea_add(struct ghes *ghes)
813
-{
814
- mutex_lock(&ghes_list_mutex);
815
- list_add_rcu(&ghes->list, &ghes_sea);
816
- mutex_unlock(&ghes_list_mutex);
817
-}
818
-
819
-static void ghes_sea_remove(struct ghes *ghes)
820
-{
821
- mutex_lock(&ghes_list_mutex);
822
- list_del_rcu(&ghes->list);
823
- mutex_unlock(&ghes_list_mutex);
824
- synchronize_rcu();
825
-}
826
-#else /* CONFIG_ACPI_APEI_SEA */
827
-static inline void ghes_sea_add(struct ghes *ghes) { }
828
-static inline void ghes_sea_remove(struct ghes *ghes) { }
829
-#endif /* CONFIG_ACPI_APEI_SEA */
830
-
831
-#ifdef CONFIG_HAVE_ACPI_APEI_NMI
832
-/*
833
- * printk is not safe in NMI context. So in NMI handler, we allocate
834
- * required memory from lock-less memory allocator
835
- * (ghes_estatus_pool), save estatus into it, put them into lock-less
836
- * list (ghes_estatus_llist), then delay printk into IRQ context via
837
- * irq_work (ghes_proc_irq_work). ghes_estatus_size_request record
838
- * required pool size by all NMI error source.
945
+ * Handlers for CPER records may not be NMI safe. For example,
946
+ * memory_failure_queue() takes spinlocks and calls schedule_work_on().
947
+ * In any NMI-like handler, memory from ghes_estatus_pool is used to save
948
+ * estatus, and added to the ghes_estatus_llist. irq_work_queue() causes
949
+ * ghes_proc_in_irq() to run in IRQ context where each estatus in
950
+ * ghes_estatus_llist is processed.
951
+ *
952
+ * Memory from the ghes_estatus_pool is also used with the ghes_estatus_cache
953
+ * to suppress frequent messages.
839954 */
840955 static struct llist_head ghes_estatus_llist;
841956 static struct irq_work ghes_proc_irq_work;
842
-
843
-/*
844
- * NMI may be triggered on any CPU, so ghes_in_nmi is used for
845
- * having only one concurrent reader.
846
- */
847
-static atomic_t ghes_in_nmi = ATOMIC_INIT(0);
848
-
849
-static LIST_HEAD(ghes_nmi);
850957
851958 static void ghes_proc_in_irq(struct irq_work *irq_work)
852959 {
....@@ -854,7 +961,9 @@
854961 struct ghes_estatus_node *estatus_node;
855962 struct acpi_hest_generic *generic;
856963 struct acpi_hest_generic_status *estatus;
964
+ bool task_work_pending;
857965 u32 len, node_len;
966
+ int ret;
858967
859968 llnode = llist_del_all(&ghes_estatus_llist);
860969 /*
....@@ -869,14 +978,26 @@
869978 estatus = GHES_ESTATUS_FROM_NODE(estatus_node);
870979 len = cper_estatus_len(estatus);
871980 node_len = GHES_ESTATUS_NODE_LEN(len);
872
- ghes_do_proc(estatus_node->ghes, estatus);
981
+ task_work_pending = ghes_do_proc(estatus_node->ghes, estatus);
873982 if (!ghes_estatus_cached(estatus)) {
874983 generic = estatus_node->generic;
875984 if (ghes_print_estatus(NULL, generic, estatus))
876985 ghes_estatus_cache_add(generic, estatus);
877986 }
878
- gen_pool_free(ghes_estatus_pool, (unsigned long)estatus_node,
879
- node_len);
987
+
988
+ if (task_work_pending && current->mm) {
989
+ estatus_node->task_work.func = ghes_kick_task_work;
990
+ estatus_node->task_work_cpu = smp_processor_id();
991
+ ret = task_work_add(current, &estatus_node->task_work,
992
+ TWA_RESUME);
993
+ if (ret)
994
+ estatus_node->task_work.func = NULL;
995
+ }
996
+
997
+ if (!estatus_node->task_work.func)
998
+ gen_pool_free(ghes_estatus_pool,
999
+ (unsigned long)estatus_node, node_len);
1000
+
8801001 llnode = next;
8811002 }
8821003 }
....@@ -904,95 +1025,155 @@
9041025 }
9051026 }
9061027
907
-/* Save estatus for further processing in IRQ context */
908
-static void __process_error(struct ghes *ghes)
1028
+static int ghes_in_nmi_queue_one_entry(struct ghes *ghes,
1029
+ enum fixed_addresses fixmap_idx)
9091030 {
910
-#ifdef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG
911
- u32 len, node_len;
1031
+ struct acpi_hest_generic_status *estatus, tmp_header;
9121032 struct ghes_estatus_node *estatus_node;
913
- struct acpi_hest_generic_status *estatus;
1033
+ u32 len, node_len;
1034
+ u64 buf_paddr;
1035
+ int sev, rc;
9141036
915
- if (ghes_estatus_cached(ghes->estatus))
916
- return;
1037
+ if (!IS_ENABLED(CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG))
1038
+ return -EOPNOTSUPP;
9171039
918
- len = cper_estatus_len(ghes->estatus);
1040
+ rc = __ghes_peek_estatus(ghes, &tmp_header, &buf_paddr, fixmap_idx);
1041
+ if (rc) {
1042
+ ghes_clear_estatus(ghes, &tmp_header, buf_paddr, fixmap_idx);
1043
+ return rc;
1044
+ }
1045
+
1046
+ rc = __ghes_check_estatus(ghes, &tmp_header);
1047
+ if (rc) {
1048
+ ghes_clear_estatus(ghes, &tmp_header, buf_paddr, fixmap_idx);
1049
+ return rc;
1050
+ }
1051
+
1052
+ len = cper_estatus_len(&tmp_header);
9191053 node_len = GHES_ESTATUS_NODE_LEN(len);
920
-
9211054 estatus_node = (void *)gen_pool_alloc(ghes_estatus_pool, node_len);
9221055 if (!estatus_node)
923
- return;
1056
+ return -ENOMEM;
9241057
9251058 estatus_node->ghes = ghes;
9261059 estatus_node->generic = ghes->generic;
1060
+ estatus_node->task_work.func = NULL;
9271061 estatus = GHES_ESTATUS_FROM_NODE(estatus_node);
928
- memcpy(estatus, ghes->estatus, len);
1062
+
1063
+ if (__ghes_read_estatus(estatus, buf_paddr, fixmap_idx, len)) {
1064
+ ghes_clear_estatus(ghes, estatus, buf_paddr, fixmap_idx);
1065
+ rc = -ENOENT;
1066
+ goto no_work;
1067
+ }
1068
+
1069
+ sev = ghes_severity(estatus->error_severity);
1070
+ if (sev >= GHES_SEV_PANIC) {
1071
+ ghes_print_queued_estatus();
1072
+ __ghes_panic(ghes, estatus, buf_paddr, fixmap_idx);
1073
+ }
1074
+
1075
+ ghes_clear_estatus(ghes, &tmp_header, buf_paddr, fixmap_idx);
1076
+
1077
+ /* This error has been reported before, don't process it again. */
1078
+ if (ghes_estatus_cached(estatus))
1079
+ goto no_work;
1080
+
9291081 llist_add(&estatus_node->llnode, &ghes_estatus_llist);
930
-#endif
1082
+
1083
+ return rc;
1084
+
1085
+no_work:
1086
+ gen_pool_free(ghes_estatus_pool, (unsigned long)estatus_node,
1087
+ node_len);
1088
+
1089
+ return rc;
9311090 }
1091
+
1092
+static int ghes_in_nmi_spool_from_list(struct list_head *rcu_list,
1093
+ enum fixed_addresses fixmap_idx)
1094
+{
1095
+ int ret = -ENOENT;
1096
+ struct ghes *ghes;
1097
+
1098
+ rcu_read_lock();
1099
+ list_for_each_entry_rcu(ghes, rcu_list, list) {
1100
+ if (!ghes_in_nmi_queue_one_entry(ghes, fixmap_idx))
1101
+ ret = 0;
1102
+ }
1103
+ rcu_read_unlock();
1104
+
1105
+ if (IS_ENABLED(CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG) && !ret)
1106
+ irq_work_queue(&ghes_proc_irq_work);
1107
+
1108
+ return ret;
1109
+}
1110
+
1111
+#ifdef CONFIG_ACPI_APEI_SEA
1112
+static LIST_HEAD(ghes_sea);
1113
+
1114
+/*
1115
+ * Return 0 only if one of the SEA error sources successfully reported an error
1116
+ * record sent from the firmware.
1117
+ */
1118
+int ghes_notify_sea(void)
1119
+{
1120
+ static DEFINE_RAW_SPINLOCK(ghes_notify_lock_sea);
1121
+ int rv;
1122
+
1123
+ raw_spin_lock(&ghes_notify_lock_sea);
1124
+ rv = ghes_in_nmi_spool_from_list(&ghes_sea, FIX_APEI_GHES_SEA);
1125
+ raw_spin_unlock(&ghes_notify_lock_sea);
1126
+
1127
+ return rv;
1128
+}
1129
+
1130
+static void ghes_sea_add(struct ghes *ghes)
1131
+{
1132
+ mutex_lock(&ghes_list_mutex);
1133
+ list_add_rcu(&ghes->list, &ghes_sea);
1134
+ mutex_unlock(&ghes_list_mutex);
1135
+}
1136
+
1137
+static void ghes_sea_remove(struct ghes *ghes)
1138
+{
1139
+ mutex_lock(&ghes_list_mutex);
1140
+ list_del_rcu(&ghes->list);
1141
+ mutex_unlock(&ghes_list_mutex);
1142
+ synchronize_rcu();
1143
+}
1144
+#else /* CONFIG_ACPI_APEI_SEA */
1145
+static inline void ghes_sea_add(struct ghes *ghes) { }
1146
+static inline void ghes_sea_remove(struct ghes *ghes) { }
1147
+#endif /* CONFIG_ACPI_APEI_SEA */
1148
+
1149
+#ifdef CONFIG_HAVE_ACPI_APEI_NMI
1150
+/*
1151
+ * NMI may be triggered on any CPU, so ghes_in_nmi is used for
1152
+ * having only one concurrent reader.
1153
+ */
1154
+static atomic_t ghes_in_nmi = ATOMIC_INIT(0);
1155
+
1156
+static LIST_HEAD(ghes_nmi);
9321157
9331158 static int ghes_notify_nmi(unsigned int cmd, struct pt_regs *regs)
9341159 {
935
- struct ghes *ghes;
936
- int sev, ret = NMI_DONE;
1160
+ static DEFINE_RAW_SPINLOCK(ghes_notify_lock_nmi);
1161
+ int ret = NMI_DONE;
9371162
9381163 if (!atomic_add_unless(&ghes_in_nmi, 1, 1))
9391164 return ret;
9401165
941
- list_for_each_entry_rcu(ghes, &ghes_nmi, list) {
942
- if (ghes_read_estatus(ghes, 1)) {
943
- ghes_clear_estatus(ghes);
944
- continue;
945
- } else {
946
- ret = NMI_HANDLED;
947
- }
1166
+ raw_spin_lock(&ghes_notify_lock_nmi);
1167
+ if (!ghes_in_nmi_spool_from_list(&ghes_nmi, FIX_APEI_GHES_NMI))
1168
+ ret = NMI_HANDLED;
1169
+ raw_spin_unlock(&ghes_notify_lock_nmi);
9481170
949
- sev = ghes_severity(ghes->estatus->error_severity);
950
- if (sev >= GHES_SEV_PANIC) {
951
- ghes_print_queued_estatus();
952
- __ghes_panic(ghes);
953
- }
954
-
955
- if (!(ghes->flags & GHES_TO_CLEAR))
956
- continue;
957
-
958
- __process_error(ghes);
959
- ghes_clear_estatus(ghes);
960
- }
961
-
962
-#ifdef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG
963
- if (ret == NMI_HANDLED)
964
- irq_work_queue(&ghes_proc_irq_work);
965
-#endif
9661171 atomic_dec(&ghes_in_nmi);
9671172 return ret;
9681173 }
9691174
970
-static unsigned long ghes_esource_prealloc_size(
971
- const struct acpi_hest_generic *generic)
972
-{
973
- unsigned long block_length, prealloc_records, prealloc_size;
974
-
975
- block_length = min_t(unsigned long, generic->error_block_length,
976
- GHES_ESTATUS_MAX_SIZE);
977
- prealloc_records = max_t(unsigned long,
978
- generic->records_to_preallocate, 1);
979
- prealloc_size = min_t(unsigned long, block_length * prealloc_records,
980
- GHES_ESOURCE_PREALLOC_MAX_SIZE);
981
-
982
- return prealloc_size;
983
-}
984
-
985
-static void ghes_estatus_pool_shrink(unsigned long len)
986
-{
987
- ghes_estatus_pool_size_request -= PAGE_ALIGN(len);
988
-}
989
-
9901175 static void ghes_nmi_add(struct ghes *ghes)
9911176 {
992
- unsigned long len;
993
-
994
- len = ghes_esource_prealloc_size(ghes->generic);
995
- ghes_estatus_pool_expand(len);
9961177 mutex_lock(&ghes_list_mutex);
9971178 if (list_empty(&ghes_nmi))
9981179 register_nmi_handler(NMI_LOCAL, ghes_notify_nmi, 0, "ghes");
....@@ -1002,8 +1183,6 @@
10021183
10031184 static void ghes_nmi_remove(struct ghes *ghes)
10041185 {
1005
- unsigned long len;
1006
-
10071186 mutex_lock(&ghes_list_mutex);
10081187 list_del_rcu(&ghes->list);
10091188 if (list_empty(&ghes_nmi))
....@@ -1014,24 +1193,79 @@
10141193 * freed after NMI handler finishes.
10151194 */
10161195 synchronize_rcu();
1017
- len = ghes_esource_prealloc_size(ghes->generic);
1018
- ghes_estatus_pool_shrink(len);
10191196 }
1197
+#else /* CONFIG_HAVE_ACPI_APEI_NMI */
1198
+static inline void ghes_nmi_add(struct ghes *ghes) { }
1199
+static inline void ghes_nmi_remove(struct ghes *ghes) { }
1200
+#endif /* CONFIG_HAVE_ACPI_APEI_NMI */
10201201
10211202 static void ghes_nmi_init_cxt(void)
10221203 {
10231204 init_irq_work(&ghes_proc_irq_work, ghes_proc_in_irq);
10241205 }
1025
-#else /* CONFIG_HAVE_ACPI_APEI_NMI */
1026
-static inline void ghes_nmi_add(struct ghes *ghes) { }
1027
-static inline void ghes_nmi_remove(struct ghes *ghes) { }
1028
-static inline void ghes_nmi_init_cxt(void) { }
1029
-#endif /* CONFIG_HAVE_ACPI_APEI_NMI */
1206
+
1207
+static int __ghes_sdei_callback(struct ghes *ghes,
1208
+ enum fixed_addresses fixmap_idx)
1209
+{
1210
+ if (!ghes_in_nmi_queue_one_entry(ghes, fixmap_idx)) {
1211
+ irq_work_queue(&ghes_proc_irq_work);
1212
+
1213
+ return 0;
1214
+ }
1215
+
1216
+ return -ENOENT;
1217
+}
1218
+
1219
+static int ghes_sdei_normal_callback(u32 event_num, struct pt_regs *regs,
1220
+ void *arg)
1221
+{
1222
+ static DEFINE_RAW_SPINLOCK(ghes_notify_lock_sdei_normal);
1223
+ struct ghes *ghes = arg;
1224
+ int err;
1225
+
1226
+ raw_spin_lock(&ghes_notify_lock_sdei_normal);
1227
+ err = __ghes_sdei_callback(ghes, FIX_APEI_GHES_SDEI_NORMAL);
1228
+ raw_spin_unlock(&ghes_notify_lock_sdei_normal);
1229
+
1230
+ return err;
1231
+}
1232
+
1233
+static int ghes_sdei_critical_callback(u32 event_num, struct pt_regs *regs,
1234
+ void *arg)
1235
+{
1236
+ static DEFINE_RAW_SPINLOCK(ghes_notify_lock_sdei_critical);
1237
+ struct ghes *ghes = arg;
1238
+ int err;
1239
+
1240
+ raw_spin_lock(&ghes_notify_lock_sdei_critical);
1241
+ err = __ghes_sdei_callback(ghes, FIX_APEI_GHES_SDEI_CRITICAL);
1242
+ raw_spin_unlock(&ghes_notify_lock_sdei_critical);
1243
+
1244
+ return err;
1245
+}
1246
+
1247
+static int apei_sdei_register_ghes(struct ghes *ghes)
1248
+{
1249
+ if (!IS_ENABLED(CONFIG_ARM_SDE_INTERFACE))
1250
+ return -EOPNOTSUPP;
1251
+
1252
+ return sdei_register_ghes(ghes, ghes_sdei_normal_callback,
1253
+ ghes_sdei_critical_callback);
1254
+}
1255
+
1256
+static int apei_sdei_unregister_ghes(struct ghes *ghes)
1257
+{
1258
+ if (!IS_ENABLED(CONFIG_ARM_SDE_INTERFACE))
1259
+ return -EOPNOTSUPP;
1260
+
1261
+ return sdei_unregister_ghes(ghes);
1262
+}
10301263
10311264 static int ghes_probe(struct platform_device *ghes_dev)
10321265 {
10331266 struct acpi_hest_generic *generic;
10341267 struct ghes *ghes = NULL;
1268
+ unsigned long flags;
10351269
10361270 int rc = -EINVAL;
10371271
....@@ -1062,22 +1296,28 @@
10621296 goto err;
10631297 }
10641298 break;
1299
+ case ACPI_HEST_NOTIFY_SOFTWARE_DELEGATED:
1300
+ if (!IS_ENABLED(CONFIG_ARM_SDE_INTERFACE)) {
1301
+ pr_warn(GHES_PFX "Generic hardware error source: %d notified via SDE Interface is not supported!\n",
1302
+ generic->header.source_id);
1303
+ goto err;
1304
+ }
1305
+ break;
10651306 case ACPI_HEST_NOTIFY_LOCAL:
1066
- pr_warning(GHES_PFX "Generic hardware error source: %d notified via local interrupt is not supported!\n",
1067
- generic->header.source_id);
1307
+ pr_warn(GHES_PFX "Generic hardware error source: %d notified via local interrupt is not supported!\n",
1308
+ generic->header.source_id);
10681309 goto err;
10691310 default:
1070
- pr_warning(FW_WARN GHES_PFX "Unknown notification type: %u for generic hardware error source: %d\n",
1071
- generic->notify.type, generic->header.source_id);
1311
+ pr_warn(FW_WARN GHES_PFX "Unknown notification type: %u for generic hardware error source: %d\n",
1312
+ generic->notify.type, generic->header.source_id);
10721313 goto err;
10731314 }
10741315
10751316 rc = -EIO;
10761317 if (generic->error_block_length <
10771318 sizeof(struct acpi_hest_generic_status)) {
1078
- pr_warning(FW_BUG GHES_PFX "Invalid error block length: %u for generic hardware error source: %d\n",
1079
- generic->error_block_length,
1080
- generic->header.source_id);
1319
+ pr_warn(FW_BUG GHES_PFX "Invalid error block length: %u for generic hardware error source: %d\n",
1320
+ generic->error_block_length, generic->header.source_id);
10811321 goto err;
10821322 }
10831323 ghes = ghes_new(generic);
....@@ -1089,7 +1329,7 @@
10891329
10901330 switch (generic->notify.type) {
10911331 case ACPI_HEST_NOTIFY_POLLED:
1092
- timer_setup(&ghes->timer, ghes_poll_func, TIMER_DEFERRABLE);
1332
+ timer_setup(&ghes->timer, ghes_poll_func, 0);
10931333 ghes_add_timer(ghes);
10941334 break;
10951335 case ACPI_HEST_NOTIFY_EXTERNAL:
....@@ -1125,6 +1365,11 @@
11251365 case ACPI_HEST_NOTIFY_NMI:
11261366 ghes_nmi_add(ghes);
11271367 break;
1368
+ case ACPI_HEST_NOTIFY_SOFTWARE_DELEGATED:
1369
+ rc = apei_sdei_register_ghes(ghes);
1370
+ if (rc)
1371
+ goto err;
1372
+ break;
11281373 default:
11291374 BUG();
11301375 }
....@@ -1134,7 +1379,9 @@
11341379 ghes_edac_register(ghes, &ghes_dev->dev);
11351380
11361381 /* Handle any pending errors right away */
1382
+ spin_lock_irqsave(&ghes_notify_lock_irq, flags);
11371383 ghes_proc(ghes);
1384
+ spin_unlock_irqrestore(&ghes_notify_lock_irq, flags);
11381385
11391386 return 0;
11401387
....@@ -1148,6 +1395,7 @@
11481395
11491396 static int ghes_remove(struct platform_device *ghes_dev)
11501397 {
1398
+ int rc;
11511399 struct ghes *ghes;
11521400 struct acpi_hest_generic *generic;
11531401
....@@ -1180,6 +1428,11 @@
11801428 case ACPI_HEST_NOTIFY_NMI:
11811429 ghes_nmi_remove(ghes);
11821430 break;
1431
+ case ACPI_HEST_NOTIFY_SOFTWARE_DELEGATED:
1432
+ rc = apei_sdei_unregister_ghes(ghes);
1433
+ if (rc)
1434
+ return rc;
1435
+ break;
11831436 default:
11841437 BUG();
11851438 break;
....@@ -1204,42 +1457,35 @@
12041457 .remove = ghes_remove,
12051458 };
12061459
1207
-static int __init ghes_init(void)
1460
+void __init ghes_init(void)
12081461 {
12091462 int rc;
12101463
1464
+ sdei_init();
1465
+
12111466 if (acpi_disabled)
1212
- return -ENODEV;
1467
+ return;
12131468
12141469 switch (hest_disable) {
12151470 case HEST_NOT_FOUND:
1216
- return -ENODEV;
1471
+ return;
12171472 case HEST_DISABLED:
12181473 pr_info(GHES_PFX "HEST is not enabled!\n");
1219
- return -EINVAL;
1474
+ return;
12201475 default:
12211476 break;
12221477 }
12231478
12241479 if (ghes_disable) {
12251480 pr_info(GHES_PFX "GHES is not enabled!\n");
1226
- return -EINVAL;
1481
+ return;
12271482 }
12281483
12291484 ghes_nmi_init_cxt();
12301485
1231
- rc = ghes_estatus_pool_init();
1232
- if (rc)
1233
- goto err;
1234
-
1235
- rc = ghes_estatus_pool_expand(GHES_ESTATUS_CACHE_AVG_SIZE *
1236
- GHES_ESTATUS_CACHE_ALLOCED_MAX);
1237
- if (rc)
1238
- goto err_pool_exit;
1239
-
12401486 rc = platform_driver_register(&ghes_platform_driver);
12411487 if (rc)
1242
- goto err_pool_exit;
1488
+ return;
12431489
12441490 rc = apei_osc_setup();
12451491 if (rc == 0 && osc_sb_apei_support_acked)
....@@ -1250,11 +1496,4 @@
12501496 pr_info(GHES_PFX "APEI firmware first mode is enabled by APEI bit.\n");
12511497 else
12521498 pr_info(GHES_PFX "Failed to enable APEI firmware first mode.\n");
1253
-
1254
- return 0;
1255
-err_pool_exit:
1256
- ghes_estatus_pool_exit();
1257
-err:
1258
- return rc;
12591499 }
1260
-device_initcall(ghes_init);