hc
2024-05-10 37f49e37ab4cb5d0bc4c60eb5c6d4dd57db767bb
kernel/drivers/edac/mce_amd.c
....@@ -1,3 +1,4 @@
1
+// SPDX-License-Identifier: GPL-2.0-only
12 #include <linux/module.h>
23 #include <linux/slab.h>
34
....@@ -5,18 +6,11 @@
56
67 #include "mce_amd.h"
78
8
-static struct amd_decoder_ops *fam_ops;
9
+static struct amd_decoder_ops fam_ops;
910
1011 static u8 xec_mask = 0xf;
1112
12
-static bool report_gart_errors;
1313 static void (*decode_dram_ecc)(int node_id, struct mce *m);
14
-
15
-void amd_report_gart_errors(bool v)
16
-{
17
- report_gart_errors = v;
18
-}
19
-EXPORT_SYMBOL_GPL(amd_report_gart_errors);
2014
2115 void amd_register_ecc_decoder(void (*f)(int, struct mce *))
2216 {
....@@ -151,138 +145,259 @@
151145
152146 /* Scalable MCA error strings */
153147 static const char * const smca_ls_mce_desc[] = {
154
- "Load queue parity",
155
- "Store queue parity",
156
- "Miss address buffer payload parity",
157
- "L1 TLB parity",
158
- "Reserved",
159
- "DC tag error type 6",
160
- "DC tag error type 1",
148
+ "Load queue parity error",
149
+ "Store queue parity error",
150
+ "Miss address buffer payload parity error",
151
+ "Level 1 TLB parity error",
152
+ "DC Tag error type 5",
153
+ "DC Tag error type 6",
154
+ "DC Tag error type 1",
161155 "Internal error type 1",
162156 "Internal error type 2",
163
- "Sys Read data error thread 0",
164
- "Sys read data error thread 1",
165
- "DC tag error type 2",
166
- "DC data error type 1 (poison consumption)",
167
- "DC data error type 2",
168
- "DC data error type 3",
169
- "DC tag error type 4",
170
- "L2 TLB parity",
157
+ "System Read Data Error Thread 0",
158
+ "System Read Data Error Thread 1",
159
+ "DC Tag error type 2",
160
+ "DC Data error type 1 and poison consumption",
161
+ "DC Data error type 2",
162
+ "DC Data error type 3",
163
+ "DC Tag error type 4",
164
+ "Level 2 TLB parity error",
171165 "PDC parity error",
172
- "DC tag error type 3",
173
- "DC tag error type 5",
174
- "L2 fill data error",
166
+ "DC Tag error type 3",
167
+ "DC Tag error type 5",
168
+ "L2 Fill Data error",
169
+};
170
+
171
+static const char * const smca_ls2_mce_desc[] = {
172
+ "An ECC error was detected on a data cache read by a probe or victimization",
173
+ "An ECC error or L2 poison was detected on a data cache read by a load",
174
+ "An ECC error was detected on a data cache read-modify-write by a store",
175
+ "An ECC error or poison bit mismatch was detected on a tag read by a probe or victimization",
176
+ "An ECC error or poison bit mismatch was detected on a tag read by a load",
177
+ "An ECC error or poison bit mismatch was detected on a tag read by a store",
178
+ "An ECC error was detected on an EMEM read by a load",
179
+ "An ECC error was detected on an EMEM read-modify-write by a store",
180
+ "A parity error was detected in an L1 TLB entry by any access",
181
+ "A parity error was detected in an L2 TLB entry by any access",
182
+ "A parity error was detected in a PWC entry by any access",
183
+ "A parity error was detected in an STQ entry by any access",
184
+ "A parity error was detected in an LDQ entry by any access",
185
+ "A parity error was detected in a MAB entry by any access",
186
+ "A parity error was detected in an SCB entry state field by any access",
187
+ "A parity error was detected in an SCB entry address field by any access",
188
+ "A parity error was detected in an SCB entry data field by any access",
189
+ "A parity error was detected in a WCB entry by any access",
190
+ "A poisoned line was detected in an SCB entry by any access",
191
+ "A SystemReadDataError error was reported on read data returned from L2 for a load",
192
+ "A SystemReadDataError error was reported on read data returned from L2 for an SCB store",
193
+ "A SystemReadDataError error was reported on read data returned from L2 for a WCB store",
194
+ "A hardware assertion error was reported",
195
+ "A parity error was detected in an STLF, SCB EMEM entry or SRB store data by any access",
175196 };
176197
177198 static const char * const smca_if_mce_desc[] = {
178
- "microtag probe port parity error",
179
- "IC microtag or full tag multi-hit error",
180
- "IC full tag parity",
181
- "IC data array parity",
182
- "Decoupling queue phys addr parity error",
183
- "L0 ITLB parity error",
184
- "L1 ITLB parity error",
185
- "L2 ITLB parity error",
186
- "BPQ snoop parity on Thread 0",
187
- "BPQ snoop parity on Thread 1",
188
- "L1 BTB multi-match error",
189
- "L2 BTB multi-match error",
190
- "L2 Cache Response Poison error",
191
- "System Read Data error",
199
+ "Op Cache Microtag Probe Port Parity Error",
200
+ "IC Microtag or Full Tag Multi-hit Error",
201
+ "IC Full Tag Parity Error",
202
+ "IC Data Array Parity Error",
203
+ "Decoupling Queue PhysAddr Parity Error",
204
+ "L0 ITLB Parity Error",
205
+ "L1 ITLB Parity Error",
206
+ "L2 ITLB Parity Error",
207
+ "BPQ Thread 0 Snoop Parity Error",
208
+ "BPQ Thread 1 Snoop Parity Error",
209
+ "L1 BTB Multi-Match Error",
210
+ "L2 BTB Multi-Match Error",
211
+ "L2 Cache Response Poison Error",
212
+ "System Read Data Error",
213
+ "Hardware Assertion Error",
214
+ "L1-TLB Multi-Hit",
215
+ "L2-TLB Multi-Hit",
216
+ "BSR Parity Error",
217
+ "CT MCE",
192218 };
193219
194220 static const char * const smca_l2_mce_desc[] = {
195
- "L2M tag multi-way-hit error",
196
- "L2M tag ECC error",
197
- "L2M data ECC error",
198
- "HW assert",
221
+ "L2M Tag Multiple-Way-Hit error",
222
+ "L2M Tag or State Array ECC Error",
223
+ "L2M Data Array ECC Error",
224
+ "Hardware Assert Error",
199225 };
200226
201227 static const char * const smca_de_mce_desc[] = {
202
- "uop cache tag parity error",
203
- "uop cache data parity error",
204
- "Insn buffer parity error",
205
- "uop queue parity error",
206
- "Insn dispatch queue parity error",
207
- "Fetch address FIFO parity",
208
- "Patch RAM data parity",
209
- "Patch RAM sequencer parity",
210
- "uop buffer parity"
228
+ "Micro-op cache tag parity error",
229
+ "Micro-op cache data parity error",
230
+ "Instruction buffer parity error",
231
+ "Micro-op queue parity error",
232
+ "Instruction dispatch queue parity error",
233
+ "Fetch address FIFO parity error",
234
+ "Patch RAM data parity error",
235
+ "Patch RAM sequencer parity error",
236
+ "Micro-op buffer parity error",
237
+ "Hardware Assertion MCA Error",
211238 };
212239
213240 static const char * const smca_ex_mce_desc[] = {
214
- "Watchdog timeout error",
215
- "Phy register file parity",
216
- "Flag register file parity",
217
- "Immediate displacement register file parity",
218
- "Address generator payload parity",
219
- "EX payload parity",
220
- "Checkpoint queue parity",
221
- "Retire dispatch queue parity",
241
+ "Watchdog Timeout error",
242
+ "Physical register file parity error",
243
+ "Flag register file parity error",
244
+ "Immediate displacement register file parity error",
245
+ "Address generator payload parity error",
246
+ "EX payload parity error",
247
+ "Checkpoint queue parity error",
248
+ "Retire dispatch queue parity error",
222249 "Retire status queue parity error",
223250 "Scheduling queue parity error",
224251 "Branch buffer queue parity error",
252
+ "Hardware Assertion error",
253
+ "Spec Map parity error",
254
+ "Retire Map parity error",
225255 };
226256
227257 static const char * const smca_fp_mce_desc[] = {
228
- "Physical register file parity",
229
- "Freelist parity error",
230
- "Schedule queue parity",
258
+ "Physical register file (PRF) parity error",
259
+ "Freelist (FL) parity error",
260
+ "Schedule queue parity error",
231261 "NSQ parity error",
232
- "Retire queue parity",
233
- "Status register file parity",
262
+ "Retire queue (RQ) parity error",
263
+ "Status register file (SRF) parity error",
234264 "Hardware assertion",
235265 };
236266
237267 static const char * const smca_l3_mce_desc[] = {
238
- "Shadow tag macro ECC error",
239
- "Shadow tag macro multi-way-hit error",
240
- "L3M tag ECC error",
241
- "L3M tag multi-way-hit error",
242
- "L3M data ECC error",
243
- "XI parity, L3 fill done channel error",
244
- "L3 victim queue parity",
245
- "L3 HW assert",
268
+ "Shadow Tag Macro ECC Error",
269
+ "Shadow Tag Macro Multi-way-hit Error",
270
+ "L3M Tag ECC Error",
271
+ "L3M Tag Multi-way-hit Error",
272
+ "L3M Data ECC Error",
273
+ "SDP Parity Error or SystemReadDataError from XI",
274
+ "L3 Victim Queue Parity Error",
275
+ "L3 Hardware Assertion",
246276 };
247277
248278 static const char * const smca_cs_mce_desc[] = {
249
- "Illegal request from transport layer",
250
- "Address violation",
251
- "Security violation",
252
- "Illegal response from transport layer",
253
- "Unexpected response",
254
- "Parity error on incoming request or probe response data",
255
- "Parity error on incoming read response data",
256
- "Atomic request parity",
257
- "ECC error on probe filter access",
279
+ "Illegal Request",
280
+ "Address Violation",
281
+ "Security Violation",
282
+ "Illegal Response",
283
+ "Unexpected Response",
284
+ "Request or Probe Parity Error",
285
+ "Read Response Parity Error",
286
+ "Atomic Request Parity Error",
287
+ "Probe Filter ECC Error",
288
+};
289
+
290
+static const char * const smca_cs2_mce_desc[] = {
291
+ "Illegal Request",
292
+ "Address Violation",
293
+ "Security Violation",
294
+ "Illegal Response",
295
+ "Unexpected Response",
296
+ "Request or Probe Parity Error",
297
+ "Read Response Parity Error",
298
+ "Atomic Request Parity Error",
299
+ "SDP read response had no match in the CS queue",
300
+ "Probe Filter Protocol Error",
301
+ "Probe Filter ECC Error",
302
+ "SDP read response had an unexpected RETRY error",
303
+ "Counter overflow error",
304
+ "Counter underflow error",
258305 };
259306
260307 static const char * const smca_pie_mce_desc[] = {
261
- "HW assert",
262
- "Internal PIE register security violation",
263
- "Error on GMI link",
264
- "Poison data written to internal PIE register",
308
+ "Hardware Assert",
309
+ "Register security violation",
310
+ "Link Error",
311
+ "Poison data consumption",
312
+ "A deferred error was detected in the DF"
265313 };
266314
267315 static const char * const smca_umc_mce_desc[] = {
268316 "DRAM ECC error",
269
- "Data poison error on DRAM",
317
+ "Data poison error",
270318 "SDP parity error",
271319 "Advanced peripheral bus error",
272
- "Command/address parity error",
320
+ "Address/Command parity error",
273321 "Write data CRC error",
322
+ "DCQ SRAM ECC error",
323
+ "AES SRAM ECC error",
274324 };
275325
276326 static const char * const smca_pb_mce_desc[] = {
277
- "Parameter Block RAM ECC error",
327
+ "An ECC error in the Parameter Block RAM array",
278328 };
279329
280330 static const char * const smca_psp_mce_desc[] = {
281
- "PSP RAM ECC or parity error",
331
+ "An ECC or parity error in a PSP RAM instance",
332
+};
333
+
334
+static const char * const smca_psp2_mce_desc[] = {
335
+ "High SRAM ECC or parity error",
336
+ "Low SRAM ECC or parity error",
337
+ "Instruction Cache Bank 0 ECC or parity error",
338
+ "Instruction Cache Bank 1 ECC or parity error",
339
+ "Instruction Tag Ram 0 parity error",
340
+ "Instruction Tag Ram 1 parity error",
341
+ "Data Cache Bank 0 ECC or parity error",
342
+ "Data Cache Bank 1 ECC or parity error",
343
+ "Data Cache Bank 2 ECC or parity error",
344
+ "Data Cache Bank 3 ECC or parity error",
345
+ "Data Tag Bank 0 parity error",
346
+ "Data Tag Bank 1 parity error",
347
+ "Data Tag Bank 2 parity error",
348
+ "Data Tag Bank 3 parity error",
349
+ "Dirty Data Ram parity error",
350
+ "TLB Bank 0 parity error",
351
+ "TLB Bank 1 parity error",
352
+ "System Hub Read Buffer ECC or parity error",
282353 };
283354
284355 static const char * const smca_smu_mce_desc[] = {
285
- "SMU RAM ECC or parity error",
356
+ "An ECC or parity error in an SMU RAM instance",
357
+};
358
+
359
+static const char * const smca_smu2_mce_desc[] = {
360
+ "High SRAM ECC or parity error",
361
+ "Low SRAM ECC or parity error",
362
+ "Data Cache Bank A ECC or parity error",
363
+ "Data Cache Bank B ECC or parity error",
364
+ "Data Tag Cache Bank A ECC or parity error",
365
+ "Data Tag Cache Bank B ECC or parity error",
366
+ "Instruction Cache Bank A ECC or parity error",
367
+ "Instruction Cache Bank B ECC or parity error",
368
+ "Instruction Tag Cache Bank A ECC or parity error",
369
+ "Instruction Tag Cache Bank B ECC or parity error",
370
+ "System Hub Read Buffer ECC or parity error",
371
+ "PHY RAM ECC error",
372
+};
373
+
374
+static const char * const smca_mp5_mce_desc[] = {
375
+ "High SRAM ECC or parity error",
376
+ "Low SRAM ECC or parity error",
377
+ "Data Cache Bank A ECC or parity error",
378
+ "Data Cache Bank B ECC or parity error",
379
+ "Data Tag Cache Bank A ECC or parity error",
380
+ "Data Tag Cache Bank B ECC or parity error",
381
+ "Instruction Cache Bank A ECC or parity error",
382
+ "Instruction Cache Bank B ECC or parity error",
383
+ "Instruction Tag Cache Bank A ECC or parity error",
384
+ "Instruction Tag Cache Bank B ECC or parity error",
385
+};
386
+
387
+static const char * const smca_nbio_mce_desc[] = {
388
+ "ECC or Parity error",
389
+ "PCIE error",
390
+ "SDP ErrEvent error",
391
+ "SDP Egress Poison Error",
392
+ "IOHC Internal Poison Error",
393
+};
394
+
395
+static const char * const smca_pcie_mce_desc[] = {
396
+ "CCIX PER Message logging",
397
+ "CCIX Read Response with Status: Non-Data Error",
398
+ "CCIX Write Response with Status: Non-Data Error",
399
+ "CCIX Read Response with Status: Data Error",
400
+ "CCIX Non-okay write response with data error",
286401 };
287402
288403 struct smca_mce_desc {
....@@ -292,6 +407,7 @@
292407
293408 static struct smca_mce_desc smca_mce_descs[] = {
294409 [SMCA_LS] = { smca_ls_mce_desc, ARRAY_SIZE(smca_ls_mce_desc) },
410
+ [SMCA_LS_V2] = { smca_ls2_mce_desc, ARRAY_SIZE(smca_ls2_mce_desc) },
295411 [SMCA_IF] = { smca_if_mce_desc, ARRAY_SIZE(smca_if_mce_desc) },
296412 [SMCA_L2_CACHE] = { smca_l2_mce_desc, ARRAY_SIZE(smca_l2_mce_desc) },
297413 [SMCA_DE] = { smca_de_mce_desc, ARRAY_SIZE(smca_de_mce_desc) },
....@@ -299,11 +415,17 @@
299415 [SMCA_FP] = { smca_fp_mce_desc, ARRAY_SIZE(smca_fp_mce_desc) },
300416 [SMCA_L3_CACHE] = { smca_l3_mce_desc, ARRAY_SIZE(smca_l3_mce_desc) },
301417 [SMCA_CS] = { smca_cs_mce_desc, ARRAY_SIZE(smca_cs_mce_desc) },
418
+ [SMCA_CS_V2] = { smca_cs2_mce_desc, ARRAY_SIZE(smca_cs2_mce_desc) },
302419 [SMCA_PIE] = { smca_pie_mce_desc, ARRAY_SIZE(smca_pie_mce_desc) },
303420 [SMCA_UMC] = { smca_umc_mce_desc, ARRAY_SIZE(smca_umc_mce_desc) },
304421 [SMCA_PB] = { smca_pb_mce_desc, ARRAY_SIZE(smca_pb_mce_desc) },
305422 [SMCA_PSP] = { smca_psp_mce_desc, ARRAY_SIZE(smca_psp_mce_desc) },
423
+ [SMCA_PSP_V2] = { smca_psp2_mce_desc, ARRAY_SIZE(smca_psp2_mce_desc) },
306424 [SMCA_SMU] = { smca_smu_mce_desc, ARRAY_SIZE(smca_smu_mce_desc) },
425
+ [SMCA_SMU_V2] = { smca_smu2_mce_desc, ARRAY_SIZE(smca_smu2_mce_desc) },
426
+ [SMCA_MP5] = { smca_mp5_mce_desc, ARRAY_SIZE(smca_mp5_mce_desc) },
427
+ [SMCA_NBIO] = { smca_nbio_mce_desc, ARRAY_SIZE(smca_nbio_mce_desc) },
428
+ [SMCA_PCIE] = { smca_pcie_mce_desc, ARRAY_SIZE(smca_pcie_mce_desc) },
307429 };
308430
309431 static bool f12h_mc0_mce(u16 ec, u8 xec)
....@@ -463,7 +585,7 @@
463585 : (xec ? "multimatch" : "parity")));
464586 return;
465587 }
466
- } else if (fam_ops->mc0_mce(ec, xec))
588
+ } else if (fam_ops.mc0_mce(ec, xec))
467589 ;
468590 else
469591 pr_emerg(HW_ERR "Corrupted MC0 MCE info?\n");
....@@ -577,7 +699,7 @@
577699 pr_cont("Hardware Assert.\n");
578700 else
579701 goto wrong_mc1_mce;
580
- } else if (fam_ops->mc1_mce(ec, xec))
702
+ } else if (fam_ops.mc1_mce(ec, xec))
581703 ;
582704 else
583705 goto wrong_mc1_mce;
....@@ -711,7 +833,7 @@
711833
712834 pr_emerg(HW_ERR "MC2 Error: ");
713835
714
- if (!fam_ops->mc2_mce(ec, xec))
836
+ if (!fam_ops.mc2_mce(ec, xec))
715837 pr_cont(HW_ERR "Corrupted MC2 MCE info?\n");
716838 }
717839
....@@ -874,17 +996,14 @@
874996
875997 ip_name = smca_get_long_name(bank_type);
876998
877
- pr_emerg(HW_ERR "%s Extended Error Code: %d\n", ip_name, xec);
999
+ pr_emerg(HW_ERR "%s Ext. Error Code: %d", ip_name, xec);
8781000
8791001 /* Only print the decode of valid error codes */
880
- if (xec < smca_mce_descs[bank_type].num_descs &&
881
- (hwid->xec_bitmap & BIT_ULL(xec))) {
882
- pr_emerg(HW_ERR "%s Error: ", ip_name);
883
- pr_cont("%s.\n", smca_mce_descs[bank_type].descs[xec]);
884
- }
1002
+ if (xec < smca_mce_descs[bank_type].num_descs)
1003
+ pr_cont(", %s.\n", smca_mce_descs[bank_type].descs[xec]);
8851004
8861005 if (bank_type == SMCA_UMC && xec == 0 && decode_dram_ecc)
887
- decode_dram_ecc(cpu_to_node(m->extcpu), m);
1006
+ decode_dram_ecc(topology_die_id(m->extcpu), m);
8881007 }
8891008
8901009 static inline void amd_decode_err_code(u16 ec)
....@@ -911,20 +1030,6 @@
9111030 pr_cont("\n");
9121031 }
9131032
914
-/*
915
- * Filter out unwanted MCE signatures here.
916
- */
917
-static bool amd_filter_mce(struct mce *m)
918
-{
919
- /*
920
- * NB GART TLB error reporting is disabled by default.
921
- */
922
- if (m->bank == 4 && XEC(m->status, 0x1f) == 0x5 && !report_gart_errors)
923
- return true;
924
-
925
- return false;
926
-}
927
-
9281033 static const char *decode_error_status(struct mce *m)
9291034 {
9301035 if (m->status & MCI_STATUS_UC) {
....@@ -948,8 +1053,8 @@
9481053 unsigned int fam = x86_family(m->cpuid);
9491054 int ecc;
9501055
951
- if (amd_filter_mce(m))
952
- return NOTIFY_STOP;
1056
+ if (m->kflags & MCE_HANDLED_CEC)
1057
+ return NOTIFY_DONE;
9531058
9541059 pr_emerg(HW_ERR "%s\n", decode_error_status(m));
9551060
....@@ -961,8 +1066,24 @@
9611066 ((m->status & MCI_STATUS_UC) ? "UE" :
9621067 (m->status & MCI_STATUS_DEFERRED) ? "-" : "CE"),
9631068 ((m->status & MCI_STATUS_MISCV) ? "MiscV" : "-"),
964
- ((m->status & MCI_STATUS_PCC) ? "PCC" : "-"),
965
- ((m->status & MCI_STATUS_ADDRV) ? "AddrV" : "-"));
1069
+ ((m->status & MCI_STATUS_ADDRV) ? "AddrV" : "-"),
1070
+ ((m->status & MCI_STATUS_PCC) ? "PCC" : "-"));
1071
+
1072
+ if (boot_cpu_has(X86_FEATURE_SMCA)) {
1073
+ u32 low, high;
1074
+ u32 addr = MSR_AMD64_SMCA_MCx_CONFIG(m->bank);
1075
+
1076
+ if (!rdmsr_safe(addr, &low, &high) &&
1077
+ (low & MCI_CONFIG_MCAX))
1078
+ pr_cont("|%s", ((m->status & MCI_STATUS_TCC) ? "TCC" : "-"));
1079
+
1080
+ pr_cont("|%s", ((m->status & MCI_STATUS_SYNDV) ? "SyndV" : "-"));
1081
+ }
1082
+
1083
+ /* do the two bits[14:13] together */
1084
+ ecc = (m->status >> 45) & 0x3;
1085
+ if (ecc)
1086
+ pr_cont("|%sECC", ((ecc == 2) ? "C" : "U"));
9661087
9671088 if (fam >= 0x15) {
9681089 pr_cont("|%s", (m->status & MCI_STATUS_DEFERRED ? "Deferred" : "-"));
....@@ -972,26 +1093,16 @@
9721093 pr_cont("|%s", (m->status & MCI_STATUS_POISON ? "Poison" : "-"));
9731094 }
9741095
975
- if (boot_cpu_has(X86_FEATURE_SMCA)) {
976
- u32 low, high;
977
- u32 addr = MSR_AMD64_SMCA_MCx_CONFIG(m->bank);
978
-
979
- pr_cont("|%s", ((m->status & MCI_STATUS_SYNDV) ? "SyndV" : "-"));
980
-
981
- if (!rdmsr_safe(addr, &low, &high) &&
982
- (low & MCI_CONFIG_MCAX))
983
- pr_cont("|%s", ((m->status & MCI_STATUS_TCC) ? "TCC" : "-"));
984
- }
985
-
986
- /* do the two bits[14:13] together */
987
- ecc = (m->status >> 45) & 0x3;
988
- if (ecc)
989
- pr_cont("|%sECC", ((ecc == 2) ? "C" : "U"));
1096
+ if (fam >= 0x17)
1097
+ pr_cont("|%s", (m->status & MCI_STATUS_SCRUB ? "Scrub" : "-"));
9901098
9911099 pr_cont("]: 0x%016llx\n", m->status);
9921100
9931101 if (m->status & MCI_STATUS_ADDRV)
9941102 pr_emerg(HW_ERR "Error Addr: 0x%016llx\n", m->addr);
1103
+
1104
+ if (m->ppin)
1105
+ pr_emerg(HW_ERR "PPIN: 0x%016llx\n", m->ppin);
9951106
9961107 if (boot_cpu_has(X86_FEATURE_SMCA)) {
9971108 pr_emerg(HW_ERR "IPID: 0x%016llx", m->ipid);
....@@ -1008,7 +1119,8 @@
10081119 if (m->tsc)
10091120 pr_emerg(HW_ERR "TSC: %llu\n", m->tsc);
10101121
1011
- if (!fam_ops)
1122
+ /* Doesn't matter which member to test. */
1123
+ if (!fam_ops.mc0_mce)
10121124 goto err_code;
10131125
10141126 switch (m->bank) {
....@@ -1047,7 +1159,8 @@
10471159 err_code:
10481160 amd_decode_err_code(m->status & 0xffff);
10491161
1050
- return NOTIFY_STOP;
1162
+ m->kflags |= MCE_HANDLED_EDAC;
1163
+ return NOTIFY_OK;
10511164 }
10521165
10531166 static struct notifier_block amd_mce_dec_nb = {
....@@ -1059,82 +1172,80 @@
10591172 {
10601173 struct cpuinfo_x86 *c = &boot_cpu_data;
10611174
1062
- if (c->x86_vendor != X86_VENDOR_AMD)
1175
+ if (c->x86_vendor != X86_VENDOR_AMD &&
1176
+ c->x86_vendor != X86_VENDOR_HYGON)
10631177 return -ENODEV;
10641178
1065
- fam_ops = kzalloc(sizeof(struct amd_decoder_ops), GFP_KERNEL);
1066
- if (!fam_ops)
1067
- return -ENOMEM;
1179
+ if (cpu_feature_enabled(X86_FEATURE_HYPERVISOR))
1180
+ return -ENODEV;
1181
+
1182
+ if (boot_cpu_has(X86_FEATURE_SMCA)) {
1183
+ xec_mask = 0x3f;
1184
+ goto out;
1185
+ }
10681186
10691187 switch (c->x86) {
10701188 case 0xf:
1071
- fam_ops->mc0_mce = k8_mc0_mce;
1072
- fam_ops->mc1_mce = k8_mc1_mce;
1073
- fam_ops->mc2_mce = k8_mc2_mce;
1189
+ fam_ops.mc0_mce = k8_mc0_mce;
1190
+ fam_ops.mc1_mce = k8_mc1_mce;
1191
+ fam_ops.mc2_mce = k8_mc2_mce;
10741192 break;
10751193
10761194 case 0x10:
1077
- fam_ops->mc0_mce = f10h_mc0_mce;
1078
- fam_ops->mc1_mce = k8_mc1_mce;
1079
- fam_ops->mc2_mce = k8_mc2_mce;
1195
+ fam_ops.mc0_mce = f10h_mc0_mce;
1196
+ fam_ops.mc1_mce = k8_mc1_mce;
1197
+ fam_ops.mc2_mce = k8_mc2_mce;
10801198 break;
10811199
10821200 case 0x11:
1083
- fam_ops->mc0_mce = k8_mc0_mce;
1084
- fam_ops->mc1_mce = k8_mc1_mce;
1085
- fam_ops->mc2_mce = k8_mc2_mce;
1201
+ fam_ops.mc0_mce = k8_mc0_mce;
1202
+ fam_ops.mc1_mce = k8_mc1_mce;
1203
+ fam_ops.mc2_mce = k8_mc2_mce;
10861204 break;
10871205
10881206 case 0x12:
1089
- fam_ops->mc0_mce = f12h_mc0_mce;
1090
- fam_ops->mc1_mce = k8_mc1_mce;
1091
- fam_ops->mc2_mce = k8_mc2_mce;
1207
+ fam_ops.mc0_mce = f12h_mc0_mce;
1208
+ fam_ops.mc1_mce = k8_mc1_mce;
1209
+ fam_ops.mc2_mce = k8_mc2_mce;
10921210 break;
10931211
10941212 case 0x14:
1095
- fam_ops->mc0_mce = cat_mc0_mce;
1096
- fam_ops->mc1_mce = cat_mc1_mce;
1097
- fam_ops->mc2_mce = k8_mc2_mce;
1213
+ fam_ops.mc0_mce = cat_mc0_mce;
1214
+ fam_ops.mc1_mce = cat_mc1_mce;
1215
+ fam_ops.mc2_mce = k8_mc2_mce;
10981216 break;
10991217
11001218 case 0x15:
11011219 xec_mask = c->x86_model == 0x60 ? 0x3f : 0x1f;
11021220
1103
- fam_ops->mc0_mce = f15h_mc0_mce;
1104
- fam_ops->mc1_mce = f15h_mc1_mce;
1105
- fam_ops->mc2_mce = f15h_mc2_mce;
1221
+ fam_ops.mc0_mce = f15h_mc0_mce;
1222
+ fam_ops.mc1_mce = f15h_mc1_mce;
1223
+ fam_ops.mc2_mce = f15h_mc2_mce;
11061224 break;
11071225
11081226 case 0x16:
11091227 xec_mask = 0x1f;
1110
- fam_ops->mc0_mce = cat_mc0_mce;
1111
- fam_ops->mc1_mce = cat_mc1_mce;
1112
- fam_ops->mc2_mce = f16h_mc2_mce;
1228
+ fam_ops.mc0_mce = cat_mc0_mce;
1229
+ fam_ops.mc1_mce = cat_mc1_mce;
1230
+ fam_ops.mc2_mce = f16h_mc2_mce;
11131231 break;
11141232
11151233 case 0x17:
1116
- xec_mask = 0x3f;
1117
- if (!boot_cpu_has(X86_FEATURE_SMCA)) {
1118
- printk(KERN_WARNING "Decoding supported only on Scalable MCA processors.\n");
1119
- goto err_out;
1120
- }
1121
- break;
1234
+ case 0x18:
1235
+ pr_warn_once("Decoding supported only on Scalable MCA processors.\n");
1236
+ return -EINVAL;
11221237
11231238 default:
11241239 printk(KERN_WARNING "Huh? What family is it: 0x%x?!\n", c->x86);
1125
- goto err_out;
1240
+ return -EINVAL;
11261241 }
11271242
1243
+out:
11281244 pr_info("MCE: In-kernel MCE decoding enabled.\n");
11291245
11301246 mce_register_decode_chain(&amd_mce_dec_nb);
11311247
11321248 return 0;
1133
-
1134
-err_out:
1135
- kfree(fam_ops);
1136
- fam_ops = NULL;
1137
- return -EINVAL;
11381249 }
11391250 early_initcall(mce_amd_init);
11401251
....@@ -1142,7 +1253,6 @@
11421253 static void __exit mce_amd_exit(void)
11431254 {
11441255 mce_unregister_decode_chain(&amd_mce_dec_nb);
1145
- kfree(fam_ops);
11461256 }
11471257
11481258 MODULE_DESCRIPTION("AMD MCE decoder");