hc
2023-12-11 d2ccde1c8e90d38cee87a1b0309ad2827f3fd30d
kernel/drivers/edac/sb_edac.c
....@@ -1,10 +1,8 @@
1
+// SPDX-License-Identifier: GPL-2.0-only
12 /* Intel Sandy Bridge -EN/-EP/-EX Memory Controller kernel module
23 *
34 * This driver supports the memory controllers found on the Intel
45 * processor family Sandy Bridge.
5
- *
6
- * This file may be distributed under the terms of the
7
- * GNU General Public License version 2 only.
86 *
97 * Copyright (c) 2011 by:
108 * Mauro Carvalho Chehab
....@@ -256,18 +254,20 @@
256254 * FIXME: Implement the error count reads directly
257255 */
258256
259
-static const u32 correrrcnt[] = {
260
- 0x104, 0x108, 0x10c, 0x110,
261
-};
262
-
263257 #define RANK_ODD_OV(reg) GET_BITFIELD(reg, 31, 31)
264258 #define RANK_ODD_ERR_CNT(reg) GET_BITFIELD(reg, 16, 30)
265259 #define RANK_EVEN_OV(reg) GET_BITFIELD(reg, 15, 15)
266260 #define RANK_EVEN_ERR_CNT(reg) GET_BITFIELD(reg, 0, 14)
267261
262
+#if 0 /* Currently unused */
263
+static const u32 correrrcnt[] = {
264
+ 0x104, 0x108, 0x10c, 0x110,
265
+};
266
+
268267 static const u32 correrrthrsld[] = {
269268 0x11c, 0x120, 0x124, 0x128,
270269 };
270
+#endif
271271
272272 #define RANK_ODD_ERR_THRSLD(reg) GET_BITFIELD(reg, 16, 30)
273273 #define RANK_EVEN_ERR_THRSLD(reg) GET_BITFIELD(reg, 0, 14)
....@@ -326,6 +326,7 @@
326326 const struct interleave_pkg *interleave_pkg;
327327 u8 max_sad;
328328 u8 (*get_node_id)(struct sbridge_pvt *pvt);
329
+ u8 (*get_ha)(u8 bank);
329330 enum mem_type (*get_memory_type)(struct sbridge_pvt *pvt);
330331 enum dev_type (*get_width)(struct sbridge_pvt *pvt, u32 mtr);
331332 struct pci_dev *pci_vtd;
....@@ -938,12 +939,9 @@
938939
939940 static enum dev_type __ibridge_get_width(u32 mtr)
940941 {
941
- enum dev_type type;
942
+ enum dev_type type = DEV_UNKNOWN;
942943
943944 switch (mtr) {
944
- case 3:
945
- type = DEV_UNKNOWN;
946
- break;
947945 case 2:
948946 type = DEV_X16;
949947 break;
....@@ -1002,6 +1000,39 @@
10021000 return GET_BITFIELD(reg, 0, 2);
10031001 }
10041002
1003
+/*
1004
+ * Map the reporting bank number to a memory controller (also
1005
+ * known as "ha" for "home agent"). Sandy Bridge only has one
1006
+ * memory controller per socket, so @bank is ignored and the
1007
+ * answer is always zero.
1008
+ */
1009
+static u8 sbridge_get_ha(u8 bank)
1010
+{
1011
+ return 0;
1012
+}
1013
+
1014
+/*
1015
+ * On Ivy Bridge, Haswell and Broadwell the error may be in a
1016
+ * home agent bank (7, 8), or one of the per-channel memory
1017
+ * controller banks (9 .. 16). Any other bank yields 0xff.
1018
+ */
1019
+static u8 ibridge_get_ha(u8 bank)
1020
+{
1021
+ switch (bank) {
1022
+ case 7 ... 8:
1023
+ return bank - 7; /* bank 7 -> ha 0, bank 8 -> ha 1 */
1024
+ case 9 ... 16:
1025
+ return (bank - 9) / 4; /* banks 9-12 -> ha 0, banks 13-16 -> ha 1 */
1026
+ default:
1027
+ return 0xff; /* not a home agent or memory controller bank */
1028
+ }
1029
+}
1030
+
1031
+/* Not used, but included for safety/symmetry; reports 0xff for every bank */
1032
+static u8 knl_get_ha(u8 bank)
1033
+{
1034
+ return 0xff;
1035
+}
10051036
10061037 static u64 haswell_get_tolm(struct sbridge_pvt *pvt)
10071038 {
....@@ -1308,7 +1339,7 @@
13081339 */
13091340 static int knl_get_dimm_capacity(struct sbridge_pvt *pvt, u64 *mc_sizes)
13101341 {
1311
- u64 sad_base, sad_size, sad_limit = 0;
1342
+ u64 sad_base, sad_limit = 0;
13121343 u64 tad_base, tad_size, tad_limit, tad_deadspace, tad_livespace;
13131344 int sad_rule = 0;
13141345 int tad_rule = 0;
....@@ -1395,7 +1426,6 @@
13951426 edram_only = KNL_EDRAM_ONLY(dram_rule);
13961427
13971428 sad_limit = pvt->info.sad_limit(dram_rule)+1;
1398
- sad_size = sad_limit - sad_base;
13991429
14001430 pci_read_config_dword(pvt->pci_sad0,
14011431 pvt->info.interleave_list[sad_rule], &interleave_reg);
....@@ -1479,7 +1509,6 @@
14791509 sad_actual_size[mc] += tad_size;
14801510 }
14811511 }
1482
- tad_base = tad_limit+1;
14831512 }
14841513 }
14851514
....@@ -1589,7 +1618,7 @@
15891618 }
15901619
15911620 for (j = 0; j < max_dimms_per_channel; j++) {
1592
- dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, mci->n_layers, i, j, 0);
1621
+ dimm = edac_get_dimm(mci, i, j, 0);
15931622 if (pvt->info.type == KNIGHTS_LANDING) {
15941623 pci_read_config_dword(pvt->knl.pci_channel[i],
15951624 knl_mtr_reg, &mtr);
....@@ -2203,6 +2232,60 @@
22032232 limit,
22042233 rir_way,
22052234 idx);
2235
+
2236
+ return 0;
2237
+}
2238
+/* Fill in *socket, *ha and *channel_mask for a memory error using the fields of the MCE record @m; returns 0 on success, or -EINVAL with an explanation written to @msg. */
2239
+static int get_memory_error_data_from_mce(struct mem_ctl_info *mci,
2240
+ const struct mce *m, u8 *socket,
2241
+ u8 *ha, long *channel_mask,
2242
+ char *msg)
2243
+{
2244
+ u32 reg, channel = GET_BITFIELD(m->status, 0, 3);
2245
+ struct mem_ctl_info *new_mci;
2246
+ struct sbridge_pvt *pvt;
2247
+ struct pci_dev *pci_ha;
2248
+ bool tad0;
2249
+ /* The channel number is taken from bits 0-3 of the MCE status word. */
2250
+ if (channel >= NUM_CHANNELS) {
2251
+ sprintf(msg, "Invalid channel 0x%x", channel);
2252
+ return -EINVAL;
2253
+ }
2254
+ /* Map the reporting bank to a home agent; only the values 0 and 1 are accepted. */
2255
+ pvt = mci->pvt_info;
2256
+ if (!pvt->info.get_ha) {
2257
+ sprintf(msg, "No get_ha()");
2258
+ return -EINVAL;
2259
+ }
2260
+ *ha = pvt->info.get_ha(m->bank);
2261
+ if (*ha != 0 && *ha != 1) {
2262
+ sprintf(msg, "Impossible bank %d", m->bank);
2263
+ return -EINVAL;
2264
+ }
2265
+ /* Look up the mci for this socket/ha pair; its pvt_info is the one used from here on. */
2266
+ *socket = m->socketid;
2267
+ new_mci = get_mci_for_node_id(*socket, *ha);
2268
+ if (!new_mci) {
2269
+ strcpy(msg, "mci socket got corrupted!");
2270
+ return -EINVAL;
2271
+ }
2272
+ /* tad0: the error address is at or below TAD_LIMIT() of the tad_dram_rule[0] register. */
2273
+ pvt = new_mci->pvt_info;
2274
+ pci_ha = pvt->pci_ha;
2275
+ pci_read_config_dword(pci_ha, tad_dram_rule[0], &reg);
2276
+ tad0 = m->addr <= TAD_LIMIT(reg);
2277
+ /* Start with the reported channel; when the address is mirrored also set channel + 2 (mod 4). */
2278
+ *channel_mask = 1 << channel;
2279
+ if (pvt->mirror_mode == FULL_MIRRORING ||
2280
+ (pvt->mirror_mode == ADDR_RANGE_MIRRORING && tad0)) {
2281
+ *channel_mask |= 1 << ((channel + 2) % 4);
2282
+ pvt->is_cur_addr_mirrored = true;
2283
+ } else {
2284
+ pvt->is_cur_addr_mirrored = false;
2285
+ }
2286
+ /* In lockstep mode channel + 1 (mod 4) is added to the mask as well. */
2287
+ if (pvt->is_lockstep)
2288
+ *channel_mask |= 1 << ((channel + 1) % 4);
22062289
22072290 return 0;
22082291 }
....@@ -2867,7 +2950,7 @@
28672950 struct mem_ctl_info *new_mci;
28682951 struct sbridge_pvt *pvt = mci->pvt_info;
28692952 enum hw_event_mc_err_type tp_event;
2870
- char *type, *optype, msg[256];
2953
+ char *optype, msg[256];
28712954 bool ripv = GET_BITFIELD(m->mcgstatus, 0, 0);
28722955 bool overflow = GET_BITFIELD(m->status, 62, 62);
28732956 bool uncorrected_error = GET_BITFIELD(m->status, 61, 61);
....@@ -2877,10 +2960,16 @@
28772960 u32 errcode = GET_BITFIELD(m->status, 0, 15);
28782961 u32 channel = GET_BITFIELD(m->status, 0, 3);
28792962 u32 optypenum = GET_BITFIELD(m->status, 4, 6);
2963
+ /*
2964
+ * Bits 5-0 of MCi_MISC give the least significant bit that is valid.
2965
+ * A value 6 is for cache line aligned address, a value 12 is for page
2966
+ * aligned address reported by patrol scrubber.
2967
+ */
2968
+ u32 lsb = GET_BITFIELD(m->misc, 0, 5);
28802969 long channel_mask, first_channel;
2881
- u8 rank, socket, ha;
2970
+ u8 rank = 0xff, socket, ha;
28822971 int rc, dimm;
2883
- char *area_type = NULL;
2972
+ char *area_type = "DRAM";
28842973
28852974 if (pvt->info.type != SANDY_BRIDGE)
28862975 recoverable = true;
....@@ -2890,14 +2979,11 @@
28902979 if (uncorrected_error) {
28912980 core_err_cnt = 1;
28922981 if (ripv) {
2893
- type = "FATAL";
2894
- tp_event = HW_EVENT_ERR_FATAL;
2895
- } else {
2896
- type = "NON_FATAL";
28972982 tp_event = HW_EVENT_ERR_UNCORRECTED;
2983
+ } else {
2984
+ tp_event = HW_EVENT_ERR_FATAL;
28982985 }
28992986 } else {
2900
- type = "CORRECTED";
29012987 tp_event = HW_EVENT_ERR_CORRECTED;
29022988 }
29032989
....@@ -2965,9 +3051,13 @@
29653051 optype, msg);
29663052 }
29673053 return;
2968
- } else {
3054
+ } else if (lsb < 12) {
29693055 rc = get_memory_error_data(mci, m->addr, &socket, &ha,
2970
- &channel_mask, &rank, &area_type, msg);
3056
+ &channel_mask, &rank,
3057
+ &area_type, msg);
3058
+ } else {
3059
+ rc = get_memory_error_data_from_mce(mci, m, &socket, &ha,
3060
+ &channel_mask, msg);
29713061 }
29723062
29733063 if (rc < 0)
....@@ -2982,13 +3072,14 @@
29823072
29833073 first_channel = find_first_bit(&channel_mask, NUM_CHANNELS);
29843074
2985
- if (rank < 4)
3075
+ if (rank == 0xff)
3076
+ dimm = -1;
3077
+ else if (rank < 4)
29863078 dimm = 0;
29873079 else if (rank < 8)
29883080 dimm = 1;
29893081 else
29903082 dimm = 2;
2991
-
29923083
29933084 /*
29943085 * FIXME: On some memory configurations (mirror, lockstep), the
....@@ -3040,7 +3131,7 @@
30403131 struct mem_ctl_info *mci;
30413132 char *type;
30423133
3043
- if (edac_get_report_status() == EDAC_REPORTING_DISABLED)
3134
+ if (mce->kflags & MCE_HANDLED_CEC)
30443135 return NOTIFY_DONE;
30453136
30463137 /*
....@@ -3089,7 +3180,8 @@
30893180 sbridge_mce_output_error(mci, mce);
30903181
30913182 /* Advise mcelog that the error was handled */
3092
- return NOTIFY_STOP;
3183
+ mce->kflags |= MCE_HANDLED_EDAC;
3184
+ return NOTIFY_OK;
30933185 }
30943186
30953187 static struct notifier_block sbridge_mce_dec = {
....@@ -3104,7 +3196,6 @@
31043196 static void sbridge_unregister_mci(struct sbridge_dev *sbridge_dev)
31053197 {
31063198 struct mem_ctl_info *mci = sbridge_dev->mci;
3107
- struct sbridge_pvt *pvt;
31083199
31093200 if (unlikely(!mci || !mci->pvt_info)) {
31103201 edac_dbg(0, "MC: dev = %p\n", &sbridge_dev->pdev[0]->dev);
....@@ -3112,8 +3203,6 @@
31123203 sbridge_printk(KERN_ERR, "Couldn't find mci handler\n");
31133204 return;
31143205 }
3115
-
3116
- pvt = mci->pvt_info;
31173206
31183207 edac_dbg(0, "MC: mci = %p, dev = %p\n",
31193208 mci, &sbridge_dev->pdev[0]->dev);
....@@ -3176,6 +3265,7 @@
31763265 pvt->info.dram_rule = ibridge_dram_rule;
31773266 pvt->info.get_memory_type = get_memory_type;
31783267 pvt->info.get_node_id = get_node_id;
3268
+ pvt->info.get_ha = ibridge_get_ha;
31793269 pvt->info.rir_limit = rir_limit;
31803270 pvt->info.sad_limit = sad_limit;
31813271 pvt->info.interleave_mode = interleave_mode;
....@@ -3200,6 +3290,7 @@
32003290 pvt->info.dram_rule = sbridge_dram_rule;
32013291 pvt->info.get_memory_type = get_memory_type;
32023292 pvt->info.get_node_id = get_node_id;
3293
+ pvt->info.get_ha = sbridge_get_ha;
32033294 pvt->info.rir_limit = rir_limit;
32043295 pvt->info.sad_limit = sad_limit;
32053296 pvt->info.interleave_mode = interleave_mode;
....@@ -3224,6 +3315,7 @@
32243315 pvt->info.dram_rule = ibridge_dram_rule;
32253316 pvt->info.get_memory_type = haswell_get_memory_type;
32263317 pvt->info.get_node_id = haswell_get_node_id;
3318
+ pvt->info.get_ha = ibridge_get_ha;
32273319 pvt->info.rir_limit = haswell_rir_limit;
32283320 pvt->info.sad_limit = sad_limit;
32293321 pvt->info.interleave_mode = interleave_mode;
....@@ -3248,6 +3340,7 @@
32483340 pvt->info.dram_rule = ibridge_dram_rule;
32493341 pvt->info.get_memory_type = haswell_get_memory_type;
32503342 pvt->info.get_node_id = haswell_get_node_id;
3343
+ pvt->info.get_ha = ibridge_get_ha;
32513344 pvt->info.rir_limit = haswell_rir_limit;
32523345 pvt->info.sad_limit = sad_limit;
32533346 pvt->info.interleave_mode = interleave_mode;
....@@ -3272,6 +3365,7 @@
32723365 pvt->info.dram_rule = knl_dram_rule;
32733366 pvt->info.get_memory_type = knl_get_memory_type;
32743367 pvt->info.get_node_id = knl_get_node_id;
3368
+ pvt->info.get_ha = knl_get_ha;
32753369 pvt->info.rir_limit = NULL;
32763370 pvt->info.sad_limit = knl_sad_limit;
32773371 pvt->info.interleave_mode = knl_interleave_mode;
....@@ -3323,17 +3417,14 @@
33233417 return rc;
33243418 }
33253419
3326
-#define ICPU(model, table) \
3327
- { X86_VENDOR_INTEL, 6, model, 0, (unsigned long)&table }
3328
-
33293420 static const struct x86_cpu_id sbridge_cpuids[] = {
3330
- ICPU(INTEL_FAM6_SANDYBRIDGE_X, pci_dev_descr_sbridge_table),
3331
- ICPU(INTEL_FAM6_IVYBRIDGE_X, pci_dev_descr_ibridge_table),
3332
- ICPU(INTEL_FAM6_HASWELL_X, pci_dev_descr_haswell_table),
3333
- ICPU(INTEL_FAM6_BROADWELL_X, pci_dev_descr_broadwell_table),
3334
- ICPU(INTEL_FAM6_BROADWELL_XEON_D, pci_dev_descr_broadwell_table),
3335
- ICPU(INTEL_FAM6_XEON_PHI_KNL, pci_dev_descr_knl_table),
3336
- ICPU(INTEL_FAM6_XEON_PHI_KNM, pci_dev_descr_knl_table),
3421
+ X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE_X, &pci_dev_descr_sbridge_table),
3422
+ X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE_X, &pci_dev_descr_ibridge_table),
3423
+ X86_MATCH_INTEL_FAM6_MODEL(HASWELL_X, &pci_dev_descr_haswell_table),
3424
+ X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_X, &pci_dev_descr_broadwell_table),
3425
+ X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_D, &pci_dev_descr_broadwell_table),
3426
+ X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL, &pci_dev_descr_knl_table),
3427
+ X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM, &pci_dev_descr_knl_table),
33373428 { }
33383429 };
33393430 MODULE_DEVICE_TABLE(x86cpu, sbridge_cpuids);
....@@ -3419,6 +3510,9 @@
34193510 if (owner && strncmp(owner, EDAC_MOD_STR, sizeof(EDAC_MOD_STR)))
34203511 return -EBUSY;
34213512
3513
+ if (cpu_feature_enabled(X86_FEATURE_HYPERVISOR))
3514
+ return -ENODEV;
3515
+
34223516 id = x86_match_cpu(sbridge_cpuids);
34233517 if (!id)
34243518 return -ENODEV;
....@@ -3430,8 +3524,6 @@
34303524
34313525 if (rc >= 0) {
34323526 mce_register_decode_chain(&sbridge_mce_dec);
3433
- if (edac_get_report_status() == EDAC_REPORTING_DISABLED)
3434
- sbridge_printk(KERN_WARNING, "Loading driver, error reporting disabled.\n");
34353527 return 0;
34363528 }
34373529
....@@ -3460,6 +3552,6 @@
34603552
34613553 MODULE_LICENSE("GPL");
34623554 MODULE_AUTHOR("Mauro Carvalho Chehab");
3463
-MODULE_AUTHOR("Red Hat Inc. (http://www.redhat.com)");
3555
+MODULE_AUTHOR("Red Hat Inc. (https://www.redhat.com)");
34643556 MODULE_DESCRIPTION("MC Driver for Intel Sandy Bridge and Ivy Bridge memory controllers - "
34653557 SBRIDGE_REVISION);