From 6778948f9de86c3cfaf36725a7c87dcff9ba247f Mon Sep 17 00:00:00 2001
From: hc <hc@nodka.com>
Date: Mon, 11 Dec 2023 08:20:59 +0000
Subject: [PATCH] kernel_5.10 no rt
---
kernel/drivers/edac/amd64_edac.c | 715 ++++++++++++++++++++++++++++++++++++++---------------------
1 files changed, 459 insertions(+), 256 deletions(-)
diff --git a/kernel/drivers/edac/amd64_edac.c b/kernel/drivers/edac/amd64_edac.c
index fe25c98..f5635df 100644
--- a/kernel/drivers/edac/amd64_edac.c
+++ b/kernel/drivers/edac/amd64_edac.c
@@ -1,10 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
#include "amd64_edac.h"
#include <asm/amd_nb.h>
static struct edac_pci_ctl_info *pci_ctl;
-
-static int report_gart_errors;
-module_param(report_gart_errors, int, 0644);
/*
* Set by command line parameter. If BIOS has enabled the ECC, this override is
@@ -14,6 +12,8 @@
module_param(ecc_enable_override, int, 0644);
static struct msr __percpu *msrs;
+
+static struct amd64_family_type *fam_type;
/* Per-node stuff */
static struct ecc_settings **ecc_stngs;
@@ -214,7 +214,7 @@
scrubval = scrubrates[i].scrubval;
- if (pvt->fam == 0x17) {
+ if (pvt->umc) {
__f17h_set_scrubval(pvt, scrubval);
} else if (pvt->fam == 0x15 && pvt->model == 0x60) {
f15h_select_dct(pvt, 0);
@@ -256,19 +256,7 @@
int i, retval = -EINVAL;
u32 scrubval = 0;
- switch (pvt->fam) {
- case 0x15:
- /* Erratum #505 */
- if (pvt->model < 0x10)
- f15h_select_dct(pvt, 0);
-
- if (pvt->model == 0x60)
- amd64_read_pci_cfg(pvt->F2, F15H_M60H_SCRCTRL, &scrubval);
- else
- amd64_read_pci_cfg(pvt->F3, SCRCTRL, &scrubval);
- break;
-
- case 0x17:
+ if (pvt->umc) {
amd64_read_pci_cfg(pvt->F6, F17H_SCR_BASE_ADDR, &scrubval);
if (scrubval & BIT(0)) {
amd64_read_pci_cfg(pvt->F6, F17H_SCR_LIMIT_ADDR, &scrubval);
@@ -277,11 +265,17 @@
} else {
scrubval = 0;
}
- break;
+ } else if (pvt->fam == 0x15) {
+ /* Erratum #505 */
+ if (pvt->model < 0x10)
+ f15h_select_dct(pvt, 0);
- default:
+ if (pvt->model == 0x60)
+ amd64_read_pci_cfg(pvt->F2, F15H_M60H_SCRCTRL, &scrubval);
+ else
+ amd64_read_pci_cfg(pvt->F3, SCRCTRL, &scrubval);
+ } else {
amd64_read_pci_cfg(pvt->F3, SCRCTRL, &scrubval);
- break;
}
scrubval = scrubval & 0x001F;
@@ -452,6 +446,9 @@
#define for_each_chip_select_mask(i, dct, pvt) \
for (i = 0; i < pvt->csels[dct].m_cnt; i++)
+
+#define for_each_umc(i) \
+ for (i = 0; i < fam_type->max_mcs; i++)
/*
* @input_addr is an InputAddr associated with the node given by mci. Return the
@@ -726,7 +723,7 @@
if (pvt->umc) {
u8 i, umc_en_mask = 0, dimm_ecc_en_mask = 0;
- for (i = 0; i < NUM_UMCS; i++) {
+ for_each_umc(i) {
if (!(pvt->umc[i].sdp_ctrl & UMC_SDP_INIT))
continue;
@@ -785,24 +782,61 @@
(dclr & BIT(15)) ? "yes" : "no");
}
+#define CS_EVEN_PRIMARY BIT(0)
+#define CS_ODD_PRIMARY BIT(1)
+#define CS_EVEN_SECONDARY BIT(2)
+#define CS_ODD_SECONDARY BIT(3)
+#define CS_3R_INTERLEAVE BIT(4)
+
+#define CS_EVEN (CS_EVEN_PRIMARY | CS_EVEN_SECONDARY)
+#define CS_ODD (CS_ODD_PRIMARY | CS_ODD_SECONDARY)
+
+static int f17_get_cs_mode(int dimm, u8 ctrl, struct amd64_pvt *pvt)
+{
+ u8 base, count = 0;
+ int cs_mode = 0;
+
+ if (csrow_enabled(2 * dimm, ctrl, pvt))
+ cs_mode |= CS_EVEN_PRIMARY;
+
+ if (csrow_enabled(2 * dimm + 1, ctrl, pvt))
+ cs_mode |= CS_ODD_PRIMARY;
+
+ /* Asymmetric dual-rank DIMM support. */
+ if (csrow_sec_enabled(2 * dimm + 1, ctrl, pvt))
+ cs_mode |= CS_ODD_SECONDARY;
+
+ /*
+ * 3 Rank inteleaving support.
+ * There should be only three bases enabled and their two masks should
+ * be equal.
+ */
+ for_each_chip_select(base, ctrl, pvt)
+ count += csrow_enabled(base, ctrl, pvt);
+
+ if (count == 3 &&
+ pvt->csels[ctrl].csmasks[0] == pvt->csels[ctrl].csmasks[1]) {
+ edac_dbg(1, "3R interleaving in use.\n");
+ cs_mode |= CS_3R_INTERLEAVE;
+ }
+
+ return cs_mode;
+}
+
static void debug_display_dimm_sizes_df(struct amd64_pvt *pvt, u8 ctrl)
{
- int dimm, size0, size1, cs0, cs1;
+ int dimm, size0, size1, cs0, cs1, cs_mode;
edac_printk(KERN_DEBUG, EDAC_MC, "UMC%d chip selects:\n", ctrl);
- for (dimm = 0; dimm < 4; dimm++) {
- size0 = 0;
+ for (dimm = 0; dimm < 2; dimm++) {
cs0 = dimm * 2;
-
- if (csrow_enabled(cs0, ctrl, pvt))
- size0 = pvt->ops->dbam_to_cs(pvt, ctrl, 0, cs0);
-
- size1 = 0;
cs1 = dimm * 2 + 1;
- if (csrow_enabled(cs1, ctrl, pvt))
- size1 = pvt->ops->dbam_to_cs(pvt, ctrl, 0, cs1);
+ cs_mode = f17_get_cs_mode(dimm, ctrl, pvt);
+
+ size0 = pvt->ops->dbam_to_cs(pvt, ctrl, cs_mode, cs0);
+ size1 = pvt->ops->dbam_to_cs(pvt, ctrl, cs_mode, cs1);
amd64_info(EDAC_MC ": %d: %5dMB %d: %5dMB\n",
cs0, size0,
@@ -815,7 +849,7 @@
struct amd64_umc *umc;
u32 i, tmp, umc_base;
- for (i = 0; i < NUM_UMCS; i++) {
+ for_each_umc(i) {
umc_base = get_umc_base(i);
umc = &pvt->umc[i];
@@ -898,8 +932,7 @@
edac_dbg(1, " DramHoleValid: %s\n", dhar_valid(pvt) ? "yes" : "no");
- amd64_info("using %s syndromes.\n",
- ((pvt->ecc_sym_sz == 8) ? "x8" : "x4"));
+ amd64_info("using x%u syndromes.\n", pvt->ecc_sym_sz);
}
/*
@@ -913,9 +946,68 @@
} else if (pvt->fam == 0x15 && pvt->model == 0x30) {
pvt->csels[0].b_cnt = pvt->csels[1].b_cnt = 4;
pvt->csels[0].m_cnt = pvt->csels[1].m_cnt = 2;
+ } else if (pvt->fam >= 0x17) {
+ int umc;
+
+ for_each_umc(umc) {
+ pvt->csels[umc].b_cnt = 4;
+ pvt->csels[umc].m_cnt = 2;
+ }
+
} else {
pvt->csels[0].b_cnt = pvt->csels[1].b_cnt = 8;
pvt->csels[0].m_cnt = pvt->csels[1].m_cnt = 4;
+ }
+}
+
+static void read_umc_base_mask(struct amd64_pvt *pvt)
+{
+ u32 umc_base_reg, umc_base_reg_sec;
+ u32 umc_mask_reg, umc_mask_reg_sec;
+ u32 base_reg, base_reg_sec;
+ u32 mask_reg, mask_reg_sec;
+ u32 *base, *base_sec;
+ u32 *mask, *mask_sec;
+ int cs, umc;
+
+ for_each_umc(umc) {
+ umc_base_reg = get_umc_base(umc) + UMCCH_BASE_ADDR;
+ umc_base_reg_sec = get_umc_base(umc) + UMCCH_BASE_ADDR_SEC;
+
+ for_each_chip_select(cs, umc, pvt) {
+ base = &pvt->csels[umc].csbases[cs];
+ base_sec = &pvt->csels[umc].csbases_sec[cs];
+
+ base_reg = umc_base_reg + (cs * 4);
+ base_reg_sec = umc_base_reg_sec + (cs * 4);
+
+ if (!amd_smn_read(pvt->mc_node_id, base_reg, base))
+ edac_dbg(0, " DCSB%d[%d]=0x%08x reg: 0x%x\n",
+ umc, cs, *base, base_reg);
+
+ if (!amd_smn_read(pvt->mc_node_id, base_reg_sec, base_sec))
+ edac_dbg(0, " DCSB_SEC%d[%d]=0x%08x reg: 0x%x\n",
+ umc, cs, *base_sec, base_reg_sec);
+ }
+
+ umc_mask_reg = get_umc_base(umc) + UMCCH_ADDR_MASK;
+ umc_mask_reg_sec = get_umc_base(umc) + UMCCH_ADDR_MASK_SEC;
+
+ for_each_chip_select_mask(cs, umc, pvt) {
+ mask = &pvt->csels[umc].csmasks[cs];
+ mask_sec = &pvt->csels[umc].csmasks_sec[cs];
+
+ mask_reg = umc_mask_reg + (cs * 4);
+ mask_reg_sec = umc_mask_reg_sec + (cs * 4);
+
+ if (!amd_smn_read(pvt->mc_node_id, mask_reg, mask))
+ edac_dbg(0, " DCSM%d[%d]=0x%08x reg: 0x%x\n",
+ umc, cs, *mask, mask_reg);
+
+ if (!amd_smn_read(pvt->mc_node_id, mask_reg_sec, mask_sec))
+ edac_dbg(0, " DCSM_SEC%d[%d]=0x%08x reg: 0x%x\n",
+ umc, cs, *mask_sec, mask_reg_sec);
+ }
}
}
@@ -924,84 +1016,65 @@
*/
static void read_dct_base_mask(struct amd64_pvt *pvt)
{
- int base_reg0, base_reg1, mask_reg0, mask_reg1, cs;
+ int cs;
prep_chip_selects(pvt);
- if (pvt->umc) {
- base_reg0 = get_umc_base(0) + UMCCH_BASE_ADDR;
- base_reg1 = get_umc_base(1) + UMCCH_BASE_ADDR;
- mask_reg0 = get_umc_base(0) + UMCCH_ADDR_MASK;
- mask_reg1 = get_umc_base(1) + UMCCH_ADDR_MASK;
- } else {
- base_reg0 = DCSB0;
- base_reg1 = DCSB1;
- mask_reg0 = DCSM0;
- mask_reg1 = DCSM1;
- }
+ if (pvt->umc)
+ return read_umc_base_mask(pvt);
for_each_chip_select(cs, 0, pvt) {
- int reg0 = base_reg0 + (cs * 4);
- int reg1 = base_reg1 + (cs * 4);
+ int reg0 = DCSB0 + (cs * 4);
+ int reg1 = DCSB1 + (cs * 4);
u32 *base0 = &pvt->csels[0].csbases[cs];
u32 *base1 = &pvt->csels[1].csbases[cs];
- if (pvt->umc) {
- if (!amd_smn_read(pvt->mc_node_id, reg0, base0))
- edac_dbg(0, " DCSB0[%d]=0x%08x reg: 0x%x\n",
- cs, *base0, reg0);
+ if (!amd64_read_dct_pci_cfg(pvt, 0, reg0, base0))
+ edac_dbg(0, " DCSB0[%d]=0x%08x reg: F2x%x\n",
+ cs, *base0, reg0);
- if (!amd_smn_read(pvt->mc_node_id, reg1, base1))
- edac_dbg(0, " DCSB1[%d]=0x%08x reg: 0x%x\n",
- cs, *base1, reg1);
- } else {
- if (!amd64_read_dct_pci_cfg(pvt, 0, reg0, base0))
- edac_dbg(0, " DCSB0[%d]=0x%08x reg: F2x%x\n",
- cs, *base0, reg0);
+ if (pvt->fam == 0xf)
+ continue;
- if (pvt->fam == 0xf)
- continue;
-
- if (!amd64_read_dct_pci_cfg(pvt, 1, reg0, base1))
- edac_dbg(0, " DCSB1[%d]=0x%08x reg: F2x%x\n",
- cs, *base1, (pvt->fam == 0x10) ? reg1
- : reg0);
- }
+ if (!amd64_read_dct_pci_cfg(pvt, 1, reg0, base1))
+ edac_dbg(0, " DCSB1[%d]=0x%08x reg: F2x%x\n",
+ cs, *base1, (pvt->fam == 0x10) ? reg1
+ : reg0);
}
for_each_chip_select_mask(cs, 0, pvt) {
- int reg0 = mask_reg0 + (cs * 4);
- int reg1 = mask_reg1 + (cs * 4);
+ int reg0 = DCSM0 + (cs * 4);
+ int reg1 = DCSM1 + (cs * 4);
u32 *mask0 = &pvt->csels[0].csmasks[cs];
u32 *mask1 = &pvt->csels[1].csmasks[cs];
- if (pvt->umc) {
- if (!amd_smn_read(pvt->mc_node_id, reg0, mask0))
- edac_dbg(0, " DCSM0[%d]=0x%08x reg: 0x%x\n",
- cs, *mask0, reg0);
+ if (!amd64_read_dct_pci_cfg(pvt, 0, reg0, mask0))
+ edac_dbg(0, " DCSM0[%d]=0x%08x reg: F2x%x\n",
+ cs, *mask0, reg0);
- if (!amd_smn_read(pvt->mc_node_id, reg1, mask1))
- edac_dbg(0, " DCSM1[%d]=0x%08x reg: 0x%x\n",
- cs, *mask1, reg1);
- } else {
- if (!amd64_read_dct_pci_cfg(pvt, 0, reg0, mask0))
- edac_dbg(0, " DCSM0[%d]=0x%08x reg: F2x%x\n",
- cs, *mask0, reg0);
+ if (pvt->fam == 0xf)
+ continue;
- if (pvt->fam == 0xf)
- continue;
-
- if (!amd64_read_dct_pci_cfg(pvt, 1, reg0, mask1))
- edac_dbg(0, " DCSM1[%d]=0x%08x reg: F2x%x\n",
- cs, *mask1, (pvt->fam == 0x10) ? reg1
- : reg0);
- }
+ if (!amd64_read_dct_pci_cfg(pvt, 1, reg0, mask1))
+ edac_dbg(0, " DCSM1[%d]=0x%08x reg: F2x%x\n",
+ cs, *mask1, (pvt->fam == 0x10) ? reg1
+ : reg0);
}
}
static void determine_memory_type(struct amd64_pvt *pvt)
{
u32 dram_ctrl, dcsm;
+
+ if (pvt->umc) {
+ if ((pvt->umc[0].dimm_cfg | pvt->umc[1].dimm_cfg) & BIT(5))
+ pvt->dram_type = MEM_LRDDR4;
+ else if ((pvt->umc[0].dimm_cfg | pvt->umc[1].dimm_cfg) & BIT(4))
+ pvt->dram_type = MEM_RDDR4;
+ else
+ pvt->dram_type = MEM_DDR4;
+ return;
+ }
switch (pvt->fam) {
case 0xf:
@@ -1047,15 +1120,6 @@
case 0x16:
goto ddr3;
-
- case 0x17:
- if ((pvt->umc[0].dimm_cfg | pvt->umc[1].dimm_cfg) & BIT(5))
- pvt->dram_type = MEM_LRDDR4;
- else if ((pvt->umc[0].dimm_cfg | pvt->umc[1].dimm_cfg) & BIT(4))
- pvt->dram_type = MEM_RDDR4;
- else
- pvt->dram_type = MEM_DDR4;
- return;
default:
WARN(1, KERN_ERR "%s: Family??? 0x%x\n", __func__, pvt->fam);
@@ -1391,7 +1455,7 @@
int i, channels = 0;
/* SDP Control bit 31 (SdpInit) is clear for unused UMC channels */
- for (i = 0; i < NUM_UMCS; i++)
+ for_each_umc(i)
channels += !!(pvt->umc[i].sdp_ctrl & UMC_SDP_INIT);
amd64_info("MCT channel count: %d\n", channels);
@@ -1526,18 +1590,62 @@
return ddr3_cs_size(cs_mode, false);
}
-static int f17_base_addr_to_cs_size(struct amd64_pvt *pvt, u8 umc,
+static int f17_addr_mask_to_cs_size(struct amd64_pvt *pvt, u8 umc,
unsigned int cs_mode, int csrow_nr)
{
- u32 base_addr = pvt->csels[umc].csbases[csrow_nr];
+ u32 addr_mask_orig, addr_mask_deinterleaved;
+ u32 msb, weight, num_zero_bits;
+ int dimm, size = 0;
- /* Each mask is used for every two base addresses. */
- u32 addr_mask = pvt->csels[umc].csmasks[csrow_nr >> 1];
+ /* No Chip Selects are enabled. */
+ if (!cs_mode)
+ return size;
- /* Register [31:1] = Address [39:9]. Size is in kBs here. */
- u32 size = ((addr_mask >> 1) - (base_addr >> 1) + 1) >> 1;
+ /* Requested size of an even CS but none are enabled. */
+ if (!(cs_mode & CS_EVEN) && !(csrow_nr & 1))
+ return size;
- edac_dbg(1, "BaseAddr: 0x%x, AddrMask: 0x%x\n", base_addr, addr_mask);
+ /* Requested size of an odd CS but none are enabled. */
+ if (!(cs_mode & CS_ODD) && (csrow_nr & 1))
+ return size;
+
+ /*
+ * There is one mask per DIMM, and two Chip Selects per DIMM.
+ * CS0 and CS1 -> DIMM0
+ * CS2 and CS3 -> DIMM1
+ */
+ dimm = csrow_nr >> 1;
+
+ /* Asymmetric dual-rank DIMM support. */
+ if ((csrow_nr & 1) && (cs_mode & CS_ODD_SECONDARY))
+ addr_mask_orig = pvt->csels[umc].csmasks_sec[dimm];
+ else
+ addr_mask_orig = pvt->csels[umc].csmasks[dimm];
+
+ /*
+ * The number of zero bits in the mask is equal to the number of bits
+ * in a full mask minus the number of bits in the current mask.
+ *
+ * The MSB is the number of bits in the full mask because BIT[0] is
+ * always 0.
+ *
+ * In the special 3 Rank interleaving case, a single bit is flipped
+ * without swapping with the most significant bit. This can be handled
+ * by keeping the MSB where it is and ignoring the single zero bit.
+ */
+ msb = fls(addr_mask_orig) - 1;
+ weight = hweight_long(addr_mask_orig);
+ num_zero_bits = msb - weight - !!(cs_mode & CS_3R_INTERLEAVE);
+
+ /* Take the number of zero bits off from the top of the mask. */
+ addr_mask_deinterleaved = GENMASK_ULL(msb - num_zero_bits, 1);
+
+ edac_dbg(1, "CS%d DIMM%d AddrMasks:\n", csrow_nr, dimm);
+ edac_dbg(1, " Original AddrMask: 0x%x\n", addr_mask_orig);
+ edac_dbg(1, " Deinterleaved AddrMask: 0x%x\n", addr_mask_deinterleaved);
+
+ /* Register [31:1] = Address [39:9]. Size is in kBs here. */
+ size = (addr_mask_deinterleaved >> 2) + 1;
/* Return size in MBs. */
return size >> 10;
@@ -2130,6 +2238,7 @@
.ctl_name = "K8",
.f1_id = PCI_DEVICE_ID_AMD_K8_NB_ADDRMAP,
.f2_id = PCI_DEVICE_ID_AMD_K8_NB_MEMCTL,
+ .max_mcs = 2,
.ops = {
.early_channel_count = k8_early_channel_count,
.map_sysaddr_to_csrow = k8_map_sysaddr_to_csrow,
@@ -2140,6 +2249,7 @@
.ctl_name = "F10h",
.f1_id = PCI_DEVICE_ID_AMD_10H_NB_MAP,
.f2_id = PCI_DEVICE_ID_AMD_10H_NB_DRAM,
+ .max_mcs = 2,
.ops = {
.early_channel_count = f1x_early_channel_count,
.map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow,
@@ -2150,6 +2260,7 @@
.ctl_name = "F15h",
.f1_id = PCI_DEVICE_ID_AMD_15H_NB_F1,
.f2_id = PCI_DEVICE_ID_AMD_15H_NB_F2,
+ .max_mcs = 2,
.ops = {
.early_channel_count = f1x_early_channel_count,
.map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow,
@@ -2160,6 +2271,7 @@
.ctl_name = "F15h_M30h",
.f1_id = PCI_DEVICE_ID_AMD_15H_M30H_NB_F1,
.f2_id = PCI_DEVICE_ID_AMD_15H_M30H_NB_F2,
+ .max_mcs = 2,
.ops = {
.early_channel_count = f1x_early_channel_count,
.map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow,
@@ -2170,6 +2282,7 @@
.ctl_name = "F15h_M60h",
.f1_id = PCI_DEVICE_ID_AMD_15H_M60H_NB_F1,
.f2_id = PCI_DEVICE_ID_AMD_15H_M60H_NB_F2,
+ .max_mcs = 2,
.ops = {
.early_channel_count = f1x_early_channel_count,
.map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow,
@@ -2180,6 +2293,7 @@
.ctl_name = "F16h",
.f1_id = PCI_DEVICE_ID_AMD_16H_NB_F1,
.f2_id = PCI_DEVICE_ID_AMD_16H_NB_F2,
+ .max_mcs = 2,
.ops = {
.early_channel_count = f1x_early_channel_count,
.map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow,
@@ -2190,6 +2304,7 @@
.ctl_name = "F16h_M30h",
.f1_id = PCI_DEVICE_ID_AMD_16H_M30H_NB_F1,
.f2_id = PCI_DEVICE_ID_AMD_16H_M30H_NB_F2,
+ .max_mcs = 2,
.ops = {
.early_channel_count = f1x_early_channel_count,
.map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow,
@@ -2200,27 +2315,60 @@
.ctl_name = "F17h",
.f0_id = PCI_DEVICE_ID_AMD_17H_DF_F0,
.f6_id = PCI_DEVICE_ID_AMD_17H_DF_F6,
+ .max_mcs = 2,
.ops = {
.early_channel_count = f17_early_channel_count,
- .dbam_to_cs = f17_base_addr_to_cs_size,
+ .dbam_to_cs = f17_addr_mask_to_cs_size,
}
},
[F17_M10H_CPUS] = {
.ctl_name = "F17h_M10h",
.f0_id = PCI_DEVICE_ID_AMD_17H_M10H_DF_F0,
.f6_id = PCI_DEVICE_ID_AMD_17H_M10H_DF_F6,
+ .max_mcs = 2,
.ops = {
.early_channel_count = f17_early_channel_count,
- .dbam_to_cs = f17_base_addr_to_cs_size,
+ .dbam_to_cs = f17_addr_mask_to_cs_size,
}
},
[F17_M30H_CPUS] = {
.ctl_name = "F17h_M30h",
.f0_id = PCI_DEVICE_ID_AMD_17H_M30H_DF_F0,
.f6_id = PCI_DEVICE_ID_AMD_17H_M30H_DF_F6,
+ .max_mcs = 8,
.ops = {
.early_channel_count = f17_early_channel_count,
- .dbam_to_cs = f17_base_addr_to_cs_size,
+ .dbam_to_cs = f17_addr_mask_to_cs_size,
+ }
+ },
+ [F17_M60H_CPUS] = {
+ .ctl_name = "F17h_M60h",
+ .f0_id = PCI_DEVICE_ID_AMD_17H_M60H_DF_F0,
+ .f6_id = PCI_DEVICE_ID_AMD_17H_M60H_DF_F6,
+ .max_mcs = 2,
+ .ops = {
+ .early_channel_count = f17_early_channel_count,
+ .dbam_to_cs = f17_addr_mask_to_cs_size,
+ }
+ },
+ [F17_M70H_CPUS] = {
+ .ctl_name = "F17h_M70h",
+ .f0_id = PCI_DEVICE_ID_AMD_17H_M70H_DF_F0,
+ .f6_id = PCI_DEVICE_ID_AMD_17H_M70H_DF_F6,
+ .max_mcs = 2,
+ .ops = {
+ .early_channel_count = f17_early_channel_count,
+ .dbam_to_cs = f17_addr_mask_to_cs_size,
+ }
+ },
+ [F19_CPUS] = {
+ .ctl_name = "F19h",
+ .f0_id = PCI_DEVICE_ID_AMD_19H_DF_F0,
+ .f6_id = PCI_DEVICE_ID_AMD_19H_DF_F6,
+ .max_mcs = 8,
+ .ops = {
+ .early_channel_count = f17_early_channel_count,
+ .dbam_to_cs = f17_addr_mask_to_cs_size,
}
},
};
@@ -2476,18 +2624,14 @@
* To find the UMC channel represented by this bank we need to match on its
* instance_id. The instance_id of a bank is held in the lower 32 bits of its
* IPID.
+ *
+ * Currently, we can derive the channel number by looking at the 6th nibble in
+ * the instance_id. For example, instance_id=0xYXXXXX where Y is the channel
+ * number.
*/
-static int find_umc_channel(struct amd64_pvt *pvt, struct mce *m)
+static int find_umc_channel(struct mce *m)
{
- u32 umc_instance_id[] = {0x50f00, 0x150f00};
- u32 instance_id = m->ipid & GENMASK(31, 0);
- int i, channel = -1;
-
- for (i = 0; i < ARRAY_SIZE(umc_instance_id); i++)
- if (umc_instance_id[i] == instance_id)
- channel = i;
-
- return channel;
+ return (m->ipid & GENMASK(31, 0)) >> 20;
}
static void decode_umc_error(int node_id, struct mce *m)
@@ -2509,11 +2653,7 @@
if (m->status & MCI_STATUS_DEFERRED)
ecc_type = 3;
- err.channel = find_umc_channel(pvt, m);
- if (err.channel < 0) {
- err.err_code = ERR_CHANNEL;
- goto log_error;
- }
+ err.channel = find_umc_channel(m);
if (!(m->status & MCI_STATUS_SYNDV)) {
err.err_code = ERR_SYND;
@@ -2621,19 +2761,19 @@
if (pvt->umc) {
u8 i;
- for (i = 0; i < NUM_UMCS; i++) {
+ for_each_umc(i) {
/* Check enabled channels only: */
- if ((pvt->umc[i].sdp_ctrl & UMC_SDP_INIT) &&
- (pvt->umc[i].ecc_ctrl & BIT(7))) {
- pvt->ecc_sym_sz = 8;
- break;
+ if (pvt->umc[i].sdp_ctrl & UMC_SDP_INIT) {
+ if (pvt->umc[i].ecc_ctrl & BIT(9)) {
+ pvt->ecc_sym_sz = 16;
+ return;
+ } else if (pvt->umc[i].ecc_ctrl & BIT(7)) {
+ pvt->ecc_sym_sz = 8;
+ return;
+ }
}
}
-
- return;
- }
-
- if (pvt->fam >= 0x10) {
+ } else if (pvt->fam >= 0x10) {
u32 tmp;
amd64_read_pci_cfg(pvt->F3, EXT_NB_MCA_CFG, &tmp);
@@ -2657,7 +2797,7 @@
u32 i, umc_base;
/* Read registers from each UMC */
- for (i = 0; i < NUM_UMCS; i++) {
+ for_each_umc(i) {
umc_base = get_umc_base(i);
umc = &pvt->umc[i];
@@ -2749,8 +2889,6 @@
edac_dbg(1, " DIMM type: %s\n", edac_mem_types[pvt->dram_type]);
determine_ecc_sym_sz(pvt);
-
- dump_misc_regs(pvt);
}
/*
@@ -2793,10 +2931,12 @@
int csrow_nr = csrow_nr_orig;
u32 cs_mode, nr_pages;
- if (!pvt->umc)
+ if (!pvt->umc) {
csrow_nr >>= 1;
-
- cs_mode = DBAM_DIMM(csrow_nr, dbam);
+ cs_mode = DBAM_DIMM(csrow_nr, dbam);
+ } else {
+ cs_mode = f17_get_cs_mode(csrow_nr >> 1, dct, pvt);
+ }
nr_pages = pvt->ops->dbam_to_cs(pvt, dct, cs_mode, csrow_nr);
nr_pages <<= 20 - PAGE_SHIFT;
@@ -2806,6 +2946,50 @@
edac_dbg(0, "nr_pages/channel: %u\n", nr_pages);
return nr_pages;
+}
+
+static int init_csrows_df(struct mem_ctl_info *mci)
+{
+ struct amd64_pvt *pvt = mci->pvt_info;
+ enum edac_type edac_mode = EDAC_NONE;
+ enum dev_type dev_type = DEV_UNKNOWN;
+ struct dimm_info *dimm;
+ int empty = 1;
+ u8 umc, cs;
+
+ if (mci->edac_ctl_cap & EDAC_FLAG_S16ECD16ED) {
+ edac_mode = EDAC_S16ECD16ED;
+ dev_type = DEV_X16;
+ } else if (mci->edac_ctl_cap & EDAC_FLAG_S8ECD8ED) {
+ edac_mode = EDAC_S8ECD8ED;
+ dev_type = DEV_X8;
+ } else if (mci->edac_ctl_cap & EDAC_FLAG_S4ECD4ED) {
+ edac_mode = EDAC_S4ECD4ED;
+ dev_type = DEV_X4;
+ } else if (mci->edac_ctl_cap & EDAC_FLAG_SECDED) {
+ edac_mode = EDAC_SECDED;
+ }
+
+ for_each_umc(umc) {
+ for_each_chip_select(cs, umc, pvt) {
+ if (!csrow_enabled(cs, umc, pvt))
+ continue;
+
+ empty = 0;
+ dimm = mci->csrows[cs]->channels[umc]->dimm;
+
+ edac_dbg(1, "MC node: %d, csrow: %d\n",
+ pvt->mc_node_id, cs);
+
+ dimm->nr_pages = get_csrow_nr_pages(pvt, umc, cs);
+ dimm->mtype = pvt->dram_type;
+ dimm->edac_mode = edac_mode;
+ dimm->dtype = dev_type;
+ dimm->grain = 64;
+ }
+ }
+
+ return empty;
}
/*
@@ -2822,15 +3006,16 @@
int nr_pages = 0;
u32 val;
- if (!pvt->umc) {
- amd64_read_pci_cfg(pvt->F3, NBCFG, &val);
+ if (pvt->umc)
+ return init_csrows_df(mci);
- pvt->nbcfg = val;
+ amd64_read_pci_cfg(pvt->F3, NBCFG, &val);
- edac_dbg(0, "node %d, NBCFG=0x%08x[ChipKillEccCap: %d|DramEccEn: %d]\n",
- pvt->mc_node_id, val,
- !!(val & NBCFG_CHIPKILL), !!(val & NBCFG_ECC_ENABLE));
- }
+ pvt->nbcfg = val;
+
+ edac_dbg(0, "node %d, NBCFG=0x%08x[ChipKillEccCap: %d|DramEccEn: %d]\n",
+ pvt->mc_node_id, val,
+ !!(val & NBCFG_CHIPKILL), !!(val & NBCFG_ECC_ENABLE));
/*
* We iterate over DCT0 here but we look at DCT1 in parallel, if needed.
@@ -2867,13 +3052,7 @@
edac_dbg(1, "Total csrow%d pages: %u\n", i, nr_pages);
/* Determine DIMM ECC mode: */
- if (pvt->umc) {
- if (mci->edac_ctl_cap & EDAC_FLAG_S4ECD4ED)
- edac_mode = EDAC_S4ECD4ED;
- else if (mci->edac_ctl_cap & EDAC_FLAG_SECDED)
- edac_mode = EDAC_SECDED;
-
- } else if (pvt->nbcfg & NBCFG_ECC_ENABLE) {
+ if (pvt->nbcfg & NBCFG_ECC_ENABLE) {
edac_mode = (pvt->nbcfg & NBCFG_CHIPKILL)
? EDAC_S4ECD4ED
: EDAC_SECDED;
@@ -3050,43 +3229,27 @@
amd64_warn("Error restoring NB MCGCTL settings!\n");
}
-/*
- * EDAC requires that the BIOS have ECC enabled before
- * taking over the processing of ECC errors. A command line
- * option allows to force-enable hardware ECC later in
- * enable_ecc_error_reporting().
- */
-static const char *ecc_msg =
- "ECC disabled in the BIOS or no ECC capability, module will not load.\n"
- " Either enable ECC checking or force module loading by setting "
- "'ecc_enable_override'.\n"
- " (Note that use of the override may cause unknown side effects.)\n";
-
-static bool ecc_enabled(struct pci_dev *F3, u16 nid)
+static bool ecc_enabled(struct amd64_pvt *pvt)
{
+ u16 nid = pvt->mc_node_id;
bool nb_mce_en = false;
u8 ecc_en = 0, i;
u32 value;
if (boot_cpu_data.x86 >= 0x17) {
u8 umc_en_mask = 0, ecc_en_mask = 0;
+ struct amd64_umc *umc;
- for (i = 0; i < NUM_UMCS; i++) {
- u32 base = get_umc_base(i);
+ for_each_umc(i) {
+ umc = &pvt->umc[i];
/* Only check enabled UMCs. */
- if (amd_smn_read(nid, base + UMCCH_SDP_CTRL, &value))
- continue;
-
- if (!(value & UMC_SDP_INIT))
+ if (!(umc->sdp_ctrl & UMC_SDP_INIT))
continue;
umc_en_mask |= BIT(i);
- if (amd_smn_read(nid, base + UMCCH_UMC_CAP_HI, &value))
- continue;
-
- if (value & UMC_ECC_ENABLED)
+ if (umc->umc_cap_hi & UMC_ECC_ENABLED)
ecc_en_mask |= BIT(i);
}
@@ -3099,7 +3262,7 @@
/* Assume UMC MCA banks are enabled. */
nb_mce_en = true;
} else {
- amd64_read_pci_cfg(F3, NBCFG, &value);
+ amd64_read_pci_cfg(pvt->F3, NBCFG, &value);
ecc_en = !!(value & NBCFG_ECC_ENABLE);
@@ -3112,11 +3275,10 @@
amd64_info("Node %d: DRAM ECC %s.\n",
nid, (ecc_en ? "enabled" : "disabled"));
- if (!ecc_en || !nb_mce_en) {
- amd64_info("%s", ecc_msg);
+ if (!ecc_en || !nb_mce_en)
return false;
- }
- return true;
+ else
+ return true;
}
static inline void
@@ -3124,7 +3286,7 @@
{
u8 i, ecc_en = 1, cpk_en = 1, dev_x4 = 1, dev_x16 = 1;
- for (i = 0; i < NUM_UMCS; i++) {
+ for_each_umc(i) {
if (pvt->umc[i].sdp_ctrl & UMC_SDP_INIT) {
ecc_en &= !!(pvt->umc[i].umc_cap_hi & UMC_ECC_ENABLED);
cpk_en &= !!(pvt->umc[i].umc_cap_hi & UMC_ECC_CHIPKILL_CAP);
@@ -3150,8 +3312,7 @@
}
}
-static void setup_mci_misc_attrs(struct mem_ctl_info *mci,
- struct amd64_family_type *fam)
+static void setup_mci_misc_attrs(struct mem_ctl_info *mci)
{
struct amd64_pvt *pvt = mci->pvt_info;
@@ -3170,7 +3331,7 @@
mci->edac_cap = determine_edac_cap(pvt);
mci->mod_name = EDAC_MOD_STR;
- mci->ctl_name = fam->ctl_name;
+ mci->ctl_name = fam_type->ctl_name;
mci->dev_name = pci_name(pvt->F3);
mci->ctl_page_to_phys = NULL;
@@ -3184,8 +3345,6 @@
*/
static struct amd64_family_type *per_family_init(struct amd64_pvt *pvt)
{
- struct amd64_family_type *fam_type = NULL;
-
pvt->ext_model = boot_cpu_data.x86_model >> 4;
pvt->stepping = boot_cpu_data.x86_stepping;
pvt->model = boot_cpu_data.x86_model;
@@ -3211,10 +3370,13 @@
fam_type = &family_types[F15_M60H_CPUS];
pvt->ops = &family_types[F15_M60H_CPUS].ops;
break;
+ /* Richland is only client */
+ } else if (pvt->model == 0x13) {
+ return NULL;
+ } else {
+ fam_type = &family_types[F15_CPUS];
+ pvt->ops = &family_types[F15_CPUS].ops;
}
-
- fam_type = &family_types[F15_CPUS];
- pvt->ops = &family_types[F15_CPUS].ops;
break;
case 0x16:
@@ -3236,9 +3398,34 @@
fam_type = &family_types[F17_M30H_CPUS];
pvt->ops = &family_types[F17_M30H_CPUS].ops;
break;
+ } else if (pvt->model >= 0x60 && pvt->model <= 0x6f) {
+ fam_type = &family_types[F17_M60H_CPUS];
+ pvt->ops = &family_types[F17_M60H_CPUS].ops;
+ break;
+ } else if (pvt->model >= 0x70 && pvt->model <= 0x7f) {
+ fam_type = &family_types[F17_M70H_CPUS];
+ pvt->ops = &family_types[F17_M70H_CPUS].ops;
+ break;
}
+ fallthrough;
+ case 0x18:
fam_type = &family_types[F17_CPUS];
pvt->ops = &family_types[F17_CPUS].ops;
+
+ if (pvt->fam == 0x18)
+ family_types[F17_CPUS].ctl_name = "F18h";
+ break;
+
+ case 0x19:
+ if (pvt->model >= 0x20 && pvt->model <= 0x2f) {
+ fam_type = &family_types[F17_M70H_CPUS];
+ pvt->ops = &family_types[F17_M70H_CPUS].ops;
+ fam_type->ctl_name = "F19h_M20h";
+ break;
+ }
+ fam_type = &family_types[F19_CPUS];
+ pvt->ops = &family_types[F19_CPUS].ops;
+ family_types[F19_CPUS].ctl_name = "F19h";
break;
default:
@@ -3264,35 +3451,15 @@
NULL
};
-static int init_one_instance(unsigned int nid)
+static int hw_info_get(struct amd64_pvt *pvt)
{
- struct pci_dev *F3 = node_to_amd_nb(nid)->misc;
- struct amd64_family_type *fam_type = NULL;
- struct mem_ctl_info *mci = NULL;
- struct edac_mc_layer layers[2];
- struct amd64_pvt *pvt = NULL;
u16 pci_id1, pci_id2;
- int err = 0, ret;
-
- ret = -ENOMEM;
- pvt = kzalloc(sizeof(struct amd64_pvt), GFP_KERNEL);
- if (!pvt)
- goto err_ret;
-
- pvt->mc_node_id = nid;
- pvt->F3 = F3;
-
- ret = -EINVAL;
- fam_type = per_family_init(pvt);
- if (!fam_type)
- goto err_free;
+ int ret;
if (pvt->fam >= 0x17) {
- pvt->umc = kcalloc(NUM_UMCS, sizeof(struct amd64_umc), GFP_KERNEL);
- if (!pvt->umc) {
- ret = -ENOMEM;
- goto err_free;
- }
+ pvt->umc = kcalloc(fam_type->max_mcs, sizeof(struct amd64_umc), GFP_KERNEL);
+ if (!pvt->umc)
+ return -ENOMEM;
pci_id1 = fam_type->f0_id;
pci_id2 = fam_type->f6_id;
@@ -3301,21 +3468,37 @@
pci_id2 = fam_type->f2_id;
}
- err = reserve_mc_sibling_devs(pvt, pci_id1, pci_id2);
- if (err)
- goto err_post_init;
+ ret = reserve_mc_sibling_devs(pvt, pci_id1, pci_id2);
+ if (ret)
+ return ret;
read_mc_regs(pvt);
+
+ return 0;
+}
+
+static void hw_info_put(struct amd64_pvt *pvt)
+{
+ if (pvt->F0 || pvt->F1)
+ free_mc_sibling_devs(pvt);
+
+ kfree(pvt->umc);
+}
+
+static int init_one_instance(struct amd64_pvt *pvt)
+{
+ struct mem_ctl_info *mci = NULL;
+ struct edac_mc_layer layers[2];
+ int ret = -EINVAL;
/*
* We need to determine how many memory channels there are. Then use
* that information for calculating the size of the dynamic instance
* tables in the 'mci' structure.
*/
- ret = -EINVAL;
pvt->channel_count = pvt->ops->early_channel_count(pvt);
if (pvt->channel_count < 0)
- goto err_siblings;
+ return ret;
ret = -ENOMEM;
layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
@@ -3328,17 +3511,17 @@
* only one channel. Also, this simplifies handling later for the price
* of a couple of KBs tops.
*/
- layers[1].size = 2;
+ layers[1].size = fam_type->max_mcs;
layers[1].is_virt_csrow = false;
- mci = edac_mc_alloc(nid, ARRAY_SIZE(layers), layers, 0);
+ mci = edac_mc_alloc(pvt->mc_node_id, ARRAY_SIZE(layers), layers, 0);
if (!mci)
- goto err_siblings;
+ return ret;
mci->pvt_info = pvt;
mci->pdev = &pvt->F3->dev;
- setup_mci_misc_attrs(mci, fam_type);
+ setup_mci_misc_attrs(mci);
if (init_csrows(mci))
mci->edac_cap = EDAC_FLAG_NONE;
@@ -3346,31 +3529,30 @@
ret = -ENODEV;
if (edac_mc_add_mc_with_groups(mci, amd64_edac_attr_groups)) {
edac_dbg(1, "failed edac_mc_add_mc()\n");
- goto err_add_mc;
+ edac_mc_free(mci);
+ return ret;
}
return 0;
+}
-err_add_mc:
- edac_mc_free(mci);
+static bool instance_has_memory(struct amd64_pvt *pvt)
+{
+ bool cs_enabled = false;
+ int cs = 0, dct = 0;
-err_siblings:
- free_mc_sibling_devs(pvt);
+ for (dct = 0; dct < fam_type->max_mcs; dct++) {
+ for_each_chip_select(cs, dct, pvt)
+ cs_enabled |= csrow_enabled(cs, dct, pvt);
+ }
-err_post_init:
- if (pvt->fam >= 0x17)
- kfree(pvt->umc);
-
-err_free:
- kfree(pvt);
-
-err_ret:
- return ret;
+ return cs_enabled;
}
static int probe_one_instance(unsigned int nid)
{
struct pci_dev *F3 = node_to_amd_nb(nid)->misc;
+ struct amd64_pvt *pvt = NULL;
struct ecc_settings *s;
int ret;
@@ -3381,8 +3563,30 @@
ecc_stngs[nid] = s;
- if (!ecc_enabled(F3, nid)) {
- ret = 0;
+ pvt = kzalloc(sizeof(struct amd64_pvt), GFP_KERNEL);
+ if (!pvt)
+ goto err_settings;
+
+ pvt->mc_node_id = nid;
+ pvt->F3 = F3;
+
+ ret = -ENODEV;
+ fam_type = per_family_init(pvt);
+ if (!fam_type)
+ goto err_enable;
+
+ ret = hw_info_get(pvt);
+ if (ret < 0)
+ goto err_enable;
+
+ ret = 0;
+ if (!instance_has_memory(pvt)) {
+ amd64_info("Node %d: No DIMMs detected.\n", nid);
+ goto err_enable;
+ }
+
+ if (!ecc_enabled(pvt)) {
+ ret = -ENODEV;
if (!ecc_enable_override)
goto err_enable;
@@ -3397,7 +3601,7 @@
goto err_enable;
}
- ret = init_one_instance(nid);
+ ret = init_one_instance(pvt);
if (ret < 0) {
amd64_err("Error probing instance: %d\n", nid);
@@ -3407,9 +3611,15 @@
goto err_enable;
}
+ dump_misc_regs(pvt);
+
return ret;
err_enable:
+ hw_info_put(pvt);
+ kfree(pvt);
+
+err_settings:
kfree(s);
ecc_stngs[nid] = NULL;
@@ -3424,9 +3634,6 @@
struct mem_ctl_info *mci;
struct amd64_pvt *pvt;
- mci = find_mci_by_dev(&F3->dev);
- WARN_ON(!mci);
-
/* Remove from EDAC CORE tracking list */
mci = edac_mc_del_mc(&F3->dev);
if (!mci)
@@ -3436,14 +3643,13 @@
restore_ecc_error_reporting(s, nid, F3);
- free_mc_sibling_devs(pvt);
-
kfree(ecc_stngs[nid]);
ecc_stngs[nid] = NULL;
/* Free the EDAC CORE resources */
mci->pvt_info = NULL;
+ hw_info_put(pvt);
kfree(pvt);
edac_mc_free(mci);
}
@@ -3461,11 +3667,13 @@
}
static const struct x86_cpu_id amd64_cpuids[] = {
- { X86_VENDOR_AMD, 0xF, X86_MODEL_ANY, X86_FEATURE_ANY, 0 },
- { X86_VENDOR_AMD, 0x10, X86_MODEL_ANY, X86_FEATURE_ANY, 0 },
- { X86_VENDOR_AMD, 0x15, X86_MODEL_ANY, X86_FEATURE_ANY, 0 },
- { X86_VENDOR_AMD, 0x16, X86_MODEL_ANY, X86_FEATURE_ANY, 0 },
- { X86_VENDOR_AMD, 0x17, X86_MODEL_ANY, X86_FEATURE_ANY, 0 },
+ X86_MATCH_VENDOR_FAM(AMD, 0x0F, NULL),
+ X86_MATCH_VENDOR_FAM(AMD, 0x10, NULL),
+ X86_MATCH_VENDOR_FAM(AMD, 0x15, NULL),
+ X86_MATCH_VENDOR_FAM(AMD, 0x16, NULL),
+ X86_MATCH_VENDOR_FAM(AMD, 0x17, NULL),
+ X86_MATCH_VENDOR_FAM(HYGON, 0x18, NULL),
+ X86_MATCH_VENDOR_FAM(AMD, 0x19, NULL),
{ }
};
MODULE_DEVICE_TABLE(x86cpu, amd64_cpuids);
@@ -3514,9 +3722,6 @@
}
/* register stuff with EDAC MCE */
- if (report_gart_errors)
- amd_report_gart_errors(true);
-
if (boot_cpu_data.x86 >= 0x17)
amd_register_ecc_decoder(decode_umc_error);
else
@@ -3553,8 +3758,6 @@
edac_pci_release_generic_ctl(pci_ctl);
/* unregister from EDAC MCE */
- amd_report_gart_errors(false);
-
if (boot_cpu_data.x86 >= 0x17)
amd_unregister_ecc_decoder(decode_umc_error);
else
--
Gitblit v1.6.2