hc
2024-10-12 a5969cabbb4660eab42b6ef0412cbbd1200cf14d
kernel/arch/x86/include/asm/mce.h
....@@ -10,41 +10,45 @@
1010
1111 /* MCG_CAP register defines */
1212 #define MCG_BANKCNT_MASK 0xff /* Number of Banks */
13
-#define MCG_CTL_P (1ULL<<8) /* MCG_CTL register available */
14
-#define MCG_EXT_P (1ULL<<9) /* Extended registers available */
15
-#define MCG_CMCI_P (1ULL<<10) /* CMCI supported */
13
+#define MCG_CTL_P BIT_ULL(8) /* MCG_CTL register available */
14
+#define MCG_EXT_P BIT_ULL(9) /* Extended registers available */
15
+#define MCG_CMCI_P BIT_ULL(10) /* CMCI supported */
1616 #define MCG_EXT_CNT_MASK 0xff0000 /* Number of Extended registers */
1717 #define MCG_EXT_CNT_SHIFT 16
1818 #define MCG_EXT_CNT(c) (((c) & MCG_EXT_CNT_MASK) >> MCG_EXT_CNT_SHIFT)
19
-#define MCG_SER_P (1ULL<<24) /* MCA recovery/new status bits */
20
-#define MCG_ELOG_P (1ULL<<26) /* Extended error log supported */
21
-#define MCG_LMCE_P (1ULL<<27) /* Local machine check supported */
19
+#define MCG_SER_P BIT_ULL(24) /* MCA recovery/new status bits */
20
+#define MCG_ELOG_P BIT_ULL(26) /* Extended error log supported */
21
+#define MCG_LMCE_P BIT_ULL(27) /* Local machine check supported */
2222
2323 /* MCG_STATUS register defines */
24
-#define MCG_STATUS_RIPV (1ULL<<0) /* restart ip valid */
25
-#define MCG_STATUS_EIPV (1ULL<<1) /* ip points to correct instruction */
26
-#define MCG_STATUS_MCIP (1ULL<<2) /* machine check in progress */
27
-#define MCG_STATUS_LMCES (1ULL<<3) /* LMCE signaled */
24
+#define MCG_STATUS_RIPV BIT_ULL(0) /* restart ip valid */
25
+#define MCG_STATUS_EIPV BIT_ULL(1) /* ip points to correct instruction */
26
+#define MCG_STATUS_MCIP BIT_ULL(2) /* machine check in progress */
27
+#define MCG_STATUS_LMCES BIT_ULL(3) /* LMCE signaled */
2828
2929 /* MCG_EXT_CTL register defines */
30
-#define MCG_EXT_CTL_LMCE_EN (1ULL<<0) /* Enable LMCE */
30
+#define MCG_EXT_CTL_LMCE_EN BIT_ULL(0) /* Enable LMCE */
3131
3232 /* MCi_STATUS register defines */
33
-#define MCI_STATUS_VAL (1ULL<<63) /* valid error */
34
-#define MCI_STATUS_OVER (1ULL<<62) /* previous errors lost */
35
-#define MCI_STATUS_UC (1ULL<<61) /* uncorrected error */
36
-#define MCI_STATUS_EN (1ULL<<60) /* error enabled */
37
-#define MCI_STATUS_MISCV (1ULL<<59) /* misc error reg. valid */
38
-#define MCI_STATUS_ADDRV (1ULL<<58) /* addr reg. valid */
39
-#define MCI_STATUS_PCC (1ULL<<57) /* processor context corrupt */
40
-#define MCI_STATUS_S (1ULL<<56) /* Signaled machine check */
41
-#define MCI_STATUS_AR (1ULL<<55) /* Action required */
33
+#define MCI_STATUS_VAL BIT_ULL(63) /* valid error */
34
+#define MCI_STATUS_OVER BIT_ULL(62) /* previous errors lost */
35
+#define MCI_STATUS_UC BIT_ULL(61) /* uncorrected error */
36
+#define MCI_STATUS_EN BIT_ULL(60) /* error enabled */
37
+#define MCI_STATUS_MISCV BIT_ULL(59) /* misc error reg. valid */
38
+#define MCI_STATUS_ADDRV BIT_ULL(58) /* addr reg. valid */
39
+#define MCI_STATUS_PCC BIT_ULL(57) /* processor context corrupt */
40
+#define MCI_STATUS_S BIT_ULL(56) /* Signaled machine check */
41
+#define MCI_STATUS_AR BIT_ULL(55) /* Action required */
42
+#define MCI_STATUS_CEC_SHIFT 38 /* Corrected Error Count */
43
+#define MCI_STATUS_CEC_MASK GENMASK_ULL(52,38)
44
+#define MCI_STATUS_CEC(c) (((c) & MCI_STATUS_CEC_MASK) >> MCI_STATUS_CEC_SHIFT)
4245
4346 /* AMD-specific bits */
44
-#define MCI_STATUS_TCC (1ULL<<55) /* Task context corrupt */
45
-#define MCI_STATUS_SYNDV (1ULL<<53) /* synd reg. valid */
46
-#define MCI_STATUS_DEFERRED (1ULL<<44) /* uncorrected error, deferred exception */
47
-#define MCI_STATUS_POISON (1ULL<<43) /* access poisonous data */
47
+#define MCI_STATUS_TCC BIT_ULL(55) /* Task context corrupt */
48
+#define MCI_STATUS_SYNDV BIT_ULL(53) /* synd reg. valid */
49
+#define MCI_STATUS_DEFERRED BIT_ULL(44) /* uncorrected error, deferred exception */
50
+#define MCI_STATUS_POISON BIT_ULL(43) /* access poisonous data */
51
+#define MCI_STATUS_SCRUB BIT_ULL(40) /* Error detected during scrub operation */
4852
4953 /*
5054 * McaX field if set indicates a given bank supports MCA extensions:
....@@ -84,7 +88,7 @@
8488 #define MCI_MISC_ADDR_GENERIC 7 /* generic */
8589
8690 /* CTL2 register defines */
87
-#define MCI_CTL2_CMCI_EN (1ULL << 30)
91
+#define MCI_CTL2_CMCI_EN BIT_ULL(30)
8892 #define MCI_CTL2_CMCI_THRESHOLD_MASK 0x7fffULL
8993
9094 #define MCJ_CTX_MASK 3
....@@ -98,7 +102,7 @@
98102
99103 #define MCE_OVERFLOW 0 /* bit 0 in flags means overflow */
100104
101
-#define MCE_LOG_LEN 32
105
+#define MCE_LOG_MIN_LEN 32U
102106 #define MCE_LOG_SIGNATURE "MACHINECHECK"
103107
104108 /* AMD Scalable MCA */
....@@ -123,6 +127,32 @@
123127 #define MSR_AMD64_SMCA_MCx_DEADDR(x) (MSR_AMD64_SMCA_MC0_DEADDR + 0x10*(x))
124128 #define MSR_AMD64_SMCA_MCx_MISCy(x, y) ((MSR_AMD64_SMCA_MC0_MISC1 + y) + (0x10*(x)))
125129
130
+#define XEC(x, mask) (((x) >> 16) & mask)
131
+
132
+/* mce.kflags flag bits for logging etc. */
133
+#define MCE_HANDLED_CEC BIT_ULL(0)
134
+#define MCE_HANDLED_UC BIT_ULL(1)
135
+#define MCE_HANDLED_EXTLOG BIT_ULL(2)
136
+#define MCE_HANDLED_NFIT BIT_ULL(3)
137
+#define MCE_HANDLED_EDAC BIT_ULL(4)
138
+#define MCE_HANDLED_MCELOG BIT_ULL(5)
139
+
140
+/*
141
+ * Indicates an MCE which has happened in kernel space but from
142
+ * which the kernel can recover simply by executing fixup_exception()
143
+ * so that an error is returned to the caller of the function that
144
+ * hit the machine check.
145
+ */
146
+#define MCE_IN_KERNEL_RECOV BIT_ULL(6)
147
+
148
+/*
149
+ * Indicates an MCE that happened in kernel space while copying data
150
+ * from user. In this case fixup_exception() gets the kernel to the
151
+ * error exit for the copy function. Machine check handler can then
152
+ * treat it like a fault taken in user mode.
153
+ */
154
+#define MCE_IN_KERNEL_COPYIN BIT_ULL(7)
155
+
126156 /*
127157 * This structure contains all data related to the MCE log. Also
128158 * carries a signature to make it easier to find from external
....@@ -131,21 +161,24 @@
131161 */
132162 struct mce_log_buffer {
133163 char signature[12]; /* "MACHINECHECK" */
134
- unsigned len; /* = MCE_LOG_LEN */
164
+ unsigned len; /* = elements in .entry[] */
135165 unsigned next;
136166 unsigned flags;
137167 unsigned recordlen; /* length of struct mce */
138
- struct mce entry[MCE_LOG_LEN];
168
+ struct mce entry[];
139169 };
140170
171
+/* Notifier priorities in ascending order: highest priority last */
141172 enum mce_notifier_prios {
142
- MCE_PRIO_FIRST = INT_MAX,
143
- MCE_PRIO_SRAO = INT_MAX - 1,
144
- MCE_PRIO_EXTLOG = INT_MAX - 2,
145
- MCE_PRIO_NFIT = INT_MAX - 3,
146
- MCE_PRIO_EDAC = INT_MAX - 4,
147
- MCE_PRIO_MCELOG = 1,
148
- MCE_PRIO_LOWEST = 0,
173
+ MCE_PRIO_LOWEST,
174
+ MCE_PRIO_MCELOG,
175
+ MCE_PRIO_EDAC,
176
+ MCE_PRIO_NFIT,
177
+ MCE_PRIO_EXTLOG,
178
+ MCE_PRIO_UC,
179
+ MCE_PRIO_EARLY,
180
+ MCE_PRIO_CEC,
181
+ MCE_PRIO_HIGHEST = MCE_PRIO_CEC
149182 };
150183
151184 struct notifier_block;
....@@ -156,6 +189,15 @@
156189 #include <linux/atomic.h>
157190
158191 extern int mce_p5_enabled;
192
+
193
+#ifdef CONFIG_ARCH_HAS_COPY_MC
194
+extern void enable_copy_mc_fragile(void);
195
+unsigned long __must_check copy_mc_fragile(void *dst, const void *src, unsigned cnt);
196
+#else
197
+static inline void enable_copy_mc_fragile(void)
198
+{
199
+}
200
+#endif
159201
160202 #ifdef CONFIG_X86_MCE
161203 int mcheck_init(void);
....@@ -183,12 +225,8 @@
183225 void mce_log(struct mce *m);
184226 DECLARE_PER_CPU(struct device *, mce_device);
185227
186
-/*
187
- * Maximum banks number.
188
- * This is the limit of the current register layout on
189
- * Intel CPUs.
190
- */
191
-#define MAX_NR_BANKS 32
228
+/* Maximum number of MCA banks per CPU. */
229
+#define MAX_NR_BANKS 64
192230
193231 #ifdef CONFIG_X86_MCE_INTEL
194232 void mce_intel_feature_init(struct cpuinfo_x86 *c);
....@@ -206,14 +244,6 @@
206244 static inline void cmci_recheck(void) {}
207245 #endif
208246
209
-#ifdef CONFIG_X86_MCE_AMD
210
-void mce_amd_feature_init(struct cpuinfo_x86 *c);
211
-int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr);
212
-#else
213
-static inline void mce_amd_feature_init(struct cpuinfo_x86 *c) { }
214
-static inline int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr) { return -EINVAL; };
215
-#endif
216
-
217247 int mce_available(struct cpuinfo_x86 *c);
218248 bool mce_is_memory_error(struct mce *m);
219249 bool mce_is_correctable(struct mce *m);
....@@ -229,6 +259,7 @@
229259 MCP_TIMESTAMP = BIT(0), /* log time stamp */
230260 MCP_UC = BIT(1), /* log uncorrected errors */
231261 MCP_DONTLOG = BIT(2), /* only clear, don't log */
262
+ MCP_QUEUE_LOG = BIT(3), /* only queue to genpool */
232263 };
233264 bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b);
234265
....@@ -242,10 +273,7 @@
242273 /*
243274 * Exception handler
244275 */
245
-
246
-/* Call the installed machine check handler for this CPU setup. */
247
-extern void (*machine_check_vector)(struct pt_regs *, long error_code);
248
-void do_machine_check(struct pt_regs *, long);
276
+void do_machine_check(struct pt_regs *pt_regs);
249277
250278 /*
251279 * Threshold handler
....@@ -294,6 +322,7 @@
294322 /* These may be used by multiple smca_hwid_mcatypes */
295323 enum smca_bank_types {
296324 SMCA_LS = 0, /* Load Store */
325
+ SMCA_LS_V2, /* Load Store */
297326 SMCA_IF, /* Instruction Fetch */
298327 SMCA_L2_CACHE, /* L2 Cache */
299328 SMCA_DE, /* Decoder Unit */
....@@ -302,11 +331,17 @@
302331 SMCA_FP, /* Floating Point */
303332 SMCA_L3_CACHE, /* L3 Cache */
304333 SMCA_CS, /* Coherent Slave */
334
+ SMCA_CS_V2, /* Coherent Slave */
305335 SMCA_PIE, /* Power, Interrupts, etc. */
306336 SMCA_UMC, /* Unified Memory Controller */
307337 SMCA_PB, /* Parameter Block */
308338 SMCA_PSP, /* Platform Security Processor */
339
+ SMCA_PSP_V2, /* Platform Security Processor */
309340 SMCA_SMU, /* System Management Unit */
341
+ SMCA_SMU_V2, /* System Management Unit */
342
+ SMCA_MP5, /* Microprocessor 5 Unit */
343
+ SMCA_NBIO, /* Northbridge IO Unit */
344
+ SMCA_PCIE, /* PCI Express Unit */
310345 N_SMCA_BANK_TYPES
311346 };
312347
....@@ -315,7 +350,6 @@
315350 struct smca_hwid {
316351 unsigned int bank_type; /* Use with smca_bank_types for easy indexing. */
317352 u32 hwid_mcatype; /* (hwid,mcatype) tuple */
318
- u32 xec_bitmap; /* Bitmap of valid ExtErrorCodes; current max is 21. */
319353 u8 count; /* Number of instances. */
320354 };
321355
....@@ -333,12 +367,18 @@
333367 extern int mce_threshold_create_device(unsigned int cpu);
334368 extern int mce_threshold_remove_device(unsigned int cpu);
335369
370
+void mce_amd_feature_init(struct cpuinfo_x86 *c);
371
+int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr);
372
+
336373 #else
337374
338
-static inline int mce_threshold_create_device(unsigned int cpu) { return 0; };
339
-static inline int mce_threshold_remove_device(unsigned int cpu) { return 0; };
340
-static inline bool amd_mce_is_memory_error(struct mce *m) { return false; };
341
-
375
+static inline int mce_threshold_create_device(unsigned int cpu) { return 0; };
376
+static inline int mce_threshold_remove_device(unsigned int cpu) { return 0; };
377
+static inline bool amd_mce_is_memory_error(struct mce *m) { return false; };
378
+static inline void mce_amd_feature_init(struct cpuinfo_x86 *c) { }
379
+static inline int
380
+umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr) { return -EINVAL; };
342381 #endif
343382
383
+static inline void mce_hygon_feature_init(struct cpuinfo_x86 *c) { return mce_amd_feature_init(c); }
344384 #endif /* _ASM_X86_MCE_H */