.. | .. |
---|
| 1 | +// SPDX-License-Identifier: GPL-2.0-only |
---|
1 | 2 | #include <linux/module.h> |
---|
2 | 3 | #include <linux/slab.h> |
---|
3 | 4 | |
---|
.. | .. |
---|
5 | 6 | |
---|
6 | 7 | #include "mce_amd.h" |
---|
7 | 8 | |
---|
8 | | -static struct amd_decoder_ops *fam_ops; |
---|
| 9 | +static struct amd_decoder_ops fam_ops; |
---|
9 | 10 | |
---|
10 | 11 | static u8 xec_mask = 0xf; |
---|
11 | 12 | |
---|
12 | | -static bool report_gart_errors; |
---|
13 | 13 | static void (*decode_dram_ecc)(int node_id, struct mce *m); |
---|
14 | | - |
---|
15 | | -void amd_report_gart_errors(bool v) |
---|
16 | | -{ |
---|
17 | | - report_gart_errors = v; |
---|
18 | | -} |
---|
19 | | -EXPORT_SYMBOL_GPL(amd_report_gart_errors); |
---|
20 | 14 | |
---|
21 | 15 | void amd_register_ecc_decoder(void (*f)(int, struct mce *)) |
---|
22 | 16 | { |
---|
.. | .. |
---|
151 | 145 | |
---|
152 | 146 | /* Scalable MCA error strings */ |
---|
153 | 147 | static const char * const smca_ls_mce_desc[] = { |
---|
154 | | - "Load queue parity", |
---|
155 | | - "Store queue parity", |
---|
156 | | - "Miss address buffer payload parity", |
---|
157 | | - "L1 TLB parity", |
---|
158 | | - "Reserved", |
---|
159 | | - "DC tag error type 6", |
---|
160 | | - "DC tag error type 1", |
---|
| 148 | + "Load queue parity error", |
---|
| 149 | + "Store queue parity error", |
---|
| 150 | + "Miss address buffer payload parity error", |
---|
| 151 | + "Level 1 TLB parity error", |
---|
| 152 | + "DC Tag error type 5", |
---|
| 153 | + "DC Tag error type 6", |
---|
| 154 | + "DC Tag error type 1", |
---|
161 | 155 | "Internal error type 1", |
---|
162 | 156 | "Internal error type 2", |
---|
163 | | - "Sys Read data error thread 0", |
---|
164 | | - "Sys read data error thread 1", |
---|
165 | | - "DC tag error type 2", |
---|
166 | | - "DC data error type 1 (poison consumption)", |
---|
167 | | - "DC data error type 2", |
---|
168 | | - "DC data error type 3", |
---|
169 | | - "DC tag error type 4", |
---|
170 | | - "L2 TLB parity", |
---|
| 157 | + "System Read Data Error Thread 0", |
---|
| 158 | + "System Read Data Error Thread 1", |
---|
| 159 | + "DC Tag error type 2", |
---|
| 160 | + "DC Data error type 1 and poison consumption", |
---|
| 161 | + "DC Data error type 2", |
---|
| 162 | + "DC Data error type 3", |
---|
| 163 | + "DC Tag error type 4", |
---|
| 164 | + "Level 2 TLB parity error", |
---|
171 | 165 | "PDC parity error", |
---|
172 | | - "DC tag error type 3", |
---|
173 | | - "DC tag error type 5", |
---|
174 | | - "L2 fill data error", |
---|
| 166 | + "DC Tag error type 3", |
---|
| 167 | + "DC Tag error type 5", |
---|
| 168 | + "L2 Fill Data error", |
---|
| 169 | +}; |
---|
| 170 | + |
---|
| 171 | +static const char * const smca_ls2_mce_desc[] = { |
---|
| 172 | + "An ECC error was detected on a data cache read by a probe or victimization", |
---|
| 173 | + "An ECC error or L2 poison was detected on a data cache read by a load", |
---|
| 174 | + "An ECC error was detected on a data cache read-modify-write by a store", |
---|
| 175 | + "An ECC error or poison bit mismatch was detected on a tag read by a probe or victimization", |
---|
| 176 | + "An ECC error or poison bit mismatch was detected on a tag read by a load", |
---|
| 177 | + "An ECC error or poison bit mismatch was detected on a tag read by a store", |
---|
| 178 | + "An ECC error was detected on an EMEM read by a load", |
---|
| 179 | + "An ECC error was detected on an EMEM read-modify-write by a store", |
---|
| 180 | + "A parity error was detected in an L1 TLB entry by any access", |
---|
| 181 | + "A parity error was detected in an L2 TLB entry by any access", |
---|
| 182 | + "A parity error was detected in a PWC entry by any access", |
---|
| 183 | + "A parity error was detected in an STQ entry by any access", |
---|
| 184 | + "A parity error was detected in an LDQ entry by any access", |
---|
| 185 | + "A parity error was detected in a MAB entry by any access", |
---|
| 186 | + "A parity error was detected in an SCB entry state field by any access", |
---|
| 187 | + "A parity error was detected in an SCB entry address field by any access", |
---|
| 188 | + "A parity error was detected in an SCB entry data field by any access", |
---|
| 189 | + "A parity error was detected in a WCB entry by any access", |
---|
| 190 | + "A poisoned line was detected in an SCB entry by any access", |
---|
| 191 | + "A SystemReadDataError error was reported on read data returned from L2 for a load", |
---|
| 192 | + "A SystemReadDataError error was reported on read data returned from L2 for an SCB store", |
---|
| 193 | + "A SystemReadDataError error was reported on read data returned from L2 for a WCB store", |
---|
| 194 | + "A hardware assertion error was reported", |
---|
| 195 | + "A parity error was detected in an STLF, SCB EMEM entry or SRB store data by any access", |
---|
175 | 196 | }; |
---|
176 | 197 | |
---|
177 | 198 | static const char * const smca_if_mce_desc[] = { |
---|
178 | | - "microtag probe port parity error", |
---|
179 | | - "IC microtag or full tag multi-hit error", |
---|
180 | | - "IC full tag parity", |
---|
181 | | - "IC data array parity", |
---|
182 | | - "Decoupling queue phys addr parity error", |
---|
183 | | - "L0 ITLB parity error", |
---|
184 | | - "L1 ITLB parity error", |
---|
185 | | - "L2 ITLB parity error", |
---|
186 | | - "BPQ snoop parity on Thread 0", |
---|
187 | | - "BPQ snoop parity on Thread 1", |
---|
188 | | - "L1 BTB multi-match error", |
---|
189 | | - "L2 BTB multi-match error", |
---|
190 | | - "L2 Cache Response Poison error", |
---|
191 | | - "System Read Data error", |
---|
| 199 | + "Op Cache Microtag Probe Port Parity Error", |
---|
| 200 | + "IC Microtag or Full Tag Multi-hit Error", |
---|
| 201 | + "IC Full Tag Parity Error", |
---|
| 202 | + "IC Data Array Parity Error", |
---|
| 203 | + "Decoupling Queue PhysAddr Parity Error", |
---|
| 204 | + "L0 ITLB Parity Error", |
---|
| 205 | + "L1 ITLB Parity Error", |
---|
| 206 | + "L2 ITLB Parity Error", |
---|
| 207 | + "BPQ Thread 0 Snoop Parity Error", |
---|
| 208 | + "BPQ Thread 1 Snoop Parity Error", |
---|
| 209 | + "L1 BTB Multi-Match Error", |
---|
| 210 | + "L2 BTB Multi-Match Error", |
---|
| 211 | + "L2 Cache Response Poison Error", |
---|
| 212 | + "System Read Data Error", |
---|
| 213 | + "Hardware Assertion Error", |
---|
| 214 | + "L1-TLB Multi-Hit", |
---|
| 215 | + "L2-TLB Multi-Hit", |
---|
| 216 | + "BSR Parity Error", |
---|
| 217 | + "CT MCE", |
---|
192 | 218 | }; |
---|
193 | 219 | |
---|
194 | 220 | static const char * const smca_l2_mce_desc[] = { |
---|
195 | | - "L2M tag multi-way-hit error", |
---|
196 | | - "L2M tag ECC error", |
---|
197 | | - "L2M data ECC error", |
---|
198 | | - "HW assert", |
---|
| 221 | + "L2M Tag Multiple-Way-Hit error", |
---|
| 222 | + "L2M Tag or State Array ECC Error", |
---|
| 223 | + "L2M Data Array ECC Error", |
---|
| 224 | + "Hardware Assert Error", |
---|
199 | 225 | }; |
---|
200 | 226 | |
---|
201 | 227 | static const char * const smca_de_mce_desc[] = { |
---|
202 | | - "uop cache tag parity error", |
---|
203 | | - "uop cache data parity error", |
---|
204 | | - "Insn buffer parity error", |
---|
205 | | - "uop queue parity error", |
---|
206 | | - "Insn dispatch queue parity error", |
---|
207 | | - "Fetch address FIFO parity", |
---|
208 | | - "Patch RAM data parity", |
---|
209 | | - "Patch RAM sequencer parity", |
---|
210 | | - "uop buffer parity" |
---|
| 228 | + "Micro-op cache tag parity error", |
---|
| 229 | + "Micro-op cache data parity error", |
---|
| 230 | + "Instruction buffer parity error", |
---|
| 231 | + "Micro-op queue parity error", |
---|
| 232 | + "Instruction dispatch queue parity error", |
---|
| 233 | + "Fetch address FIFO parity error", |
---|
| 234 | + "Patch RAM data parity error", |
---|
| 235 | + "Patch RAM sequencer parity error", |
---|
| 236 | + "Micro-op buffer parity error", |
---|
| 237 | + "Hardware Assertion MCA Error", |
---|
211 | 238 | }; |
---|
212 | 239 | |
---|
213 | 240 | static const char * const smca_ex_mce_desc[] = { |
---|
214 | | - "Watchdog timeout error", |
---|
215 | | - "Phy register file parity", |
---|
216 | | - "Flag register file parity", |
---|
217 | | - "Immediate displacement register file parity", |
---|
218 | | - "Address generator payload parity", |
---|
219 | | - "EX payload parity", |
---|
220 | | - "Checkpoint queue parity", |
---|
221 | | - "Retire dispatch queue parity", |
---|
| 241 | + "Watchdog Timeout error", |
---|
| 242 | + "Physical register file parity error", |
---|
| 243 | + "Flag register file parity error", |
---|
| 244 | + "Immediate displacement register file parity error", |
---|
| 245 | + "Address generator payload parity error", |
---|
| 246 | + "EX payload parity error", |
---|
| 247 | + "Checkpoint queue parity error", |
---|
| 248 | + "Retire dispatch queue parity error", |
---|
222 | 249 | "Retire status queue parity error", |
---|
223 | 250 | "Scheduling queue parity error", |
---|
224 | 251 | "Branch buffer queue parity error", |
---|
| 252 | + "Hardware Assertion error", |
---|
| 253 | + "Spec Map parity error", |
---|
| 254 | + "Retire Map parity error", |
---|
225 | 255 | }; |
---|
226 | 256 | |
---|
227 | 257 | static const char * const smca_fp_mce_desc[] = { |
---|
228 | | - "Physical register file parity", |
---|
229 | | - "Freelist parity error", |
---|
230 | | - "Schedule queue parity", |
---|
| 258 | + "Physical register file (PRF) parity error", |
---|
| 259 | + "Freelist (FL) parity error", |
---|
| 260 | + "Schedule queue parity error", |
---|
231 | 261 | "NSQ parity error", |
---|
232 | | - "Retire queue parity", |
---|
233 | | - "Status register file parity", |
---|
| 262 | + "Retire queue (RQ) parity error", |
---|
| 263 | + "Status register file (SRF) parity error", |
---|
234 | 264 | "Hardware assertion", |
---|
235 | 265 | }; |
---|
236 | 266 | |
---|
237 | 267 | static const char * const smca_l3_mce_desc[] = { |
---|
238 | | - "Shadow tag macro ECC error", |
---|
239 | | - "Shadow tag macro multi-way-hit error", |
---|
240 | | - "L3M tag ECC error", |
---|
241 | | - "L3M tag multi-way-hit error", |
---|
242 | | - "L3M data ECC error", |
---|
243 | | - "XI parity, L3 fill done channel error", |
---|
244 | | - "L3 victim queue parity", |
---|
245 | | - "L3 HW assert", |
---|
| 268 | + "Shadow Tag Macro ECC Error", |
---|
| 269 | + "Shadow Tag Macro Multi-way-hit Error", |
---|
| 270 | + "L3M Tag ECC Error", |
---|
| 271 | + "L3M Tag Multi-way-hit Error", |
---|
| 272 | + "L3M Data ECC Error", |
---|
| 273 | + "SDP Parity Error or SystemReadDataError from XI", |
---|
| 274 | + "L3 Victim Queue Parity Error", |
---|
| 275 | + "L3 Hardware Assertion", |
---|
246 | 276 | }; |
---|
247 | 277 | |
---|
248 | 278 | static const char * const smca_cs_mce_desc[] = { |
---|
249 | | - "Illegal request from transport layer", |
---|
250 | | - "Address violation", |
---|
251 | | - "Security violation", |
---|
252 | | - "Illegal response from transport layer", |
---|
253 | | - "Unexpected response", |
---|
254 | | - "Parity error on incoming request or probe response data", |
---|
255 | | - "Parity error on incoming read response data", |
---|
256 | | - "Atomic request parity", |
---|
257 | | - "ECC error on probe filter access", |
---|
| 279 | + "Illegal Request", |
---|
| 280 | + "Address Violation", |
---|
| 281 | + "Security Violation", |
---|
| 282 | + "Illegal Response", |
---|
| 283 | + "Unexpected Response", |
---|
| 284 | + "Request or Probe Parity Error", |
---|
| 285 | + "Read Response Parity Error", |
---|
| 286 | + "Atomic Request Parity Error", |
---|
| 287 | + "Probe Filter ECC Error", |
---|
| 288 | +}; |
---|
| 289 | + |
---|
| 290 | +static const char * const smca_cs2_mce_desc[] = { |
---|
| 291 | + "Illegal Request", |
---|
| 292 | + "Address Violation", |
---|
| 293 | + "Security Violation", |
---|
| 294 | + "Illegal Response", |
---|
| 295 | + "Unexpected Response", |
---|
| 296 | + "Request or Probe Parity Error", |
---|
| 297 | + "Read Response Parity Error", |
---|
| 298 | + "Atomic Request Parity Error", |
---|
| 299 | + "SDP read response had no match in the CS queue", |
---|
| 300 | + "Probe Filter Protocol Error", |
---|
| 301 | + "Probe Filter ECC Error", |
---|
| 302 | + "SDP read response had an unexpected RETRY error", |
---|
| 303 | + "Counter overflow error", |
---|
| 304 | + "Counter underflow error", |
---|
258 | 305 | }; |
---|
259 | 306 | |
---|
260 | 307 | static const char * const smca_pie_mce_desc[] = { |
---|
261 | | - "HW assert", |
---|
262 | | - "Internal PIE register security violation", |
---|
263 | | - "Error on GMI link", |
---|
264 | | - "Poison data written to internal PIE register", |
---|
| 308 | + "Hardware Assert", |
---|
| 309 | + "Register security violation", |
---|
| 310 | + "Link Error", |
---|
| 311 | + "Poison data consumption", |
---|
| 312 | + "A deferred error was detected in the DF" |
---|
265 | 313 | }; |
---|
266 | 314 | |
---|
267 | 315 | static const char * const smca_umc_mce_desc[] = { |
---|
268 | 316 | "DRAM ECC error", |
---|
269 | | - "Data poison error on DRAM", |
---|
| 317 | + "Data poison error", |
---|
270 | 318 | "SDP parity error", |
---|
271 | 319 | "Advanced peripheral bus error", |
---|
272 | | - "Command/address parity error", |
---|
| 320 | + "Address/Command parity error", |
---|
273 | 321 | "Write data CRC error", |
---|
| 322 | + "DCQ SRAM ECC error", |
---|
| 323 | + "AES SRAM ECC error", |
---|
274 | 324 | }; |
---|
275 | 325 | |
---|
276 | 326 | static const char * const smca_pb_mce_desc[] = { |
---|
277 | | - "Parameter Block RAM ECC error", |
---|
| 327 | + "An ECC error in the Parameter Block RAM array", |
---|
278 | 328 | }; |
---|
279 | 329 | |
---|
280 | 330 | static const char * const smca_psp_mce_desc[] = { |
---|
281 | | - "PSP RAM ECC or parity error", |
---|
| 331 | + "An ECC or parity error in a PSP RAM instance", |
---|
| 332 | +}; |
---|
| 333 | + |
---|
| 334 | +static const char * const smca_psp2_mce_desc[] = { |
---|
| 335 | + "High SRAM ECC or parity error", |
---|
| 336 | + "Low SRAM ECC or parity error", |
---|
| 337 | + "Instruction Cache Bank 0 ECC or parity error", |
---|
| 338 | + "Instruction Cache Bank 1 ECC or parity error", |
---|
| 339 | + "Instruction Tag Ram 0 parity error", |
---|
| 340 | + "Instruction Tag Ram 1 parity error", |
---|
| 341 | + "Data Cache Bank 0 ECC or parity error", |
---|
| 342 | + "Data Cache Bank 1 ECC or parity error", |
---|
| 343 | + "Data Cache Bank 2 ECC or parity error", |
---|
| 344 | + "Data Cache Bank 3 ECC or parity error", |
---|
| 345 | + "Data Tag Bank 0 parity error", |
---|
| 346 | + "Data Tag Bank 1 parity error", |
---|
| 347 | + "Data Tag Bank 2 parity error", |
---|
| 348 | + "Data Tag Bank 3 parity error", |
---|
| 349 | + "Dirty Data Ram parity error", |
---|
| 350 | + "TLB Bank 0 parity error", |
---|
| 351 | + "TLB Bank 1 parity error", |
---|
| 352 | + "System Hub Read Buffer ECC or parity error", |
---|
282 | 353 | }; |
---|
283 | 354 | |
---|
284 | 355 | static const char * const smca_smu_mce_desc[] = { |
---|
285 | | - "SMU RAM ECC or parity error", |
---|
| 356 | + "An ECC or parity error in an SMU RAM instance", |
---|
| 357 | +}; |
---|
| 358 | + |
---|
| 359 | +static const char * const smca_smu2_mce_desc[] = { |
---|
| 360 | + "High SRAM ECC or parity error", |
---|
| 361 | + "Low SRAM ECC or parity error", |
---|
| 362 | + "Data Cache Bank A ECC or parity error", |
---|
| 363 | + "Data Cache Bank B ECC or parity error", |
---|
| 364 | + "Data Tag Cache Bank A ECC or parity error", |
---|
| 365 | + "Data Tag Cache Bank B ECC or parity error", |
---|
| 366 | + "Instruction Cache Bank A ECC or parity error", |
---|
| 367 | + "Instruction Cache Bank B ECC or parity error", |
---|
| 368 | + "Instruction Tag Cache Bank A ECC or parity error", |
---|
| 369 | + "Instruction Tag Cache Bank B ECC or parity error", |
---|
| 370 | + "System Hub Read Buffer ECC or parity error", |
---|
| 371 | + "PHY RAM ECC error", |
---|
| 372 | +}; |
---|
| 373 | + |
---|
| 374 | +static const char * const smca_mp5_mce_desc[] = { |
---|
| 375 | + "High SRAM ECC or parity error", |
---|
| 376 | + "Low SRAM ECC or parity error", |
---|
| 377 | + "Data Cache Bank A ECC or parity error", |
---|
| 378 | + "Data Cache Bank B ECC or parity error", |
---|
| 379 | + "Data Tag Cache Bank A ECC or parity error", |
---|
| 380 | + "Data Tag Cache Bank B ECC or parity error", |
---|
| 381 | + "Instruction Cache Bank A ECC or parity error", |
---|
| 382 | + "Instruction Cache Bank B ECC or parity error", |
---|
| 383 | + "Instruction Tag Cache Bank A ECC or parity error", |
---|
| 384 | + "Instruction Tag Cache Bank B ECC or parity error", |
---|
| 385 | +}; |
---|
| 386 | + |
---|
| 387 | +static const char * const smca_nbio_mce_desc[] = { |
---|
| 388 | + "ECC or Parity error", |
---|
| 389 | + "PCIE error", |
---|
| 390 | + "SDP ErrEvent error", |
---|
| 391 | + "SDP Egress Poison Error", |
---|
| 392 | + "IOHC Internal Poison Error", |
---|
| 393 | +}; |
---|
| 394 | + |
---|
| 395 | +static const char * const smca_pcie_mce_desc[] = { |
---|
| 396 | + "CCIX PER Message logging", |
---|
| 397 | + "CCIX Read Response with Status: Non-Data Error", |
---|
| 398 | + "CCIX Write Response with Status: Non-Data Error", |
---|
| 399 | + "CCIX Read Response with Status: Data Error", |
---|
| 400 | + "CCIX Non-okay write response with data error", |
---|
286 | 401 | }; |
---|
287 | 402 | |
---|
288 | 403 | struct smca_mce_desc { |
---|
.. | .. |
---|
292 | 407 | |
---|
293 | 408 | static struct smca_mce_desc smca_mce_descs[] = { |
---|
294 | 409 | [SMCA_LS] = { smca_ls_mce_desc, ARRAY_SIZE(smca_ls_mce_desc) }, |
---|
| 410 | + [SMCA_LS_V2] = { smca_ls2_mce_desc, ARRAY_SIZE(smca_ls2_mce_desc) }, |
---|
295 | 411 | [SMCA_IF] = { smca_if_mce_desc, ARRAY_SIZE(smca_if_mce_desc) }, |
---|
296 | 412 | [SMCA_L2_CACHE] = { smca_l2_mce_desc, ARRAY_SIZE(smca_l2_mce_desc) }, |
---|
297 | 413 | [SMCA_DE] = { smca_de_mce_desc, ARRAY_SIZE(smca_de_mce_desc) }, |
---|
.. | .. |
---|
299 | 415 | [SMCA_FP] = { smca_fp_mce_desc, ARRAY_SIZE(smca_fp_mce_desc) }, |
---|
300 | 416 | [SMCA_L3_CACHE] = { smca_l3_mce_desc, ARRAY_SIZE(smca_l3_mce_desc) }, |
---|
301 | 417 | [SMCA_CS] = { smca_cs_mce_desc, ARRAY_SIZE(smca_cs_mce_desc) }, |
---|
| 418 | + [SMCA_CS_V2] = { smca_cs2_mce_desc, ARRAY_SIZE(smca_cs2_mce_desc) }, |
---|
302 | 419 | [SMCA_PIE] = { smca_pie_mce_desc, ARRAY_SIZE(smca_pie_mce_desc) }, |
---|
303 | 420 | [SMCA_UMC] = { smca_umc_mce_desc, ARRAY_SIZE(smca_umc_mce_desc) }, |
---|
304 | 421 | [SMCA_PB] = { smca_pb_mce_desc, ARRAY_SIZE(smca_pb_mce_desc) }, |
---|
305 | 422 | [SMCA_PSP] = { smca_psp_mce_desc, ARRAY_SIZE(smca_psp_mce_desc) }, |
---|
| 423 | + [SMCA_PSP_V2] = { smca_psp2_mce_desc, ARRAY_SIZE(smca_psp2_mce_desc) }, |
---|
306 | 424 | [SMCA_SMU] = { smca_smu_mce_desc, ARRAY_SIZE(smca_smu_mce_desc) }, |
---|
| 425 | + [SMCA_SMU_V2] = { smca_smu2_mce_desc, ARRAY_SIZE(smca_smu2_mce_desc) }, |
---|
| 426 | + [SMCA_MP5] = { smca_mp5_mce_desc, ARRAY_SIZE(smca_mp5_mce_desc) }, |
---|
| 427 | + [SMCA_NBIO] = { smca_nbio_mce_desc, ARRAY_SIZE(smca_nbio_mce_desc) }, |
---|
| 428 | + [SMCA_PCIE] = { smca_pcie_mce_desc, ARRAY_SIZE(smca_pcie_mce_desc) }, |
---|
307 | 429 | }; |
---|
308 | 430 | |
---|
309 | 431 | static bool f12h_mc0_mce(u16 ec, u8 xec) |
---|
.. | .. |
---|
463 | 585 | : (xec ? "multimatch" : "parity"))); |
---|
464 | 586 | return; |
---|
465 | 587 | } |
---|
466 | | - } else if (fam_ops->mc0_mce(ec, xec)) |
---|
| 588 | + } else if (fam_ops.mc0_mce(ec, xec)) |
---|
467 | 589 | ; |
---|
468 | 590 | else |
---|
469 | 591 | pr_emerg(HW_ERR "Corrupted MC0 MCE info?\n"); |
---|
.. | .. |
---|
577 | 699 | pr_cont("Hardware Assert.\n"); |
---|
578 | 700 | else |
---|
579 | 701 | goto wrong_mc1_mce; |
---|
580 | | - } else if (fam_ops->mc1_mce(ec, xec)) |
---|
| 702 | + } else if (fam_ops.mc1_mce(ec, xec)) |
---|
581 | 703 | ; |
---|
582 | 704 | else |
---|
583 | 705 | goto wrong_mc1_mce; |
---|
.. | .. |
---|
711 | 833 | |
---|
712 | 834 | pr_emerg(HW_ERR "MC2 Error: "); |
---|
713 | 835 | |
---|
714 | | - if (!fam_ops->mc2_mce(ec, xec)) |
---|
| 836 | + if (!fam_ops.mc2_mce(ec, xec)) |
---|
715 | 837 | pr_cont(HW_ERR "Corrupted MC2 MCE info?\n"); |
---|
716 | 838 | } |
---|
717 | 839 | |
---|
.. | .. |
---|
874 | 996 | |
---|
875 | 997 | ip_name = smca_get_long_name(bank_type); |
---|
876 | 998 | |
---|
877 | | - pr_emerg(HW_ERR "%s Extended Error Code: %d\n", ip_name, xec); |
---|
| 999 | + pr_emerg(HW_ERR "%s Ext. Error Code: %d", ip_name, xec); |
---|
878 | 1000 | |
---|
879 | 1001 | /* Only print the decode of valid error codes */ |
---|
880 | | - if (xec < smca_mce_descs[bank_type].num_descs && |
---|
881 | | - (hwid->xec_bitmap & BIT_ULL(xec))) { |
---|
882 | | - pr_emerg(HW_ERR "%s Error: ", ip_name); |
---|
883 | | - pr_cont("%s.\n", smca_mce_descs[bank_type].descs[xec]); |
---|
884 | | - } |
---|
| 1002 | + if (xec < smca_mce_descs[bank_type].num_descs) |
---|
| 1003 | + pr_cont(", %s.\n", smca_mce_descs[bank_type].descs[xec]); |
---|
885 | 1004 | |
---|
886 | 1005 | if (bank_type == SMCA_UMC && xec == 0 && decode_dram_ecc) |
---|
887 | | - decode_dram_ecc(cpu_to_node(m->extcpu), m); |
---|
| 1006 | + decode_dram_ecc(topology_die_id(m->extcpu), m); |
---|
888 | 1007 | } |
---|
889 | 1008 | |
---|
890 | 1009 | static inline void amd_decode_err_code(u16 ec) |
---|
.. | .. |
---|
911 | 1030 | pr_cont("\n"); |
---|
912 | 1031 | } |
---|
913 | 1032 | |
---|
914 | | -/* |
---|
915 | | - * Filter out unwanted MCE signatures here. |
---|
916 | | - */ |
---|
917 | | -static bool amd_filter_mce(struct mce *m) |
---|
918 | | -{ |
---|
919 | | - /* |
---|
920 | | - * NB GART TLB error reporting is disabled by default. |
---|
921 | | - */ |
---|
922 | | - if (m->bank == 4 && XEC(m->status, 0x1f) == 0x5 && !report_gart_errors) |
---|
923 | | - return true; |
---|
924 | | - |
---|
925 | | - return false; |
---|
926 | | -} |
---|
927 | | - |
---|
928 | 1033 | static const char *decode_error_status(struct mce *m) |
---|
929 | 1034 | { |
---|
930 | 1035 | if (m->status & MCI_STATUS_UC) { |
---|
.. | .. |
---|
948 | 1053 | unsigned int fam = x86_family(m->cpuid); |
---|
949 | 1054 | int ecc; |
---|
950 | 1055 | |
---|
951 | | - if (amd_filter_mce(m)) |
---|
952 | | - return NOTIFY_STOP; |
---|
| 1056 | + if (m->kflags & MCE_HANDLED_CEC) |
---|
| 1057 | + return NOTIFY_DONE; |
---|
953 | 1058 | |
---|
954 | 1059 | pr_emerg(HW_ERR "%s\n", decode_error_status(m)); |
---|
955 | 1060 | |
---|
.. | .. |
---|
961 | 1066 | ((m->status & MCI_STATUS_UC) ? "UE" : |
---|
962 | 1067 | (m->status & MCI_STATUS_DEFERRED) ? "-" : "CE"), |
---|
963 | 1068 | ((m->status & MCI_STATUS_MISCV) ? "MiscV" : "-"), |
---|
964 | | - ((m->status & MCI_STATUS_PCC) ? "PCC" : "-"), |
---|
965 | | - ((m->status & MCI_STATUS_ADDRV) ? "AddrV" : "-")); |
---|
| 1069 | + ((m->status & MCI_STATUS_ADDRV) ? "AddrV" : "-"), |
---|
| 1070 | + ((m->status & MCI_STATUS_PCC) ? "PCC" : "-")); |
---|
| 1071 | + |
---|
| 1072 | + if (boot_cpu_has(X86_FEATURE_SMCA)) { |
---|
| 1073 | + u32 low, high; |
---|
| 1074 | + u32 addr = MSR_AMD64_SMCA_MCx_CONFIG(m->bank); |
---|
| 1075 | + |
---|
| 1076 | + if (!rdmsr_safe(addr, &low, &high) && |
---|
| 1077 | + (low & MCI_CONFIG_MCAX)) |
---|
| 1078 | + pr_cont("|%s", ((m->status & MCI_STATUS_TCC) ? "TCC" : "-")); |
---|
| 1079 | + |
---|
| 1080 | + pr_cont("|%s", ((m->status & MCI_STATUS_SYNDV) ? "SyndV" : "-")); |
---|
| 1081 | + } |
---|
| 1082 | + |
---|
| 1083 | + /* do the two bits[14:13] together */ |
---|
| 1084 | + ecc = (m->status >> 45) & 0x3; |
---|
| 1085 | + if (ecc) |
---|
| 1086 | + pr_cont("|%sECC", ((ecc == 2) ? "C" : "U")); |
---|
966 | 1087 | |
---|
967 | 1088 | if (fam >= 0x15) { |
---|
968 | 1089 | pr_cont("|%s", (m->status & MCI_STATUS_DEFERRED ? "Deferred" : "-")); |
---|
.. | .. |
---|
972 | 1093 | pr_cont("|%s", (m->status & MCI_STATUS_POISON ? "Poison" : "-")); |
---|
973 | 1094 | } |
---|
974 | 1095 | |
---|
975 | | - if (boot_cpu_has(X86_FEATURE_SMCA)) { |
---|
976 | | - u32 low, high; |
---|
977 | | - u32 addr = MSR_AMD64_SMCA_MCx_CONFIG(m->bank); |
---|
978 | | - |
---|
979 | | - pr_cont("|%s", ((m->status & MCI_STATUS_SYNDV) ? "SyndV" : "-")); |
---|
980 | | - |
---|
981 | | - if (!rdmsr_safe(addr, &low, &high) && |
---|
982 | | - (low & MCI_CONFIG_MCAX)) |
---|
983 | | - pr_cont("|%s", ((m->status & MCI_STATUS_TCC) ? "TCC" : "-")); |
---|
984 | | - } |
---|
985 | | - |
---|
986 | | - /* do the two bits[14:13] together */ |
---|
987 | | - ecc = (m->status >> 45) & 0x3; |
---|
988 | | - if (ecc) |
---|
989 | | - pr_cont("|%sECC", ((ecc == 2) ? "C" : "U")); |
---|
| 1096 | + if (fam >= 0x17) |
---|
| 1097 | + pr_cont("|%s", (m->status & MCI_STATUS_SCRUB ? "Scrub" : "-")); |
---|
990 | 1098 | |
---|
991 | 1099 | pr_cont("]: 0x%016llx\n", m->status); |
---|
992 | 1100 | |
---|
993 | 1101 | if (m->status & MCI_STATUS_ADDRV) |
---|
994 | 1102 | pr_emerg(HW_ERR "Error Addr: 0x%016llx\n", m->addr); |
---|
| 1103 | + |
---|
| 1104 | + if (m->ppin) |
---|
| 1105 | + pr_emerg(HW_ERR "PPIN: 0x%016llx\n", m->ppin); |
---|
995 | 1106 | |
---|
996 | 1107 | if (boot_cpu_has(X86_FEATURE_SMCA)) { |
---|
997 | 1108 | pr_emerg(HW_ERR "IPID: 0x%016llx", m->ipid); |
---|
.. | .. |
---|
1008 | 1119 | if (m->tsc) |
---|
1009 | 1120 | pr_emerg(HW_ERR "TSC: %llu\n", m->tsc); |
---|
1010 | 1121 | |
---|
1011 | | - if (!fam_ops) |
---|
| 1122 | + /* Doesn't matter which member to test. */ |
---|
| 1123 | + if (!fam_ops.mc0_mce) |
---|
1012 | 1124 | goto err_code; |
---|
1013 | 1125 | |
---|
1014 | 1126 | switch (m->bank) { |
---|
.. | .. |
---|
1047 | 1159 | err_code: |
---|
1048 | 1160 | amd_decode_err_code(m->status & 0xffff); |
---|
1049 | 1161 | |
---|
1050 | | - return NOTIFY_STOP; |
---|
| 1162 | + m->kflags |= MCE_HANDLED_EDAC; |
---|
| 1163 | + return NOTIFY_OK; |
---|
1051 | 1164 | } |
---|
1052 | 1165 | |
---|
1053 | 1166 | static struct notifier_block amd_mce_dec_nb = { |
---|
.. | .. |
---|
1059 | 1172 | { |
---|
1060 | 1173 | struct cpuinfo_x86 *c = &boot_cpu_data; |
---|
1061 | 1174 | |
---|
1062 | | - if (c->x86_vendor != X86_VENDOR_AMD) |
---|
| 1175 | + if (c->x86_vendor != X86_VENDOR_AMD && |
---|
| 1176 | + c->x86_vendor != X86_VENDOR_HYGON) |
---|
1063 | 1177 | return -ENODEV; |
---|
1064 | 1178 | |
---|
1065 | | - fam_ops = kzalloc(sizeof(struct amd_decoder_ops), GFP_KERNEL); |
---|
1066 | | - if (!fam_ops) |
---|
1067 | | - return -ENOMEM; |
---|
| 1179 | + if (cpu_feature_enabled(X86_FEATURE_HYPERVISOR)) |
---|
| 1180 | + return -ENODEV; |
---|
| 1181 | + |
---|
| 1182 | + if (boot_cpu_has(X86_FEATURE_SMCA)) { |
---|
| 1183 | + xec_mask = 0x3f; |
---|
| 1184 | + goto out; |
---|
| 1185 | + } |
---|
1068 | 1186 | |
---|
1069 | 1187 | switch (c->x86) { |
---|
1070 | 1188 | case 0xf: |
---|
1071 | | - fam_ops->mc0_mce = k8_mc0_mce; |
---|
1072 | | - fam_ops->mc1_mce = k8_mc1_mce; |
---|
1073 | | - fam_ops->mc2_mce = k8_mc2_mce; |
---|
| 1189 | + fam_ops.mc0_mce = k8_mc0_mce; |
---|
| 1190 | + fam_ops.mc1_mce = k8_mc1_mce; |
---|
| 1191 | + fam_ops.mc2_mce = k8_mc2_mce; |
---|
1074 | 1192 | break; |
---|
1075 | 1193 | |
---|
1076 | 1194 | case 0x10: |
---|
1077 | | - fam_ops->mc0_mce = f10h_mc0_mce; |
---|
1078 | | - fam_ops->mc1_mce = k8_mc1_mce; |
---|
1079 | | - fam_ops->mc2_mce = k8_mc2_mce; |
---|
| 1195 | + fam_ops.mc0_mce = f10h_mc0_mce; |
---|
| 1196 | + fam_ops.mc1_mce = k8_mc1_mce; |
---|
| 1197 | + fam_ops.mc2_mce = k8_mc2_mce; |
---|
1080 | 1198 | break; |
---|
1081 | 1199 | |
---|
1082 | 1200 | case 0x11: |
---|
1083 | | - fam_ops->mc0_mce = k8_mc0_mce; |
---|
1084 | | - fam_ops->mc1_mce = k8_mc1_mce; |
---|
1085 | | - fam_ops->mc2_mce = k8_mc2_mce; |
---|
| 1201 | + fam_ops.mc0_mce = k8_mc0_mce; |
---|
| 1202 | + fam_ops.mc1_mce = k8_mc1_mce; |
---|
| 1203 | + fam_ops.mc2_mce = k8_mc2_mce; |
---|
1086 | 1204 | break; |
---|
1087 | 1205 | |
---|
1088 | 1206 | case 0x12: |
---|
1089 | | - fam_ops->mc0_mce = f12h_mc0_mce; |
---|
1090 | | - fam_ops->mc1_mce = k8_mc1_mce; |
---|
1091 | | - fam_ops->mc2_mce = k8_mc2_mce; |
---|
| 1207 | + fam_ops.mc0_mce = f12h_mc0_mce; |
---|
| 1208 | + fam_ops.mc1_mce = k8_mc1_mce; |
---|
| 1209 | + fam_ops.mc2_mce = k8_mc2_mce; |
---|
1092 | 1210 | break; |
---|
1093 | 1211 | |
---|
1094 | 1212 | case 0x14: |
---|
1095 | | - fam_ops->mc0_mce = cat_mc0_mce; |
---|
1096 | | - fam_ops->mc1_mce = cat_mc1_mce; |
---|
1097 | | - fam_ops->mc2_mce = k8_mc2_mce; |
---|
| 1213 | + fam_ops.mc0_mce = cat_mc0_mce; |
---|
| 1214 | + fam_ops.mc1_mce = cat_mc1_mce; |
---|
| 1215 | + fam_ops.mc2_mce = k8_mc2_mce; |
---|
1098 | 1216 | break; |
---|
1099 | 1217 | |
---|
1100 | 1218 | case 0x15: |
---|
1101 | 1219 | xec_mask = c->x86_model == 0x60 ? 0x3f : 0x1f; |
---|
1102 | 1220 | |
---|
1103 | | - fam_ops->mc0_mce = f15h_mc0_mce; |
---|
1104 | | - fam_ops->mc1_mce = f15h_mc1_mce; |
---|
1105 | | - fam_ops->mc2_mce = f15h_mc2_mce; |
---|
| 1221 | + fam_ops.mc0_mce = f15h_mc0_mce; |
---|
| 1222 | + fam_ops.mc1_mce = f15h_mc1_mce; |
---|
| 1223 | + fam_ops.mc2_mce = f15h_mc2_mce; |
---|
1106 | 1224 | break; |
---|
1107 | 1225 | |
---|
1108 | 1226 | case 0x16: |
---|
1109 | 1227 | xec_mask = 0x1f; |
---|
1110 | | - fam_ops->mc0_mce = cat_mc0_mce; |
---|
1111 | | - fam_ops->mc1_mce = cat_mc1_mce; |
---|
1112 | | - fam_ops->mc2_mce = f16h_mc2_mce; |
---|
| 1228 | + fam_ops.mc0_mce = cat_mc0_mce; |
---|
| 1229 | + fam_ops.mc1_mce = cat_mc1_mce; |
---|
| 1230 | + fam_ops.mc2_mce = f16h_mc2_mce; |
---|
1113 | 1231 | break; |
---|
1114 | 1232 | |
---|
1115 | 1233 | case 0x17: |
---|
1116 | | - xec_mask = 0x3f; |
---|
1117 | | - if (!boot_cpu_has(X86_FEATURE_SMCA)) { |
---|
1118 | | - printk(KERN_WARNING "Decoding supported only on Scalable MCA processors.\n"); |
---|
1119 | | - goto err_out; |
---|
1120 | | - } |
---|
1121 | | - break; |
---|
| 1234 | + case 0x18: |
---|
| 1235 | + pr_warn_once("Decoding supported only on Scalable MCA processors.\n"); |
---|
| 1236 | + return -EINVAL; |
---|
1122 | 1237 | |
---|
1123 | 1238 | default: |
---|
1124 | 1239 | printk(KERN_WARNING "Huh? What family is it: 0x%x?!\n", c->x86); |
---|
1125 | | - goto err_out; |
---|
| 1240 | + return -EINVAL; |
---|
1126 | 1241 | } |
---|
1127 | 1242 | |
---|
| 1243 | +out: |
---|
1128 | 1244 | pr_info("MCE: In-kernel MCE decoding enabled.\n"); |
---|
1129 | 1245 | |
---|
1130 | 1246 | mce_register_decode_chain(&amd_mce_dec_nb); |
---|
1131 | 1247 | |
---|
1132 | 1248 | return 0; |
---|
1133 | | - |
---|
1134 | | -err_out: |
---|
1135 | | - kfree(fam_ops); |
---|
1136 | | - fam_ops = NULL; |
---|
1137 | | - return -EINVAL; |
---|
1138 | 1249 | } |
---|
1139 | 1250 | early_initcall(mce_amd_init); |
---|
1140 | 1251 | |
---|
.. | .. |
---|
1142 | 1253 | static void __exit mce_amd_exit(void) |
---|
1143 | 1254 | { |
---|
1144 | 1255 | mce_unregister_decode_chain(&amd_mce_dec_nb); |
---|
1145 | | - kfree(fam_ops); |
---|
1146 | 1256 | } |
---|
1147 | 1257 | |
---|
1148 | 1258 | MODULE_DESCRIPTION("AMD MCE decoder"); |
---|