.. | .. |
---|
1 | 1 | // SPDX-License-Identifier: GPL-2.0 |
---|
| 2 | +/* |
---|
| 3 | + * Copyright (c) 2017-2019 Borislav Petkov, SUSE Labs. |
---|
| 4 | + */ |
---|
2 | 5 | #include <linux/mm.h> |
---|
3 | 6 | #include <linux/gfp.h> |
---|
| 7 | +#include <linux/ras.h> |
---|
4 | 8 | #include <linux/kernel.h> |
---|
5 | 9 | #include <linux/workqueue.h> |
---|
6 | 10 | |
---|
.. | .. |
---|
37 | 41 | * thus emulate an LRU-like behavior when deleting elements to free up space |
---|
38 | 42 | * in the page. |
---|
39 | 43 | * |
---|
40 | | - * When an element reaches it's max count of count_threshold, we try to poison |
---|
41 | | - * it by assuming that errors triggered count_threshold times in a single page |
---|
42 | | - * are excessive and that page shouldn't be used anymore. count_threshold is |
---|
| 44 | + * When an element reaches its max count of action_threshold, we try to poison |
---|
| 45 | + * it by assuming that errors triggered action_threshold times in a single page |
---|
| 46 | + * are excessive and that page shouldn't be used anymore. action_threshold is |
---|
43 | 47 | * initialized to COUNT_MASK which is the maximum. |
---|
44 | 48 | * |
---|
45 | 49 | * That error event entry causes cec_add_elem() to return !0 value and thus |
---|
.. | .. |
---|
122 | 126 | static u64 dfs_pfn; |
---|
123 | 127 | |
---|
124 | 128 | /* Number of errors after which we offline */ |
---|
125 | | -static unsigned int count_threshold = COUNT_MASK; |
---|
| 129 | +static u64 action_threshold = COUNT_MASK; |
---|
126 | 130 | |
---|
127 | 131 | /* Each element "decays" each decay_interval which is 24hrs by default. */ |
---|
128 | 132 | #define CEC_DECAY_DEFAULT_INTERVAL 24 * 60 * 60 /* 24 hrs */ |
---|
.. | .. |
---|
276 | 280 | return pfn; |
---|
277 | 281 | } |
---|
278 | 282 | |
---|
| 283 | +static bool sanity_check(struct ce_array *ca) |
---|
| 284 | +{ |
---|
| 285 | + bool ret = false; |
---|
| 286 | + u64 prev = 0; |
---|
| 287 | + int i; |
---|
279 | 288 | |
---|
280 | | -int cec_add_elem(u64 pfn) |
---|
| 289 | + for (i = 0; i < ca->n; i++) { |
---|
| 290 | + u64 this = PFN(ca->array[i]); |
---|
| 291 | + |
---|
| 292 | + if (WARN(prev > this, "prev: 0x%016llx <-> this: 0x%016llx\n", prev, this)) |
---|
| 293 | + ret = true; |
---|
| 294 | + |
---|
| 295 | + prev = this; |
---|
| 296 | + } |
---|
| 297 | + |
---|
| 298 | + if (!ret) |
---|
| 299 | + return ret; |
---|
| 300 | + |
---|
| 301 | + pr_info("Sanity check dump:\n{ n: %d\n", ca->n); |
---|
| 302 | + for (i = 0; i < ca->n; i++) { |
---|
| 303 | + u64 this = PFN(ca->array[i]); |
---|
| 304 | + |
---|
| 305 | + pr_info(" %03d: [%016llx|%03llx]\n", i, this, FULL_COUNT(ca->array[i])); |
---|
| 306 | + } |
---|
| 307 | + pr_info("}\n"); |
---|
| 308 | + |
---|
| 309 | + return ret; |
---|
| 310 | +} |
---|
| 311 | + |
---|
| 312 | +/** |
---|
| 313 | + * cec_add_elem - Add an element to the CEC array. |
---|
| 314 | + * @pfn: page frame number to insert |
---|
| 315 | + * |
---|
| 316 | + * Return values: |
---|
| 317 | + * - <0: on error |
---|
| 318 | + * - 0: on success |
---|
| 319 | + * - >0: when the inserted pfn was offlined |
---|
| 320 | + */ |
---|
| 321 | +static int cec_add_elem(u64 pfn) |
---|
281 | 322 | { |
---|
282 | 323 | struct ce_array *ca = &ce_arr; |
---|
283 | | - unsigned int to; |
---|
284 | | - int count, ret = 0; |
---|
| 324 | + int count, err, ret = 0; |
---|
| 325 | + unsigned int to = 0; |
---|
285 | 326 | |
---|
286 | 327 | /* |
---|
287 | 328 | * We can be called very early on the identify_cpu() path where we are |
---|
.. | .. |
---|
290 | 331 | if (!ce_arr.array || ce_arr.disabled) |
---|
291 | 332 | return -ENODEV; |
---|
292 | 333 | |
---|
293 | | - ca->ces_entered++; |
---|
294 | | - |
---|
295 | 334 | mutex_lock(&ce_mutex); |
---|
296 | 335 | |
---|
| 336 | + ca->ces_entered++; |
---|
| 337 | + |
---|
| 338 | + /* Array full, free the LRU slot. */ |
---|
297 | 339 | if (ca->n == MAX_ELEMS) |
---|
298 | 340 | WARN_ON(!del_lru_elem_unlocked(ca)); |
---|
299 | 341 | |
---|
300 | | - ret = find_elem(ca, pfn, &to); |
---|
301 | | - if (ret < 0) { |
---|
| 342 | + err = find_elem(ca, pfn, &to); |
---|
| 343 | + if (err < 0) { |
---|
302 | 344 | /* |
---|
303 | 345 | * Shift range [to-end] to make room for one more element. |
---|
304 | 346 | */ |
---|
.. | .. |
---|
306 | 348 | (void *)&ca->array[to], |
---|
307 | 349 | (ca->n - to) * sizeof(u64)); |
---|
308 | 350 | |
---|
309 | | - ca->array[to] = (pfn << PAGE_SHIFT) | |
---|
310 | | - (DECAY_MASK << COUNT_BITS) | 1; |
---|
311 | | - |
---|
| 351 | + ca->array[to] = pfn << PAGE_SHIFT; |
---|
312 | 352 | ca->n++; |
---|
313 | | - |
---|
314 | | - ret = 0; |
---|
315 | | - |
---|
316 | | - goto decay; |
---|
317 | 353 | } |
---|
318 | 354 | |
---|
| 355 | + /* Add/refresh element generation and increment count */ |
---|
| 356 | + ca->array[to] |= DECAY_MASK << COUNT_BITS; |
---|
| 357 | + ca->array[to]++; |
---|
| 358 | + |
---|
| 359 | + /* Check action threshold and soft-offline, if reached. */ |
---|
319 | 360 | count = COUNT(ca->array[to]); |
---|
320 | | - |
---|
321 | | - if (count < count_threshold) { |
---|
322 | | - ca->array[to] |= (DECAY_MASK << COUNT_BITS); |
---|
323 | | - ca->array[to]++; |
---|
324 | | - |
---|
325 | | - ret = 0; |
---|
326 | | - } else { |
---|
| 361 | + if (count >= action_threshold) { |
---|
327 | 362 | u64 pfn = ca->array[to] >> PAGE_SHIFT; |
---|
328 | 363 | |
---|
329 | 364 | if (!pfn_valid(pfn)) { |
---|
.. | .. |
---|
338 | 373 | del_elem(ca, to); |
---|
339 | 374 | |
---|
340 | 375 | /* |
---|
341 | | - * Return a >0 value to denote that we've reached the offlining |
---|
342 | | - * threshold. |
---|
| 376 | + * Return a >0 value to callers, to denote that we've reached |
---|
| 377 | + * the offlining threshold. |
---|
343 | 378 | */ |
---|
344 | 379 | ret = 1; |
---|
345 | 380 | |
---|
346 | 381 | goto unlock; |
---|
347 | 382 | } |
---|
348 | 383 | |
---|
349 | | -decay: |
---|
350 | 384 | ca->decay_count++; |
---|
351 | 385 | |
---|
352 | 386 | if (ca->decay_count >= CLEAN_ELEMS) |
---|
353 | 387 | do_spring_cleaning(ca); |
---|
| 388 | + |
---|
| 389 | + WARN_ON_ONCE(sanity_check(ca)); |
---|
354 | 390 | |
---|
355 | 391 | unlock: |
---|
356 | 392 | mutex_unlock(&ce_mutex); |
---|
.. | .. |
---|
378 | 414 | |
---|
379 | 415 | static int decay_interval_set(void *data, u64 val) |
---|
380 | 416 | { |
---|
381 | | - *(u64 *)data = val; |
---|
382 | | - |
---|
383 | 417 | if (val < CEC_DECAY_MIN_INTERVAL) |
---|
384 | 418 | return -EINVAL; |
---|
385 | 419 | |
---|
386 | 420 | if (val > CEC_DECAY_MAX_INTERVAL) |
---|
387 | 421 | return -EINVAL; |
---|
388 | 422 | |
---|
| 423 | + *(u64 *)data = val; |
---|
389 | 424 | decay_interval = val; |
---|
390 | 425 | |
---|
391 | 426 | cec_mod_work(decay_interval); |
---|
| 427 | + |
---|
392 | 428 | return 0; |
---|
393 | 429 | } |
---|
394 | 430 | DEFINE_DEBUGFS_ATTRIBUTE(decay_interval_ops, u64_get, decay_interval_set, "%lld\n"); |
---|
395 | 431 | |
---|
396 | | -static int count_threshold_set(void *data, u64 val) |
---|
| 432 | +static int action_threshold_set(void *data, u64 val) |
---|
397 | 433 | { |
---|
398 | 434 | *(u64 *)data = val; |
---|
399 | 435 | |
---|
400 | 436 | if (val > COUNT_MASK) |
---|
401 | 437 | val = COUNT_MASK; |
---|
402 | 438 | |
---|
403 | | - count_threshold = val; |
---|
| 439 | + action_threshold = val; |
---|
404 | 440 | |
---|
405 | 441 | return 0; |
---|
406 | 442 | } |
---|
407 | | -DEFINE_DEBUGFS_ATTRIBUTE(count_threshold_ops, u64_get, count_threshold_set, "%lld\n"); |
---|
| 443 | +DEFINE_DEBUGFS_ATTRIBUTE(action_threshold_ops, u64_get, action_threshold_set, "%lld\n"); |
---|
408 | 444 | |
---|
409 | | -static int array_dump(struct seq_file *m, void *v) |
---|
| 445 | +static const char * const bins[] = { "00", "01", "10", "11" }; |
---|
| 446 | + |
---|
| 447 | +static int array_show(struct seq_file *m, void *v) |
---|
410 | 448 | { |
---|
411 | 449 | struct ce_array *ca = &ce_arr; |
---|
412 | | - u64 prev = 0; |
---|
413 | 450 | int i; |
---|
414 | 451 | |
---|
415 | 452 | mutex_lock(&ce_mutex); |
---|
.. | .. |
---|
418 | 455 | for (i = 0; i < ca->n; i++) { |
---|
419 | 456 | u64 this = PFN(ca->array[i]); |
---|
420 | 457 | |
---|
421 | | - seq_printf(m, " %03d: [%016llx|%03llx]\n", i, this, FULL_COUNT(ca->array[i])); |
---|
422 | | - |
---|
423 | | - WARN_ON(prev > this); |
---|
424 | | - |
---|
425 | | - prev = this; |
---|
| 458 | + seq_printf(m, " %3d: [%016llx|%s|%03llx]\n", |
---|
| 459 | + i, this, bins[DECAY(ca->array[i])], COUNT(ca->array[i])); |
---|
426 | 460 | } |
---|
427 | 461 | |
---|
428 | 462 | seq_printf(m, "}\n"); |
---|
.. | .. |
---|
435 | 469 | seq_printf(m, "Decay interval: %lld seconds\n", decay_interval); |
---|
436 | 470 | seq_printf(m, "Decays: %lld\n", ca->decays_done); |
---|
437 | 471 | |
---|
438 | | - seq_printf(m, "Action threshold: %d\n", count_threshold); |
---|
| 472 | + seq_printf(m, "Action threshold: %lld\n", action_threshold); |
---|
439 | 473 | |
---|
440 | 474 | mutex_unlock(&ce_mutex); |
---|
441 | 475 | |
---|
442 | 476 | return 0; |
---|
443 | 477 | } |
---|
444 | 478 | |
---|
445 | | -static int array_open(struct inode *inode, struct file *filp) |
---|
446 | | -{ |
---|
447 | | - return single_open(filp, array_dump, NULL); |
---|
448 | | -} |
---|
449 | | - |
---|
450 | | -static const struct file_operations array_ops = { |
---|
451 | | - .owner = THIS_MODULE, |
---|
452 | | - .open = array_open, |
---|
453 | | - .read = seq_read, |
---|
454 | | - .llseek = seq_lseek, |
---|
455 | | - .release = single_release, |
---|
456 | | -}; |
---|
| 479 | +DEFINE_SHOW_ATTRIBUTE(array); |
---|
457 | 480 | |
---|
458 | 481 | static int __init create_debugfs_nodes(void) |
---|
459 | 482 | { |
---|
.. | .. |
---|
465 | 488 | return -1; |
---|
466 | 489 | } |
---|
467 | 490 | |
---|
468 | | - pfn = debugfs_create_file("pfn", S_IRUSR | S_IWUSR, d, &dfs_pfn, &pfn_ops); |
---|
469 | | - if (!pfn) { |
---|
470 | | - pr_warn("Error creating pfn debugfs node!\n"); |
---|
471 | | - goto err; |
---|
472 | | - } |
---|
473 | | - |
---|
474 | | - array = debugfs_create_file("array", S_IRUSR, d, NULL, &array_ops); |
---|
475 | | - if (!array) { |
---|
476 | | - pr_warn("Error creating array debugfs node!\n"); |
---|
477 | | - goto err; |
---|
478 | | - } |
---|
479 | | - |
---|
480 | 491 | decay = debugfs_create_file("decay_interval", S_IRUSR | S_IWUSR, d, |
---|
481 | 492 | &decay_interval, &decay_interval_ops); |
---|
482 | 493 | if (!decay) { |
---|
.. | .. |
---|
484 | 495 | goto err; |
---|
485 | 496 | } |
---|
486 | 497 | |
---|
487 | | - count = debugfs_create_file("count_threshold", S_IRUSR | S_IWUSR, d, |
---|
488 | | - &count_threshold, &count_threshold_ops); |
---|
| 498 | + count = debugfs_create_file("action_threshold", S_IRUSR | S_IWUSR, d, |
---|
| 499 | + &action_threshold, &action_threshold_ops); |
---|
489 | 500 | if (!count) { |
---|
490 | | - pr_warn("Error creating count_threshold debugfs node!\n"); |
---|
| 501 | + pr_warn("Error creating action_threshold debugfs node!\n"); |
---|
491 | 502 | goto err; |
---|
492 | 503 | } |
---|
493 | 504 | |
---|
| 505 | + if (!IS_ENABLED(CONFIG_RAS_CEC_DEBUG)) |
---|
| 506 | + return 0; |
---|
| 507 | + |
---|
| 508 | + pfn = debugfs_create_file("pfn", S_IRUSR | S_IWUSR, d, &dfs_pfn, &pfn_ops); |
---|
| 509 | + if (!pfn) { |
---|
| 510 | + pr_warn("Error creating pfn debugfs node!\n"); |
---|
| 511 | + goto err; |
---|
| 512 | + } |
---|
| 513 | + |
---|
| 514 | + array = debugfs_create_file("array", S_IRUSR, d, NULL, &array_fops); |
---|
| 515 | + if (!array) { |
---|
| 516 | + pr_warn("Error creating array debugfs node!\n"); |
---|
| 517 | + goto err; |
---|
| 518 | + } |
---|
494 | 519 | |
---|
495 | 520 | return 0; |
---|
496 | 521 | |
---|
.. | .. |
---|
500 | 525 | return 1; |
---|
501 | 526 | } |
---|
502 | 527 | |
---|
503 | | -void __init cec_init(void) |
---|
| 528 | +static int cec_notifier(struct notifier_block *nb, unsigned long val, |
---|
| 529 | + void *data) |
---|
| 530 | +{ |
---|
| 531 | + struct mce *m = (struct mce *)data; |
---|
| 532 | + |
---|
| 533 | + if (!m) |
---|
| 534 | + return NOTIFY_DONE; |
---|
| 535 | + |
---|
| 536 | + /* We eat only correctable DRAM errors with usable addresses. */ |
---|
| 537 | + if (mce_is_memory_error(m) && |
---|
| 538 | + mce_is_correctable(m) && |
---|
| 539 | + mce_usable_address(m)) { |
---|
| 540 | + if (!cec_add_elem(m->addr >> PAGE_SHIFT)) { |
---|
| 541 | + m->kflags |= MCE_HANDLED_CEC; |
---|
| 542 | + return NOTIFY_OK; |
---|
| 543 | + } |
---|
| 544 | + } |
---|
| 545 | + |
---|
| 546 | + return NOTIFY_DONE; |
---|
| 547 | +} |
---|
| 548 | + |
---|
| 549 | +static struct notifier_block cec_nb = { |
---|
| 550 | + .notifier_call = cec_notifier, |
---|
| 551 | + .priority = MCE_PRIO_CEC, |
---|
| 552 | +}; |
---|
| 553 | + |
---|
| 554 | +static int __init cec_init(void) |
---|
504 | 555 | { |
---|
505 | 556 | if (ce_arr.disabled) |
---|
506 | | - return; |
---|
| 557 | + return -ENODEV; |
---|
507 | 558 | |
---|
508 | 559 | ce_arr.array = (void *)get_zeroed_page(GFP_KERNEL); |
---|
509 | 560 | if (!ce_arr.array) { |
---|
510 | 561 | pr_err("Error allocating CE array page!\n"); |
---|
511 | | - return; |
---|
| 562 | + return -ENOMEM; |
---|
512 | 563 | } |
---|
513 | 564 | |
---|
514 | | - if (create_debugfs_nodes()) |
---|
515 | | - return; |
---|
| 565 | + if (create_debugfs_nodes()) { |
---|
| 566 | + free_page((unsigned long)ce_arr.array); |
---|
| 567 | + return -ENOMEM; |
---|
| 568 | + } |
---|
516 | 569 | |
---|
517 | 570 | INIT_DELAYED_WORK(&cec_work, cec_work_fn); |
---|
518 | 571 | schedule_delayed_work(&cec_work, CEC_DECAY_DEFAULT_INTERVAL); |
---|
519 | 572 | |
---|
| 573 | + mce_register_decode_chain(&cec_nb); |
---|
| 574 | + |
---|
520 | 575 | pr_info("Correctable Errors collector initialized.\n"); |
---|
| 576 | + return 0; |
---|
521 | 577 | } |
---|
| 578 | +late_initcall(cec_init); |
---|
522 | 579 | |
---|
523 | 580 | int __init parse_cec_param(char *str) |
---|
524 | 581 | { |
---|