| .. | .. |
|---|
| 1 | 1 | // SPDX-License-Identifier: GPL-2.0 |
|---|
| 2 | +/* |
|---|
| 3 | + * Copyright (c) 2017-2019 Borislav Petkov, SUSE Labs. |
|---|
| 4 | + */ |
|---|
| 2 | 5 | #include <linux/mm.h> |
|---|
| 3 | 6 | #include <linux/gfp.h> |
|---|
| 7 | +#include <linux/ras.h> |
|---|
| 4 | 8 | #include <linux/kernel.h> |
|---|
| 5 | 9 | #include <linux/workqueue.h> |
|---|
| 6 | 10 | |
|---|
| .. | .. |
|---|
| 37 | 41 | * thus emulate an LRU-like behavior when deleting elements to free up space |
|---|
| 38 | 42 | * in the page. |
|---|
| 39 | 43 | * |
|---|
| 40 | | - * When an element reaches it's max count of count_threshold, we try to poison |
|---|
| 41 | | - * it by assuming that errors triggered count_threshold times in a single page |
|---|
| 42 | | - * are excessive and that page shouldn't be used anymore. count_threshold is |
|---|
| 44 | + * When an element reaches its max count of action_threshold, we try to poison |
|---|
| 45 | + * it by assuming that errors triggered action_threshold times in a single page |
|---|
| 46 | + * are excessive and that page shouldn't be used anymore. action_threshold is |
|---|
| 43 | 47 | * initialized to COUNT_MASK which is the maximum. |
|---|
| 44 | 48 | * |
|---|
| 45 | 49 | * That error event entry causes cec_add_elem() to return !0 value and thus |
|---|
| .. | .. |
|---|
| 122 | 126 | static u64 dfs_pfn; |
|---|
| 123 | 127 | |
|---|
| 124 | 128 | /* Amount of errors after which we offline */ |
|---|
| 125 | | -static unsigned int count_threshold = COUNT_MASK; |
|---|
| 129 | +static u64 action_threshold = COUNT_MASK; |
|---|
| 126 | 130 | |
|---|
| 127 | 131 | /* Each element "decays" each decay_interval which is 24hrs by default. */ |
|---|
| 128 | 132 | #define CEC_DECAY_DEFAULT_INTERVAL 24 * 60 * 60 /* 24 hrs */ |
|---|
| .. | .. |
|---|
| 276 | 280 | return pfn; |
|---|
| 277 | 281 | } |
|---|
| 278 | 282 | |
|---|
| 283 | +static bool sanity_check(struct ce_array *ca) |
|---|
| 284 | +{ |
|---|
| 285 | + bool ret = false; |
|---|
| 286 | + u64 prev = 0; |
|---|
| 287 | + int i; |
|---|
| 279 | 288 | |
|---|
| 280 | | -int cec_add_elem(u64 pfn) |
|---|
| 289 | + for (i = 0; i < ca->n; i++) { |
|---|
| 290 | + u64 this = PFN(ca->array[i]); |
|---|
| 291 | + |
|---|
| 292 | + if (WARN(prev > this, "prev: 0x%016llx <-> this: 0x%016llx\n", prev, this)) |
|---|
| 293 | + ret = true; |
|---|
| 294 | + |
|---|
| 295 | + prev = this; |
|---|
| 296 | + } |
|---|
| 297 | + |
|---|
| 298 | + if (!ret) |
|---|
| 299 | + return ret; |
|---|
| 300 | + |
|---|
| 301 | + pr_info("Sanity check dump:\n{ n: %d\n", ca->n); |
|---|
| 302 | + for (i = 0; i < ca->n; i++) { |
|---|
| 303 | + u64 this = PFN(ca->array[i]); |
|---|
| 304 | + |
|---|
| 305 | + pr_info(" %03d: [%016llx|%03llx]\n", i, this, FULL_COUNT(ca->array[i])); |
|---|
| 306 | + } |
|---|
| 307 | + pr_info("}\n"); |
|---|
| 308 | + |
|---|
| 309 | + return ret; |
|---|
| 310 | +} |
|---|
| 311 | + |
|---|
| 312 | +/** |
|---|
| 313 | + * cec_add_elem - Add an element to the CEC array. |
|---|
| 314 | + * @pfn: page frame number to insert |
|---|
| 315 | + * |
|---|
| 316 | + * Return values: |
|---|
| 317 | + * - <0: on error |
|---|
| 318 | + * - 0: on success |
|---|
| 319 | + * - >0: when the inserted pfn was offlined |
|---|
| 320 | + */ |
|---|
| 321 | +static int cec_add_elem(u64 pfn) |
|---|
| 281 | 322 | { |
|---|
| 282 | 323 | struct ce_array *ca = &ce_arr; |
|---|
| 283 | | - unsigned int to; |
|---|
| 284 | | - int count, ret = 0; |
|---|
| 324 | + int count, err, ret = 0; |
|---|
| 325 | + unsigned int to = 0; |
|---|
| 285 | 326 | |
|---|
| 286 | 327 | /* |
|---|
| 287 | 328 | * We can be called very early on the identify_cpu() path where we are |
|---|
| .. | .. |
|---|
| 290 | 331 | if (!ce_arr.array || ce_arr.disabled) |
|---|
| 291 | 332 | return -ENODEV; |
|---|
| 292 | 333 | |
|---|
| 293 | | - ca->ces_entered++; |
|---|
| 294 | | - |
|---|
| 295 | 334 | mutex_lock(&ce_mutex); |
|---|
| 296 | 335 | |
|---|
| 336 | + ca->ces_entered++; |
|---|
| 337 | + |
|---|
| 338 | + /* Array full, free the LRU slot. */ |
|---|
| 297 | 339 | if (ca->n == MAX_ELEMS) |
|---|
| 298 | 340 | WARN_ON(!del_lru_elem_unlocked(ca)); |
|---|
| 299 | 341 | |
|---|
| 300 | | - ret = find_elem(ca, pfn, &to); |
|---|
| 301 | | - if (ret < 0) { |
|---|
| 342 | + err = find_elem(ca, pfn, &to); |
|---|
| 343 | + if (err < 0) { |
|---|
| 302 | 344 | /* |
|---|
| 303 | 345 | * Shift range [to-end] to make room for one more element. |
|---|
| 304 | 346 | */ |
|---|
| .. | .. |
|---|
| 306 | 348 | (void *)&ca->array[to], |
|---|
| 307 | 349 | (ca->n - to) * sizeof(u64)); |
|---|
| 308 | 350 | |
|---|
| 309 | | - ca->array[to] = (pfn << PAGE_SHIFT) | |
|---|
| 310 | | - (DECAY_MASK << COUNT_BITS) | 1; |
|---|
| 311 | | - |
|---|
| 351 | + ca->array[to] = pfn << PAGE_SHIFT; |
|---|
| 312 | 352 | ca->n++; |
|---|
| 313 | | - |
|---|
| 314 | | - ret = 0; |
|---|
| 315 | | - |
|---|
| 316 | | - goto decay; |
|---|
| 317 | 353 | } |
|---|
| 318 | 354 | |
|---|
| 355 | + /* Add/refresh element generation and increment count */ |
|---|
| 356 | + ca->array[to] |= DECAY_MASK << COUNT_BITS; |
|---|
| 357 | + ca->array[to]++; |
|---|
| 358 | + |
|---|
| 359 | + /* Check action threshold and soft-offline, if reached. */ |
|---|
| 319 | 360 | count = COUNT(ca->array[to]); |
|---|
| 320 | | - |
|---|
| 321 | | - if (count < count_threshold) { |
|---|
| 322 | | - ca->array[to] |= (DECAY_MASK << COUNT_BITS); |
|---|
| 323 | | - ca->array[to]++; |
|---|
| 324 | | - |
|---|
| 325 | | - ret = 0; |
|---|
| 326 | | - } else { |
|---|
| 361 | + if (count >= action_threshold) { |
|---|
| 327 | 362 | u64 pfn = ca->array[to] >> PAGE_SHIFT; |
|---|
| 328 | 363 | |
|---|
| 329 | 364 | if (!pfn_valid(pfn)) { |
|---|
| .. | .. |
|---|
| 338 | 373 | del_elem(ca, to); |
|---|
| 339 | 374 | |
|---|
| 340 | 375 | /* |
|---|
| 341 | | - * Return a >0 value to denote that we've reached the offlining |
|---|
| 342 | | - * threshold. |
|---|
| 376 | + * Return a >0 value to callers, to denote that we've reached |
|---|
| 377 | + * the offlining threshold. |
|---|
| 343 | 378 | */ |
|---|
| 344 | 379 | ret = 1; |
|---|
| 345 | 380 | |
|---|
| 346 | 381 | goto unlock; |
|---|
| 347 | 382 | } |
|---|
| 348 | 383 | |
|---|
| 349 | | -decay: |
|---|
| 350 | 384 | ca->decay_count++; |
|---|
| 351 | 385 | |
|---|
| 352 | 386 | if (ca->decay_count >= CLEAN_ELEMS) |
|---|
| 353 | 387 | do_spring_cleaning(ca); |
|---|
| 388 | + |
|---|
| 389 | + WARN_ON_ONCE(sanity_check(ca)); |
|---|
| 354 | 390 | |
|---|
| 355 | 391 | unlock: |
|---|
| 356 | 392 | mutex_unlock(&ce_mutex); |
|---|
| .. | .. |
|---|
| 378 | 414 | |
|---|
| 379 | 415 | static int decay_interval_set(void *data, u64 val) |
|---|
| 380 | 416 | { |
|---|
| 381 | | - *(u64 *)data = val; |
|---|
| 382 | | - |
|---|
| 383 | 417 | if (val < CEC_DECAY_MIN_INTERVAL) |
|---|
| 384 | 418 | return -EINVAL; |
|---|
| 385 | 419 | |
|---|
| 386 | 420 | if (val > CEC_DECAY_MAX_INTERVAL) |
|---|
| 387 | 421 | return -EINVAL; |
|---|
| 388 | 422 | |
|---|
| 423 | + *(u64 *)data = val; |
|---|
| 389 | 424 | decay_interval = val; |
|---|
| 390 | 425 | |
|---|
| 391 | 426 | cec_mod_work(decay_interval); |
|---|
| 427 | + |
|---|
| 392 | 428 | return 0; |
|---|
| 393 | 429 | } |
|---|
| 394 | 430 | DEFINE_DEBUGFS_ATTRIBUTE(decay_interval_ops, u64_get, decay_interval_set, "%lld\n"); |
|---|
| 395 | 431 | |
|---|
| 396 | | -static int count_threshold_set(void *data, u64 val) |
|---|
| 432 | +static int action_threshold_set(void *data, u64 val) |
|---|
| 397 | 433 | { |
|---|
| 398 | 434 | *(u64 *)data = val; |
|---|
| 399 | 435 | |
|---|
| 400 | 436 | if (val > COUNT_MASK) |
|---|
| 401 | 437 | val = COUNT_MASK; |
|---|
| 402 | 438 | |
|---|
| 403 | | - count_threshold = val; |
|---|
| 439 | + action_threshold = val; |
|---|
| 404 | 440 | |
|---|
| 405 | 441 | return 0; |
|---|
| 406 | 442 | } |
|---|
| 407 | | -DEFINE_DEBUGFS_ATTRIBUTE(count_threshold_ops, u64_get, count_threshold_set, "%lld\n"); |
|---|
| 443 | +DEFINE_DEBUGFS_ATTRIBUTE(action_threshold_ops, u64_get, action_threshold_set, "%lld\n"); |
|---|
| 408 | 444 | |
|---|
| 409 | | -static int array_dump(struct seq_file *m, void *v) |
|---|
| 445 | +static const char * const bins[] = { "00", "01", "10", "11" }; |
|---|
| 446 | + |
|---|
| 447 | +static int array_show(struct seq_file *m, void *v) |
|---|
| 410 | 448 | { |
|---|
| 411 | 449 | struct ce_array *ca = &ce_arr; |
|---|
| 412 | | - u64 prev = 0; |
|---|
| 413 | 450 | int i; |
|---|
| 414 | 451 | |
|---|
| 415 | 452 | mutex_lock(&ce_mutex); |
|---|
| .. | .. |
|---|
| 418 | 455 | for (i = 0; i < ca->n; i++) { |
|---|
| 419 | 456 | u64 this = PFN(ca->array[i]); |
|---|
| 420 | 457 | |
|---|
| 421 | | - seq_printf(m, " %03d: [%016llx|%03llx]\n", i, this, FULL_COUNT(ca->array[i])); |
|---|
| 422 | | - |
|---|
| 423 | | - WARN_ON(prev > this); |
|---|
| 424 | | - |
|---|
| 425 | | - prev = this; |
|---|
| 458 | + seq_printf(m, " %3d: [%016llx|%s|%03llx]\n", |
|---|
| 459 | + i, this, bins[DECAY(ca->array[i])], COUNT(ca->array[i])); |
|---|
| 426 | 460 | } |
|---|
| 427 | 461 | |
|---|
| 428 | 462 | seq_printf(m, "}\n"); |
|---|
| .. | .. |
|---|
| 435 | 469 | seq_printf(m, "Decay interval: %lld seconds\n", decay_interval); |
|---|
| 436 | 470 | seq_printf(m, "Decays: %lld\n", ca->decays_done); |
|---|
| 437 | 471 | |
|---|
| 438 | | - seq_printf(m, "Action threshold: %d\n", count_threshold); |
|---|
| 472 | + seq_printf(m, "Action threshold: %lld\n", action_threshold); |
|---|
| 439 | 473 | |
|---|
| 440 | 474 | mutex_unlock(&ce_mutex); |
|---|
| 441 | 475 | |
|---|
| 442 | 476 | return 0; |
|---|
| 443 | 477 | } |
|---|
| 444 | 478 | |
|---|
| 445 | | -static int array_open(struct inode *inode, struct file *filp) |
|---|
| 446 | | -{ |
|---|
| 447 | | - return single_open(filp, array_dump, NULL); |
|---|
| 448 | | -} |
|---|
| 449 | | - |
|---|
| 450 | | -static const struct file_operations array_ops = { |
|---|
| 451 | | - .owner = THIS_MODULE, |
|---|
| 452 | | - .open = array_open, |
|---|
| 453 | | - .read = seq_read, |
|---|
| 454 | | - .llseek = seq_lseek, |
|---|
| 455 | | - .release = single_release, |
|---|
| 456 | | -}; |
|---|
| 479 | +DEFINE_SHOW_ATTRIBUTE(array); |
|---|
| 457 | 480 | |
|---|
| 458 | 481 | static int __init create_debugfs_nodes(void) |
|---|
| 459 | 482 | { |
|---|
| .. | .. |
|---|
| 465 | 488 | return -1; |
|---|
| 466 | 489 | } |
|---|
| 467 | 490 | |
|---|
| 468 | | - pfn = debugfs_create_file("pfn", S_IRUSR | S_IWUSR, d, &dfs_pfn, &pfn_ops); |
|---|
| 469 | | - if (!pfn) { |
|---|
| 470 | | - pr_warn("Error creating pfn debugfs node!\n"); |
|---|
| 471 | | - goto err; |
|---|
| 472 | | - } |
|---|
| 473 | | - |
|---|
| 474 | | - array = debugfs_create_file("array", S_IRUSR, d, NULL, &array_ops); |
|---|
| 475 | | - if (!array) { |
|---|
| 476 | | - pr_warn("Error creating array debugfs node!\n"); |
|---|
| 477 | | - goto err; |
|---|
| 478 | | - } |
|---|
| 479 | | - |
|---|
| 480 | 491 | decay = debugfs_create_file("decay_interval", S_IRUSR | S_IWUSR, d, |
|---|
| 481 | 492 | &decay_interval, &decay_interval_ops); |
|---|
| 482 | 493 | if (!decay) { |
|---|
| .. | .. |
|---|
| 484 | 495 | goto err; |
|---|
| 485 | 496 | } |
|---|
| 486 | 497 | |
|---|
| 487 | | - count = debugfs_create_file("count_threshold", S_IRUSR | S_IWUSR, d, |
|---|
| 488 | | - &count_threshold, &count_threshold_ops); |
|---|
| 498 | + count = debugfs_create_file("action_threshold", S_IRUSR | S_IWUSR, d, |
|---|
| 499 | + &action_threshold, &action_threshold_ops); |
|---|
| 489 | 500 | if (!count) { |
|---|
| 490 | | - pr_warn("Error creating count_threshold debugfs node!\n"); |
|---|
| 501 | + pr_warn("Error creating action_threshold debugfs node!\n"); |
|---|
| 491 | 502 | goto err; |
|---|
| 492 | 503 | } |
|---|
| 493 | 504 | |
|---|
| 505 | + if (!IS_ENABLED(CONFIG_RAS_CEC_DEBUG)) |
|---|
| 506 | + return 0; |
|---|
| 507 | + |
|---|
| 508 | + pfn = debugfs_create_file("pfn", S_IRUSR | S_IWUSR, d, &dfs_pfn, &pfn_ops); |
|---|
| 509 | + if (!pfn) { |
|---|
| 510 | + pr_warn("Error creating pfn debugfs node!\n"); |
|---|
| 511 | + goto err; |
|---|
| 512 | + } |
|---|
| 513 | + |
|---|
| 514 | + array = debugfs_create_file("array", S_IRUSR, d, NULL, &array_fops); |
|---|
| 515 | + if (!array) { |
|---|
| 516 | + pr_warn("Error creating array debugfs node!\n"); |
|---|
| 517 | + goto err; |
|---|
| 518 | + } |
|---|
| 494 | 519 | |
|---|
| 495 | 520 | return 0; |
|---|
| 496 | 521 | |
|---|
| .. | .. |
|---|
| 500 | 525 | return 1; |
|---|
| 501 | 526 | } |
|---|
| 502 | 527 | |
|---|
| 503 | | -void __init cec_init(void) |
|---|
| 528 | +static int cec_notifier(struct notifier_block *nb, unsigned long val, |
|---|
| 529 | + void *data) |
|---|
| 530 | +{ |
|---|
| 531 | + struct mce *m = (struct mce *)data; |
|---|
| 532 | + |
|---|
| 533 | + if (!m) |
|---|
| 534 | + return NOTIFY_DONE; |
|---|
| 535 | + |
|---|
| 536 | + /* We eat only correctable DRAM errors with usable addresses. */ |
|---|
| 537 | + if (mce_is_memory_error(m) && |
|---|
| 538 | + mce_is_correctable(m) && |
|---|
| 539 | + mce_usable_address(m)) { |
|---|
| 540 | + if (!cec_add_elem(m->addr >> PAGE_SHIFT)) { |
|---|
| 541 | + m->kflags |= MCE_HANDLED_CEC; |
|---|
| 542 | + return NOTIFY_OK; |
|---|
| 543 | + } |
|---|
| 544 | + } |
|---|
| 545 | + |
|---|
| 546 | + return NOTIFY_DONE; |
|---|
| 547 | +} |
|---|
| 548 | + |
|---|
| 549 | +static struct notifier_block cec_nb = { |
|---|
| 550 | + .notifier_call = cec_notifier, |
|---|
| 551 | + .priority = MCE_PRIO_CEC, |
|---|
| 552 | +}; |
|---|
| 553 | + |
|---|
| 554 | +static int __init cec_init(void) |
|---|
| 504 | 555 | { |
|---|
| 505 | 556 | if (ce_arr.disabled) |
|---|
| 506 | | - return; |
|---|
| 557 | + return -ENODEV; |
|---|
| 507 | 558 | |
|---|
| 508 | 559 | ce_arr.array = (void *)get_zeroed_page(GFP_KERNEL); |
|---|
| 509 | 560 | if (!ce_arr.array) { |
|---|
| 510 | 561 | pr_err("Error allocating CE array page!\n"); |
|---|
| 511 | | - return; |
|---|
| 562 | + return -ENOMEM; |
|---|
| 512 | 563 | } |
|---|
| 513 | 564 | |
|---|
| 514 | | - if (create_debugfs_nodes()) |
|---|
| 515 | | - return; |
|---|
| 565 | + if (create_debugfs_nodes()) { |
|---|
| 566 | + free_page((unsigned long)ce_arr.array); |
|---|
| 567 | + return -ENOMEM; |
|---|
| 568 | + } |
|---|
| 516 | 569 | |
|---|
| 517 | 570 | INIT_DELAYED_WORK(&cec_work, cec_work_fn); |
|---|
| 518 | 571 | schedule_delayed_work(&cec_work, CEC_DECAY_DEFAULT_INTERVAL); |
|---|
| 519 | 572 | |
|---|
| 573 | + mce_register_decode_chain(&cec_nb); |
|---|
| 574 | + |
|---|
| 520 | 575 | pr_info("Correctable Errors collector initialized.\n"); |
|---|
| 576 | + return 0; |
|---|
| 521 | 577 | } |
|---|
| 578 | +late_initcall(cec_init); |
|---|
| 522 | 579 | |
|---|
| 523 | 580 | int __init parse_cec_param(char *str) |
|---|
| 524 | 581 | { |
|---|