hc
2024-05-16 8d2a02b24d66aa359e83eebc1ed3c0f85367a1cb
kernel/drivers/edac/edac_mc.c
....@@ -43,8 +43,6 @@
4343 int edac_op_state = EDAC_OPSTATE_INVAL;
4444 EXPORT_SYMBOL_GPL(edac_op_state);
4545
46
-static int edac_report = EDAC_REPORTING_ENABLED;
47
-
4846 /* lock to memory controller's control array */
4947 static DEFINE_MUTEX(mem_ctls_mutex);
5048 static LIST_HEAD(mc_devices);
....@@ -55,69 +53,13 @@
5553 */
5654 static const char *edac_mc_owner;
5755
58
-static struct bus_type mc_bus[EDAC_MAX_MCS];
59
-
60
-int edac_get_report_status(void)
56
+static struct mem_ctl_info *error_desc_to_mci(struct edac_raw_error_desc *e)
6157 {
62
- return edac_report;
63
-}
64
-EXPORT_SYMBOL_GPL(edac_get_report_status);
65
-
66
-void edac_set_report_status(int new)
67
-{
68
- if (new == EDAC_REPORTING_ENABLED ||
69
- new == EDAC_REPORTING_DISABLED ||
70
- new == EDAC_REPORTING_FORCE)
71
- edac_report = new;
72
-}
73
-EXPORT_SYMBOL_GPL(edac_set_report_status);
74
-
75
-static int edac_report_set(const char *str, const struct kernel_param *kp)
76
-{
77
- if (!str)
78
- return -EINVAL;
79
-
80
- if (!strncmp(str, "on", 2))
81
- edac_report = EDAC_REPORTING_ENABLED;
82
- else if (!strncmp(str, "off", 3))
83
- edac_report = EDAC_REPORTING_DISABLED;
84
- else if (!strncmp(str, "force", 5))
85
- edac_report = EDAC_REPORTING_FORCE;
86
-
87
- return 0;
58
+ return container_of(e, struct mem_ctl_info, error_desc);
8859 }
8960
90
-static int edac_report_get(char *buffer, const struct kernel_param *kp)
91
-{
92
- int ret = 0;
93
-
94
- switch (edac_report) {
95
- case EDAC_REPORTING_ENABLED:
96
- ret = sprintf(buffer, "on");
97
- break;
98
- case EDAC_REPORTING_DISABLED:
99
- ret = sprintf(buffer, "off");
100
- break;
101
- case EDAC_REPORTING_FORCE:
102
- ret = sprintf(buffer, "force");
103
- break;
104
- default:
105
- ret = -EINVAL;
106
- break;
107
- }
108
-
109
- return ret;
110
-}
111
-
112
-static const struct kernel_param_ops edac_report_ops = {
113
- .set = edac_report_set,
114
- .get = edac_report_get,
115
-};
116
-
117
-module_param_cb(edac_report, &edac_report_ops, &edac_report, 0644);
118
-
119
-unsigned edac_dimm_info_location(struct dimm_info *dimm, char *buf,
120
- unsigned len)
61
+unsigned int edac_dimm_info_location(struct dimm_info *dimm, char *buf,
62
+ unsigned int len)
12163 {
12264 struct mem_ctl_info *mci = dimm->mci;
12365 int i, n, count = 0;
....@@ -147,15 +89,18 @@
14789 edac_dbg(4, " channel->dimm = %p\n", chan->dimm);
14890 }
14991
150
-static void edac_mc_dump_dimm(struct dimm_info *dimm, int number)
92
+static void edac_mc_dump_dimm(struct dimm_info *dimm)
15193 {
15294 char location[80];
95
+
96
+ if (!dimm->nr_pages)
97
+ return;
15398
15499 edac_dimm_info_location(dimm, location, sizeof(location));
155100
156101 edac_dbg(4, "%s%i: %smapped as virtual row %d, chan %d\n",
157102 dimm->mci->csbased ? "rank" : "dimm",
158
- number, location, dimm->csrow, dimm->cschannel);
103
+ dimm->idx, location, dimm->csrow, dimm->cschannel);
159104 edac_dbg(4, " dimm = %p\n", dimm);
160105 edac_dbg(4, " dimm->label = '%s'\n", dimm->label);
161106 edac_dbg(4, " dimm->nr_pages = 0x%x\n", dimm->nr_pages);
....@@ -238,9 +183,9 @@
238183 * At return, the pointer 'p' will be incremented to be used on a next call
239184 * to this function.
240185 */
241
-void *edac_align_ptr(void **p, unsigned size, int n_elems)
186
+void *edac_align_ptr(void **p, unsigned int size, int n_elems)
242187 {
243
- unsigned align, r;
188
+ unsigned int align, r;
244189 void *ptr = *p;
245190
246191 *p += size * n_elems;
....@@ -277,65 +222,195 @@
277222
278223 static void _edac_mc_free(struct mem_ctl_info *mci)
279224 {
280
- int i, chn, row;
225
+ put_device(&mci->dev);
226
+}
227
+
228
+static void mci_release(struct device *dev)
229
+{
230
+ struct mem_ctl_info *mci = container_of(dev, struct mem_ctl_info, dev);
281231 struct csrow_info *csr;
282
- const unsigned int tot_dimms = mci->tot_dimms;
283
- const unsigned int tot_channels = mci->num_cschannel;
284
- const unsigned int tot_csrows = mci->nr_csrows;
232
+ int i, chn, row;
285233
286234 if (mci->dimms) {
287
- for (i = 0; i < tot_dimms; i++)
235
+ for (i = 0; i < mci->tot_dimms; i++)
288236 kfree(mci->dimms[i]);
289237 kfree(mci->dimms);
290238 }
239
+
291240 if (mci->csrows) {
292
- for (row = 0; row < tot_csrows; row++) {
241
+ for (row = 0; row < mci->nr_csrows; row++) {
293242 csr = mci->csrows[row];
294
- if (csr) {
295
- if (csr->channels) {
296
- for (chn = 0; chn < tot_channels; chn++)
297
- kfree(csr->channels[chn]);
298
- kfree(csr->channels);
299
- }
300
- kfree(csr);
243
+ if (!csr)
244
+ continue;
245
+
246
+ if (csr->channels) {
247
+ for (chn = 0; chn < mci->num_cschannel; chn++)
248
+ kfree(csr->channels[chn]);
249
+ kfree(csr->channels);
301250 }
251
+ kfree(csr);
302252 }
303253 kfree(mci->csrows);
304254 }
305255 kfree(mci);
306256 }
307257
308
-struct mem_ctl_info *edac_mc_alloc(unsigned mc_num,
309
- unsigned n_layers,
258
+static int edac_mc_alloc_csrows(struct mem_ctl_info *mci)
259
+{
260
+ unsigned int tot_channels = mci->num_cschannel;
261
+ unsigned int tot_csrows = mci->nr_csrows;
262
+ unsigned int row, chn;
263
+
264
+ /*
265
+ * Allocate and fill the csrow/channels structs
266
+ */
267
+ mci->csrows = kcalloc(tot_csrows, sizeof(*mci->csrows), GFP_KERNEL);
268
+ if (!mci->csrows)
269
+ return -ENOMEM;
270
+
271
+ for (row = 0; row < tot_csrows; row++) {
272
+ struct csrow_info *csr;
273
+
274
+ csr = kzalloc(sizeof(**mci->csrows), GFP_KERNEL);
275
+ if (!csr)
276
+ return -ENOMEM;
277
+
278
+ mci->csrows[row] = csr;
279
+ csr->csrow_idx = row;
280
+ csr->mci = mci;
281
+ csr->nr_channels = tot_channels;
282
+ csr->channels = kcalloc(tot_channels, sizeof(*csr->channels),
283
+ GFP_KERNEL);
284
+ if (!csr->channels)
285
+ return -ENOMEM;
286
+
287
+ for (chn = 0; chn < tot_channels; chn++) {
288
+ struct rank_info *chan;
289
+
290
+ chan = kzalloc(sizeof(**csr->channels), GFP_KERNEL);
291
+ if (!chan)
292
+ return -ENOMEM;
293
+
294
+ csr->channels[chn] = chan;
295
+ chan->chan_idx = chn;
296
+ chan->csrow = csr;
297
+ }
298
+ }
299
+
300
+ return 0;
301
+}
302
+
303
+static int edac_mc_alloc_dimms(struct mem_ctl_info *mci)
304
+{
305
+ unsigned int pos[EDAC_MAX_LAYERS];
306
+ unsigned int row, chn, idx;
307
+ int layer;
308
+ void *p;
309
+
310
+ /*
311
+ * Allocate and fill the dimm structs
312
+ */
313
+ mci->dimms = kcalloc(mci->tot_dimms, sizeof(*mci->dimms), GFP_KERNEL);
314
+ if (!mci->dimms)
315
+ return -ENOMEM;
316
+
317
+ memset(&pos, 0, sizeof(pos));
318
+ row = 0;
319
+ chn = 0;
320
+ for (idx = 0; idx < mci->tot_dimms; idx++) {
321
+ struct dimm_info *dimm;
322
+ struct rank_info *chan;
323
+ int n, len;
324
+
325
+ chan = mci->csrows[row]->channels[chn];
326
+
327
+ dimm = kzalloc(sizeof(**mci->dimms), GFP_KERNEL);
328
+ if (!dimm)
329
+ return -ENOMEM;
330
+ mci->dimms[idx] = dimm;
331
+ dimm->mci = mci;
332
+ dimm->idx = idx;
333
+
334
+ /*
335
+ * Copy DIMM location and initialize it.
336
+ */
337
+ len = sizeof(dimm->label);
338
+ p = dimm->label;
339
+ n = snprintf(p, len, "mc#%u", mci->mc_idx);
340
+ p += n;
341
+ len -= n;
342
+ for (layer = 0; layer < mci->n_layers; layer++) {
343
+ n = snprintf(p, len, "%s#%u",
344
+ edac_layer_name[mci->layers[layer].type],
345
+ pos[layer]);
346
+ p += n;
347
+ len -= n;
348
+ dimm->location[layer] = pos[layer];
349
+
350
+ if (len <= 0)
351
+ break;
352
+ }
353
+
354
+ /* Link it to the csrows old API data */
355
+ chan->dimm = dimm;
356
+ dimm->csrow = row;
357
+ dimm->cschannel = chn;
358
+
359
+ /* Increment csrow location */
360
+ if (mci->layers[0].is_virt_csrow) {
361
+ chn++;
362
+ if (chn == mci->num_cschannel) {
363
+ chn = 0;
364
+ row++;
365
+ }
366
+ } else {
367
+ row++;
368
+ if (row == mci->nr_csrows) {
369
+ row = 0;
370
+ chn++;
371
+ }
372
+ }
373
+
374
+ /* Increment dimm location */
375
+ for (layer = mci->n_layers - 1; layer >= 0; layer--) {
376
+ pos[layer]++;
377
+ if (pos[layer] < mci->layers[layer].size)
378
+ break;
379
+ pos[layer] = 0;
380
+ }
381
+ }
382
+
383
+ return 0;
384
+}
385
+
386
+struct mem_ctl_info *edac_mc_alloc(unsigned int mc_num,
387
+ unsigned int n_layers,
310388 struct edac_mc_layer *layers,
311
- unsigned sz_pvt)
389
+ unsigned int sz_pvt)
312390 {
313391 struct mem_ctl_info *mci;
314392 struct edac_mc_layer *layer;
315
- struct csrow_info *csr;
316
- struct rank_info *chan;
317
- struct dimm_info *dimm;
318
- u32 *ce_per_layer[EDAC_MAX_LAYERS], *ue_per_layer[EDAC_MAX_LAYERS];
319
- unsigned pos[EDAC_MAX_LAYERS];
320
- unsigned size, tot_dimms = 1, count = 1;
321
- unsigned tot_csrows = 1, tot_channels = 1, tot_errcount = 0;
322
- void *pvt, *p, *ptr = NULL;
323
- int i, j, row, chn, n, len, off;
393
+ unsigned int idx, size, tot_dimms = 1;
394
+ unsigned int tot_csrows = 1, tot_channels = 1;
395
+ void *pvt, *ptr = NULL;
324396 bool per_rank = false;
325397
326
- BUG_ON(n_layers > EDAC_MAX_LAYERS || n_layers == 0);
398
+ if (WARN_ON(n_layers > EDAC_MAX_LAYERS || n_layers == 0))
399
+ return NULL;
400
+
327401 /*
328402 * Calculate the total amount of dimms and csrows/cschannels while
329403 * in the old API emulation mode
330404 */
331
- for (i = 0; i < n_layers; i++) {
332
- tot_dimms *= layers[i].size;
333
- if (layers[i].is_virt_csrow)
334
- tot_csrows *= layers[i].size;
335
- else
336
- tot_channels *= layers[i].size;
405
+ for (idx = 0; idx < n_layers; idx++) {
406
+ tot_dimms *= layers[idx].size;
337407
338
- if (layers[i].type == EDAC_MC_LAYER_CHIP_SELECT)
408
+ if (layers[idx].is_virt_csrow)
409
+ tot_csrows *= layers[idx].size;
410
+ else
411
+ tot_channels *= layers[idx].size;
412
+
413
+ if (layers[idx].type == EDAC_MC_LAYER_CHIP_SELECT)
339414 per_rank = true;
340415 }
341416
....@@ -344,19 +419,10 @@
344419 * stringent as what the compiler would provide if we could simply
345420 * hardcode everything into a single struct.
346421 */
347
- mci = edac_align_ptr(&ptr, sizeof(*mci), 1);
348
- layer = edac_align_ptr(&ptr, sizeof(*layer), n_layers);
349
- for (i = 0; i < n_layers; i++) {
350
- count *= layers[i].size;
351
- edac_dbg(4, "errcount layer %d size %d\n", i, count);
352
- ce_per_layer[i] = edac_align_ptr(&ptr, sizeof(u32), count);
353
- ue_per_layer[i] = edac_align_ptr(&ptr, sizeof(u32), count);
354
- tot_errcount += 2 * count;
355
- }
356
-
357
- edac_dbg(4, "allocating %d error counters\n", tot_errcount);
358
- pvt = edac_align_ptr(&ptr, sz_pvt, 1);
359
- size = ((unsigned long)pvt) + sz_pvt;
422
+ mci = edac_align_ptr(&ptr, sizeof(*mci), 1);
423
+ layer = edac_align_ptr(&ptr, sizeof(*layer), n_layers);
424
+ pvt = edac_align_ptr(&ptr, sz_pvt, 1);
425
+ size = ((unsigned long)pvt) + sz_pvt;
360426
361427 edac_dbg(1, "allocating %u bytes for mci data (%d %s, %d csrows/channels)\n",
362428 size,
....@@ -368,14 +434,13 @@
368434 if (mci == NULL)
369435 return NULL;
370436
437
+ mci->dev.release = mci_release;
438
+ device_initialize(&mci->dev);
439
+
371440 /* Adjust pointers so they point within the memory we just allocated
372441 * rather than an imaginary chunk of memory located at address 0.
373442 */
374443 layer = (struct edac_mc_layer *)(((char *)mci) + ((unsigned long)layer));
375
- for (i = 0; i < n_layers; i++) {
376
- mci->ce_per_layer[i] = (u32 *)((char *)mci + ((unsigned long)ce_per_layer[i]));
377
- mci->ue_per_layer[i] = (u32 *)((char *)mci + ((unsigned long)ue_per_layer[i]));
378
- }
379444 pvt = sz_pvt ? (((char *)mci) + ((unsigned long)pvt)) : NULL;
380445
381446 /* setup index and various internal pointers */
....@@ -389,107 +454,11 @@
389454 mci->num_cschannel = tot_channels;
390455 mci->csbased = per_rank;
391456
392
- /*
393
- * Alocate and fill the csrow/channels structs
394
- */
395
- mci->csrows = kcalloc(tot_csrows, sizeof(*mci->csrows), GFP_KERNEL);
396
- if (!mci->csrows)
397
- goto error;
398
- for (row = 0; row < tot_csrows; row++) {
399
- csr = kzalloc(sizeof(**mci->csrows), GFP_KERNEL);
400
- if (!csr)
401
- goto error;
402
- mci->csrows[row] = csr;
403
- csr->csrow_idx = row;
404
- csr->mci = mci;
405
- csr->nr_channels = tot_channels;
406
- csr->channels = kcalloc(tot_channels, sizeof(*csr->channels),
407
- GFP_KERNEL);
408
- if (!csr->channels)
409
- goto error;
410
-
411
- for (chn = 0; chn < tot_channels; chn++) {
412
- chan = kzalloc(sizeof(**csr->channels), GFP_KERNEL);
413
- if (!chan)
414
- goto error;
415
- csr->channels[chn] = chan;
416
- chan->chan_idx = chn;
417
- chan->csrow = csr;
418
- }
419
- }
420
-
421
- /*
422
- * Allocate and fill the dimm structs
423
- */
424
- mci->dimms = kcalloc(tot_dimms, sizeof(*mci->dimms), GFP_KERNEL);
425
- if (!mci->dimms)
457
+ if (edac_mc_alloc_csrows(mci))
426458 goto error;
427459
428
- memset(&pos, 0, sizeof(pos));
429
- row = 0;
430
- chn = 0;
431
- for (i = 0; i < tot_dimms; i++) {
432
- chan = mci->csrows[row]->channels[chn];
433
- off = EDAC_DIMM_OFF(layer, n_layers, pos[0], pos[1], pos[2]);
434
- if (off < 0 || off >= tot_dimms) {
435
- edac_mc_printk(mci, KERN_ERR, "EDAC core bug: EDAC_DIMM_OFF is trying to do an illegal data access\n");
436
- goto error;
437
- }
438
-
439
- dimm = kzalloc(sizeof(**mci->dimms), GFP_KERNEL);
440
- if (!dimm)
441
- goto error;
442
- mci->dimms[off] = dimm;
443
- dimm->mci = mci;
444
-
445
- /*
446
- * Copy DIMM location and initialize it.
447
- */
448
- len = sizeof(dimm->label);
449
- p = dimm->label;
450
- n = snprintf(p, len, "mc#%u", mc_num);
451
- p += n;
452
- len -= n;
453
- for (j = 0; j < n_layers; j++) {
454
- n = snprintf(p, len, "%s#%u",
455
- edac_layer_name[layers[j].type],
456
- pos[j]);
457
- p += n;
458
- len -= n;
459
- dimm->location[j] = pos[j];
460
-
461
- if (len <= 0)
462
- break;
463
- }
464
-
465
- /* Link it to the csrows old API data */
466
- chan->dimm = dimm;
467
- dimm->csrow = row;
468
- dimm->cschannel = chn;
469
-
470
- /* Increment csrow location */
471
- if (layers[0].is_virt_csrow) {
472
- chn++;
473
- if (chn == tot_channels) {
474
- chn = 0;
475
- row++;
476
- }
477
- } else {
478
- row++;
479
- if (row == tot_csrows) {
480
- row = 0;
481
- chn++;
482
- }
483
- }
484
-
485
- /* Increment dimm location */
486
- for (j = n_layers - 1; j >= 0; j--) {
487
- pos[j]++;
488
- if (pos[j] < layers[j].size)
489
- break;
490
- pos[j] = 0;
491
- }
492
- }
460
+ if (edac_mc_alloc_dimms(mci))
461
+ goto error;
493462
494463 mci->op_state = OP_ALLOC;
495464
....@@ -506,16 +475,7 @@
506475 {
507476 edac_dbg(1, "\n");
508477
509
- /* If we're not yet registered with sysfs free only what was allocated
510
- * in edac_mc_alloc().
511
- */
512
- if (!device_is_registered(&mci->dev)) {
513
- _edac_mc_free(mci);
514
- return;
515
- }
516
-
517
- /* the mci instance is freed here, when the sysfs object is dropped */
518
- edac_unregister_sysfs(mci);
478
+ _edac_mc_free(mci);
519479 }
520480 EXPORT_SYMBOL_GPL(edac_mc_free);
521481
....@@ -712,16 +672,12 @@
712672 int ret = -EINVAL;
713673 edac_dbg(0, "\n");
714674
715
- if (mci->mc_idx >= EDAC_MAX_MCS) {
716
- pr_warn_once("Too many memory controllers: %d\n", mci->mc_idx);
717
- return -ENODEV;
718
- }
719
-
720675 #ifdef CONFIG_EDAC_DEBUG
721676 if (edac_debug_level >= 3)
722677 edac_mc_dump_mci(mci);
723678
724679 if (edac_debug_level >= 4) {
680
+ struct dimm_info *dimm;
725681 int i;
726682
727683 for (i = 0; i < mci->nr_csrows; i++) {
....@@ -738,9 +694,9 @@
738694 if (csrow->channels[j]->dimm->nr_pages)
739695 edac_mc_dump_channel(csrow->channels[j]);
740696 }
741
- for (i = 0; i < mci->tot_dimms; i++)
742
- if (mci->dimms[i]->nr_pages)
743
- edac_mc_dump_dimm(mci->dimms[i], i);
697
+
698
+ mci_for_each_dimm(mci, dimm)
699
+ edac_mc_dump_dimm(dimm);
744700 }
745701 #endif
746702 mutex_lock(&mem_ctls_mutex);
....@@ -756,7 +712,7 @@
756712 /* set load time so that error rate can be tracked */
757713 mci->start_time = jiffies;
758714
759
- mci->bus = &mc_bus[mci->mc_idx];
715
+ mci->bus = edac_get_sysfs_subsys();
760716
761717 if (edac_create_sysfs_mci_device(mci, groups)) {
762718 edac_mc_printk(mci, KERN_WARNING,
....@@ -913,88 +869,51 @@
913869 };
914870 EXPORT_SYMBOL_GPL(edac_layer_name);
915871
916
-static void edac_inc_ce_error(struct mem_ctl_info *mci,
917
- bool enable_per_layer_report,
918
- const int pos[EDAC_MAX_LAYERS],
919
- const u16 count)
872
+static void edac_inc_ce_error(struct edac_raw_error_desc *e)
920873 {
921
- int i, index = 0;
874
+ int pos[EDAC_MAX_LAYERS] = { e->top_layer, e->mid_layer, e->low_layer };
875
+ struct mem_ctl_info *mci = error_desc_to_mci(e);
876
+ struct dimm_info *dimm = edac_get_dimm(mci, pos[0], pos[1], pos[2]);
922877
923
- mci->ce_mc += count;
878
+ mci->ce_mc += e->error_count;
924879
925
- if (!enable_per_layer_report) {
926
- mci->ce_noinfo_count += count;
927
- return;
928
- }
929
-
930
- for (i = 0; i < mci->n_layers; i++) {
931
- if (pos[i] < 0)
932
- break;
933
- index += pos[i];
934
- mci->ce_per_layer[i][index] += count;
935
-
936
- if (i < mci->n_layers - 1)
937
- index *= mci->layers[i + 1].size;
938
- }
880
+ if (dimm)
881
+ dimm->ce_count += e->error_count;
882
+ else
883
+ mci->ce_noinfo_count += e->error_count;
939884 }
940885
941
-static void edac_inc_ue_error(struct mem_ctl_info *mci,
942
- bool enable_per_layer_report,
943
- const int pos[EDAC_MAX_LAYERS],
944
- const u16 count)
886
+static void edac_inc_ue_error(struct edac_raw_error_desc *e)
945887 {
946
- int i, index = 0;
888
+ int pos[EDAC_MAX_LAYERS] = { e->top_layer, e->mid_layer, e->low_layer };
889
+ struct mem_ctl_info *mci = error_desc_to_mci(e);
890
+ struct dimm_info *dimm = edac_get_dimm(mci, pos[0], pos[1], pos[2]);
947891
948
- mci->ue_mc += count;
892
+ mci->ue_mc += e->error_count;
949893
950
- if (!enable_per_layer_report) {
951
- mci->ue_noinfo_count += count;
952
- return;
953
- }
954
-
955
- for (i = 0; i < mci->n_layers; i++) {
956
- if (pos[i] < 0)
957
- break;
958
- index += pos[i];
959
- mci->ue_per_layer[i][index] += count;
960
-
961
- if (i < mci->n_layers - 1)
962
- index *= mci->layers[i + 1].size;
963
- }
894
+ if (dimm)
895
+ dimm->ue_count += e->error_count;
896
+ else
897
+ mci->ue_noinfo_count += e->error_count;
964898 }
965899
966
-static void edac_ce_error(struct mem_ctl_info *mci,
967
- const u16 error_count,
968
- const int pos[EDAC_MAX_LAYERS],
969
- const char *msg,
970
- const char *location,
971
- const char *label,
972
- const char *detail,
973
- const char *other_detail,
974
- const bool enable_per_layer_report,
975
- const unsigned long page_frame_number,
976
- const unsigned long offset_in_page,
977
- long grain)
900
+static void edac_ce_error(struct edac_raw_error_desc *e)
978901 {
902
+ struct mem_ctl_info *mci = error_desc_to_mci(e);
979903 unsigned long remapped_page;
980
- char *msg_aux = "";
981
-
982
- if (*msg)
983
- msg_aux = " ";
984904
985905 if (edac_mc_get_log_ce()) {
986
- if (other_detail && *other_detail)
987
- edac_mc_printk(mci, KERN_WARNING,
988
- "%d CE %s%son %s (%s %s - %s)\n",
989
- error_count, msg, msg_aux, label,
990
- location, detail, other_detail);
991
- else
992
- edac_mc_printk(mci, KERN_WARNING,
993
- "%d CE %s%son %s (%s %s)\n",
994
- error_count, msg, msg_aux, label,
995
- location, detail);
906
+ edac_mc_printk(mci, KERN_WARNING,
907
+ "%d CE %s%son %s (%s page:0x%lx offset:0x%lx grain:%ld syndrome:0x%lx%s%s)\n",
908
+ e->error_count, e->msg,
909
+ *e->msg ? " " : "",
910
+ e->label, e->location, e->page_frame_number, e->offset_in_page,
911
+ e->grain, e->syndrome,
912
+ *e->other_detail ? " - " : "",
913
+ e->other_detail);
996914 }
997
- edac_inc_ce_error(mci, enable_per_layer_report, pos, error_count);
915
+
916
+ edac_inc_ce_error(e);
998917
999918 if (mci->scrub_mode == SCRUB_SW_SRC) {
1000919 /*
....@@ -1009,80 +928,84 @@
1009928 * be scrubbed.
1010929 */
1011930 remapped_page = mci->ctl_page_to_phys ?
1012
- mci->ctl_page_to_phys(mci, page_frame_number) :
1013
- page_frame_number;
931
+ mci->ctl_page_to_phys(mci, e->page_frame_number) :
932
+ e->page_frame_number;
1014933
1015
- edac_mc_scrub_block(remapped_page,
1016
- offset_in_page, grain);
934
+ edac_mc_scrub_block(remapped_page, e->offset_in_page, e->grain);
1017935 }
1018936 }
1019937
1020
-static void edac_ue_error(struct mem_ctl_info *mci,
1021
- const u16 error_count,
1022
- const int pos[EDAC_MAX_LAYERS],
1023
- const char *msg,
1024
- const char *location,
1025
- const char *label,
1026
- const char *detail,
1027
- const char *other_detail,
1028
- const bool enable_per_layer_report)
938
+static void edac_ue_error(struct edac_raw_error_desc *e)
1029939 {
1030
- char *msg_aux = "";
1031
-
1032
- if (*msg)
1033
- msg_aux = " ";
940
+ struct mem_ctl_info *mci = error_desc_to_mci(e);
1034941
1035942 if (edac_mc_get_log_ue()) {
1036
- if (other_detail && *other_detail)
1037
- edac_mc_printk(mci, KERN_WARNING,
1038
- "%d UE %s%son %s (%s %s - %s)\n",
1039
- error_count, msg, msg_aux, label,
1040
- location, detail, other_detail);
1041
- else
1042
- edac_mc_printk(mci, KERN_WARNING,
1043
- "%d UE %s%son %s (%s %s)\n",
1044
- error_count, msg, msg_aux, label,
1045
- location, detail);
943
+ edac_mc_printk(mci, KERN_WARNING,
944
+ "%d UE %s%son %s (%s page:0x%lx offset:0x%lx grain:%ld%s%s)\n",
945
+ e->error_count, e->msg,
946
+ *e->msg ? " " : "",
947
+ e->label, e->location, e->page_frame_number, e->offset_in_page,
948
+ e->grain,
949
+ *e->other_detail ? " - " : "",
950
+ e->other_detail);
1046951 }
952
+
953
+ edac_inc_ue_error(e);
1047954
1048955 if (edac_mc_get_panic_on_ue()) {
1049
- if (other_detail && *other_detail)
1050
- panic("UE %s%son %s (%s%s - %s)\n",
1051
- msg, msg_aux, label, location, detail, other_detail);
1052
- else
1053
- panic("UE %s%son %s (%s%s)\n",
1054
- msg, msg_aux, label, location, detail);
956
+ panic("UE %s%son %s (%s page:0x%lx offset:0x%lx grain:%ld%s%s)\n",
957
+ e->msg,
958
+ *e->msg ? " " : "",
959
+ e->label, e->location, e->page_frame_number, e->offset_in_page,
960
+ e->grain,
961
+ *e->other_detail ? " - " : "",
962
+ e->other_detail);
1055963 }
1056
-
1057
- edac_inc_ue_error(mci, enable_per_layer_report, pos, error_count);
1058964 }
1059965
1060
-void edac_raw_mc_handle_error(const enum hw_event_mc_err_type type,
1061
- struct mem_ctl_info *mci,
1062
- struct edac_raw_error_desc *e)
966
+static void edac_inc_csrow(struct edac_raw_error_desc *e, int row, int chan)
1063967 {
1064
- char detail[80];
1065
- int pos[EDAC_MAX_LAYERS] = { e->top_layer, e->mid_layer, e->low_layer };
968
+ struct mem_ctl_info *mci = error_desc_to_mci(e);
969
+ enum hw_event_mc_err_type type = e->type;
970
+ u16 count = e->error_count;
1066971
1067
- /* Memory type dependent details about the error */
972
+ if (row < 0)
973
+ return;
974
+
975
+ edac_dbg(4, "csrow/channel to increment: (%d,%d)\n", row, chan);
976
+
1068977 if (type == HW_EVENT_ERR_CORRECTED) {
1069
- snprintf(detail, sizeof(detail),
1070
- "page:0x%lx offset:0x%lx grain:%ld syndrome:0x%lx",
1071
- e->page_frame_number, e->offset_in_page,
1072
- e->grain, e->syndrome);
1073
- edac_ce_error(mci, e->error_count, pos, e->msg, e->location, e->label,
1074
- detail, e->other_detail, e->enable_per_layer_report,
1075
- e->page_frame_number, e->offset_in_page, e->grain);
978
+ mci->csrows[row]->ce_count += count;
979
+ if (chan >= 0)
980
+ mci->csrows[row]->channels[chan]->ce_count += count;
1076981 } else {
1077
- snprintf(detail, sizeof(detail),
1078
- "page:0x%lx offset:0x%lx grain:%ld",
1079
- e->page_frame_number, e->offset_in_page, e->grain);
1080
-
1081
- edac_ue_error(mci, e->error_count, pos, e->msg, e->location, e->label,
1082
- detail, e->other_detail, e->enable_per_layer_report);
982
+ mci->csrows[row]->ue_count += count;
1083983 }
984
+}
1084985
986
+void edac_raw_mc_handle_error(struct edac_raw_error_desc *e)
987
+{
988
+ struct mem_ctl_info *mci = error_desc_to_mci(e);
989
+ u8 grain_bits;
1085990
991
+ /* Sanity-check driver-supplied grain value. */
992
+ if (WARN_ON_ONCE(!e->grain))
993
+ e->grain = 1;
994
+
995
+ grain_bits = fls_long(e->grain - 1);
996
+
997
+ /* Report the error via the trace interface */
998
+ if (IS_ENABLED(CONFIG_RAS))
999
+ trace_mc_event(e->type, e->msg, e->label, e->error_count,
1000
+ mci->mc_idx, e->top_layer, e->mid_layer,
1001
+ e->low_layer,
1002
+ (e->page_frame_number << PAGE_SHIFT) | e->offset_in_page,
1003
+ grain_bits, e->syndrome, e->other_detail);
1004
+
1005
+ if (e->type == HW_EVENT_ERR_CORRECTED)
1006
+ edac_ce_error(e);
1007
+ else
1008
+ edac_ue_error(e);
10861009 }
10871010 EXPORT_SYMBOL_GPL(edac_raw_mc_handle_error);
10881011
....@@ -1098,31 +1021,33 @@
10981021 const char *msg,
10991022 const char *other_detail)
11001023 {
1024
+ struct dimm_info *dimm;
11011025 char *p;
11021026 int row = -1, chan = -1;
11031027 int pos[EDAC_MAX_LAYERS] = { top_layer, mid_layer, low_layer };
11041028 int i, n_labels = 0;
1105
- u8 grain_bits;
11061029 struct edac_raw_error_desc *e = &mci->error_desc;
1030
+ bool any_memory = true;
11071031
11081032 edac_dbg(3, "MC%d\n", mci->mc_idx);
11091033
11101034 /* Fills the error report buffer */
11111035 memset(e, 0, sizeof (*e));
11121036 e->error_count = error_count;
1037
+ e->type = type;
11131038 e->top_layer = top_layer;
11141039 e->mid_layer = mid_layer;
11151040 e->low_layer = low_layer;
11161041 e->page_frame_number = page_frame_number;
11171042 e->offset_in_page = offset_in_page;
11181043 e->syndrome = syndrome;
1119
- e->msg = msg;
1120
- e->other_detail = other_detail;
1044
+ /* need valid strings here for both: */
1045
+ e->msg = msg ?: "";
1046
+ e->other_detail = other_detail ?: "";
11211047
11221048 /*
1123
- * Check if the event report is consistent and if the memory
1124
- * location is known. If it is known, enable_per_layer_report will be
1125
- * true, the DIMM(s) label info will be filled and the per-layer
1049
+ * Check if the event report is consistent and if the memory location is
1050
+ * known. If it is, the DIMM(s) label info will be filled and the DIMM's
11261051 * error counters will be incremented.
11271052 */
11281053 for (i = 0; i < mci->n_layers; i++) {
....@@ -1141,7 +1066,7 @@
11411066 pos[i] = -1;
11421067 }
11431068 if (pos[i] >= 0)
1144
- e->enable_per_layer_report = true;
1069
+ any_memory = false;
11451070 }
11461071
11471072 /*
....@@ -1158,9 +1083,7 @@
11581083 p = e->label;
11591084 *p = '\0';
11601085
1161
- for (i = 0; i < mci->tot_dimms; i++) {
1162
- struct dimm_info *dimm = mci->dimms[i];
1163
-
1086
+ mci_for_each_dimm(mci, dimm) {
11641087 if (top_layer >= 0 && top_layer != dimm->location[0])
11651088 continue;
11661089 if (mid_layer >= 0 && mid_layer != dimm->location[1])
....@@ -1174,59 +1097,50 @@
11741097
11751098 /*
11761099 * If the error is memory-controller wide, there's no need to
1177
- * seek for the affected DIMMs because the whole
1178
- * channel/memory controller/... may be affected.
1179
- * Also, don't show errors for empty DIMM slots.
1100
+ * seek for the affected DIMMs because the whole channel/memory
1101
+ * controller/... may be affected. Also, don't show errors for
1102
+ * empty DIMM slots.
11801103 */
1181
- if (e->enable_per_layer_report && dimm->nr_pages) {
1182
- if (n_labels >= EDAC_MAX_LABELS) {
1183
- e->enable_per_layer_report = false;
1184
- break;
1185
- }
1186
- n_labels++;
1104
+ if (!dimm->nr_pages)
1105
+ continue;
1106
+
1107
+ n_labels++;
1108
+ if (n_labels > EDAC_MAX_LABELS) {
1109
+ p = e->label;
1110
+ *p = '\0';
1111
+ } else {
11871112 if (p != e->label) {
11881113 strcpy(p, OTHER_LABEL);
11891114 p += strlen(OTHER_LABEL);
11901115 }
11911116 strcpy(p, dimm->label);
11921117 p += strlen(p);
1193
- *p = '\0';
1194
-
1195
- /*
1196
- * get csrow/channel of the DIMM, in order to allow
1197
- * incrementing the compat API counters
1198
- */
1199
- edac_dbg(4, "%s csrows map: (%d,%d)\n",
1200
- mci->csbased ? "rank" : "dimm",
1201
- dimm->csrow, dimm->cschannel);
1202
- if (row == -1)
1203
- row = dimm->csrow;
1204
- else if (row >= 0 && row != dimm->csrow)
1205
- row = -2;
1206
-
1207
- if (chan == -1)
1208
- chan = dimm->cschannel;
1209
- else if (chan >= 0 && chan != dimm->cschannel)
1210
- chan = -2;
12111118 }
1119
+
1120
+ /*
1121
+ * get csrow/channel of the DIMM, in order to allow
1122
+ * incrementing the compat API counters
1123
+ */
1124
+ edac_dbg(4, "%s csrows map: (%d,%d)\n",
1125
+ mci->csbased ? "rank" : "dimm",
1126
+ dimm->csrow, dimm->cschannel);
1127
+ if (row == -1)
1128
+ row = dimm->csrow;
1129
+ else if (row >= 0 && row != dimm->csrow)
1130
+ row = -2;
1131
+
1132
+ if (chan == -1)
1133
+ chan = dimm->cschannel;
1134
+ else if (chan >= 0 && chan != dimm->cschannel)
1135
+ chan = -2;
12121136 }
12131137
1214
- if (!e->enable_per_layer_report) {
1138
+ if (any_memory)
12151139 strcpy(e->label, "any memory");
1216
- } else {
1217
- edac_dbg(4, "csrow/channel to increment: (%d,%d)\n", row, chan);
1218
- if (p == e->label)
1219
- strcpy(e->label, "unknown memory");
1220
- if (type == HW_EVENT_ERR_CORRECTED) {
1221
- if (row >= 0) {
1222
- mci->csrows[row]->ce_count += error_count;
1223
- if (chan >= 0)
1224
- mci->csrows[row]->channels[chan]->ce_count += error_count;
1225
- }
1226
- } else
1227
- if (row >= 0)
1228
- mci->csrows[row]->ue_count += error_count;
1229
- }
1140
+ else if (!*e->label)
1141
+ strcpy(e->label, "unknown memory");
1142
+
1143
+ edac_inc_csrow(e, row, chan);
12301144
12311145 /* Fill the RAM location data */
12321146 p = e->location;
....@@ -1242,20 +1156,6 @@
12421156 if (p > e->location)
12431157 *(p - 1) = '\0';
12441158
1245
- /* Sanity-check driver-supplied grain value. */
1246
- if (WARN_ON_ONCE(!e->grain))
1247
- e->grain = 1;
1248
-
1249
- grain_bits = fls_long(e->grain - 1);
1250
-
1251
- /* Report the error via the trace interface */
1252
- if (IS_ENABLED(CONFIG_RAS))
1253
- trace_mc_event(type, e->msg, e->label, e->error_count,
1254
- mci->mc_idx, e->top_layer, e->mid_layer,
1255
- e->low_layer,
1256
- (e->page_frame_number << PAGE_SHIFT) | e->offset_in_page,
1257
- grain_bits, e->syndrome, e->other_detail);
1258
-
1259
- edac_raw_mc_handle_error(type, mci, e);
1159
+ edac_raw_mc_handle_error(e);
12601160 }
12611161 EXPORT_SYMBOL_GPL(edac_mc_handle_error);