hc
2023-12-11 d2ccde1c8e90d38cee87a1b0309ad2827f3fd30d
kernel/drivers/base/node.c
....@@ -17,6 +17,7 @@
1717 #include <linux/nodemask.h>
1818 #include <linux/cpu.h>
1919 #include <linux/device.h>
20
+#include <linux/pm_runtime.h>
2021 #include <linux/swap.h>
2122 #include <linux/slab.h>
2223
....@@ -45,149 +46,456 @@
4546 return n;
4647 }
4748
48
-static inline ssize_t node_read_cpumask(struct device *dev,
49
- struct device_attribute *attr, char *buf)
49
+static inline ssize_t cpumap_show(struct device *dev,
50
+ struct device_attribute *attr,
51
+ char *buf)
5052 {
5153 return node_read_cpumap(dev, false, buf);
5254 }
53
-static inline ssize_t node_read_cpulist(struct device *dev,
54
- struct device_attribute *attr, char *buf)
55
+
56
+static DEVICE_ATTR_RO(cpumap);
57
+
58
+static inline ssize_t cpulist_show(struct device *dev,
59
+ struct device_attribute *attr,
60
+ char *buf)
5561 {
5662 return node_read_cpumap(dev, true, buf);
5763 }
5864
59
-static DEVICE_ATTR(cpumap, S_IRUGO, node_read_cpumask, NULL);
60
-static DEVICE_ATTR(cpulist, S_IRUGO, node_read_cpulist, NULL);
65
+static DEVICE_ATTR_RO(cpulist);
66
+
67
+/**
68
+ * struct node_access_nodes - Access class device to hold user visible
69
+ * relationships to other nodes.
70
+ * @dev: Device for this memory access class
71
+ * @list_node: List element in the node's access list
72
+ * @access: The access class rank
73
+ * @hmem_attrs: Heterogeneous memory performance attributes
74
+ */
75
+struct node_access_nodes {
76
+ struct device dev;
77
+ struct list_head list_node;
78
+ unsigned access;
79
+#ifdef CONFIG_HMEM_REPORTING
80
+ struct node_hmem_attrs hmem_attrs;
81
+#endif
82
+};
83
+#define to_access_nodes(dev) container_of(dev, struct node_access_nodes, dev)
84
+
85
+static struct attribute *node_init_access_node_attrs[] = {
86
+ NULL,
87
+};
88
+
89
+static struct attribute *node_targ_access_node_attrs[] = {
90
+ NULL,
91
+};
92
+
93
+static const struct attribute_group initiators = {
94
+ .name = "initiators",
95
+ .attrs = node_init_access_node_attrs,
96
+};
97
+
98
+static const struct attribute_group targets = {
99
+ .name = "targets",
100
+ .attrs = node_targ_access_node_attrs,
101
+};
102
+
103
+static const struct attribute_group *node_access_node_groups[] = {
104
+ &initiators,
105
+ &targets,
106
+ NULL,
107
+};
108
+
109
+static void node_remove_accesses(struct node *node)
110
+{
111
+ struct node_access_nodes *c, *cnext;
112
+
113
+ list_for_each_entry_safe(c, cnext, &node->access_list, list_node) {
114
+ list_del(&c->list_node);
115
+ device_unregister(&c->dev);
116
+ }
117
+}
118
+
119
+static void node_access_release(struct device *dev)
120
+{
121
+ kfree(to_access_nodes(dev));
122
+}
123
+
124
+static struct node_access_nodes *node_init_node_access(struct node *node,
125
+ unsigned access)
126
+{
127
+ struct node_access_nodes *access_node;
128
+ struct device *dev;
129
+
130
+ list_for_each_entry(access_node, &node->access_list, list_node)
131
+ if (access_node->access == access)
132
+ return access_node;
133
+
134
+ access_node = kzalloc(sizeof(*access_node), GFP_KERNEL);
135
+ if (!access_node)
136
+ return NULL;
137
+
138
+ access_node->access = access;
139
+ dev = &access_node->dev;
140
+ dev->parent = &node->dev;
141
+ dev->release = node_access_release;
142
+ dev->groups = node_access_node_groups;
143
+ if (dev_set_name(dev, "access%u", access))
144
+ goto free;
145
+
146
+ if (device_register(dev))
147
+ goto free_name;
148
+
149
+ pm_runtime_no_callbacks(dev);
150
+ list_add_tail(&access_node->list_node, &node->access_list);
151
+ return access_node;
152
+free_name:
153
+ kfree_const(dev->kobj.name);
154
+free:
155
+ kfree(access_node);
156
+ return NULL;
157
+}
158
+
159
+#ifdef CONFIG_HMEM_REPORTING
160
+#define ACCESS_ATTR(name) \
161
+static ssize_t name##_show(struct device *dev, \
162
+ struct device_attribute *attr, \
163
+ char *buf) \
164
+{ \
165
+ return sysfs_emit(buf, "%u\n", \
166
+ to_access_nodes(dev)->hmem_attrs.name); \
167
+} \
168
+static DEVICE_ATTR_RO(name)
169
+
170
+ACCESS_ATTR(read_bandwidth);
171
+ACCESS_ATTR(read_latency);
172
+ACCESS_ATTR(write_bandwidth);
173
+ACCESS_ATTR(write_latency);
174
+
175
+static struct attribute *access_attrs[] = {
176
+ &dev_attr_read_bandwidth.attr,
177
+ &dev_attr_read_latency.attr,
178
+ &dev_attr_write_bandwidth.attr,
179
+ &dev_attr_write_latency.attr,
180
+ NULL,
181
+};
182
+
183
+/**
184
+ * node_set_perf_attrs - Set the performance values for given access class
185
+ * @nid: Node identifier to be set
186
+ * @hmem_attrs: Heterogeneous memory performance attributes
187
+ * @access: The access class for the given attributes
188
+ */
189
+void node_set_perf_attrs(unsigned int nid, struct node_hmem_attrs *hmem_attrs,
190
+ unsigned access)
191
+{
192
+ struct node_access_nodes *c;
193
+ struct node *node;
194
+ int i;
195
+
196
+ if (WARN_ON_ONCE(!node_online(nid)))
197
+ return;
198
+
199
+ node = node_devices[nid];
200
+ c = node_init_node_access(node, access);
201
+ if (!c)
202
+ return;
203
+
204
+ c->hmem_attrs = *hmem_attrs;
205
+ for (i = 0; access_attrs[i] != NULL; i++) {
206
+ if (sysfs_add_file_to_group(&c->dev.kobj, access_attrs[i],
207
+ "initiators")) {
208
+ pr_info("failed to add performance attribute to node %d\n",
209
+ nid);
210
+ break;
211
+ }
212
+ }
213
+}
214
+
215
+/**
216
+ * struct node_cache_info - Internal tracking for memory node caches
217
+ * @dev: Device representing the cache level
218
+ * @node: List element for tracking in the node
219
+ * @cache_attrs:Attributes for this cache level
220
+ */
221
+struct node_cache_info {
222
+ struct device dev;
223
+ struct list_head node;
224
+ struct node_cache_attrs cache_attrs;
225
+};
226
+#define to_cache_info(device) container_of(device, struct node_cache_info, dev)
227
+
228
+#define CACHE_ATTR(name, fmt) \
229
+static ssize_t name##_show(struct device *dev, \
230
+ struct device_attribute *attr, \
231
+ char *buf) \
232
+{ \
233
+ return sysfs_emit(buf, fmt "\n", \
234
+ to_cache_info(dev)->cache_attrs.name); \
235
+} \
236
+DEVICE_ATTR_RO(name);
237
+
238
+CACHE_ATTR(size, "%llu")
239
+CACHE_ATTR(line_size, "%u")
240
+CACHE_ATTR(indexing, "%u")
241
+CACHE_ATTR(write_policy, "%u")
242
+
243
+static struct attribute *cache_attrs[] = {
244
+ &dev_attr_indexing.attr,
245
+ &dev_attr_size.attr,
246
+ &dev_attr_line_size.attr,
247
+ &dev_attr_write_policy.attr,
248
+ NULL,
249
+};
250
+ATTRIBUTE_GROUPS(cache);
251
+
252
+static void node_cache_release(struct device *dev)
253
+{
254
+ kfree(dev);
255
+}
256
+
257
+static void node_cacheinfo_release(struct device *dev)
258
+{
259
+ struct node_cache_info *info = to_cache_info(dev);
260
+ kfree(info);
261
+}
262
+
263
+static void node_init_cache_dev(struct node *node)
264
+{
265
+ struct device *dev;
266
+
267
+ dev = kzalloc(sizeof(*dev), GFP_KERNEL);
268
+ if (!dev)
269
+ return;
270
+
271
+ device_initialize(dev);
272
+ dev->parent = &node->dev;
273
+ dev->release = node_cache_release;
274
+ if (dev_set_name(dev, "memory_side_cache"))
275
+ goto put_device;
276
+
277
+ if (device_add(dev))
278
+ goto put_device;
279
+
280
+ pm_runtime_no_callbacks(dev);
281
+ node->cache_dev = dev;
282
+ return;
283
+put_device:
284
+ put_device(dev);
285
+}
286
+
287
+/**
288
+ * node_add_cache() - add cache attribute to a memory node
289
+ * @nid: Node identifier that has new cache attributes
290
+ * @cache_attrs: Attributes for the cache being added
291
+ */
292
+void node_add_cache(unsigned int nid, struct node_cache_attrs *cache_attrs)
293
+{
294
+ struct node_cache_info *info;
295
+ struct device *dev;
296
+ struct node *node;
297
+
298
+ if (!node_online(nid) || !node_devices[nid])
299
+ return;
300
+
301
+ node = node_devices[nid];
302
+ list_for_each_entry(info, &node->cache_attrs, node) {
303
+ if (info->cache_attrs.level == cache_attrs->level) {
304
+ dev_warn(&node->dev,
305
+ "attempt to add duplicate cache level:%d\n",
306
+ cache_attrs->level);
307
+ return;
308
+ }
309
+ }
310
+
311
+ if (!node->cache_dev)
312
+ node_init_cache_dev(node);
313
+ if (!node->cache_dev)
314
+ return;
315
+
316
+ info = kzalloc(sizeof(*info), GFP_KERNEL);
317
+ if (!info)
318
+ return;
319
+
320
+ dev = &info->dev;
321
+ device_initialize(dev);
322
+ dev->parent = node->cache_dev;
323
+ dev->release = node_cacheinfo_release;
324
+ dev->groups = cache_groups;
325
+ if (dev_set_name(dev, "index%d", cache_attrs->level))
326
+ goto put_device;
327
+
328
+ info->cache_attrs = *cache_attrs;
329
+ if (device_add(dev)) {
330
+ dev_warn(&node->dev, "failed to add cache level:%d\n",
331
+ cache_attrs->level);
332
+ goto put_device;
333
+ }
334
+ pm_runtime_no_callbacks(dev);
335
+ list_add_tail(&info->node, &node->cache_attrs);
336
+ return;
337
+put_device:
338
+ put_device(dev);
339
+}
340
+
341
+static void node_remove_caches(struct node *node)
342
+{
343
+ struct node_cache_info *info, *next;
344
+
345
+ if (!node->cache_dev)
346
+ return;
347
+
348
+ list_for_each_entry_safe(info, next, &node->cache_attrs, node) {
349
+ list_del(&info->node);
350
+ device_unregister(&info->dev);
351
+ }
352
+ device_unregister(node->cache_dev);
353
+}
354
+
355
+static void node_init_caches(unsigned int nid)
356
+{
357
+ INIT_LIST_HEAD(&node_devices[nid]->cache_attrs);
358
+}
359
+#else
360
+static void node_init_caches(unsigned int nid) { }
361
+static void node_remove_caches(struct node *node) { }
362
+#endif
61363
62364 #define K(x) ((x) << (PAGE_SHIFT - 10))
63365 static ssize_t node_read_meminfo(struct device *dev,
64366 struct device_attribute *attr, char *buf)
65367 {
66
- int n;
368
+ int len = 0;
67369 int nid = dev->id;
68370 struct pglist_data *pgdat = NODE_DATA(nid);
69371 struct sysinfo i;
70372 unsigned long sreclaimable, sunreclaimable;
71373
72374 si_meminfo_node(&i, nid);
73
- sreclaimable = node_page_state(pgdat, NR_SLAB_RECLAIMABLE);
74
- sunreclaimable = node_page_state(pgdat, NR_SLAB_UNRECLAIMABLE);
75
- n = sprintf(buf,
76
- "Node %d MemTotal: %8lu kB\n"
77
- "Node %d MemFree: %8lu kB\n"
78
- "Node %d MemUsed: %8lu kB\n"
79
- "Node %d Active: %8lu kB\n"
80
- "Node %d Inactive: %8lu kB\n"
81
- "Node %d Active(anon): %8lu kB\n"
82
- "Node %d Inactive(anon): %8lu kB\n"
83
- "Node %d Active(file): %8lu kB\n"
84
- "Node %d Inactive(file): %8lu kB\n"
85
- "Node %d Unevictable: %8lu kB\n"
86
- "Node %d Mlocked: %8lu kB\n",
87
- nid, K(i.totalram),
88
- nid, K(i.freeram),
89
- nid, K(i.totalram - i.freeram),
90
- nid, K(node_page_state(pgdat, NR_ACTIVE_ANON) +
91
- node_page_state(pgdat, NR_ACTIVE_FILE)),
92
- nid, K(node_page_state(pgdat, NR_INACTIVE_ANON) +
93
- node_page_state(pgdat, NR_INACTIVE_FILE)),
94
- nid, K(node_page_state(pgdat, NR_ACTIVE_ANON)),
95
- nid, K(node_page_state(pgdat, NR_INACTIVE_ANON)),
96
- nid, K(node_page_state(pgdat, NR_ACTIVE_FILE)),
97
- nid, K(node_page_state(pgdat, NR_INACTIVE_FILE)),
98
- nid, K(node_page_state(pgdat, NR_UNEVICTABLE)),
99
- nid, K(sum_zone_node_page_state(nid, NR_MLOCK)));
375
+ sreclaimable = node_page_state_pages(pgdat, NR_SLAB_RECLAIMABLE_B);
376
+ sunreclaimable = node_page_state_pages(pgdat, NR_SLAB_UNRECLAIMABLE_B);
377
+ len = sysfs_emit_at(buf, len,
378
+ "Node %d MemTotal: %8lu kB\n"
379
+ "Node %d MemFree: %8lu kB\n"
380
+ "Node %d MemUsed: %8lu kB\n"
381
+ "Node %d Active: %8lu kB\n"
382
+ "Node %d Inactive: %8lu kB\n"
383
+ "Node %d Active(anon): %8lu kB\n"
384
+ "Node %d Inactive(anon): %8lu kB\n"
385
+ "Node %d Active(file): %8lu kB\n"
386
+ "Node %d Inactive(file): %8lu kB\n"
387
+ "Node %d Unevictable: %8lu kB\n"
388
+ "Node %d Mlocked: %8lu kB\n",
389
+ nid, K(i.totalram),
390
+ nid, K(i.freeram),
391
+ nid, K(i.totalram - i.freeram),
392
+ nid, K(node_page_state(pgdat, NR_ACTIVE_ANON) +
393
+ node_page_state(pgdat, NR_ACTIVE_FILE)),
394
+ nid, K(node_page_state(pgdat, NR_INACTIVE_ANON) +
395
+ node_page_state(pgdat, NR_INACTIVE_FILE)),
396
+ nid, K(node_page_state(pgdat, NR_ACTIVE_ANON)),
397
+ nid, K(node_page_state(pgdat, NR_INACTIVE_ANON)),
398
+ nid, K(node_page_state(pgdat, NR_ACTIVE_FILE)),
399
+ nid, K(node_page_state(pgdat, NR_INACTIVE_FILE)),
400
+ nid, K(node_page_state(pgdat, NR_UNEVICTABLE)),
401
+ nid, K(sum_zone_node_page_state(nid, NR_MLOCK)));
100402
101403 #ifdef CONFIG_HIGHMEM
102
- n += sprintf(buf + n,
103
- "Node %d HighTotal: %8lu kB\n"
104
- "Node %d HighFree: %8lu kB\n"
105
- "Node %d LowTotal: %8lu kB\n"
106
- "Node %d LowFree: %8lu kB\n",
107
- nid, K(i.totalhigh),
108
- nid, K(i.freehigh),
109
- nid, K(i.totalram - i.totalhigh),
110
- nid, K(i.freeram - i.freehigh));
404
+ len += sysfs_emit_at(buf, len,
405
+ "Node %d HighTotal: %8lu kB\n"
406
+ "Node %d HighFree: %8lu kB\n"
407
+ "Node %d LowTotal: %8lu kB\n"
408
+ "Node %d LowFree: %8lu kB\n",
409
+ nid, K(i.totalhigh),
410
+ nid, K(i.freehigh),
411
+ nid, K(i.totalram - i.totalhigh),
412
+ nid, K(i.freeram - i.freehigh));
111413 #endif
112
- n += sprintf(buf + n,
113
- "Node %d Dirty: %8lu kB\n"
114
- "Node %d Writeback: %8lu kB\n"
115
- "Node %d FilePages: %8lu kB\n"
116
- "Node %d Mapped: %8lu kB\n"
117
- "Node %d AnonPages: %8lu kB\n"
118
- "Node %d Shmem: %8lu kB\n"
119
- "Node %d KernelStack: %8lu kB\n"
414
+ len += sysfs_emit_at(buf, len,
415
+ "Node %d Dirty: %8lu kB\n"
416
+ "Node %d Writeback: %8lu kB\n"
417
+ "Node %d FilePages: %8lu kB\n"
418
+ "Node %d Mapped: %8lu kB\n"
419
+ "Node %d AnonPages: %8lu kB\n"
420
+ "Node %d Shmem: %8lu kB\n"
421
+ "Node %d KernelStack: %8lu kB\n"
120422 #ifdef CONFIG_SHADOW_CALL_STACK
121
- "Node %d ShadowCallStack:%8lu kB\n"
423
+ "Node %d ShadowCallStack:%8lu kB\n"
122424 #endif
123
- "Node %d PageTables: %8lu kB\n"
124
- "Node %d NFS_Unstable: %8lu kB\n"
125
- "Node %d Bounce: %8lu kB\n"
126
- "Node %d WritebackTmp: %8lu kB\n"
127
- "Node %d KReclaimable: %8lu kB\n"
128
- "Node %d Slab: %8lu kB\n"
129
- "Node %d SReclaimable: %8lu kB\n"
130
- "Node %d SUnreclaim: %8lu kB\n"
425
+ "Node %d PageTables: %8lu kB\n"
426
+ "Node %d NFS_Unstable: %8lu kB\n"
427
+ "Node %d Bounce: %8lu kB\n"
428
+ "Node %d WritebackTmp: %8lu kB\n"
429
+ "Node %d KReclaimable: %8lu kB\n"
430
+ "Node %d Slab: %8lu kB\n"
431
+ "Node %d SReclaimable: %8lu kB\n"
432
+ "Node %d SUnreclaim: %8lu kB\n"
131433 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
132
- "Node %d AnonHugePages: %8lu kB\n"
133
- "Node %d ShmemHugePages: %8lu kB\n"
134
- "Node %d ShmemPmdMapped: %8lu kB\n"
434
+ "Node %d AnonHugePages: %8lu kB\n"
435
+ "Node %d ShmemHugePages: %8lu kB\n"
436
+ "Node %d ShmemPmdMapped: %8lu kB\n"
437
+ "Node %d FileHugePages: %8lu kB\n"
438
+ "Node %d FilePmdMapped: %8lu kB\n"
135439 #endif
136
- ,
137
- nid, K(node_page_state(pgdat, NR_FILE_DIRTY)),
138
- nid, K(node_page_state(pgdat, NR_WRITEBACK)),
139
- nid, K(node_page_state(pgdat, NR_FILE_PAGES)),
140
- nid, K(node_page_state(pgdat, NR_FILE_MAPPED)),
141
- nid, K(node_page_state(pgdat, NR_ANON_MAPPED)),
142
- nid, K(i.sharedram),
143
- nid, sum_zone_node_page_state(nid, NR_KERNEL_STACK_KB),
440
+ ,
441
+ nid, K(node_page_state(pgdat, NR_FILE_DIRTY)),
442
+ nid, K(node_page_state(pgdat, NR_WRITEBACK)),
443
+ nid, K(node_page_state(pgdat, NR_FILE_PAGES)),
444
+ nid, K(node_page_state(pgdat, NR_FILE_MAPPED)),
445
+ nid, K(node_page_state(pgdat, NR_ANON_MAPPED)),
446
+ nid, K(i.sharedram),
447
+ nid, node_page_state(pgdat, NR_KERNEL_STACK_KB),
144448 #ifdef CONFIG_SHADOW_CALL_STACK
145
- nid, sum_zone_node_page_state(nid, NR_KERNEL_SCS_BYTES) / 1024,
449
+ nid, node_page_state(pgdat, NR_KERNEL_SCS_KB),
146450 #endif
147
- nid, K(sum_zone_node_page_state(nid, NR_PAGETABLE)),
148
- nid, K(node_page_state(pgdat, NR_UNSTABLE_NFS)),
149
- nid, K(sum_zone_node_page_state(nid, NR_BOUNCE)),
150
- nid, K(node_page_state(pgdat, NR_WRITEBACK_TEMP)),
151
- nid, K(sreclaimable +
152
- node_page_state(pgdat, NR_KERNEL_MISC_RECLAIMABLE)),
153
- nid, K(sreclaimable + sunreclaimable),
154
- nid, K(sreclaimable),
155
- nid, K(sunreclaimable)
451
+ nid, K(sum_zone_node_page_state(nid, NR_PAGETABLE)),
452
+ nid, 0UL,
453
+ nid, K(sum_zone_node_page_state(nid, NR_BOUNCE)),
454
+ nid, K(node_page_state(pgdat, NR_WRITEBACK_TEMP)),
455
+ nid, K(sreclaimable +
456
+ node_page_state(pgdat, NR_KERNEL_MISC_RECLAIMABLE)),
457
+ nid, K(sreclaimable + sunreclaimable),
458
+ nid, K(sreclaimable),
459
+ nid, K(sunreclaimable)
156460 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
157
- ,
158
- nid, K(node_page_state(pgdat, NR_ANON_THPS) *
159
- HPAGE_PMD_NR),
160
- nid, K(node_page_state(pgdat, NR_SHMEM_THPS) *
161
- HPAGE_PMD_NR),
162
- nid, K(node_page_state(pgdat, NR_SHMEM_PMDMAPPED) *
163
- HPAGE_PMD_NR)
461
+ ,
462
+ nid, K(node_page_state(pgdat, NR_ANON_THPS) *
463
+ HPAGE_PMD_NR),
464
+ nid, K(node_page_state(pgdat, NR_SHMEM_THPS) *
465
+ HPAGE_PMD_NR),
466
+ nid, K(node_page_state(pgdat, NR_SHMEM_PMDMAPPED) *
467
+ HPAGE_PMD_NR),
468
+ nid, K(node_page_state(pgdat, NR_FILE_THPS) *
469
+ HPAGE_PMD_NR),
470
+ nid, K(node_page_state(pgdat, NR_FILE_PMDMAPPED) *
471
+ HPAGE_PMD_NR)
164472 #endif
165
- );
166
- n += hugetlb_report_node_meminfo(nid, buf + n);
167
- return n;
473
+ );
474
+ len += hugetlb_report_node_meminfo(buf, len, nid);
475
+ return len;
168476 }
169477
170478 #undef K
171
-static DEVICE_ATTR(meminfo, S_IRUGO, node_read_meminfo, NULL);
479
+static DEVICE_ATTR(meminfo, 0444, node_read_meminfo, NULL);
172480
173481 static ssize_t node_read_numastat(struct device *dev,
174
- struct device_attribute *attr, char *buf)
482
+ struct device_attribute *attr, char *buf)
175483 {
176
- return sprintf(buf,
177
- "numa_hit %lu\n"
178
- "numa_miss %lu\n"
179
- "numa_foreign %lu\n"
180
- "interleave_hit %lu\n"
181
- "local_node %lu\n"
182
- "other_node %lu\n",
183
- sum_zone_numa_state(dev->id, NUMA_HIT),
184
- sum_zone_numa_state(dev->id, NUMA_MISS),
185
- sum_zone_numa_state(dev->id, NUMA_FOREIGN),
186
- sum_zone_numa_state(dev->id, NUMA_INTERLEAVE_HIT),
187
- sum_zone_numa_state(dev->id, NUMA_LOCAL),
188
- sum_zone_numa_state(dev->id, NUMA_OTHER));
484
+ return sysfs_emit(buf,
485
+ "numa_hit %lu\n"
486
+ "numa_miss %lu\n"
487
+ "numa_foreign %lu\n"
488
+ "interleave_hit %lu\n"
489
+ "local_node %lu\n"
490
+ "other_node %lu\n",
491
+ sum_zone_numa_state(dev->id, NUMA_HIT),
492
+ sum_zone_numa_state(dev->id, NUMA_MISS),
493
+ sum_zone_numa_state(dev->id, NUMA_FOREIGN),
494
+ sum_zone_numa_state(dev->id, NUMA_INTERLEAVE_HIT),
495
+ sum_zone_numa_state(dev->id, NUMA_LOCAL),
496
+ sum_zone_numa_state(dev->id, NUMA_OTHER));
189497 }
190
-static DEVICE_ATTR(numastat, S_IRUGO, node_read_numastat, NULL);
498
+static DEVICE_ATTR(numastat, 0444, node_read_numastat, NULL);
191499
192500 static ssize_t node_read_vmstat(struct device *dev,
193501 struct device_attribute *attr, char *buf)
....@@ -195,36 +503,31 @@
195503 int nid = dev->id;
196504 struct pglist_data *pgdat = NODE_DATA(nid);
197505 int i;
198
- int n = 0;
506
+ int len = 0;
199507
200508 for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
201
- n += sprintf(buf+n, "%s %lu\n", vmstat_text[i],
202
- sum_zone_node_page_state(nid, i));
509
+ len += sysfs_emit_at(buf, len, "%s %lu\n",
510
+ zone_stat_name(i),
511
+ sum_zone_node_page_state(nid, i));
203512
204513 #ifdef CONFIG_NUMA
205514 for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++)
206
- n += sprintf(buf+n, "%s %lu\n",
207
- vmstat_text[i + NR_VM_ZONE_STAT_ITEMS],
208
- sum_zone_numa_state(nid, i));
515
+ len += sysfs_emit_at(buf, len, "%s %lu\n",
516
+ numa_stat_name(i),
517
+ sum_zone_numa_state(nid, i));
518
+
209519 #endif
520
+ for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
521
+ len += sysfs_emit_at(buf, len, "%s %lu\n",
522
+ node_stat_name(i),
523
+ node_page_state_pages(pgdat, i));
210524
211
- for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) {
212
- /* Skip hidden vmstat items. */
213
- if (*vmstat_text[i + NR_VM_ZONE_STAT_ITEMS +
214
- NR_VM_NUMA_STAT_ITEMS] == '\0')
215
- continue;
216
- n += sprintf(buf+n, "%s %lu\n",
217
- vmstat_text[i + NR_VM_ZONE_STAT_ITEMS +
218
- NR_VM_NUMA_STAT_ITEMS],
219
- node_page_state(pgdat, i));
220
- }
221
-
222
- return n;
525
+ return len;
223526 }
224
-static DEVICE_ATTR(vmstat, S_IRUGO, node_read_vmstat, NULL);
527
+static DEVICE_ATTR(vmstat, 0444, node_read_vmstat, NULL);
225528
226529 static ssize_t node_read_distance(struct device *dev,
227
- struct device_attribute *attr, char *buf)
530
+ struct device_attribute *attr, char *buf)
228531 {
229532 int nid = dev->id;
230533 int len = 0;
....@@ -236,13 +539,15 @@
236539 */
237540 BUILD_BUG_ON(MAX_NUMNODES * 4 > PAGE_SIZE);
238541
239
- for_each_online_node(i)
240
- len += sprintf(buf + len, "%s%d", i ? " " : "", node_distance(nid, i));
542
+ for_each_online_node(i) {
543
+ len += sysfs_emit_at(buf, len, "%s%d",
544
+ i ? " " : "", node_distance(nid, i));
545
+ }
241546
242
- len += sprintf(buf + len, "\n");
547
+ len += sysfs_emit_at(buf, len, "\n");
243548 return len;
244549 }
245
-static DEVICE_ATTR(distance, S_IRUGO, node_read_distance, NULL);
550
+static DEVICE_ATTR(distance, 0444, node_read_distance, NULL);
246551
247552 static struct attribute *node_dev_attrs[] = {
248553 &dev_attr_cpumap.attr,
....@@ -350,8 +655,10 @@
350655 */
351656 void unregister_node(struct node *node)
352657 {
658
+ compaction_unregister_node(node);
353659 hugetlb_unregister_node(node); /* no-op, if memoryless node */
354
-
660
+ node_remove_accesses(node);
661
+ node_remove_caches(node);
355662 device_unregister(&node->dev);
356663 }
357664
....@@ -381,6 +688,56 @@
381688 return sysfs_create_link(&obj->kobj,
382689 &node_devices[nid]->dev.kobj,
383690 kobject_name(&node_devices[nid]->dev.kobj));
691
+}
692
+
693
+/**
694
+ * register_memory_node_under_compute_node - link memory node to its compute
695
+ * node for a given access class.
696
+ * @mem_nid: Memory node number
697
+ * @cpu_nid: CPU node number
698
+ * @access: Access class to register
699
+ *
700
+ * Description:
701
+ * For use with platforms that may have separate memory and compute nodes.
702
+ * This function will export node relationships linking which memory
703
+ * initiator nodes can access memory targets at a given ranked access
704
+ * class.
705
+ */
706
+int register_memory_node_under_compute_node(unsigned int mem_nid,
707
+ unsigned int cpu_nid,
708
+ unsigned access)
709
+{
710
+ struct node *init_node, *targ_node;
711
+ struct node_access_nodes *initiator, *target;
712
+ int ret;
713
+
714
+ if (!node_online(cpu_nid) || !node_online(mem_nid))
715
+ return -ENODEV;
716
+
717
+ init_node = node_devices[cpu_nid];
718
+ targ_node = node_devices[mem_nid];
719
+ initiator = node_init_node_access(init_node, access);
720
+ target = node_init_node_access(targ_node, access);
721
+ if (!initiator || !target)
722
+ return -ENOMEM;
723
+
724
+ ret = sysfs_add_link_to_group(&initiator->dev.kobj, "targets",
725
+ &targ_node->dev.kobj,
726
+ dev_name(&targ_node->dev));
727
+ if (ret)
728
+ return ret;
729
+
730
+ ret = sysfs_add_link_to_group(&target->dev.kobj, "initiators",
731
+ &init_node->dev.kobj,
732
+ dev_name(&init_node->dev));
733
+ if (ret)
734
+ goto err;
735
+
736
+ return 0;
737
+ err:
738
+ sysfs_remove_link_from_group(&initiator->dev.kobj, "targets",
739
+ dev_name(&targ_node->dev));
740
+ return ret;
384741 }
385742
386743 int unregister_cpu_under_node(unsigned int cpu, unsigned int nid)
....@@ -414,8 +771,8 @@
414771 return pfn_to_nid(pfn);
415772 }
416773
417
-static int do_register_memory_block_under_node(int nid,
418
- struct memory_block *mem_blk)
774
+static void do_register_memory_block_under_node(int nid,
775
+ struct memory_block *mem_blk)
419776 {
420777 int ret;
421778
....@@ -428,31 +785,39 @@
428785 ret = sysfs_create_link_nowarn(&node_devices[nid]->dev.kobj,
429786 &mem_blk->dev.kobj,
430787 kobject_name(&mem_blk->dev.kobj));
431
- if (ret)
432
- return ret;
788
+ if (ret && ret != -EEXIST)
789
+ dev_err_ratelimited(&node_devices[nid]->dev,
790
+ "can't create link to %s in sysfs (%d)\n",
791
+ kobject_name(&mem_blk->dev.kobj), ret);
433792
434
- return sysfs_create_link_nowarn(&mem_blk->dev.kobj,
793
+ ret = sysfs_create_link_nowarn(&mem_blk->dev.kobj,
435794 &node_devices[nid]->dev.kobj,
436795 kobject_name(&node_devices[nid]->dev.kobj));
796
+ if (ret && ret != -EEXIST)
797
+ dev_err_ratelimited(&mem_blk->dev,
798
+ "can't create link to %s in sysfs (%d)\n",
799
+ kobject_name(&node_devices[nid]->dev.kobj),
800
+ ret);
437801 }
438802
439803 /* register memory section under specified node if it spans that node */
440
-int register_mem_block_under_node_early(struct memory_block *mem_blk, void *arg)
804
+static int register_mem_block_under_node_early(struct memory_block *mem_blk,
805
+ void *arg)
441806 {
807
+ unsigned long memory_block_pfns = memory_block_size_bytes() / PAGE_SIZE;
808
+ unsigned long start_pfn = section_nr_to_pfn(mem_blk->start_section_nr);
809
+ unsigned long end_pfn = start_pfn + memory_block_pfns - 1;
442810 int nid = *(int *)arg;
443
- unsigned long pfn, sect_start_pfn, sect_end_pfn;
811
+ unsigned long pfn;
444812
445
- sect_start_pfn = section_nr_to_pfn(mem_blk->start_section_nr);
446
- sect_end_pfn = section_nr_to_pfn(mem_blk->end_section_nr);
447
- sect_end_pfn += PAGES_PER_SECTION - 1;
448
- for (pfn = sect_start_pfn; pfn <= sect_end_pfn; pfn++) {
813
+ for (pfn = start_pfn; pfn <= end_pfn; pfn++) {
449814 int page_nid;
450815
451816 /*
452817 * memory block could have several absent sections from start.
453818 * skip pfn range from absent section
454819 */
455
- if (!pfn_present(pfn)) {
820
+ if (!pfn_in_present_section(pfn)) {
456821 pfn = round_down(pfn + PAGES_PER_SECTION,
457822 PAGES_PER_SECTION) - 1;
458823 continue;
....@@ -468,7 +833,8 @@
468833 if (page_nid != nid)
469834 continue;
470835
471
- return do_register_memory_block_under_node(nid, mem_blk);
836
+ do_register_memory_block_under_node(nid, mem_blk);
837
+ return 0;
472838 }
473839 /* mem section does not span the specified node */
474840 return 0;
....@@ -483,7 +849,8 @@
483849 {
484850 int nid = *(int *)arg;
485851
486
- return do_register_memory_block_under_node(nid, mem_blk);
852
+ do_register_memory_block_under_node(nid, mem_blk);
853
+ return 0;
487854 }
488855
489856 /*
....@@ -501,8 +868,8 @@
501868 kobject_name(&node_devices[mem_blk->nid]->dev.kobj));
502869 }
503870
504
-int link_mem_sections(int nid, unsigned long start_pfn, unsigned long end_pfn,
505
- enum meminit_context context)
871
+void link_mem_sections(int nid, unsigned long start_pfn, unsigned long end_pfn,
872
+ enum meminit_context context)
506873 {
507874 walk_memory_blocks_func_t func;
508875
....@@ -511,7 +878,9 @@
511878 else
512879 func = register_mem_block_under_node_early;
513880
514
- return walk_memory_range(start_pfn, end_pfn, (void *)&nid, func);
881
+ walk_memory_blocks(PFN_PHYS(start_pfn), PFN_PHYS(end_pfn - start_pfn),
882
+ (void *)&nid, func);
883
+ return;
515884 }
516885
517886 #ifdef CONFIG_HUGETLBFS
....@@ -599,8 +968,10 @@
599968 register_cpu_under_node(cpu, nid);
600969 }
601970
971
+ INIT_LIST_HEAD(&node_devices[nid]->access_list);
602972 /* initialize work queue for memory hot plug */
603973 init_node_hugetlb_work(nid);
974
+ node_init_caches(nid);
604975
605976 return error;
606977 }
....@@ -618,17 +989,6 @@
618989 * node states attributes
619990 */
620991
621
-static ssize_t print_nodes_state(enum node_states state, char *buf)
622
-{
623
- int n;
624
-
625
- n = scnprintf(buf, PAGE_SIZE - 1, "%*pbl",
626
- nodemask_pr_args(&node_states[state]));
627
- buf[n++] = '\n';
628
- buf[n] = '\0';
629
- return n;
630
-}
631
-
632992 struct node_attr {
633993 struct device_attribute attr;
634994 enum node_states state;
....@@ -638,7 +998,9 @@
638998 struct device_attribute *attr, char *buf)
639999 {
6401000 struct node_attr *na = container_of(attr, struct node_attr, attr);
641
- return print_nodes_state(na->state, buf);
1001
+
1002
+ return sysfs_emit(buf, "%*pbl\n",
1003
+ nodemask_pr_args(&node_states[na->state]));
6421004 }
6431005
6441006 #define _NODE_ATTR(name, state) \
....@@ -653,6 +1015,8 @@
6531015 #endif
6541016 [N_MEMORY] = _NODE_ATTR(has_memory, N_MEMORY),
6551017 [N_CPU] = _NODE_ATTR(has_cpu, N_CPU),
1018
+ [N_GENERIC_INITIATOR] = _NODE_ATTR(has_generic_initiator,
1019
+ N_GENERIC_INITIATOR),
6561020 };
6571021
6581022 static struct attribute *node_state_attrs[] = {
....@@ -664,6 +1028,7 @@
6641028 #endif
6651029 &node_state_attr[N_MEMORY].attr.attr,
6661030 &node_state_attr[N_CPU].attr.attr,
1031
+ &node_state_attr[N_GENERIC_INITIATOR].attr.attr,
6671032 NULL
6681033 };
6691034