...
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * pSeries NUMA support
 *
 * Copyright (C) 2002 Anton Blanchard <anton@au.ibm.com>, IBM
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
 */
#define pr_fmt(fmt) "numa: " fmt

#include <linux/threads.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/mmzone.h>
...
#include <linux/nodemask.h>
#include <linux/cpu.h>
#include <linux/notifier.h>
-#include <linux/memblock.h>
#include <linux/of.h>
#include <linux/pfn.h>
#include <linux/cpuset.h>
...
#include <asm/sparsemem.h>
#include <asm/prom.h>
#include <asm/smp.h>
-#include <asm/cputhreads.h>
#include <asm/topology.h>
#include <asm/firmware.h>
#include <asm/paca.h>
...
EXPORT_SYMBOL(node_to_cpumask_map);
EXPORT_SYMBOL(node_data);

-static int min_common_depth;
+static int primary_domain_index;
static int n_mem_addr_cells, n_mem_size_cells;
-static int form1_affinity;
+
+#define FORM0_AFFINITY 0
+#define FORM1_AFFINITY 1
+#define FORM2_AFFINITY 2
+static int affinity_form;

#define MAX_DISTANCE_REF_POINTS 4
static int distance_ref_points_depth;
static const __be32 *distance_ref_points;
static int distance_lookup_table[MAX_NUMNODES][MAX_DISTANCE_REF_POINTS];
+static int numa_distance_table[MAX_NUMNODES][MAX_NUMNODES] = {
+	[0 ... MAX_NUMNODES - 1] = { [0 ... MAX_NUMNODES - 1] = -1 }
+};
+static int numa_id_index_table[MAX_NUMNODES] = { [0 ... MAX_NUMNODES - 1] = NUMA_NO_NODE };

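The two new tables above are pre-filled with GCC's range designators. A minimal userspace sketch of that initializer idiom — `MAX_NODES` and the printed entry are made up for illustration, and this is a GNU C extension, not kernel code:

```c
#include <stdio.h>

#define MAX_NODES 4   /* stand-in for MAX_NUMNODES in this sketch */

/* GCC's [first ... last] designators fill a whole index range at once */
static int dist[MAX_NODES][MAX_NODES] = {
	[0 ... MAX_NODES - 1] = { [0 ... MAX_NODES - 1] = -1 }
};

int main(void)
{
	/* every entry starts out as -1, meaning "distance unknown" */
	printf("dist[2][3] = %d\n", dist[2][3]);   /* prints -1 */
	return 0;
}
```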
/*
 * Allocate node_to_cpumask_map based on number of available nodes
...
		alloc_bootmem_cpumask_var(&node_to_cpumask_map[node]);

	/* cpumask_of_node() will now work */
-	dbg("Node to cpumask map for %d nodes\n", nr_node_ids);
+	dbg("Node to cpumask map for %u nodes\n", nr_node_ids);
}

static int __init fake_numa_create_new_node(unsigned long end_pfn,
...
}
#endif /* CONFIG_HOTPLUG_CPU || CONFIG_PPC_SPLPAR */

+static int __associativity_to_nid(const __be32 *associativity,
+				  int max_array_sz)
+{
+	int nid;
+	/*
+	 * primary_domain_index is a 1-based array index.
+	 */
+	int index = primary_domain_index - 1;
+
+	if (!numa_enabled || index >= max_array_sz)
+		return NUMA_NO_NODE;
+
+	nid = of_read_number(&associativity[index], 1);
+
+	/* POWER4 LPAR uses 0xffff as invalid node */
+	if (nid == 0xffff || nid >= nr_node_ids)
+		nid = NUMA_NO_NODE;
+	return nid;
+}
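For intuition, here is a hedged userspace model of the lookup that `__associativity_to_nid()` performs. The property values, the assumed `primary_domain_index` of 4, and the helper name `assoc_to_nid` are all fabricated for the example:

```c
#include <stdio.h>
#include <stdint.h>
#include <arpa/inet.h>   /* ntohl(): device tree cells are big-endian */

#define NO_NODE (-1)

/* model of __associativity_to_nid(): cell (index - 1) holds the node id */
static int assoc_to_nid(const uint32_t *assoc, int array_sz, int primary_index)
{
	int idx = primary_index - 1;   /* property indices are 1-based */
	uint32_t nid;

	if (idx >= array_sz)
		return NO_NODE;
	nid = ntohl(assoc[idx]);
	if (nid == 0xffff)             /* POWER4 LPAR invalid-node marker */
		return NO_NODE;
	return (int)nid;
}

int main(void)
{
	/* fake property body: 4 cells, the last one is the node id (= 1) */
	uint32_t assoc[] = { htonl(0), htonl(0), htonl(0), htonl(1) };

	printf("nid = %d\n", assoc_to_nid(assoc, 4, 4));   /* prints 1 */
	return 0;
}
```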
+/*
+ * Returns a nid in the range [0..nr_node_ids - 1], or NUMA_NO_NODE if no
+ * useful NUMA info is found.
+ */
+static int associativity_to_nid(const __be32 *associativity)
+{
+	int array_sz = of_read_number(associativity, 1);
+
+	/* Skip the first element in the associativity array */
+	return __associativity_to_nid((associativity + 1), array_sz);
+}
+
+static int __cpu_form2_relative_distance(__be32 *cpu1_assoc, __be32 *cpu2_assoc)
+{
+	int dist;
+	int node1, node2;
+
+	node1 = associativity_to_nid(cpu1_assoc);
+	node2 = associativity_to_nid(cpu2_assoc);
+
+	dist = numa_distance_table[node1][node2];
+	if (dist <= LOCAL_DISTANCE)
+		return 0;
+	else if (dist <= REMOTE_DISTANCE)
+		return 1;
+	else
+		return 2;
+}
+
+static int __cpu_form1_relative_distance(__be32 *cpu1_assoc, __be32 *cpu2_assoc)
+{
+	int dist = 0;
+
+	int i, index;
+
+	for (i = 0; i < distance_ref_points_depth; i++) {
+		index = be32_to_cpu(distance_ref_points[i]);
+		if (cpu1_assoc[index] == cpu2_assoc[index])
+			break;
+		dist++;
+	}
+
+	return dist;
+}
+
+int cpu_relative_distance(__be32 *cpu1_assoc, __be32 *cpu2_assoc)
+{
+	/* We should not get called with FORM0 */
+	VM_WARN_ON(affinity_form == FORM0_AFFINITY);
+	if (affinity_form == FORM1_AFFINITY)
+		return __cpu_form1_relative_distance(cpu1_assoc, cpu2_assoc);
+	return __cpu_form2_relative_distance(cpu1_assoc, cpu2_assoc);
+}
+
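A rough userspace model of the FORM1 walk in `__cpu_form1_relative_distance()`: the relative distance is the number of reference levels at which the two associativity arrays differ before the first one that matches. The reference points and associativity values below are invented for the example:

```c
#include <stdio.h>
#include <stdint.h>

/*
 * distance_ref_points lists which associativity levels matter, most
 * significant first; the relative distance counts how many of those
 * levels differ before the first match.
 */
static int form1_relative_distance(const uint32_t *a, const uint32_t *b,
				   const int *ref_points, int depth)
{
	int dist = 0;

	for (int i = 0; i < depth; i++) {
		int index = ref_points[i];

		if (a[index] == b[index])
			break;
		dist++;
	}
	return dist;
}

int main(void)
{
	int ref_points[] = { 4, 2 };             /* assumed example levels */
	uint32_t cpu1[] = { 0, 0, 1, 0, 10 };    /* same level-2 domain ...  */
	uint32_t cpu2[] = { 0, 0, 1, 0, 11 };    /* ... different level-4   */

	/* differs at level 4, matches at level 2 -> relative distance 1 */
	printf("%d\n", form1_relative_distance(cpu1, cpu2, ref_points, 2));
	return 0;
}
```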
/* must hold reference to node during call */
static const __be32 *of_get_associativity(struct device_node *dev)
{
...
	int i;
	int distance = LOCAL_DISTANCE;

-	if (!form1_affinity)
+	if (affinity_form == FORM2_AFFINITY)
+		return numa_distance_table[a][b];
+	else if (affinity_form == FORM0_AFFINITY)
		return ((a == b) ? LOCAL_DISTANCE : REMOTE_DISTANCE);

	for (i = 0; i < distance_ref_points_depth; i++) {
...
}
EXPORT_SYMBOL(__node_distance);
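To see what the new branches in `__node_distance()` change, here is a small sketch contrasting FORM0's two-value behaviour with a FORM2-style table lookup. The table values are illustrative only; `LOCAL_DISTANCE`/`REMOTE_DISTANCE` mirror the constants from include/linux/topology.h:

```c
#include <stdio.h>

#define LOCAL_DISTANCE  10   /* values as in include/linux/topology.h */
#define REMOTE_DISTANCE 20

/* FORM0 knows only local/remote; FORM2 reads a full distance table */
static int node_distance_form0(int a, int b)
{
	return (a == b) ? LOCAL_DISTANCE : REMOTE_DISTANCE;
}

static int node_distance_form2(const int table[2][2], int a, int b)
{
	return table[a][b];
}

int main(void)
{
	/* assumed FORM2 table: node 1 is unusually far from node 0 */
	int table[2][2] = { { 10, 40 }, { 40, 10 } };

	printf("form0: %d\n", node_distance_form0(0, 1));        /* 20 */
	printf("form2: %d\n", node_distance_form2(table, 0, 1)); /* 40 */
	return 0;
}
```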
197 | 274 | |
---|
198 | | -static void initialize_distance_lookup_table(int nid, |
---|
199 | | - const __be32 *associativity) |
---|
200 | | -{ |
---|
201 | | - int i; |
---|
202 | | - |
---|
203 | | - if (!form1_affinity) |
---|
204 | | - return; |
---|
205 | | - |
---|
206 | | - for (i = 0; i < distance_ref_points_depth; i++) { |
---|
207 | | - const __be32 *entry; |
---|
208 | | - |
---|
209 | | - entry = &associativity[be32_to_cpu(distance_ref_points[i]) - 1]; |
---|
210 | | - distance_lookup_table[nid][i] = of_read_number(entry, 1); |
---|
211 | | - } |
---|
212 | | -} |
---|
213 | | - |
---|
214 | | -/* Returns nid in the range [0..MAX_NUMNODES-1], or -1 if no useful numa |
---|
215 | | - * info is found. |
---|
216 | | - */ |
---|
217 | | -static int associativity_to_nid(const __be32 *associativity) |
---|
218 | | -{ |
---|
219 | | - int nid = -1; |
---|
220 | | - |
---|
221 | | - if (min_common_depth == -1) |
---|
222 | | - goto out; |
---|
223 | | - |
---|
224 | | - if (of_read_number(associativity, 1) >= min_common_depth) |
---|
225 | | - nid = of_read_number(&associativity[min_common_depth], 1); |
---|
226 | | - |
---|
227 | | - /* POWER4 LPAR uses 0xffff as invalid node */ |
---|
228 | | - if (nid == 0xffff || nid >= MAX_NUMNODES) |
---|
229 | | - nid = -1; |
---|
230 | | - |
---|
231 | | - if (nid > 0 && |
---|
232 | | - of_read_number(associativity, 1) >= distance_ref_points_depth) { |
---|
233 | | - /* |
---|
234 | | - * Skip the length field and send start of associativity array |
---|
235 | | - */ |
---|
236 | | - initialize_distance_lookup_table(nid, associativity + 1); |
---|
237 | | - } |
---|
238 | | - |
---|
239 | | -out: |
---|
240 | | - return nid; |
---|
241 | | -} |
---|
242 | | - |
---|
243 | 275 | /* Returns the nid associated with the given device tree node, |
---|
244 | 276 | * or -1 if not found. |
---|
245 | 277 | */ |
---|
246 | 278 | static int of_node_to_nid_single(struct device_node *device) |
---|
247 | 279 | { |
---|
248 | | - int nid = -1; |
---|
| 280 | + int nid = NUMA_NO_NODE; |
---|
249 | 281 | const __be32 *tmp; |
---|
250 | 282 | |
---|
251 | 283 | tmp = of_get_associativity(device); |
---|
.. | .. |
---|
257 | 289 | /* Walk the device tree upwards, looking for an associativity id */ |
---|
258 | 290 | int of_node_to_nid(struct device_node *device) |
---|
259 | 291 | { |
---|
260 | | - int nid = -1; |
---|
| 292 | + int nid = NUMA_NO_NODE; |
---|
261 | 293 | |
---|
262 | 294 | of_node_get(device); |
---|
263 | 295 | while (device) { |
---|
.. | .. |
---|
273 | 305 | } |
---|
274 | 306 | EXPORT_SYMBOL(of_node_to_nid); |
---|
275 | 307 | |
---|
276 | | -static int __init find_min_common_depth(void) |
---|
| 308 | +static void __initialize_form1_numa_distance(const __be32 *associativity, |
---|
| 309 | + int max_array_sz) |
---|
277 | 310 | { |
---|
278 | | - int depth; |
---|
| 311 | + int i, nid; |
---|
| 312 | + |
---|
| 313 | + if (affinity_form != FORM1_AFFINITY) |
---|
| 314 | + return; |
---|
| 315 | + |
---|
| 316 | + nid = __associativity_to_nid(associativity, max_array_sz); |
---|
| 317 | + if (nid != NUMA_NO_NODE) { |
---|
| 318 | + for (i = 0; i < distance_ref_points_depth; i++) { |
---|
| 319 | + const __be32 *entry; |
---|
| 320 | + int index = be32_to_cpu(distance_ref_points[i]) - 1; |
---|
| 321 | + |
---|
+			/*
+			 * Broken hierarchy: bail out, leaving the distance
+			 * table only partially initialized.
+			 */
+			if (WARN(index >= max_array_sz, "Broken ibm,associativity property"))
+				return;
+
+			entry = &associativity[index];
+			distance_lookup_table[nid][i] = of_read_number(entry, 1);
+		}
+	}
+}
+
+static void initialize_form1_numa_distance(const __be32 *associativity)
+{
+	int array_sz;
+
+	array_sz = of_read_number(associativity, 1);
+	/* Skip the first element in the associativity array */
+	__initialize_form1_numa_distance(associativity + 1, array_sz);
+}
+
+/*
+ * Used to update distance information w.r.t. a newly added node.
+ */
+void update_numa_distance(struct device_node *node)
+{
+	int nid;
+
+	if (affinity_form == FORM0_AFFINITY)
+		return;
+	else if (affinity_form == FORM1_AFFINITY) {
+		const __be32 *associativity;
+
+		associativity = of_get_associativity(node);
+		if (!associativity)
+			return;
+
+		initialize_form1_numa_distance(associativity);
+		return;
+	}
+
+	/* FORM2 affinity */
+	nid = of_node_to_nid_single(node);
+	if (nid == NUMA_NO_NODE)
+		return;
+
+	/*
+	 * With FORM2 we expect NUMA distance of all possible NUMA
+	 * nodes to be provided during boot.
+	 */
+	WARN(numa_distance_table[nid][nid] == -1,
+	     "NUMA distance details for node %d not provided\n", nid);
+}
+EXPORT_SYMBOL_GPL(update_numa_distance);
+
+/*
+ * ibm,numa-lookup-index-table = {N, domainid1, domainid2, ..... domainidN}
+ * ibm,numa-distance-table = { N, 1, 2, 4, 5, 1, 6, .... N elements}
+ */
+static void initialize_form2_numa_distance_lookup_table(void)
+{
+	int i, j;
	struct device_node *root;
+	const __u8 *numa_dist_table;
+	const __be32 *numa_lookup_index;
+	int numa_dist_table_length;
+	int max_numa_index, distance_index;
+
+	if (firmware_has_feature(FW_FEATURE_OPAL))
+		root = of_find_node_by_path("/ibm,opal");
+	else
+		root = of_find_node_by_path("/rtas");
+	if (!root)
+		root = of_find_node_by_path("/");
+
+	numa_lookup_index = of_get_property(root, "ibm,numa-lookup-index-table", NULL);
+	max_numa_index = of_read_number(&numa_lookup_index[0], 1);
+
+	/* The first element of the array is its length, encoded as an int */
+	numa_dist_table = of_get_property(root, "ibm,numa-distance-table", NULL);
+	numa_dist_table_length = of_read_number((const __be32 *)&numa_dist_table[0], 1);
+	/* Skip the length cell, which is an encoded int */
+	numa_dist_table += sizeof(__be32);
+
+	pr_debug("numa_dist_table_len = %d, numa_dist_indexes_len = %d\n",
+		 numa_dist_table_length, max_numa_index);
+
+	for (i = 0; i < max_numa_index; i++)
+		/* +1 skip the max_numa_index in the property */
+		numa_id_index_table[i] = of_read_number(&numa_lookup_index[i + 1], 1);
+
+
+	if (numa_dist_table_length != max_numa_index * max_numa_index) {
+		WARN(1, "Wrong NUMA distance information\n");
+		/* consider everybody else just remote. */
+		for (i = 0; i < max_numa_index; i++) {
+			for (j = 0; j < max_numa_index; j++) {
+				int nodeA = numa_id_index_table[i];
+				int nodeB = numa_id_index_table[j];
+
+				if (nodeA == nodeB)
+					numa_distance_table[nodeA][nodeB] = LOCAL_DISTANCE;
+				else
+					numa_distance_table[nodeA][nodeB] = REMOTE_DISTANCE;
+			}
+		}
+	}
+
+	distance_index = 0;
+	for (i = 0; i < max_numa_index; i++) {
+		for (j = 0; j < max_numa_index; j++) {
+			int nodeA = numa_id_index_table[i];
+			int nodeB = numa_id_index_table[j];
+
+			numa_distance_table[nodeA][nodeB] = numa_dist_table[distance_index++];
+			pr_debug("dist[%d][%d]=%d ", nodeA, nodeB, numa_distance_table[nodeA][nodeB]);
+		}
+	}
+	of_node_put(root);
+}
+
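A worked userspace example of the FORM2 decoding done above, with a fabricated two-node layout (node ids 0 and 4). The flat byte array expands row-major into the distance table, exactly like the double loop in the function:

```c
#include <stdio.h>
#include <stdint.h>

/*
 * Model of the FORM2 property layout (made-up values):
 *   ibm,numa-lookup-index-table = { N, domainid... }
 *   ibm,numa-distance-table     = { N*N, byte distances, row-major }
 */
int main(void)
{
	int lookup_index[] = { 2, 0, 4 };      /* N=2; node ids 0 and 4 */
	uint8_t dist[] = { 10, 40, 40, 10 };   /* 2x2 distance matrix   */
	int n = lookup_index[0];
	int table[8][8] = { 0 };

	/* expand the flat byte array into the sparse node-id table */
	for (int i = 0, k = 0; i < n; i++)
		for (int j = 0; j < n; j++) {
			int a = lookup_index[i + 1];
			int b = lookup_index[j + 1];

			table[a][b] = dist[k++];
		}

	printf("distance(0,4) = %d\n", table[0][4]);   /* prints 40 */
	return 0;
}
```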
+static int __init find_primary_domain_index(void)
+{
+	int index;
+	struct device_node *root;
+
+	/*
+	 * Check for which form of affinity.
+	 */
+	if (firmware_has_feature(FW_FEATURE_OPAL)) {
+		affinity_form = FORM1_AFFINITY;
+	} else if (firmware_has_feature(FW_FEATURE_FORM2_AFFINITY)) {
+		dbg("Using form 2 affinity\n");
+		affinity_form = FORM2_AFFINITY;
+	} else if (firmware_has_feature(FW_FEATURE_FORM1_AFFINITY)) {
+		dbg("Using form 1 affinity\n");
+		affinity_form = FORM1_AFFINITY;
+	} else
+		affinity_form = FORM0_AFFINITY;

	if (firmware_has_feature(FW_FEATURE_OPAL))
		root = of_find_node_by_path("/ibm,opal");
...
	}

	distance_ref_points_depth /= sizeof(int);
-
-	if (firmware_has_feature(FW_FEATURE_OPAL) ||
-	    firmware_has_feature(FW_FEATURE_TYPE1_AFFINITY)) {
-		dbg("Using form 1 affinity\n");
-		form1_affinity = 1;
-	}
-
-	if (form1_affinity) {
-		depth = of_read_number(distance_ref_points, 1);
-	} else {
+	if (affinity_form == FORM0_AFFINITY) {
		if (distance_ref_points_depth < 2) {
			printk(KERN_WARNING "NUMA: "
-				"short ibm,associativity-reference-points\n");
+			       "short ibm,associativity-reference-points\n");
			goto err;
		}

-		depth = of_read_number(&distance_ref_points[1], 1);
+		index = of_read_number(&distance_ref_points[1], 1);
+	} else {
+		/*
+		 * Both FORM1 and FORM2 affinity find the primary domain details
+		 * at the same offset.
+		 */
+		index = of_read_number(distance_ref_points, 1);
	}
-
	/*
	 * Warn and cap if the hardware supports more than
	 * MAX_DISTANCE_REF_POINTS domains.
...
	}

	of_node_put(root);
-	return depth;
+	return index;

err:
	of_node_put(root);
...
	return 0;
}

-/*
- * This is like of_node_to_nid_single() for memory represented in the
- * ibm,dynamic-reconfiguration-memory node.
- */
-static int of_drconf_to_nid_single(struct drmem_lmb *lmb)
+static int get_nid_and_numa_distance(struct drmem_lmb *lmb)
{
	struct assoc_arrays aa = { .arrays = NULL };
-	int default_nid = 0;
+	int default_nid = NUMA_NO_NODE;
	int nid = default_nid;
	int rc, index;
+
+	if ((primary_domain_index < 0) || !numa_enabled)
+		return default_nid;

	rc = of_get_assoc_arrays(&aa);
	if (rc)
		return default_nid;

-	if (min_common_depth > 0 && min_common_depth <= aa.array_sz &&
-	    !(lmb->flags & DRCONF_MEM_AI_INVALID) &&
-	    lmb->aa_index < aa.n_arrays) {
-		index = lmb->aa_index * aa.array_sz + min_common_depth - 1;
-		nid = of_read_number(&aa.arrays[index], 1);
+	if (primary_domain_index <= aa.array_sz &&
+	    !(lmb->flags & DRCONF_MEM_AI_INVALID) && lmb->aa_index < aa.n_arrays) {
+		const __be32 *associativity;

-		if (nid == 0xffff || nid >= MAX_NUMNODES)
-			nid = default_nid;
-
-		if (nid > 0) {
-			index = lmb->aa_index * aa.array_sz;
-			initialize_distance_lookup_table(nid,
-							&aa.arrays[index]);
+		index = lmb->aa_index * aa.array_sz;
+		associativity = &aa.arrays[index];
+		nid = __associativity_to_nid(associativity, aa.array_sz);
+		if (nid > 0 && affinity_form == FORM1_AFFINITY) {
+			/*
+			 * Entries in the lookup array do not carry the array
+			 * length as their first element.
+			 */
+			__initialize_form1_numa_distance(associativity, aa.array_sz);
		}
	}
-
	return nid;
}
+
+/*
+ * This is like of_node_to_nid_single() for memory represented in the
+ * ibm,dynamic-reconfiguration-memory node.
+ */
+int of_drconf_to_nid_single(struct drmem_lmb *lmb)
+{
+	struct assoc_arrays aa = { .arrays = NULL };
+	int default_nid = NUMA_NO_NODE;
+	int nid = default_nid;
+	int rc, index;
+
+	if ((primary_domain_index < 0) || !numa_enabled)
+		return default_nid;
+
+	rc = of_get_assoc_arrays(&aa);
+	if (rc)
+		return default_nid;
+
+	if (primary_domain_index <= aa.array_sz &&
+	    !(lmb->flags & DRCONF_MEM_AI_INVALID) && lmb->aa_index < aa.n_arrays) {
+		const __be32 *associativity;
+
+		index = lmb->aa_index * aa.array_sz;
+		associativity = &aa.arrays[index];
+		nid = __associativity_to_nid(associativity, aa.array_sz);
+	}
+	return nid;
+}
+
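A hedged model of the assoc-arrays indexing used by both helpers above: `aa_index` selects a whole row of `array_sz` cells, and the primary domain entry within that row is the nid. All values below are fabricated:

```c
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	int array_sz = 3;
	/* two rows of 3 cells; the last cell of each row is the node id */
	uint32_t arrays[] = { 0, 0, 0,    /* row 0 -> node 0 */
			      0, 1, 1 };  /* row 1 -> node 1 */
	int aa_index = 1;                 /* as found in a drmem LMB */
	int primary_domain_index = 3;     /* assumed for the example */

	/* same arithmetic as index = lmb->aa_index * aa.array_sz */
	const uint32_t *assoc = &arrays[aa_index * array_sz];
	int nid = (int)assoc[primary_domain_index - 1];

	printf("nid = %d\n", nid);        /* prints 1 */
	return 0;
}
```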
+#ifdef CONFIG_PPC_SPLPAR
+
+static int __vphn_get_associativity(long lcpu, __be32 *associativity)
+{
+	long rc, hwid;
+
+	/*
+	 * On a shared LPAR, the device tree will not have node associativity.
+	 * At this point the lppaca, or its __old_status field, may not be
+	 * updated yet, so the kernel cannot detect whether it is running on a
+	 * shared LPAR. Hence request an explicit associativity irrespective of
+	 * whether the LPAR is shared or dedicated, and use the device tree
+	 * property as a fallback. cpu_to_phys_id is only valid between
+	 * smp_setup_cpu_maps() and smp_setup_pacas().
+	 */
+	if (firmware_has_feature(FW_FEATURE_VPHN)) {
+		if (cpu_to_phys_id)
+			hwid = cpu_to_phys_id[lcpu];
+		else
+			hwid = get_hard_smp_processor_id(lcpu);
+
+		rc = hcall_vphn(hwid, VPHN_FLAG_VCPU, associativity);
+		if (rc == H_SUCCESS)
+			return 0;
+	}
+
+	return -1;
+}
+
+static int vphn_get_nid(long lcpu)
+{
+	__be32 associativity[VPHN_ASSOC_BUFSIZE] = {0};
+
+
+	if (!__vphn_get_associativity(lcpu, associativity))
+		return associativity_to_nid(associativity);
+
+	return NUMA_NO_NODE;
+
+}
+#else
+
+static int __vphn_get_associativity(long lcpu, __be32 *associativity)
+{
+	return -1;
+}
+
+static int vphn_get_nid(long unused)
+{
+	return NUMA_NO_NODE;
+}
+#endif /* CONFIG_PPC_SPLPAR */

/*
 * Figure out to which domain a cpu belongs and stick it there.
...
 */
static int numa_setup_cpu(unsigned long lcpu)
{
-	int nid = -1;
	struct device_node *cpu;
+	int fcpu = cpu_first_thread_sibling(lcpu);
+	int nid = NUMA_NO_NODE;
+
+	if (!cpu_present(lcpu)) {
+		set_cpu_numa_node(lcpu, first_online_node);
+		return first_online_node;
+	}

	/*
	 * If a valid cpu-to-node mapping is already available, use it
	 * directly instead of querying the firmware, since it represents
	 * the most recent mapping notified to us by the platform (eg: VPHN).
+	 * Since the cpu-to-node binding remains the same for all threads in
+	 * the core, if a valid cpu-to-node mapping is already available for
+	 * the first thread in the core, use it.
	 */
-	if ((nid = numa_cpu_lookup_table[lcpu]) >= 0) {
+	nid = numa_cpu_lookup_table[fcpu];
+	if (nid >= 0) {
		map_cpu_to_node(lcpu, nid);
		return nid;
	}
+
+	nid = vphn_get_nid(lcpu);
+	if (nid != NUMA_NO_NODE)
+		goto out_present;

	cpu = of_get_cpu_node(lcpu, NULL);

...
	}

	nid = of_node_to_nid_single(cpu);
+	of_node_put(cpu);

out_present:
	if (nid < 0 || !node_possible(nid))
		nid = first_online_node;

+	/*
+	 * Update the first thread of the core. All threads of a core have to
+	 * be part of the same node. This not only avoids querying for every
+	 * other thread in the core, but also avoids a case where a virtual
+	 * node associativity change causes subsequent threads of a core to be
+	 * associated with a different nid. However, if the first thread is
+	 * already online, expect it to have a valid mapping.
+	 */
+	if (fcpu != lcpu) {
+		WARN_ON(cpu_online(fcpu));
+		map_cpu_to_node(fcpu, nid);
+	}
+
	map_cpu_to_node(lcpu, nid);
-	of_node_put(cpu);
out:
	return nid;
}
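The first-thread logic relies on `cpu_first_thread_sibling()`. A userspace approximation of that mapping, assuming a power-of-two `threads_per_core` (8 here, an assumption for the sketch):

```c
#include <stdio.h>

/* mask off the low SMT bits to find the first thread of the core */
static int first_thread_sibling(int cpu, int threads_per_core)
{
	return cpu & ~(threads_per_core - 1);
}

int main(void)
{
	/* cpus 8..15 share a core; all map to first thread 8 */
	for (int cpu = 8; cpu < 16; cpu++)
		printf("cpu %d -> first sibling %d\n",
		       cpu, first_thread_sibling(cpu, 8));
	return 0;
}
```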
...
 * Extract NUMA information from the ibm,dynamic-reconfiguration-memory
 * node. This assumes n_mem_{addr,size}_cells have been set.
 */
-static void __init numa_setup_drmem_lmb(struct drmem_lmb *lmb,
-					const __be32 **usm)
+static int __init numa_setup_drmem_lmb(struct drmem_lmb *lmb,
+				       const __be32 **usm,
+				       void *data)
{
	unsigned int ranges, is_kexec_kdump = 0;
	unsigned long base, size, sz;
...
	 */
	if ((lmb->flags & DRCONF_MEM_RESERVED)
	    || !(lmb->flags & DRCONF_MEM_ASSIGNED))
-		return;
+		return 0;

	if (*usm)
		is_kexec_kdump = 1;
...
	if (is_kexec_kdump) {
		ranges = read_usm_ranges(usm);
		if (!ranges) /* there are no (base, size) tuples */
-			return;
+			return 0;
	}

	do {
...
			size = read_n_cells(n_mem_size_cells, usm);
		}

-		nid = of_drconf_to_nid_single(lmb);
+		nid = get_nid_and_numa_distance(lmb);
		fake_numa_create_new_node(((base + size) >> PAGE_SHIFT),
					  &nid);
		node_set_online(nid);
...
		if (sz)
			memblock_set_node(base, sz, &memblock.memory, nid);
	} while (--ranges);
+
+	return 0;
}

static int __init parse_numa_properties(void)
...
	struct device_node *memory;
	int default_nid = 0;
	unsigned long i;
+	const __be32 *associativity;

	if (numa_enabled == 0) {
		printk(KERN_WARNING "NUMA disabled by user\n");
		return -1;
	}

-	min_common_depth = find_min_common_depth();
+	primary_domain_index = find_primary_domain_index();

-	if (min_common_depth < 0)
-		return min_common_depth;
+	if (primary_domain_index < 0) {
+		/*
+		 * If we fail to parse primary_domain_index from the device
+		 * tree, mark NUMA as disabled and boot with NUMA disabled.
+		 */
+		numa_enabled = false;
+		return primary_domain_index;
+	}

-	dbg("NUMA associativity depth for CPU/Memory: %d\n", min_common_depth);
+	dbg("NUMA associativity depth for CPU/Memory: %d\n", primary_domain_index);
+
+	/*
+	 * If it is FORM2, initialize the distance table here.
+	 */
+	if (affinity_form == FORM2_AFFINITY)
+		initialize_form2_numa_distance_lookup_table();

	/*
	 * Even though we connect cpus to numa domains later in SMP
...
	 * each node to be onlined must have NODE_DATA etc backing it.
	 */
	for_each_present_cpu(i) {
+		__be32 vphn_assoc[VPHN_ASSOC_BUFSIZE];
		struct device_node *cpu;
-		int nid;
+		int nid = NUMA_NO_NODE;

-		cpu = of_get_cpu_node(i, NULL);
-		BUG_ON(!cpu);
-		nid = of_node_to_nid_single(cpu);
-		of_node_put(cpu);
+		memset(vphn_assoc, 0, VPHN_ASSOC_BUFSIZE * sizeof(__be32));

-		/*
-		 * Don't fall back to default_nid yet -- we will plug
-		 * cpus into nodes once the memory scan has discovered
-		 * the topology.
-		 */
-		if (nid < 0)
-			continue;
-		node_set_online(nid);
+		if (__vphn_get_associativity(i, vphn_assoc) == 0) {
+			nid = associativity_to_nid(vphn_assoc);
+			initialize_form1_numa_distance(vphn_assoc);
+		} else {
+
+			/*
+			 * Don't fall back to default_nid yet -- we will plug
+			 * cpus into nodes once the memory scan has discovered
+			 * the topology.
+			 */
+			cpu = of_get_cpu_node(i, NULL);
+			BUG_ON(!cpu);
+
+			associativity = of_get_associativity(cpu);
+			if (associativity) {
+				nid = associativity_to_nid(associativity);
+				initialize_form1_numa_distance(associativity);
+			}
+			of_node_put(cpu);
+		}
+
+		/* node_set_online() is undefined behaviour if 'nid' is negative */
+		if (likely(nid >= 0))
+			node_set_online(nid);
	}

	get_n_mem_cells(&n_mem_addr_cells, &n_mem_size_cells);
...
		 * have associativity properties. If none, then
		 * everything goes to default_nid.
		 */
-		nid = of_node_to_nid_single(memory);
-		if (nid < 0)
+		associativity = of_get_associativity(memory);
+		if (associativity) {
+			nid = associativity_to_nid(associativity);
+			initialize_form1_numa_distance(associativity);
+		} else
			nid = default_nid;

		fake_numa_create_new_node(((start + size) >> PAGE_SHIFT), &nid);
...
	 */
	memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
	if (memory) {
-		walk_drmem_lmbs(memory, numa_setup_drmem_lmb);
+		walk_drmem_lmbs(memory, NULL, numa_setup_drmem_lmb);
		of_node_put(memory);
	}

...
	unsigned long total_ram = memblock_phys_mem_size();
	unsigned long start_pfn, end_pfn;
	unsigned int nid = 0;
-	struct memblock_region *reg;
+	int i;

	printk(KERN_DEBUG "Top of RAM: 0x%lx, Total RAM: 0x%lx\n",
	       top_of_ram, total_ram);
	printk(KERN_DEBUG "Memory hole size: %ldMB\n",
	       (top_of_ram - total_ram) >> 20);

-	for_each_memblock(memory, reg) {
-		start_pfn = memblock_region_memory_base_pfn(reg);
-		end_pfn = memblock_region_memory_end_pfn(reg);
-
+	for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, NULL) {
		fake_numa_create_new_node(end_pfn, &nid);
		memblock_set_node(PFN_PHYS(start_pfn),
				  PFN_PHYS(end_pfn - start_pfn),
...
	unsigned int node;
	unsigned int cpu, count;

-	if (min_common_depth == -1 || !numa_enabled)
+	if (!numa_enabled)
		return;

	for_each_online_node(node) {
...
	void *nd;
	int tnid;

-	nd_pa = memblock_alloc_try_nid(nd_size, SMP_CACHE_BYTES, nid);
+	nd_pa = memblock_phys_alloc_try_nid(nd_size, SMP_CACHE_BYTES, nid);
+	if (!nd_pa)
+		panic("Cannot allocate %zu bytes for node %d data\n",
+		      nd_size, nid);
+
	nd = __va(nd_pa);

	/* report and initialize */
...
static void __init find_possible_nodes(void)
{
	struct device_node *rtas;
-	u32 numnodes, i;
+	const __be32 *domains = NULL;
+	int prop_length, max_nodes;
+	u32 i;

-	if (min_common_depth <= 0)
+	if (!numa_enabled)
		return;

	rtas = of_find_node_by_path("/rtas");
	if (!rtas)
		return;

-	if (of_property_read_u32_index(rtas,
-				"ibm,max-associativity-domains",
-				min_common_depth, &numnodes))
-		goto out;
+	/*
+	 * ibm,current-associativity-domains is a fairly recent property. If
+	 * it doesn't exist, then fall back on ibm,max-associativity-domains.
+	 * "Current" denotes what the platform can support at present, whereas
+	 * "max" denotes what the hypervisor can support.
+	 *
+	 * If the LPAR is migratable, new nodes might be activated after an
+	 * LPM, so we should consider the max number in that case.
+	 */
+	if (!of_get_property(of_root, "ibm,migratable-partition", NULL))
+		domains = of_get_property(rtas,
+					  "ibm,current-associativity-domains",
+					  &prop_length);
+	if (!domains) {
+		domains = of_get_property(rtas, "ibm,max-associativity-domains",
+					  &prop_length);
+		if (!domains)
+			goto out;
+	}

-	for (i = 0; i < numnodes; i++) {
+	max_nodes = of_read_number(&domains[primary_domain_index], 1);
+	pr_info("Partition configured for %d NUMA nodes.\n", max_nodes);
+
+	for (i = 0; i < max_nodes; i++) {
		if (!node_possible(i))
			node_set(i, node_possible_map);
	}
+
+	prop_length /= sizeof(int);
+	if (prop_length > primary_domain_index + 2)
+		coregroup_enabled = 1;

out:
	of_node_put(rtas);
...
void __init mem_topology_setup(void)
{
	int cpu;
+
+	/*
+	 * Linux/mm assumes node 0 to be online at boot. However, this is not
+	 * true on PowerPC, where node 0 is similar to any other node: it
+	 * could be a cpuless, memoryless node. So force node 0 to be offline
+	 * for now. This prevents a cpuless, memoryless node 0 from showing
+	 * up unnecessarily as online. If a node has cpus or memory that need
+	 * to be online, the node will be marked online anyway.
+	 */
+	node_set_offline(0);

	if (parse_numa_properties())
		setup_nonnuma();
...

	reset_numa_cpu_lookup_table();

-	for_each_present_cpu(cpu)
+	for_each_possible_cpu(cpu) {
+		/*
+		 * Powerpc with CONFIG_NUMA always used to have a node 0,
+		 * even if it was memoryless or cpuless. For all cpus that
+		 * are possible but not present, cpu_to_node() would point
+		 * to node 0. To remove a cpuless, memoryless dummy node,
+		 * powerpc needs to make sure all possible-but-not-present
+		 * cpu_to_node() mappings are set to a proper node.
+		 */
		numa_setup_cpu(cpu);
+	}
}

void __init initmem_init(void)
...

		get_pfn_range_for_nid(nid, &start_pfn, &end_pfn);
		setup_node_data(nid, start_pfn, end_pfn);
-		sparse_memory_present_with_active_regions(nid);
	}

	sparse_init();
...
}
early_param("numa", early_numa);

-static bool topology_updates_enabled = true;
-
-static int __init early_topology_updates(char *p)
-{
-	if (!p)
-		return 0;
-
-	if (!strcmp(p, "off")) {
-		pr_info("Disabling topology updates\n");
-		topology_updates_enabled = false;
-	}
-
-	return 0;
-}
-early_param("topology_updates", early_topology_updates);
-
#ifdef CONFIG_MEMORY_HOTPLUG
/*
 * Find the node associated with a hot added memory section for
...
{
	struct drmem_lmb *lmb;
	unsigned long lmb_size;
-	int nid = -1;
+	int nid = NUMA_NO_NODE;

	lmb_size = drmem_lmb_size();

...
static int hot_add_node_scn_to_nid(unsigned long scn_addr)
{
	struct device_node *memory;
-	int nid = -1;
+	int nid = NUMA_NO_NODE;

	for_each_node_by_type(memory, "memory") {
		unsigned long start, size;
...
	struct device_node *memory = NULL;
	int nid;

-	if (!numa_enabled || (min_common_depth < 0))
+	if (!numa_enabled)
		return first_online_node;

	memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
...

/* Virtual Processor Home Node (VPHN) support */
#ifdef CONFIG_PPC_SPLPAR
-
-#include "vphn.h"
-
-struct topology_update_data {
-	struct topology_update_data *next;
-	unsigned int cpu;
-	int old_nid;
-	int new_nid;
-};
-
-#define TOPOLOGY_DEF_TIMER_SECS	60
-
-static u8 vphn_cpu_change_counts[NR_CPUS][MAX_DISTANCE_REF_POINTS];
-static cpumask_t cpu_associativity_changes_mask;
-static int vphn_enabled;
-static int prrn_enabled;
-static void reset_topology_timer(void);
-static int topology_timer_secs = 1;
static int topology_inited;
-
-/*
- * Change polling interval for associativity changes.
- */
-int timed_topology_update(int nsecs)
-{
-	if (vphn_enabled) {
-		if (nsecs > 0)
-			topology_timer_secs = nsecs;
-		else
-			topology_timer_secs = TOPOLOGY_DEF_TIMER_SECS;
-
-		reset_topology_timer();
-	}
-
-	return 0;
-}
-
-/*
- * Store the current values of the associativity change counters in the
- * hypervisor.
- */
-static void setup_cpu_associativity_change_counters(void)
-{
-	int cpu;
-
-	/* The VPHN feature supports a maximum of 8 reference points */
-	BUILD_BUG_ON(MAX_DISTANCE_REF_POINTS > 8);
-
-	for_each_possible_cpu(cpu) {
-		int i;
-		u8 *counts = vphn_cpu_change_counts[cpu];
-		volatile u8 *hypervisor_counts = lppaca_of(cpu).vphn_assoc_counts;
-
-		for (i = 0; i < distance_ref_points_depth; i++)
-			counts[i] = hypervisor_counts[i];
-	}
-}
-
-/*
- * The hypervisor maintains a set of 8 associativity change counters in
- * the VPA of each cpu that correspond to the associativity levels in the
- * ibm,associativity-reference-points property. When an associativity
- * level changes, the corresponding counter is incremented.
- *
- * Set a bit in cpu_associativity_changes_mask for each cpu whose home
- * node associativity levels have changed.
- *
- * Returns the number of cpus with unhandled associativity changes.
- */
-static int update_cpu_associativity_changes_mask(void)
-{
-	int cpu;
-	cpumask_t *changes = &cpu_associativity_changes_mask;
-
-	for_each_possible_cpu(cpu) {
-		int i, changed = 0;
-		u8 *counts = vphn_cpu_change_counts[cpu];
-		volatile u8 *hypervisor_counts = lppaca_of(cpu).vphn_assoc_counts;
-
-		for (i = 0; i < distance_ref_points_depth; i++) {
-			if (hypervisor_counts[i] != counts[i]) {
-				counts[i] = hypervisor_counts[i];
-				changed = 1;
-			}
-		}
-		if (changed) {
-			cpumask_or(changes, changes, cpu_sibling_mask(cpu));
-			cpu = cpu_last_thread_sibling(cpu);
-		}
-	}
-
-	return cpumask_weight(changes);
-}

/*
 * Retrieve the new associativity information for a virtual processor's
 * home node.
 */
-static long hcall_vphn(unsigned long cpu, __be32 *associativity)
-{
-	long rc;
-	long retbuf[PLPAR_HCALL9_BUFSIZE] = {0};
-	u64 flags = 1;
-	int hwcpu = get_hard_smp_processor_id(cpu);
-
-	rc = plpar_hcall9(H_HOME_NODE_ASSOCIATIVITY, retbuf, flags, hwcpu);
-	vphn_unpack_associativity(retbuf, associativity);
-
-	return rc;
-}
-
static long vphn_get_associativity(unsigned long cpu,
				   __be32 *associativity)
{
	long rc;

-	rc = hcall_vphn(cpu, associativity);
+	rc = hcall_vphn(get_hard_smp_processor_id(cpu),
+			VPHN_FLAG_VCPU, associativity);

	switch (rc) {
-	case H_FUNCTION:
-		printk_once(KERN_INFO
-			"VPHN is not supported. Disabling polling...\n");
-		stop_topology_update();
-		break;
-	case H_HARDWARE:
-		printk(KERN_ERR
-			"hcall_vphn() experienced a hardware fault "
-			"preventing VPHN. Disabling polling...\n");
-		stop_topology_update();
-		break;
	case H_SUCCESS:
		dbg("VPHN hcall succeeded. Reset polling...\n");
-		timed_topology_update(0);
+		goto out;
+
+	case H_FUNCTION:
+		pr_err_ratelimited("VPHN unsupported. Disabling polling...\n");
+		break;
+	case H_HARDWARE:
+		pr_err_ratelimited("hcall_vphn() experienced a hardware fault "
+				   "preventing VPHN. Disabling polling...\n");
+		break;
+	case H_PARAMETER:
+		pr_err_ratelimited("hcall_vphn() was passed an invalid parameter. "
+				   "Disabling polling...\n");
+		break;
+	default:
+		pr_err_ratelimited("hcall_vphn() returned %ld. Disabling polling...\n"
+				   , rc);
		break;
	}
-
+out:
	return rc;
}

...
	return new_nid;
}

-/*
- * Update the CPU maps and sysfs entries for a single CPU when its NUMA
- * characteristics change. This function doesn't perform any locking and is
- * only safe to call from stop_machine().
- */
-static int update_cpu_topology(void *data)
+int cpu_to_coregroup_id(int cpu)
{
-	struct topology_update_data *update;
-	unsigned long cpu;
+	__be32 associativity[VPHN_ASSOC_BUFSIZE] = {0};
+	int index;

-	if (!data)
-		return -EINVAL;
+	if (cpu < 0 || cpu > nr_cpu_ids)
+		return -1;

-	cpu = smp_processor_id();
-
-	for (update = data; update; update = update->next) {
-		int new_nid = update->new_nid;
-		if (cpu != update->cpu)
-			continue;
-
-		unmap_cpu_from_node(cpu);
-		map_cpu_to_node(cpu, new_nid);
-		set_cpu_numa_node(cpu, new_nid);
-		set_cpu_numa_mem(cpu, local_memory_node(new_nid));
-		vdso_getcpu_init();
-	}
-
-	return 0;
-}
-
-static int update_lookup_table(void *data)
-{
-	struct topology_update_data *update;
-
-	if (!data)
-		return -EINVAL;
-
-	/*
-	 * Upon topology update, the numa-cpu lookup table needs to be updated
-	 * for all threads in the core, including offline CPUs, to ensure that
-	 * future hotplug operations respect the cpu-to-node associativity
-	 * properly.
-	 */
-	for (update = data; update; update = update->next) {
-		int nid, base, j;
-
-		nid = update->new_nid;
-		base = cpu_first_thread_sibling(update->cpu);
-
-		for (j = 0; j < threads_per_core; j++) {
-			update_numa_cpu_lookup_table(base + j, nid);
-		}
-	}
-
-	return 0;
-}
-
-/*
- * Update the node maps and sysfs entries for each cpu whose home node
- * has changed. Returns 1 when the topology has changed, and 0 otherwise.
- *
- * cpus_locked says whether we already hold cpu_hotplug_lock.
- */
-int numa_update_cpu_topology(bool cpus_locked)
-{
-	unsigned int cpu, sibling, changed = 0;
-	struct topology_update_data *updates, *ud;
-	cpumask_t updated_cpus;
-	struct device *dev;
-	int weight, new_nid, i = 0;
-
-	if (!prrn_enabled && !vphn_enabled && topology_inited)
-		return 0;
-
-	weight = cpumask_weight(&cpu_associativity_changes_mask);
-	if (!weight)
-		return 0;
-
-	updates = kcalloc(weight, sizeof(*updates), GFP_KERNEL);
-	if (!updates)
-		return 0;
-
-	cpumask_clear(&updated_cpus);
-
-	for_each_cpu(cpu, &cpu_associativity_changes_mask) {
-		/*
-		 * If siblings aren't flagged for changes, updates list
-		 * will be too short. Skip on this update and set for next
-		 * update.
-		 */
-		if (!cpumask_subset(cpu_sibling_mask(cpu),
-					&cpu_associativity_changes_mask)) {
-			pr_info("Sibling bits not set for associativity "
-					"change, cpu%d\n", cpu);
-			cpumask_or(&cpu_associativity_changes_mask,
-					&cpu_associativity_changes_mask,
-					cpu_sibling_mask(cpu));
-			cpu = cpu_last_thread_sibling(cpu);
-			continue;
-		}
-
-		new_nid = find_and_online_cpu_nid(cpu);
-
-		if (new_nid == numa_cpu_lookup_table[cpu]) {
-			cpumask_andnot(&cpu_associativity_changes_mask,
-					&cpu_associativity_changes_mask,
-					cpu_sibling_mask(cpu));
-			dbg("Assoc chg gives same node %d for cpu%d\n",
-					new_nid, cpu);
-			cpu = cpu_last_thread_sibling(cpu);
-			continue;
-		}
-
-		for_each_cpu(sibling, cpu_sibling_mask(cpu)) {
-			ud = &updates[i++];
-			ud->next = &updates[i];
-			ud->cpu = sibling;
-			ud->new_nid = new_nid;
-			ud->old_nid = numa_cpu_lookup_table[sibling];
-			cpumask_set_cpu(sibling, &updated_cpus);
-		}
-		cpu = cpu_last_thread_sibling(cpu);
-	}
-
-	/*
-	 * Prevent processing of 'updates' from overflowing array
-	 * where last entry filled in a 'next' pointer.
-	 */
-	if (i)
-		updates[i-1].next = NULL;
-
-	pr_debug("Topology update for the following CPUs:\n");
-	if (cpumask_weight(&updated_cpus)) {
-		for (ud = &updates[0]; ud; ud = ud->next) {
-			pr_debug("cpu %d moving from node %d "
-					  "to %d\n", ud->cpu,
-					  ud->old_nid, ud->new_nid);
-		}
-	}
-
-	/*
-	 * In cases where we have nothing to update (because the updates list
-	 * is too short or because the new topology is same as the old one),
-	 * skip invoking update_cpu_topology() via stop-machine(). This is
-	 * necessary (and not just a fast-path optimization) since stop-machine
-	 * can end up electing a random CPU to run update_cpu_topology(), and
-	 * thus trick us into setting up incorrect cpu-node mappings (since
-	 * 'updates' is kzalloc()'ed).
-	 *
-	 * And for the similar reason, we will skip all the following updating.
-	 */
-	if (!cpumask_weight(&updated_cpus))
+	if (!coregroup_enabled)
		goto out;

-	if (cpus_locked)
-		stop_machine_cpuslocked(update_cpu_topology, &updates[0],
-					&updated_cpus);
-	else
-		stop_machine(update_cpu_topology, &updates[0], &updated_cpus);
+	if (!firmware_has_feature(FW_FEATURE_VPHN))
+		goto out;

-	/*
-	 * Update the numa-cpu lookup table with the new mappings, even for
-	 * offline CPUs. It is best to perform this update from the stop-
-	 * machine context.
-	 */
-	if (cpus_locked)
-		stop_machine_cpuslocked(update_lookup_table, &updates[0],
-					cpumask_of(raw_smp_processor_id()));
-	else
-		stop_machine(update_lookup_table, &updates[0],
-			     cpumask_of(raw_smp_processor_id()));
+	if (vphn_get_associativity(cpu, associativity))
+		goto out;

-	for (ud = &updates[0]; ud; ud = ud->next) {
-		unregister_cpu_under_node(ud->cpu, ud->old_nid);
-		register_cpu_under_node(ud->cpu, ud->new_nid);
-
-		dev = get_cpu_device(ud->cpu);
-		if (dev)
-			kobject_uevent(&dev->kobj, KOBJ_CHANGE);
-		cpumask_clear_cpu(ud->cpu, &cpu_associativity_changes_mask);
-		changed = 1;
-	}
+	index = of_read_number(associativity, 1);
+	if (index > primary_domain_index + 1)
+		return of_read_number(&associativity[index - 1], 1);

out:
-	kfree(updates);
-	return changed;
+	return cpu_to_core_id(cpu);
}
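A userspace model of the coregroup lookup above, with fabricated associativity values: the first cell is the array length, and when the array is deeper than `primary_domain_index + 1` the next-to-last domain is used as the coregroup id:

```c
#include <stdio.h>
#include <stdint.h>

/* domains occupy cells [1..len]; the coregroup id sits at cell len - 1 */
static int coregroup_id(const uint32_t *assoc, int primary_domain_index)
{
	int len = (int)assoc[0];

	if (len > primary_domain_index + 1)
		return (int)assoc[len - 1];
	return -1;   /* caller would fall back to cpu_to_core_id() */
}

int main(void)
{
	/* length 6, primary index 4: deep enough to carry a coregroup id */
	uint32_t assoc[] = { 6, 0, 0, 0, 1, 7, 42 };

	printf("coregroup = %d\n", coregroup_id(assoc, 4));   /* prints 7 */
	return 0;
}
```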
-
-int arch_update_cpu_topology(void)
-{
-	return numa_update_cpu_topology(true);
-}
-
-static void topology_work_fn(struct work_struct *work)
-{
-	rebuild_sched_domains();
-}
-static DECLARE_WORK(topology_work, topology_work_fn);
-
-static void topology_schedule_update(void)
-{
-	schedule_work(&topology_work);
-}
-
-static void topology_timer_fn(struct timer_list *unused)
-{
-	if (prrn_enabled && cpumask_weight(&cpu_associativity_changes_mask))
-		topology_schedule_update();
-	else if (vphn_enabled) {
-		if (update_cpu_associativity_changes_mask() > 0)
-			topology_schedule_update();
-		reset_topology_timer();
-	}
-}
-static struct timer_list topology_timer;
-
-static void reset_topology_timer(void)
-{
-	if (vphn_enabled)
-		mod_timer(&topology_timer, jiffies + topology_timer_secs * HZ);
-}
-
-#ifdef CONFIG_SMP
-
-static int dt_update_callback(struct notifier_block *nb,
-				unsigned long action, void *data)
-{
-	struct of_reconfig_data *update = data;
-	int rc = NOTIFY_DONE;
-
-	switch (action) {
-	case OF_RECONFIG_UPDATE_PROPERTY:
-		if (!of_prop_cmp(update->dn->type, "cpu") &&
-		    !of_prop_cmp(update->prop->name, "ibm,associativity")) {
-			u32 core_id;
-			of_property_read_u32(update->dn, "reg", &core_id);
-			rc = dlpar_cpu_readd(core_id);
-			rc = NOTIFY_OK;
-		}
-		break;
-	}
-
-	return rc;
-}
-
-static struct notifier_block dt_update_nb = {
-	.notifier_call = dt_update_callback,
-};
-
-#endif
-
-/*
- * Start polling for associativity changes.
- */
-int start_topology_update(void)
-{
-	int rc = 0;
-
-	if (!topology_updates_enabled)
-		return 0;
-
-	if (firmware_has_feature(FW_FEATURE_PRRN)) {
-		if (!prrn_enabled) {
-			prrn_enabled = 1;
-#ifdef CONFIG_SMP
-			rc = of_reconfig_notifier_register(&dt_update_nb);
-#endif
-		}
-	}
-	if (firmware_has_feature(FW_FEATURE_VPHN) &&
-		   lppaca_shared_proc(get_lppaca())) {
-		if (!vphn_enabled) {
-			vphn_enabled = 1;
-			setup_cpu_associativity_change_counters();
-			timer_setup(&topology_timer, topology_timer_fn,
-				    TIMER_DEFERRABLE);
-			reset_topology_timer();
-		}
-	}
-
-	return rc;
-}
-
-/*
- * Disable polling for VPHN associativity changes.
- */
-int stop_topology_update(void)
-{
-	int rc = 0;
-
-	if (!topology_updates_enabled)
-		return 0;
-
-	if (prrn_enabled) {
-		prrn_enabled = 0;
-#ifdef CONFIG_SMP
-		rc = of_reconfig_notifier_unregister(&dt_update_nb);
-#endif
-	}
-	if (vphn_enabled) {
-		vphn_enabled = 0;
-		rc = del_timer_sync(&topology_timer);
-	}
-
-	return rc;
-}
-
-int prrn_is_enabled(void)
-{
-	return prrn_enabled;
-}
-
-void __init shared_proc_topology_init(void)
-{
-	if (lppaca_shared_proc(get_lppaca())) {
-		bitmap_fill(cpumask_bits(&cpu_associativity_changes_mask),
-			    nr_cpumask_bits);
-		numa_update_cpu_topology(false);
-	}
-}
-
-static int topology_read(struct seq_file *file, void *v)
-{
-	if (vphn_enabled || prrn_enabled)
-		seq_puts(file, "on\n");
-	else
-		seq_puts(file, "off\n");
-
-	return 0;
-}
-
-static int topology_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, topology_read, NULL);
-}
-
-static ssize_t topology_write(struct file *file, const char __user *buf,
-			      size_t count, loff_t *off)
-{
-	char kbuf[4]; /* "on" or "off" plus null. */
-	int read_len;
-
-	read_len = count < 3 ? count : 3;
-	if (copy_from_user(kbuf, buf, read_len))
-		return -EINVAL;
-
-	kbuf[read_len] = '\0';
-
-	if (!strncmp(kbuf, "on", 2)) {
-		topology_updates_enabled = true;
-		start_topology_update();
-	} else if (!strncmp(kbuf, "off", 3)) {
-		stop_topology_update();
-		topology_updates_enabled = false;
-	} else
-		return -EINVAL;
-
-	return count;
-}
-
-static const struct file_operations topology_ops = {
-	.read = seq_read,
-	.write = topology_write,
-	.open = topology_open,
-	.release = single_release
-};

static int topology_update_init(void)
{
-	start_topology_update();
-
-	if (vphn_enabled)
-		topology_schedule_update();
-
-	if (!proc_create("powerpc/topology_updates", 0644, NULL, &topology_ops))
-		return -ENOMEM;
-
	topology_inited = 1;
	return 0;
}
---|