@@ -5,17 +5,17 @@
 #include <linux/mm.h>
 #include <linux/slab.h>
 #include <linux/mmzone.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/compiler.h>
 #include <linux/highmem.h>
 #include <linux/export.h>
 #include <linux/spinlock.h>
 #include <linux/vmalloc.h>
+#include <linux/swap.h>
+#include <linux/swapops.h>
 
 #include "internal.h"
 #include <asm/dma.h>
-#include <asm/pgalloc.h>
-#include <asm/pgtable.h>
 
 /*
  * Permanent SPARSEMEM data:
@@ -65,10 +65,15 @@
 	unsigned long array_size = SECTIONS_PER_ROOT *
 				   sizeof(struct mem_section);
 
-	if (slab_is_available())
+	if (slab_is_available()) {
 		section = kzalloc_node(array_size, GFP_KERNEL, nid);
-	else
-		section = memblock_virt_alloc_node(array_size, nid);
+	} else {
+		section = memblock_alloc_node(array_size, SMP_CACHE_BYTES,
+					      nid);
+		if (!section)
+			panic("%s: Failed to allocate %lu bytes nid=%d\n",
+			      __func__, array_size, nid);
+	}
 
 	return section;
 }
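Note: this hunk is part of the bootmem retirement — memblock_virt_alloc_node() becomes memblock_alloc_node(), which can return NULL, so the caller now panics explicitly. The slab_is_available() test keeps selecting the boot-time allocator before the slab is up and kzalloc_node() afterwards. Below is a minimal userspace sketch of that early/late split; boot_alloc() and allocator_up are invented stand-ins for memblock and slab_is_available(), not kernel API.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static unsigned char boot_arena[4096];	/* stand-in for memblock-managed memory */
static size_t boot_used;
static int allocator_up;		/* stand-in for slab_is_available() */

/* Carve zeroed memory from the static boot arena. */
static void *boot_alloc(size_t size)
{
	void *p;

	if (boot_used + size > sizeof(boot_arena))
		return NULL;		/* the kernel hunk panics here instead */
	p = boot_arena + boot_used;
	boot_used += size;
	return memset(p, 0, size);
}

/* Mirror of the sparse_index_alloc() split: boot arena early, heap later. */
static void *section_alloc(size_t size)
{
	return allocator_up ? calloc(1, size) : boot_alloc(size);
}

int main(void)
{
	void *early = section_alloc(128);	/* served from boot_arena */

	allocator_up = 1;
	void *late = section_alloc(128);	/* served from the heap */

	printf("early=%p late=%p\n", early, late);
	free(late);
	return 0;
}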
@@ -78,8 +83,15 @@
 	unsigned long root = SECTION_NR_TO_ROOT(section_nr);
 	struct mem_section *section;
 
+	/*
+	 * An existing section is possible in the sub-section hotplug
+	 * case. First hot-add instantiates, follow-on hot-add reuses
+	 * the existing section.
+	 *
+	 * The mem_hotplug_lock resolves the apparent race below.
+	 */
 	if (mem_section[root])
-		return -EEXIST;
+		return 0;
 
 	section = sparse_index_alloc(nid);
 	if (!section)
@@ -97,7 +109,7 @@
 #endif
 
 #ifdef CONFIG_SPARSEMEM_EXTREME
-int __section_nr(struct mem_section* ms)
+unsigned long __section_nr(struct mem_section *ms)
 {
 	unsigned long root_nr;
 	struct mem_section *root = NULL;
@@ -116,9 +128,9 @@
 	return (root_nr * SECTIONS_PER_ROOT) + (ms - root);
 }
 #else
-int __section_nr(struct mem_section* ms)
+unsigned long __section_nr(struct mem_section *ms)
 {
-	return (int)(ms - mem_section[0]);
+	return (unsigned long)(ms - mem_section[0]);
 }
 #endif
 
@@ -173,10 +185,10 @@
  * Keeping track of this gives us an easy way to break out of
  * those loops early.
  */
-int __highest_present_section_nr;
+unsigned long __highest_present_section_nr;
 static void section_mark_present(struct mem_section *ms)
 {
-	int section_nr = __section_nr(ms);
+	unsigned long section_nr = __section_nr(ms);
 
 	if (section_nr > __highest_present_section_nr)
 		__highest_present_section_nr = section_nr;
@@ -184,16 +196,6 @@
 	ms->section_mem_map |= SECTION_MARKED_PRESENT;
 }
 
-static inline int next_present_section_nr(int section_nr)
-{
-	do {
-		section_nr++;
-		if (present_section_nr(section_nr))
-			return section_nr;
-	} while ((section_nr <= __highest_present_section_nr));
-
-	return -1;
-}
 #define for_each_present_section_nr(start, section_nr)		\
 	for (section_nr = next_present_section_nr(start-1);	\
 	     ((section_nr != -1) &&				\
@@ -205,8 +207,49 @@
 	return next_present_section_nr(-1);
 }
 
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+static void subsection_mask_set(unsigned long *map, unsigned long pfn,
+		unsigned long nr_pages)
+{
+	int idx = subsection_map_index(pfn);
+	int end = subsection_map_index(pfn + nr_pages - 1);
+
+	bitmap_set(map, idx, end - idx + 1);
+}
+
+void __init subsection_map_init(unsigned long pfn, unsigned long nr_pages)
+{
+	int end_sec = pfn_to_section_nr(pfn + nr_pages - 1);
+	unsigned long nr, start_sec = pfn_to_section_nr(pfn);
+
+	if (!nr_pages)
+		return;
+
+	for (nr = start_sec; nr <= end_sec; nr++) {
+		struct mem_section *ms;
+		unsigned long pfns;
+
+		pfns = min(nr_pages, PAGES_PER_SECTION
+				- (pfn & ~PAGE_SECTION_MASK));
+		ms = __nr_to_section(nr);
+		subsection_mask_set(ms->usage->subsection_map, pfn, pfns);
+
+		pr_debug("%s: sec: %lu pfns: %lu set(%d, %d)\n", __func__, nr,
+				pfns, subsection_map_index(pfn),
+				subsection_map_index(pfn + pfns - 1));
+
+		pfn += pfns;
+		nr_pages -= pfns;
+	}
+}
+#else
+void __init subsection_map_init(unsigned long pfn, unsigned long nr_pages)
+{
+}
+#endif
+
 /* Record a memory area against a node. */
-void __init memory_present(int nid, unsigned long start, unsigned long end)
+static void __init memory_present(int nid, unsigned long start, unsigned long end)
 {
 	unsigned long pfn;
 
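Note: subsection_map_init() is new with sub-section hotplug support — each present section gains a bitmap recording which of its subsections are backed, so ZONE_DEVICE/pmem ranges no longer have to be full-section aligned. The index arithmetic is plain masking and division. A standalone sketch follows; the constants are x86_64 assumptions (4K pages, 128M sections, 2M subsections), not values taken from this diff.

#include <stdio.h>

#define PAGE_SHIFT		12
#define SECTION_SIZE_BITS	27			/* 128M sections */
#define SUBSECTION_SHIFT	21			/* 2M subsections */
#define PAGES_PER_SECTION	(1UL << (SECTION_SIZE_BITS - PAGE_SHIFT))
#define PAGES_PER_SUBSECTION	(1UL << (SUBSECTION_SHIFT - PAGE_SHIFT))
#define SUBSECTIONS_PER_SECTION	(PAGES_PER_SECTION / PAGES_PER_SUBSECTION)

/* Same math as the kernel's subsection_map_index(): position of the
 * bit that tracks this pfn within its section's subsection_map. */
static unsigned int subsection_map_index(unsigned long pfn)
{
	return (pfn & (PAGES_PER_SECTION - 1)) / PAGES_PER_SUBSECTION;
}

int main(void)
{
	/* an 8M hot-add starting 4M past the section boundary at 1G */
	unsigned long pfn = (1UL << (30 - PAGE_SHIFT)) + 2 * PAGES_PER_SUBSECTION;
	unsigned long nr_pages = 4 * PAGES_PER_SUBSECTION;
	unsigned int idx = subsection_map_index(pfn);
	unsigned int end = subsection_map_index(pfn + nr_pages - 1);

	/* bitmap_set(map, idx, end - idx + 1) would set bits 2..5 here */
	printf("pfn %#lx..%#lx -> subsection bits %u..%u (of %lu)\n",
	       pfn, pfn + nr_pages - 1, idx, end, SUBSECTIONS_PER_SECTION);
	return 0;
}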
@@ -216,7 +259,10 @@
 
 		size = sizeof(struct mem_section*) * NR_SECTION_ROOTS;
 		align = 1 << (INTERNODE_CACHE_SHIFT);
-		mem_section = memblock_virt_alloc(size, align);
+		mem_section = memblock_alloc(size, align);
+		if (!mem_section)
+			panic("%s: Failed to allocate %lu bytes align=0x%lx\n",
+			      __func__, size, align);
 	}
 #endif
 
@@ -239,6 +285,20 @@
 }
 
 /*
+ * Mark all memblocks as present using memory_present().
+ * This is a convenience function that is useful to mark all of the systems
+ * memory as present during initialization.
+ */
+static void __init memblocks_present(void)
+{
+	unsigned long start, end;
+	int i, nid;
+
+	for_each_mem_pfn_range(i, MAX_NUMNODES, &start, &end, &nid)
+		memory_present(nid, start, end);
+}
+
+/*
  * Subtle, we encode the real pfn into the mem_map such that
  * the identity pfn - section_mem_map will return the actual
  * physical page frame number.
@@ -252,6 +312,7 @@
 	return coded_mem_map;
 }
 
+#ifdef CONFIG_MEMORY_HOTPLUG
 /*
  * Decode mem_map from the coded memmap
  */
@@ -261,36 +322,35 @@
 	coded_mem_map &= SECTION_MAP_MASK;
 	return ((struct page *)coded_mem_map) + section_nr_to_pfn(pnum);
 }
+#endif /* CONFIG_MEMORY_HOTPLUG */
 
 static void __meminit sparse_init_one_section(struct mem_section *ms,
 		unsigned long pnum, struct page *mem_map,
-		unsigned long *pageblock_bitmap)
+		struct mem_section_usage *usage, unsigned long flags)
 {
 	ms->section_mem_map &= ~SECTION_MAP_MASK;
-	ms->section_mem_map |= sparse_encode_mem_map(mem_map, pnum) |
-							SECTION_HAS_MEM_MAP;
-	ms->pageblock_flags = pageblock_bitmap;
+	ms->section_mem_map |= sparse_encode_mem_map(mem_map, pnum)
+		| SECTION_HAS_MEM_MAP | flags;
+	ms->usage = usage;
 }
 
-unsigned long usemap_size(void)
+static unsigned long usemap_size(void)
 {
 	return BITS_TO_LONGS(SECTION_BLOCKFLAGS_BITS) * sizeof(unsigned long);
 }
 
-#ifdef CONFIG_MEMORY_HOTPLUG
-static unsigned long *__kmalloc_section_usemap(void)
+size_t mem_section_usage_size(void)
 {
-	return kmalloc(usemap_size(), GFP_KERNEL);
+	return sizeof(struct mem_section_usage) + usemap_size();
 }
-#endif /* CONFIG_MEMORY_HOTPLUG */
 
 #ifdef CONFIG_MEMORY_HOTREMOVE
-static unsigned long * __init
+static struct mem_section_usage * __init
 sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat,
 					 unsigned long size)
 {
+	struct mem_section_usage *usage;
 	unsigned long goal, limit;
-	unsigned long *p;
 	int nid;
 	/*
 	 * A page may contain usemaps for other sections preventing the
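Note: the usemap no longer hangs off the section as a bare pointer — it is folded into struct mem_section_usage together with the new subsection_map, and mem_section_usage_size() sizes the fixed header plus the variable-length usemap as one allocation. A userspace sketch of that flexible-array shape; the field sizes here are illustrative assumptions, not copied from the kernel headers.

#include <stdio.h>
#include <stdlib.h>

#define SUBSECTIONS_PER_SECTION	64	/* assumed, see the earlier sketch */
#define BITS_PER_LONG		(8 * sizeof(unsigned long))

struct mem_section_usage {
	unsigned long subsection_map[SUBSECTIONS_PER_SECTION / BITS_PER_LONG];
	unsigned long pageblock_flags[];	/* flexible trailing usemap */
};

static size_t usemap_size(void)
{
	return 4 * sizeof(unsigned long);	/* stand-in for the real bitmap size */
}

/* Header plus trailing usemap, allocated in one block. */
static size_t mem_section_usage_size(void)
{
	return sizeof(struct mem_section_usage) + usemap_size();
}

int main(void)
{
	struct mem_section_usage *usage = calloc(1, mem_section_usage_size());

	if (!usage)
		return 1;
	printf("header %zu + usemap %zu = %zu bytes\n",
	       sizeof(*usage), usemap_size(), mem_section_usage_size());
	free(usage);
	return 0;
}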
@@ -306,17 +366,16 @@
 	limit = goal + (1UL << PA_SECTION_SHIFT);
 	nid = early_pfn_to_nid(goal >> PAGE_SHIFT);
 again:
-	p = memblock_virt_alloc_try_nid_nopanic(size,
-						SMP_CACHE_BYTES, goal, limit,
-						nid);
-	if (!p && limit) {
+	usage = memblock_alloc_try_nid(size, SMP_CACHE_BYTES, goal, limit, nid);
+	if (!usage && limit) {
 		limit = 0;
 		goto again;
 	}
-	return p;
+	return usage;
 }
 
-static void __init check_usemap_section_nr(int nid, unsigned long *usemap)
+static void __init check_usemap_section_nr(int nid,
+		struct mem_section_usage *usage)
 {
 	unsigned long usemap_snr, pgdat_snr;
 	static unsigned long old_usemap_snr;
@@ -330,7 +389,7 @@
 		old_pgdat_snr = NR_MEM_SECTIONS;
 	}
 
-	usemap_snr = pfn_to_section_nr(__pa(usemap) >> PAGE_SHIFT);
+	usemap_snr = pfn_to_section_nr(__pa(usage) >> PAGE_SHIFT);
 	pgdat_snr = pfn_to_section_nr(__pa(pgdat) >> PAGE_SHIFT);
 	if (usemap_snr == pgdat_snr)
 		return;
@@ -358,14 +417,15 @@
 		usemap_snr, pgdat_snr, nid);
 }
 #else
-static unsigned long * __init
+static struct mem_section_usage * __init
 sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat,
 					 unsigned long size)
 {
-	return memblock_virt_alloc_node_nopanic(size, pgdat->node_id);
+	return memblock_alloc_node(size, SMP_CACHE_BYTES, pgdat->node_id);
 }
 
-static void __init check_usemap_section_nr(int nid, unsigned long *usemap)
+static void __init check_usemap_section_nr(int nid,
+		struct mem_section_usage *usage)
 {
 }
 #endif /* CONFIG_MEMORY_HOTREMOVE */
@@ -382,18 +442,22 @@
 	return PAGE_ALIGN(sizeof(struct page) * PAGES_PER_SECTION);
 }
 
-struct page __init *sparse_mem_map_populate(unsigned long pnum, int nid,
-		struct vmem_altmap *altmap)
+struct page __init *__populate_section_memmap(unsigned long pfn,
+		unsigned long nr_pages, int nid, struct vmem_altmap *altmap)
 {
 	unsigned long size = section_map_size();
 	struct page *map = sparse_buffer_alloc(size);
+	phys_addr_t addr = __pa(MAX_DMA_ADDRESS);
 
 	if (map)
 		return map;
 
-	map = memblock_virt_alloc_try_nid(size,
-					  PAGE_SIZE, __pa(MAX_DMA_ADDRESS),
-					  BOOTMEM_ALLOC_ACCESSIBLE, nid);
+	map = memblock_alloc_try_nid_raw(size, size, addr,
+					  MEMBLOCK_ALLOC_ACCESSIBLE, nid);
+	if (!map)
+		panic("%s: Failed to allocate %lu bytes align=0x%lx nid=%d from=%pa\n",
+		      __func__, size, PAGE_SIZE, nid, &addr);
+
	return map;
 }
 #endif /* !CONFIG_SPARSEMEM_VMEMMAP */
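Note: the replacement call passes the allocation size as its own alignment — memblock_alloc_try_nid_raw(size, size, ...) — so a section's memmap can be backed by a single PMD in the VMEMMAP case. The userspace equivalent of "aligned to its own size" is posix_memalign; the 2M figure below is an x86_64 assumption, not taken from this diff.

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	size_t size = 2 * 1024 * 1024;	/* assumed PMD-sized memmap chunk */
	void *map = NULL;

	/* allocate size bytes aligned to size, like
	 * memblock_alloc_try_nid_raw(size, size, ...) in the hunk above */
	if (posix_memalign(&map, size, size))
		return 1;
	printf("map=%p aligned=%d\n", map,
	       ((uintptr_t)map & (size - 1)) == 0);
	free(map);
	return 0;
}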
@@ -401,13 +465,23 @@
 static void *sparsemap_buf __meminitdata;
 static void *sparsemap_buf_end __meminitdata;
 
+static inline void __meminit sparse_buffer_free(unsigned long size)
+{
+	WARN_ON(!sparsemap_buf || size == 0);
+	memblock_free_early(__pa(sparsemap_buf), size);
+}
+
 static void __init sparse_buffer_init(unsigned long size, int nid)
 {
+	phys_addr_t addr = __pa(MAX_DMA_ADDRESS);
 	WARN_ON(sparsemap_buf);	/* forgot to call sparse_buffer_fini()? */
-	sparsemap_buf =
-		memblock_virt_alloc_try_nid_raw(size, PAGE_SIZE,
-						__pa(MAX_DMA_ADDRESS),
-						BOOTMEM_ALLOC_ACCESSIBLE, nid);
+	/*
+	 * Pre-allocated buffer is mainly used by __populate_section_memmap
+	 * and we want it to be properly aligned to the section size - this is
+	 * especially the case for VMEMMAP which maps memmap to PMDs
+	 */
+	sparsemap_buf = memblock_alloc_exact_nid_raw(size, section_map_size(),
+						addr, MEMBLOCK_ALLOC_ACCESSIBLE, nid);
 	sparsemap_buf_end = sparsemap_buf + size;
 }
 
@@ -416,7 +490,7 @@
 	unsigned long size = sparsemap_buf_end - sparsemap_buf;
 
 	if (sparsemap_buf && size > 0)
-		memblock_free_early(__pa(sparsemap_buf), size);
+		sparse_buffer_free(size);
 	sparsemap_buf = NULL;
 }
 
@@ -425,11 +499,15 @@
 	void *ptr = NULL;
 
 	if (sparsemap_buf) {
-		ptr = PTR_ALIGN(sparsemap_buf, size);
+		ptr = (void *) roundup((unsigned long)sparsemap_buf, size);
 		if (ptr + size > sparsemap_buf_end)
 			ptr = NULL;
-		else
+		else {
+			/* Free redundant aligned space */
+			if ((unsigned long)(ptr - sparsemap_buf) > 0)
+				sparse_buffer_free((unsigned long)(ptr - sparsemap_buf));
 			sparsemap_buf = ptr + size;
+		}
 	}
 	return ptr;
 }
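Note: sparse_buffer_alloc() is a bump allocator over the pre-sized sparsemap_buf. The change rounds the cursor up to the request size (matching the section-size alignment introduced above) and, rather than leaking the skipped bytes, hands the alignment gap back with sparse_buffer_free(). The same cursor-plus-giveback logic as a userspace sketch; give_back() is an invented stand-in for memblock_free_early().

#include <stdint.h>
#include <stdio.h>

static char buf_storage[1 << 16];
static char *sparsemap_buf = buf_storage;
static char *sparsemap_buf_end = buf_storage + sizeof(buf_storage);

static void give_back(char *p, size_t len)	/* stand-in for memblock_free_early() */
{
	printf("returned %zu unused bytes at offset %td\n", len, p - buf_storage);
}

static void *sparse_buffer_alloc(size_t size)	/* size assumed power of two */
{
	char *ptr = NULL;

	if (sparsemap_buf) {
		uintptr_t cur = (uintptr_t)sparsemap_buf;

		/* round the cursor up to a size-aligned boundary */
		ptr = (char *)((cur + size - 1) & ~((uintptr_t)size - 1));
		if (ptr + size > sparsemap_buf_end) {
			ptr = NULL;
		} else {
			if (ptr != sparsemap_buf)	/* free the alignment gap */
				give_back(sparsemap_buf, ptr - sparsemap_buf);
			sparsemap_buf = ptr + size;
		}
	}
	return ptr;
}

int main(void)
{
	void *a = sparse_buffer_alloc(4096);
	void *b = sparse_buffer_alloc(4096);	/* cursor already aligned: no gap */

	sparse_buffer_alloc(64);		/* leaves the cursor misaligned */
	void *c = sparse_buffer_alloc(4096);	/* gap handed back before use */
	printf("a=%p b=%p c=%p\n", a, b, c);
	return 0;
}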
@@ -446,23 +524,25 @@
 				   unsigned long pnum_end,
 				   unsigned long map_count)
 {
-	unsigned long pnum, usemap_longs, *usemap;
+	struct mem_section_usage *usage;
+	unsigned long pnum;
 	struct page *map;
 
-	usemap_longs = BITS_TO_LONGS(SECTION_BLOCKFLAGS_BITS);
-	usemap = sparse_early_usemaps_alloc_pgdat_section(NODE_DATA(nid),
-							  usemap_size() *
-							  map_count);
-	if (!usemap) {
+	usage = sparse_early_usemaps_alloc_pgdat_section(NODE_DATA(nid),
+			mem_section_usage_size() * map_count);
+	if (!usage) {
 		pr_err("%s: node[%d] usemap allocation failed", __func__, nid);
 		goto failed;
 	}
 	sparse_buffer_init(map_count * section_map_size(), nid);
 	for_each_present_section_nr(pnum_begin, pnum) {
+		unsigned long pfn = section_nr_to_pfn(pnum);
+
 		if (pnum >= pnum_end)
 			break;
 
-		map = sparse_mem_map_populate(pnum, nid, NULL);
+		map = __populate_section_memmap(pfn, PAGES_PER_SECTION,
+				nid, NULL);
 		if (!map) {
 			pr_err("%s: node[%d] memory map backing failed. Some memory will not be available.",
 			       __func__, nid);
@@ -470,9 +550,10 @@
 			sparse_buffer_fini();
 			goto failed;
 		}
-		check_usemap_section_nr(nid, usemap);
-		sparse_init_one_section(__nr_to_section(pnum), pnum, map, usemap);
-		usemap += usemap_longs;
+		check_usemap_section_nr(nid, usage);
+		sparse_init_one_section(__nr_to_section(pnum), pnum, map, usage,
+				SECTION_IS_EARLY);
+		usage = (void *) usage + mem_section_usage_size();
 	}
 	sparse_buffer_fini();
 	return;
@@ -494,9 +575,13 @@
  */
 void __init sparse_init(void)
 {
-	unsigned long pnum_begin = first_present_section_nr();
-	int nid_begin = sparse_early_nid(__nr_to_section(pnum_begin));
-	unsigned long pnum_end, map_count = 1;
+	unsigned long pnum_end, pnum_begin, map_count = 1;
+	int nid_begin;
+
+	memblocks_present();
+
+	pnum_begin = first_present_section_nr();
+	nid_begin = sparse_early_nid(__nr_to_section(pnum_begin));
 
 	/* Setup pageblock_order for HUGETLB_PAGE_SIZE_VARIABLE */
 	set_pageblock_order();
@@ -540,7 +625,7 @@
 }
 
 #ifdef CONFIG_MEMORY_HOTREMOVE
-/* Mark all memory sections within the pfn range as online */
+/* Mark all memory sections within the pfn range as offline */
 void offline_mem_sections(unsigned long start_pfn, unsigned long end_pfn)
 {
 	unsigned long pfn;
@@ -563,17 +648,17 @@
 #endif
 
 #ifdef CONFIG_SPARSEMEM_VMEMMAP
-static inline struct page *kmalloc_section_memmap(unsigned long pnum, int nid,
-		struct vmem_altmap *altmap)
+static struct page * __meminit populate_section_memmap(unsigned long pfn,
+		unsigned long nr_pages, int nid, struct vmem_altmap *altmap)
 {
-	/* This will make the necessary allocations eventually. */
-	return sparse_mem_map_populate(pnum, nid, altmap);
+	return __populate_section_memmap(pfn, nr_pages, nid, altmap);
 }
-static void __kfree_section_memmap(struct page *memmap,
+
+static void depopulate_section_memmap(unsigned long pfn, unsigned long nr_pages,
 		struct vmem_altmap *altmap)
 {
-	unsigned long start = (unsigned long)memmap;
-	unsigned long end = (unsigned long)(memmap + PAGES_PER_SECTION);
+	unsigned long start = (unsigned long) pfn_to_page(pfn);
+	unsigned long end = start + nr_pages * sizeof(struct page);
 
 	vmemmap_free(start, end, altmap);
 }
@@ -584,42 +669,67 @@
 
 	vmemmap_free(start, end, NULL);
 }
-#else
-static struct page *__kmalloc_section_memmap(void)
+
+static int clear_subsection_map(unsigned long pfn, unsigned long nr_pages)
 {
-	struct page *page, *ret;
-	unsigned long memmap_size = sizeof(struct page) * PAGES_PER_SECTION;
+	DECLARE_BITMAP(map, SUBSECTIONS_PER_SECTION) = { 0 };
+	DECLARE_BITMAP(tmp, SUBSECTIONS_PER_SECTION) = { 0 };
+	struct mem_section *ms = __pfn_to_section(pfn);
+	unsigned long *subsection_map = ms->usage
+		? &ms->usage->subsection_map[0] : NULL;
 
-	page = alloc_pages(GFP_KERNEL|__GFP_NOWARN, get_order(memmap_size));
-	if (page)
-		goto got_map_page;
+	subsection_mask_set(map, pfn, nr_pages);
+	if (subsection_map)
+		bitmap_and(tmp, map, subsection_map, SUBSECTIONS_PER_SECTION);
 
-	ret = vmalloc(memmap_size);
-	if (ret)
-		goto got_map_ptr;
+	if (WARN(!subsection_map || !bitmap_equal(tmp, map, SUBSECTIONS_PER_SECTION),
+			"section already deactivated (%#lx + %ld)\n",
+			pfn, nr_pages))
+		return -EINVAL;
 
-	return NULL;
-got_map_page:
-	ret = (struct page *)pfn_to_kaddr(page_to_pfn(page));
-got_map_ptr:
-
-	return ret;
+	bitmap_xor(subsection_map, map, subsection_map, SUBSECTIONS_PER_SECTION);
+	return 0;
 }
 
-static inline struct page *kmalloc_section_memmap(unsigned long pnum, int nid,
-		struct vmem_altmap *altmap)
+static bool is_subsection_map_empty(struct mem_section *ms)
 {
-	return __kmalloc_section_memmap();
+	return bitmap_empty(&ms->usage->subsection_map[0],
+			    SUBSECTIONS_PER_SECTION);
 }
 
-static void __kfree_section_memmap(struct page *memmap,
-		struct vmem_altmap *altmap)
+static int fill_subsection_map(unsigned long pfn, unsigned long nr_pages)
 {
-	if (is_vmalloc_addr(memmap))
-		vfree(memmap);
+	struct mem_section *ms = __pfn_to_section(pfn);
+	DECLARE_BITMAP(map, SUBSECTIONS_PER_SECTION) = { 0 };
+	unsigned long *subsection_map;
+	int rc = 0;
+
+	subsection_mask_set(map, pfn, nr_pages);
+
+	subsection_map = &ms->usage->subsection_map[0];
+
+	if (bitmap_empty(map, SUBSECTIONS_PER_SECTION))
+		rc = -EINVAL;
+	else if (bitmap_intersects(map, subsection_map, SUBSECTIONS_PER_SECTION))
+		rc = -EEXIST;
 	else
-		free_pages((unsigned long)memmap,
-			   get_order(sizeof(struct page) * PAGES_PER_SECTION));
+		bitmap_or(subsection_map, map, subsection_map,
+				SUBSECTIONS_PER_SECTION);
+
+	return rc;
+}
+#else
+struct page * __meminit populate_section_memmap(unsigned long pfn,
+		unsigned long nr_pages, int nid, struct vmem_altmap *altmap)
+{
+	return kvmalloc_node(array_size(sizeof(struct page),
+					PAGES_PER_SECTION), GFP_KERNEL, nid);
+}
+
+static void depopulate_section_memmap(unsigned long pfn, unsigned long nr_pages,
+		struct vmem_altmap *altmap)
+{
+	kvfree(pfn_to_page(pfn));
 }
 
 static void free_map_bootmem(struct page *memmap)
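Note: fill_subsection_map() and clear_subsection_map() above are the two halves of a bitmap state machine — filling rejects any overlap with already-populated subsections (-EEXIST), while clearing rejects ranges that are not fully populated and then drops exactly those bits with XOR. With 64 subsections per section the map fits in one word, so the same checks can be sketched with plain integer ops (the 64-bit width is the same assumption as in the earlier sketch):

#include <errno.h>
#include <stdio.h>

static unsigned long subsection_map;	/* 64 subsections -> one word */

/* Build a mask of nr bits starting at idx; no bounds checking here. */
static unsigned long range_mask(unsigned int idx, unsigned int nr)
{
	return (nr >= 64 ? ~0UL : ((1UL << nr) - 1)) << idx;
}

static int fill_subsection_map(unsigned int idx, unsigned int nr)
{
	unsigned long map = range_mask(idx, nr);

	if (!map)
		return -EINVAL;
	if (map & subsection_map)		/* any overlap: refuse */
		return -EEXIST;
	subsection_map |= map;
	return 0;
}

static int clear_subsection_map(unsigned int idx, unsigned int nr)
{
	unsigned long map = range_mask(idx, nr);

	if ((map & subsection_map) != map)	/* not fully populated */
		return -EINVAL;
	subsection_map ^= map;			/* drop exactly these bits */
	return 0;
}

int main(void)
{
	printf("fill 0-3:  %d\n", fill_subsection_map(0, 4));	/* 0 */
	printf("fill 2-5:  %d\n", fill_subsection_map(2, 4));	/* -EEXIST */
	printf("clear 0-1: %d\n", clear_subsection_map(0, 2));	/* 0 */
	printf("clear 0-3: %d\n", clear_subsection_map(0, 4));	/* -EINVAL */
	return 0;
}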
@@ -651,62 +761,179 @@
 		put_page_bootmem(page);
 	}
 }
+
+static int clear_subsection_map(unsigned long pfn, unsigned long nr_pages)
+{
+	return 0;
+}
+
+static bool is_subsection_map_empty(struct mem_section *ms)
+{
+	return true;
+}
+
+static int fill_subsection_map(unsigned long pfn, unsigned long nr_pages)
+{
+	return 0;
+}
 #endif /* CONFIG_SPARSEMEM_VMEMMAP */
 
 /*
- * returns the number of sections whose mem_maps were properly
- * set. If this is <=0, then that means that the passed-in
- * map was not consumed and must be freed.
+ * To deactivate a memory region, there are 3 cases to handle across
+ * two configurations (SPARSEMEM_VMEMMAP={y,n}):
+ *
+ * 1. deactivation of a partial hot-added section (only possible in
+ *    the SPARSEMEM_VMEMMAP=y case).
+ *      a) section was present at memory init.
+ *      b) section was hot-added post memory init.
+ * 2. deactivation of a complete hot-added section.
+ * 3. deactivation of a complete section from memory init.
+ *
+ * For 1, when subsection_map does not empty we will not be freeing the
+ * usage map, but still need to free the vmemmap range.
+ *
+ * For 2 and 3, the SPARSEMEM_VMEMMAP={y,n} cases are unified
  */
-int __meminit sparse_add_one_section(int nid, unsigned long start_pfn,
-				     struct vmem_altmap *altmap)
+static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
+		struct vmem_altmap *altmap)
+{
+	struct mem_section *ms = __pfn_to_section(pfn);
+	bool section_is_early = early_section(ms);
+	struct page *memmap = NULL;
+	bool empty;
+
+	if (clear_subsection_map(pfn, nr_pages))
+		return;
+
+	empty = is_subsection_map_empty(ms);
+	if (empty) {
+		unsigned long section_nr = pfn_to_section_nr(pfn);
+
+		/*
+		 * When removing an early section, the usage map is kept (as the
+		 * usage maps of other sections fall into the same page). It
+		 * will be re-used when re-adding the section - which is then no
+		 * longer an early section. If the usage map is PageReserved, it
+		 * was allocated during boot.
+		 */
+		if (!PageReserved(virt_to_page(ms->usage))) {
+			kfree(ms->usage);
+			ms->usage = NULL;
+		}
+		memmap = sparse_decode_mem_map(ms->section_mem_map, section_nr);
+		/*
+		 * Mark the section invalid so that valid_section()
+		 * return false. This prevents code from dereferencing
+		 * ms->usage array.
+		 */
+		ms->section_mem_map &= ~SECTION_HAS_MEM_MAP;
+	}
+
+	/*
+	 * The memmap of early sections is always fully populated. See
+	 * section_activate() and pfn_valid() .
+	 */
+	if (!section_is_early)
+		depopulate_section_memmap(pfn, nr_pages, altmap);
+	else if (memmap)
+		free_map_bootmem(memmap);
+
+	if (empty)
+		ms->section_mem_map = (unsigned long)NULL;
+}
+
+static struct page * __meminit section_activate(int nid, unsigned long pfn,
+		unsigned long nr_pages, struct vmem_altmap *altmap)
+{
+	struct mem_section *ms = __pfn_to_section(pfn);
+	struct mem_section_usage *usage = NULL;
+	struct page *memmap;
+	int rc = 0;
+
+	if (!ms->usage) {
+		usage = kzalloc(mem_section_usage_size(), GFP_KERNEL);
+		if (!usage)
+			return ERR_PTR(-ENOMEM);
+		ms->usage = usage;
+	}
+
+	rc = fill_subsection_map(pfn, nr_pages);
+	if (rc) {
+		if (usage)
+			ms->usage = NULL;
+		kfree(usage);
+		return ERR_PTR(rc);
+	}
+
+	/*
+	 * The early init code does not consider partially populated
+	 * initial sections, it simply assumes that memory will never be
+	 * referenced.  If we hot-add memory into such a section then we
+	 * do not need to populate the memmap and can simply reuse what
+	 * is already there.
+	 */
+	if (nr_pages < PAGES_PER_SECTION && early_section(ms))
+		return pfn_to_page(pfn);
+
+	memmap = populate_section_memmap(pfn, nr_pages, nid, altmap);
+	if (!memmap) {
+		section_deactivate(pfn, nr_pages, altmap);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	return memmap;
+}
+
+/**
+ * sparse_add_section - add a memory section, or populate an existing one
+ * @nid: The node to add section on
+ * @start_pfn: start pfn of the memory range
+ * @nr_pages: number of pfns to add in the section
+ * @altmap: device page map
+ *
+ * This is only intended for hotplug.
+ *
+ * Note that only VMEMMAP supports sub-section aligned hotplug,
+ * the proper alignment and size are gated by check_pfn_span().
+ *
+ *
+ * Return:
+ * * 0		- On success.
+ * * -EEXIST	- Section has been present.
+ * * -ENOMEM	- Out of memory.
+ */
+int __meminit sparse_add_section(int nid, unsigned long start_pfn,
+		unsigned long nr_pages, struct vmem_altmap *altmap)
 {
 	unsigned long section_nr = pfn_to_section_nr(start_pfn);
 	struct mem_section *ms;
 	struct page *memmap;
-	unsigned long *usemap;
 	int ret;
 
-	/*
-	 * no locking for this, because it does its own
-	 * plus, it does a kmalloc
-	 */
 	ret = sparse_index_init(section_nr, nid);
-	if (ret < 0 && ret != -EEXIST)
+	if (ret < 0)
 		return ret;
-	ret = 0;
-	memmap = kmalloc_section_memmap(section_nr, nid, altmap);
-	if (!memmap)
-		return -ENOMEM;
-	usemap = __kmalloc_section_usemap();
-	if (!usemap) {
-		__kfree_section_memmap(memmap, altmap);
-		return -ENOMEM;
-	}
 
-	ms = __pfn_to_section(start_pfn);
-	if (ms->section_mem_map & SECTION_MARKED_PRESENT) {
-		ret = -EEXIST;
-		goto out;
-	}
+	memmap = section_activate(nid, start_pfn, nr_pages, altmap);
+	if (IS_ERR(memmap))
+		return PTR_ERR(memmap);
 
-#ifdef CONFIG_DEBUG_VM
 	/*
 	 * Poison uninitialized struct pages in order to catch invalid flags
 	 * combinations.
 	 */
-	memset(memmap, PAGE_POISON_PATTERN, sizeof(struct page) * PAGES_PER_SECTION);
-#endif
+	page_init_poison(memmap, sizeof(struct page) * nr_pages);
 
+	ms = __nr_to_section(section_nr);
+	set_section_nid(section_nr, nid);
 	section_mark_present(ms);
-	sparse_init_one_section(ms, section_nr, memmap, usemap);
 
-out:
-	if (ret < 0) {
-		kfree(usemap);
-		__kfree_section_memmap(memmap, altmap);
-	}
-	return ret;
+	/* Align memmap to section boundary in the subsection case */
+	if (section_nr_to_pfn(section_nr) != start_pfn)
+		memmap = pfn_to_page(section_nr_to_pfn(section_nr));
+	sparse_init_one_section(ms, section_nr, memmap, ms->usage, 0);
+
+	return 0;
 }
 
 #ifdef CONFIG_MEMORY_FAILURE
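Note: section_activate() reports failure through the kernel's ERR_PTR convention — error codes ride in the top of the pointer range, so a single return value can carry either a valid memmap or an errno, which sparse_add_section() unpacks with IS_ERR()/PTR_ERR(). A self-contained userspace rendition of that idiom (the real macros live in include/linux/err.h; this is a simplified copy for illustration):

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

#define MAX_ERRNO 4095

/* The last 4095 addresses of the pointer space are reserved for errors. */
static inline void *ERR_PTR(long error)
{
	return (void *)error;
}

static inline long PTR_ERR(const void *ptr)
{
	return (long)ptr;
}

static inline int IS_ERR(const void *ptr)
{
	return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}

/* Toy activate(): returns either a real allocation or an encoded errno. */
static void *activate(int fail)
{
	if (fail)
		return ERR_PTR(-ENOMEM);
	return malloc(64);
}

int main(void)
{
	void *ok = activate(0);
	void *bad = activate(1);

	printf("ok:  IS_ERR=%d\n", IS_ERR(ok));
	printf("bad: IS_ERR=%d err=%ld\n", IS_ERR(bad), PTR_ERR(bad));
	free(ok);
	return 0;
}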
@@ -714,12 +941,18 @@
 {
 	int i;
 
-	if (!memmap)
+	/*
+	 * A further optimization is to have per section refcounted
+	 * num_poisoned_pages.  But that would need more space per memmap, so
+	 * for now just do a quick global check to speed up this routine in the
+	 * absence of bad pages.
+	 */
+	if (atomic_long_read(&num_poisoned_pages) == 0)
 		return;
 
 	for (i = 0; i < nr_pages; i++) {
 		if (PageHWPoison(&memmap[i])) {
-			atomic_long_sub(1, &num_poisoned_pages);
+			num_poisoned_pages_dec();
 			ClearPageHWPoison(&memmap[i]);
 		}
 	}
@@ -730,50 +963,12 @@
 }
 #endif
 
-static void free_section_usemap(struct page *memmap, unsigned long *usemap,
+void sparse_remove_section(struct mem_section *ms, unsigned long pfn,
+		unsigned long nr_pages, unsigned long map_offset,
 		struct vmem_altmap *altmap)
 {
-	struct page *usemap_page;
-
-	if (!usemap)
-		return;
-
-	usemap_page = virt_to_page(usemap);
-	/*
-	 * Check to see if allocation came from hot-plug-add
-	 */
-	if (PageSlab(usemap_page) || PageCompound(usemap_page)) {
-		kfree(usemap);
-		if (memmap)
-			__kfree_section_memmap(memmap, altmap);
-		return;
-	}
-
-	/*
-	 * The usemap came from bootmem. This is packed with other usemaps
-	 * on the section which has pgdat at boot time. Just keep it as is now.
-	 */
-
-	if (memmap)
-		free_map_bootmem(memmap);
-}
-
-void sparse_remove_one_section(struct mem_section *ms, unsigned long map_offset,
-		struct vmem_altmap *altmap)
-{
-	struct page *memmap = NULL;
-	unsigned long *usemap = NULL;
-
-	if (ms->section_mem_map) {
-		usemap = ms->pageblock_flags;
-		memmap = sparse_decode_mem_map(ms->section_mem_map,
-					       __section_nr(ms));
-		ms->section_mem_map = 0;
-		ms->pageblock_flags = NULL;
-	}
-
-	clear_hwpoisoned_pages(memmap + map_offset,
-			       PAGES_PER_SECTION - map_offset);
-	free_section_usemap(memmap, usemap, altmap);
+	clear_hwpoisoned_pages(pfn_to_page(pfn) + map_offset,
+			       nr_pages - map_offset);
+	section_deactivate(pfn, nr_pages, altmap);
 }
 #endif /* CONFIG_MEMORY_HOTPLUG */
---|