From 102a0743326a03cd1a1202ceda21e175b7d3575c Mon Sep 17 00:00:00 2001 From: hc <hc@nodka.com> Date: Tue, 20 Feb 2024 01:20:52 +0000 Subject: [PATCH] add new system file --- kernel/drivers/base/memory.c | 563 ++++++++++++++++++++++++-------------------------------- 1 files changed, 243 insertions(+), 320 deletions(-) diff --git a/kernel/drivers/base/memory.c b/kernel/drivers/base/memory.c index e270abc..49eb142 100644 --- a/kernel/drivers/base/memory.c +++ b/kernel/drivers/base/memory.c @@ -19,29 +19,50 @@ #include <linux/memory.h> #include <linux/memory_hotplug.h> #include <linux/mm.h> -#include <linux/mutex.h> #include <linux/stat.h> #include <linux/slab.h> +#include <linux/xarray.h> #include <linux/atomic.h> #include <linux/uaccess.h> -static DEFINE_MUTEX(mem_sysfs_mutex); - #define MEMORY_CLASS_NAME "memory" + +static const char *const online_type_to_str[] = { + [MMOP_OFFLINE] = "offline", + [MMOP_ONLINE] = "online", + [MMOP_ONLINE_KERNEL] = "online_kernel", + [MMOP_ONLINE_MOVABLE] = "online_movable", +}; + +int memhp_online_type_from_str(const char *str) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(online_type_to_str); i++) { + if (sysfs_streq(str, online_type_to_str[i])) + return i; + } + return -EINVAL; +} #define to_memory_block(dev) container_of(dev, struct memory_block, dev) static int sections_per_block; -static inline int base_memory_block_id(int section_nr) +static inline unsigned long memory_block_id(unsigned long section_nr) { return section_nr / sections_per_block; } -static inline int pfn_to_block_id(unsigned long pfn) +static inline unsigned long pfn_to_block_id(unsigned long pfn) { - return base_memory_block_id(pfn_to_section_nr(pfn)); + return memory_block_id(pfn_to_section_nr(pfn)); +} + +static inline unsigned long phys_to_block_id(unsigned long phys) +{ + return pfn_to_block_id(PFN_DOWN(phys)); } static int memory_subsys_online(struct device *dev); @@ -53,6 +74,13 @@ .online = memory_subsys_online, .offline = memory_subsys_offline, }; + +/* + * Memory blocks are cached in a local radix tree to avoid + * a costly linear search for the corresponding device on + * the subsystem bus. + */ +static DEFINE_XARRAY(memory_blocks); static BLOCKING_NOTIFIER_HEAD(memory_chain); @@ -68,20 +96,6 @@ } EXPORT_SYMBOL(unregister_memory_notifier); -static ATOMIC_NOTIFIER_HEAD(memory_isolate_chain); - -int register_memory_isolate_notifier(struct notifier_block *nb) -{ - return atomic_notifier_chain_register(&memory_isolate_chain, nb); -} -EXPORT_SYMBOL(register_memory_isolate_notifier); - -void unregister_memory_isolate_notifier(struct notifier_block *nb) -{ - atomic_notifier_chain_unregister(&memory_isolate_chain, nb); -} -EXPORT_SYMBOL(unregister_memory_isolate_notifier); - static void memory_block_release(struct device *dev) { struct memory_block *mem = to_memory_block(dev); @@ -93,69 +107,40 @@ { return MIN_MEMORY_BLOCK_SIZE; } - -static unsigned long get_memory_block_size(void) -{ - unsigned long block_sz; - - block_sz = memory_block_size_bytes(); - - /* Validate blk_sz is a power of 2 and not less than section size */ - if ((block_sz & (block_sz - 1)) || (block_sz < MIN_MEMORY_BLOCK_SIZE)) { - WARN_ON(1); - block_sz = MIN_MEMORY_BLOCK_SIZE; - } - - return block_sz; -} +EXPORT_SYMBOL_GPL(memory_block_size_bytes); /* - * use this as the physical section index that this memsection - * uses. + * Show the first physical section index (number) of this memory block. */ - -static ssize_t show_mem_start_phys_index(struct device *dev, - struct device_attribute *attr, char *buf) +static ssize_t phys_index_show(struct device *dev, + struct device_attribute *attr, char *buf) { struct memory_block *mem = to_memory_block(dev); unsigned long phys_index; phys_index = mem->start_section_nr / sections_per_block; - return sprintf(buf, "%08lx\n", phys_index); + + return sysfs_emit(buf, "%08lx\n", phys_index); } /* - * Show whether the section of memory is likely to be hot-removable + * Legacy interface that we cannot remove. Always indicate "removable" + * with CONFIG_MEMORY_HOTREMOVE - bad heuristic. */ -static ssize_t show_mem_removable(struct device *dev, - struct device_attribute *attr, char *buf) +static ssize_t removable_show(struct device *dev, struct device_attribute *attr, + char *buf) { - unsigned long i, pfn; - int ret = 1; - struct memory_block *mem = to_memory_block(dev); - - if (mem->state != MEM_ONLINE) - goto out; - - for (i = 0; i < sections_per_block; i++) { - if (!present_section_nr(mem->start_section_nr + i)) - continue; - pfn = section_nr_to_pfn(mem->start_section_nr + i); - ret &= is_mem_section_removable(pfn, PAGES_PER_SECTION); - } - -out: - return sprintf(buf, "%d\n", ret); + return sysfs_emit(buf, "%d\n", (int)IS_ENABLED(CONFIG_MEMORY_HOTREMOVE)); } /* * online, offline, going offline, etc. */ -static ssize_t show_mem_state(struct device *dev, - struct device_attribute *attr, char *buf) +static ssize_t state_show(struct device *dev, struct device_attribute *attr, + char *buf) { struct memory_block *mem = to_memory_block(dev); - ssize_t len = 0; + const char *output; /* * We can probably put these states in a nice little array @@ -163,71 +148,25 @@ */ switch (mem->state) { case MEM_ONLINE: - len = sprintf(buf, "online\n"); + output = "online"; break; case MEM_OFFLINE: - len = sprintf(buf, "offline\n"); + output = "offline"; break; case MEM_GOING_OFFLINE: - len = sprintf(buf, "going-offline\n"); + output = "going-offline"; break; default: - len = sprintf(buf, "ERROR-UNKNOWN-%ld\n", - mem->state); WARN_ON(1); - break; + return sysfs_emit(buf, "ERROR-UNKNOWN-%ld\n", mem->state); } - return len; + return sysfs_emit(buf, "%s\n", output); } int memory_notify(unsigned long val, void *v) { return blocking_notifier_call_chain(&memory_chain, val, v); -} - -int memory_isolate_notify(unsigned long val, void *v) -{ - return atomic_notifier_call_chain(&memory_isolate_chain, val, v); -} - -/* - * The probe routines leave the pages uninitialized, just as the bootmem code - * does. Make sure we do not access them, but instead use only information from - * within sections. - */ -static bool pages_correctly_probed(unsigned long start_pfn) -{ - unsigned long section_nr = pfn_to_section_nr(start_pfn); - unsigned long section_nr_end = section_nr + sections_per_block; - unsigned long pfn = start_pfn; - - /* - * memmap between sections is not contiguous except with - * SPARSEMEM_VMEMMAP. We lookup the page once per section - * and assume memmap is contiguous within each section - */ - for (; section_nr < section_nr_end; section_nr++) { - if (WARN_ON_ONCE(!pfn_valid(pfn))) - return false; - - if (!present_section_nr(section_nr)) { - pr_warn("section %ld pfn[%lx, %lx) not present", - section_nr, pfn, pfn + PAGES_PER_SECTION); - return false; - } else if (!valid_section_nr(section_nr)) { - pr_warn("section %ld pfn[%lx, %lx) no valid memmap", - section_nr, pfn, pfn + PAGES_PER_SECTION); - return false; - } else if (online_section_nr(section_nr)) { - pr_warn("section %ld pfn[%lx, %lx) is already online", - section_nr, pfn, pfn + PAGES_PER_SECTION); - return false; - } - pfn += PAGES_PER_SECTION; - } - - return true; } /* @@ -236,7 +175,7 @@ */ static int memory_block_action(unsigned long start_section_nr, unsigned long action, - int online_type) + int online_type, int nid) { unsigned long start_pfn; unsigned long nr_pages = PAGES_PER_SECTION * sections_per_block; @@ -246,10 +185,7 @@ switch (action) { case MEM_ONLINE: - if (!pages_correctly_probed(start_pfn)) - return -EBUSY; - - ret = online_pages(start_pfn, nr_pages, online_type); + ret = online_pages(start_pfn, nr_pages, online_type, nid); break; case MEM_OFFLINE: ret = offline_pages(start_pfn, nr_pages); @@ -275,7 +211,7 @@ mem->state = MEM_GOING_OFFLINE; ret = memory_block_action(mem->start_section_nr, to_state, - mem->online_type); + mem->online_type, mem->nid); mem->state = ret ? from_state_req : to_state; @@ -292,17 +228,14 @@ return 0; /* - * If we are called from store_mem_state(), online_type will be - * set >= 0 Otherwise we were called from the device online - * attribute and need to set the online_type. + * When called via device_online() without configuring the online_type, + * we want to default to MMOP_ONLINE. */ - if (mem->online_type < 0) - mem->online_type = MMOP_ONLINE_KEEP; + if (mem->online_type == MMOP_OFFLINE) + mem->online_type = MMOP_ONLINE; ret = memory_block_change_state(mem, MEM_ONLINE, MEM_OFFLINE); - - /* clear online_type */ - mem->online_type = -1; + mem->online_type = MMOP_OFFLINE; return ret; } @@ -314,41 +247,27 @@ if (mem->state == MEM_OFFLINE) return 0; - /* Can't offline block with non-present sections */ - if (mem->section_count != sections_per_block) - return -EINVAL; - return memory_block_change_state(mem, MEM_OFFLINE, MEM_ONLINE); } -static ssize_t -store_mem_state(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) +static ssize_t state_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) { + const int online_type = memhp_online_type_from_str(buf); struct memory_block *mem = to_memory_block(dev); - int ret, online_type; + int ret; + + if (online_type < 0) + return -EINVAL; ret = lock_device_hotplug_sysfs(); if (ret) return ret; - if (sysfs_streq(buf, "online_kernel")) - online_type = MMOP_ONLINE_KERNEL; - else if (sysfs_streq(buf, "online_movable")) - online_type = MMOP_ONLINE_MOVABLE; - else if (sysfs_streq(buf, "online")) - online_type = MMOP_ONLINE_KEEP; - else if (sysfs_streq(buf, "offline")) - online_type = MMOP_OFFLINE; - else { - ret = -EINVAL; - goto err; - } - switch (online_type) { case MMOP_ONLINE_KERNEL: case MMOP_ONLINE_MOVABLE: - case MMOP_ONLINE_KEEP: + case MMOP_ONLINE: /* mem->online_type is protected by device_hotplug_lock */ mem->online_type = online_type; ret = device_online(&mem->dev); @@ -360,7 +279,6 @@ ret = -EINVAL; /* should never happen */ } -err: unlock_device_hotplug(); if (ret < 0) @@ -372,43 +290,44 @@ } /* - * phys_device is a bad name for this. What I really want - * is a way to differentiate between memory ranges that - * are part of physical devices that constitute - * a complete removable unit or fru. - * i.e. do these ranges belong to the same physical device, - * s.t. if I offline all of these sections I can then - * remove the physical device? + * Legacy interface that we cannot remove: s390x exposes the storage increment + * covered by a memory block, allowing for identifying which memory blocks + * comprise a storage increment. Since a memory block spans complete + * storage increments nowadays, this interface is basically unused. Other + * archs never exposed != 0. */ -static ssize_t show_phys_device(struct device *dev, +static ssize_t phys_device_show(struct device *dev, struct device_attribute *attr, char *buf) { struct memory_block *mem = to_memory_block(dev); - return sprintf(buf, "%d\n", mem->phys_device); + unsigned long start_pfn = section_nr_to_pfn(mem->start_section_nr); + + return sysfs_emit(buf, "%d\n", + arch_get_memory_phys_device(start_pfn)); } #ifdef CONFIG_MEMORY_HOTREMOVE -static void print_allowed_zone(char *buf, int nid, unsigned long start_pfn, - unsigned long nr_pages, int online_type, - struct zone *default_zone) +static int print_allowed_zone(char *buf, int len, int nid, + unsigned long start_pfn, unsigned long nr_pages, + int online_type, struct zone *default_zone) { struct zone *zone; zone = zone_for_pfn_range(online_type, nid, start_pfn, nr_pages); - if (zone != default_zone) { - strcat(buf, " "); - strcat(buf, zone->name); - } + if (zone == default_zone) + return 0; + + return sysfs_emit_at(buf, len, " %s", zone->name); } -static ssize_t show_valid_zones(struct device *dev, +static ssize_t valid_zones_show(struct device *dev, struct device_attribute *attr, char *buf) { struct memory_block *mem = to_memory_block(dev); unsigned long start_pfn = section_nr_to_pfn(mem->start_section_nr); unsigned long nr_pages = PAGES_PER_SECTION * sections_per_block; - unsigned long valid_start_pfn, valid_end_pfn; struct zone *default_zone; + int len = 0; int nid; /* @@ -420,77 +339,71 @@ * The block contains more than one zone can not be offlined. * This can happen e.g. for ZONE_DMA and ZONE_DMA32 */ - if (!test_pages_in_a_zone(start_pfn, start_pfn + nr_pages, - &valid_start_pfn, &valid_end_pfn)) - return sprintf(buf, "none\n"); - start_pfn = valid_start_pfn; - strcat(buf, page_zone(pfn_to_page(start_pfn))->name); + default_zone = test_pages_in_a_zone(start_pfn, + start_pfn + nr_pages); + if (!default_zone) + return sysfs_emit(buf, "%s\n", "none"); + len += sysfs_emit_at(buf, len, "%s", default_zone->name); goto out; } nid = mem->nid; - default_zone = zone_for_pfn_range(MMOP_ONLINE_KEEP, nid, start_pfn, nr_pages); - strcat(buf, default_zone->name); + default_zone = zone_for_pfn_range(MMOP_ONLINE, nid, start_pfn, + nr_pages); - print_allowed_zone(buf, nid, start_pfn, nr_pages, MMOP_ONLINE_KERNEL, - default_zone); - print_allowed_zone(buf, nid, start_pfn, nr_pages, MMOP_ONLINE_MOVABLE, - default_zone); + len += sysfs_emit_at(buf, len, "%s", default_zone->name); + len += print_allowed_zone(buf, len, nid, start_pfn, nr_pages, + MMOP_ONLINE_KERNEL, default_zone); + len += print_allowed_zone(buf, len, nid, start_pfn, nr_pages, + MMOP_ONLINE_MOVABLE, default_zone); out: - strcat(buf, "\n"); - - return strlen(buf); + len += sysfs_emit_at(buf, len, "\n"); + return len; } -static DEVICE_ATTR(valid_zones, 0444, show_valid_zones, NULL); +static DEVICE_ATTR_RO(valid_zones); #endif -static DEVICE_ATTR(phys_index, 0444, show_mem_start_phys_index, NULL); -static DEVICE_ATTR(state, 0644, show_mem_state, store_mem_state); -static DEVICE_ATTR(phys_device, 0444, show_phys_device, NULL); -static DEVICE_ATTR(removable, 0444, show_mem_removable, NULL); +static DEVICE_ATTR_RO(phys_index); +static DEVICE_ATTR_RW(state); +static DEVICE_ATTR_RO(phys_device); +static DEVICE_ATTR_RO(removable); /* - * Block size attribute stuff + * Show the memory block size (shared by all memory blocks). */ -static ssize_t -print_block_size(struct device *dev, struct device_attribute *attr, - char *buf) +static ssize_t block_size_bytes_show(struct device *dev, + struct device_attribute *attr, char *buf) { - return sprintf(buf, "%lx\n", get_memory_block_size()); + return sysfs_emit(buf, "%lx\n", memory_block_size_bytes()); } -static DEVICE_ATTR(block_size_bytes, 0444, print_block_size, NULL); +static DEVICE_ATTR_RO(block_size_bytes); /* * Memory auto online policy. */ -static ssize_t -show_auto_online_blocks(struct device *dev, struct device_attribute *attr, - char *buf) +static ssize_t auto_online_blocks_show(struct device *dev, + struct device_attribute *attr, char *buf) { - if (memhp_auto_online) - return sprintf(buf, "online\n"); - else - return sprintf(buf, "offline\n"); + return sysfs_emit(buf, "%s\n", + online_type_to_str[memhp_default_online_type]); } -static ssize_t -store_auto_online_blocks(struct device *dev, struct device_attribute *attr, - const char *buf, size_t count) +static ssize_t auto_online_blocks_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) { - if (sysfs_streq(buf, "online")) - memhp_auto_online = true; - else if (sysfs_streq(buf, "offline")) - memhp_auto_online = false; - else + const int online_type = memhp_online_type_from_str(buf); + + if (online_type < 0) return -EINVAL; + memhp_default_online_type = online_type; return count; } -static DEVICE_ATTR(auto_online_blocks, 0644, show_auto_online_blocks, - store_auto_online_blocks); +static DEVICE_ATTR_RW(auto_online_blocks); /* * Some architectures will have custom drivers to do this, and @@ -499,9 +412,8 @@ * and will require this interface. */ #ifdef CONFIG_ARCH_MEMORY_PROBE -static ssize_t -memory_probe_store(struct device *dev, struct device_attribute *attr, - const char *buf, size_t count) +static ssize_t probe_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) { u64 phys_addr; int nid, ret; @@ -520,7 +432,8 @@ nid = memory_add_physaddr_to_nid(phys_addr); ret = __add_memory(nid, phys_addr, - MIN_MEMORY_BLOCK_SIZE * sections_per_block); + MIN_MEMORY_BLOCK_SIZE * sections_per_block, + MHP_NONE); if (ret) goto out; @@ -531,7 +444,7 @@ return ret; } -static DEVICE_ATTR(probe, S_IWUSR, NULL, memory_probe_store); +static DEVICE_ATTR_WO(probe); #endif #ifdef CONFIG_MEMORY_FAILURE @@ -540,10 +453,9 @@ */ /* Soft offline a page */ -static ssize_t -store_soft_offline_page(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t count) +static ssize_t soft_offline_page_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) { int ret; u64 pfn; @@ -552,20 +464,14 @@ if (kstrtoull(buf, 0, &pfn) < 0) return -EINVAL; pfn >>= PAGE_SHIFT; - if (!pfn_valid(pfn)) - return -ENXIO; - /* Only online pages can be soft-offlined (esp., not ZONE_DEVICE). */ - if (!pfn_to_online_page(pfn)) - return -EIO; - ret = soft_offline_page(pfn_to_page(pfn), 0); + ret = soft_offline_page(pfn, 0); return ret == 0 ? count : ret; } /* Forcibly offline a page, including killing processes. */ -static ssize_t -store_hard_offline_page(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t count) +static ssize_t hard_offline_page_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) { int ret; u64 pfn; @@ -578,57 +484,39 @@ return ret ? ret : count; } -static DEVICE_ATTR(soft_offline_page, S_IWUSR, NULL, store_soft_offline_page); -static DEVICE_ATTR(hard_offline_page, S_IWUSR, NULL, store_hard_offline_page); +static DEVICE_ATTR_WO(soft_offline_page); +static DEVICE_ATTR_WO(hard_offline_page); #endif -/* - * Note that phys_device is optional. It is here to allow for - * differentiation between which *physical* devices each - * section belongs to... - */ +/* See phys_device_show(). */ int __weak arch_get_memory_phys_device(unsigned long start_pfn) { return 0; } /* - * A reference for the returned object is held and the reference for the - * hinted object is released. + * A reference for the returned memory block device is acquired. + * + * Called under device_hotplug_lock. */ -static struct memory_block *find_memory_block_by_id(int block_id, - struct memory_block *hint) +static struct memory_block *find_memory_block_by_id(unsigned long block_id) { - struct device *hintdev = hint ? &hint->dev : NULL; - struct device *dev; + struct memory_block *mem; - dev = subsys_find_device_by_id(&memory_subsys, block_id, hintdev); - if (hint) - put_device(&hint->dev); - if (!dev) - return NULL; - return to_memory_block(dev); -} - -struct memory_block *find_memory_block_hinted(struct mem_section *section, - struct memory_block *hint) -{ - int block_id = base_memory_block_id(__section_nr(section)); - - return find_memory_block_by_id(block_id, hint); + mem = xa_load(&memory_blocks, block_id); + if (mem) + get_device(&mem->dev); + return mem; } /* - * For now, we have a linear search to go find the appropriate - * memory_block corresponding to a particular phys_index. If - * this gets to be a real problem, we can always use a radix - * tree or something here. - * - * This could be made generic for all device subsystems. + * Called under device_hotplug_lock. */ struct memory_block *find_memory_block(struct mem_section *section) { - return find_memory_block_hinted(section, NULL); + unsigned long block_id = memory_block_id(__section_nr(section)); + + return find_memory_block_by_id(block_id); } static struct attribute *memory_memblk_attrs[] = { @@ -666,20 +554,24 @@ memory->dev.offline = memory->state == MEM_OFFLINE; ret = device_register(&memory->dev); - if (ret) + if (ret) { put_device(&memory->dev); + return ret; + } + ret = xa_err(xa_store(&memory_blocks, memory->dev.id, memory, + GFP_KERNEL)); + if (ret) + device_unregister(&memory->dev); return ret; } -static int init_memory_block(struct memory_block **memory, int block_id, - unsigned long state) +static int init_memory_block(unsigned long block_id, unsigned long state) { struct memory_block *mem; - unsigned long start_pfn; int ret = 0; - mem = find_memory_block_by_id(block_id, NULL); + mem = find_memory_block_by_id(block_id); if (mem) { put_device(&mem->dev); return -EEXIST; @@ -689,43 +581,36 @@ return -ENOMEM; mem->start_section_nr = block_id * sections_per_block; - mem->end_section_nr = mem->start_section_nr + sections_per_block - 1; mem->state = state; - start_pfn = section_nr_to_pfn(mem->start_section_nr); - mem->phys_device = arch_get_memory_phys_device(start_pfn); mem->nid = NUMA_NO_NODE; ret = register_memory(mem); - *memory = mem; return ret; } -static int add_memory_block(int base_section_nr) +static int add_memory_block(unsigned long base_section_nr) { - struct memory_block *mem; - int i, ret, section_count = 0; + int section_count = 0; + unsigned long nr; - for (i = base_section_nr; - i < base_section_nr + sections_per_block; - i++) - if (present_section_nr(i)) + for (nr = base_section_nr; nr < base_section_nr + sections_per_block; + nr++) + if (present_section_nr(nr)) section_count++; if (section_count == 0) return 0; - ret = init_memory_block(&mem, base_memory_block_id(base_section_nr), - MEM_ONLINE); - if (ret) - return ret; - mem->section_count = section_count; - return 0; + return init_memory_block(memory_block_id(base_section_nr), + MEM_ONLINE); } static void unregister_memory(struct memory_block *memory) { if (WARN_ON_ONCE(memory->dev.bus != &memory_subsys)) return; + + WARN_ON(xa_erase(&memory_blocks, memory->dev.id) == NULL); /* drop the ref. we got via find_memory_block() */ put_device(&memory->dev); @@ -736,11 +621,13 @@ * Create memory block devices for the given memory area. Start and size * have to be aligned to memory block granularity. Memory block devices * will be initialized as offline. + * + * Called under device_hotplug_lock. */ int create_memory_block_devices(unsigned long start, unsigned long size) { - const int start_block_id = pfn_to_block_id(PFN_DOWN(start)); - int end_block_id = pfn_to_block_id(PFN_DOWN(start + size)); + const unsigned long start_block_id = pfn_to_block_id(PFN_DOWN(start)); + unsigned long end_block_id = pfn_to_block_id(PFN_DOWN(start + size)); struct memory_block *mem; unsigned long block_id; int ret = 0; @@ -749,23 +636,21 @@ !IS_ALIGNED(size, memory_block_size_bytes()))) return -EINVAL; - mutex_lock(&mem_sysfs_mutex); for (block_id = start_block_id; block_id != end_block_id; block_id++) { - ret = init_memory_block(&mem, block_id, MEM_OFFLINE); + ret = init_memory_block(block_id, MEM_OFFLINE); if (ret) break; - mem->section_count = sections_per_block; } if (ret) { end_block_id = block_id; for (block_id = start_block_id; block_id != end_block_id; block_id++) { - mem = find_memory_block_by_id(block_id, NULL); - mem->section_count = 0; + mem = find_memory_block_by_id(block_id); + if (WARN_ON_ONCE(!mem)) + continue; unregister_memory(mem); } } - mutex_unlock(&mem_sysfs_mutex); return ret; } @@ -773,28 +658,27 @@ * Remove memory block devices for the given memory area. Start and size * have to be aligned to memory block granularity. Memory block devices * have to be offline. + * + * Called under device_hotplug_lock. */ void remove_memory_block_devices(unsigned long start, unsigned long size) { - const int start_block_id = pfn_to_block_id(PFN_DOWN(start)); - const int end_block_id = pfn_to_block_id(PFN_DOWN(start + size)); + const unsigned long start_block_id = pfn_to_block_id(PFN_DOWN(start)); + const unsigned long end_block_id = pfn_to_block_id(PFN_DOWN(start + size)); struct memory_block *mem; - int block_id; + unsigned long block_id; if (WARN_ON_ONCE(!IS_ALIGNED(start, memory_block_size_bytes()) || !IS_ALIGNED(size, memory_block_size_bytes()))) return; - mutex_lock(&mem_sysfs_mutex); for (block_id = start_block_id; block_id != end_block_id; block_id++) { - mem = find_memory_block_by_id(block_id, NULL); + mem = find_memory_block_by_id(block_id); if (WARN_ON_ONCE(!mem)) continue; - mem->section_count = 0; unregister_memory_block_under_nodes(mem); unregister_memory(mem); } - mutex_unlock(&mem_sysfs_mutex); } /* return true if the memory block is offlined, otherwise, return false */ @@ -828,38 +712,77 @@ }; /* - * Initialize the sysfs support for memory devices... + * Initialize the sysfs support for memory devices. At the time this function + * is called, we cannot have concurrent creation/deletion of memory block + * devices, the device_hotplug_lock is not needed. */ -int __init memory_dev_init(void) +void __init memory_dev_init(void) { - unsigned int i; int ret; - int err; - unsigned long block_sz; + unsigned long block_sz, nr; + + /* Validate the configured memory block size */ + block_sz = memory_block_size_bytes(); + if (!is_power_of_2(block_sz) || block_sz < MIN_MEMORY_BLOCK_SIZE) + panic("Memory block size not suitable: 0x%lx\n", block_sz); + sections_per_block = block_sz / MIN_MEMORY_BLOCK_SIZE; ret = subsys_system_register(&memory_subsys, memory_root_attr_groups); if (ret) - goto out; - - block_sz = get_memory_block_size(); - sections_per_block = block_sz / MIN_MEMORY_BLOCK_SIZE; + panic("%s() failed to register subsystem: %d\n", __func__, ret); /* * Create entries for memory sections that were found * during boot and have been initialized */ - mutex_lock(&mem_sysfs_mutex); - for (i = 0; i <= __highest_present_section_nr; - i += sections_per_block) { - err = add_memory_block(i); - if (!ret) - ret = err; + for (nr = 0; nr <= __highest_present_section_nr; + nr += sections_per_block) { + ret = add_memory_block(nr); + if (ret) + panic("%s() failed to add memory block: %d\n", __func__, + ret); } - mutex_unlock(&mem_sysfs_mutex); +} -out: - if (ret) - printk(KERN_ERR "%s() failed: %d\n", __func__, ret); +/** + * walk_memory_blocks - walk through all present memory blocks overlapped + * by the range [start, start + size) + * + * @start: start address of the memory range + * @size: size of the memory range + * @arg: argument passed to func + * @func: callback for each memory section walked + * + * This function walks through all present memory blocks overlapped by the + * range [start, start + size), calling func on each memory block. + * + * In case func() returns an error, walking is aborted and the error is + * returned. + * + * Called under device_hotplug_lock. + */ +int walk_memory_blocks(unsigned long start, unsigned long size, + void *arg, walk_memory_blocks_func_t func) +{ + const unsigned long start_block_id = phys_to_block_id(start); + const unsigned long end_block_id = phys_to_block_id(start + size - 1); + struct memory_block *mem; + unsigned long block_id; + int ret = 0; + + if (!size) + return 0; + + for (block_id = start_block_id; block_id <= end_block_id; block_id++) { + mem = find_memory_block_by_id(block_id); + if (!mem) + continue; + + ret = func(mem, arg); + put_device(&mem->dev); + if (ret) + break; + } return ret; } -- Gitblit v1.6.2