From d2ccde1c8e90d38cee87a1b0309ad2827f3fd30d Mon Sep 17 00:00:00 2001 From: hc <hc@nodka.com> Date: Mon, 11 Dec 2023 02:45:28 +0000 Subject: [PATCH] add boot partition size --- kernel/kernel/resource.c | 336 ++++++++++++++++++++++++++++++++++++++++++------------- 1 files changed, 255 insertions(+), 81 deletions(-) diff --git a/kernel/kernel/resource.c b/kernel/kernel/resource.c index 8205d8e..817545f 100644 --- a/kernel/kernel/resource.c +++ b/kernel/kernel/resource.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * linux/kernel/resource.c * @@ -139,7 +140,7 @@ { proc_create_seq_data("ioports", 0, NULL, &resource_op, &ioport_resource); - proc_create_seq_data("iomem", 0400, NULL, &resource_op, &iomem_resource); + proc_create_seq_data("iomem", 0, NULL, &resource_op, &iomem_resource); return 0; } __initcall(ioresources_init); @@ -319,52 +320,75 @@ EXPORT_SYMBOL(release_resource); /** - * Finds the lowest iomem resource that covers part of [start..end]. The - * caller must specify start, end, flags, and desc (which may be + * Finds the lowest iomem resource that covers part of [@start..@end]. The + * caller must specify @start, @end, @flags, and @desc (which may be * IORES_DESC_NONE). * - * If a resource is found, returns 0 and *res is overwritten with the part - * of the resource that's within [start..end]; if none is found, returns + * If a resource is found, returns 0 and @*res is overwritten with the part + * of the resource that's within [@start..@end]; if none is found, returns * -ENODEV. Returns -EINVAL for invalid parameters. * * This function walks the whole tree and not just first level children - * unless @first_level_children_only is true. + * unless @first_lvl is true. + * + * @start: start address of the resource searched for + * @end: end address of same resource + * @flags: flags which the resource must have + * @desc: descriptor the resource must have + * @first_lvl: walk only the first level children, if set + * @res: return ptr, if resource found */ static int find_next_iomem_res(resource_size_t start, resource_size_t end, unsigned long flags, unsigned long desc, - bool first_level_children_only, - struct resource *res) + bool first_lvl, struct resource *res) { + bool siblings_only = true; struct resource *p; - bool sibling_only = false; - BUG_ON(!res); - BUG_ON(start >= end); + if (!res) + return -EINVAL; - if (first_level_children_only) - sibling_only = true; + if (start >= end) + return -EINVAL; read_lock(&resource_lock); - for (p = iomem_resource.child; p; p = next_resource(p, sibling_only)) { - if ((p->flags & flags) != flags) - continue; - if ((desc != IORES_DESC_NONE) && (desc != p->desc)) - continue; + for (p = iomem_resource.child; p; p = next_resource(p, siblings_only)) { + /* If we passed the resource we are looking for, stop */ if (p->start > end) { p = NULL; break; } - if ((p->end >= start) && (p->start <= end)) - break; + + /* Skip until we find a range that matches what we look for */ + if (p->end < start) + continue; + + /* + * Now that we found a range that matches what we look for, + * check the flags and the descriptor. If we were not asked to + * use only the first level, start looking at children as well. + */ + siblings_only = first_lvl; + + if ((p->flags & flags) != flags) + continue; + if ((desc != IORES_DESC_NONE) && (desc != p->desc)) + continue; + + /* Found a match, break */ + break; } if (p) { /* copy data */ - res->start = max(start, p->start); - res->end = min(end, p->end); - res->flags = p->flags; - res->desc = p->desc; + *res = (struct resource) { + .start = max(start, p->start), + .end = min(end, p->end), + .flags = p->flags, + .desc = p->desc, + .parent = p->parent, + }; } read_unlock(&resource_lock); @@ -373,15 +397,14 @@ static int __walk_iomem_res_desc(resource_size_t start, resource_size_t end, unsigned long flags, unsigned long desc, - bool first_level_children_only, void *arg, + bool first_lvl, void *arg, int (*func)(struct resource *, void *)) { struct resource res; - int ret = -1; + int ret = -EINVAL; while (start < end && - !find_next_iomem_res(start, end, flags, desc, - first_level_children_only, &res)) { + !find_next_iomem_res(start, end, flags, desc, first_lvl, &res)) { ret = (*func)(&res, arg); if (ret) break; @@ -392,7 +415,7 @@ return ret; } -/* +/** * Walks through iomem resources and calls func() with matching resource * ranges. This walks through whole tree and not just first level children. * All the memory ranges which overlap start,end and also match flags and @@ -402,6 +425,8 @@ * @flags: I/O resource flags * @start: start addr * @end: end addr + * @arg: function argument for the callback @func + * @func: callback function that is called for each qualifying resource area * * NOTE: For a new descriptor search, define a new IORES_DESC in * <linux/ioport.h> and set it in 'desc' of a target resource entry. @@ -421,11 +446,11 @@ * ranges. */ int walk_system_ram_res(u64 start, u64 end, void *arg, - int (*func)(struct resource *, void *)) + int (*func)(struct resource *, void *)) { unsigned long flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; - return __walk_iomem_res_desc(start, end, flags, IORES_DESC_NONE, true, + return __walk_iomem_res_desc(start, end, flags, IORES_DESC_NONE, false, arg, func); } @@ -438,34 +463,35 @@ { unsigned long flags = IORESOURCE_MEM | IORESOURCE_BUSY; - return __walk_iomem_res_desc(start, end, flags, IORES_DESC_NONE, true, + return __walk_iomem_res_desc(start, end, flags, IORES_DESC_NONE, false, arg, func); } - -#if !defined(CONFIG_ARCH_HAS_WALK_MEMORY) /* * This function calls the @func callback against all memory ranges of type * System RAM which are marked as IORESOURCE_SYSTEM_RAM and IORESOUCE_BUSY. * It is to be used only for System RAM. + * + * This will find System RAM ranges that are children of top-level resources + * in addition to top-level System RAM resources. */ int walk_system_ram_range(unsigned long start_pfn, unsigned long nr_pages, - void *arg, int (*func)(unsigned long, unsigned long, void *)) + void *arg, int (*func)(unsigned long, unsigned long, void *)) { resource_size_t start, end; unsigned long flags; struct resource res; unsigned long pfn, end_pfn; - int ret = -1; + int ret = -EINVAL; start = (u64) start_pfn << PAGE_SHIFT; end = ((u64)(start_pfn + nr_pages) << PAGE_SHIFT) - 1; flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; while (start < end && !find_next_iomem_res(start, end, flags, IORES_DESC_NONE, - true, &res)) { - pfn = (res.start + PAGE_SIZE - 1) >> PAGE_SHIFT; - end_pfn = (res.end + 1) >> PAGE_SHIFT; + false, &res)) { + pfn = PFN_UP(res.start); + end_pfn = PFN_DOWN(res.end + 1); if (end_pfn > pfn) ret = (*func)(pfn, end_pfn - pfn, arg); if (ret) @@ -474,8 +500,6 @@ } return ret; } - -#endif static int __is_ram(unsigned long pfn, unsigned long nr_pages, void *arg) { @@ -515,9 +539,12 @@ int region_intersects(resource_size_t start, size_t size, unsigned long flags, unsigned long desc) { - resource_size_t end = start + size - 1; + struct resource res; int type = 0; int other = 0; struct resource *p; + + res.start = start; + res.end = start + size - 1; read_lock(&resource_lock); for (p = iomem_resource.child; p ; p = p->sibling) { @@ -525,11 +552,7 @@ ((desc == IORES_DESC_NONE) || (desc == p->desc))); - if (start >= p->start && start <= p->end) - is_type ? type++ : other++; - if (end >= p->start && end <= p->end) - is_type ? type++ : other++; - if (p->start >= start && p->end <= end) + if (resource_overlaps(p, &res)) is_type ? type++ : other++; } read_unlock(&resource_lock); @@ -646,8 +669,8 @@ * @constraint: the size and alignment constraints to be met. */ static int reallocate_resource(struct resource *root, struct resource *old, - resource_size_t newsize, - struct resource_constraint *constraint) + resource_size_t newsize, + struct resource_constraint *constraint) { int err=0; struct resource new = *old; @@ -960,7 +983,7 @@ * Existing children of the resource are assumed to be immutable. */ int adjust_resource(struct resource *res, resource_size_t start, - resource_size_t size) + resource_size_t size) { int result; @@ -971,9 +994,9 @@ } EXPORT_SYMBOL(adjust_resource); -static void __init __reserve_region_with_split(struct resource *root, - resource_size_t start, resource_size_t end, - const char *name) +static void __init +__reserve_region_with_split(struct resource *root, resource_size_t start, + resource_size_t end, const char *name) { struct resource *parent = root; struct resource *conflict; @@ -1032,9 +1055,9 @@ } -void __init reserve_region_with_split(struct resource *root, - resource_size_t start, resource_size_t end, - const char *name) +void __init +reserve_region_with_split(struct resource *root, resource_size_t start, + resource_size_t end, const char *name) { int abort = 0; @@ -1106,6 +1129,7 @@ { DECLARE_WAITQUEUE(wait, current); struct resource *res = alloc_resource(GFP_KERNEL); + struct resource *orig_parent = parent; if (!res) return NULL; @@ -1126,6 +1150,15 @@ conflict = __request_resource(parent, res); if (!conflict) break; + /* + * mm/hmm.c reserves physical addresses which then + * become unavailable to other users. Conflicts are + * not expected. Warn to aid debugging if encountered. + */ + if (conflict->desc == IORES_DESC_DEVICE_PRIVATE_MEMORY) { + pr_warn("Unaddressable device %s %pR conflicts with %pR", + conflict->name, conflict, res); + } if (conflict != parent) { if (!(conflict->flags & IORESOURCE_BUSY)) { parent = conflict; @@ -1147,6 +1180,10 @@ break; } write_unlock(&resource_lock); + + if (res && orig_parent == &iomem_resource) + revoke_devmem(res); + return res; } EXPORT_SYMBOL(__request_region); @@ -1160,7 +1197,7 @@ * The described resource region must match a currently busy region. */ void __release_region(struct resource *parent, resource_size_t start, - resource_size_t n) + resource_size_t n) { struct resource **p; resource_size_t end; @@ -1203,7 +1240,6 @@ #ifdef CONFIG_MEMORY_HOTREMOVE /** * release_mem_region_adjustable - release a previously reserved memory region - * @parent: parent resource descriptor * @start: resource start address * @size: resource region size * @@ -1221,21 +1257,28 @@ * assumes that all children remain in the lower address entry for * simplicity. Enhance this logic when necessary. */ -int release_mem_region_adjustable(struct resource *parent, - resource_size_t start, resource_size_t size) +void release_mem_region_adjustable(resource_size_t start, resource_size_t size) { + struct resource *parent = &iomem_resource; + struct resource *new_res = NULL; + bool alloc_nofail = false; struct resource **p; struct resource *res; - struct resource *new_res; resource_size_t end; - int ret = -EINVAL; end = start + size - 1; - if ((start < parent->start) || (end > parent->end)) - return ret; + if (WARN_ON_ONCE((start < parent->start) || (end > parent->end))) + return; - /* The alloc_resource() result gets checked later */ - new_res = alloc_resource(GFP_KERNEL); + /* + * We free up quite a lot of memory on memory hotunplug (esp., memap), + * just before releasing the region. This is highly unlikely to + * fail - let's play save and make it never fail as the caller cannot + * perform any error handling (e.g., trying to re-add memory will fail + * similarly). + */ +retry: + new_res = alloc_resource(GFP_KERNEL | (alloc_nofail ? __GFP_NOFAIL : 0)); p = &parent->child; write_lock(&resource_lock); @@ -1248,6 +1291,20 @@ if (res->start > start || res->end < end) { p = &res->sibling; continue; + } + + /* + * All memory regions added from memory-hotplug path have the + * flag IORESOURCE_SYSTEM_RAM. If the resource does not have + * this flag, we know that we are dealing with a resource coming + * from HMM/devm. HMM/devm use another mechanism to add/release + * a resource. This goes via devm_request_mem_region and + * devm_release_mem_region. + * HMM/devm take care to release their resources when they want, + * so if we are dealing with them, let us just back off here. + */ + if (!(res->flags & IORESOURCE_SYSRAM)) { + break; } if (!(res->flags & IORESOURCE_MEM)) @@ -1263,20 +1320,23 @@ /* free the whole entry */ *p = res->sibling; free_resource(res); - ret = 0; } else if (res->start == start && res->end != end) { /* adjust the start */ - ret = __adjust_resource(res, end + 1, - res->end - end); + WARN_ON_ONCE(__adjust_resource(res, end + 1, + res->end - end)); } else if (res->start != start && res->end == end) { /* adjust the end */ - ret = __adjust_resource(res, res->start, - start - res->start); + WARN_ON_ONCE(__adjust_resource(res, res->start, + start - res->start)); } else { - /* split into two entries */ + /* split into two entries - we need a new resource */ if (!new_res) { - ret = -ENOMEM; - break; + new_res = alloc_resource(GFP_ATOMIC); + if (!new_res) { + alloc_nofail = true; + write_unlock(&resource_lock); + goto retry; + } } new_res->name = res->name; new_res->start = end + 1; @@ -1287,9 +1347,8 @@ new_res->sibling = res->sibling; new_res->child = NULL; - ret = __adjust_resource(res, res->start, - start - res->start); - if (ret) + if (WARN_ON_ONCE(__adjust_resource(res, res->start, + start - res->start))) break; res->sibling = new_res; new_res = NULL; @@ -1300,9 +1359,68 @@ write_unlock(&resource_lock); free_resource(new_res); - return ret; } #endif /* CONFIG_MEMORY_HOTREMOVE */ + +#ifdef CONFIG_MEMORY_HOTPLUG +static bool system_ram_resources_mergeable(struct resource *r1, + struct resource *r2) +{ + /* We assume either r1 or r2 is IORESOURCE_SYSRAM_MERGEABLE. */ + return r1->flags == r2->flags && r1->end + 1 == r2->start && + r1->name == r2->name && r1->desc == r2->desc && + !r1->child && !r2->child; +} + +/* + * merge_system_ram_resource - mark the System RAM resource mergeable and try to + * merge it with adjacent, mergeable resources + * @res: resource descriptor + * + * This interface is intended for memory hotplug, whereby lots of contiguous + * system ram resources are added (e.g., via add_memory*()) by a driver, and + * the actual resource boundaries are not of interest (e.g., it might be + * relevant for DIMMs). Only resources that are marked mergeable, that have the + * same parent, and that don't have any children are considered. All mergeable + * resources must be immutable during the request. + * + * Note: + * - The caller has to make sure that no pointers to resources that are + * marked mergeable are used anymore after this call - the resource might + * be freed and the pointer might be stale! + * - release_mem_region_adjustable() will split on demand on memory hotunplug + */ +void merge_system_ram_resource(struct resource *res) +{ + const unsigned long flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; + struct resource *cur; + + if (WARN_ON_ONCE((res->flags & flags) != flags)) + return; + + write_lock(&resource_lock); + res->flags |= IORESOURCE_SYSRAM_MERGEABLE; + + /* Try to merge with next item in the list. */ + cur = res->sibling; + if (cur && system_ram_resources_mergeable(res, cur)) { + res->end = cur->end; + res->sibling = cur->sibling; + free_resource(cur); + } + + /* Try to merge with previous item in the list. */ + cur = res->parent->child; + while (cur && cur->sibling != res) + cur = cur->sibling; + if (cur && system_ram_resources_mergeable(cur, res)) { + cur->end = res->end; + cur->sibling = res->sibling; + free_resource(res); + } + write_unlock(&resource_lock); +} +#endif /* CONFIG_MEMORY_HOTPLUG */ /* * Managed region resource @@ -1398,9 +1516,9 @@ this->start == match->start && this->n == match->n; } -struct resource * __devm_request_region(struct device *dev, - struct resource *parent, resource_size_t start, - resource_size_t n, const char *name) +struct resource * +__devm_request_region(struct device *dev, struct resource *parent, + resource_size_t start, resource_size_t n, const char *name) { struct region_devres *dr = NULL; struct resource *res; @@ -1599,6 +1717,62 @@ } EXPORT_SYMBOL(resource_list_free); +#ifdef CONFIG_DEVICE_PRIVATE +static struct resource *__request_free_mem_region(struct device *dev, + struct resource *base, unsigned long size, const char *name) +{ + resource_size_t end, addr; + struct resource *res; + + size = ALIGN(size, 1UL << PA_SECTION_SHIFT); + end = min_t(unsigned long, base->end, (1UL << MAX_PHYSMEM_BITS) - 1); + addr = end - size + 1UL; + + for (; addr > size && addr >= base->start; addr -= size) { + if (region_intersects(addr, size, 0, IORES_DESC_NONE) != + REGION_DISJOINT) + continue; + + if (dev) + res = devm_request_mem_region(dev, addr, size, name); + else + res = request_mem_region(addr, size, name); + if (!res) + return ERR_PTR(-ENOMEM); + res->desc = IORES_DESC_DEVICE_PRIVATE_MEMORY; + return res; + } + + return ERR_PTR(-ERANGE); +} + +/** + * devm_request_free_mem_region - find free region for device private memory + * + * @dev: device struct to bind the resource to + * @size: size in bytes of the device memory to add + * @base: resource tree to look in + * + * This function tries to find an empty range of physical address big enough to + * contain the new resource, so that it can later be hotplugged as ZONE_DEVICE + * memory, which in turn allocates struct pages. + */ +struct resource *devm_request_free_mem_region(struct device *dev, + struct resource *base, unsigned long size) +{ + return __request_free_mem_region(dev, base, size, dev_name(dev)); +} +EXPORT_SYMBOL_GPL(devm_request_free_mem_region); + +struct resource *request_free_mem_region(struct resource *base, + unsigned long size, const char *name) +{ + return __request_free_mem_region(NULL, base, size, name); +} +EXPORT_SYMBOL_GPL(request_free_mem_region); + +#endif /* CONFIG_DEVICE_PRIVATE */ + static int __init strict_iomem(char *str) { if (strstr(str, "relaxed")) -- Gitblit v1.6.2