hc
2024-05-10 10ebd8556b7990499c896a550e3d416b444211e6
kernel/kernel/resource.c
....@@ -1,3 +1,4 @@
1
+// SPDX-License-Identifier: GPL-2.0-only
12 /*
23 * linux/kernel/resource.c
34 *
....@@ -139,7 +140,7 @@
139140 {
140141 proc_create_seq_data("ioports", 0, NULL, &resource_op,
141142 &ioport_resource);
142
- proc_create_seq_data("iomem", 0400, NULL, &resource_op, &iomem_resource);
143
+ proc_create_seq_data("iomem", 0, NULL, &resource_op, &iomem_resource);
143144 return 0;
144145 }
145146 __initcall(ioresources_init);
....@@ -319,52 +320,75 @@
319320 EXPORT_SYMBOL(release_resource);
320321
321322 /**
322
- * Finds the lowest iomem resource that covers part of [start..end]. The
323
- * caller must specify start, end, flags, and desc (which may be
323
+ * Finds the lowest iomem resource that covers part of [@start..@end]. The
324
+ * caller must specify @start, @end, @flags, and @desc (which may be
324325 * IORES_DESC_NONE).
325326 *
326
- * If a resource is found, returns 0 and *res is overwritten with the part
327
- * of the resource that's within [start..end]; if none is found, returns
327
+ * If a resource is found, returns 0 and @*res is overwritten with the part
328
+ * of the resource that's within [@start..@end]; if none is found, returns
328329 * -ENODEV. Returns -EINVAL for invalid parameters.
329330 *
330331 * This function walks the whole tree and not just first level children
331
- * unless @first_level_children_only is true.
332
+ * unless @first_lvl is true.
333
+ *
334
+ * @start: start address of the resource searched for
335
+ * @end: end address of same resource
336
+ * @flags: flags which the resource must have
337
+ * @desc: descriptor the resource must have
338
+ * @first_lvl: walk only the first level children, if set
339
+ * @res: return ptr, if resource found
332340 */
333341 static int find_next_iomem_res(resource_size_t start, resource_size_t end,
334342 unsigned long flags, unsigned long desc,
335
- bool first_level_children_only,
336
- struct resource *res)
343
+ bool first_lvl, struct resource *res)
337344 {
345
+ bool siblings_only = true;
338346 struct resource *p;
339
- bool sibling_only = false;
340347
341
- BUG_ON(!res);
342
- BUG_ON(start >= end);
348
+ if (!res)
349
+ return -EINVAL;
343350
344
- if (first_level_children_only)
345
- sibling_only = true;
351
+ if (start >= end)
352
+ return -EINVAL;
346353
347354 read_lock(&resource_lock);
348355
349
- for (p = iomem_resource.child; p; p = next_resource(p, sibling_only)) {
350
- if ((p->flags & flags) != flags)
351
- continue;
352
- if ((desc != IORES_DESC_NONE) && (desc != p->desc))
353
- continue;
356
+ for (p = iomem_resource.child; p; p = next_resource(p, siblings_only)) {
357
+ /* If we passed the resource we are looking for, stop */
354358 if (p->start > end) {
355359 p = NULL;
356360 break;
357361 }
358
- if ((p->end >= start) && (p->start <= end))
359
- break;
362
+
363
+ /* Skip until we find a range that matches what we look for */
364
+ if (p->end < start)
365
+ continue;
366
+
367
+ /*
368
+ * Now that we found a range that matches what we look for,
369
+ * check the flags and the descriptor. If we were not asked to
370
+ * use only the first level, start looking at children as well.
371
+ */
372
+ siblings_only = first_lvl;
373
+
374
+ if ((p->flags & flags) != flags)
375
+ continue;
376
+ if ((desc != IORES_DESC_NONE) && (desc != p->desc))
377
+ continue;
378
+
379
+ /* Found a match, break */
380
+ break;
360381 }
361382
362383 if (p) {
363384 /* copy data */
364
- res->start = max(start, p->start);
365
- res->end = min(end, p->end);
366
- res->flags = p->flags;
367
- res->desc = p->desc;
385
+ *res = (struct resource) {
386
+ .start = max(start, p->start),
387
+ .end = min(end, p->end),
388
+ .flags = p->flags,
389
+ .desc = p->desc,
390
+ .parent = p->parent,
391
+ };
368392 }
369393
370394 read_unlock(&resource_lock);
....@@ -373,15 +397,14 @@
373397
374398 static int __walk_iomem_res_desc(resource_size_t start, resource_size_t end,
375399 unsigned long flags, unsigned long desc,
376
- bool first_level_children_only, void *arg,
400
+ bool first_lvl, void *arg,
377401 int (*func)(struct resource *, void *))
378402 {
379403 struct resource res;
380
- int ret = -1;
404
+ int ret = -EINVAL;
381405
382406 while (start < end &&
383
- !find_next_iomem_res(start, end, flags, desc,
384
- first_level_children_only, &res)) {
407
+ !find_next_iomem_res(start, end, flags, desc, first_lvl, &res)) {
385408 ret = (*func)(&res, arg);
386409 if (ret)
387410 break;
....@@ -392,7 +415,7 @@
392415 return ret;
393416 }
394417
395
-/*
418
+/**
396419 * Walks through iomem resources and calls func() with matching resource
397420 * ranges. This walks through whole tree and not just first level children.
398421 * All the memory ranges which overlap start,end and also match flags and
....@@ -402,6 +425,8 @@
402425 * @flags: I/O resource flags
403426 * @start: start addr
404427 * @end: end addr
428
+ * @arg: function argument for the callback @func
429
+ * @func: callback function that is called for each qualifying resource area
405430 *
406431 * NOTE: For a new descriptor search, define a new IORES_DESC in
407432 * <linux/ioport.h> and set it in 'desc' of a target resource entry.
....@@ -421,11 +446,11 @@
421446 * ranges.
422447 */
423448 int walk_system_ram_res(u64 start, u64 end, void *arg,
424
- int (*func)(struct resource *, void *))
449
+ int (*func)(struct resource *, void *))
425450 {
426451 unsigned long flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
427452
428
- return __walk_iomem_res_desc(start, end, flags, IORES_DESC_NONE, true,
453
+ return __walk_iomem_res_desc(start, end, flags, IORES_DESC_NONE, false,
429454 arg, func);
430455 }
431456
....@@ -438,34 +463,35 @@
438463 {
439464 unsigned long flags = IORESOURCE_MEM | IORESOURCE_BUSY;
440465
441
- return __walk_iomem_res_desc(start, end, flags, IORES_DESC_NONE, true,
466
+ return __walk_iomem_res_desc(start, end, flags, IORES_DESC_NONE, false,
442467 arg, func);
443468 }
444
-
445
-#if !defined(CONFIG_ARCH_HAS_WALK_MEMORY)
446469
447470 /*
448471 * This function calls the @func callback against all memory ranges of type
449472 * System RAM which are marked as IORESOURCE_SYSTEM_RAM and IORESOUCE_BUSY.
450473 * It is to be used only for System RAM.
474
+ *
475
+ * This will find System RAM ranges that are children of top-level resources
476
+ * in addition to top-level System RAM resources.
451477 */
452478 int walk_system_ram_range(unsigned long start_pfn, unsigned long nr_pages,
453
- void *arg, int (*func)(unsigned long, unsigned long, void *))
479
+ void *arg, int (*func)(unsigned long, unsigned long, void *))
454480 {
455481 resource_size_t start, end;
456482 unsigned long flags;
457483 struct resource res;
458484 unsigned long pfn, end_pfn;
459
- int ret = -1;
485
+ int ret = -EINVAL;
460486
461487 start = (u64) start_pfn << PAGE_SHIFT;
462488 end = ((u64)(start_pfn + nr_pages) << PAGE_SHIFT) - 1;
463489 flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
464490 while (start < end &&
465491 !find_next_iomem_res(start, end, flags, IORES_DESC_NONE,
466
- true, &res)) {
467
- pfn = (res.start + PAGE_SIZE - 1) >> PAGE_SHIFT;
468
- end_pfn = (res.end + 1) >> PAGE_SHIFT;
492
+ false, &res)) {
493
+ pfn = PFN_UP(res.start);
494
+ end_pfn = PFN_DOWN(res.end + 1);
469495 if (end_pfn > pfn)
470496 ret = (*func)(pfn, end_pfn - pfn, arg);
471497 if (ret)
....@@ -474,8 +500,6 @@
474500 }
475501 return ret;
476502 }
477
-
478
-#endif
479503
480504 static int __is_ram(unsigned long pfn, unsigned long nr_pages, void *arg)
481505 {
....@@ -515,9 +539,12 @@
515539 int region_intersects(resource_size_t start, size_t size, unsigned long flags,
516540 unsigned long desc)
517541 {
518
- resource_size_t end = start + size - 1;
542
+ struct resource res;
519543 int type = 0; int other = 0;
520544 struct resource *p;
545
+
546
+ res.start = start;
547
+ res.end = start + size - 1;
521548
522549 read_lock(&resource_lock);
523550 for (p = iomem_resource.child; p ; p = p->sibling) {
....@@ -525,11 +552,7 @@
525552 ((desc == IORES_DESC_NONE) ||
526553 (desc == p->desc)));
527554
528
- if (start >= p->start && start <= p->end)
529
- is_type ? type++ : other++;
530
- if (end >= p->start && end <= p->end)
531
- is_type ? type++ : other++;
532
- if (p->start >= start && p->end <= end)
555
+ if (resource_overlaps(p, &res))
533556 is_type ? type++ : other++;
534557 }
535558 read_unlock(&resource_lock);
....@@ -646,8 +669,8 @@
646669 * @constraint: the size and alignment constraints to be met.
647670 */
648671 static int reallocate_resource(struct resource *root, struct resource *old,
649
- resource_size_t newsize,
650
- struct resource_constraint *constraint)
672
+ resource_size_t newsize,
673
+ struct resource_constraint *constraint)
651674 {
652675 int err=0;
653676 struct resource new = *old;
....@@ -960,7 +983,7 @@
960983 * Existing children of the resource are assumed to be immutable.
961984 */
962985 int adjust_resource(struct resource *res, resource_size_t start,
963
- resource_size_t size)
986
+ resource_size_t size)
964987 {
965988 int result;
966989
....@@ -971,9 +994,9 @@
971994 }
972995 EXPORT_SYMBOL(adjust_resource);
973996
974
-static void __init __reserve_region_with_split(struct resource *root,
975
- resource_size_t start, resource_size_t end,
976
- const char *name)
997
+static void __init
998
+__reserve_region_with_split(struct resource *root, resource_size_t start,
999
+ resource_size_t end, const char *name)
9771000 {
9781001 struct resource *parent = root;
9791002 struct resource *conflict;
....@@ -1032,9 +1055,9 @@
10321055
10331056 }
10341057
1035
-void __init reserve_region_with_split(struct resource *root,
1036
- resource_size_t start, resource_size_t end,
1037
- const char *name)
1058
+void __init
1059
+reserve_region_with_split(struct resource *root, resource_size_t start,
1060
+ resource_size_t end, const char *name)
10381061 {
10391062 int abort = 0;
10401063
....@@ -1106,6 +1129,7 @@
11061129 {
11071130 DECLARE_WAITQUEUE(wait, current);
11081131 struct resource *res = alloc_resource(GFP_KERNEL);
1132
+ struct resource *orig_parent = parent;
11091133
11101134 if (!res)
11111135 return NULL;
....@@ -1126,6 +1150,15 @@
11261150 conflict = __request_resource(parent, res);
11271151 if (!conflict)
11281152 break;
1153
+ /*
1154
+ * mm/hmm.c reserves physical addresses which then
1155
+ * become unavailable to other users. Conflicts are
1156
+ * not expected. Warn to aid debugging if encountered.
1157
+ */
1158
+ if (conflict->desc == IORES_DESC_DEVICE_PRIVATE_MEMORY) {
1159
+ pr_warn("Unaddressable device %s %pR conflicts with %pR",
1160
+ conflict->name, conflict, res);
1161
+ }
11291162 if (conflict != parent) {
11301163 if (!(conflict->flags & IORESOURCE_BUSY)) {
11311164 parent = conflict;
....@@ -1147,6 +1180,10 @@
11471180 break;
11481181 }
11491182 write_unlock(&resource_lock);
1183
+
1184
+ if (res && orig_parent == &iomem_resource)
1185
+ revoke_devmem(res);
1186
+
11501187 return res;
11511188 }
11521189 EXPORT_SYMBOL(__request_region);
....@@ -1160,7 +1197,7 @@
11601197 * The described resource region must match a currently busy region.
11611198 */
11621199 void __release_region(struct resource *parent, resource_size_t start,
1163
- resource_size_t n)
1200
+ resource_size_t n)
11641201 {
11651202 struct resource **p;
11661203 resource_size_t end;
....@@ -1203,7 +1240,6 @@
12031240 #ifdef CONFIG_MEMORY_HOTREMOVE
12041241 /**
12051242 * release_mem_region_adjustable - release a previously reserved memory region
1206
- * @parent: parent resource descriptor
12071243 * @start: resource start address
12081244 * @size: resource region size
12091245 *
....@@ -1221,21 +1257,28 @@
12211257 * assumes that all children remain in the lower address entry for
12221258 * simplicity. Enhance this logic when necessary.
12231259 */
1224
-int release_mem_region_adjustable(struct resource *parent,
1225
- resource_size_t start, resource_size_t size)
1260
+void release_mem_region_adjustable(resource_size_t start, resource_size_t size)
12261261 {
1262
+ struct resource *parent = &iomem_resource;
1263
+ struct resource *new_res = NULL;
1264
+ bool alloc_nofail = false;
12271265 struct resource **p;
12281266 struct resource *res;
1229
- struct resource *new_res;
12301267 resource_size_t end;
1231
- int ret = -EINVAL;
12321268
12331269 end = start + size - 1;
1234
- if ((start < parent->start) || (end > parent->end))
1235
- return ret;
1270
+ if (WARN_ON_ONCE((start < parent->start) || (end > parent->end)))
1271
+ return;
12361272
1237
- /* The alloc_resource() result gets checked later */
1238
- new_res = alloc_resource(GFP_KERNEL);
1273
+ /*
1274
+	 * We free up quite a lot of memory on memory hotunplug (esp., memmap),
1275
+ * just before releasing the region. This is highly unlikely to
1276
+	 * fail - let's play safe and make it never fail as the caller cannot
1277
+ * perform any error handling (e.g., trying to re-add memory will fail
1278
+ * similarly).
1279
+ */
1280
+retry:
1281
+ new_res = alloc_resource(GFP_KERNEL | (alloc_nofail ? __GFP_NOFAIL : 0));
12391282
12401283 p = &parent->child;
12411284 write_lock(&resource_lock);
....@@ -1263,20 +1306,23 @@
12631306 /* free the whole entry */
12641307 *p = res->sibling;
12651308 free_resource(res);
1266
- ret = 0;
12671309 } else if (res->start == start && res->end != end) {
12681310 /* adjust the start */
1269
- ret = __adjust_resource(res, end + 1,
1270
- res->end - end);
1311
+ WARN_ON_ONCE(__adjust_resource(res, end + 1,
1312
+ res->end - end));
12711313 } else if (res->start != start && res->end == end) {
12721314 /* adjust the end */
1273
- ret = __adjust_resource(res, res->start,
1274
- start - res->start);
1315
+ WARN_ON_ONCE(__adjust_resource(res, res->start,
1316
+ start - res->start));
12751317 } else {
1276
- /* split into two entries */
1318
+ /* split into two entries - we need a new resource */
12771319 if (!new_res) {
1278
- ret = -ENOMEM;
1279
- break;
1320
+ new_res = alloc_resource(GFP_ATOMIC);
1321
+ if (!new_res) {
1322
+ alloc_nofail = true;
1323
+ write_unlock(&resource_lock);
1324
+ goto retry;
1325
+ }
12801326 }
12811327 new_res->name = res->name;
12821328 new_res->start = end + 1;
....@@ -1287,9 +1333,8 @@
12871333 new_res->sibling = res->sibling;
12881334 new_res->child = NULL;
12891335
1290
- ret = __adjust_resource(res, res->start,
1291
- start - res->start);
1292
- if (ret)
1336
+ if (WARN_ON_ONCE(__adjust_resource(res, res->start,
1337
+ start - res->start)))
12931338 break;
12941339 res->sibling = new_res;
12951340 new_res = NULL;
....@@ -1300,9 +1345,68 @@
13001345
13011346 write_unlock(&resource_lock);
13021347 free_resource(new_res);
1303
- return ret;
13041348 }
13051349 #endif /* CONFIG_MEMORY_HOTREMOVE */
1350
+
1351
+#ifdef CONFIG_MEMORY_HOTPLUG
1352
+static bool system_ram_resources_mergeable(struct resource *r1,
1353
+ struct resource *r2)
1354
+{
1355
+ /* We assume either r1 or r2 is IORESOURCE_SYSRAM_MERGEABLE. */
1356
+ return r1->flags == r2->flags && r1->end + 1 == r2->start &&
1357
+ r1->name == r2->name && r1->desc == r2->desc &&
1358
+ !r1->child && !r2->child;
1359
+}
1360
+
1361
+/*
1362
+ * merge_system_ram_resource - mark the System RAM resource mergeable and try to
1363
+ * merge it with adjacent, mergeable resources
1364
+ * @res: resource descriptor
1365
+ *
1366
+ * This interface is intended for memory hotplug, whereby lots of contiguous
1367
+ * system ram resources are added (e.g., via add_memory*()) by a driver, and
1368
+ * the actual resource boundaries are not of interest (e.g., it might be
1369
+ * relevant for DIMMs). Only resources that are marked mergeable, that have the
1370
+ * same parent, and that don't have any children are considered. All mergeable
1371
+ * resources must be immutable during the request.
1372
+ *
1373
+ * Note:
1374
+ * - The caller has to make sure that no pointers to resources that are
1375
+ * marked mergeable are used anymore after this call - the resource might
1376
+ * be freed and the pointer might be stale!
1377
+ * - release_mem_region_adjustable() will split on demand on memory hotunplug
1378
+ */
1379
+void merge_system_ram_resource(struct resource *res)
1380
+{
1381
+ const unsigned long flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
1382
+ struct resource *cur;
1383
+
1384
+ if (WARN_ON_ONCE((res->flags & flags) != flags))
1385
+ return;
1386
+
1387
+ write_lock(&resource_lock);
1388
+ res->flags |= IORESOURCE_SYSRAM_MERGEABLE;
1389
+
1390
+ /* Try to merge with next item in the list. */
1391
+ cur = res->sibling;
1392
+ if (cur && system_ram_resources_mergeable(res, cur)) {
1393
+ res->end = cur->end;
1394
+ res->sibling = cur->sibling;
1395
+ free_resource(cur);
1396
+ }
1397
+
1398
+ /* Try to merge with previous item in the list. */
1399
+ cur = res->parent->child;
1400
+ while (cur && cur->sibling != res)
1401
+ cur = cur->sibling;
1402
+ if (cur && system_ram_resources_mergeable(cur, res)) {
1403
+ cur->end = res->end;
1404
+ cur->sibling = res->sibling;
1405
+ free_resource(res);
1406
+ }
1407
+ write_unlock(&resource_lock);
1408
+}
1409
+#endif /* CONFIG_MEMORY_HOTPLUG */
13061410
13071411 /*
13081412 * Managed region resource
....@@ -1398,9 +1502,9 @@
13981502 this->start == match->start && this->n == match->n;
13991503 }
14001504
1401
-struct resource * __devm_request_region(struct device *dev,
1402
- struct resource *parent, resource_size_t start,
1403
- resource_size_t n, const char *name)
1505
+struct resource *
1506
+__devm_request_region(struct device *dev, struct resource *parent,
1507
+ resource_size_t start, resource_size_t n, const char *name)
14041508 {
14051509 struct region_devres *dr = NULL;
14061510 struct resource *res;
....@@ -1599,6 +1703,62 @@
15991703 }
16001704 EXPORT_SYMBOL(resource_list_free);
16011705
1706
+#ifdef CONFIG_DEVICE_PRIVATE
1707
+static struct resource *__request_free_mem_region(struct device *dev,
1708
+ struct resource *base, unsigned long size, const char *name)
1709
+{
1710
+ resource_size_t end, addr;
1711
+ struct resource *res;
1712
+
1713
+ size = ALIGN(size, 1UL << PA_SECTION_SHIFT);
1714
+ end = min_t(unsigned long, base->end, (1UL << MAX_PHYSMEM_BITS) - 1);
1715
+ addr = end - size + 1UL;
1716
+
1717
+ for (; addr > size && addr >= base->start; addr -= size) {
1718
+ if (region_intersects(addr, size, 0, IORES_DESC_NONE) !=
1719
+ REGION_DISJOINT)
1720
+ continue;
1721
+
1722
+ if (dev)
1723
+ res = devm_request_mem_region(dev, addr, size, name);
1724
+ else
1725
+ res = request_mem_region(addr, size, name);
1726
+ if (!res)
1727
+ return ERR_PTR(-ENOMEM);
1728
+ res->desc = IORES_DESC_DEVICE_PRIVATE_MEMORY;
1729
+ return res;
1730
+ }
1731
+
1732
+ return ERR_PTR(-ERANGE);
1733
+}
1734
+
1735
+/**
1736
+ * devm_request_free_mem_region - find free region for device private memory
1737
+ *
1738
+ * @dev: device struct to bind the resource to
1739
+ * @size: size in bytes of the device memory to add
1740
+ * @base: resource tree to look in
1741
+ *
1742
+ * This function tries to find an empty range of physical address big enough to
1743
+ * contain the new resource, so that it can later be hotplugged as ZONE_DEVICE
1744
+ * memory, which in turn allocates struct pages.
1745
+ */
1746
+struct resource *devm_request_free_mem_region(struct device *dev,
1747
+ struct resource *base, unsigned long size)
1748
+{
1749
+ return __request_free_mem_region(dev, base, size, dev_name(dev));
1750
+}
1751
+EXPORT_SYMBOL_GPL(devm_request_free_mem_region);
1752
+
1753
+struct resource *request_free_mem_region(struct resource *base,
1754
+ unsigned long size, const char *name)
1755
+{
1756
+ return __request_free_mem_region(NULL, base, size, name);
1757
+}
1758
+EXPORT_SYMBOL_GPL(request_free_mem_region);
1759
+
1760
+#endif /* CONFIG_DEVICE_PRIVATE */
1761
+
16021762 static int __init strict_iomem(char *str)
16031763 {
16041764 if (strstr(str, "relaxed"))