| .. | .. |
|---|
| 3 | 3 | * Copyright IBM Corporation, 2012 |
|---|
| 4 | 4 | * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> |
|---|
| 5 | 5 | * |
|---|
| 6 | + * Cgroup v2 |
|---|
| 7 | + * Copyright (C) 2019 Red Hat, Inc. |
|---|
| 8 | + * Author: Giuseppe Scrivano <gscrivan@redhat.com> |
|---|
| 9 | + * |
|---|
| 6 | 10 | * This program is free software; you can redistribute it and/or modify it |
|---|
| 7 | 11 | * under the terms of version 2.1 of the GNU Lesser General Public License |
|---|
| 8 | 12 | * as published by the Free Software Foundation. |
|---|
| .. | .. |
|---|
| 19 | 23 | #include <linux/hugetlb.h> |
|---|
| 20 | 24 | #include <linux/hugetlb_cgroup.h> |
|---|
| 21 | 25 | |
|---|
| 22 | | -struct hugetlb_cgroup { |
|---|
| 23 | | - struct cgroup_subsys_state css; |
|---|
| 24 | | - /* |
|---|
| 25 | | - * the counter to account for hugepages from hugetlb. |
|---|
| 26 | | - */ |
|---|
| 27 | | - struct page_counter hugepage[HUGE_MAX_HSTATE]; |
|---|
| 28 | | -}; |
|---|
| 29 | | - |
|---|
| 30 | 26 | #define MEMFILE_PRIVATE(x, val) (((x) << 16) | (val)) |
|---|
| 31 | 27 | #define MEMFILE_IDX(val) (((val) >> 16) & 0xffff) |
|---|
| 32 | 28 | #define MEMFILE_ATTR(val) ((val) & 0xffff) |
|---|
| 33 | 29 | |
|---|
| 30 | +#define hugetlb_cgroup_from_counter(counter, idx) \ |
|---|
| 31 | + container_of(counter, struct hugetlb_cgroup, hugepage[idx]) |
|---|
| 32 | + |
|---|
| 34 | 33 | static struct hugetlb_cgroup *root_h_cgroup __read_mostly; |
|---|
| 34 | + |
|---|
| 35 | +static inline struct page_counter * |
|---|
| 36 | +__hugetlb_cgroup_counter_from_cgroup(struct hugetlb_cgroup *h_cg, int idx, |
|---|
| 37 | + bool rsvd) |
|---|
| 38 | +{ |
|---|
| 39 | + if (rsvd) |
|---|
| 40 | + return &h_cg->rsvd_hugepage[idx]; |
|---|
| 41 | + return &h_cg->hugepage[idx]; |
|---|
| 42 | +} |
|---|
| 43 | + |
|---|
| 44 | +static inline struct page_counter * |
|---|
| 45 | +hugetlb_cgroup_counter_from_cgroup(struct hugetlb_cgroup *h_cg, int idx) |
|---|
| 46 | +{ |
|---|
| 47 | + return __hugetlb_cgroup_counter_from_cgroup(h_cg, idx, false); |
|---|
| 48 | +} |
|---|
| 49 | + |
|---|
| 50 | +static inline struct page_counter * |
|---|
| 51 | +hugetlb_cgroup_counter_from_cgroup_rsvd(struct hugetlb_cgroup *h_cg, int idx) |
|---|
| 52 | +{ |
|---|
| 53 | + return __hugetlb_cgroup_counter_from_cgroup(h_cg, idx, true); |
|---|
| 54 | +} |
|---|
| 35 | 55 | |
|---|
| 36 | 56 | static inline |
|---|
| 37 | 57 | struct hugetlb_cgroup *hugetlb_cgroup_from_css(struct cgroup_subsys_state *s) |
|---|
| .. | .. |
|---|
| 61 | 81 | int idx; |
|---|
| 62 | 82 | |
|---|
| 63 | 83 | for (idx = 0; idx < hugetlb_max_hstate; idx++) { |
|---|
| 64 | | - if (page_counter_read(&h_cg->hugepage[idx])) |
|---|
| 84 | + if (page_counter_read( |
|---|
| 85 | + hugetlb_cgroup_counter_from_cgroup(h_cg, idx))) |
|---|
| 65 | 86 | return true; |
|---|
| 66 | 87 | } |
|---|
| 67 | 88 | return false; |
|---|
| .. | .. |
|---|
| 73 | 94 | int idx; |
|---|
| 74 | 95 | |
|---|
| 75 | 96 | for (idx = 0; idx < HUGE_MAX_HSTATE; idx++) { |
|---|
| 76 | | - struct page_counter *counter = &h_cgroup->hugepage[idx]; |
|---|
| 77 | | - struct page_counter *parent = NULL; |
|---|
| 97 | + struct page_counter *fault_parent = NULL; |
|---|
| 98 | + struct page_counter *rsvd_parent = NULL; |
|---|
| 78 | 99 | unsigned long limit; |
|---|
| 79 | 100 | int ret; |
|---|
| 80 | 101 | |
|---|
| 81 | | - if (parent_h_cgroup) |
|---|
| 82 | | - parent = &parent_h_cgroup->hugepage[idx]; |
|---|
| 83 | | - page_counter_init(counter, parent); |
|---|
| 102 | + if (parent_h_cgroup) { |
|---|
| 103 | + fault_parent = hugetlb_cgroup_counter_from_cgroup( |
|---|
| 104 | + parent_h_cgroup, idx); |
|---|
| 105 | + rsvd_parent = hugetlb_cgroup_counter_from_cgroup_rsvd( |
|---|
| 106 | + parent_h_cgroup, idx); |
|---|
| 107 | + } |
|---|
| 108 | + page_counter_init(hugetlb_cgroup_counter_from_cgroup(h_cgroup, |
|---|
| 109 | + idx), |
|---|
| 110 | + fault_parent); |
|---|
| 111 | + page_counter_init( |
|---|
| 112 | + hugetlb_cgroup_counter_from_cgroup_rsvd(h_cgroup, idx), |
|---|
| 113 | + rsvd_parent); |
|---|
| 84 | 114 | |
|---|
| 85 | 115 | limit = round_down(PAGE_COUNTER_MAX, |
|---|
| 86 | 116 | 1 << huge_page_order(&hstates[idx])); |
|---|
| 87 | | - ret = page_counter_set_max(counter, limit); |
|---|
| 117 | + |
|---|
| 118 | + ret = page_counter_set_max( |
|---|
| 119 | + hugetlb_cgroup_counter_from_cgroup(h_cgroup, idx), |
|---|
| 120 | + limit); |
|---|
| 121 | + VM_BUG_ON(ret); |
|---|
| 122 | + ret = page_counter_set_max( |
|---|
| 123 | + hugetlb_cgroup_counter_from_cgroup_rsvd(h_cgroup, idx), |
|---|
| 124 | + limit); |
|---|
| 88 | 125 | VM_BUG_ON(ret); |
|---|
| 89 | 126 | } |
|---|
| 90 | 127 | } |
|---|
| .. | .. |
|---|
| 114 | 151 | kfree(h_cgroup); |
|---|
| 115 | 152 | } |
|---|
| 116 | 153 | |
|---|
| 117 | | - |
|---|
| 118 | 154 | /* |
|---|
| 119 | 155 | * Should be called with hugetlb_lock held. |
|---|
| 120 | 156 | * Since we are holding hugetlb_lock, pages cannot get moved from |
|---|
| .. | .. |
|---|
| 139 | 175 | if (!page_hcg || page_hcg != h_cg) |
|---|
| 140 | 176 | goto out; |
|---|
| 141 | 177 | |
|---|
| 142 | | - nr_pages = 1 << compound_order(page); |
|---|
| 178 | + nr_pages = compound_nr(page); |
|---|
| 143 | 179 | if (!parent) { |
|---|
| 144 | 180 | parent = root_h_cgroup; |
|---|
| 145 | 181 | /* root has no limit */ |
|---|
| .. | .. |
|---|
| 163 | 199 | struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(css); |
|---|
| 164 | 200 | struct hstate *h; |
|---|
| 165 | 201 | struct page *page; |
|---|
| 166 | | - int idx = 0; |
|---|
| 202 | + int idx; |
|---|
| 167 | 203 | |
|---|
| 168 | 204 | do { |
|---|
| 205 | + idx = 0; |
|---|
| 169 | 206 | for_each_hstate(h) { |
|---|
| 170 | 207 | spin_lock(&hugetlb_lock); |
|---|
| 171 | 208 | list_for_each_entry(page, &h->hugepage_activelist, lru) |
|---|
| .. | .. |
|---|
| 178 | 215 | } while (hugetlb_cgroup_have_usage(h_cg)); |
|---|
| 179 | 216 | } |
|---|
| 180 | 217 | |
|---|
| 181 | | -int hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages, |
|---|
| 182 | | - struct hugetlb_cgroup **ptr) |
|---|
| 218 | +static inline void hugetlb_event(struct hugetlb_cgroup *hugetlb, int idx, |
|---|
| 219 | + enum hugetlb_memory_event event) |
|---|
| 220 | +{ |
|---|
| 221 | + atomic_long_inc(&hugetlb->events_local[idx][event]); |
|---|
| 222 | + cgroup_file_notify(&hugetlb->events_local_file[idx]); |
|---|
| 223 | + |
|---|
| 224 | + do { |
|---|
| 225 | + atomic_long_inc(&hugetlb->events[idx][event]); |
|---|
| 226 | + cgroup_file_notify(&hugetlb->events_file[idx]); |
|---|
| 227 | + } while ((hugetlb = parent_hugetlb_cgroup(hugetlb)) && |
|---|
| 228 | + !hugetlb_cgroup_is_root(hugetlb)); |
|---|
| 229 | +} |
|---|
| 230 | + |
|---|
| 231 | +static int __hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages, |
|---|
| 232 | + struct hugetlb_cgroup **ptr, |
|---|
| 233 | + bool rsvd) |
|---|
| 183 | 234 | { |
|---|
| 184 | 235 | int ret = 0; |
|---|
| 185 | 236 | struct page_counter *counter; |
|---|
| .. | .. |
|---|
| 202 | 253 | } |
|---|
| 203 | 254 | rcu_read_unlock(); |
|---|
| 204 | 255 | |
|---|
| 205 | | - if (!page_counter_try_charge(&h_cg->hugepage[idx], nr_pages, &counter)) |
|---|
| 256 | + if (!page_counter_try_charge( |
|---|
| 257 | + __hugetlb_cgroup_counter_from_cgroup(h_cg, idx, rsvd), |
|---|
| 258 | + nr_pages, &counter)) { |
|---|
| 206 | 259 | ret = -ENOMEM; |
|---|
| 207 | | - css_put(&h_cg->css); |
|---|
| 260 | + hugetlb_event(h_cg, idx, HUGETLB_MAX); |
|---|
| 261 | + css_put(&h_cg->css); |
|---|
| 262 | + goto done; |
|---|
| 263 | + } |
|---|
| 264 | + /* Reservations take a reference to the css because they do not get |
|---|
| 265 | + * reparented. |
|---|
| 266 | + */ |
|---|
| 267 | + if (!rsvd) |
|---|
| 268 | + css_put(&h_cg->css); |
|---|
| 208 | 269 | done: |
|---|
| 209 | 270 | *ptr = h_cg; |
|---|
| 210 | 271 | return ret; |
|---|
| 211 | 272 | } |
|---|
| 212 | 273 | |
|---|
| 274 | +int hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages, |
|---|
| 275 | + struct hugetlb_cgroup **ptr) |
|---|
| 276 | +{ |
|---|
| 277 | + return __hugetlb_cgroup_charge_cgroup(idx, nr_pages, ptr, false); |
|---|
| 278 | +} |
|---|
| 279 | + |
|---|
| 280 | +int hugetlb_cgroup_charge_cgroup_rsvd(int idx, unsigned long nr_pages, |
|---|
| 281 | + struct hugetlb_cgroup **ptr) |
|---|
| 282 | +{ |
|---|
| 283 | + return __hugetlb_cgroup_charge_cgroup(idx, nr_pages, ptr, true); |
|---|
| 284 | +} |
|---|
| 285 | + |
|---|
| 213 | 286 | /* Should be called with hugetlb_lock held */ |
|---|
| 214 | | -void hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages, |
|---|
| 215 | | - struct hugetlb_cgroup *h_cg, |
|---|
| 216 | | - struct page *page) |
|---|
| 287 | +static void __hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages, |
|---|
| 288 | + struct hugetlb_cgroup *h_cg, |
|---|
| 289 | + struct page *page, bool rsvd) |
|---|
| 217 | 290 | { |
|---|
| 218 | 291 | if (hugetlb_cgroup_disabled() || !h_cg) |
|---|
| 219 | 292 | return; |
|---|
| 220 | 293 | |
|---|
| 221 | | - set_hugetlb_cgroup(page, h_cg); |
|---|
| 294 | + __set_hugetlb_cgroup(page, h_cg, rsvd); |
|---|
| 222 | 295 | return; |
|---|
| 296 | +} |
|---|
| 297 | + |
|---|
| 298 | +void hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages, |
|---|
| 299 | + struct hugetlb_cgroup *h_cg, |
|---|
| 300 | + struct page *page) |
|---|
| 301 | +{ |
|---|
| 302 | + __hugetlb_cgroup_commit_charge(idx, nr_pages, h_cg, page, false); |
|---|
| 303 | +} |
|---|
| 304 | + |
|---|
| 305 | +void hugetlb_cgroup_commit_charge_rsvd(int idx, unsigned long nr_pages, |
|---|
| 306 | + struct hugetlb_cgroup *h_cg, |
|---|
| 307 | + struct page *page) |
|---|
| 308 | +{ |
|---|
| 309 | + __hugetlb_cgroup_commit_charge(idx, nr_pages, h_cg, page, true); |
|---|
| 223 | 310 | } |
|---|
| 224 | 311 | |
|---|
| 225 | 312 | /* |
|---|
| 226 | 313 | * Should be called with hugetlb_lock held |
|---|
| 227 | 314 | */ |
|---|
| 228 | | -void hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages, |
|---|
| 229 | | - struct page *page) |
|---|
| 315 | +static void __hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages, |
|---|
| 316 | + struct page *page, bool rsvd) |
|---|
| 230 | 317 | { |
|---|
| 231 | 318 | struct hugetlb_cgroup *h_cg; |
|---|
| 232 | 319 | |
|---|
| 233 | 320 | if (hugetlb_cgroup_disabled()) |
|---|
| 234 | 321 | return; |
|---|
| 235 | 322 | lockdep_assert_held(&hugetlb_lock); |
|---|
| 236 | | - h_cg = hugetlb_cgroup_from_page(page); |
|---|
| 323 | + h_cg = __hugetlb_cgroup_from_page(page, rsvd); |
|---|
| 237 | 324 | if (unlikely(!h_cg)) |
|---|
| 238 | 325 | return; |
|---|
| 239 | | - set_hugetlb_cgroup(page, NULL); |
|---|
| 240 | | - page_counter_uncharge(&h_cg->hugepage[idx], nr_pages); |
|---|
| 326 | + __set_hugetlb_cgroup(page, NULL, rsvd); |
|---|
| 327 | + |
|---|
| 328 | + page_counter_uncharge(__hugetlb_cgroup_counter_from_cgroup(h_cg, idx, |
|---|
| 329 | + rsvd), |
|---|
| 330 | + nr_pages); |
|---|
| 331 | + |
|---|
| 332 | + if (rsvd) |
|---|
| 333 | + css_put(&h_cg->css); |
|---|
| 334 | + |
|---|
| 241 | 335 | return; |
|---|
| 242 | 336 | } |
|---|
| 243 | 337 | |
|---|
| 244 | | -void hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages, |
|---|
| 245 | | - struct hugetlb_cgroup *h_cg) |
|---|
| 338 | +void hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages, |
|---|
| 339 | + struct page *page) |
|---|
| 340 | +{ |
|---|
| 341 | + __hugetlb_cgroup_uncharge_page(idx, nr_pages, page, false); |
|---|
| 342 | +} |
|---|
| 343 | + |
|---|
| 344 | +void hugetlb_cgroup_uncharge_page_rsvd(int idx, unsigned long nr_pages, |
|---|
| 345 | + struct page *page) |
|---|
| 346 | +{ |
|---|
| 347 | + __hugetlb_cgroup_uncharge_page(idx, nr_pages, page, true); |
|---|
| 348 | +} |
|---|
| 349 | + |
|---|
| 350 | +static void __hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages, |
|---|
| 351 | + struct hugetlb_cgroup *h_cg, |
|---|
| 352 | + bool rsvd) |
|---|
| 246 | 353 | { |
|---|
| 247 | 354 | if (hugetlb_cgroup_disabled() || !h_cg) |
|---|
| 248 | 355 | return; |
|---|
| .. | .. |
|---|
| 250 | 357 | if (huge_page_order(&hstates[idx]) < HUGETLB_CGROUP_MIN_ORDER) |
|---|
| 251 | 358 | return; |
|---|
| 252 | 359 | |
|---|
| 253 | | - page_counter_uncharge(&h_cg->hugepage[idx], nr_pages); |
|---|
| 254 | | - return; |
|---|
| 360 | + page_counter_uncharge(__hugetlb_cgroup_counter_from_cgroup(h_cg, idx, |
|---|
| 361 | + rsvd), |
|---|
| 362 | + nr_pages); |
|---|
| 363 | + |
|---|
| 364 | + if (rsvd) |
|---|
| 365 | + css_put(&h_cg->css); |
|---|
| 366 | +} |
|---|
| 367 | + |
|---|
| 368 | +void hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages, |
|---|
| 369 | + struct hugetlb_cgroup *h_cg) |
|---|
| 370 | +{ |
|---|
| 371 | + __hugetlb_cgroup_uncharge_cgroup(idx, nr_pages, h_cg, false); |
|---|
| 372 | +} |
|---|
| 373 | + |
|---|
| 374 | +void hugetlb_cgroup_uncharge_cgroup_rsvd(int idx, unsigned long nr_pages, |
|---|
| 375 | + struct hugetlb_cgroup *h_cg) |
|---|
| 376 | +{ |
|---|
| 377 | + __hugetlb_cgroup_uncharge_cgroup(idx, nr_pages, h_cg, true); |
|---|
| 378 | +} |
|---|
| 379 | + |
|---|
| 380 | +void hugetlb_cgroup_uncharge_counter(struct resv_map *resv, unsigned long start, |
|---|
| 381 | + unsigned long end) |
|---|
| 382 | +{ |
|---|
| 383 | + if (hugetlb_cgroup_disabled() || !resv || !resv->reservation_counter || |
|---|
| 384 | + !resv->css) |
|---|
| 385 | + return; |
|---|
| 386 | + |
|---|
| 387 | + page_counter_uncharge(resv->reservation_counter, |
|---|
| 388 | + (end - start) * resv->pages_per_hpage); |
|---|
| 389 | + css_put(resv->css); |
|---|
| 390 | +} |
|---|
| 391 | + |
|---|
| 392 | +void hugetlb_cgroup_uncharge_file_region(struct resv_map *resv, |
|---|
| 393 | + struct file_region *rg, |
|---|
| 394 | + unsigned long nr_pages, |
|---|
| 395 | + bool region_del) |
|---|
| 396 | +{ |
|---|
| 397 | + if (hugetlb_cgroup_disabled() || !resv || !rg || !nr_pages) |
|---|
| 398 | + return; |
|---|
| 399 | + |
|---|
| 400 | + if (rg->reservation_counter && resv->pages_per_hpage && nr_pages > 0 && |
|---|
| 401 | + !resv->reservation_counter) { |
|---|
| 402 | + page_counter_uncharge(rg->reservation_counter, |
|---|
| 403 | + nr_pages * resv->pages_per_hpage); |
|---|
| 404 | + /* |
|---|
| 405 | + * Only do css_put(rg->css) when we delete the entire region |
|---|
| 406 | + * because one file_region must hold exactly one css reference. |
|---|
| 407 | + */ |
|---|
| 408 | + if (region_del) |
|---|
| 409 | + css_put(rg->css); |
|---|
| 410 | + } |
|---|
| 255 | 411 | } |
|---|
| 256 | 412 | |
|---|
| 257 | 413 | enum { |
|---|
| 258 | 414 | RES_USAGE, |
|---|
| 415 | + RES_RSVD_USAGE, |
|---|
| 259 | 416 | RES_LIMIT, |
|---|
| 417 | + RES_RSVD_LIMIT, |
|---|
| 260 | 418 | RES_MAX_USAGE, |
|---|
| 419 | + RES_RSVD_MAX_USAGE, |
|---|
| 261 | 420 | RES_FAILCNT, |
|---|
| 421 | + RES_RSVD_FAILCNT, |
|---|
| 262 | 422 | }; |
|---|
| 263 | 423 | |
|---|
| 264 | 424 | static u64 hugetlb_cgroup_read_u64(struct cgroup_subsys_state *css, |
|---|
| 265 | 425 | struct cftype *cft) |
|---|
| 266 | 426 | { |
|---|
| 267 | 427 | struct page_counter *counter; |
|---|
| 428 | + struct page_counter *rsvd_counter; |
|---|
| 268 | 429 | struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(css); |
|---|
| 269 | 430 | |
|---|
| 270 | 431 | counter = &h_cg->hugepage[MEMFILE_IDX(cft->private)]; |
|---|
| 432 | + rsvd_counter = &h_cg->rsvd_hugepage[MEMFILE_IDX(cft->private)]; |
|---|
| 271 | 433 | |
|---|
| 272 | 434 | switch (MEMFILE_ATTR(cft->private)) { |
|---|
| 273 | 435 | case RES_USAGE: |
|---|
| 274 | 436 | return (u64)page_counter_read(counter) * PAGE_SIZE; |
|---|
| 437 | + case RES_RSVD_USAGE: |
|---|
| 438 | + return (u64)page_counter_read(rsvd_counter) * PAGE_SIZE; |
|---|
| 275 | 439 | case RES_LIMIT: |
|---|
| 276 | 440 | return (u64)counter->max * PAGE_SIZE; |
|---|
| 441 | + case RES_RSVD_LIMIT: |
|---|
| 442 | + return (u64)rsvd_counter->max * PAGE_SIZE; |
|---|
| 277 | 443 | case RES_MAX_USAGE: |
|---|
| 278 | 444 | return (u64)counter->watermark * PAGE_SIZE; |
|---|
| 445 | + case RES_RSVD_MAX_USAGE: |
|---|
| 446 | + return (u64)rsvd_counter->watermark * PAGE_SIZE; |
|---|
| 279 | 447 | case RES_FAILCNT: |
|---|
| 280 | 448 | return counter->failcnt; |
|---|
| 449 | + case RES_RSVD_FAILCNT: |
|---|
| 450 | + return rsvd_counter->failcnt; |
|---|
| 281 | 451 | default: |
|---|
| 282 | 452 | BUG(); |
|---|
| 283 | 453 | } |
|---|
| 284 | 454 | } |
|---|
| 285 | 455 | |
|---|
| 456 | +static int hugetlb_cgroup_read_u64_max(struct seq_file *seq, void *v) |
|---|
| 457 | +{ |
|---|
| 458 | + int idx; |
|---|
| 459 | + u64 val; |
|---|
| 460 | + struct cftype *cft = seq_cft(seq); |
|---|
| 461 | + unsigned long limit; |
|---|
| 462 | + struct page_counter *counter; |
|---|
| 463 | + struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(seq_css(seq)); |
|---|
| 464 | + |
|---|
| 465 | + idx = MEMFILE_IDX(cft->private); |
|---|
| 466 | + counter = &h_cg->hugepage[idx]; |
|---|
| 467 | + |
|---|
| 468 | + limit = round_down(PAGE_COUNTER_MAX, |
|---|
| 469 | + 1 << huge_page_order(&hstates[idx])); |
|---|
| 470 | + |
|---|
| 471 | + switch (MEMFILE_ATTR(cft->private)) { |
|---|
| 472 | + case RES_RSVD_USAGE: |
|---|
| 473 | + counter = &h_cg->rsvd_hugepage[idx]; |
|---|
| 474 | + fallthrough; |
|---|
| 475 | + case RES_USAGE: |
|---|
| 476 | + val = (u64)page_counter_read(counter); |
|---|
| 477 | + seq_printf(seq, "%llu\n", val * PAGE_SIZE); |
|---|
| 478 | + break; |
|---|
| 479 | + case RES_RSVD_LIMIT: |
|---|
| 480 | + counter = &h_cg->rsvd_hugepage[idx]; |
|---|
| 481 | + fallthrough; |
|---|
| 482 | + case RES_LIMIT: |
|---|
| 483 | + val = (u64)counter->max; |
|---|
| 484 | + if (val == limit) |
|---|
| 485 | + seq_puts(seq, "max\n"); |
|---|
| 486 | + else |
|---|
| 487 | + seq_printf(seq, "%llu\n", val * PAGE_SIZE); |
|---|
| 488 | + break; |
|---|
| 489 | + default: |
|---|
| 490 | + BUG(); |
|---|
| 491 | + } |
|---|
| 492 | + |
|---|
| 493 | + return 0; |
|---|
| 494 | +} |
|---|
| 495 | + |
|---|
| 286 | 496 | static DEFINE_MUTEX(hugetlb_limit_mutex); |
|---|
| 287 | 497 | |
|---|
| 288 | 498 | static ssize_t hugetlb_cgroup_write(struct kernfs_open_file *of, |
|---|
| 289 | | - char *buf, size_t nbytes, loff_t off) |
|---|
| 499 | + char *buf, size_t nbytes, loff_t off, |
|---|
| 500 | + const char *max) |
|---|
| 290 | 501 | { |
|---|
| 291 | 502 | int ret, idx; |
|---|
| 292 | 503 | unsigned long nr_pages; |
|---|
| 293 | 504 | struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(of_css(of)); |
|---|
| 505 | + bool rsvd = false; |
|---|
| 294 | 506 | |
|---|
| 295 | 507 | if (hugetlb_cgroup_is_root(h_cg)) /* Can't set limit on root */ |
|---|
| 296 | 508 | return -EINVAL; |
|---|
| 297 | 509 | |
|---|
| 298 | 510 | buf = strstrip(buf); |
|---|
| 299 | | - ret = page_counter_memparse(buf, "-1", &nr_pages); |
|---|
| 511 | + ret = page_counter_memparse(buf, max, &nr_pages); |
|---|
| 300 | 512 | if (ret) |
|---|
| 301 | 513 | return ret; |
|---|
| 302 | 514 | |
|---|
| .. | .. |
|---|
| 304 | 516 | nr_pages = round_down(nr_pages, 1 << huge_page_order(&hstates[idx])); |
|---|
| 305 | 517 | |
|---|
| 306 | 518 | switch (MEMFILE_ATTR(of_cft(of)->private)) { |
|---|
| 519 | + case RES_RSVD_LIMIT: |
|---|
| 520 | + rsvd = true; |
|---|
| 521 | + fallthrough; |
|---|
| 307 | 522 | case RES_LIMIT: |
|---|
| 308 | 523 | mutex_lock(&hugetlb_limit_mutex); |
|---|
| 309 | | - ret = page_counter_set_max(&h_cg->hugepage[idx], nr_pages); |
|---|
| 524 | + ret = page_counter_set_max( |
|---|
| 525 | + __hugetlb_cgroup_counter_from_cgroup(h_cg, idx, rsvd), |
|---|
| 526 | + nr_pages); |
|---|
| 310 | 527 | mutex_unlock(&hugetlb_limit_mutex); |
|---|
| 311 | 528 | break; |
|---|
| 312 | 529 | default: |
|---|
| .. | .. |
|---|
| 316 | 533 | return ret ?: nbytes; |
|---|
| 317 | 534 | } |
|---|
| 318 | 535 | |
|---|
| 536 | +static ssize_t hugetlb_cgroup_write_legacy(struct kernfs_open_file *of, |
|---|
| 537 | + char *buf, size_t nbytes, loff_t off) |
|---|
| 538 | +{ |
|---|
| 539 | + return hugetlb_cgroup_write(of, buf, nbytes, off, "-1"); |
|---|
| 540 | +} |
|---|
| 541 | + |
|---|
| 542 | +static ssize_t hugetlb_cgroup_write_dfl(struct kernfs_open_file *of, |
|---|
| 543 | + char *buf, size_t nbytes, loff_t off) |
|---|
| 544 | +{ |
|---|
| 545 | + return hugetlb_cgroup_write(of, buf, nbytes, off, "max"); |
|---|
| 546 | +} |
|---|
| 547 | + |
|---|
| 319 | 548 | static ssize_t hugetlb_cgroup_reset(struct kernfs_open_file *of, |
|---|
| 320 | 549 | char *buf, size_t nbytes, loff_t off) |
|---|
| 321 | 550 | { |
|---|
| 322 | 551 | int ret = 0; |
|---|
| 323 | | - struct page_counter *counter; |
|---|
| 552 | + struct page_counter *counter, *rsvd_counter; |
|---|
| 324 | 553 | struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(of_css(of)); |
|---|
| 325 | 554 | |
|---|
| 326 | 555 | counter = &h_cg->hugepage[MEMFILE_IDX(of_cft(of)->private)]; |
|---|
| 556 | + rsvd_counter = &h_cg->rsvd_hugepage[MEMFILE_IDX(of_cft(of)->private)]; |
|---|
| 327 | 557 | |
|---|
| 328 | 558 | switch (MEMFILE_ATTR(of_cft(of)->private)) { |
|---|
| 329 | 559 | case RES_MAX_USAGE: |
|---|
| 330 | 560 | page_counter_reset_watermark(counter); |
|---|
| 331 | 561 | break; |
|---|
| 562 | + case RES_RSVD_MAX_USAGE: |
|---|
| 563 | + page_counter_reset_watermark(rsvd_counter); |
|---|
| 564 | + break; |
|---|
| 332 | 565 | case RES_FAILCNT: |
|---|
| 333 | 566 | counter->failcnt = 0; |
|---|
| 567 | + break; |
|---|
| 568 | + case RES_RSVD_FAILCNT: |
|---|
| 569 | + rsvd_counter->failcnt = 0; |
|---|
| 334 | 570 | break; |
|---|
| 335 | 571 | default: |
|---|
| 336 | 572 | ret = -EINVAL; |
|---|
| .. | .. |
|---|
| 350 | 586 | return buf; |
|---|
| 351 | 587 | } |
|---|
| 352 | 588 | |
|---|
| 353 | | -static void __init __hugetlb_cgroup_file_init(int idx) |
|---|
| 589 | +static int __hugetlb_events_show(struct seq_file *seq, bool local) |
|---|
| 590 | +{ |
|---|
| 591 | + int idx; |
|---|
| 592 | + long max; |
|---|
| 593 | + struct cftype *cft = seq_cft(seq); |
|---|
| 594 | + struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(seq_css(seq)); |
|---|
| 595 | + |
|---|
| 596 | + idx = MEMFILE_IDX(cft->private); |
|---|
| 597 | + |
|---|
| 598 | + if (local) |
|---|
| 599 | + max = atomic_long_read(&h_cg->events_local[idx][HUGETLB_MAX]); |
|---|
| 600 | + else |
|---|
| 601 | + max = atomic_long_read(&h_cg->events[idx][HUGETLB_MAX]); |
|---|
| 602 | + |
|---|
| 603 | + seq_printf(seq, "max %lu\n", max); |
|---|
| 604 | + |
|---|
| 605 | + return 0; |
|---|
| 606 | +} |
|---|
| 607 | + |
|---|
| 608 | +static int hugetlb_events_show(struct seq_file *seq, void *v) |
|---|
| 609 | +{ |
|---|
| 610 | + return __hugetlb_events_show(seq, false); |
|---|
| 611 | +} |
|---|
| 612 | + |
|---|
| 613 | +static int hugetlb_events_local_show(struct seq_file *seq, void *v) |
|---|
| 614 | +{ |
|---|
| 615 | + return __hugetlb_events_show(seq, true); |
|---|
| 616 | +} |
|---|
| 617 | + |
|---|
| 618 | +static void __init __hugetlb_cgroup_file_dfl_init(int idx) |
|---|
| 354 | 619 | { |
|---|
| 355 | 620 | char buf[32]; |
|---|
| 356 | 621 | struct cftype *cft; |
|---|
| 357 | 622 | struct hstate *h = &hstates[idx]; |
|---|
| 358 | 623 | |
|---|
| 359 | 624 | /* format the size */ |
|---|
| 360 | | - mem_fmt(buf, 32, huge_page_size(h)); |
|---|
| 625 | + mem_fmt(buf, sizeof(buf), huge_page_size(h)); |
|---|
| 361 | 626 | |
|---|
| 362 | 627 | /* Add the limit file */ |
|---|
| 363 | | - cft = &h->cgroup_files[0]; |
|---|
| 628 | + cft = &h->cgroup_files_dfl[0]; |
|---|
| 629 | + snprintf(cft->name, MAX_CFTYPE_NAME, "%s.max", buf); |
|---|
| 630 | + cft->private = MEMFILE_PRIVATE(idx, RES_LIMIT); |
|---|
| 631 | + cft->seq_show = hugetlb_cgroup_read_u64_max; |
|---|
| 632 | + cft->write = hugetlb_cgroup_write_dfl; |
|---|
| 633 | + cft->flags = CFTYPE_NOT_ON_ROOT; |
|---|
| 634 | + |
|---|
| 635 | + /* Add the reservation limit file */ |
|---|
| 636 | + cft = &h->cgroup_files_dfl[1]; |
|---|
| 637 | + snprintf(cft->name, MAX_CFTYPE_NAME, "%s.rsvd.max", buf); |
|---|
| 638 | + cft->private = MEMFILE_PRIVATE(idx, RES_RSVD_LIMIT); |
|---|
| 639 | + cft->seq_show = hugetlb_cgroup_read_u64_max; |
|---|
| 640 | + cft->write = hugetlb_cgroup_write_dfl; |
|---|
| 641 | + cft->flags = CFTYPE_NOT_ON_ROOT; |
|---|
| 642 | + |
|---|
| 643 | + /* Add the current usage file */ |
|---|
| 644 | + cft = &h->cgroup_files_dfl[2]; |
|---|
| 645 | + snprintf(cft->name, MAX_CFTYPE_NAME, "%s.current", buf); |
|---|
| 646 | + cft->private = MEMFILE_PRIVATE(idx, RES_USAGE); |
|---|
| 647 | + cft->seq_show = hugetlb_cgroup_read_u64_max; |
|---|
| 648 | + cft->flags = CFTYPE_NOT_ON_ROOT; |
|---|
| 649 | + |
|---|
| 650 | + /* Add the current reservation usage file */ |
|---|
| 651 | + cft = &h->cgroup_files_dfl[3]; |
|---|
| 652 | + snprintf(cft->name, MAX_CFTYPE_NAME, "%s.rsvd.current", buf); |
|---|
| 653 | + cft->private = MEMFILE_PRIVATE(idx, RES_RSVD_USAGE); |
|---|
| 654 | + cft->seq_show = hugetlb_cgroup_read_u64_max; |
|---|
| 655 | + cft->flags = CFTYPE_NOT_ON_ROOT; |
|---|
| 656 | + |
|---|
| 657 | + /* Add the events file */ |
|---|
| 658 | + cft = &h->cgroup_files_dfl[4]; |
|---|
| 659 | + snprintf(cft->name, MAX_CFTYPE_NAME, "%s.events", buf); |
|---|
| 660 | + cft->private = MEMFILE_PRIVATE(idx, 0); |
|---|
| 661 | + cft->seq_show = hugetlb_events_show; |
|---|
| 662 | + cft->file_offset = offsetof(struct hugetlb_cgroup, events_file[idx]); |
|---|
| 663 | + cft->flags = CFTYPE_NOT_ON_ROOT; |
|---|
| 664 | + |
|---|
| 665 | + /* Add the events.local file */ |
|---|
| 666 | + cft = &h->cgroup_files_dfl[5]; |
|---|
| 667 | + snprintf(cft->name, MAX_CFTYPE_NAME, "%s.events.local", buf); |
|---|
| 668 | + cft->private = MEMFILE_PRIVATE(idx, 0); |
|---|
| 669 | + cft->seq_show = hugetlb_events_local_show; |
|---|
| 670 | + cft->file_offset = offsetof(struct hugetlb_cgroup, |
|---|
| 671 | + events_local_file[idx]); |
|---|
| 672 | + cft->flags = CFTYPE_NOT_ON_ROOT; |
|---|
| 673 | + |
|---|
| 674 | + /* NULL terminate the last cft */ |
|---|
| 675 | + cft = &h->cgroup_files_dfl[6]; |
|---|
| 676 | + memset(cft, 0, sizeof(*cft)); |
|---|
| 677 | + |
|---|
| 678 | + WARN_ON(cgroup_add_dfl_cftypes(&hugetlb_cgrp_subsys, |
|---|
| 679 | + h->cgroup_files_dfl)); |
|---|
| 680 | +} |
|---|
| 681 | + |
|---|
| 682 | +static void __init __hugetlb_cgroup_file_legacy_init(int idx) |
|---|
| 683 | +{ |
|---|
| 684 | + char buf[32]; |
|---|
| 685 | + struct cftype *cft; |
|---|
| 686 | + struct hstate *h = &hstates[idx]; |
|---|
| 687 | + |
|---|
| 688 | + /* format the size */ |
|---|
| 689 | + mem_fmt(buf, sizeof(buf), huge_page_size(h)); |
|---|
| 690 | + |
|---|
| 691 | + /* Add the limit file */ |
|---|
| 692 | + cft = &h->cgroup_files_legacy[0]; |
|---|
| 364 | 693 | snprintf(cft->name, MAX_CFTYPE_NAME, "%s.limit_in_bytes", buf); |
|---|
| 365 | 694 | cft->private = MEMFILE_PRIVATE(idx, RES_LIMIT); |
|---|
| 366 | 695 | cft->read_u64 = hugetlb_cgroup_read_u64; |
|---|
| 367 | | - cft->write = hugetlb_cgroup_write; |
|---|
| 696 | + cft->write = hugetlb_cgroup_write_legacy; |
|---|
| 697 | + |
|---|
| 698 | + /* Add the reservation limit file */ |
|---|
| 699 | + cft = &h->cgroup_files_legacy[1]; |
|---|
| 700 | + snprintf(cft->name, MAX_CFTYPE_NAME, "%s.rsvd.limit_in_bytes", buf); |
|---|
| 701 | + cft->private = MEMFILE_PRIVATE(idx, RES_RSVD_LIMIT); |
|---|
| 702 | + cft->read_u64 = hugetlb_cgroup_read_u64; |
|---|
| 703 | + cft->write = hugetlb_cgroup_write_legacy; |
|---|
| 368 | 704 | |
|---|
| 369 | 705 | /* Add the usage file */ |
|---|
| 370 | | - cft = &h->cgroup_files[1]; |
|---|
| 706 | + cft = &h->cgroup_files_legacy[2]; |
|---|
| 371 | 707 | snprintf(cft->name, MAX_CFTYPE_NAME, "%s.usage_in_bytes", buf); |
|---|
| 372 | 708 | cft->private = MEMFILE_PRIVATE(idx, RES_USAGE); |
|---|
| 373 | 709 | cft->read_u64 = hugetlb_cgroup_read_u64; |
|---|
| 374 | 710 | |
|---|
| 711 | + /* Add the reservation usage file */ |
|---|
| 712 | + cft = &h->cgroup_files_legacy[3]; |
|---|
| 713 | + snprintf(cft->name, MAX_CFTYPE_NAME, "%s.rsvd.usage_in_bytes", buf); |
|---|
| 714 | + cft->private = MEMFILE_PRIVATE(idx, RES_RSVD_USAGE); |
|---|
| 715 | + cft->read_u64 = hugetlb_cgroup_read_u64; |
|---|
| 716 | + |
|---|
| 375 | 717 | /* Add the MAX usage file */ |
|---|
| 376 | | - cft = &h->cgroup_files[2]; |
|---|
| 718 | + cft = &h->cgroup_files_legacy[4]; |
|---|
| 377 | 719 | snprintf(cft->name, MAX_CFTYPE_NAME, "%s.max_usage_in_bytes", buf); |
|---|
| 378 | 720 | cft->private = MEMFILE_PRIVATE(idx, RES_MAX_USAGE); |
|---|
| 379 | 721 | cft->write = hugetlb_cgroup_reset; |
|---|
| 380 | 722 | cft->read_u64 = hugetlb_cgroup_read_u64; |
|---|
| 381 | 723 | |
|---|
| 724 | + /* Add the MAX reservation usage file */ |
|---|
| 725 | + cft = &h->cgroup_files_legacy[5]; |
|---|
| 726 | + snprintf(cft->name, MAX_CFTYPE_NAME, "%s.rsvd.max_usage_in_bytes", buf); |
|---|
| 727 | + cft->private = MEMFILE_PRIVATE(idx, RES_RSVD_MAX_USAGE); |
|---|
| 728 | + cft->write = hugetlb_cgroup_reset; |
|---|
| 729 | + cft->read_u64 = hugetlb_cgroup_read_u64; |
|---|
| 730 | + |
|---|
| 382 | 731 | /* Add the failcntfile */ |
|---|
| 383 | | - cft = &h->cgroup_files[3]; |
|---|
| 732 | + cft = &h->cgroup_files_legacy[6]; |
|---|
| 384 | 733 | snprintf(cft->name, MAX_CFTYPE_NAME, "%s.failcnt", buf); |
|---|
| 385 | | - cft->private = MEMFILE_PRIVATE(idx, RES_FAILCNT); |
|---|
| 734 | + cft->private = MEMFILE_PRIVATE(idx, RES_FAILCNT); |
|---|
| 735 | + cft->write = hugetlb_cgroup_reset; |
|---|
| 736 | + cft->read_u64 = hugetlb_cgroup_read_u64; |
|---|
| 737 | + |
|---|
| 738 | + /* Add the reservation failcntfile */ |
|---|
| 739 | + cft = &h->cgroup_files_legacy[7]; |
|---|
| 740 | + snprintf(cft->name, MAX_CFTYPE_NAME, "%s.rsvd.failcnt", buf); |
|---|
| 741 | + cft->private = MEMFILE_PRIVATE(idx, RES_RSVD_FAILCNT); |
|---|
| 386 | 742 | cft->write = hugetlb_cgroup_reset; |
|---|
| 387 | 743 | cft->read_u64 = hugetlb_cgroup_read_u64; |
|---|
| 388 | 744 | |
|---|
| 389 | 745 | /* NULL terminate the last cft */ |
|---|
| 390 | | - cft = &h->cgroup_files[4]; |
|---|
| 746 | + cft = &h->cgroup_files_legacy[8]; |
|---|
| 391 | 747 | memset(cft, 0, sizeof(*cft)); |
|---|
| 392 | 748 | |
|---|
| 393 | 749 | WARN_ON(cgroup_add_legacy_cftypes(&hugetlb_cgrp_subsys, |
|---|
| 394 | | - h->cgroup_files)); |
|---|
| 750 | + h->cgroup_files_legacy)); |
|---|
| 751 | +} |
|---|
| 752 | + |
|---|
| 753 | +static void __init __hugetlb_cgroup_file_init(int idx) |
|---|
| 754 | +{ |
|---|
| 755 | + __hugetlb_cgroup_file_dfl_init(idx); |
|---|
| 756 | + __hugetlb_cgroup_file_legacy_init(idx); |
|---|
| 395 | 757 | } |
|---|
| 396 | 758 | |
|---|
| 397 | 759 | void __init hugetlb_cgroup_file_init(void) |
|---|
| .. | .. |
|---|
| 416 | 778 | void hugetlb_cgroup_migrate(struct page *oldhpage, struct page *newhpage) |
|---|
| 417 | 779 | { |
|---|
| 418 | 780 | struct hugetlb_cgroup *h_cg; |
|---|
| 781 | + struct hugetlb_cgroup *h_cg_rsvd; |
|---|
| 419 | 782 | struct hstate *h = page_hstate(oldhpage); |
|---|
| 420 | 783 | |
|---|
| 421 | 784 | if (hugetlb_cgroup_disabled()) |
|---|
| .. | .. |
|---|
| 424 | 787 | VM_BUG_ON_PAGE(!PageHuge(oldhpage), oldhpage); |
|---|
| 425 | 788 | spin_lock(&hugetlb_lock); |
|---|
| 426 | 789 | h_cg = hugetlb_cgroup_from_page(oldhpage); |
|---|
| 790 | + h_cg_rsvd = hugetlb_cgroup_from_page_rsvd(oldhpage); |
|---|
| 427 | 791 | set_hugetlb_cgroup(oldhpage, NULL); |
|---|
| 792 | + set_hugetlb_cgroup_rsvd(oldhpage, NULL); |
|---|
| 428 | 793 | |
|---|
| 429 | 794 | /* move the h_cg details to new cgroup */ |
|---|
| 430 | 795 | set_hugetlb_cgroup(newhpage, h_cg); |
|---|
| 796 | + set_hugetlb_cgroup_rsvd(newhpage, h_cg_rsvd); |
|---|
| 431 | 797 | list_move(&newhpage->lru, &h->hugepage_activelist); |
|---|
| 432 | 798 | spin_unlock(&hugetlb_lock); |
|---|
| 433 | 799 | return; |
|---|
| 434 | 800 | } |
|---|
| 435 | 801 | |
|---|
| 802 | +static struct cftype hugetlb_files[] = { |
|---|
| 803 | + {} /* terminate */ |
|---|
| 804 | +}; |
|---|
| 805 | + |
|---|
| 436 | 806 | struct cgroup_subsys hugetlb_cgrp_subsys = { |
|---|
| 437 | 807 | .css_alloc = hugetlb_cgroup_css_alloc, |
|---|
| 438 | 808 | .css_offline = hugetlb_cgroup_css_offline, |
|---|
| 439 | 809 | .css_free = hugetlb_cgroup_css_free, |
|---|
| 810 | + .dfl_cftypes = hugetlb_files, |
|---|
| 811 | + .legacy_cftypes = hugetlb_files, |
|---|
| 440 | 812 | }; |
|---|