.. | .. |
---|
3 | 3 | * Copyright IBM Corporation, 2012 |
---|
4 | 4 | * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> |
---|
5 | 5 | * |
---|
| 6 | + * Cgroup v2 |
---|
| 7 | + * Copyright (C) 2019 Red Hat, Inc. |
---|
| 8 | + * Author: Giuseppe Scrivano <gscrivan@redhat.com> |
---|
| 9 | + * |
---|
6 | 10 | * This program is free software; you can redistribute it and/or modify it |
---|
7 | 11 | * under the terms of version 2.1 of the GNU Lesser General Public License |
---|
8 | 12 | * as published by the Free Software Foundation. |
---|
.. | .. |
---|
19 | 23 | #include <linux/hugetlb.h> |
---|
20 | 24 | #include <linux/hugetlb_cgroup.h> |
---|
21 | 25 | |
---|
22 | | -struct hugetlb_cgroup { |
---|
23 | | - struct cgroup_subsys_state css; |
---|
24 | | - /* |
---|
25 | | - * the counter to account for hugepages from hugetlb. |
---|
26 | | - */ |
---|
27 | | - struct page_counter hugepage[HUGE_MAX_HSTATE]; |
---|
28 | | -}; |
---|
29 | | - |
---|
30 | 26 | #define MEMFILE_PRIVATE(x, val) (((x) << 16) | (val)) |
---|
31 | 27 | #define MEMFILE_IDX(val) (((val) >> 16) & 0xffff) |
---|
32 | 28 | #define MEMFILE_ATTR(val) ((val) & 0xffff) |
---|
33 | 29 | |
---|
| 30 | +#define hugetlb_cgroup_from_counter(counter, idx) \ |
---|
| 31 | + container_of(counter, struct hugetlb_cgroup, hugepage[idx]) |
---|
| 32 | + |
---|
34 | 33 | static struct hugetlb_cgroup *root_h_cgroup __read_mostly; |
---|
| 34 | + |
---|
| 35 | +static inline struct page_counter * |
---|
| 36 | +__hugetlb_cgroup_counter_from_cgroup(struct hugetlb_cgroup *h_cg, int idx, |
---|
| 37 | + bool rsvd) |
---|
| 38 | +{ |
---|
| 39 | + if (rsvd) |
---|
| 40 | + return &h_cg->rsvd_hugepage[idx]; |
---|
| 41 | + return &h_cg->hugepage[idx]; |
---|
| 42 | +} |
---|
| 43 | + |
---|
| 44 | +static inline struct page_counter * |
---|
| 45 | +hugetlb_cgroup_counter_from_cgroup(struct hugetlb_cgroup *h_cg, int idx) |
---|
| 46 | +{ |
---|
| 47 | + return __hugetlb_cgroup_counter_from_cgroup(h_cg, idx, false); |
---|
| 48 | +} |
---|
| 49 | + |
---|
| 50 | +static inline struct page_counter * |
---|
| 51 | +hugetlb_cgroup_counter_from_cgroup_rsvd(struct hugetlb_cgroup *h_cg, int idx) |
---|
| 52 | +{ |
---|
| 53 | + return __hugetlb_cgroup_counter_from_cgroup(h_cg, idx, true); |
---|
| 54 | +} |
---|
35 | 55 | |
---|
36 | 56 | static inline |
---|
37 | 57 | struct hugetlb_cgroup *hugetlb_cgroup_from_css(struct cgroup_subsys_state *s) |
---|
.. | .. |
---|
61 | 81 | int idx; |
---|
62 | 82 | |
---|
63 | 83 | for (idx = 0; idx < hugetlb_max_hstate; idx++) { |
---|
64 | | - if (page_counter_read(&h_cg->hugepage[idx])) |
---|
| 84 | + if (page_counter_read( |
---|
| 85 | + hugetlb_cgroup_counter_from_cgroup(h_cg, idx))) |
---|
65 | 86 | return true; |
---|
66 | 87 | } |
---|
67 | 88 | return false; |
---|
.. | .. |
---|
73 | 94 | int idx; |
---|
74 | 95 | |
---|
75 | 96 | for (idx = 0; idx < HUGE_MAX_HSTATE; idx++) { |
---|
76 | | - struct page_counter *counter = &h_cgroup->hugepage[idx]; |
---|
77 | | - struct page_counter *parent = NULL; |
---|
| 97 | + struct page_counter *fault_parent = NULL; |
---|
| 98 | + struct page_counter *rsvd_parent = NULL; |
---|
78 | 99 | unsigned long limit; |
---|
79 | 100 | int ret; |
---|
80 | 101 | |
---|
81 | | - if (parent_h_cgroup) |
---|
82 | | - parent = &parent_h_cgroup->hugepage[idx]; |
---|
83 | | - page_counter_init(counter, parent); |
---|
| 102 | + if (parent_h_cgroup) { |
---|
| 103 | + fault_parent = hugetlb_cgroup_counter_from_cgroup( |
---|
| 104 | + parent_h_cgroup, idx); |
---|
| 105 | + rsvd_parent = hugetlb_cgroup_counter_from_cgroup_rsvd( |
---|
| 106 | + parent_h_cgroup, idx); |
---|
| 107 | + } |
---|
| 108 | + page_counter_init(hugetlb_cgroup_counter_from_cgroup(h_cgroup, |
---|
| 109 | + idx), |
---|
| 110 | + fault_parent); |
---|
| 111 | + page_counter_init( |
---|
| 112 | + hugetlb_cgroup_counter_from_cgroup_rsvd(h_cgroup, idx), |
---|
| 113 | + rsvd_parent); |
---|
84 | 114 | |
---|
85 | 115 | limit = round_down(PAGE_COUNTER_MAX, |
---|
86 | 116 | 1 << huge_page_order(&hstates[idx])); |
---|
87 | | - ret = page_counter_set_max(counter, limit); |
---|
| 117 | + |
---|
| 118 | + ret = page_counter_set_max( |
---|
| 119 | + hugetlb_cgroup_counter_from_cgroup(h_cgroup, idx), |
---|
| 120 | + limit); |
---|
| 121 | + VM_BUG_ON(ret); |
---|
| 122 | + ret = page_counter_set_max( |
---|
| 123 | + hugetlb_cgroup_counter_from_cgroup_rsvd(h_cgroup, idx), |
---|
| 124 | + limit); |
---|
88 | 125 | VM_BUG_ON(ret); |
---|
89 | 126 | } |
---|
90 | 127 | } |
---|
.. | .. |
---|
114 | 151 | kfree(h_cgroup); |
---|
115 | 152 | } |
---|
116 | 153 | |
---|
117 | | - |
---|
118 | 154 | /* |
---|
119 | 155 | * Should be called with hugetlb_lock held. |
---|
120 | 156 | * Since we are holding hugetlb_lock, pages cannot get moved from |
---|
.. | .. |
---|
139 | 175 | if (!page_hcg || page_hcg != h_cg) |
---|
140 | 176 | goto out; |
---|
141 | 177 | |
---|
142 | | - nr_pages = 1 << compound_order(page); |
---|
| 178 | + nr_pages = compound_nr(page); |
---|
143 | 179 | if (!parent) { |
---|
144 | 180 | parent = root_h_cgroup; |
---|
145 | 181 | /* root has no limit */ |
---|
.. | .. |
---|
163 | 199 | struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(css); |
---|
164 | 200 | struct hstate *h; |
---|
165 | 201 | struct page *page; |
---|
166 | | - int idx = 0; |
---|
| 202 | + int idx; |
---|
167 | 203 | |
---|
168 | 204 | do { |
---|
| 205 | + idx = 0; |
---|
169 | 206 | for_each_hstate(h) { |
---|
170 | 207 | spin_lock(&hugetlb_lock); |
---|
171 | 208 | list_for_each_entry(page, &h->hugepage_activelist, lru) |
---|
.. | .. |
---|
178 | 215 | } while (hugetlb_cgroup_have_usage(h_cg)); |
---|
179 | 216 | } |
---|
180 | 217 | |
---|
181 | | -int hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages, |
---|
182 | | - struct hugetlb_cgroup **ptr) |
---|
| 218 | +static inline void hugetlb_event(struct hugetlb_cgroup *hugetlb, int idx, |
---|
| 219 | + enum hugetlb_memory_event event) |
---|
| 220 | +{ |
---|
| 221 | + atomic_long_inc(&hugetlb->events_local[idx][event]); |
---|
| 222 | + cgroup_file_notify(&hugetlb->events_local_file[idx]); |
---|
| 223 | + |
---|
| 224 | + do { |
---|
| 225 | + atomic_long_inc(&hugetlb->events[idx][event]); |
---|
| 226 | + cgroup_file_notify(&hugetlb->events_file[idx]); |
---|
| 227 | + } while ((hugetlb = parent_hugetlb_cgroup(hugetlb)) && |
---|
| 228 | + !hugetlb_cgroup_is_root(hugetlb)); |
---|
| 229 | +} |
---|
| 230 | + |
---|
| 231 | +static int __hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages, |
---|
| 232 | + struct hugetlb_cgroup **ptr, |
---|
| 233 | + bool rsvd) |
---|
183 | 234 | { |
---|
184 | 235 | int ret = 0; |
---|
185 | 236 | struct page_counter *counter; |
---|
.. | .. |
---|
202 | 253 | } |
---|
203 | 254 | rcu_read_unlock(); |
---|
204 | 255 | |
---|
205 | | - if (!page_counter_try_charge(&h_cg->hugepage[idx], nr_pages, &counter)) |
---|
| 256 | + if (!page_counter_try_charge( |
---|
| 257 | + __hugetlb_cgroup_counter_from_cgroup(h_cg, idx, rsvd), |
---|
| 258 | + nr_pages, &counter)) { |
---|
206 | 259 | ret = -ENOMEM; |
---|
207 | | - css_put(&h_cg->css); |
---|
| 260 | + hugetlb_event(h_cg, idx, HUGETLB_MAX); |
---|
| 261 | + css_put(&h_cg->css); |
---|
| 262 | + goto done; |
---|
| 263 | + } |
---|
| 264 | + /* Reservations take a reference to the css because they do not get |
---|
| 265 | + * reparented. |
---|
| 266 | + */ |
---|
| 267 | + if (!rsvd) |
---|
| 268 | + css_put(&h_cg->css); |
---|
208 | 269 | done: |
---|
209 | 270 | *ptr = h_cg; |
---|
210 | 271 | return ret; |
---|
211 | 272 | } |
---|
212 | 273 | |
---|
| 274 | +int hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages, |
---|
| 275 | + struct hugetlb_cgroup **ptr) |
---|
| 276 | +{ |
---|
| 277 | + return __hugetlb_cgroup_charge_cgroup(idx, nr_pages, ptr, false); |
---|
| 278 | +} |
---|
| 279 | + |
---|
| 280 | +int hugetlb_cgroup_charge_cgroup_rsvd(int idx, unsigned long nr_pages, |
---|
| 281 | + struct hugetlb_cgroup **ptr) |
---|
| 282 | +{ |
---|
| 283 | + return __hugetlb_cgroup_charge_cgroup(idx, nr_pages, ptr, true); |
---|
| 284 | +} |
---|
| 285 | + |
---|
213 | 286 | /* Should be called with hugetlb_lock held */ |
---|
214 | | -void hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages, |
---|
215 | | - struct hugetlb_cgroup *h_cg, |
---|
216 | | - struct page *page) |
---|
| 287 | +static void __hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages, |
---|
| 288 | + struct hugetlb_cgroup *h_cg, |
---|
| 289 | + struct page *page, bool rsvd) |
---|
217 | 290 | { |
---|
218 | 291 | if (hugetlb_cgroup_disabled() || !h_cg) |
---|
219 | 292 | return; |
---|
220 | 293 | |
---|
221 | | - set_hugetlb_cgroup(page, h_cg); |
---|
| 294 | + __set_hugetlb_cgroup(page, h_cg, rsvd); |
---|
222 | 295 | return; |
---|
| 296 | +} |
---|
| 297 | + |
---|
| 298 | +void hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages, |
---|
| 299 | + struct hugetlb_cgroup *h_cg, |
---|
| 300 | + struct page *page) |
---|
| 301 | +{ |
---|
| 302 | + __hugetlb_cgroup_commit_charge(idx, nr_pages, h_cg, page, false); |
---|
| 303 | +} |
---|
| 304 | + |
---|
| 305 | +void hugetlb_cgroup_commit_charge_rsvd(int idx, unsigned long nr_pages, |
---|
| 306 | + struct hugetlb_cgroup *h_cg, |
---|
| 307 | + struct page *page) |
---|
| 308 | +{ |
---|
| 309 | + __hugetlb_cgroup_commit_charge(idx, nr_pages, h_cg, page, true); |
---|
223 | 310 | } |
---|
224 | 311 | |
---|
225 | 312 | /* |
---|
226 | 313 | * Should be called with hugetlb_lock held |
---|
227 | 314 | */ |
---|
228 | | -void hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages, |
---|
229 | | - struct page *page) |
---|
| 315 | +static void __hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages, |
---|
| 316 | + struct page *page, bool rsvd) |
---|
230 | 317 | { |
---|
231 | 318 | struct hugetlb_cgroup *h_cg; |
---|
232 | 319 | |
---|
233 | 320 | if (hugetlb_cgroup_disabled()) |
---|
234 | 321 | return; |
---|
235 | 322 | lockdep_assert_held(&hugetlb_lock); |
---|
236 | | - h_cg = hugetlb_cgroup_from_page(page); |
---|
| 323 | + h_cg = __hugetlb_cgroup_from_page(page, rsvd); |
---|
237 | 324 | if (unlikely(!h_cg)) |
---|
238 | 325 | return; |
---|
239 | | - set_hugetlb_cgroup(page, NULL); |
---|
240 | | - page_counter_uncharge(&h_cg->hugepage[idx], nr_pages); |
---|
| 326 | + __set_hugetlb_cgroup(page, NULL, rsvd); |
---|
| 327 | + |
---|
| 328 | + page_counter_uncharge(__hugetlb_cgroup_counter_from_cgroup(h_cg, idx, |
---|
| 329 | + rsvd), |
---|
| 330 | + nr_pages); |
---|
| 331 | + |
---|
| 332 | + if (rsvd) |
---|
| 333 | + css_put(&h_cg->css); |
---|
| 334 | + |
---|
241 | 335 | return; |
---|
242 | 336 | } |
---|
243 | 337 | |
---|
244 | | -void hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages, |
---|
245 | | - struct hugetlb_cgroup *h_cg) |
---|
| 338 | +void hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages, |
---|
| 339 | + struct page *page) |
---|
| 340 | +{ |
---|
| 341 | + __hugetlb_cgroup_uncharge_page(idx, nr_pages, page, false); |
---|
| 342 | +} |
---|
| 343 | + |
---|
| 344 | +void hugetlb_cgroup_uncharge_page_rsvd(int idx, unsigned long nr_pages, |
---|
| 345 | + struct page *page) |
---|
| 346 | +{ |
---|
| 347 | + __hugetlb_cgroup_uncharge_page(idx, nr_pages, page, true); |
---|
| 348 | +} |
---|
| 349 | + |
---|
| 350 | +static void __hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages, |
---|
| 351 | + struct hugetlb_cgroup *h_cg, |
---|
| 352 | + bool rsvd) |
---|
246 | 353 | { |
---|
247 | 354 | if (hugetlb_cgroup_disabled() || !h_cg) |
---|
248 | 355 | return; |
---|
.. | .. |
---|
250 | 357 | if (huge_page_order(&hstates[idx]) < HUGETLB_CGROUP_MIN_ORDER) |
---|
251 | 358 | return; |
---|
252 | 359 | |
---|
253 | | - page_counter_uncharge(&h_cg->hugepage[idx], nr_pages); |
---|
254 | | - return; |
---|
| 360 | + page_counter_uncharge(__hugetlb_cgroup_counter_from_cgroup(h_cg, idx, |
---|
| 361 | + rsvd), |
---|
| 362 | + nr_pages); |
---|
| 363 | + |
---|
| 364 | + if (rsvd) |
---|
| 365 | + css_put(&h_cg->css); |
---|
| 366 | +} |
---|
| 367 | + |
---|
| 368 | +void hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages, |
---|
| 369 | + struct hugetlb_cgroup *h_cg) |
---|
| 370 | +{ |
---|
| 371 | + __hugetlb_cgroup_uncharge_cgroup(idx, nr_pages, h_cg, false); |
---|
| 372 | +} |
---|
| 373 | + |
---|
| 374 | +void hugetlb_cgroup_uncharge_cgroup_rsvd(int idx, unsigned long nr_pages, |
---|
| 375 | + struct hugetlb_cgroup *h_cg) |
---|
| 376 | +{ |
---|
| 377 | + __hugetlb_cgroup_uncharge_cgroup(idx, nr_pages, h_cg, true); |
---|
| 378 | +} |
---|
| 379 | + |
---|
| 380 | +void hugetlb_cgroup_uncharge_counter(struct resv_map *resv, unsigned long start, |
---|
| 381 | + unsigned long end) |
---|
| 382 | +{ |
---|
| 383 | + if (hugetlb_cgroup_disabled() || !resv || !resv->reservation_counter || |
---|
| 384 | + !resv->css) |
---|
| 385 | + return; |
---|
| 386 | + |
---|
| 387 | + page_counter_uncharge(resv->reservation_counter, |
---|
| 388 | + (end - start) * resv->pages_per_hpage); |
---|
| 389 | + css_put(resv->css); |
---|
| 390 | +} |
---|
| 391 | + |
---|
| 392 | +void hugetlb_cgroup_uncharge_file_region(struct resv_map *resv, |
---|
| 393 | + struct file_region *rg, |
---|
| 394 | + unsigned long nr_pages, |
---|
| 395 | + bool region_del) |
---|
| 396 | +{ |
---|
| 397 | + if (hugetlb_cgroup_disabled() || !resv || !rg || !nr_pages) |
---|
| 398 | + return; |
---|
| 399 | + |
---|
| 400 | + if (rg->reservation_counter && resv->pages_per_hpage && nr_pages > 0 && |
---|
| 401 | + !resv->reservation_counter) { |
---|
| 402 | + page_counter_uncharge(rg->reservation_counter, |
---|
| 403 | + nr_pages * resv->pages_per_hpage); |
---|
| 404 | + /* |
---|
| 405 | + * Only do css_put(rg->css) when we delete the entire region |
---|
| 406 | + * because one file_region must hold exactly one css reference. |
---|
| 407 | + */ |
---|
| 408 | + if (region_del) |
---|
| 409 | + css_put(rg->css); |
---|
| 410 | + } |
---|
255 | 411 | } |
---|
256 | 412 | |
---|
257 | 413 | enum { |
---|
258 | 414 | RES_USAGE, |
---|
| 415 | + RES_RSVD_USAGE, |
---|
259 | 416 | RES_LIMIT, |
---|
| 417 | + RES_RSVD_LIMIT, |
---|
260 | 418 | RES_MAX_USAGE, |
---|
| 419 | + RES_RSVD_MAX_USAGE, |
---|
261 | 420 | RES_FAILCNT, |
---|
| 421 | + RES_RSVD_FAILCNT, |
---|
262 | 422 | }; |
---|
263 | 423 | |
---|
264 | 424 | static u64 hugetlb_cgroup_read_u64(struct cgroup_subsys_state *css, |
---|
265 | 425 | struct cftype *cft) |
---|
266 | 426 | { |
---|
267 | 427 | struct page_counter *counter; |
---|
| 428 | + struct page_counter *rsvd_counter; |
---|
268 | 429 | struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(css); |
---|
269 | 430 | |
---|
270 | 431 | counter = &h_cg->hugepage[MEMFILE_IDX(cft->private)]; |
---|
| 432 | + rsvd_counter = &h_cg->rsvd_hugepage[MEMFILE_IDX(cft->private)]; |
---|
271 | 433 | |
---|
272 | 434 | switch (MEMFILE_ATTR(cft->private)) { |
---|
273 | 435 | case RES_USAGE: |
---|
274 | 436 | return (u64)page_counter_read(counter) * PAGE_SIZE; |
---|
| 437 | + case RES_RSVD_USAGE: |
---|
| 438 | + return (u64)page_counter_read(rsvd_counter) * PAGE_SIZE; |
---|
275 | 439 | case RES_LIMIT: |
---|
276 | 440 | return (u64)counter->max * PAGE_SIZE; |
---|
| 441 | + case RES_RSVD_LIMIT: |
---|
| 442 | + return (u64)rsvd_counter->max * PAGE_SIZE; |
---|
277 | 443 | case RES_MAX_USAGE: |
---|
278 | 444 | return (u64)counter->watermark * PAGE_SIZE; |
---|
| 445 | + case RES_RSVD_MAX_USAGE: |
---|
| 446 | + return (u64)rsvd_counter->watermark * PAGE_SIZE; |
---|
279 | 447 | case RES_FAILCNT: |
---|
280 | 448 | return counter->failcnt; |
---|
| 449 | + case RES_RSVD_FAILCNT: |
---|
| 450 | + return rsvd_counter->failcnt; |
---|
281 | 451 | default: |
---|
282 | 452 | BUG(); |
---|
283 | 453 | } |
---|
284 | 454 | } |
---|
285 | 455 | |
---|
| 456 | +static int hugetlb_cgroup_read_u64_max(struct seq_file *seq, void *v) |
---|
| 457 | +{ |
---|
| 458 | + int idx; |
---|
| 459 | + u64 val; |
---|
| 460 | + struct cftype *cft = seq_cft(seq); |
---|
| 461 | + unsigned long limit; |
---|
| 462 | + struct page_counter *counter; |
---|
| 463 | + struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(seq_css(seq)); |
---|
| 464 | + |
---|
| 465 | + idx = MEMFILE_IDX(cft->private); |
---|
| 466 | + counter = &h_cg->hugepage[idx]; |
---|
| 467 | + |
---|
| 468 | + limit = round_down(PAGE_COUNTER_MAX, |
---|
| 469 | + 1 << huge_page_order(&hstates[idx])); |
---|
| 470 | + |
---|
| 471 | + switch (MEMFILE_ATTR(cft->private)) { |
---|
| 472 | + case RES_RSVD_USAGE: |
---|
| 473 | + counter = &h_cg->rsvd_hugepage[idx]; |
---|
| 474 | + fallthrough; |
---|
| 475 | + case RES_USAGE: |
---|
| 476 | + val = (u64)page_counter_read(counter); |
---|
| 477 | + seq_printf(seq, "%llu\n", val * PAGE_SIZE); |
---|
| 478 | + break; |
---|
| 479 | + case RES_RSVD_LIMIT: |
---|
| 480 | + counter = &h_cg->rsvd_hugepage[idx]; |
---|
| 481 | + fallthrough; |
---|
| 482 | + case RES_LIMIT: |
---|
| 483 | + val = (u64)counter->max; |
---|
| 484 | + if (val == limit) |
---|
| 485 | + seq_puts(seq, "max\n"); |
---|
| 486 | + else |
---|
| 487 | + seq_printf(seq, "%llu\n", val * PAGE_SIZE); |
---|
| 488 | + break; |
---|
| 489 | + default: |
---|
| 490 | + BUG(); |
---|
| 491 | + } |
---|
| 492 | + |
---|
| 493 | + return 0; |
---|
| 494 | +} |
---|
| 495 | + |
---|
286 | 496 | static DEFINE_MUTEX(hugetlb_limit_mutex); |
---|
287 | 497 | |
---|
288 | 498 | static ssize_t hugetlb_cgroup_write(struct kernfs_open_file *of, |
---|
289 | | - char *buf, size_t nbytes, loff_t off) |
---|
| 499 | + char *buf, size_t nbytes, loff_t off, |
---|
| 500 | + const char *max) |
---|
290 | 501 | { |
---|
291 | 502 | int ret, idx; |
---|
292 | 503 | unsigned long nr_pages; |
---|
293 | 504 | struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(of_css(of)); |
---|
| 505 | + bool rsvd = false; |
---|
294 | 506 | |
---|
295 | 507 | if (hugetlb_cgroup_is_root(h_cg)) /* Can't set limit on root */ |
---|
296 | 508 | return -EINVAL; |
---|
297 | 509 | |
---|
298 | 510 | buf = strstrip(buf); |
---|
299 | | - ret = page_counter_memparse(buf, "-1", &nr_pages); |
---|
| 511 | + ret = page_counter_memparse(buf, max, &nr_pages); |
---|
300 | 512 | if (ret) |
---|
301 | 513 | return ret; |
---|
302 | 514 | |
---|
.. | .. |
---|
304 | 516 | nr_pages = round_down(nr_pages, 1 << huge_page_order(&hstates[idx])); |
---|
305 | 517 | |
---|
306 | 518 | switch (MEMFILE_ATTR(of_cft(of)->private)) { |
---|
| 519 | + case RES_RSVD_LIMIT: |
---|
| 520 | + rsvd = true; |
---|
| 521 | + fallthrough; |
---|
307 | 522 | case RES_LIMIT: |
---|
308 | 523 | mutex_lock(&hugetlb_limit_mutex); |
---|
309 | | - ret = page_counter_set_max(&h_cg->hugepage[idx], nr_pages); |
---|
| 524 | + ret = page_counter_set_max( |
---|
| 525 | + __hugetlb_cgroup_counter_from_cgroup(h_cg, idx, rsvd), |
---|
| 526 | + nr_pages); |
---|
310 | 527 | mutex_unlock(&hugetlb_limit_mutex); |
---|
311 | 528 | break; |
---|
312 | 529 | default: |
---|
.. | .. |
---|
316 | 533 | return ret ?: nbytes; |
---|
317 | 534 | } |
---|
318 | 535 | |
---|
| 536 | +static ssize_t hugetlb_cgroup_write_legacy(struct kernfs_open_file *of, |
---|
| 537 | + char *buf, size_t nbytes, loff_t off) |
---|
| 538 | +{ |
---|
| 539 | + return hugetlb_cgroup_write(of, buf, nbytes, off, "-1"); |
---|
| 540 | +} |
---|
| 541 | + |
---|
| 542 | +static ssize_t hugetlb_cgroup_write_dfl(struct kernfs_open_file *of, |
---|
| 543 | + char *buf, size_t nbytes, loff_t off) |
---|
| 544 | +{ |
---|
| 545 | + return hugetlb_cgroup_write(of, buf, nbytes, off, "max"); |
---|
| 546 | +} |
---|
| 547 | + |
---|
319 | 548 | static ssize_t hugetlb_cgroup_reset(struct kernfs_open_file *of, |
---|
320 | 549 | char *buf, size_t nbytes, loff_t off) |
---|
321 | 550 | { |
---|
322 | 551 | int ret = 0; |
---|
323 | | - struct page_counter *counter; |
---|
| 552 | + struct page_counter *counter, *rsvd_counter; |
---|
324 | 553 | struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(of_css(of)); |
---|
325 | 554 | |
---|
326 | 555 | counter = &h_cg->hugepage[MEMFILE_IDX(of_cft(of)->private)]; |
---|
| 556 | + rsvd_counter = &h_cg->rsvd_hugepage[MEMFILE_IDX(of_cft(of)->private)]; |
---|
327 | 557 | |
---|
328 | 558 | switch (MEMFILE_ATTR(of_cft(of)->private)) { |
---|
329 | 559 | case RES_MAX_USAGE: |
---|
330 | 560 | page_counter_reset_watermark(counter); |
---|
331 | 561 | break; |
---|
| 562 | + case RES_RSVD_MAX_USAGE: |
---|
| 563 | + page_counter_reset_watermark(rsvd_counter); |
---|
| 564 | + break; |
---|
332 | 565 | case RES_FAILCNT: |
---|
333 | 566 | counter->failcnt = 0; |
---|
| 567 | + break; |
---|
| 568 | + case RES_RSVD_FAILCNT: |
---|
| 569 | + rsvd_counter->failcnt = 0; |
---|
334 | 570 | break; |
---|
335 | 571 | default: |
---|
336 | 572 | ret = -EINVAL; |
---|
.. | .. |
---|
350 | 586 | return buf; |
---|
351 | 587 | } |
---|
352 | 588 | |
---|
353 | | -static void __init __hugetlb_cgroup_file_init(int idx) |
---|
| 589 | +static int __hugetlb_events_show(struct seq_file *seq, bool local) |
---|
| 590 | +{ |
---|
| 591 | + int idx; |
---|
| 592 | + long max; |
---|
| 593 | + struct cftype *cft = seq_cft(seq); |
---|
| 594 | + struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(seq_css(seq)); |
---|
| 595 | + |
---|
| 596 | + idx = MEMFILE_IDX(cft->private); |
---|
| 597 | + |
---|
| 598 | + if (local) |
---|
| 599 | + max = atomic_long_read(&h_cg->events_local[idx][HUGETLB_MAX]); |
---|
| 600 | + else |
---|
| 601 | + max = atomic_long_read(&h_cg->events[idx][HUGETLB_MAX]); |
---|
| 602 | + |
---|
| 603 | + seq_printf(seq, "max %lu\n", max); |
---|
| 604 | + |
---|
| 605 | + return 0; |
---|
| 606 | +} |
---|
| 607 | + |
---|
| 608 | +static int hugetlb_events_show(struct seq_file *seq, void *v) |
---|
| 609 | +{ |
---|
| 610 | + return __hugetlb_events_show(seq, false); |
---|
| 611 | +} |
---|
| 612 | + |
---|
| 613 | +static int hugetlb_events_local_show(struct seq_file *seq, void *v) |
---|
| 614 | +{ |
---|
| 615 | + return __hugetlb_events_show(seq, true); |
---|
| 616 | +} |
---|
| 617 | + |
---|
| 618 | +static void __init __hugetlb_cgroup_file_dfl_init(int idx) |
---|
354 | 619 | { |
---|
355 | 620 | char buf[32]; |
---|
356 | 621 | struct cftype *cft; |
---|
357 | 622 | struct hstate *h = &hstates[idx]; |
---|
358 | 623 | |
---|
359 | 624 | /* format the size */ |
---|
360 | | - mem_fmt(buf, 32, huge_page_size(h)); |
---|
| 625 | + mem_fmt(buf, sizeof(buf), huge_page_size(h)); |
---|
361 | 626 | |
---|
362 | 627 | /* Add the limit file */ |
---|
363 | | - cft = &h->cgroup_files[0]; |
---|
| 628 | + cft = &h->cgroup_files_dfl[0]; |
---|
| 629 | + snprintf(cft->name, MAX_CFTYPE_NAME, "%s.max", buf); |
---|
| 630 | + cft->private = MEMFILE_PRIVATE(idx, RES_LIMIT); |
---|
| 631 | + cft->seq_show = hugetlb_cgroup_read_u64_max; |
---|
| 632 | + cft->write = hugetlb_cgroup_write_dfl; |
---|
| 633 | + cft->flags = CFTYPE_NOT_ON_ROOT; |
---|
| 634 | + |
---|
| 635 | + /* Add the reservation limit file */ |
---|
| 636 | + cft = &h->cgroup_files_dfl[1]; |
---|
| 637 | + snprintf(cft->name, MAX_CFTYPE_NAME, "%s.rsvd.max", buf); |
---|
| 638 | + cft->private = MEMFILE_PRIVATE(idx, RES_RSVD_LIMIT); |
---|
| 639 | + cft->seq_show = hugetlb_cgroup_read_u64_max; |
---|
| 640 | + cft->write = hugetlb_cgroup_write_dfl; |
---|
| 641 | + cft->flags = CFTYPE_NOT_ON_ROOT; |
---|
| 642 | + |
---|
| 643 | + /* Add the current usage file */ |
---|
| 644 | + cft = &h->cgroup_files_dfl[2]; |
---|
| 645 | + snprintf(cft->name, MAX_CFTYPE_NAME, "%s.current", buf); |
---|
| 646 | + cft->private = MEMFILE_PRIVATE(idx, RES_USAGE); |
---|
| 647 | + cft->seq_show = hugetlb_cgroup_read_u64_max; |
---|
| 648 | + cft->flags = CFTYPE_NOT_ON_ROOT; |
---|
| 649 | + |
---|
| 650 | + /* Add the current reservation usage file */ |
---|
| 651 | + cft = &h->cgroup_files_dfl[3]; |
---|
| 652 | + snprintf(cft->name, MAX_CFTYPE_NAME, "%s.rsvd.current", buf); |
---|
| 653 | + cft->private = MEMFILE_PRIVATE(idx, RES_RSVD_USAGE); |
---|
| 654 | + cft->seq_show = hugetlb_cgroup_read_u64_max; |
---|
| 655 | + cft->flags = CFTYPE_NOT_ON_ROOT; |
---|
| 656 | + |
---|
| 657 | + /* Add the events file */ |
---|
| 658 | + cft = &h->cgroup_files_dfl[4]; |
---|
| 659 | + snprintf(cft->name, MAX_CFTYPE_NAME, "%s.events", buf); |
---|
| 660 | + cft->private = MEMFILE_PRIVATE(idx, 0); |
---|
| 661 | + cft->seq_show = hugetlb_events_show; |
---|
| 662 | + cft->file_offset = offsetof(struct hugetlb_cgroup, events_file[idx]); |
---|
| 663 | + cft->flags = CFTYPE_NOT_ON_ROOT; |
---|
| 664 | + |
---|
| 665 | + /* Add the events.local file */ |
---|
| 666 | + cft = &h->cgroup_files_dfl[5]; |
---|
| 667 | + snprintf(cft->name, MAX_CFTYPE_NAME, "%s.events.local", buf); |
---|
| 668 | + cft->private = MEMFILE_PRIVATE(idx, 0); |
---|
| 669 | + cft->seq_show = hugetlb_events_local_show; |
---|
| 670 | + cft->file_offset = offsetof(struct hugetlb_cgroup, |
---|
| 671 | + events_local_file[idx]); |
---|
| 672 | + cft->flags = CFTYPE_NOT_ON_ROOT; |
---|
| 673 | + |
---|
| 674 | + /* NULL terminate the last cft */ |
---|
| 675 | + cft = &h->cgroup_files_dfl[6]; |
---|
| 676 | + memset(cft, 0, sizeof(*cft)); |
---|
| 677 | + |
---|
| 678 | + WARN_ON(cgroup_add_dfl_cftypes(&hugetlb_cgrp_subsys, |
---|
| 679 | + h->cgroup_files_dfl)); |
---|
| 680 | +} |
---|
| 681 | + |
---|
| 682 | +static void __init __hugetlb_cgroup_file_legacy_init(int idx) |
---|
| 683 | +{ |
---|
| 684 | + char buf[32]; |
---|
| 685 | + struct cftype *cft; |
---|
| 686 | + struct hstate *h = &hstates[idx]; |
---|
| 687 | + |
---|
| 688 | + /* format the size */ |
---|
| 689 | + mem_fmt(buf, sizeof(buf), huge_page_size(h)); |
---|
| 690 | + |
---|
| 691 | + /* Add the limit file */ |
---|
| 692 | + cft = &h->cgroup_files_legacy[0]; |
---|
364 | 693 | snprintf(cft->name, MAX_CFTYPE_NAME, "%s.limit_in_bytes", buf); |
---|
365 | 694 | cft->private = MEMFILE_PRIVATE(idx, RES_LIMIT); |
---|
366 | 695 | cft->read_u64 = hugetlb_cgroup_read_u64; |
---|
367 | | - cft->write = hugetlb_cgroup_write; |
---|
| 696 | + cft->write = hugetlb_cgroup_write_legacy; |
---|
| 697 | + |
---|
| 698 | + /* Add the reservation limit file */ |
---|
| 699 | + cft = &h->cgroup_files_legacy[1]; |
---|
| 700 | + snprintf(cft->name, MAX_CFTYPE_NAME, "%s.rsvd.limit_in_bytes", buf); |
---|
| 701 | + cft->private = MEMFILE_PRIVATE(idx, RES_RSVD_LIMIT); |
---|
| 702 | + cft->read_u64 = hugetlb_cgroup_read_u64; |
---|
| 703 | + cft->write = hugetlb_cgroup_write_legacy; |
---|
368 | 704 | |
---|
369 | 705 | /* Add the usage file */ |
---|
370 | | - cft = &h->cgroup_files[1]; |
---|
| 706 | + cft = &h->cgroup_files_legacy[2]; |
---|
371 | 707 | snprintf(cft->name, MAX_CFTYPE_NAME, "%s.usage_in_bytes", buf); |
---|
372 | 708 | cft->private = MEMFILE_PRIVATE(idx, RES_USAGE); |
---|
373 | 709 | cft->read_u64 = hugetlb_cgroup_read_u64; |
---|
374 | 710 | |
---|
| 711 | + /* Add the reservation usage file */ |
---|
| 712 | + cft = &h->cgroup_files_legacy[3]; |
---|
| 713 | + snprintf(cft->name, MAX_CFTYPE_NAME, "%s.rsvd.usage_in_bytes", buf); |
---|
| 714 | + cft->private = MEMFILE_PRIVATE(idx, RES_RSVD_USAGE); |
---|
| 715 | + cft->read_u64 = hugetlb_cgroup_read_u64; |
---|
| 716 | + |
---|
375 | 717 | /* Add the MAX usage file */ |
---|
376 | | - cft = &h->cgroup_files[2]; |
---|
| 718 | + cft = &h->cgroup_files_legacy[4]; |
---|
377 | 719 | snprintf(cft->name, MAX_CFTYPE_NAME, "%s.max_usage_in_bytes", buf); |
---|
378 | 720 | cft->private = MEMFILE_PRIVATE(idx, RES_MAX_USAGE); |
---|
379 | 721 | cft->write = hugetlb_cgroup_reset; |
---|
380 | 722 | cft->read_u64 = hugetlb_cgroup_read_u64; |
---|
381 | 723 | |
---|
| 724 | + /* Add the MAX reservation usage file */ |
---|
| 725 | + cft = &h->cgroup_files_legacy[5]; |
---|
| 726 | + snprintf(cft->name, MAX_CFTYPE_NAME, "%s.rsvd.max_usage_in_bytes", buf); |
---|
| 727 | + cft->private = MEMFILE_PRIVATE(idx, RES_RSVD_MAX_USAGE); |
---|
| 728 | + cft->write = hugetlb_cgroup_reset; |
---|
| 729 | + cft->read_u64 = hugetlb_cgroup_read_u64; |
---|
| 730 | + |
---|
382 | 731 | /* Add the failcntfile */ |
---|
383 | | - cft = &h->cgroup_files[3]; |
---|
| 732 | + cft = &h->cgroup_files_legacy[6]; |
---|
384 | 733 | snprintf(cft->name, MAX_CFTYPE_NAME, "%s.failcnt", buf); |
---|
385 | | - cft->private = MEMFILE_PRIVATE(idx, RES_FAILCNT); |
---|
| 734 | + cft->private = MEMFILE_PRIVATE(idx, RES_FAILCNT); |
---|
| 735 | + cft->write = hugetlb_cgroup_reset; |
---|
| 736 | + cft->read_u64 = hugetlb_cgroup_read_u64; |
---|
| 737 | + |
---|
| 738 | + /* Add the reservation failcntfile */ |
---|
| 739 | + cft = &h->cgroup_files_legacy[7]; |
---|
| 740 | + snprintf(cft->name, MAX_CFTYPE_NAME, "%s.rsvd.failcnt", buf); |
---|
| 741 | + cft->private = MEMFILE_PRIVATE(idx, RES_RSVD_FAILCNT); |
---|
386 | 742 | cft->write = hugetlb_cgroup_reset; |
---|
387 | 743 | cft->read_u64 = hugetlb_cgroup_read_u64; |
---|
388 | 744 | |
---|
389 | 745 | /* NULL terminate the last cft */ |
---|
390 | | - cft = &h->cgroup_files[4]; |
---|
| 746 | + cft = &h->cgroup_files_legacy[8]; |
---|
391 | 747 | memset(cft, 0, sizeof(*cft)); |
---|
392 | 748 | |
---|
393 | 749 | WARN_ON(cgroup_add_legacy_cftypes(&hugetlb_cgrp_subsys, |
---|
394 | | - h->cgroup_files)); |
---|
| 750 | + h->cgroup_files_legacy)); |
---|
| 751 | +} |
---|
| 752 | + |
---|
| 753 | +static void __init __hugetlb_cgroup_file_init(int idx) |
---|
| 754 | +{ |
---|
| 755 | + __hugetlb_cgroup_file_dfl_init(idx); |
---|
| 756 | + __hugetlb_cgroup_file_legacy_init(idx); |
---|
395 | 757 | } |
---|
396 | 758 | |
---|
397 | 759 | void __init hugetlb_cgroup_file_init(void) |
---|
.. | .. |
---|
416 | 778 | void hugetlb_cgroup_migrate(struct page *oldhpage, struct page *newhpage) |
---|
417 | 779 | { |
---|
418 | 780 | struct hugetlb_cgroup *h_cg; |
---|
| 781 | + struct hugetlb_cgroup *h_cg_rsvd; |
---|
419 | 782 | struct hstate *h = page_hstate(oldhpage); |
---|
420 | 783 | |
---|
421 | 784 | if (hugetlb_cgroup_disabled()) |
---|
.. | .. |
---|
424 | 787 | VM_BUG_ON_PAGE(!PageHuge(oldhpage), oldhpage); |
---|
425 | 788 | spin_lock(&hugetlb_lock); |
---|
426 | 789 | h_cg = hugetlb_cgroup_from_page(oldhpage); |
---|
| 790 | + h_cg_rsvd = hugetlb_cgroup_from_page_rsvd(oldhpage); |
---|
427 | 791 | set_hugetlb_cgroup(oldhpage, NULL); |
---|
| 792 | + set_hugetlb_cgroup_rsvd(oldhpage, NULL); |
---|
428 | 793 | |
---|
429 | 794 | /* move the h_cg details to new cgroup */ |
---|
430 | 795 | set_hugetlb_cgroup(newhpage, h_cg); |
---|
| 796 | + set_hugetlb_cgroup_rsvd(newhpage, h_cg_rsvd); |
---|
431 | 797 | list_move(&newhpage->lru, &h->hugepage_activelist); |
---|
432 | 798 | spin_unlock(&hugetlb_lock); |
---|
433 | 799 | return; |
---|
434 | 800 | } |
---|
435 | 801 | |
---|
| 802 | +static struct cftype hugetlb_files[] = { |
---|
| 803 | + {} /* terminate */ |
---|
| 804 | +}; |
---|
| 805 | + |
---|
436 | 806 | struct cgroup_subsys hugetlb_cgrp_subsys = { |
---|
437 | 807 | .css_alloc = hugetlb_cgroup_css_alloc, |
---|
438 | 808 | .css_offline = hugetlb_cgroup_css_offline, |
---|
439 | 809 | .css_free = hugetlb_cgroup_css_free, |
---|
| 810 | + .dfl_cftypes = hugetlb_files, |
---|
| 811 | + .legacy_cftypes = hugetlb_files, |
---|
440 | 812 | }; |
---|