2023-12-11 d2ccde1c8e90d38cee87a1b0309ad2827f3fd30d
diff --git a/kernel/cgroup/rstat.c b/kernel/cgroup/rstat.c
--- a/kernel/cgroup/rstat.c
+++ b/kernel/cgroup/rstat.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 #include "cgroup-internal.h"

 #include <linux/sched/cputime.h>
@@ -63,7 +64,6 @@

 	raw_spin_unlock_irqrestore(cpu_lock, flags);
 }
-EXPORT_SYMBOL_GPL(cgroup_rstat_updated);

 /**
  * cgroup_rstat_cpu_pop_updated - iterate and dismantle rstat_cpu updated tree
@@ -149,8 +149,9 @@
 	raw_spinlock_t *cpu_lock = per_cpu_ptr(&cgroup_rstat_cpu_lock,
 					       cpu);
 	struct cgroup *pos = NULL;
+	unsigned long flags;

-	raw_spin_lock(cpu_lock);
+	raw_spin_lock_irqsave(cpu_lock, flags);
 	while ((pos = cgroup_rstat_cpu_pop_updated(pos, cgrp, cpu))) {
 		struct cgroup_subsys_state *css;

@@ -162,7 +163,7 @@
 			css->ss->css_rstat_flush(css, cpu);
 		rcu_read_unlock();
 	}
-	raw_spin_unlock(cpu_lock);
+	raw_spin_unlock_irqrestore(cpu_lock, flags);

 	/* if @may_sleep, play nice and yield if necessary */
 	if (may_sleep && (need_resched() ||
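
The two hunks above convert the flush side of the per-cpu lock to the irq-saving variants. The update side, visible in the context of the first hunk where cgroup_rstat_updated() ends with raw_spin_unlock_irqrestore(), already runs with interrupts disabled and can be reached from interrupt context. If the flusher took the same lock with interrupts still enabled, an interrupt arriving on that CPU could re-enter the update path and spin on a lock its own CPU already holds. A minimal sketch of the pattern, with hypothetical names rather than code from this patch:

/* A lock shared between process context (flush) and irq context (update). */
static DEFINE_RAW_SPINLOCK(example_lock);

static void example_flush(void)
{
	unsigned long flags;

	/*
	 * _irqsave disables interrupts on this CPU and remembers their
	 * previous state in @flags, so an irq handler that also takes
	 * example_lock cannot preempt us inside the critical section.
	 */
	raw_spin_lock_irqsave(&example_lock, flags);
	/* ... touch data that irq-context updaters also modify ... */
	raw_spin_unlock_irqrestore(&example_lock, flags);
}

The _irqsave variant is used rather than a plain irq-disabling lock because the flush path may itself be entered with interrupts already off; @flags restores whatever state the caller had.
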
@@ -293,44 +294,48 @@
  * Functions for cgroup basic resource statistics implemented on top of
  * rstat.
  */
-static void cgroup_base_stat_accumulate(struct cgroup_base_stat *dst_bstat,
-					struct cgroup_base_stat *src_bstat)
+static void cgroup_base_stat_add(struct cgroup_base_stat *dst_bstat,
+				 struct cgroup_base_stat *src_bstat)
 {
 	dst_bstat->cputime.utime += src_bstat->cputime.utime;
 	dst_bstat->cputime.stime += src_bstat->cputime.stime;
 	dst_bstat->cputime.sum_exec_runtime += src_bstat->cputime.sum_exec_runtime;
 }

+static void cgroup_base_stat_sub(struct cgroup_base_stat *dst_bstat,
+				 struct cgroup_base_stat *src_bstat)
+{
+	dst_bstat->cputime.utime -= src_bstat->cputime.utime;
+	dst_bstat->cputime.stime -= src_bstat->cputime.stime;
+	dst_bstat->cputime.sum_exec_runtime -= src_bstat->cputime.sum_exec_runtime;
+}
+
 static void cgroup_base_stat_flush(struct cgroup *cgrp, int cpu)
 {
 	struct cgroup *parent = cgroup_parent(cgrp);
 	struct cgroup_rstat_cpu *rstatc = cgroup_rstat_cpu(cgrp, cpu);
-	struct task_cputime *last_cputime = &rstatc->last_bstat.cputime;
-	struct task_cputime cputime;
-	struct cgroup_base_stat delta;
+	struct cgroup_base_stat cur, delta;
 	unsigned seq;

 	/* fetch the current per-cpu values */
 	do {
 		seq = __u64_stats_fetch_begin(&rstatc->bsync);
-		cputime = rstatc->bstat.cputime;
+		cur.cputime = rstatc->bstat.cputime;
 	} while (__u64_stats_fetch_retry(&rstatc->bsync, seq));

-	/* calculate the delta to propgate */
-	delta.cputime.utime = cputime.utime - last_cputime->utime;
-	delta.cputime.stime = cputime.stime - last_cputime->stime;
-	delta.cputime.sum_exec_runtime = cputime.sum_exec_runtime -
-					 last_cputime->sum_exec_runtime;
-	*last_cputime = cputime;
+	/* propagate percpu delta to global */
+	delta = cur;
+	cgroup_base_stat_sub(&delta, &rstatc->last_bstat);
+	cgroup_base_stat_add(&cgrp->bstat, &delta);
+	cgroup_base_stat_add(&rstatc->last_bstat, &delta);

-	/* transfer the pending stat into delta */
-	cgroup_base_stat_accumulate(&delta, &cgrp->pending_bstat);
-	memset(&cgrp->pending_bstat, 0, sizeof(cgrp->pending_bstat));
-
-	/* propagate delta into the global stat and the parent's pending */
-	cgroup_base_stat_accumulate(&cgrp->bstat, &delta);
-	if (parent)
-		cgroup_base_stat_accumulate(&parent->pending_bstat, &delta);
+	/* propagate global delta to parent */
+	if (parent) {
+		delta = cgrp->bstat;
+		cgroup_base_stat_sub(&delta, &cgrp->last_bstat);
+		cgroup_base_stat_add(&parent->bstat, &delta);
+		cgroup_base_stat_add(&cgrp->last_bstat, &delta);
+	}
 }

 static struct cgroup_rstat_cpu *
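
The rewrite above drops the pending_bstat staging buffer in favor of a second snapshot: rstatc->last_bstat remembers how much of each per-cpu counter has already been folded into cgrp->bstat, and cgrp->last_bstat remembers how much of cgrp->bstat the parent has already seen, so each flush pushes only the unseen delta upward and repeated flushes never double count. (The __u64_stats_fetch_begin()/_retry() loop around the read is the usual seqcount pattern for tear-free reads of 64-bit counters and is unchanged in substance.) A self-contained toy model of that bookkeeping, with invented names that mirror the patch:

#include <stdio.h>

struct bstat { unsigned long long utime, stime, sum_exec; };

static void bstat_add(struct bstat *dst, const struct bstat *src)
{
	dst->utime += src->utime;
	dst->stime += src->stime;
	dst->sum_exec += src->sum_exec;
}

static void bstat_sub(struct bstat *dst, const struct bstat *src)
{
	dst->utime -= src->utime;
	dst->stime -= src->stime;
	dst->sum_exec -= src->sum_exec;
}

struct node {
	struct bstat bstat;		/* flushed totals for this level */
	struct bstat last_bstat;	/* portion the parent has consumed */
	struct node *parent;
};

/*
 * Fold one cpu's current counters into @cgrp, then push the part of
 * cgrp->bstat the parent has not seen yet up one level; the same two
 * subtract/add steps as the new cgroup_base_stat_flush().
 */
static void flush_one(struct node *cgrp, struct bstat cur,
		      struct bstat *percpu_last)
{
	struct bstat delta = cur;

	bstat_sub(&delta, percpu_last);		/* per-cpu delta ...   */
	bstat_add(&cgrp->bstat, &delta);	/* ... into this level */
	bstat_add(percpu_last, &delta);		/* mark it consumed    */

	if (cgrp->parent) {
		delta = cgrp->bstat;
		bstat_sub(&delta, &cgrp->last_bstat);
		bstat_add(&cgrp->parent->bstat, &delta);
		bstat_add(&cgrp->last_bstat, &delta);
	}
}

int main(void)
{
	struct node root = {0}, child = { .parent = &root };
	struct bstat cpu0_last = {0};

	flush_one(&child, (struct bstat){ .utime = 100 }, &cpu0_last);
	flush_one(&child, (struct bstat){ .utime = 150 }, &cpu0_last);

	/* Both print 150: last_bstat prevents double counting. */
	printf("child utime %llu, root utime %llu\n",
	       child.bstat.utime, root.bstat.utime);
	return 0;
}
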
@@ -384,18 +389,60 @@
 	cgroup_base_stat_cputime_account_end(cgrp, rstatc);
 }

+/*
+ * compute the cputime for the root cgroup by getting the per cpu data
+ * at a global level, then categorizing the fields in a manner consistent
+ * with how it is done by __cgroup_account_cputime_field for each bit of
+ * cpu time attributed to a cgroup.
+ */
+static void root_cgroup_cputime(struct task_cputime *cputime)
+{
+	int i;
+
+	cputime->stime = 0;
+	cputime->utime = 0;
+	cputime->sum_exec_runtime = 0;
+	for_each_possible_cpu(i) {
+		struct kernel_cpustat kcpustat;
+		u64 *cpustat = kcpustat.cpustat;
+		u64 user = 0;
+		u64 sys = 0;
+
+		kcpustat_cpu_fetch(&kcpustat, i);
+
+		user += cpustat[CPUTIME_USER];
+		user += cpustat[CPUTIME_NICE];
+		cputime->utime += user;
+
+		sys += cpustat[CPUTIME_SYSTEM];
+		sys += cpustat[CPUTIME_IRQ];
+		sys += cpustat[CPUTIME_SOFTIRQ];
+		cputime->stime += sys;
+
+		cputime->sum_exec_runtime += user;
+		cputime->sum_exec_runtime += sys;
+		cputime->sum_exec_runtime += cpustat[CPUTIME_STEAL];
+	}
+}
+
 void cgroup_base_stat_cputime_show(struct seq_file *seq)
 {
 	struct cgroup *cgrp = seq_css(seq)->cgroup;
 	u64 usage, utime, stime;
+	struct task_cputime cputime;

-	if (!cgroup_parent(cgrp))
-		return;
-
-	cgroup_rstat_flush_hold(cgrp);
-	usage = cgrp->bstat.cputime.sum_exec_runtime;
-	cputime_adjust(&cgrp->bstat.cputime, &cgrp->prev_cputime, &utime, &stime);
-	cgroup_rstat_flush_release();
+	if (cgroup_parent(cgrp)) {
+		cgroup_rstat_flush_hold(cgrp);
+		usage = cgrp->bstat.cputime.sum_exec_runtime;
+		cputime_adjust(&cgrp->bstat.cputime, &cgrp->prev_cputime,
+			       &utime, &stime);
+		cgroup_rstat_flush_release();
+	} else {
+		root_cgroup_cputime(&cputime);
+		usage = cputime.sum_exec_runtime;
+		utime = cputime.utime;
+		stime = cputime.stime;
+	}

 	do_div(usage, NSEC_PER_USEC);
 	do_div(utime, NSEC_PER_USEC);
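
For the root cgroup there is no rstat tree to flush, so root_cgroup_cputime() above derives the numbers from the global kcpustat counters, bucketing them the way __cgroup_account_cputime_field() classifies per-cgroup time: USER and NICE make up utime; SYSTEM, IRQ and SOFTIRQ make up stime; and sum_exec_runtime additionally counts STEAL. A rough userspace analogue of the same bucketing, reading the aggregate "cpu" line of /proc/stat (values there are USER_HZ ticks rather than the nanoseconds the kernel works with, so this is only illustrative):

#include <stdio.h>

int main(void)
{
	unsigned long long user, nice, system, idle, iowait, irq, softirq, steal;
	FILE *f = fopen("/proc/stat", "r");

	if (!f) {
		perror("fopen /proc/stat");
		return 1;
	}
	/* first line: cpu  user nice system idle iowait irq softirq steal ... */
	if (fscanf(f, "cpu %llu %llu %llu %llu %llu %llu %llu %llu",
		   &user, &nice, &system, &idle, &iowait, &irq,
		   &softirq, &steal) != 8) {
		fclose(f);
		fprintf(stderr, "unexpected /proc/stat format\n");
		return 1;
	}
	fclose(f);

	/* same classification as root_cgroup_cputime() */
	printf("utime %llu\n", user + nice);
	printf("stime %llu\n", system + irq + softirq);
	printf("usage %llu\n",
	       user + nice + system + irq + softirq + steal);
	return 0;
}

idle and iowait are read only to keep the scan positions aligned; as in the patch, they count toward none of the three buckets.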