2024-12-19 9370bb92b2d16684ee45cf24e879c93c509162da
kernel/kernel/time/timekeeping.c
@@ -1,13 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
- * linux/kernel/time/timekeeping.c
- *
- * Kernel timekeeping code and accessor functions
- *
- * This code was moved from linux/kernel/timer.c.
- * Please see that file for copyright and history logs.
- *
+ * Kernel timekeeping code and accessor functions. Based on code from
+ * timer.c, moved in commit 8524070b7982.
  */
-
 #include <linux/timekeeper_internal.h>
 #include <linux/module.h>
 #include <linux/interrupt.h>
@@ -22,10 +17,13 @@
 #include <linux/clocksource.h>
 #include <linux/jiffies.h>
 #include <linux/time.h>
+#include <linux/timex.h>
 #include <linux/tick.h>
 #include <linux/stop_machine.h>
 #include <linux/pvclock_gtod.h>
 #include <linux/compiler.h>
+#include <linux/audit.h>
+#include <linux/random.h>
 
 #include "tick-internal.h"
 #include "ntp_internal.h"
@@ -43,19 +41,23 @@
 	TK_ADV_FREQ
 };
 
+DEFINE_RAW_SPINLOCK(timekeeper_lock);
+
 /*
  * The most important data for readout fits into a single 64 byte
  * cache line.
  */
 static struct {
-	seqcount_t		seq;
+	seqcount_raw_spinlock_t	seq;
 	struct timekeeper	timekeeper;
 } tk_core ____cacheline_aligned = {
-	.seq = SEQCNT_ZERO(tk_core.seq),
+	.seq = SEQCNT_RAW_SPINLOCK_ZERO(tk_core.seq, &timekeeper_lock),
 };
 
-static DEFINE_RAW_SPINLOCK(timekeeper_lock);
 static struct timekeeper shadow_timekeeper;
+
+/* flag for if timekeeping is suspended */
+int __read_mostly timekeeping_suspended;
 
 /**
  * struct tk_fast - NMI safe timekeeper
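Note: with tk_core.seq typed as seqcount_raw_spinlock_t and associated with timekeeper_lock, lockdep can check that writers hold the raw spinlock around write_seqcount_begin()/end(), as do_adjtimex() further down does. Readers keep the usual retry loop; a minimal sketch of that pattern, assuming the timekeeping_get_ns() helper defined elsewhere in this file:

	unsigned int seq;
	u64 ns;

	do {
		seq = read_seqcount_begin(&tk_core.seq);
		ns  = timekeeping_get_ns(&tk_core.timekeeper.tkr_mono);
	} while (read_seqcount_retry(&tk_core.seq, seq));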
@@ -67,7 +69,7 @@
  * See @update_fast_timekeeper() below.
  */
 struct tk_fast {
-	seqcount_t		seq;
+	seqcount_latch_t	seq;
 	struct tk_read_base	base[2];
 };
 
@@ -76,25 +78,41 @@
 
 static u64 dummy_clock_read(struct clocksource *cs)
 {
-	return cycles_at_suspend;
+	if (timekeeping_suspended)
+		return cycles_at_suspend;
+	return local_clock();
 }
 
 static struct clocksource dummy_clock = {
 	.read = dummy_clock_read,
 };
 
+/*
+ * Boot time initialization which allows local_clock() to be utilized
+ * during early boot when clocksources are not available. local_clock()
+ * returns nanoseconds already so no conversion is required, hence mult=1
+ * and shift=0. When the first proper clocksource is installed then
+ * the fast time keepers are updated with the correct values.
+ */
+#define FAST_TK_INIT						\
+	{							\
+		.clock		= &dummy_clock,			\
+		.mask		= CLOCKSOURCE_MASK(64),		\
+		.mult		= 1,				\
+		.shift		= 0,				\
+	}
+
 static struct tk_fast tk_fast_mono ____cacheline_aligned = {
-	.base[0] = { .clock = &dummy_clock, },
-	.base[1] = { .clock = &dummy_clock, },
+	.seq     = SEQCNT_LATCH_ZERO(tk_fast_mono.seq),
+	.base[0] = FAST_TK_INIT,
+	.base[1] = FAST_TK_INIT,
 };
 
 static struct tk_fast tk_fast_raw ____cacheline_aligned = {
-	.base[0] = { .clock = &dummy_clock, },
-	.base[1] = { .clock = &dummy_clock, },
+	.seq     = SEQCNT_LATCH_ZERO(tk_fast_raw.seq),
+	.base[0] = FAST_TK_INIT,
+	.base[1] = FAST_TK_INIT,
 };
-
-/* flag for if timekeeping is suspended */
-int __read_mostly timekeeping_suspended;
 
 static inline void tk_normalize_xtime(struct timekeeper *tk)
 {
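Note: the fast timekeeper converts a clocksource delta to nanoseconds as roughly (delta * mult) >> shift, so the mult = 1, shift = 0 values in FAST_TK_INIT let local_clock()'s nanosecond values pass through unchanged until a real clocksource is installed. A simplified sketch of that conversion (the real timekeeping_delta_to_ns() also folds in the xtime_nsec remainder):

	/* Simplified model only, not the exact in-kernel helper. */
	static inline u64 fast_delta_to_ns(const struct tk_read_base *tkr, u64 delta)
	{
		/* With FAST_TK_INIT: (delta * 1) >> 0 == delta. */
		return (delta * tkr->mult) >> tkr->shift;
	}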
@@ -161,7 +179,7 @@
  * tk_clock_read - atomic clocksource read() helper
  *
  * This helper is necessary to use in the read paths because, while the
- * seqlock ensures we don't return a bad value while structures are updated,
+ * seqcount ensures we don't return a bad value while structures are updated,
  * it doesn't protect from potential crashes. There is the possibility that
  * the tkr's clocksource may change between the read reference, and the
  * clock reference passed to the read function. This can cause crashes if
@@ -226,10 +244,10 @@
 	unsigned int seq;
 
 	/*
-	 * Since we're called holding a seqlock, the data may shift
+	 * Since we're called holding a seqcount, the data may shift
 	 * under us while we're doing the calculation. This can cause
 	 * false positives, since we'd note a problem but throw the
-	 * results away. So nest another seqlock here to atomically
+	 * results away. So nest another seqcount here to atomically
 	 * grab the points we are checking with.
 	 */
 	do {
@@ -468,7 +486,7 @@
 					tk_clock_read(tkr),
 					tkr->cycle_last,
 					tkr->mask));
-	} while (read_seqcount_retry(&tkf->seq, seq));
+	} while (read_seqcount_latch_retry(&tkf->seq, seq));
 
 	return now;
 }
@@ -490,7 +508,7 @@
  *
  * To keep it NMI safe since we're accessing from tracing, we're not using a
  * separate timekeeper with updates to monotonic clock and boot offset
- * protected with seqlocks. This has the following minor side effects:
+ * protected with seqcounts. This has the following minor side effects:
  *
  * (1) Its possible that a timestamp be taken after the boot offset is updated
  * but before the timekeeper is updated. If this happens, the new boot offset
@@ -514,29 +532,29 @@
 }
 EXPORT_SYMBOL_GPL(ktime_get_boot_fast_ns);
 
-
 /*
  * See comment for __ktime_get_fast_ns() vs. timestamp ordering
  */
-static __always_inline u64 __ktime_get_real_fast_ns(struct tk_fast *tkf)
+static __always_inline u64 __ktime_get_real_fast(struct tk_fast *tkf, u64 *mono)
 {
 	struct tk_read_base *tkr;
+	u64 basem, baser, delta;
 	unsigned int seq;
-	u64 now;
 
 	do {
 		seq = raw_read_seqcount_latch(&tkf->seq);
 		tkr = tkf->base + (seq & 0x01);
-		now = ktime_to_ns(tkr->base_real);
+		basem = ktime_to_ns(tkr->base);
+		baser = ktime_to_ns(tkr->base_real);
 
-		now += timekeeping_delta_to_ns(tkr,
-				clocksource_delta(
-					tk_clock_read(tkr),
-					tkr->cycle_last,
-					tkr->mask));
-	} while (read_seqcount_retry(&tkf->seq, seq));
+		delta = timekeeping_delta_to_ns(tkr,
+				clocksource_delta(tk_clock_read(tkr),
+				tkr->cycle_last, tkr->mask));
+	} while (read_seqcount_latch_retry(&tkf->seq, seq));
 
-	return now;
+	if (mono)
+		*mono = basem + delta;
+	return baser + delta;
 }
 
 /**
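Note: deriving a single delta inside the latch loop and adding it to both base and base_real keeps the monotonic and realtime values coherent with each other. A caller that needs both, as ktime_get_fast_timestamps() below does, can use something like:

	u64 mono, real;

	real = __ktime_get_real_fast(&tk_fast_mono, &mono);
	/* mono and real now reflect the same clocksource readout. */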
@@ -544,9 +562,63 @@
  */
 u64 ktime_get_real_fast_ns(void)
 {
-	return __ktime_get_real_fast_ns(&tk_fast_mono);
+	return __ktime_get_real_fast(&tk_fast_mono, NULL);
 }
 EXPORT_SYMBOL_GPL(ktime_get_real_fast_ns);
+
+/**
+ * ktime_get_fast_timestamps: - NMI safe timestamps
+ * @snapshot:	Pointer to timestamp storage
+ *
+ * Stores clock monotonic, boottime and realtime timestamps.
+ *
+ * Boot time is a racy access on 32bit systems if the sleep time injection
+ * happens late during resume and not in timekeeping_resume(). That could
+ * be avoided by expanding struct tk_read_base with boot offset for 32bit
+ * and adding more overhead to the update. As this is a hard to observe
+ * once per resume event which can be filtered with reasonable effort using
+ * the accurate mono/real timestamps, it's probably not worth the trouble.
+ *
+ * Aside of that it might be possible on 32 and 64 bit to observe the
+ * following when the sleep time injection happens late:
+ *
+ *	CPU 0					CPU 1
+ *	timekeeping_resume()
+ *	ktime_get_fast_timestamps()
+ *		mono, real = __ktime_get_real_fast()
+ *						inject_sleep_time()
+ *						   update boot offset
+ *		boot = mono + bootoffset;
+ *
+ * That means that boot time already has the sleep time adjustment, but
+ * real time does not. On the next readout both are in sync again.
+ *
+ * Preventing this for 64bit is not really feasible without destroying the
+ * careful cache layout of the timekeeper because the sequence count and
+ * struct tk_read_base would then need two cache lines instead of one.
+ *
+ * Access to the time keeper clock source is disabled across the innermost
+ * steps of suspend/resume. The accessors still work, but the timestamps
+ * are frozen until time keeping is resumed which happens very early.
+ *
+ * For regular suspend/resume there is no observable difference vs. sched
+ * clock, but it might affect some of the nasty low level debug printks.
+ *
+ * OTOH, access to sched clock is not guaranteed across suspend/resume on
+ * all systems either so it depends on the hardware in use.
+ *
+ * If that turns out to be a real problem then this could be mitigated by
+ * using sched clock in a similar way as during early boot. But it's not as
+ * trivial as on early boot because it needs some careful protection
+ * against the clock monotonic timestamp jumping backwards on resume.
+ */
+void ktime_get_fast_timestamps(struct ktime_timestamps *snapshot)
+{
+	struct timekeeper *tk = &tk_core.timekeeper;
+
+	snapshot->real = __ktime_get_real_fast(&tk_fast_mono, &snapshot->mono);
+	snapshot->boot = snapshot->mono + ktime_to_ns(data_race(tk->offs_boot));
+}
 
 /**
  * halt_fast_timekeeper - Prevent fast timekeeper from accessing clocksource.
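Note: a minimal usage sketch for the new NMI safe snapshot helper. The mono, boot and real fields follow the assignments above; the exact definition of struct ktime_timestamps lives in the corresponding header and is assumed here:

	struct ktime_timestamps snap;

	ktime_get_fast_timestamps(&snap);
	pr_debug("mono=%llu boot=%llu real=%llu\n",
		 (unsigned long long)snap.mono,
		 (unsigned long long)snap.boot,
		 (unsigned long long)snap.real);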
@@ -730,7 +802,7 @@
 void ktime_get_real_ts64(struct timespec64 *ts)
 {
 	struct timekeeper *tk = &tk_core.timekeeper;
-	unsigned long seq;
+	unsigned int seq;
 	u64 nsecs;
 
 	WARN_ON(timekeeping_suspended);
@@ -840,7 +912,7 @@
 ktime_t ktime_mono_to_any(ktime_t tmono, enum tk_offsets offs)
 {
 	ktime_t *offset = offsets[offs];
-	unsigned long seq;
+	unsigned int seq;
 	ktime_t tconv;
 
 	do {
@@ -957,7 +1029,7 @@
  * but without the sequence counter protect. This internal function
  * is called just when timekeeping lock is already held.
  */
-time64_t __ktime_get_real_seconds(void)
+noinstr time64_t __ktime_get_real_seconds(void)
 {
 	struct timekeeper *tk = &tk_core.timekeeper;
 
@@ -971,7 +1043,7 @@
 void ktime_get_snapshot(struct system_time_snapshot *systime_snapshot)
 {
 	struct timekeeper *tk = &tk_core.timekeeper;
-	unsigned long seq;
+	unsigned int seq;
 	ktime_t base_raw;
 	ktime_t base_real;
 	u64 nsec_raw;
@@ -1132,7 +1204,7 @@
 	ktime_t base_real, base_raw;
 	u64 nsec_real, nsec_raw;
 	u8 cs_was_changed_seq;
-	unsigned long seq;
+	unsigned int seq;
 	bool do_interp;
 	int ret;
 
@@ -1258,6 +1330,11 @@
 
 	/* signal hrtimers about time change */
 	clock_was_set();
+
+	if (!ret) {
+		audit_tk_injoffset(ts_delta);
+		add_device_randomness(ts, sizeof(*ts));
+	}
 
 	return ret;
 }
@@ -1418,7 +1495,7 @@
 void ktime_get_raw_ts64(struct timespec64 *ts)
 {
 	struct timekeeper *tk = &tk_core.timekeeper;
-	unsigned long seq;
+	unsigned int seq;
 	u64 nsecs;
 
 	do {
@@ -1440,7 +1517,7 @@
 int timekeeping_valid_for_hres(void)
 {
 	struct timekeeper *tk = &tk_core.timekeeper;
-	unsigned long seq;
+	unsigned int seq;
 	int ret;
 
 	do {
@@ -1459,7 +1536,7 @@
 u64 timekeeping_max_deferment(void)
 {
 	struct timekeeper *tk = &tk_core.timekeeper;
-	unsigned long seq;
+	unsigned int seq;
 	u64 ret;
 
 	do {
@@ -1473,7 +1550,7 @@
 }
 
 /**
- * read_persistent_clock - Return time from the persistent clock.
+ * read_persistent_clock64 - Return time from the persistent clock.
  *
  * Weak dummy function for arches that do not yet support it.
 * Reads the time from the battery backed persistent clock.
@@ -1481,18 +1558,10 @@
 *
 * XXX - Do be sure to remove it once all arches implement it.
 */
-void __weak read_persistent_clock(struct timespec *ts)
+void __weak read_persistent_clock64(struct timespec64 *ts)
 {
 	ts->tv_sec = 0;
 	ts->tv_nsec = 0;
-}
-
-void __weak read_persistent_clock64(struct timespec64 *ts64)
-{
-	struct timespec ts;
-
-	read_persistent_clock(&ts);
-	*ts64 = timespec_to_timespec64(ts);
 }
 
 /**
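Note: the weak stub above is only a fallback; an architecture with a battery backed clock overrides it. A hedged sketch of such an override, where read_platform_rtc_ns() is a placeholder for whatever the platform actually exposes:

	/* Hypothetical arch override; read_platform_rtc_ns() is a placeholder. */
	void read_persistent_clock64(struct timespec64 *ts)
	{
		u64 ns = read_platform_rtc_ns();

		*ts = ns_to_timespec64(ns);
	}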
@@ -2009,7 +2078,7 @@
  * logarithmic_accumulation - shifted accumulation of cycles
  *
  * This functions accumulates a shifted interval of cycles into
- * into a shifted interval nanoseconds. Allows for O(log) accumulation
+ * a shifted interval nanoseconds. Allows for O(log) accumulation
  * loop.
  *
  * Returns the unconsumed cycles.
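Note: the O(log) behaviour comes from consuming the remaining offset in cycle_interval sized chunks scaled by a power of two shift, lowering the shift as the offset shrinks. A simplified sketch of how such a calling loop can be driven (the real accumulation path in this file also clamps the shift and maintains the shadow timekeeper and a clock_set flag):

	shift = ilog2(offset) - ilog2(tk->cycle_interval);
	shift = max(0, shift);
	while (offset >= tk->cycle_interval) {
		offset = logarithmic_accumulation(tk, offset, shift, &clock_set);
		if (offset < tk->cycle_interval << shift)
			shift--;
	}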
@@ -2167,7 +2236,7 @@
 void ktime_get_coarse_real_ts64(struct timespec64 *ts)
 {
 	struct timekeeper *tk = &tk_core.timekeeper;
-	unsigned long seq;
+	unsigned int seq;
 
 	do {
 		seq = read_seqcount_begin(&tk_core.seq);
@@ -2181,7 +2250,7 @@
 {
 	struct timekeeper *tk = &tk_core.timekeeper;
 	struct timespec64 now, mono;
-	unsigned long seq;
+	unsigned int seq;
 
 	do {
 		seq = read_seqcount_begin(&tk_core.seq);
@@ -2201,7 +2270,7 @@
 void do_timer(unsigned long ticks)
 {
 	jiffies_64 += ticks;
-	calc_global_load(ticks);
+	calc_global_load();
 }
 
 /**
@@ -2251,7 +2320,7 @@
 /**
  * timekeeping_validate_timex - Ensures the timex is ok for use in do_adjtimex
  */
-static int timekeeping_validate_timex(const struct timex *txc)
+static int timekeeping_validate_timex(const struct __kernel_timex *txc)
 {
 	if (txc->modes & ADJ_ADJTIME) {
 		/* singleshot must not be used with any other mode bits */
@@ -2313,13 +2382,28 @@
 	return 0;
 }
 
+/**
+ * random_get_entropy_fallback - Returns the raw clock source value,
+ * used by random.c for platforms with no valid random_get_entropy().
+ */
+unsigned long random_get_entropy_fallback(void)
+{
+	struct tk_read_base *tkr = &tk_core.timekeeper.tkr_mono;
+	struct clocksource *clock = READ_ONCE(tkr->clock);
+
+	if (unlikely(timekeeping_suspended || !clock))
+		return 0;
+	return clock->read(clock);
+}
+EXPORT_SYMBOL_GPL(random_get_entropy_fallback);
 
 /**
  * do_adjtimex() - Accessor function to NTP __do_adjtimex function
  */
-int do_adjtimex(struct timex *txc)
+int do_adjtimex(struct __kernel_timex *txc)
 {
 	struct timekeeper *tk = &tk_core.timekeeper;
+	struct audit_ntp_data ad;
 	unsigned long flags;
 	struct timespec64 ts;
 	s32 orig_tai, tai;
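Note: random_get_entropy_fallback() is intended to back random_get_entropy() on platforms without a usable cycle counter. A hedged sketch of how the header side might wire that up (the actual definition belongs in the timex headers, not in this file):

	#ifndef random_get_entropy
	static inline unsigned long random_get_entropy(void)
	{
		return random_get_entropy_fallback();
	}
	#endif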
@@ -2329,6 +2413,7 @@
 	ret = timekeeping_validate_timex(txc);
 	if (ret)
 		return ret;
+	add_device_randomness(txc, sizeof(*txc));
 
 	if (txc->modes & ADJ_SETOFFSET) {
 		struct timespec64 delta;
@@ -2339,15 +2424,20 @@
 		ret = timekeeping_inject_offset(&delta);
 		if (ret)
 			return ret;
+
+		audit_tk_injoffset(delta);
 	}
 
+	audit_ntp_init(&ad);
+
 	ktime_get_real_ts64(&ts);
+	add_device_randomness(&ts, sizeof(ts));
 
 	raw_spin_lock_irqsave(&timekeeper_lock, flags);
 	write_seqcount_begin(&tk_core.seq);
 
 	orig_tai = tai = tk->tai_offset;
-	ret = __do_adjtimex(txc, &ts, &tai);
+	ret = __do_adjtimex(txc, &ts, &tai, &ad);
 
 	if (tai != orig_tai) {
 		__timekeeping_set_tai_offset(tk, tai);
@@ -2357,6 +2447,8 @@
 
 	write_seqcount_end(&tk_core.seq);
 	raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
+
+	audit_ntp_log(&ad);
 
 	/* Update the multiplier immediately if frequency was set directly */
 	if (txc->modes & (ADJ_FREQUENCY | ADJ_TICK))
@@ -2397,8 +2489,10 @@
  */
 void xtime_update(unsigned long ticks)
 {
-	write_seqlock(&jiffies_lock);
+	raw_spin_lock(&jiffies_lock);
+	write_seqcount_begin(&jiffies_seq);
 	do_timer(ticks);
-	write_sequnlock(&jiffies_lock);
+	write_seqcount_end(&jiffies_seq);
+	raw_spin_unlock(&jiffies_lock);
 	update_wall_time();
 }
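Note: once jiffies_lock is a raw spinlock paired with the jiffies_seq seqcount, readers of jiffies_64 are expected to use the seqcount rather than take the lock. A sketch of the matching read side, mirroring what get_jiffies_64() looks like after the same conversion:

	u64 get_jiffies_64(void)
	{
		unsigned int seq;
		u64 ret;

		do {
			seq = read_seqcount_begin(&jiffies_seq);
			ret = jiffies_64;
		} while (read_seqcount_retry(&jiffies_seq, seq));
		return ret;
	}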