2023-12-11 d2ccde1c8e90d38cee87a1b0309ad2827f3fd30d
kernel/lib/vdso/gettimeofday.c
@@ -17,6 +17,13 @@
 }
 #endif
 
+#ifndef vdso_shift_ns
+static __always_inline u64 vdso_shift_ns(u64 ns, u32 shift)
+{
+	return ns >> shift;
+}
+#endif
+
 #ifndef __arch_vdso_hres_capable
 static inline bool __arch_vdso_hres_capable(void)
 {
@@ -24,8 +31,81 @@
 }
 #endif
 
+#ifndef vdso_clocksource_ok
+static inline bool vdso_clocksource_ok(const struct vdso_data *vd)
+{
+	return vd->clock_mode != VDSO_CLOCKMODE_NONE;
+}
+#endif
+
+#ifndef vdso_cycles_ok
+static inline bool vdso_cycles_ok(u64 cycles)
+{
+	return true;
+}
+#endif
+
+#ifdef CONFIG_TIME_NS
+static __always_inline int do_hres_timens(const struct vdso_data *vdns, clockid_t clk,
+					  struct __kernel_timespec *ts)
+{
+	const struct vdso_data *vd = __arch_get_timens_vdso_data();
+	const struct timens_offset *offs = &vdns->offset[clk];
+	const struct vdso_timestamp *vdso_ts;
+	u64 cycles, last, ns;
+	u32 seq;
+	s64 sec;
+
+	if (clk != CLOCK_MONOTONIC_RAW)
+		vd = &vd[CS_HRES_COARSE];
+	else
+		vd = &vd[CS_RAW];
+	vdso_ts = &vd->basetime[clk];
+
+	do {
+		seq = vdso_read_begin(vd);
+
+		if (unlikely(!vdso_clocksource_ok(vd)))
+			return -1;
+
+		cycles = __arch_get_hw_counter(vd->clock_mode, vd);
+		if (unlikely(!vdso_cycles_ok(cycles)))
+			return -1;
+		ns = vdso_ts->nsec;
+		last = vd->cycle_last;
+		ns += vdso_calc_delta(cycles, last, vd->mask, vd->mult);
+		ns = vdso_shift_ns(ns, vd->shift);
+		sec = vdso_ts->sec;
+	} while (unlikely(vdso_read_retry(vd, seq)));
+
+	/* Add the namespace offset */
+	sec += offs->sec;
+	ns += offs->nsec;
+
+	/*
+	 * Do this outside the loop: a race inside the loop could result
+	 * in __iter_div_u64_rem() being extremely slow.
+	 */
+	ts->tv_sec = sec + __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
+	ts->tv_nsec = ns;
+
+	return 0;
+}
+#else
+static __always_inline const struct vdso_data *__arch_get_timens_vdso_data(void)
+{
+	return NULL;
+}
+
+static __always_inline int do_hres_timens(const struct vdso_data *vdns, clockid_t clk,
+					  struct __kernel_timespec *ts)
+{
+	return -EINVAL;
+}
+#endif
+
 static __always_inline int do_hres(const struct vdso_data *vd, clockid_t clk,
-		   struct __kernel_timespec *ts)
+				   struct __kernel_timespec *ts)
 {
 	const struct vdso_timestamp *vdso_ts = &vd->basetime[clk];
 	u64 cycles, last, sec, ns;
@@ -36,15 +116,35 @@
 		return -1;
 
 	do {
-		seq = vdso_read_begin(vd);
-		cycles = __arch_get_hw_counter(vd->clock_mode);
-		ns = vdso_ts->nsec;
-		last = vd->cycle_last;
-		if (unlikely((s64)cycles < 0))
+		/*
+		 * Open coded to handle VDSO_CLOCKMODE_TIMENS. Time namespace
+		 * enabled tasks have a special VVAR page installed which
+		 * has vd->seq set to 1 and vd->clock_mode set to
+		 * VDSO_CLOCKMODE_TIMENS. For non time namespace affected tasks
+		 * this does not affect performance because if vd->seq is
+		 * odd, i.e. a concurrent update is in progress the extra
+		 * check for vd->clock_mode is just a few extra
+		 * instructions while spin waiting for vd->seq to become
+		 * even again.
+		 */
+		while (unlikely((seq = READ_ONCE(vd->seq)) & 1)) {
+			if (IS_ENABLED(CONFIG_TIME_NS) &&
+			    vd->clock_mode == VDSO_CLOCKMODE_TIMENS)
+				return do_hres_timens(vd, clk, ts);
+			cpu_relax();
+		}
+		smp_rmb();
+
+		if (unlikely(!vdso_clocksource_ok(vd)))
 			return -1;
 
+		cycles = __arch_get_hw_counter(vd->clock_mode, vd);
+		if (unlikely(!vdso_cycles_ok(cycles)))
+			return -1;
+		ns = vdso_ts->nsec;
+		last = vd->cycle_last;
 		ns += vdso_calc_delta(cycles, last, vd->mask, vd->mult);
-		ns >>= vd->shift;
+		ns = vdso_shift_ns(ns, vd->shift);
 		sec = vdso_ts->sec;
 	} while (unlikely(vdso_read_retry(vd, seq)));
 
@@ -58,6 +158,43 @@
 	return 0;
 }
 
+#ifdef CONFIG_TIME_NS
+static __always_inline int do_coarse_timens(const struct vdso_data *vdns, clockid_t clk,
+					    struct __kernel_timespec *ts)
+{
+	const struct vdso_data *vd = __arch_get_timens_vdso_data();
+	const struct vdso_timestamp *vdso_ts = &vd->basetime[clk];
+	const struct timens_offset *offs = &vdns->offset[clk];
+	u64 nsec;
+	s64 sec;
+	s32 seq;
+
+	do {
+		seq = vdso_read_begin(vd);
+		sec = vdso_ts->sec;
+		nsec = vdso_ts->nsec;
+	} while (unlikely(vdso_read_retry(vd, seq)));
+
+	/* Add the namespace offset */
+	sec += offs->sec;
+	nsec += offs->nsec;
+
+	/*
+	 * Do this outside the loop: a race inside the loop could result
+	 * in __iter_div_u64_rem() being extremely slow.
+	 */
+	ts->tv_sec = sec + __iter_div_u64_rem(nsec, NSEC_PER_SEC, &nsec);
+	ts->tv_nsec = nsec;
+	return 0;
+}
+#else
+static __always_inline int do_coarse_timens(const struct vdso_data *vdns, clockid_t clk,
+					    struct __kernel_timespec *ts)
+{
+	return -1;
+}
+#endif
+
 static __always_inline int do_coarse(const struct vdso_data *vd, clockid_t clk,
 				     struct __kernel_timespec *ts)
 {
@@ -65,7 +202,18 @@
 	u32 seq;
 
 	do {
-		seq = vdso_read_begin(vd);
+		/*
+		 * Open coded to handle VDSO_CLOCK_TIMENS. See comment in
+		 * do_hres().
+		 */
+		while ((seq = READ_ONCE(vd->seq)) & 1) {
+			if (IS_ENABLED(CONFIG_TIME_NS) &&
+			    vd->clock_mode == VDSO_CLOCKMODE_TIMENS)
+				return do_coarse_timens(vd, clk, ts);
+			cpu_relax();
+		}
+		smp_rmb();
+
 		ts->tv_sec = vdso_ts->sec;
 		ts->tv_nsec = vdso_ts->nsec;
 	} while (unlikely(vdso_read_retry(vd, seq)));
@@ -73,10 +221,10 @@
 	return 0;
 }
 
-static __maybe_unused int
-__cvdso_clock_gettime_common(clockid_t clock, struct __kernel_timespec *ts)
+static __always_inline int
+__cvdso_clock_gettime_common(const struct vdso_data *vd, clockid_t clock,
+			     struct __kernel_timespec *ts)
 {
-	const struct vdso_data *vd = __arch_get_vdso_data();
 	u32 msk;
 
 	/* Check for negative values or invalid clocks */
@@ -101,31 +249,34 @@
 }
 
 static __maybe_unused int
-__cvdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts)
+__cvdso_clock_gettime_data(const struct vdso_data *vd, clockid_t clock,
+			   struct __kernel_timespec *ts)
 {
-	int ret = __cvdso_clock_gettime_common(clock, ts);
+	int ret = __cvdso_clock_gettime_common(vd, clock, ts);
 
 	if (unlikely(ret))
 		return clock_gettime_fallback(clock, ts);
 	return 0;
 }
 
+static __maybe_unused int
+__cvdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts)
+{
+	return __cvdso_clock_gettime_data(__arch_get_vdso_data(), clock, ts);
+}
+
 #ifdef BUILD_VDSO32
 static __maybe_unused int
-__cvdso_clock_gettime32(clockid_t clock, struct old_timespec32 *res)
+__cvdso_clock_gettime32_data(const struct vdso_data *vd, clockid_t clock,
+			     struct old_timespec32 *res)
 {
 	struct __kernel_timespec ts;
 	int ret;
 
-	ret = __cvdso_clock_gettime_common(clock, &ts);
+	ret = __cvdso_clock_gettime_common(vd, clock, &ts);
 
-#ifdef VDSO_HAS_32BIT_FALLBACK
 	if (unlikely(ret))
 		return clock_gettime32_fallback(clock, res);
-#else
-	if (unlikely(ret))
-		ret = clock_gettime_fallback(clock, &ts);
-#endif
 
 	/* For ret == 0 */
 	res->tv_sec = ts.tv_sec;
@@ -133,12 +284,18 @@
 
 	return ret;
 }
+
+static __maybe_unused int
+__cvdso_clock_gettime32(clockid_t clock, struct old_timespec32 *res)
+{
+	return __cvdso_clock_gettime32_data(__arch_get_vdso_data(), clock, res);
+}
 #endif /* BUILD_VDSO32 */
 
 static __maybe_unused int
-__cvdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz)
+__cvdso_gettimeofday_data(const struct vdso_data *vd,
+			  struct __kernel_old_timeval *tv, struct timezone *tz)
 {
-	const struct vdso_data *vd = __arch_get_vdso_data();
 
 	if (likely(tv != NULL)) {
 		struct __kernel_timespec ts;
@@ -151,6 +308,10 @@
 	}
 
 	if (unlikely(tz != NULL)) {
+		if (IS_ENABLED(CONFIG_TIME_NS) &&
+		    vd->clock_mode == VDSO_CLOCKMODE_TIMENS)
+			vd = __arch_get_timens_vdso_data();
+
 		tz->tz_minuteswest = vd[CS_HRES_COARSE].tz_minuteswest;
 		tz->tz_dsttime = vd[CS_HRES_COARSE].tz_dsttime;
 	}
@@ -158,30 +319,51 @@
 	return 0;
 }
 
-#ifdef VDSO_HAS_TIME
-static __maybe_unused time_t __cvdso_time(time_t *time)
+static __maybe_unused int
+__cvdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz)
 {
-	const struct vdso_data *vd = __arch_get_vdso_data();
-	time_t t = READ_ONCE(vd[CS_HRES_COARSE].basetime[CLOCK_REALTIME].sec);
+	return __cvdso_gettimeofday_data(__arch_get_vdso_data(), tv, tz);
+}
+
+#ifdef VDSO_HAS_TIME
+static __maybe_unused __kernel_old_time_t
+__cvdso_time_data(const struct vdso_data *vd, __kernel_old_time_t *time)
+{
+	__kernel_old_time_t t;
+
+	if (IS_ENABLED(CONFIG_TIME_NS) &&
+	    vd->clock_mode == VDSO_CLOCKMODE_TIMENS)
+		vd = __arch_get_timens_vdso_data();
+
+	t = READ_ONCE(vd[CS_HRES_COARSE].basetime[CLOCK_REALTIME].sec);
 
 	if (time)
 		*time = t;
 
 	return t;
 }
+
+static __maybe_unused __kernel_old_time_t __cvdso_time(__kernel_old_time_t *time)
+{
+	return __cvdso_time_data(__arch_get_vdso_data(), time);
+}
 #endif /* VDSO_HAS_TIME */
 
 #ifdef VDSO_HAS_CLOCK_GETRES
 static __maybe_unused
-int __cvdso_clock_getres_common(clockid_t clock, struct __kernel_timespec *res)
+int __cvdso_clock_getres_common(const struct vdso_data *vd, clockid_t clock,
+				struct __kernel_timespec *res)
 {
-	const struct vdso_data *vd = __arch_get_vdso_data();
 	u32 msk;
 	u64 ns;
 
 	/* Check for negative values or invalid clocks */
 	if (unlikely((u32) clock >= MAX_CLOCKS))
 		return -1;
+
+	if (IS_ENABLED(CONFIG_TIME_NS) &&
+	    vd->clock_mode == VDSO_CLOCKMODE_TIMENS)
+		vd = __arch_get_timens_vdso_data();
 
 	/*
 	 * Convert the clockid to a bitmask and use it to check which
@@ -210,34 +392,34 @@
 }
 
 static __maybe_unused
-int __cvdso_clock_getres(clockid_t clock, struct __kernel_timespec *res)
+int __cvdso_clock_getres_data(const struct vdso_data *vd, clockid_t clock,
+			      struct __kernel_timespec *res)
 {
-	int ret = __cvdso_clock_getres_common(clock, res);
+	int ret = __cvdso_clock_getres_common(vd, clock, res);
 
 	if (unlikely(ret))
 		return clock_getres_fallback(clock, res);
 	return 0;
 }
 
+static __maybe_unused
+int __cvdso_clock_getres(clockid_t clock, struct __kernel_timespec *res)
+{
+	return __cvdso_clock_getres_data(__arch_get_vdso_data(), clock, res);
+}
+
 #ifdef BUILD_VDSO32
 static __maybe_unused int
-__cvdso_clock_getres_time32(clockid_t clock, struct old_timespec32 *res)
+__cvdso_clock_getres_time32_data(const struct vdso_data *vd, clockid_t clock,
+				 struct old_timespec32 *res)
 {
 	struct __kernel_timespec ts;
 	int ret;
 
-	ret = __cvdso_clock_getres_common(clock, &ts);
+	ret = __cvdso_clock_getres_common(vd, clock, &ts);
 
-#ifdef VDSO_HAS_32BIT_FALLBACK
 	if (unlikely(ret))
 		return clock_getres32_fallback(clock, res);
-#else
-	if (unlikely(ret)) {
-		ret = clock_getres_fallback(clock, &ts);
-		if (unlikely(ret))
-			return ret;
-	}
-#endif
 
 	if (likely(res)) {
 		res->tv_sec = ts.tv_sec;
@@ -245,5 +427,12 @@
 	}
 	return ret;
 }
+
+static __maybe_unused int
+__cvdso_clock_getres_time32(clockid_t clock, struct old_timespec32 *res)
+{
+	return __cvdso_clock_getres_time32_data(__arch_get_vdso_data(),
+						clock, res);
+}
 #endif /* BUILD_VDSO32 */
 #endif /* VDSO_HAS_CLOCK_GETRES */
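
The open-coded reader added to do_hres() and do_coarse() above is the heart of this change. Below is a minimal, self-contained userspace model of that seqcount read protocol, offered as an illustration only: fake_vdso_data, read_time and the FAKE_CLOCKMODE_* constants are invented names, and C11 atomics stand in for the kernel's READ_ONCE() and smp_rmb(). It shows how a time-namespace VVAR page, whose sequence count is pinned to the odd value 1, diverts readers to the slow path without costing the common case anything.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

enum { FAKE_CLOCKMODE_TSC = 1, FAKE_CLOCKMODE_TIMENS = 2 };

struct fake_vdso_data {
	atomic_uint seq;	/* even: stable, odd: update in progress */
	int clock_mode;		/* TIMENS marks the special VVAR page */
	uint64_t sec, nsec;
};

/* Returns false when the caller must take the timens slow path instead. */
static bool read_time(struct fake_vdso_data *vd, uint64_t *sec, uint64_t *nsec)
{
	unsigned int seq;

	do {
		/* Spin while an update is in progress. A timens page keeps
		 * seq == 1 forever, so this check fires on the very first
		 * iteration there; the kernel also calls cpu_relax() here. */
		while ((seq = atomic_load_explicit(&vd->seq, memory_order_relaxed)) & 1) {
			if (vd->clock_mode == FAKE_CLOCKMODE_TIMENS)
				return false;
		}
		atomic_thread_fence(memory_order_acquire);	/* ~smp_rmb() */

		*sec = vd->sec;
		*nsec = vd->nsec;

		atomic_thread_fence(memory_order_acquire);
		/* Retry if the writer ran while we were reading,
		 * mirroring vdso_read_retry(). */
	} while (atomic_load_explicit(&vd->seq, memory_order_relaxed) != seq);

	return true;
}

int main(void)
{
	struct fake_vdso_data vd = {
		.seq = 0, .clock_mode = FAKE_CLOCKMODE_TSC,
		.sec = 1702250000, .nsec = 500,
	};
	uint64_t sec, nsec;

	if (read_time(&vd, &sec, &nsec))
		printf("%llu.%09llu\n", (unsigned long long)sec, (unsigned long long)nsec);
	return 0;
}

The clock_mode check sits inside the spin loop on purpose: a normal task only reaches it while a concurrent update is in flight, and a timens task hits it immediately because its pinned odd sequence count never becomes even.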
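
Both do_hres_timens() and do_coarse_timens() carry the comment about keeping __iter_div_u64_rem() outside the retry loop. The reason is easiest to see from a simplified model of that helper (the real one lives in include/linux/math64.h): it divides by repeated subtraction, which is cheap for the one or two iterations a consistent nanosecond value needs, but unbounded on a garbage value torn out of a concurrent update. Normalizing only after vdso_read_retry() has accepted the snapshot keeps the quotient small.

#include <stdint.h>
#include <stdio.h>

#define NSEC_PER_SEC 1000000000ULL

/* Simplified model of __iter_div_u64_rem(): division by repeated
 * subtraction. With nsec below roughly 2 * NSEC_PER_SEC the loop body
 * runs at most twice; an inconsistent mid-update value could make it
 * iterate millions of times. */
static uint32_t iter_div_u64_rem(uint64_t dividend, uint32_t divisor, uint64_t *remainder)
{
	uint32_t ret = 0;

	while (dividend >= divisor) {
		dividend -= divisor;
		ret++;
	}
	*remainder = dividend;
	return ret;
}

int main(void)
{
	uint64_t ns = NSEC_PER_SEC + 123;	/* consistent snapshot: one iteration */
	uint64_t sec = 5 + iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);

	printf("%llu.%09llu\n", (unsigned long long)sec, (unsigned long long)ns);
	return 0;
}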
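
None of the __cvdso_*() helpers are called directly by applications; each architecture wraps them as the exported vDSO symbols behind the corresponding libc entry points, and the *_fallback() calls drop to a real syscall whenever the fast path bails out (unusable clocksource or an unsupported clock id). A trivial caller, for orientation:

#include <stdio.h>
#include <time.h>

int main(void)
{
	struct timespec ts;

	/* On vDSO-capable architectures this call lands in the fast path
	 * built from __cvdso_clock_gettime() above; the kernel syscall is
	 * taken only when that path returns the fallback error. */
	if (clock_gettime(CLOCK_MONOTONIC, &ts) == 0)
		printf("%lld.%09ld\n", (long long)ts.tv_sec, ts.tv_nsec);
	return 0;
}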