2024-11-01 2f529f9b558ca1c1bd74be7437a84e4711743404
kernel/lib/vdso/gettimeofday.c
@@ -5,6 +5,245 @@
 #include <vdso/datapage.h>
 #include <vdso/helpers.h>
 
+static int do_hres_timens(const struct vdso_data *vdns, clockid_t clk,
+			  struct __kernel_timespec *ts);
+
+#ifndef vdso_clocksource_ok
+static inline bool vdso_clocksource_ok(const struct vdso_data *vd)
+{
+	return vd->clock_mode != VDSO_CLOCKMODE_NONE;
+}
+#endif
+
+#ifndef vdso_cycles_ok
+static inline bool vdso_cycles_ok(u64 cycles)
+{
+	return true;
+}
+#endif
+
+#if defined(CONFIG_GENERIC_CLOCKSOURCE_VDSO) && !defined(BUILD_VDSO32)
+
+#include <linux/fcntl.h>
+#include <linux/io.h>
+#include <linux/ioctl.h>
+#include <uapi/linux/clocksource.h>
+
+static notrace u64 readl_mmio_up(const struct clksrc_info *vinfo)
+{
+	const struct clksrc_user_mmio_info *info = &vinfo->mmio;
+	return readl_relaxed(info->reg_lower);
+}
+
+static notrace u64 readl_mmio_down(const struct clksrc_info *vinfo)
+{
+	const struct clksrc_user_mmio_info *info = &vinfo->mmio;
+	return ~(u64)readl_relaxed(info->reg_lower) & info->mask_lower;
+}
+
+static notrace u64 readw_mmio_up(const struct clksrc_info *vinfo)
+{
+	const struct clksrc_user_mmio_info *info = &vinfo->mmio;
+	return readw_relaxed(info->reg_lower);
+}
+
+static notrace u64 readw_mmio_down(const struct clksrc_info *vinfo)
+{
+	const struct clksrc_user_mmio_info *info = &vinfo->mmio;
+	return ~(u64)readw_relaxed(info->reg_lower) & info->mask_lower;
+}
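
The _down readers above serve count-down timers: complementing the raw value within the counter mask converts the decreasing count into the increasing one the callers expect. A minimal standalone sketch of the same inversion, assuming a hypothetical 24-bit down-counter (the patch takes the real width from clksrc_user_mmio_info's mask_lower):

    #include <stdint.h>

    #define COUNTER_MASK	((1u << 24) - 1)	/* hypothetical 24-bit timer */

    /* A down-counter reading N maps to the up-count (~N & mask), so
     * successive reads increase monotonically until the timer wraps. */
    static uint64_t down_to_up(uint32_t raw)
    {
    	return ~(uint64_t)raw & COUNTER_MASK;
    }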
+
+static notrace u64 readl_dmmio_up(const struct clksrc_info *vinfo)
+{
+	const struct clksrc_user_mmio_info *info = &vinfo->mmio;
+	void __iomem *reg_lower, *reg_upper;
+	u32 upper, old_upper, lower;
+
+	reg_lower = info->reg_lower;
+	reg_upper = info->reg_upper;
+
+	upper = readl_relaxed(reg_upper);
+	do {
+		old_upper = upper;
+		lower = readl_relaxed(reg_lower);
+		upper = readl_relaxed(reg_upper);
+	} while (upper != old_upper);
+
+	return (((u64)upper) << info->bits_lower) | lower;
+}
+
+static notrace u64 readw_dmmio_up(const struct clksrc_info *vinfo)
+{
+	const struct clksrc_user_mmio_info *info = &vinfo->mmio;
+	void __iomem *reg_lower, *reg_upper;
+	u16 upper, old_upper, lower;
+
+	reg_lower = info->reg_lower;
+	reg_upper = info->reg_upper;
+
+	upper = readw_relaxed(reg_upper);
+	do {
+		old_upper = upper;
+		lower = readw_relaxed(reg_lower);
+		upper = readw_relaxed(reg_upper);
+	} while (upper != old_upper);
+
+	return (((u64)upper) << info->bits_lower) | lower;
+}
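
The dmmio readers above sample a counter whose value is split across two registers. Re-reading the upper half until it is stable across the lower-half read guards against a carry propagating between the two accesses. The same idiom reduced to a userspace sketch, assuming a hypothetical counter_lo/counter_hi register pair:

    #include <stdint.h>

    /* Hypothetical pair of 32-bit registers backing one 64-bit counter. */
    extern volatile uint32_t counter_lo, counter_hi;

    static uint64_t read_split_counter(void)
    {
    	uint32_t hi, old_hi, lo;

    	hi = counter_hi;
    	do {			/* retry if lo carried into hi meanwhile */
    		old_hi = hi;
    		lo = counter_lo;
    		hi = counter_hi;
    	} while (hi != old_hi);

    	return ((uint64_t)hi << 32) | lo;
    }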
+
+static notrace __cold vdso_read_cycles_t *get_mmio_read_cycles(unsigned int type)
+{
+	switch (type) {
+	case CLKSRC_MMIO_L_UP:
+		return &readl_mmio_up;
+	case CLKSRC_MMIO_L_DOWN:
+		return &readl_mmio_down;
+	case CLKSRC_MMIO_W_UP:
+		return &readw_mmio_up;
+	case CLKSRC_MMIO_W_DOWN:
+		return &readw_mmio_down;
+	case CLKSRC_DMMIO_L_UP:
+		return &readl_dmmio_up;
+	case CLKSRC_DMMIO_W_UP:
+		return &readw_dmmio_up;
+	default:
+		return NULL;
+	}
+}
+
+static __always_inline u16 to_cs_type(u32 cs_type_seq)
+{
+	return cs_type_seq >> 16;
+}
+
+static __always_inline u16 to_seq(u32 cs_type_seq)
+{
+	return cs_type_seq;
+}
+
+static __always_inline u32 to_cs_type_seq(u16 type, u16 seq)
+{
+	return (u32)type << 16U | seq;
+}
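
The helpers above pack a clocksource type and a generation sequence into a single 32-bit word, so both can be published and compared with one store and one load. A round-trip check of that packing, using stdint types in place of the kernel's u16/u32:

    #include <assert.h>
    #include <stdint.h>

    static uint16_t to_cs_type(uint32_t cs_type_seq) { return cs_type_seq >> 16; }
    static uint16_t to_seq(uint32_t cs_type_seq) { return (uint16_t)cs_type_seq; }
    static uint32_t to_cs_type_seq(uint16_t type, uint16_t seq)
    {
    	return (uint32_t)type << 16 | seq;
    }

    int main(void)
    {
    	uint32_t packed = to_cs_type_seq(3, 0x1234);

    	assert(to_cs_type(packed) == 3);	/* upper half: type */
    	assert(to_seq(packed) == 0x1234);	/* lower half: seq */
    	return 0;
    }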
+
+static notrace noinline __cold
+void map_clocksource(const struct vdso_data *vd, struct vdso_priv *vp,
+		     u32 seq, u32 new_cs_type_seq)
+{
+	vdso_read_cycles_t *read_cycles = NULL;
+	u32 new_cs_seq, new_cs_type;
+	struct clksrc_info *info;
+	int fd, ret;
+
+	new_cs_seq = to_seq(new_cs_type_seq);
+	new_cs_type = to_cs_type(new_cs_type_seq);
+	info = &vp->clksrc_info[new_cs_type];
+
+	if (new_cs_type < CLOCKSOURCE_VDSO_MMIO)
+		goto done;
+
+	fd = clock_open_device(vd->cs_mmdev, O_RDONLY);
+	if (fd < 0)
+		goto fallback_to_syscall;
+
+	if (vdso_read_retry(vd, seq)) {
+		vdso_read_begin(vd);
+		if (to_seq(vd->cs_type_seq) != new_cs_seq) {
+			/*
+			 * cs_mmdev no longer corresponds to
+			 * vd->cs_type_seq.
+			 */
+			clock_close_device(fd);
+			return;
+		}
+	}
+
+	ret = clock_ioctl_device(fd, CLKSRC_USER_MMIO_MAP, (long)&info->mmio);
+	clock_close_device(fd);
+	if (ret < 0)
+		goto fallback_to_syscall;
+
+	read_cycles = get_mmio_read_cycles(info->mmio.type);
+	if (read_cycles == NULL)	/* Mmhf, misconfigured. */
+		goto fallback_to_syscall;
+done:
+	info->read_cycles = read_cycles;
+	smp_wmb();
+	new_cs_type_seq = to_cs_type_seq(new_cs_type, new_cs_seq);
+	WRITE_ONCE(vp->current_cs_type_seq, new_cs_type_seq);
+
+	return;
+
+fallback_to_syscall:
+	new_cs_type = CLOCKSOURCE_VDSO_NONE;
+	info = &vp->clksrc_info[new_cs_type];
+	goto done;
+}
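
map_clocksource() publishes its result in two ordered steps: it fills in info->read_cycles, then issues smp_wmb() before the WRITE_ONCE() that advertises the new type/seq word, so any reader that observes the new word also sees a valid reader function. A sketch of that publish pattern with C11 release semantics (slot and current_id are hypothetical names):

    #include <stdatomic.h>
    #include <stdint.h>

    struct clksrc_slot {
    	uint64_t (*read_cycles)(void);		/* payload */
    };

    static struct clksrc_slot slot;
    static _Atomic uint32_t current_id;

    static void publish(uint64_t (*reader)(void), uint32_t id)
    {
    	slot.read_cycles = reader;	/* 1: write the payload */
    	/* 2: release-publish the identifying word */
    	atomic_store_explicit(&current_id, id, memory_order_release);
    }

get_hw_counter() below is the consumer side: it reads the word with READ_ONCE() inside the seqcount critical section before dereferencing the pointer.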
+
+static inline notrace
+bool get_hw_counter(const struct vdso_data *vd, u32 *r_seq, u64 *cycles)
+{
+	const struct clksrc_info *info;
+	struct vdso_priv *vp;
+	u32 seq, cs_type_seq;
+	unsigned int cs;
+
+	vp = __arch_get_vdso_priv();
+
+	for (;;) {
+		seq = vdso_read_begin(vd);
+		cs_type_seq = READ_ONCE(vp->current_cs_type_seq);
+		if (likely(to_seq(cs_type_seq) == to_seq(vd->cs_type_seq)))
+			break;
+
+		map_clocksource(vd, vp, seq, vd->cs_type_seq);
+	}
+
+	switch (to_cs_type(cs_type_seq)) {
+	case CLOCKSOURCE_VDSO_NONE:
+		return false;	/* Use fallback. */
+	case CLOCKSOURCE_VDSO_ARCHITECTED:
+		if (unlikely(!vdso_clocksource_ok(vd)))
+			return false;
+		*cycles = __arch_get_hw_counter(vd->clock_mode, vd);
+		if (unlikely(!vdso_cycles_ok(*cycles)))
+			return false;
+		break;
+	default:
+		cs = to_cs_type(READ_ONCE(cs_type_seq));
+		info = &vp->clksrc_info[cs];
+		*cycles = info->read_cycles(info);
+		break;
+	}
+
+	*r_seq = seq;
+
+	return true;
+}
+
+#else
+
+static inline notrace
+bool get_hw_counter(const struct vdso_data *vd, u32 *r_seq, u64 *cycles)
+{
+	*r_seq = vdso_read_begin(vd);
+
+	/*
+	 * CAUTION: checking the clocksource mode must happen inside
+	 * the seqlocked section.
+	 */
+	if (unlikely(!vdso_clocksource_ok(vd)))
+		return false;
+
+	*cycles = __arch_get_hw_counter(vd->clock_mode, vd);
+	if (unlikely(!vdso_cycles_ok(*cycles)))
+		return false;
+
+	return true;
+}
+
+#endif /* CONFIG_GENERIC_CLOCKSOURCE_VDSO */
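
Both get_hw_counter() variants lean on the vDSO seqcount protocol: vdso_read_begin() spins until vd->seq is even (no update in progress), and vdso_read_retry() later confirms the count did not move while the data was sampled. A minimal userspace model of that reader side, assuming a writer that keeps seq odd for the duration of an update:

    #include <stdatomic.h>
    #include <stdint.h>

    struct timedata {
    	_Atomic uint32_t seq;	/* odd while the writer updates */
    	uint64_t nsec;		/* protected payload */
    };

    static uint64_t read_nsec(struct timedata *td)
    {
    	uint32_t seq;
    	uint64_t ns;

    	do {
    		/* begin: wait for an even (stable) sequence count */
    		while ((seq = atomic_load_explicit(&td->seq,
    				memory_order_acquire)) & 1)
    			;
    		ns = td->nsec;
    		/* retry: did the writer run while we were reading? */
    	} while (atomic_load_explicit(&td->seq,
    			memory_order_acquire) != seq);

    	return ns;
    }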
+
 #ifndef vdso_calc_delta
 /*
  * Default implementation which works for all sane clocksources. That
@@ -31,20 +270,6 @@
 }
 #endif
 
-#ifndef vdso_clocksource_ok
-static inline bool vdso_clocksource_ok(const struct vdso_data *vd)
-{
-	return vd->clock_mode != VDSO_CLOCKMODE_NONE;
-}
-#endif
-
-#ifndef vdso_cycles_ok
-static inline bool vdso_cycles_ok(u64 cycles)
-{
-	return true;
-}
-#endif
-
 #ifdef CONFIG_TIME_NS
 static __always_inline int do_hres_timens(const struct vdso_data *vdns, clockid_t clk,
 					  struct __kernel_timespec *ts)
@@ -63,13 +288,7 @@
 	vdso_ts = &vd->basetime[clk];
 
 	do {
-		seq = vdso_read_begin(vd);
-
-		if (unlikely(!vdso_clocksource_ok(vd)))
-			return -1;
-
-		cycles = __arch_get_hw_counter(vd->clock_mode, vd);
-		if (unlikely(!vdso_cycles_ok(cycles)))
+		if (!get_hw_counter(vd, &seq, &cycles))
 			return -1;
 		ns = vdso_ts->nsec;
 		last = vd->cycle_last;
@@ -117,30 +336,29 @@
 
 	do {
 		/*
-		 * Open coded to handle VDSO_CLOCKMODE_TIMENS. Time namespace
-		 * enabled tasks have a special VVAR page installed which
-		 * has vd->seq set to 1 and vd->clock_mode set to
-		 * VDSO_CLOCKMODE_TIMENS. For non time namespace affected tasks
-		 * this does not affect performance because if vd->seq is
-		 * odd, i.e. a concurrent update is in progress the extra
+		 * Open coded to handle VDSO_CLOCKMODE_TIMENS. Time
+		 * namespace enabled tasks have a special VVAR page
+		 * installed which has vd->seq set to 1 and
+		 * vd->clock_mode set to VDSO_CLOCKMODE_TIMENS. For
+		 * non time namespace affected tasks this does not
+		 * affect performance because if vd->seq is odd,
+		 * i.e. a concurrent update is in progress the extra
 		 * check for vd->clock_mode is just a few extra
-		 * instructions while spin waiting for vd->seq to become
-		 * even again.
+		 * instructions while spin waiting for vd->seq to
+		 * become even again.
		 */
 		while (unlikely((seq = READ_ONCE(vd->seq)) & 1)) {
 			if (IS_ENABLED(CONFIG_TIME_NS) &&
-			    vd->clock_mode == VDSO_CLOCKMODE_TIMENS)
+				vd->clock_mode == VDSO_CLOCKMODE_TIMENS)
 				return do_hres_timens(vd, clk, ts);
 			cpu_relax();
 		}
+
 		smp_rmb();
 
-		if (unlikely(!vdso_clocksource_ok(vd)))
+		if (!get_hw_counter(vd, &seq, &cycles))
 			return -1;
 
-		cycles = __arch_get_hw_counter(vd->clock_mode, vd);
-		if (unlikely(!vdso_cycles_ok(cycles)))
-			return -1;
 		ns = vdso_ts->nsec;
 		last = vd->cycle_last;
 		ns += vdso_calc_delta(cycles, last, vd->mask, vd->mult);