2023-12-08 01573e231f18eb2d99162747186f59511f56b64d
kernel/include/linux/seqlock.h
@@ -1,49 +1,66 @@
11 /* SPDX-License-Identifier: GPL-2.0 */
22 #ifndef __LINUX_SEQLOCK_H
33 #define __LINUX_SEQLOCK_H
4
+
45 /*
5
- * Reader/writer consistent mechanism without starving writers. This type of
6
- * lock for data where the reader wants a consistent set of information
7
- * and is willing to retry if the information changes. There are two types
8
- * of readers:
9
- * 1. Sequence readers which never block a writer but they may have to retry
10
- * if a writer is in progress by detecting change in sequence number.
11
- * Writers do not wait for a sequence reader.
12
- * 2. Locking readers which will wait if a writer or another locking reader
13
- * is in progress. A locking reader in progress will also block a writer
14
- * from going forward. Unlike the regular rwlock, the read lock here is
15
- * exclusive so that only one locking reader can get it.
6
+ * seqcount_t / seqlock_t - a reader-writer consistency mechanism with
7
+ * lockless readers (read-only retry loops), and no writer starvation.
168 *
17
- * This is not as cache friendly as brlock. Also, this may not work well
18
- * for data that contains pointers, because any writer could
19
- * invalidate a pointer that a reader was following.
9
+ * See Documentation/locking/seqlock.rst
2010 *
21
- * Expected non-blocking reader usage:
22
- * do {
23
- * seq = read_seqbegin(&foo);
24
- * ...
25
- * } while (read_seqretry(&foo, seq));
26
- *
27
- *
28
- * On non-SMP the spin locks disappear but the writer still needs
29
- * to increment the sequence variables because an interrupt routine could
30
- * change the state of the data.
31
- *
32
- * Based on x86_64 vsyscall gettimeofday
33
- * by Keith Owens and Andrea Arcangeli
11
+ * Copyrights:
12
+ * - Based on x86_64 vsyscall gettimeofday: Keith Owens, Andrea Arcangeli
13
+ * - Sequence counters with associated locks, (C) 2020 Linutronix GmbH
3414 */
3515
36
-#include <linux/spinlock.h>
37
-#include <linux/preempt.h>
38
-#include <linux/lockdep.h>
3916 #include <linux/compiler.h>
17
+#include <linux/kcsan-checks.h>
18
+#include <linux/lockdep.h>
19
+#include <linux/mutex.h>
20
+#include <linux/ww_mutex.h>
21
+#include <linux/preempt.h>
22
+#include <linux/spinlock.h>
23
+
4024 #include <asm/processor.h>
4125
4226 /*
43
- * Version using sequence counter only.
44
- * This can be used when code has its own mutex protecting the
45
- * updating starting before the write_seqcountbeqin() and ending
46
- * after the write_seqcount_end().
27
+ * The seqlock seqcount_t interface does not prescribe a precise sequence of
28
+ * read begin/retry/end. For readers, typically there is a call to
29
+ * read_seqcount_begin() and read_seqcount_retry(), however, there are more
30
+ * esoteric cases which do not follow this pattern.
31
+ *
32
+ * As a consequence, we take the following best-effort approach for raw usage
33
+ * via seqcount_t under KCSAN: upon beginning a seq-reader critical section,
34
+ * pessimistically mark the next KCSAN_SEQLOCK_REGION_MAX memory accesses as
35
+ * atomics; if there is a matching read_seqcount_retry() call, no following
36
+ * memory operations are considered atomic. Usage of the seqlock_t interface
37
+ * is not affected.
38
+ */
39
+#define KCSAN_SEQLOCK_REGION_MAX 1000
40
+
41
+/*
42
+ * Sequence counters (seqcount_t)
43
+ *
44
+ * This is the raw counting mechanism, without any writer protection.
45
+ *
46
+ * Write side critical sections must be serialized and non-preemptible.
47
+ *
48
+ * If readers can be invoked from hardirq or softirq contexts,
49
+ * interrupts or bottom halves must also be respectively disabled before
50
+ * entering the write section.
51
+ *
52
+ * This mechanism can't be used if the protected data contains pointers,
53
+ * as the writer can invalidate a pointer that a reader is following.
54
+ *
55
+ * If the write serialization mechanism is one of the common kernel
56
+ * locking primitives, use a sequence counter with associated lock
57
+ * (seqcount_LOCKNAME_t) instead.
58
+ *
59
+ * If it's desired to automatically handle the sequence counter writer
60
+ * serialization and non-preemptibility requirements, use a sequential
61
+ * lock (seqlock_t) instead.
62
+ *
63
+ * See Documentation/locking/seqlock.rst
4764 */
4865 typedef struct seqcount {
4966 unsigned sequence;
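
As a hedged sketch of the usage rules above (hypothetical identifiers foo_lock, foo_seq, foo_a and foo_b, not taken from this header), a plain seqcount_t reader/writer pair might look like the following. The writer supplies its own serialization through a spinlock; as the comment above notes, real code in that situation would normally prefer a seqcount_spinlock_t.

        #include <linux/seqlock.h>
        #include <linux/spinlock.h>

        static DEFINE_SPINLOCK(foo_lock);                 /* hypothetical write serialization */
        static seqcount_t foo_seq = SEQCNT_ZERO(foo_seq);
        static int foo_a, foo_b;                          /* data protected by foo_seq */

        static void foo_update(int a, int b)
        {
                /* Serializes writers; also disables preemption on !PREEMPT_RT. */
                spin_lock(&foo_lock);
                write_seqcount_begin(&foo_seq);
                foo_a = a;
                foo_b = b;
                write_seqcount_end(&foo_seq);
                spin_unlock(&foo_lock);
        }

        static void foo_read(int *a, int *b)
        {
                unsigned int seq;

                do {
                        seq = read_seqcount_begin(&foo_seq);
                        *a = foo_a;
                        *b = foo_b;
                } while (read_seqcount_retry(&foo_seq, seq));
        }
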
@@ -63,13 +80,18 @@
6380 }
6481
6582 #ifdef CONFIG_DEBUG_LOCK_ALLOC
66
-# define SEQCOUNT_DEP_MAP_INIT(lockname) \
67
- .dep_map = { .name = #lockname } \
6883
69
-# define seqcount_init(s) \
70
- do { \
71
- static struct lock_class_key __key; \
72
- __seqcount_init((s), #s, &__key); \
84
+# define SEQCOUNT_DEP_MAP_INIT(lockname) \
85
+ .dep_map = { .name = #lockname }
86
+
87
+/**
88
+ * seqcount_init() - runtime initializer for seqcount_t
89
+ * @s: Pointer to the seqcount_t instance
90
+ */
91
+# define seqcount_init(s) \
92
+ do { \
93
+ static struct lock_class_key __key; \
94
+ __seqcount_init((s), #s, &__key); \
7395 } while (0)
7496
7597 static inline void seqcount_lockdep_reader_access(const seqcount_t *s)
@@ -79,7 +101,7 @@
79101
80102 local_irq_save(flags);
81103 seqcount_acquire_read(&l->dep_map, 0, 0, _RET_IP_);
82
- seqcount_release(&l->dep_map, 1, _RET_IP_);
104
+ seqcount_release(&l->dep_map, _RET_IP_);
83105 local_irq_restore(flags);
84106 }
85107
@@ -89,13 +111,210 @@
89111 # define seqcount_lockdep_reader_access(x)
90112 #endif
91113
92
-#define SEQCNT_ZERO(lockname) { .sequence = 0, SEQCOUNT_DEP_MAP_INIT(lockname)}
114
+/**
115
+ * SEQCNT_ZERO() - static initializer for seqcount_t
116
+ * @name: Name of the seqcount_t instance
117
+ */
118
+#define SEQCNT_ZERO(name) { .sequence = 0, SEQCOUNT_DEP_MAP_INIT(name) }
93119
120
+/*
121
+ * Sequence counters with associated locks (seqcount_LOCKNAME_t)
122
+ *
123
+ * A sequence counter which associates the lock used for writer
124
+ * serialization at initialization time. This enables lockdep to validate
125
+ * that the write side critical section is properly serialized.
126
+ *
127
+ * For associated locks which do not implicitly disable preemption,
128
+ * preemption protection is enforced in the write side function.
129
+ *
130
+ * Lockdep is never used in any of the raw write variants.
131
+ *
132
+ * See Documentation/locking/seqlock.rst
133
+ */
134
+
135
+/*
136
+ * For PREEMPT_RT, seqcount_LOCKNAME_t write side critical sections cannot
137
+ * disable preemption. It can lead to higher latencies, and the write side
138
+ * sections will not be able to acquire locks which become sleeping locks
139
+ * (e.g. spinlock_t).
140
+ *
141
+ * To remain preemptible while avoiding a possible livelock caused by the
142
+ * reader preempting the writer, use a different technique: let the reader
143
+ * detect if a seqcount_LOCKNAME_t writer is in progress. If that is the
144
+ * case, acquire then release the associated LOCKNAME writer serialization
145
+ * lock. This will allow any possibly-preempted writer to make progress
146
+ * until the end of its writer serialization lock critical section.
147
+ *
148
+ * This lock-unlock technique must be implemented for all of PREEMPT_RT
149
+ * sleeping locks. See Documentation/locking/locktypes.rst
150
+ */
151
+#if defined(CONFIG_LOCKDEP) || defined(CONFIG_PREEMPT_RT)
152
+#define __SEQ_LOCK(expr) expr
153
+#else
154
+#define __SEQ_LOCK(expr)
155
+#endif
156
+
157
+/*
158
+ * typedef seqcount_LOCKNAME_t - sequence counter with LOCKNAME associated
159
+ * @seqcount: The real sequence counter
160
+ * @lock: Pointer to the associated lock
161
+ *
162
+ * A plain sequence counter with external writer synchronization by
163
+ * LOCKNAME @lock. The lock is associated to the sequence counter in the
164
+ * static initializer or init function. This enables lockdep to validate
165
+ * that the write side critical section is properly serialized.
166
+ *
167
+ * LOCKNAME: raw_spinlock, spinlock, rwlock, mutex, or ww_mutex.
168
+ */
169
+
170
+/*
171
+ * seqcount_LOCKNAME_init() - runtime initializer for seqcount_LOCKNAME_t
172
+ * @s: Pointer to the seqcount_LOCKNAME_t instance
173
+ * @lock: Pointer to the associated lock
174
+ */
175
+
176
+#define seqcount_LOCKNAME_init(s, _lock, lockname) \
177
+ do { \
178
+ seqcount_##lockname##_t *____s = (s); \
179
+ seqcount_init(&____s->seqcount); \
180
+ __SEQ_LOCK(____s->lock = (_lock)); \
181
+ } while (0)
182
+
183
+#define seqcount_raw_spinlock_init(s, lock) seqcount_LOCKNAME_init(s, lock, raw_spinlock)
184
+#define seqcount_spinlock_init(s, lock) seqcount_LOCKNAME_init(s, lock, spinlock)
185
+#define seqcount_rwlock_init(s, lock) seqcount_LOCKNAME_init(s, lock, rwlock);
186
+#define seqcount_mutex_init(s, lock) seqcount_LOCKNAME_init(s, lock, mutex);
187
+#define seqcount_ww_mutex_init(s, lock) seqcount_LOCKNAME_init(s, lock, ww_mutex);
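
For illustration, a hedged sketch of associating a spinlock_t with a sequence counter (struct and field names are hypothetical, not from this header); SEQCNT_SPINLOCK_ZERO(), defined further below, plays the same role for static storage:

        #include <linux/seqlock.h>
        #include <linux/spinlock.h>

        struct obj {
                spinlock_t lock;
                seqcount_spinlock_t seq;        /* associated with obj::lock */
                int a, b;                       /* protected by seq, serialized by lock */
        };

        static void obj_init(struct obj *o)
        {
                spin_lock_init(&o->lock);
                seqcount_spinlock_init(&o->seq, &o->lock);
        }
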
188
+
189
+/*
190
+ * SEQCOUNT_LOCKNAME() - Instantiate seqcount_LOCKNAME_t and helpers
191
+ * seqprop_LOCKNAME_*() - Property accessors for seqcount_LOCKNAME_t
192
+ *
193
+ * @lockname: "LOCKNAME" part of seqcount_LOCKNAME_t
194
+ * @locktype: LOCKNAME canonical C data type
195
+ * @preemptible: preemptibility of above locktype
196
+ * @lockmember: argument for lockdep_assert_held()
197
+ * @lockbase: associated lock release function (prefix only)
198
+ * @lock_acquire: associated lock acquisition function (full call)
199
+ */
200
+#define SEQCOUNT_LOCKNAME(lockname, locktype, preemptible, lockmember, lockbase, lock_acquire) \
201
+typedef struct seqcount_##lockname { \
202
+ seqcount_t seqcount; \
203
+ __SEQ_LOCK(locktype *lock); \
204
+} seqcount_##lockname##_t; \
205
+ \
206
+static __always_inline seqcount_t * \
207
+__seqprop_##lockname##_ptr(seqcount_##lockname##_t *s) \
208
+{ \
209
+ return &s->seqcount; \
210
+} \
211
+ \
212
+static __always_inline unsigned \
213
+__seqprop_##lockname##_sequence(const seqcount_##lockname##_t *s) \
214
+{ \
215
+ unsigned seq = READ_ONCE(s->seqcount.sequence); \
216
+ \
217
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT)) \
218
+ return seq; \
219
+ \
220
+ if (preemptible && unlikely(seq & 1)) { \
221
+ __SEQ_LOCK(lock_acquire); \
222
+ __SEQ_LOCK(lockbase##_unlock(s->lock)); \
223
+ \
224
+ /* \
225
+ * Re-read the sequence counter since the (possibly \
226
+ * preempted) writer made progress. \
227
+ */ \
228
+ seq = READ_ONCE(s->seqcount.sequence); \
229
+ } \
230
+ \
231
+ return seq; \
232
+} \
233
+ \
234
+static __always_inline bool \
235
+__seqprop_##lockname##_preemptible(const seqcount_##lockname##_t *s) \
236
+{ \
237
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT)) \
238
+ return preemptible; \
239
+ \
240
+ /* PREEMPT_RT relies on the above LOCK+UNLOCK */ \
241
+ return false; \
242
+} \
243
+ \
244
+static __always_inline void \
245
+__seqprop_##lockname##_assert(const seqcount_##lockname##_t *s) \
246
+{ \
247
+ __SEQ_LOCK(lockdep_assert_held(lockmember)); \
248
+}
249
+
250
+/*
251
+ * __seqprop() for seqcount_t
252
+ */
253
+
254
+static inline seqcount_t *__seqprop_ptr(seqcount_t *s)
255
+{
256
+ return s;
257
+}
258
+
259
+static inline unsigned __seqprop_sequence(const seqcount_t *s)
260
+{
261
+ return READ_ONCE(s->sequence);
262
+}
263
+
264
+static inline bool __seqprop_preemptible(const seqcount_t *s)
265
+{
266
+ return false;
267
+}
268
+
269
+static inline void __seqprop_assert(const seqcount_t *s)
270
+{
271
+ lockdep_assert_preemption_disabled();
272
+}
273
+
274
+#define __SEQ_RT IS_ENABLED(CONFIG_PREEMPT_RT)
275
+
276
+SEQCOUNT_LOCKNAME(raw_spinlock, raw_spinlock_t, false, s->lock, raw_spin, raw_spin_lock(s->lock))
277
+SEQCOUNT_LOCKNAME(spinlock, spinlock_t, __SEQ_RT, s->lock, spin, spin_lock(s->lock))
278
+SEQCOUNT_LOCKNAME(rwlock, rwlock_t, __SEQ_RT, s->lock, read, read_lock(s->lock))
279
+SEQCOUNT_LOCKNAME(mutex, struct mutex, true, s->lock, mutex, mutex_lock(s->lock))
280
+SEQCOUNT_LOCKNAME(ww_mutex, struct ww_mutex, true, &s->lock->base, ww_mutex, ww_mutex_lock(s->lock, NULL))
281
+
282
+/*
283
+ * SEQCOUNT_LOCKNAME_ZERO - static initializer for seqcount_LOCKNAME_t
284
+ * @name: Name of the seqcount_LOCKNAME_t instance
285
+ * @lock: Pointer to the associated LOCKNAME
286
+ */
287
+
288
+#define SEQCOUNT_LOCKNAME_ZERO(seq_name, assoc_lock) { \
289
+ .seqcount = SEQCNT_ZERO(seq_name.seqcount), \
290
+ __SEQ_LOCK(.lock = (assoc_lock)) \
291
+}
292
+
293
+#define SEQCNT_RAW_SPINLOCK_ZERO(name, lock) SEQCOUNT_LOCKNAME_ZERO(name, lock)
294
+#define SEQCNT_SPINLOCK_ZERO(name, lock) SEQCOUNT_LOCKNAME_ZERO(name, lock)
295
+#define SEQCNT_RWLOCK_ZERO(name, lock) SEQCOUNT_LOCKNAME_ZERO(name, lock)
296
+#define SEQCNT_MUTEX_ZERO(name, lock) SEQCOUNT_LOCKNAME_ZERO(name, lock)
297
+#define SEQCNT_WW_MUTEX_ZERO(name, lock) SEQCOUNT_LOCKNAME_ZERO(name, lock)
298
+
299
+#define __seqprop_case(s, lockname, prop) \
300
+ seqcount_##lockname##_t: __seqprop_##lockname##_##prop((void *)(s))
301
+
302
+#define __seqprop(s, prop) _Generic(*(s), \
303
+ seqcount_t: __seqprop_##prop((void *)(s)), \
304
+ __seqprop_case((s), raw_spinlock, prop), \
305
+ __seqprop_case((s), spinlock, prop), \
306
+ __seqprop_case((s), rwlock, prop), \
307
+ __seqprop_case((s), mutex, prop), \
308
+ __seqprop_case((s), ww_mutex, prop))
309
+
310
+#define __seqcount_ptr(s) __seqprop(s, ptr)
311
+#define __seqcount_sequence(s) __seqprop(s, sequence)
312
+#define __seqcount_lock_preemptible(s) __seqprop(s, preemptible)
313
+#define __seqcount_assert_lock_held(s) __seqprop(s, assert)
94314
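
The __seqprop() dispatch above is built on C11 _Generic selection. As a hedged, standalone illustration of that pattern only (the types and accessors below are invented for the example and are not kernel API):

        #include <stdio.h>

        struct counter_plain    { unsigned int seq; };
        struct counter_spinlock { unsigned int seq; /* plus a lock pointer in the real thing */ };

        static unsigned int plain_sequence(struct counter_plain *c)       { return c->seq; }
        static unsigned int spinlock_sequence(struct counter_spinlock *c) { return c->seq; }

        /* The type of *(c) picks the matching accessor at compile time. */
        #define counter_sequence(c)                                     \
                _Generic(*(c),                                          \
                         struct counter_plain:    plain_sequence,       \
                         struct counter_spinlock: spinlock_sequence)(c)

        int main(void)
        {
                struct counter_plain a = { .seq = 2 };
                struct counter_spinlock b = { .seq = 4 };

                printf("%u %u\n", counter_sequence(&a), counter_sequence(&b));
                return 0;
        }
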
95315 /**
96
- * __read_seqcount_begin - begin a seq-read critical section (without barrier)
97
- * @s: pointer to seqcount_t
98
- * Returns: count to be passed to read_seqcount_retry
316
+ * __read_seqcount_begin() - begin a seqcount_t read section w/o barrier
317
+ * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants
99318 *
100319 * __read_seqcount_begin is like read_seqcount_begin, but has no smp_rmb()
101320 * barrier. Callers should ensure that smp_rmb() or equivalent ordering is
@@ -104,93 +323,96 @@
104323 *
105324 * Use carefully, only in critical code, and comment how the barrier is
106325 * provided.
326
+ *
327
+ * Return: count to be passed to read_seqcount_retry()
107328 */
108
-static inline unsigned __read_seqcount_begin(const seqcount_t *s)
109
-{
110
- unsigned ret;
111
-
112
-repeat:
113
- ret = READ_ONCE(s->sequence);
114
- if (unlikely(ret & 1)) {
115
- cpu_relax();
116
- goto repeat;
117
- }
118
- return ret;
119
-}
329
+#define __read_seqcount_begin(s) \
330
+({ \
331
+ unsigned seq; \
332
+ \
333
+ while ((seq = __seqcount_sequence(s)) & 1) \
334
+ cpu_relax(); \
335
+ \
336
+ kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX); \
337
+ seq; \
338
+})
120339
121340 /**
122
- * raw_read_seqcount - Read the raw seqcount
123
- * @s: pointer to seqcount_t
124
- * Returns: count to be passed to read_seqcount_retry
341
+ * raw_read_seqcount_begin() - begin a seqcount_t read section w/o lockdep
342
+ * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants
343
+ *
344
+ * Return: count to be passed to read_seqcount_retry()
345
+ */
346
+#define raw_read_seqcount_begin(s) \
347
+({ \
348
+ unsigned seq = __read_seqcount_begin(s); \
349
+ \
350
+ smp_rmb(); \
351
+ seq; \
352
+})
353
+
354
+/**
355
+ * read_seqcount_begin() - begin a seqcount_t read critical section
356
+ * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants
357
+ *
358
+ * Return: count to be passed to read_seqcount_retry()
359
+ */
360
+#define read_seqcount_begin(s) \
361
+({ \
362
+ seqcount_lockdep_reader_access(__seqcount_ptr(s)); \
363
+ raw_read_seqcount_begin(s); \
364
+})
365
+
366
+/**
367
+ * raw_read_seqcount() - read the raw seqcount_t counter value
368
+ * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants
125369 *
126370 * raw_read_seqcount opens a read critical section of the given
127
- * seqcount without any lockdep checking and without checking or
128
- * masking the LSB. Calling code is responsible for handling that.
371
+ * seqcount_t, without any lockdep checking, and without checking or
372
+ * masking the sequence counter LSB. Calling code is responsible for
373
+ * handling that.
374
+ *
375
+ * Return: count to be passed to read_seqcount_retry()
129376 */
130
-static inline unsigned raw_read_seqcount(const seqcount_t *s)
131
-{
132
- unsigned ret = READ_ONCE(s->sequence);
133
- smp_rmb();
134
- return ret;
135
-}
377
+#define raw_read_seqcount(s) \
378
+({ \
379
+ unsigned seq = __seqcount_sequence(s); \
380
+ \
381
+ smp_rmb(); \
382
+ kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX); \
383
+ seq; \
384
+})
136385
137386 /**
138
- * raw_read_seqcount_begin - start seq-read critical section w/o lockdep
139
- * @s: pointer to seqcount_t
140
- * Returns: count to be passed to read_seqcount_retry
387
+ * raw_seqcount_begin() - begin a seqcount_t read critical section w/o
388
+ * lockdep and w/o counter stabilization
389
+ * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants
141390 *
142
- * raw_read_seqcount_begin opens a read critical section of the given
143
- * seqcount, but without any lockdep checking. Validity of the critical
144
- * section is tested by checking read_seqcount_retry function.
391
+ * raw_seqcount_begin opens a read critical section of the given
392
+ * seqcount_t. Unlike read_seqcount_begin(), this function will not wait
393
+ * for the count to stabilize. If a writer is active when it begins, it
394
+ * will fail the read_seqcount_retry() at the end of the read critical
395
+ * section instead of stabilizing at the beginning of it.
396
+ *
397
+ * Use this only in special kernel hot paths where the read section is
398
+ * small and has a high probability of success through other external
399
+ * means. It will save a single branching instruction.
400
+ *
401
+ * Return: count to be passed to read_seqcount_retry()
145402 */
146
-static inline unsigned raw_read_seqcount_begin(const seqcount_t *s)
147
-{
148
- unsigned ret = __read_seqcount_begin(s);
149
- smp_rmb();
150
- return ret;
151
-}
403
+#define raw_seqcount_begin(s) \
404
+({ \
405
+ /* \
406
+ * If the counter is odd, let read_seqcount_retry() fail \
407
+ * by decrementing the counter. \
408
+ */ \
409
+ raw_read_seqcount(s) & ~1; \
410
+})
152411
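
A hedged sketch of the hot-path pattern described above, reusing the hypothetical foo_seq/foo_a/foo_b names from the earlier sketch: the read section starts even while a writer is active and relies entirely on the closing retry check.

        static bool foo_read_fast(int *a, int *b)
        {
                unsigned int seq = raw_seqcount_begin(&foo_seq);  /* never spins on an odd count */

                *a = foo_a;
                *b = foo_b;

                /* False if a writer was or became active: caller must fall back/retry. */
                return !read_seqcount_retry(&foo_seq, seq);
        }
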
153412 /**
154
- * read_seqcount_begin - begin a seq-read critical section
155
- * @s: pointer to seqcount_t
156
- * Returns: count to be passed to read_seqcount_retry
157
- *
158
- * read_seqcount_begin opens a read critical section of the given seqcount.
159
- * Validity of the critical section is tested by checking read_seqcount_retry
160
- * function.
161
- */
162
-static inline unsigned read_seqcount_begin(const seqcount_t *s)
163
-{
164
- seqcount_lockdep_reader_access(s);
165
- return raw_read_seqcount_begin(s);
166
-}
167
-
168
-/**
169
- * raw_seqcount_begin - begin a seq-read critical section
170
- * @s: pointer to seqcount_t
171
- * Returns: count to be passed to read_seqcount_retry
172
- *
173
- * raw_seqcount_begin opens a read critical section of the given seqcount.
174
- * Validity of the critical section is tested by checking read_seqcount_retry
175
- * function.
176
- *
177
- * Unlike read_seqcount_begin(), this function will not wait for the count
178
- * to stabilize. If a writer is active when we begin, we will fail the
179
- * read_seqcount_retry() instead of stabilizing at the beginning of the
180
- * critical section.
181
- */
182
-static inline unsigned raw_seqcount_begin(const seqcount_t *s)
183
-{
184
- unsigned ret = READ_ONCE(s->sequence);
185
- smp_rmb();
186
- return ret & ~1;
187
-}
188
-
189
-/**
190
- * __read_seqcount_retry - end a seq-read critical section (without barrier)
191
- * @s: pointer to seqcount_t
192
- * @start: count, from read_seqcount_begin
193
- * Returns: 1 if retry is required, else 0
413
+ * __read_seqcount_retry() - end a seqcount_t read section w/o barrier
414
+ * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants
415
+ * @start: count, from read_seqcount_begin()
194416 *
195417 * __read_seqcount_retry is like read_seqcount_retry, but has no smp_rmb()
196418 * barrier. Callers should ensure that smp_rmb() or equivalent ordering is
@@ -199,110 +421,282 @@
199421 *
200422 * Use carefully, only in critical code, and comment how the barrier is
201423 * provided.
424
+ *
425
+ * Return: true if a read section retry is required, else false
202426 */
203
-static inline int __read_seqcount_retry(const seqcount_t *s, unsigned start)
427
+#define __read_seqcount_retry(s, start) \
428
+ __read_seqcount_t_retry(__seqcount_ptr(s), start)
429
+
430
+static inline int __read_seqcount_t_retry(const seqcount_t *s, unsigned start)
204431 {
205
- return unlikely(s->sequence != start);
432
+ kcsan_atomic_next(0);
433
+ return unlikely(READ_ONCE(s->sequence) != start);
206434 }
207435
208436 /**
209
- * read_seqcount_retry - end a seq-read critical section
210
- * @s: pointer to seqcount_t
211
- * @start: count, from read_seqcount_begin
212
- * Returns: 1 if retry is required, else 0
437
+ * read_seqcount_retry() - end a seqcount_t read critical section
438
+ * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants
439
+ * @start: count, from read_seqcount_begin()
213440 *
214
- * read_seqcount_retry closes a read critical section of the given seqcount.
215
- * If the critical section was invalid, it must be ignored (and typically
216
- * retried).
441
+ * read_seqcount_retry closes the read critical section of given
442
+ * seqcount_t. If the critical section was invalid, it must be ignored
443
+ * (and typically retried).
444
+ *
445
+ * Return: true if a read section retry is required, else false
217446 */
218
-static inline int read_seqcount_retry(const seqcount_t *s, unsigned start)
447
+#define read_seqcount_retry(s, start) \
448
+ read_seqcount_t_retry(__seqcount_ptr(s), start)
449
+
450
+static inline int read_seqcount_t_retry(const seqcount_t *s, unsigned start)
219451 {
220452 smp_rmb();
221
- return __read_seqcount_retry(s, start);
222
-}
223
-
224
-static inline void __raw_write_seqcount_begin(seqcount_t *s)
225
-{
226
- s->sequence++;
227
- smp_wmb();
228
-}
229
-
230
-static inline void raw_write_seqcount_begin(seqcount_t *s)
231
-{
232
- preempt_disable_rt();
233
- __raw_write_seqcount_begin(s);
234
-}
235
-
236
-static inline void __raw_write_seqcount_end(seqcount_t *s)
237
-{
238
- smp_wmb();
239
- s->sequence++;
240
-}
241
-
242
-static inline void raw_write_seqcount_end(seqcount_t *s)
243
-{
244
- __raw_write_seqcount_end(s);
245
- preempt_enable_rt();
453
+ return __read_seqcount_t_retry(s, start);
246454 }
247455
248456 /**
249
- * raw_write_seqcount_barrier - do a seq write barrier
250
- * @s: pointer to seqcount_t
457
+ * raw_write_seqcount_begin() - start a seqcount_t write section w/o lockdep
458
+ * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants
459
+ */
460
+#define raw_write_seqcount_begin(s) \
461
+do { \
462
+ if (__seqcount_lock_preemptible(s)) \
463
+ preempt_disable(); \
464
+ \
465
+ raw_write_seqcount_t_begin(__seqcount_ptr(s)); \
466
+} while (0)
467
+
468
+static inline void raw_write_seqcount_t_begin(seqcount_t *s)
469
+{
470
+ kcsan_nestable_atomic_begin();
471
+ s->sequence++;
472
+ smp_wmb();
473
+}
474
+
475
+/**
476
+ * raw_write_seqcount_end() - end a seqcount_t write section w/o lockdep
477
+ * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants
478
+ */
479
+#define raw_write_seqcount_end(s) \
480
+do { \
481
+ raw_write_seqcount_t_end(__seqcount_ptr(s)); \
482
+ \
483
+ if (__seqcount_lock_preemptible(s)) \
484
+ preempt_enable(); \
485
+} while (0)
486
+
487
+static inline void raw_write_seqcount_t_end(seqcount_t *s)
488
+{
489
+ smp_wmb();
490
+ s->sequence++;
491
+ kcsan_nestable_atomic_end();
492
+}
493
+
494
+/**
495
+ * write_seqcount_begin_nested() - start a seqcount_t write section with
496
+ * custom lockdep nesting level
497
+ * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants
498
+ * @subclass: lockdep nesting level
251499 *
252
- * This can be used to provide an ordering guarantee instead of the
253
- * usual consistency guarantee. It is one wmb cheaper, because we can
254
- * collapse the two back-to-back wmb()s.
500
+ * See Documentation/locking/lockdep-design.rst
501
+ */
502
+#define write_seqcount_begin_nested(s, subclass) \
503
+do { \
504
+ __seqcount_assert_lock_held(s); \
505
+ \
506
+ if (__seqcount_lock_preemptible(s)) \
507
+ preempt_disable(); \
508
+ \
509
+ write_seqcount_t_begin_nested(__seqcount_ptr(s), subclass); \
510
+} while (0)
511
+
512
+static inline void write_seqcount_t_begin_nested(seqcount_t *s, int subclass)
513
+{
514
+ raw_write_seqcount_t_begin(s);
515
+ seqcount_acquire(&s->dep_map, subclass, 0, _RET_IP_);
516
+}
517
+
518
+/**
519
+ * write_seqcount_begin() - start a seqcount_t write side critical section
520
+ * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants
255521 *
256
- * Note that, writes surrounding the barrier should be declared atomic (e.g.
522
+ * write_seqcount_begin opens a write side critical section of the given
523
+ * seqcount_t.
524
+ *
525
+ * Context: seqcount_t write side critical sections must be serialized and
526
+ * non-preemptible. If readers can be invoked from hardirq or softirq
527
+ * context, interrupts or bottom halves must be respectively disabled.
528
+ */
529
+#define write_seqcount_begin(s) \
530
+do { \
531
+ __seqcount_assert_lock_held(s); \
532
+ \
533
+ if (__seqcount_lock_preemptible(s)) \
534
+ preempt_disable(); \
535
+ \
536
+ write_seqcount_t_begin(__seqcount_ptr(s)); \
537
+} while (0)
538
+
539
+static inline void write_seqcount_t_begin(seqcount_t *s)
540
+{
541
+ write_seqcount_t_begin_nested(s, 0);
542
+}
543
+
544
+/**
545
+ * write_seqcount_end() - end a seqcount_t write side critical section
546
+ * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants
547
+ *
548
+ * The write section must've been opened with write_seqcount_begin().
549
+ */
550
+#define write_seqcount_end(s) \
551
+do { \
552
+ write_seqcount_t_end(__seqcount_ptr(s)); \
553
+ \
554
+ if (__seqcount_lock_preemptible(s)) \
555
+ preempt_enable(); \
556
+} while (0)
557
+
558
+static inline void write_seqcount_t_end(seqcount_t *s)
559
+{
560
+ seqcount_release(&s->dep_map, _RET_IP_);
561
+ raw_write_seqcount_t_end(s);
562
+}
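
As a hedged sketch reusing the hypothetical struct obj from the initialization example earlier, a write side critical section over a seqcount_spinlock_t; because the lock is associated, lockdep can check that it is actually held:

        static void obj_set(struct obj *o, int a, int b)
        {
                spin_lock(&o->lock);            /* associated lock: serialization (+ non-preemptibility on !PREEMPT_RT) */
                write_seqcount_begin(&o->seq);  /* with lockdep, asserts o->lock is held */
                o->a = a;
                o->b = b;
                write_seqcount_end(&o->seq);
                spin_unlock(&o->lock);
        }
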
563
+
564
+/**
565
+ * raw_write_seqcount_barrier() - do a seqcount_t write barrier
566
+ * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants
567
+ *
568
+ * This can be used to provide an ordering guarantee instead of the usual
569
+ * consistency guarantee. It is one wmb cheaper, because it can collapse
570
+ * the two back-to-back wmb()s.
571
+ *
572
+ * Note that writes surrounding the barrier should be declared atomic (e.g.
257573 * via WRITE_ONCE): a) to ensure the writes become visible to other threads
258574 * atomically, avoiding compiler optimizations; b) to document which writes are
259575 * meant to propagate to the reader critical section. This is necessary because
260576 * neither writes before nor after the barrier are enclosed in a seq-writer
261
- * critical section that would ensure readers are aware of ongoing writes.
577
+ * critical section that would ensure readers are aware of ongoing writes::
262578 *
263
- * seqcount_t seq;
264
- * bool X = true, Y = false;
579
+ * seqcount_t seq;
580
+ * bool X = true, Y = false;
265581 *
266
- * void read(void)
267
- * {
268
- * bool x, y;
582
+ * void read(void)
583
+ * {
584
+ * bool x, y;
269585 *
270
- * do {
271
- * int s = read_seqcount_begin(&seq);
586
+ * do {
587
+ * int s = read_seqcount_begin(&seq);
272588 *
273
- * x = X; y = Y;
589
+ * x = X; y = Y;
274590 *
275
- * } while (read_seqcount_retry(&seq, s));
591
+ * } while (read_seqcount_retry(&seq, s));
276592 *
277
- * BUG_ON(!x && !y);
593
+ * BUG_ON(!x && !y);
278594 * }
279595 *
280596 * void write(void)
281597 * {
282
- * WRITE_ONCE(Y, true);
598
+ * WRITE_ONCE(Y, true);
283599 *
284
- * raw_write_seqcount_barrier(seq);
600
+ * raw_write_seqcount_barrier(seq);
285601 *
286
- * WRITE_ONCE(X, false);
602
+ * WRITE_ONCE(X, false);
287603 * }
288604 */
289
-static inline void raw_write_seqcount_barrier(seqcount_t *s)
605
+#define raw_write_seqcount_barrier(s) \
606
+ raw_write_seqcount_t_barrier(__seqcount_ptr(s))
607
+
608
+static inline void raw_write_seqcount_t_barrier(seqcount_t *s)
290609 {
610
+ kcsan_nestable_atomic_begin();
291611 s->sequence++;
292612 smp_wmb();
293613 s->sequence++;
294
-}
295
-
296
-static inline int raw_read_seqcount_latch(seqcount_t *s)
297
-{
298
- /* Pairs with the first smp_wmb() in raw_write_seqcount_latch() */
299
- int seq = READ_ONCE(s->sequence); /* ^^^ */
300
- return seq;
614
+ kcsan_nestable_atomic_end();
301615 }
302616
303617 /**
304
- * raw_write_seqcount_latch - redirect readers to even/odd copy
305
- * @s: pointer to seqcount_t
618
+ * write_seqcount_invalidate() - invalidate in-progress seqcount_t read
619
+ * side operations
620
+ * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants
621
+ *
622
+ * After write_seqcount_invalidate, no seqcount_t read side operations
623
+ * will complete successfully and see data older than this.
624
+ */
625
+#define write_seqcount_invalidate(s) \
626
+ write_seqcount_t_invalidate(__seqcount_ptr(s))
627
+
628
+static inline void write_seqcount_t_invalidate(seqcount_t *s)
629
+{
630
+ smp_wmb();
631
+ kcsan_nestable_atomic_begin();
632
+ s->sequence+=2;
633
+ kcsan_nestable_atomic_end();
634
+}
635
+
636
+/*
637
+ * Latch sequence counters (seqcount_latch_t)
638
+ *
639
+ * A sequence counter variant where the counter even/odd value is used to
640
+ * switch between two copies of protected data. This allows the read path,
641
+ * typically NMIs, to safely interrupt the write side critical section.
642
+ *
643
+ * As the write sections are fully preemptible, no special handling for
644
+ * PREEMPT_RT is needed.
645
+ */
646
+typedef struct {
647
+ seqcount_t seqcount;
648
+} seqcount_latch_t;
649
+
650
+/**
651
+ * SEQCNT_LATCH_ZERO() - static initializer for seqcount_latch_t
652
+ * @seq_name: Name of the seqcount_latch_t instance
653
+ */
654
+#define SEQCNT_LATCH_ZERO(seq_name) { \
655
+ .seqcount = SEQCNT_ZERO(seq_name.seqcount), \
656
+}
657
+
658
+/**
659
+ * seqcount_latch_init() - runtime initializer for seqcount_latch_t
660
+ * @s: Pointer to the seqcount_latch_t instance
661
+ */
662
+#define seqcount_latch_init(s) seqcount_init(&(s)->seqcount)
663
+
664
+/**
665
+ * raw_read_seqcount_latch() - pick even/odd latch data copy
666
+ * @s: Pointer to seqcount_latch_t
667
+ *
668
+ * See raw_write_seqcount_latch() for details and a full reader/writer
669
+ * usage example.
670
+ *
671
+ * Return: sequence counter raw value. Use the lowest bit as an index for
672
+ * picking which data copy to read. The full counter must then be checked
673
+ * with read_seqcount_latch_retry().
674
+ */
675
+static inline unsigned raw_read_seqcount_latch(const seqcount_latch_t *s)
676
+{
677
+ /*
678
+ * Pairs with the first smp_wmb() in raw_write_seqcount_latch().
679
+ * Due to the dependent load, a full smp_rmb() is not needed.
680
+ */
681
+ return READ_ONCE(s->seqcount.sequence);
682
+}
683
+
684
+/**
685
+ * read_seqcount_latch_retry() - end a seqcount_latch_t read section
686
+ * @s: Pointer to seqcount_latch_t
687
+ * @start: count, from raw_read_seqcount_latch()
688
+ *
689
+ * Return: true if a read section retry is required, else false
690
+ */
691
+static inline int
692
+read_seqcount_latch_retry(const seqcount_latch_t *s, unsigned start)
693
+{
694
+ return read_seqcount_retry(&s->seqcount, start);
695
+}
696
+
697
+/**
698
+ * raw_write_seqcount_latch() - redirect latch readers to even/odd copy
699
+ * @s: Pointer to seqcount_latch_t
306700 *
307701 * The latch technique is a multiversion concurrency control method that allows
308702 * queries during non-atomic modifications. If you can guarantee queries never
@@ -318,213 +712,243 @@
318712 * Very simply put: we first modify one copy and then the other. This ensures
319713 * there is always one copy in a stable state, ready to give us an answer.
320714 *
321
- * The basic form is a data structure like:
715
+ * The basic form is a data structure like::
322716 *
323
- * struct latch_struct {
324
- * seqcount_t seq;
325
- * struct data_struct data[2];
326
- * };
717
+ * struct latch_struct {
718
+ * seqcount_latch_t seq;
719
+ * struct data_struct data[2];
720
+ * };
327721 *
328722 * Where a modification, which is assumed to be externally serialized, does the
329
- * following:
723
+ * following::
330724 *
331
- * void latch_modify(struct latch_struct *latch, ...)
332
- * {
333
- * smp_wmb(); <- Ensure that the last data[1] update is visible
334
- * latch->seq++;
335
- * smp_wmb(); <- Ensure that the seqcount update is visible
725
+ * void latch_modify(struct latch_struct *latch, ...)
726
+ * {
727
+ * smp_wmb(); // Ensure that the last data[1] update is visible
728
+ * latch->seq.sequence++;
729
+ * smp_wmb(); // Ensure that the seqcount update is visible
336730 *
337
- * modify(latch->data[0], ...);
731
+ * modify(latch->data[0], ...);
338732 *
339
- * smp_wmb(); <- Ensure that the data[0] update is visible
340
- * latch->seq++;
341
- * smp_wmb(); <- Ensure that the seqcount update is visible
733
+ * smp_wmb(); // Ensure that the data[0] update is visible
734
+ * latch->seq.sequence++;
735
+ * smp_wmb(); // Ensure that the seqcount update is visible
342736 *
343
- * modify(latch->data[1], ...);
344
- * }
737
+ * modify(latch->data[1], ...);
738
+ * }
345739 *
346
- * The query will have a form like:
740
+ * The query will have a form like::
347741 *
348
- * struct entry *latch_query(struct latch_struct *latch, ...)
349
- * {
350
- * struct entry *entry;
351
- * unsigned seq, idx;
742
+ * struct entry *latch_query(struct latch_struct *latch, ...)
743
+ * {
744
+ * struct entry *entry;
745
+ * unsigned seq, idx;
352746 *
353
- * do {
354
- * seq = raw_read_seqcount_latch(&latch->seq);
747
+ * do {
748
+ * seq = raw_read_seqcount_latch(&latch->seq);
355749 *
356
- * idx = seq & 0x01;
357
- * entry = data_query(latch->data[idx], ...);
750
+ * idx = seq & 0x01;
751
+ * entry = data_query(latch->data[idx], ...);
358752 *
359
- * smp_rmb();
360
- * } while (seq != latch->seq);
753
+ * // This includes needed smp_rmb()
754
+ * } while (read_seqcount_latch_retry(&latch->seq, seq));
361755 *
362
- * return entry;
363
- * }
756
+ * return entry;
757
+ * }
364758 *
365759 * So during the modification, queries are first redirected to data[1]. Then we
366760 * modify data[0]. When that is complete, we redirect queries back to data[0]
367761 * and we can modify data[1].
368762 *
369
- * NOTE: The non-requirement for atomic modifications does _NOT_ include
370
- * the publishing of new entries in the case where data is a dynamic
371
- * data structure.
763
+ * NOTE:
372764 *
373
- * An iteration might start in data[0] and get suspended long enough
374
- * to miss an entire modification sequence, once it resumes it might
375
- * observe the new entry.
765
+ * The non-requirement for atomic modifications does _NOT_ include
766
+ * the publishing of new entries in the case where data is a dynamic
767
+ * data structure.
376768 *
377
- * NOTE: When data is a dynamic data structure; one should use regular RCU
378
- * patterns to manage the lifetimes of the objects within.
769
+ * An iteration might start in data[0] and get suspended long enough
770
+ * to miss an entire modification sequence, once it resumes it might
771
+ * observe the new entry.
772
+ *
773
+ * NOTE2:
774
+ *
775
+ * When data is a dynamic data structure; one should use regular RCU
776
+ * patterns to manage the lifetimes of the objects within.
379777 */
380
-static inline void raw_write_seqcount_latch(seqcount_t *s)
778
+static inline void raw_write_seqcount_latch(seqcount_latch_t *s)
381779 {
382
- smp_wmb(); /* prior stores before incrementing "sequence" */
383
- s->sequence++;
384
- smp_wmb(); /* increment "sequence" before following stores */
780
+ smp_wmb(); /* prior stores before incrementing "sequence" */
781
+ s->seqcount.sequence++;
782
+ smp_wmb(); /* increment "sequence" before following stores */
385783 }
386784
387785 /*
388
- * Sequence counter only version assumes that callers are using their
389
- * own mutexing.
390
- */
391
-static inline void write_seqcount_begin_nested(seqcount_t *s, int subclass)
392
-{
393
- raw_write_seqcount_begin(s);
394
- seqcount_acquire(&s->dep_map, subclass, 0, _RET_IP_);
395
-}
396
-
397
-static inline void write_seqcount_begin(seqcount_t *s)
398
-{
399
- write_seqcount_begin_nested(s, 0);
400
-}
401
-
402
-static inline void write_seqcount_end(seqcount_t *s)
403
-{
404
- seqcount_release(&s->dep_map, 1, _RET_IP_);
405
- raw_write_seqcount_end(s);
406
-}
407
-
408
-/**
409
- * write_seqcount_invalidate - invalidate in-progress read-side seq operations
410
- * @s: pointer to seqcount_t
786
+ * Sequential locks (seqlock_t)
411787 *
412
- * After write_seqcount_invalidate, no read-side seq operations will complete
413
- * successfully and see data older than this.
788
+ * Sequence counters with an embedded spinlock for writer serialization
789
+ * and non-preemptibility.
790
+ *
791
+ * For more info, see:
792
+ * - Comments on top of seqcount_t
793
+ * - Documentation/locking/seqlock.rst
414794 */
415
-static inline void write_seqcount_invalidate(seqcount_t *s)
416
-{
417
- smp_wmb();
418
- s->sequence+=2;
419
-}
420
-
421795 typedef struct {
422
- struct seqcount seqcount;
796
+ /*
797
+ * Make sure that readers don't starve writers on PREEMPT_RT: use
798
+ * seqcount_spinlock_t instead of seqcount_t. Check __SEQ_LOCK().
799
+ */
800
+ seqcount_spinlock_t seqcount;
423801 spinlock_t lock;
424802 } seqlock_t;
425803
426
-/*
427
- * These macros triggered gcc-3.x compile-time problems. We think these are
428
- * OK now. Be cautious.
429
- */
430
-#define __SEQLOCK_UNLOCKED(lockname) \
431
- { \
432
- .seqcount = SEQCNT_ZERO(lockname), \
433
- .lock = __SPIN_LOCK_UNLOCKED(lockname) \
804
+#define __SEQLOCK_UNLOCKED(lockname) \
805
+ { \
806
+ .seqcount = SEQCNT_SPINLOCK_ZERO(lockname, &(lockname).lock), \
807
+ .lock = __SPIN_LOCK_UNLOCKED(lockname) \
434808 }
435809
436
-#define seqlock_init(x) \
437
- do { \
438
- seqcount_init(&(x)->seqcount); \
439
- spin_lock_init(&(x)->lock); \
810
+/**
811
+ * seqlock_init() - dynamic initializer for seqlock_t
812
+ * @sl: Pointer to the seqlock_t instance
813
+ */
814
+#define seqlock_init(sl) \
815
+ do { \
816
+ spin_lock_init(&(sl)->lock); \
817
+ seqcount_spinlock_init(&(sl)->seqcount, &(sl)->lock); \
440818 } while (0)
441819
442
-#define DEFINE_SEQLOCK(x) \
443
- seqlock_t x = __SEQLOCK_UNLOCKED(x)
444
-
445
-/*
446
- * Read side functions for starting and finalizing a read side section.
820
+/**
821
+ * DEFINE_SEQLOCK(sl) - Define a statically allocated seqlock_t
822
+ * @sl: Name of the seqlock_t instance
447823 */
448
-#ifndef CONFIG_PREEMPT_RT_FULL
824
+#define DEFINE_SEQLOCK(sl) \
825
+ seqlock_t sl = __SEQLOCK_UNLOCKED(sl)
826
+
827
+/**
828
+ * read_seqbegin() - start a seqlock_t read side critical section
829
+ * @sl: Pointer to seqlock_t
830
+ *
831
+ * Return: count, to be passed to read_seqretry()
832
+ */
449833 static inline unsigned read_seqbegin(const seqlock_t *sl)
450834 {
451
- return read_seqcount_begin(&sl->seqcount);
452
-}
453
-#else
454
-/*
455
- * Starvation safe read side for RT
456
- */
457
-static inline unsigned read_seqbegin(seqlock_t *sl)
458
-{
459
- unsigned ret;
835
+ unsigned ret = read_seqcount_begin(&sl->seqcount);
460836
461
-repeat:
462
- ret = READ_ONCE(sl->seqcount.sequence);
463
- if (unlikely(ret & 1)) {
464
- /*
465
- * Take the lock and let the writer proceed (i.e. evtl
466
- * boost it), otherwise we could loop here forever.
467
- */
468
- spin_unlock_wait(&sl->lock);
469
- goto repeat;
470
- }
471
- smp_rmb();
837
+ kcsan_atomic_next(0); /* non-raw usage, assume closing read_seqretry() */
838
+ kcsan_flat_atomic_begin();
472839 return ret;
473840 }
474
-#endif
475841
842
+/**
843
+ * read_seqretry() - end a seqlock_t read side section
844
+ * @sl: Pointer to seqlock_t
845
+ * @start: count, from read_seqbegin()
846
+ *
847
+ * read_seqretry closes the read side critical section of given seqlock_t.
848
+ * If the critical section was invalid, it must be ignored (and typically
849
+ * retried).
850
+ *
851
+ * Return: true if a read section retry is required, else false
852
+ */
476853 static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start)
477854 {
855
+ /*
856
+ * Assume not nested: read_seqretry() may be called multiple times when
857
+ * completing read critical section.
858
+ */
859
+ kcsan_flat_atomic_end();
860
+
478861 return read_seqcount_retry(&sl->seqcount, start);
479862 }
480863
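
A hedged end-to-end sketch of seqlock_t usage with hypothetical identifiers (bar_seqlock, bar_x, bar_y), tying read_seqbegin()/read_seqretry() above to the write side functions defined below:

        #include <linux/seqlock.h>

        static DEFINE_SEQLOCK(bar_seqlock);
        static int bar_x, bar_y;                /* data protected by bar_seqlock */

        static void bar_write(int x, int y)
        {
                write_seqlock(&bar_seqlock);    /* takes the embedded spinlock */
                bar_x = x;
                bar_y = y;
                write_sequnlock(&bar_seqlock);
        }

        static void bar_read(int *x, int *y)
        {
                unsigned int seq;

                do {
                        seq = read_seqbegin(&bar_seqlock);
                        *x = bar_x;
                        *y = bar_y;
                } while (read_seqretry(&bar_seqlock, seq));
        }
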
481864 /*
482
- * Lock out other writers and update the count.
483
- * Acts like a normal spin_lock/unlock.
484
- * Don't need preempt_disable() because that is in the spin_lock already.
865
+ * For all seqlock_t write side functions, use write_seqcount_*t*_begin()
866
+ * instead of the generic write_seqcount_begin(). This way, no redundant
867
+ * lockdep_assert_held() checks are added.
868
+ */
869
+
870
+/**
871
+ * write_seqlock() - start a seqlock_t write side critical section
872
+ * @sl: Pointer to seqlock_t
873
+ *
874
+ * write_seqlock opens a write side critical section for the given
875
+ * seqlock_t. It also implicitly acquires the spinlock_t embedded inside
876
+ * that sequential lock. All seqlock_t write side sections are thus
877
+ * automatically serialized and non-preemptible.
878
+ *
879
+ * Context: if the seqlock_t read section, or other write side critical
880
+ * sections, can be invoked from hardirq or softirq contexts, use the
881
+ * _irqsave or _bh variants of this function instead.
485882 */
486883 static inline void write_seqlock(seqlock_t *sl)
487884 {
488885 spin_lock(&sl->lock);
489
- __raw_write_seqcount_begin(&sl->seqcount);
886
+ write_seqcount_t_begin(&sl->seqcount.seqcount);
490887 }
491888
492
-static inline int try_write_seqlock(seqlock_t *sl)
493
-{
494
- if (spin_trylock(&sl->lock)) {
495
- __raw_write_seqcount_begin(&sl->seqcount);
496
- return 1;
497
- }
498
- return 0;
499
-}
500
-
889
+/**
890
+ * write_sequnlock() - end a seqlock_t write side critical section
891
+ * @sl: Pointer to seqlock_t
892
+ *
893
+ * write_sequnlock closes the (serialized and non-preemptible) write side
894
+ * critical section of given seqlock_t.
895
+ */
501896 static inline void write_sequnlock(seqlock_t *sl)
502897 {
503
- __raw_write_seqcount_end(&sl->seqcount);
898
+ write_seqcount_t_end(&sl->seqcount.seqcount);
504899 spin_unlock(&sl->lock);
505900 }
506901
902
+/**
903
+ * write_seqlock_bh() - start a softirqs-disabled seqlock_t write section
904
+ * @sl: Pointer to seqlock_t
905
+ *
906
+ * _bh variant of write_seqlock(). Use only if the read side section, or
907
+ * other write side sections, can be invoked from softirq contexts.
908
+ */
507909 static inline void write_seqlock_bh(seqlock_t *sl)
508910 {
509911 spin_lock_bh(&sl->lock);
510
- __raw_write_seqcount_begin(&sl->seqcount);
912
+ write_seqcount_t_begin(&sl->seqcount.seqcount);
511913 }
512914
915
+/**
916
+ * write_sequnlock_bh() - end a softirqs-disabled seqlock_t write section
917
+ * @sl: Pointer to seqlock_t
918
+ *
919
+ * write_sequnlock_bh closes the serialized, non-preemptible, and
920
+ * softirqs-disabled, seqlock_t write side critical section opened with
921
+ * write_seqlock_bh().
922
+ */
513923 static inline void write_sequnlock_bh(seqlock_t *sl)
514924 {
515
- __raw_write_seqcount_end(&sl->seqcount);
925
+ write_seqcount_t_end(&sl->seqcount.seqcount);
516926 spin_unlock_bh(&sl->lock);
517927 }
518928
929
+/**
930
+ * write_seqlock_irq() - start a non-interruptible seqlock_t write section
931
+ * @sl: Pointer to seqlock_t
932
+ *
933
+ * _irq variant of write_seqlock(). Use only if the read side section, or
934
+ * other write sections, can be invoked from hardirq contexts.
935
+ */
519936 static inline void write_seqlock_irq(seqlock_t *sl)
520937 {
521938 spin_lock_irq(&sl->lock);
522
- __raw_write_seqcount_begin(&sl->seqcount);
939
+ write_seqcount_t_begin(&sl->seqcount.seqcount);
523940 }
524941
942
+/**
943
+ * write_sequnlock_irq() - end a non-interruptible seqlock_t write section
944
+ * @sl: Pointer to seqlock_t
945
+ *
946
+ * write_sequnlock_irq closes the serialized and non-interruptible
947
+ * seqlock_t write side section opened with write_seqlock_irq().
948
+ */
525949 static inline void write_sequnlock_irq(seqlock_t *sl)
526950 {
527
- __raw_write_seqcount_end(&sl->seqcount);
951
+ write_seqcount_t_end(&sl->seqcount.seqcount);
528952 spin_unlock_irq(&sl->lock);
529953 }
530954
@@ -533,79 +957,112 @@
533957 unsigned long flags;
534958
535959 spin_lock_irqsave(&sl->lock, flags);
536
- __raw_write_seqcount_begin(&sl->seqcount);
960
+ write_seqcount_t_begin(&sl->seqcount.seqcount);
537961 return flags;
538962 }
539963
964
+/**
965
+ * write_seqlock_irqsave() - start a non-interruptible seqlock_t write
966
+ * section
967
+ * @lock: Pointer to seqlock_t
968
+ * @flags: Stack-allocated storage for saving caller's local interrupt
969
+ * state, to be passed to write_sequnlock_irqrestore().
970
+ *
971
+ * _irqsave variant of write_seqlock(). Use it only if the read side
972
+ * section, or other write sections, can be invoked from hardirq context.
973
+ */
540974 #define write_seqlock_irqsave(lock, flags) \
541975 do { flags = __write_seqlock_irqsave(lock); } while (0)
542976
977
+/**
978
+ * write_sequnlock_irqrestore() - end non-interruptible seqlock_t write
979
+ * section
980
+ * @sl: Pointer to seqlock_t
981
+ * @flags: Caller's saved interrupt state, from write_seqlock_irqsave()
982
+ *
983
+ * write_sequnlock_irqrestore closes the serialized and non-interruptible
984
+ * seqlock_t write section previously opened with write_seqlock_irqsave().
985
+ */
543986 static inline void
544987 write_sequnlock_irqrestore(seqlock_t *sl, unsigned long flags)
545988 {
546
- __raw_write_seqcount_end(&sl->seqcount);
989
+ write_seqcount_t_end(&sl->seqcount.seqcount);
547990 spin_unlock_irqrestore(&sl->lock, flags);
548991 }
549992
550
-/*
551
- * A locking reader exclusively locks out other writers and locking readers,
552
- * but doesn't update the sequence number. Acts like a normal spin_lock/unlock.
553
- * Don't need preempt_disable() because that is in the spin_lock already.
993
+/**
994
+ * read_seqlock_excl() - begin a seqlock_t locking reader section
995
+ * @sl: Pointer to seqlock_t
996
+ *
997
+ * read_seqlock_excl opens a seqlock_t locking reader critical section. A
998
+ * locking reader exclusively locks out *both* other writers *and* other
999
+ * locking readers, but it does not update the embedded sequence number.
1000
+ *
1001
+ * Locking readers act like a normal spin_lock()/spin_unlock().
1002
+ *
1003
+ * Context: if the seqlock_t write section, *or other read sections*, can
1004
+ * be invoked from hardirq or softirq contexts, use the _irqsave or _bh
1005
+ * variant of this function instead.
1006
+ *
1007
+ * The opened read section must be closed with read_sequnlock_excl().
5541008 */
5551009 static inline void read_seqlock_excl(seqlock_t *sl)
5561010 {
5571011 spin_lock(&sl->lock);
5581012 }
5591013
1014
+/**
1015
+ * read_sequnlock_excl() - end a seqlock_t locking reader critical section
1016
+ * @sl: Pointer to seqlock_t
1017
+ */
5601018 static inline void read_sequnlock_excl(seqlock_t *sl)
5611019 {
5621020 spin_unlock(&sl->lock);
5631021 }
5641022
5651023 /**
566
- * read_seqbegin_or_lock - begin a sequence number check or locking block
567
- * @lock: sequence lock
568
- * @seq : sequence number to be checked
1024
+ * read_seqlock_excl_bh() - start a seqlock_t locking reader section with
1025
+ * softirqs disabled
1026
+ * @sl: Pointer to seqlock_t
5691027 *
570
- * First try it once optimistically without taking the lock. If that fails,
571
- * take the lock. The sequence number is also used as a marker for deciding
572
- * whether to be a reader (even) or writer (odd).
573
- * N.B. seq must be initialized to an even number to begin with.
1028
+ * _bh variant of read_seqlock_excl(). Use this variant only if the
1029
+ * seqlock_t write side section, *or other read sections*, can be invoked
1030
+ * from softirq contexts.
5741031 */
575
-static inline void read_seqbegin_or_lock(seqlock_t *lock, int *seq)
576
-{
577
- if (!(*seq & 1)) /* Even */
578
- *seq = read_seqbegin(lock);
579
- else /* Odd */
580
- read_seqlock_excl(lock);
581
-}
582
-
583
-static inline int need_seqretry(seqlock_t *lock, int seq)
584
-{
585
- return !(seq & 1) && read_seqretry(lock, seq);
586
-}
587
-
588
-static inline void done_seqretry(seqlock_t *lock, int seq)
589
-{
590
- if (seq & 1)
591
- read_sequnlock_excl(lock);
592
-}
593
-
5941032 static inline void read_seqlock_excl_bh(seqlock_t *sl)
5951033 {
5961034 spin_lock_bh(&sl->lock);
5971035 }
5981036
1037
+/**
1038
+ * read_sequnlock_excl_bh() - stop a seqlock_t softirq-disabled locking
1039
+ * reader section
1040
+ * @sl: Pointer to seqlock_t
1041
+ */
5991042 static inline void read_sequnlock_excl_bh(seqlock_t *sl)
6001043 {
6011044 spin_unlock_bh(&sl->lock);
6021045 }
6031046
1047
+/**
1048
+ * read_seqlock_excl_irq() - start a non-interruptible seqlock_t locking
1049
+ * reader section
1050
+ * @sl: Pointer to seqlock_t
1051
+ *
1052
+ * _irq variant of read_seqlock_excl(). Use this only if the seqlock_t
1053
+ * write side section, *or other read sections*, can be invoked from a
1054
+ * hardirq context.
1055
+ */
6041056 static inline void read_seqlock_excl_irq(seqlock_t *sl)
6051057 {
6061058 spin_lock_irq(&sl->lock);
6071059 }
6081060
1061
+/**
1062
+ * read_sequnlock_excl_irq() - end an interrupts-disabled seqlock_t
1063
+ * locking reader section
1064
+ * @sl: Pointer to seqlock_t
1065
+ */
6091066 static inline void read_sequnlock_excl_irq(seqlock_t *sl)
6101067 {
6111068 spin_unlock_irq(&sl->lock);
@@ -619,15 +1076,117 @@
6191076 return flags;
6201077 }
6211078
1079
+/**
1080
+ * read_seqlock_excl_irqsave() - start a non-interruptible seqlock_t
1081
+ * locking reader section
1082
+ * @lock: Pointer to seqlock_t
1083
+ * @flags: Stack-allocated storage for saving caller's local interrupt
1084
+ * state, to be passed to read_sequnlock_excl_irqrestore().
1085
+ *
1086
+ * _irqsave variant of read_seqlock_excl(). Use this only if the seqlock_t
1087
+ * write side section, *or other read sections*, can be invoked from a
1088
+ * hardirq context.
1089
+ */
6221090 #define read_seqlock_excl_irqsave(lock, flags) \
6231091 do { flags = __read_seqlock_excl_irqsave(lock); } while (0)
6241092
1093
+/**
1094
+ * read_sequnlock_excl_irqrestore() - end non-interruptible seqlock_t
1095
+ * locking reader section
1096
+ * @sl: Pointer to seqlock_t
1097
+ * @flags: Caller saved interrupt state, from read_seqlock_excl_irqsave()
1098
+ */
6251099 static inline void
6261100 read_sequnlock_excl_irqrestore(seqlock_t *sl, unsigned long flags)
6271101 {
6281102 spin_unlock_irqrestore(&sl->lock, flags);
6291103 }
6301104
1105
+/**
1106
+ * read_seqbegin_or_lock() - begin a seqlock_t lockless or locking reader
1107
+ * @lock: Pointer to seqlock_t
1108
+ * @seq : Marker and return parameter. If the passed value is even, the
1109
+ * reader will become a *lockless* seqlock_t reader as in read_seqbegin().
1110
+ * If the passed value is odd, the reader will become a *locking* reader
1111
+ * as in read_seqlock_excl(). In the first call to this function, the
1112
+ * caller *must* initialize and pass an even value to @seq; this way, a
1113
+ * lockless read can be optimistically tried first.
1114
+ *
1115
+ * read_seqbegin_or_lock is an API designed to optimistically try a normal
1116
+ * lockless seqlock_t read section first. If an odd counter is found, the
1117
+ * lockless read trial has failed, and the next read iteration transforms
1118
+ * itself into a full seqlock_t locking reader.
1119
+ *
1120
+ * This is typically used to avoid seqlock_t lockless reader starvation
1121
+ * (too many retry loops) in the case of a sharp spike in write side
1122
+ * activity.
1123
+ *
1124
+ * Context: if the seqlock_t write section, *or other read sections*, can
1125
+ * be invoked from hardirq or softirq contexts, use the _irqsave or _bh
1126
+ * variant of this function instead.
1127
+ *
1128
+ * Check Documentation/locking/seqlock.rst for template example code.
1129
+ *
1130
+ * Return: the encountered sequence counter value, through the @seq
1131
+ * parameter, which is overloaded as a return parameter. This returned
1132
+ * value must be checked with need_seqretry(). If the read section needs to
1133
+ * be retried, this returned value must also be passed as the @seq
1134
+ * parameter of the next read_seqbegin_or_lock() iteration.
1135
+ */
1136
+static inline void read_seqbegin_or_lock(seqlock_t *lock, int *seq)
1137
+{
1138
+ if (!(*seq & 1)) /* Even */
1139
+ *seq = read_seqbegin(lock);
1140
+ else /* Odd */
1141
+ read_seqlock_excl(lock);
1142
+}
1143
+
1144
+/**
1145
+ * need_seqretry() - validate seqlock_t "locking or lockless" read section
1146
+ * @lock: Pointer to seqlock_t
1147
+ * @seq: sequence count, from read_seqbegin_or_lock()
1148
+ *
1149
+ * Return: true if a read section retry is required, false otherwise
1150
+ */
1151
+static inline int need_seqretry(seqlock_t *lock, int seq)
1152
+{
1153
+ return !(seq & 1) && read_seqretry(lock, seq);
1154
+}
1155
+
1156
+/**
1157
+ * done_seqretry() - end seqlock_t "locking or lockless" reader section
1158
+ * @lock: Pointer to seqlock_t
1159
+ * @seq: count, from read_seqbegin_or_lock()
1160
+ *
1161
+ * done_seqretry finishes the seqlock_t read side critical section started
1162
+ * with read_seqbegin_or_lock() and validated by need_seqretry().
1163
+ */
1164
+static inline void done_seqretry(seqlock_t *lock, int seq)
1165
+{
1166
+ if (seq & 1)
1167
+ read_sequnlock_excl(lock);
1168
+}
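
A hedged sketch of the "lockless or locking" reader template referred to above, modeled on existing callers such as d_path() and reusing the hypothetical bar_seqlock/bar_x/bar_y names from the earlier seqlock_t sketch (see Documentation/locking/seqlock.rst for the canonical form):

        static void bar_stable_read(int *x, int *y)
        {
                int seq = 0;    /* even: the first pass is a lockless read */

                while (1) {
                        read_seqbegin_or_lock(&bar_seqlock, &seq);
                        *x = bar_x;
                        *y = bar_y;
                        if (!need_seqretry(&bar_seqlock, seq))
                                break;
                        /* The lockless pass raced with a writer: retry as a locking reader. */
                        seq = 1;
                }
                done_seqretry(&bar_seqlock, seq);
        }
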
1169
+
1170
+/**
1171
+ * read_seqbegin_or_lock_irqsave() - begin a seqlock_t lockless reader, or
1172
+ * a non-interruptible locking reader
1173
+ * @lock: Pointer to seqlock_t
1174
+ * @seq: Marker and return parameter. Check read_seqbegin_or_lock().
1175
+ *
1176
+ * This is the _irqsave variant of read_seqbegin_or_lock(). Use it only if
1177
+ * the seqlock_t write section, *or other read sections*, can be invoked
1178
+ * from hardirq context.
1179
+ *
1180
+ * Note: Interrupts will be disabled only for "locking reader" mode.
1181
+ *
1182
+ * Return:
1183
+ *
1184
+ * 1. The saved local interrupts state in case of a locking reader, to
1185
+ * be passed to done_seqretry_irqrestore().
1186
+ *
1187
+ * 2. The encountered sequence counter value, returned through @seq
1188
+ * overloaded as a return parameter. Check read_seqbegin_or_lock().
1189
+ */
6311190 static inline unsigned long
6321191 read_seqbegin_or_lock_irqsave(seqlock_t *lock, int *seq)
6331192 {
@@ -641,6 +1200,18 @@
6411200 return flags;
6421201 }
6431202
1203
+/**
1204
+ * done_seqretry_irqrestore() - end a seqlock_t lockless reader, or a
1205
+ * non-interruptible locking reader section
1206
+ * @lock: Pointer to seqlock_t
1207
+ * @seq: Count, from read_seqbegin_or_lock_irqsave()
1208
+ * @flags: Caller's saved local interrupt state in case of a locking
1209
+ * reader, also from read_seqbegin_or_lock_irqsave()
1210
+ *
1211
+ * This is the _irqrestore variant of done_seqretry(). The read section
1212
+ * must've been opened with read_seqbegin_or_lock_irqsave(), and validated
1213
+ * by need_seqretry().
1214
+ */
6441215 static inline void
6451216 done_seqretry_irqrestore(seqlock_t *lock, int seq, unsigned long flags)
6461217 {