2023-12-11 d2ccde1c8e90d38cee87a1b0309ad2827f3fd30d
kernel/kernel/locking/rwsem.c
....@@ -3,17 +3,1529 @@
33 *
44 * Written by David Howells (dhowells@redhat.com).
55 * Derived from asm-i386/semaphore.h
6
+ *
7
+ * Writer lock-stealing by Alex Shi <alex.shi@intel.com>
8
+ * and Michel Lespinasse <walken@google.com>
9
+ *
10
+ * Optimistic spinning by Tim Chen <tim.c.chen@intel.com>
11
+ * and Davidlohr Bueso <davidlohr@hp.com>. Based on mutexes.
12
+ *
13
+ * Rwsem count bit fields re-definition and rwsem rearchitecture by
14
+ * Waiman Long <longman@redhat.com> and
15
+ * Peter Zijlstra <peterz@infradead.org>.
616 */
717
818 #include <linux/types.h>
919 #include <linux/kernel.h>
1020 #include <linux/sched.h>
21
+#include <linux/sched/rt.h>
22
+#include <linux/sched/task.h>
1123 #include <linux/sched/debug.h>
24
+#include <linux/sched/wake_q.h>
25
+#include <linux/sched/signal.h>
26
+#include <linux/sched/clock.h>
1227 #include <linux/export.h>
1328 #include <linux/rwsem.h>
1429 #include <linux/atomic.h>
1530
16
-#include "rwsem.h"
31
+#ifndef CONFIG_PREEMPT_RT
32
+#include "lock_events.h"
33
+#include <trace/hooks/rwsem.h>
34
+#include <trace/hooks/dtask.h>
35
+
36
+/*
37
+ * The least significant 3 bits of the owner value has the following
38
+ * meanings when set.
39
+ * - Bit 0: RWSEM_READER_OWNED - The rwsem is owned by readers
40
+ * - Bit 1: RWSEM_RD_NONSPINNABLE - Readers cannot spin on this lock.
41
+ * - Bit 2: RWSEM_WR_NONSPINNABLE - Writers cannot spin on this lock.
42
+ *
43
+ * When the rwsem is either owned by an anonymous writer, or it is
44
+ * reader-owned, but a spinning writer has timed out, both nonspinnable
45
+ * bits will be set to disable optimistic spinning by readers and writers.
46
+ * In the latter case, the last unlocking reader should then check the
47
+ * writer nonspinnable bit and clear it only to give writers preference
48
+ * to acquire the lock via optimistic spinning, but not readers. Similar
49
+ * action is also done in the reader slowpath.
50
+ *
51
+ * When a writer acquires a rwsem, it puts its task_struct pointer
52
+ * into the owner field. It is cleared after an unlock.
53
+ *
54
+ * When a reader acquires a rwsem, it will also put its task_struct
55
+ * pointer into the owner field with the RWSEM_READER_OWNED bit set.
56
+ * On unlock, the owner field will largely be left untouched. So
57
+ * for a free or reader-owned rwsem, the owner value may contain
58
+ * information about the last reader that acquired the rwsem.
59
+ *
60
+ * That information may be helpful in debugging cases where the system
61
+ * seems to hang on a reader owned rwsem especially if only one reader
62
+ * is involved. Ideally we would like to track all the readers that own
63
+ * a rwsem, but the overhead is simply too big.
64
+ *
65
+ * Reader optimistic spinning is helpful when the reader critical section
66
+ * is short and there aren't that many readers around. It makes readers
67
+ * relatively more preferred than writers. When a writer times out spinning
68
+ * on a reader-owned lock and sets the nonspinnable bits, there are two main
69
+ * reasons for that.
70
+ *
71
+ * 1) The reader critical section is long, perhaps the task sleeps after
72
+ * acquiring the read lock.
73
+ * 2) There are just too many readers contending for the lock, causing it to
74
+ * take a while to service all of them.
75
+ *
76
+ * In the former case, a long reader critical section will impede the progress
77
+ * of writers which is usually more important for system performance. In
78
+ * the latter case, reader optimistic spinning tends to make the reader
79
+ * groups that contain readers that acquire the lock together smaller
80
+ * leading to more of them. That may hurt performance in some cases. In
81
+ * other words, the setting of nonspinnable bits indicates that reader
82
+ * optimistic spinning may not be helpful for those workloads that cause
83
+ * it.
84
+ *
85
+ * Therefore, any writers that had observed the setting of the writer
86
+ * nonspinnable bit for a given rwsem after they fail to acquire the lock
87
+ * via optimistic spinning will set the reader nonspinnable bit once they
88
+ * acquire the write lock. Similarly, readers that observe the setting
89
+ * of reader nonspinnable bit at slowpath entry will set the reader
90
+ * nonspinnable bits when they acquire the read lock via the wakeup path.
91
+ *
92
+ * Once the reader nonspinnable bit is on, it will only be reset when
93
+ * a writer is able to acquire the rwsem in the fast path or somehow a
94
+ * reader or writer in the slowpath doesn't observe the nonspinnable bit.
95
+ *
96
+ * This is to discourage reader optimistic spinning on that particular
97
+ * rwsem and make writers more preferred. This adaptive disabling of reader
98
+ * optimistic spinning will alleviate the negative side effect of this
99
+ * feature.
100
+ */
101
+#define RWSEM_READER_OWNED (1UL << 0)
102
+#define RWSEM_RD_NONSPINNABLE (1UL << 1)
103
+#define RWSEM_WR_NONSPINNABLE (1UL << 2)
104
+#define RWSEM_NONSPINNABLE (RWSEM_RD_NONSPINNABLE | RWSEM_WR_NONSPINNABLE)
105
+#define RWSEM_OWNER_FLAGS_MASK (RWSEM_READER_OWNED | RWSEM_NONSPINNABLE)
106
+
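As a rough illustration of the owner-field encoding described above, here is a standalone userspace sketch (not part of the kernel patch; the pointer value is made up and a 64-bit build is assumed, where task_struct pointers have their low three bits clear):

#include <stdio.h>

#define RWSEM_READER_OWNED	(1UL << 0)
#define RWSEM_RD_NONSPINNABLE	(1UL << 1)
#define RWSEM_WR_NONSPINNABLE	(1UL << 2)
#define RWSEM_NONSPINNABLE	(RWSEM_RD_NONSPINNABLE | RWSEM_WR_NONSPINNABLE)
#define RWSEM_OWNER_FLAGS_MASK	(RWSEM_READER_OWNED | RWSEM_NONSPINNABLE)

int main(void)
{
	/* Stand-in value for the task_struct pointer of the last reader. */
	unsigned long task = 0xffff888012345600UL;
	/* Reader-owned, with reader optimistic spinning disabled. */
	unsigned long owner = task | RWSEM_READER_OWNED | RWSEM_RD_NONSPINNABLE;

	printf("task pointer: %#lx\n", owner & ~RWSEM_OWNER_FLAGS_MASK); /* 0xffff888012345600 */
	printf("owner flags : %#lx\n", owner & RWSEM_OWNER_FLAGS_MASK);  /* 0x3 */
	return 0;
}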
107
+#ifdef CONFIG_DEBUG_RWSEMS
108
+# define DEBUG_RWSEMS_WARN_ON(c, sem) do { \
109
+ if (!debug_locks_silent && \
110
+ WARN_ONCE(c, "DEBUG_RWSEMS_WARN_ON(%s): count = 0x%lx, magic = 0x%lx, owner = 0x%lx, curr 0x%lx, list %sempty\n",\
111
+ #c, atomic_long_read(&(sem)->count), \
112
+ (unsigned long) sem->magic, \
113
+ atomic_long_read(&(sem)->owner), (long)current, \
114
+ list_empty(&(sem)->wait_list) ? "" : "not ")) \
115
+ debug_locks_off(); \
116
+ } while (0)
117
+#else
118
+# define DEBUG_RWSEMS_WARN_ON(c, sem)
119
+#endif
120
+
121
+/*
122
+ * On 64-bit architectures, the bit definitions of the count are:
123
+ *
124
+ * Bit 0 - writer locked bit
125
+ * Bit 1 - waiters present bit
126
+ * Bit 2 - lock handoff bit
127
+ * Bits 3-7 - reserved
128
+ * Bits 8-62 - 55-bit reader count
129
+ * Bit 63 - read fail bit
130
+ *
131
+ * On 32-bit architectures, the bit definitions of the count are:
132
+ *
133
+ * Bit 0 - writer locked bit
134
+ * Bit 1 - waiters present bit
135
+ * Bit 2 - lock handoff bit
136
+ * Bits 3-7 - reserved
137
+ * Bits 8-30 - 23-bit reader count
138
+ * Bit 31 - read fail bit
139
+ *
140
+ * It is not likely that the most significant bit (read fail bit) will ever
141
+ * be set. This guard bit is still checked anyway in the down_read() fastpath
142
+ * just in case we need to use up more of the reader bits for other purposes
143
+ * in the future.
144
+ *
145
+ * atomic_long_fetch_add() is used to obtain reader lock, whereas
146
+ * atomic_long_cmpxchg() will be used to obtain writer lock.
147
+ *
148
+ * There are three places where the lock handoff bit may be set or cleared.
149
+ * 1) rwsem_mark_wake() for readers.
150
+ * 2) rwsem_try_write_lock() for writers.
151
+ * 3) Error path of rwsem_down_write_slowpath().
152
+ *
153
+ * For all the above cases, wait_lock will be held. A writer must also
154
+ * be the first one in the wait_list to be eligible for setting the handoff
155
+ * bit. So concurrent setting/clearing of handoff bit is not possible.
156
+ */
157
+#define RWSEM_WRITER_LOCKED (1UL << 0)
158
+#define RWSEM_FLAG_WAITERS (1UL << 1)
159
+#define RWSEM_FLAG_HANDOFF (1UL << 2)
160
+#define RWSEM_FLAG_READFAIL (1UL << (BITS_PER_LONG - 1))
161
+
162
+#define RWSEM_READER_SHIFT 8
163
+#define RWSEM_READER_BIAS (1UL << RWSEM_READER_SHIFT)
164
+#define RWSEM_READER_MASK (~(RWSEM_READER_BIAS - 1))
165
+#define RWSEM_WRITER_MASK RWSEM_WRITER_LOCKED
166
+#define RWSEM_LOCK_MASK (RWSEM_WRITER_MASK|RWSEM_READER_MASK)
167
+#define RWSEM_READ_FAILED_MASK (RWSEM_WRITER_MASK|RWSEM_FLAG_WAITERS|\
168
+ RWSEM_FLAG_HANDOFF|RWSEM_FLAG_READFAIL)
169
+
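To make the count layout above concrete, the following small userspace sketch (not kernel code; it assumes a 64-bit long and re-declares the bit names defined above) decodes a sample count value:

#include <stdio.h>

#define RWSEM_WRITER_LOCKED	(1UL << 0)
#define RWSEM_FLAG_WAITERS	(1UL << 1)
#define RWSEM_FLAG_HANDOFF	(1UL << 2)
#define RWSEM_READER_SHIFT	8
#define RWSEM_READER_BIAS	(1UL << RWSEM_READER_SHIFT)

int main(void)
{
	/* Three readers hold the lock and at least one waiter is queued. */
	unsigned long count = (3 * RWSEM_READER_BIAS) | RWSEM_FLAG_WAITERS;

	printf("writer locked: %d\n", !!(count & RWSEM_WRITER_LOCKED));	/* 0 */
	printf("waiters      : %d\n", !!(count & RWSEM_FLAG_WAITERS));	/* 1 */
	printf("handoff      : %d\n", !!(count & RWSEM_FLAG_HANDOFF));	/* 0 */
	printf("readers      : %lu\n", count >> RWSEM_READER_SHIFT);	/* 3 */
	return 0;
}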
170
+/*
171
+ * All writes to owner are protected by WRITE_ONCE() to make sure that
172
+ * store tearing can't happen as optimistic spinners may read and use
173
+ * the owner value concurrently without the lock. Reads from owner, however,
174
+ * may not need READ_ONCE() as long as the pointer value is only used
175
+ * for comparison and isn't being dereferenced.
176
+ */
177
+static inline void rwsem_set_owner(struct rw_semaphore *sem)
178
+{
179
+ atomic_long_set(&sem->owner, (long)current);
180
+ trace_android_vh_rwsem_set_owner(sem);
181
+}
182
+
183
+static inline void rwsem_clear_owner(struct rw_semaphore *sem)
184
+{
185
+ atomic_long_set(&sem->owner, 0);
186
+}
187
+
188
+/*
189
+ * Test the flags in the owner field.
190
+ */
191
+static inline bool rwsem_test_oflags(struct rw_semaphore *sem, long flags)
192
+{
193
+ return atomic_long_read(&sem->owner) & flags;
194
+}
195
+
196
+/*
197
+ * The task_struct pointer of the last owning reader will be left in
198
+ * the owner field.
199
+ *
200
+ * Note that the owner value just indicates the task has owned the rwsem
201
+ * previously; it may not be the real owner or one of the real owners
202
+ * anymore when that field is examined, so take it with a grain of salt.
203
+ *
204
+ * The reader non-spinnable bit is preserved.
205
+ */
206
+static inline void __rwsem_set_reader_owned(struct rw_semaphore *sem,
207
+ struct task_struct *owner)
208
+{
209
+ unsigned long val = (unsigned long)owner | RWSEM_READER_OWNED |
210
+ (atomic_long_read(&sem->owner) & RWSEM_RD_NONSPINNABLE);
211
+
212
+ atomic_long_set(&sem->owner, val);
213
+}
214
+
215
+static inline void rwsem_set_reader_owned(struct rw_semaphore *sem)
216
+{
217
+ __rwsem_set_reader_owned(sem, current);
218
+ trace_android_vh_rwsem_set_reader_owned(sem);
219
+}
220
+
221
+/*
222
+ * Return true if the rwsem is owned by a reader.
223
+ */
224
+static inline bool is_rwsem_reader_owned(struct rw_semaphore *sem)
225
+{
226
+#ifdef CONFIG_DEBUG_RWSEMS
227
+ /*
228
+ * Check the count to see if it is write-locked.
229
+ */
230
+ long count = atomic_long_read(&sem->count);
231
+
232
+ if (count & RWSEM_WRITER_MASK)
233
+ return false;
234
+#endif
235
+ return rwsem_test_oflags(sem, RWSEM_READER_OWNED);
236
+}
237
+
238
+#ifdef CONFIG_DEBUG_RWSEMS
239
+/*
240
+ * With CONFIG_DEBUG_RWSEMS configured, it will make sure that if there
241
+ * is a task pointer in the owner field of a reader-owned rwsem, it will be the
242
+ * real owner or one of the real owners. The only exception is when the
243
+ * unlock is done by up_read_non_owner().
244
+ */
245
+static inline void rwsem_clear_reader_owned(struct rw_semaphore *sem)
246
+{
247
+ unsigned long val = atomic_long_read(&sem->owner);
248
+
249
+ while ((val & ~RWSEM_OWNER_FLAGS_MASK) == (unsigned long)current) {
250
+ if (atomic_long_try_cmpxchg(&sem->owner, &val,
251
+ val & RWSEM_OWNER_FLAGS_MASK))
252
+ return;
253
+ }
254
+}
255
+#else
256
+static inline void rwsem_clear_reader_owned(struct rw_semaphore *sem)
257
+{
258
+}
259
+#endif
260
+
261
+/*
262
+ * Set the RWSEM_NONSPINNABLE bits if the RWSEM_READER_OWNED flag
263
+ * remains set. Otherwise, the operation will be aborted.
264
+ */
265
+static inline void rwsem_set_nonspinnable(struct rw_semaphore *sem)
266
+{
267
+ unsigned long owner = atomic_long_read(&sem->owner);
268
+
269
+ do {
270
+ if (!(owner & RWSEM_READER_OWNED))
271
+ break;
272
+ if (owner & RWSEM_NONSPINNABLE)
273
+ break;
274
+ } while (!atomic_long_try_cmpxchg(&sem->owner, &owner,
275
+ owner | RWSEM_NONSPINNABLE));
276
+}
277
+
278
+static inline bool rwsem_read_trylock(struct rw_semaphore *sem)
279
+{
280
+ long cnt = atomic_long_add_return_acquire(RWSEM_READER_BIAS, &sem->count);
281
+ if (WARN_ON_ONCE(cnt < 0))
282
+ rwsem_set_nonspinnable(sem);
283
+
284
+ if ((cnt & RWSEM_READ_FAILED_MASK) == 0)
285
+ trace_android_vh_record_rwsem_lock_starttime(current, jiffies);
286
+
287
+ return !(cnt & RWSEM_READ_FAILED_MASK);
288
+}
289
+
290
+/*
291
+ * Return just the real task structure pointer of the owner
292
+ */
293
+static inline struct task_struct *rwsem_owner(struct rw_semaphore *sem)
294
+{
295
+ return (struct task_struct *)
296
+ (atomic_long_read(&sem->owner) & ~RWSEM_OWNER_FLAGS_MASK);
297
+}
298
+
299
+/*
300
+ * Return the real task structure pointer of the owner and the embedded
301
+ * flags in the owner. pflags must be non-NULL.
302
+ */
303
+static inline struct task_struct *
304
+rwsem_owner_flags(struct rw_semaphore *sem, unsigned long *pflags)
305
+{
306
+ unsigned long owner = atomic_long_read(&sem->owner);
307
+
308
+ *pflags = owner & RWSEM_OWNER_FLAGS_MASK;
309
+ return (struct task_struct *)(owner & ~RWSEM_OWNER_FLAGS_MASK);
310
+}
311
+
312
+/*
313
+ * Guide to the rw_semaphore's count field.
314
+ *
315
+ * When the RWSEM_WRITER_LOCKED bit in count is set, the lock is owned
316
+ * by a writer.
317
+ *
318
+ * The lock is owned by readers when
319
+ * (1) the RWSEM_WRITER_LOCKED isn't set in count,
320
+ * (2) some of the reader bits are set in count, and
321
+ * (3) the owner field has the RWSEM_READER_OWNED bit set.
322
+ *
323
+ * Having some reader bits set is not enough to guarantee a reader-owned
324
+ * lock as the readers may be in the process of backing out from the count
325
+ * and a writer has just released the lock. So another writer may steal
326
+ * the lock immediately after that.
327
+ */
328
+
329
+/*
330
+ * Initialize an rwsem:
331
+ */
332
+void __init_rwsem(struct rw_semaphore *sem, const char *name,
333
+ struct lock_class_key *key)
334
+{
335
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
336
+ /*
337
+ * Make sure we are not reinitializing a held semaphore:
338
+ */
339
+ debug_check_no_locks_freed((void *)sem, sizeof(*sem));
340
+ lockdep_init_map_wait(&sem->dep_map, name, key, 0, LD_WAIT_SLEEP);
341
+#endif
342
+#ifdef CONFIG_DEBUG_RWSEMS
343
+ sem->magic = sem;
344
+#endif
345
+ atomic_long_set(&sem->count, RWSEM_UNLOCKED_VALUE);
346
+ raw_spin_lock_init(&sem->wait_lock);
347
+ INIT_LIST_HEAD(&sem->wait_list);
348
+ atomic_long_set(&sem->owner, 0L);
349
+#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
350
+ osq_lock_init(&sem->osq);
351
+#endif
352
+ trace_android_vh_rwsem_init(sem);
353
+}
354
+EXPORT_SYMBOL(__init_rwsem);
355
+
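For orientation, a minimal caller of this API might look like the sketch below (a hypothetical example, not part of the patch; my_sem, my_data and the two functions are invented names, and only the standard <linux/rwsem.h> primitives are used):

#include <linux/rwsem.h>

static DECLARE_RWSEM(my_sem);	/* statically initialized rw_semaphore */
static int my_data;

static int my_read(void)
{
	int val;

	down_read(&my_sem);	/* shared: multiple readers may hold the lock */
	val = my_data;
	up_read(&my_sem);
	return val;
}

static void my_write(int val)
{
	down_write(&my_sem);	/* exclusive: blocks both readers and writers */
	my_data = val;
	up_write(&my_sem);
}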
356
+#define rwsem_first_waiter(sem) \
357
+ list_first_entry(&sem->wait_list, struct rwsem_waiter, list)
358
+
359
+enum rwsem_wake_type {
360
+ RWSEM_WAKE_ANY, /* Wake whatever's at head of wait list */
361
+ RWSEM_WAKE_READERS, /* Wake readers only */
362
+ RWSEM_WAKE_READ_OWNED /* Waker thread holds the read lock */
363
+};
364
+
365
+enum writer_wait_state {
366
+ WRITER_NOT_FIRST, /* Writer is not first in wait list */
367
+ WRITER_FIRST, /* Writer is first in wait list */
368
+ WRITER_HANDOFF /* Writer is first & handoff needed */
369
+};
370
+
371
+/*
372
+ * The typical HZ value is either 250 or 1000. So set the minimum waiting
373
+ * time in the wait queue to at least 4ms, or 1 jiffy if that is longer,
374
+ * before initiating the handoff protocol.
375
+ */
376
+#define RWSEM_WAIT_TIMEOUT DIV_ROUND_UP(HZ, 250)
377
+
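As a worked example of the macro above (numbers added here, not from the source): with HZ = 1000, DIV_ROUND_UP(1000, 250) = 4 jiffies = 4ms; with HZ = 250 it is 1 jiffy, which is also 4ms; with an assumed HZ = 100 it rounds up to 1 jiffy = 10ms. So the wait is never shorter than roughly 4ms or one jiffy, whichever is longer.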
378
+/*
379
+ * Magic number to batch-wakeup waiting readers, even when writers are
380
+ * also present in the queue. This both limits the amount of work the
381
+ * waking thread must do and prevents any potential counter overflow,
382
+ * however unlikely.
383
+ */
384
+#define MAX_READERS_WAKEUP 0x100
385
+
386
+/*
387
+ * handle the lock release when processes blocked on it can now run
388
+ * - if we come here from up_xxxx(), then the RWSEM_FLAG_WAITERS bit must
389
+ * have been set.
390
+ * - there must be someone on the queue
391
+ * - the wait_lock must be held by the caller
392
+ * - tasks are marked for wakeup, the caller must later invoke wake_up_q()
393
+ * to actually wakeup the blocked task(s) and drop the reference count,
394
+ * preferably when the wait_lock is released
395
+ * - woken process blocks are discarded from the list after having task zeroed
396
+ * - writers are only marked woken if downgrading is false
397
+ */
398
+static void rwsem_mark_wake(struct rw_semaphore *sem,
399
+ enum rwsem_wake_type wake_type,
400
+ struct wake_q_head *wake_q)
401
+{
402
+ struct rwsem_waiter *waiter, *tmp;
403
+ long oldcount, woken = 0, adjustment = 0;
404
+ struct list_head wlist;
405
+
406
+ lockdep_assert_held(&sem->wait_lock);
407
+
408
+ /*
409
+ * Take a peek at the queue head waiter such that we can determine
410
+ * the wakeup(s) to perform.
411
+ */
412
+ waiter = rwsem_first_waiter(sem);
413
+
414
+ if (waiter->type == RWSEM_WAITING_FOR_WRITE) {
415
+ if (wake_type == RWSEM_WAKE_ANY) {
416
+ /*
417
+ * Mark writer at the front of the queue for wakeup.
418
+ * Until the task is actually awoken later by
419
+ * the caller, other writers are able to steal it.
420
+ * Readers, on the other hand, will block as they
421
+ * will notice the queued writer.
422
+ */
423
+ wake_q_add(wake_q, waiter->task);
424
+ lockevent_inc(rwsem_wake_writer);
425
+ }
426
+
427
+ return;
428
+ }
429
+
430
+ /*
431
+ * No reader wakeup if there are too many of them already.
432
+ */
433
+ if (unlikely(atomic_long_read(&sem->count) < 0))
434
+ return;
435
+
436
+ /*
437
+ * Writers might steal the lock before we grant it to the next reader.
438
+ * We prefer to do the first reader grant before counting readers
439
+ * so we can bail out early if a writer stole the lock.
440
+ */
441
+ if (wake_type != RWSEM_WAKE_READ_OWNED) {
442
+ struct task_struct *owner;
443
+
444
+ adjustment = RWSEM_READER_BIAS;
445
+ oldcount = atomic_long_fetch_add(adjustment, &sem->count);
446
+ if (unlikely(oldcount & RWSEM_WRITER_MASK)) {
447
+ /*
448
+ * When we've been waiting "too" long (for writers
449
+ * to give up the lock), request a HANDOFF to
450
+ * force the issue.
451
+ */
452
+ if (!(oldcount & RWSEM_FLAG_HANDOFF) &&
453
+ time_after(jiffies, waiter->timeout)) {
454
+ adjustment -= RWSEM_FLAG_HANDOFF;
455
+ lockevent_inc(rwsem_rlock_handoff);
456
+ }
457
+
458
+ atomic_long_add(-adjustment, &sem->count);
459
+ return;
460
+ }
461
+ /*
462
+ * Set it to reader-owned to give spinners an early
463
+ * indication that readers now have the lock.
464
+ * The reader nonspinnable bit seen at slowpath entry of
465
+ * the reader is copied over.
466
+ */
467
+ owner = waiter->task;
468
+ if (waiter->last_rowner & RWSEM_RD_NONSPINNABLE) {
469
+ owner = (void *)((unsigned long)owner | RWSEM_RD_NONSPINNABLE);
470
+ lockevent_inc(rwsem_opt_norspin);
471
+ }
472
+ __rwsem_set_reader_owned(sem, owner);
473
+ }
474
+
475
+ /*
476
+ * Grant up to MAX_READERS_WAKEUP read locks to all the readers in the
477
+ * queue. We know that woken will be at least 1 as we accounted
478
+ * for above. Note we increment the 'active part' of the count by the
479
+ * number of readers before waking any processes up.
480
+ *
481
+ * This is an adaptation of the phase-fair R/W locks where at the
482
+ * reader phase (first waiter is a reader), all readers are eligible
483
+ * to acquire the lock at the same time irrespective of their order
484
+ * in the queue. The writers acquire the lock according to their
485
+ * order in the queue.
486
+ *
487
+ * We have to do wakeup in 2 passes to prevent the possibility that
488
+ * the reader count may be decremented before it is incremented. It
489
+ * is because the to-be-woken waiter may not have slept yet. So it
490
+ * may see waiter->task cleared, finish its critical section and
491
+ * do an unlock before the reader count increment.
492
+ *
493
+ * 1) Collect the read-waiters in a separate list, count them and
494
+ * fully increment the reader count in rwsem.
495
+ * 2) For each waiters in the new list, clear waiter->task and
496
+ * put them into wake_q to be woken up later.
497
+ */
498
+ INIT_LIST_HEAD(&wlist);
499
+ list_for_each_entry_safe(waiter, tmp, &sem->wait_list, list) {
500
+ if (waiter->type == RWSEM_WAITING_FOR_WRITE)
501
+ continue;
502
+
503
+ woken++;
504
+ list_move_tail(&waiter->list, &wlist);
505
+
506
+ trace_android_vh_rwsem_mark_wake_readers(sem, waiter);
507
+ /*
508
+ * Limit # of readers that can be woken up per wakeup call.
509
+ */
510
+ if (woken >= MAX_READERS_WAKEUP)
511
+ break;
512
+ }
513
+
514
+ adjustment = woken * RWSEM_READER_BIAS - adjustment;
515
+ lockevent_cond_inc(rwsem_wake_reader, woken);
516
+ if (list_empty(&sem->wait_list)) {
517
+ /* hit end of list above */
518
+ adjustment -= RWSEM_FLAG_WAITERS;
519
+ }
520
+
521
+ /*
522
+ * When we've woken a reader, we no longer need to force writers
523
+ * to give up the lock and we can clear HANDOFF.
524
+ */
525
+ if (woken && (atomic_long_read(&sem->count) & RWSEM_FLAG_HANDOFF))
526
+ adjustment -= RWSEM_FLAG_HANDOFF;
527
+
528
+ if (adjustment)
529
+ atomic_long_add(adjustment, &sem->count);
530
+
531
+ /* 2nd pass */
532
+ list_for_each_entry_safe(waiter, tmp, &wlist, list) {
533
+ struct task_struct *tsk;
534
+
535
+ tsk = waiter->task;
536
+ get_task_struct(tsk);
537
+
538
+ /*
539
+ * Ensure calling get_task_struct() before setting the reader
540
+ * waiter to nil such that rwsem_down_read_slowpath() cannot
541
+ * race with do_exit() by always holding a reference count
542
+ * to the task to wakeup.
543
+ */
544
+ smp_store_release(&waiter->task, NULL);
545
+ /*
546
+ * Ensure issuing the wakeup (either by us or someone else)
547
+ * after setting the reader waiter to nil.
548
+ */
549
+ wake_q_add_safe(wake_q, tsk);
550
+ }
551
+}
552
+
553
+/*
554
+ * This function must be called with the sem->wait_lock held to prevent
555
+ * race conditions between checking the rwsem wait list and setting the
556
+ * sem->count accordingly.
557
+ *
558
+ * If wstate is WRITER_HANDOFF, it will make sure that either the handoff
559
+ * bit is set or the lock is acquired with the handoff bit cleared.
560
+ */
561
+static inline bool rwsem_try_write_lock(struct rw_semaphore *sem,
562
+ enum writer_wait_state wstate)
563
+{
564
+ long count, new;
565
+
566
+ lockdep_assert_held(&sem->wait_lock);
567
+
568
+ count = atomic_long_read(&sem->count);
569
+ do {
570
+ bool has_handoff = !!(count & RWSEM_FLAG_HANDOFF);
571
+
572
+ if (has_handoff && wstate == WRITER_NOT_FIRST)
573
+ return false;
574
+
575
+ new = count;
576
+
577
+ if (count & RWSEM_LOCK_MASK) {
578
+ if (has_handoff || (wstate != WRITER_HANDOFF))
579
+ return false;
580
+
581
+ new |= RWSEM_FLAG_HANDOFF;
582
+ } else {
583
+ new |= RWSEM_WRITER_LOCKED;
584
+ new &= ~RWSEM_FLAG_HANDOFF;
585
+
586
+ if (list_is_singular(&sem->wait_list))
587
+ new &= ~RWSEM_FLAG_WAITERS;
588
+ }
589
+ } while (!atomic_long_try_cmpxchg_acquire(&sem->count, &count, new));
590
+
591
+ /*
592
+ * We have either acquired the lock with handoff bit cleared or
593
+ * set the handoff bit.
594
+ */
595
+ if (new & RWSEM_FLAG_HANDOFF)
596
+ return false;
597
+
598
+ rwsem_set_owner(sem);
599
+ return true;
600
+}
601
+
602
+#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
603
+/*
604
+ * Try to acquire the read lock before the reader is put on the wait queue.
605
+ * Lock acquisition isn't allowed if the rwsem is locked or a writer handoff
606
+ * is ongoing.
607
+ */
608
+static inline bool rwsem_try_read_lock_unqueued(struct rw_semaphore *sem)
609
+{
610
+ long count = atomic_long_read(&sem->count);
611
+
612
+ if (count & (RWSEM_WRITER_MASK | RWSEM_FLAG_HANDOFF))
613
+ return false;
614
+
615
+ count = atomic_long_fetch_add_acquire(RWSEM_READER_BIAS, &sem->count);
616
+ if (!(count & (RWSEM_WRITER_MASK | RWSEM_FLAG_HANDOFF))) {
617
+ rwsem_set_reader_owned(sem);
618
+ lockevent_inc(rwsem_opt_rlock);
619
+ return true;
620
+ }
621
+
622
+ /* Back out the change */
623
+ atomic_long_add(-RWSEM_READER_BIAS, &sem->count);
624
+ return false;
625
+}
626
+
627
+/*
628
+ * Try to acquire the write lock before the writer has been put on the wait queue.
629
+ */
630
+static inline bool rwsem_try_write_lock_unqueued(struct rw_semaphore *sem)
631
+{
632
+ long count = atomic_long_read(&sem->count);
633
+
634
+ while (!(count & (RWSEM_LOCK_MASK|RWSEM_FLAG_HANDOFF))) {
635
+ if (atomic_long_try_cmpxchg_acquire(&sem->count, &count,
636
+ count | RWSEM_WRITER_LOCKED)) {
637
+ rwsem_set_owner(sem);
638
+ lockevent_inc(rwsem_opt_wlock);
639
+ return true;
640
+ }
641
+ }
642
+ return false;
643
+}
644
+
645
+static inline bool owner_on_cpu(struct task_struct *owner)
646
+{
647
+ /*
648
+ * Due to the lock holder preemption issue, we skip spinning if the
649
+ * task is not on a CPU or its CPU is preempted.
650
+ */
651
+ return owner->on_cpu && !vcpu_is_preempted(task_cpu(owner));
652
+}
653
+
654
+static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem,
655
+ unsigned long nonspinnable)
656
+{
657
+ struct task_struct *owner;
658
+ unsigned long flags;
659
+ bool ret = true;
660
+
661
+ if (need_resched()) {
662
+ lockevent_inc(rwsem_opt_fail);
663
+ return false;
664
+ }
665
+
666
+ preempt_disable();
667
+ rcu_read_lock();
668
+ owner = rwsem_owner_flags(sem, &flags);
669
+ /*
670
+ * Don't check the read-owner as the entry may be stale.
671
+ */
672
+ if ((flags & nonspinnable) ||
673
+ (owner && !(flags & RWSEM_READER_OWNED) && !owner_on_cpu(owner)))
674
+ ret = false;
675
+ rcu_read_unlock();
676
+ preempt_enable();
677
+
678
+ lockevent_cond_inc(rwsem_opt_fail, !ret);
679
+ return ret;
680
+}
681
+
682
+/*
683
+ * The rwsem_spin_on_owner() function returns the following 4 values
684
+ * depending on the lock owner state.
685
+ * OWNER_NULL : owner is currently NULL
686
+ * OWNER_WRITER: when owner changes and is a writer
687
+ * OWNER_READER: when owner changes and the new owner may be a reader.
688
+ * OWNER_NONSPINNABLE:
689
+ * when optimistic spinning has to stop because either the
690
+ * owner stops running, is unknown, or its timeslice has
691
+ * been used up.
692
+ */
693
+enum owner_state {
694
+ OWNER_NULL = 1 << 0,
695
+ OWNER_WRITER = 1 << 1,
696
+ OWNER_READER = 1 << 2,
697
+ OWNER_NONSPINNABLE = 1 << 3,
698
+};
699
+#define OWNER_SPINNABLE (OWNER_NULL | OWNER_WRITER | OWNER_READER)
700
+
701
+static inline enum owner_state
702
+rwsem_owner_state(struct task_struct *owner, unsigned long flags, unsigned long nonspinnable)
703
+{
704
+ if (flags & nonspinnable)
705
+ return OWNER_NONSPINNABLE;
706
+
707
+ if (flags & RWSEM_READER_OWNED)
708
+ return OWNER_READER;
709
+
710
+ return owner ? OWNER_WRITER : OWNER_NULL;
711
+}
712
+
713
+static noinline enum owner_state
714
+rwsem_spin_on_owner(struct rw_semaphore *sem, unsigned long nonspinnable)
715
+{
716
+ struct task_struct *new, *owner;
717
+ unsigned long flags, new_flags;
718
+ enum owner_state state;
719
+
720
+ owner = rwsem_owner_flags(sem, &flags);
721
+ state = rwsem_owner_state(owner, flags, nonspinnable);
722
+ if (state != OWNER_WRITER)
723
+ return state;
724
+
725
+ rcu_read_lock();
726
+ for (;;) {
727
+ /*
728
+ * When a waiting writer sets the handoff flag, it may spin
729
+ * on the owner as well. Once that writer acquires the lock,
730
+ * we can spin on it. So we don't need to quit even when the
731
+ * handoff bit is set.
732
+ */
733
+ new = rwsem_owner_flags(sem, &new_flags);
734
+ if ((new != owner) || (new_flags != flags)) {
735
+ state = rwsem_owner_state(new, new_flags, nonspinnable);
736
+ break;
737
+ }
738
+
739
+ /*
740
+ * Ensure we emit the owner->on_cpu dereference _after_
741
+ * checking sem->owner still matches owner, if that fails,
742
+ * owner might point to free()d memory, if it still matches,
743
+ * the rcu_read_lock() ensures the memory stays valid.
744
+ */
745
+ barrier();
746
+
747
+ if (need_resched() || !owner_on_cpu(owner)) {
748
+ state = OWNER_NONSPINNABLE;
749
+ break;
750
+ }
751
+
752
+ cpu_relax();
753
+ }
754
+ rcu_read_unlock();
755
+
756
+ return state;
757
+}
758
+
759
+/*
760
+ * Calculate reader-owned rwsem spinning threshold for writer
761
+ *
762
+ * The more readers own the rwsem, the longer it will take for them to
763
+ * wind down and free the rwsem. So the empirical formula used to
764
+ * determine the actual spinning time limit here is:
765
+ *
766
+ * Spinning threshold = (10 + nr_readers/2)us
767
+ *
768
+ * The limit is capped to a maximum of 25us (30 readers). This is just
769
+ * a heuristic and is subject to change in the future.
770
+ */
771
+static inline u64 rwsem_rspin_threshold(struct rw_semaphore *sem)
772
+{
773
+ long count = atomic_long_read(&sem->count);
774
+ int readers = count >> RWSEM_READER_SHIFT;
775
+ u64 delta;
776
+
777
+ if (readers > 30)
778
+ readers = 30;
779
+ delta = (20 + readers) * NSEC_PER_USEC / 2;
780
+
781
+ return sched_clock() + delta;
782
+}
783
+
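As a quick check of the formula above (a worked example, not part of the kernel source): with 4 readers, delta = (20 + 4) * NSEC_PER_USEC / 2 = 12us, matching the (10 + 4/2)us given in the comment; at the 30-reader cap, delta = (20 + 30) * NSEC_PER_USEC / 2 = 25us, the stated maximum.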
784
+static bool rwsem_optimistic_spin(struct rw_semaphore *sem, bool wlock)
785
+{
786
+ bool taken = false;
787
+ int prev_owner_state = OWNER_NULL;
788
+ int loop = 0;
789
+ u64 rspin_threshold = 0;
790
+ unsigned long nonspinnable = wlock ? RWSEM_WR_NONSPINNABLE
791
+ : RWSEM_RD_NONSPINNABLE;
792
+
793
+ preempt_disable();
794
+
795
+ /* sem->wait_lock should not be held when doing optimistic spinning */
796
+ if (!osq_lock(&sem->osq))
797
+ goto done;
798
+
799
+ /*
800
+ * Optimistically spin on the owner field and attempt to acquire the
801
+ * lock whenever the owner changes. Spinning will be stopped when:
802
+ * 1) the owning writer isn't running; or
803
+ * 2) readers own the lock and spinning time has exceeded limit.
804
+ */
805
+ for (;;) {
806
+ enum owner_state owner_state;
807
+
808
+ owner_state = rwsem_spin_on_owner(sem, nonspinnable);
809
+ if (!(owner_state & OWNER_SPINNABLE))
810
+ break;
811
+
812
+ /*
813
+ * Try to acquire the lock
814
+ */
815
+ taken = wlock ? rwsem_try_write_lock_unqueued(sem)
816
+ : rwsem_try_read_lock_unqueued(sem);
817
+
818
+ if (taken)
819
+ break;
820
+
821
+ /*
822
+ * Time-based reader-owned rwsem optimistic spinning
823
+ */
824
+ if (wlock && (owner_state == OWNER_READER)) {
825
+ /*
826
+ * Re-initialize rspin_threshold every time
827
+ * the owner state changes from non-reader to reader.
828
+ * This allows a writer to steal the lock in between
829
+ * 2 reader phases and have the threshold reset at
830
+ * the beginning of the 2nd reader phase.
831
+ */
832
+ if (prev_owner_state != OWNER_READER) {
833
+ if (rwsem_test_oflags(sem, nonspinnable))
834
+ break;
835
+ rspin_threshold = rwsem_rspin_threshold(sem);
836
+ loop = 0;
837
+ }
838
+
839
+ /*
840
+ * Check time threshold once every 16 iterations to
841
+ * avoid calling sched_clock() too frequently so
842
+ * as to reduce the average latency between the times
843
+ * when the lock becomes free and when the spinner
844
+ * is ready to do a trylock.
845
+ */
846
+ else if (!(++loop & 0xf) && (sched_clock() > rspin_threshold)) {
847
+ rwsem_set_nonspinnable(sem);
848
+ lockevent_inc(rwsem_opt_nospin);
849
+ break;
850
+ }
851
+ }
852
+
853
+ /*
854
+ * An RT task cannot do optimistic spinning if it cannot
855
+ * be sure the lock holder is running or live-lock may
856
+ * happen if the current task and the lock holder happen
857
+ * to run on the same CPU. However, aborting optimistic
858
+ * spinning while a NULL owner is detected may miss some
859
+ * opportunity where spinning can continue without causing
860
+ * a problem.
861
+ *
862
+ * There are 2 possible cases where an RT task may be able
863
+ * to continue spinning.
864
+ *
865
+ * 1) The lock owner is in the process of releasing the
866
+ * lock, sem->owner is cleared but the lock has not
867
+ * been released yet.
868
+ * 2) The lock was free and owner cleared, but another
869
+ * task just comes in and acquires the lock before
870
+ * we try to get it. The new owner may be a spinnable
871
+ * writer.
872
+ *
873
+ * To take advantage of the two scenarios listed above, the RT
874
+ * task is made to retry one more time to see if it can
875
+ * acquire the lock or continue spinning on the new owning
876
+ * writer. Of course, if the time lag is long enough or the
877
+ * new owner is not a writer or spinnable, the RT task will
878
+ * quit spinning.
879
+ *
880
+ * If the owner is a writer, the need_resched() check is
881
+ * done inside rwsem_spin_on_owner(). If the owner is not
882
+ * a writer, need_resched() check needs to be done here.
883
+ */
884
+ if (owner_state != OWNER_WRITER) {
885
+ if (need_resched())
886
+ break;
887
+ if (rt_task(current) &&
888
+ (prev_owner_state != OWNER_WRITER))
889
+ break;
890
+ }
891
+ prev_owner_state = owner_state;
892
+
893
+ /*
894
+ * The cpu_relax() call is a compiler barrier which forces
895
+ * everything in this loop to be re-loaded. We don't need
896
+ * memory barriers as we'll eventually observe the right
897
+ * values at the cost of a few extra spins.
898
+ */
899
+ cpu_relax();
900
+ }
901
+ osq_unlock(&sem->osq);
902
+done:
903
+ preempt_enable();
904
+ lockevent_cond_inc(rwsem_opt_fail, !taken);
905
+ return taken;
906
+}
907
+
908
+/*
909
+ * Clear the owner's RWSEM_WR_NONSPINNABLE bit if it is set. This should
910
+ * only be called when the reader count reaches 0.
911
+ *
912
+ * This gives writers a better chance to acquire the rwsem first before
913
+ * readers when the rwsem was being held by readers for a relatively long
914
+ * period of time. A race can happen where an optimistic spinner may have
915
+ * just stolen the rwsem and set the owner, but just clearing the
916
+ * RWSEM_WR_NONSPINNABLE bit will do no harm anyway.
917
+ */
918
+static inline void clear_wr_nonspinnable(struct rw_semaphore *sem)
919
+{
920
+ if (rwsem_test_oflags(sem, RWSEM_WR_NONSPINNABLE))
921
+ atomic_long_andnot(RWSEM_WR_NONSPINNABLE, &sem->owner);
922
+}
923
+
924
+/*
925
+ * This function is called when the reader fails to acquire the lock via
926
+ * optimistic spinning. In this case we will still attempt to do a trylock
927
+ * when comparing the rwsem state right now with the state when entering
928
+ * the slowpath indicates that the reader is still in a valid reader phase.
929
+ * This happens when the following conditions are true:
930
+ *
931
+ * 1) The lock is currently reader owned, and
932
+ * 2) The lock was previously not reader-owned or the last read owner has changed.
933
+ *
934
+ * In the former case, we have transitioned from a writer phase to a
935
+ * reader-phase while spinning. In the latter case, it means the reader
936
+ * phase hasn't ended when we entered the optimistic spinning loop. In
937
+ * both cases, the reader is eligible to acquire the lock. This is the
938
+ * secondary path where a read lock is acquired optimistically.
939
+ *
940
+ * The reader non-spinnable bit wasn't set at time of entry or it will
941
+ * not be here at all.
942
+ */
943
+static inline bool rwsem_reader_phase_trylock(struct rw_semaphore *sem,
944
+ unsigned long last_rowner)
945
+{
946
+ unsigned long owner = atomic_long_read(&sem->owner);
947
+
948
+ if (!(owner & RWSEM_READER_OWNED))
949
+ return false;
950
+
951
+ if (((owner ^ last_rowner) & ~RWSEM_OWNER_FLAGS_MASK) &&
952
+ rwsem_try_read_lock_unqueued(sem)) {
953
+ lockevent_inc(rwsem_opt_rlock2);
954
+ lockevent_add(rwsem_opt_fail, -1);
955
+ return true;
956
+ }
957
+ return false;
958
+}
959
+#else
960
+static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem,
961
+ unsigned long nonspinnable)
962
+{
963
+ return false;
964
+}
965
+
966
+static inline bool rwsem_optimistic_spin(struct rw_semaphore *sem, bool wlock)
967
+{
968
+ return false;
969
+}
970
+
971
+static inline void clear_wr_nonspinnable(struct rw_semaphore *sem) { }
972
+
973
+static inline bool rwsem_reader_phase_trylock(struct rw_semaphore *sem,
974
+ unsigned long last_rowner)
975
+{
976
+ return false;
977
+}
978
+
979
+static inline int
980
+rwsem_spin_on_owner(struct rw_semaphore *sem, unsigned long nonspinnable)
981
+{
982
+ return 0;
983
+}
984
+#define OWNER_NULL 1
985
+#endif
986
+
987
+/*
988
+ * Wait for the read lock to be granted
989
+ */
990
+static struct rw_semaphore __sched *
991
+rwsem_down_read_slowpath(struct rw_semaphore *sem, int state)
992
+{
993
+ long count, adjustment = -RWSEM_READER_BIAS;
994
+ struct rwsem_waiter waiter;
995
+ DEFINE_WAKE_Q(wake_q);
996
+ bool wake = false;
997
+ bool already_on_list = false;
998
+
999
+ /*
1000
+ * Save the current read-owner of rwsem, if available, and the
1001
+ * reader nonspinnable bit.
1002
+ */
1003
+ waiter.last_rowner = atomic_long_read(&sem->owner);
1004
+ if (!(waiter.last_rowner & RWSEM_READER_OWNED))
1005
+ waiter.last_rowner &= RWSEM_RD_NONSPINNABLE;
1006
+
1007
+ if (!rwsem_can_spin_on_owner(sem, RWSEM_RD_NONSPINNABLE))
1008
+ goto queue;
1009
+
1010
+ /*
1011
+ * Undo read bias from down_read() and do optimistic spinning.
1012
+ */
1013
+ atomic_long_add(-RWSEM_READER_BIAS, &sem->count);
1014
+ adjustment = 0;
1015
+ if (rwsem_optimistic_spin(sem, false)) {
1016
+ /* rwsem_optimistic_spin() implies ACQUIRE on success */
1017
+ /*
1018
+ * Wake up other readers in the wait list if the front
1019
+ * waiter is a reader.
1020
+ */
1021
+ if ((atomic_long_read(&sem->count) & RWSEM_FLAG_WAITERS)) {
1022
+ raw_spin_lock_irq(&sem->wait_lock);
1023
+ if (!list_empty(&sem->wait_list))
1024
+ rwsem_mark_wake(sem, RWSEM_WAKE_READ_OWNED,
1025
+ &wake_q);
1026
+ raw_spin_unlock_irq(&sem->wait_lock);
1027
+ wake_up_q(&wake_q);
1028
+ }
1029
+ trace_android_vh_record_rwsem_lock_starttime(current, jiffies);
1030
+ return sem;
1031
+ } else if (rwsem_reader_phase_trylock(sem, waiter.last_rowner)) {
1032
+ /* rwsem_reader_phase_trylock() implies ACQUIRE on success */
1033
+ trace_android_vh_record_rwsem_lock_starttime(current, jiffies);
1034
+ return sem;
1035
+ }
1036
+
1037
+queue:
1038
+ waiter.task = current;
1039
+ waiter.type = RWSEM_WAITING_FOR_READ;
1040
+ waiter.timeout = jiffies + RWSEM_WAIT_TIMEOUT;
1041
+
1042
+ raw_spin_lock_irq(&sem->wait_lock);
1043
+ if (list_empty(&sem->wait_list)) {
1044
+ /*
1045
+ * In case the wait queue is empty and the lock isn't owned
1046
+ * by a writer or has the handoff bit set, this reader can
1047
+ * exit the slowpath and return immediately as its
1048
+ * RWSEM_READER_BIAS has already been set in the count.
1049
+ */
1050
+ if (adjustment && !(atomic_long_read(&sem->count) &
1051
+ (RWSEM_WRITER_MASK | RWSEM_FLAG_HANDOFF))) {
1052
+ /* Provide lock ACQUIRE */
1053
+ smp_acquire__after_ctrl_dep();
1054
+ raw_spin_unlock_irq(&sem->wait_lock);
1055
+ rwsem_set_reader_owned(sem);
1056
+ lockevent_inc(rwsem_rlock_fast);
1057
+ return sem;
1058
+ }
1059
+ adjustment += RWSEM_FLAG_WAITERS;
1060
+ }
1061
+ trace_android_vh_alter_rwsem_list_add(
1062
+ &waiter,
1063
+ sem, &already_on_list);
1064
+ if (!already_on_list)
1065
+ list_add_tail(&waiter.list, &sem->wait_list);
1066
+
1067
+ /* we're now waiting on the lock, but no longer actively locking */
1068
+ if (adjustment)
1069
+ count = atomic_long_add_return(adjustment, &sem->count);
1070
+ else
1071
+ count = atomic_long_read(&sem->count);
1072
+
1073
+ /*
1074
+ * If there are no active locks, wake the front queued process(es).
1075
+ *
1076
+ * If there are no writers and we are first in the queue,
1077
+ * wake our own waiter to join the existing active readers!
1078
+ */
1079
+ if (!(count & RWSEM_LOCK_MASK)) {
1080
+ clear_wr_nonspinnable(sem);
1081
+ wake = true;
1082
+ }
1083
+ if (wake || (!(count & RWSEM_WRITER_MASK) &&
1084
+ (adjustment & RWSEM_FLAG_WAITERS)))
1085
+ rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);
1086
+
1087
+ trace_android_vh_rwsem_wake(sem);
1088
+ raw_spin_unlock_irq(&sem->wait_lock);
1089
+ wake_up_q(&wake_q);
1090
+
1091
+ /* wait to be given the lock */
1092
+ trace_android_vh_rwsem_read_wait_start(sem);
1093
+ for (;;) {
1094
+ set_current_state(state);
1095
+ if (!smp_load_acquire(&waiter.task)) {
1096
+ /* Matches rwsem_mark_wake()'s smp_store_release(). */
1097
+ break;
1098
+ }
1099
+ if (signal_pending_state(state, current)) {
1100
+ raw_spin_lock_irq(&sem->wait_lock);
1101
+ if (waiter.task)
1102
+ goto out_nolock;
1103
+ raw_spin_unlock_irq(&sem->wait_lock);
1104
+ /* Ordered by sem->wait_lock against rwsem_mark_wake(). */
1105
+ break;
1106
+ }
1107
+ schedule();
1108
+ lockevent_inc(rwsem_sleep_reader);
1109
+ }
1110
+
1111
+ __set_current_state(TASK_RUNNING);
1112
+ trace_android_vh_rwsem_read_wait_finish(sem);
1113
+ lockevent_inc(rwsem_rlock);
1114
+ trace_android_vh_record_rwsem_lock_starttime(current, jiffies);
1115
+ return sem;
1116
+
1117
+out_nolock:
1118
+ list_del(&waiter.list);
1119
+ if (list_empty(&sem->wait_list)) {
1120
+ atomic_long_andnot(RWSEM_FLAG_WAITERS|RWSEM_FLAG_HANDOFF,
1121
+ &sem->count);
1122
+ }
1123
+ raw_spin_unlock_irq(&sem->wait_lock);
1124
+ __set_current_state(TASK_RUNNING);
1125
+ trace_android_vh_rwsem_read_wait_finish(sem);
1126
+ lockevent_inc(rwsem_rlock_fail);
1127
+ return ERR_PTR(-EINTR);
1128
+}
1129
+
1130
+/*
1131
+ * This function is called by a write lock owner. So the owner value
1132
+ * won't get changed by others.
1133
+ */
1134
+static inline void rwsem_disable_reader_optspin(struct rw_semaphore *sem,
1135
+ bool disable)
1136
+{
1137
+ if (unlikely(disable)) {
1138
+ atomic_long_or(RWSEM_RD_NONSPINNABLE, &sem->owner);
1139
+ lockevent_inc(rwsem_opt_norspin);
1140
+ }
1141
+}
1142
+
1143
+/*
1144
+ * Wait until we successfully acquire the write lock
1145
+ */
1146
+static struct rw_semaphore *
1147
+rwsem_down_write_slowpath(struct rw_semaphore *sem, int state)
1148
+{
1149
+ long count;
1150
+ bool disable_rspin;
1151
+ enum writer_wait_state wstate;
1152
+ struct rwsem_waiter waiter;
1153
+ struct rw_semaphore *ret = sem;
1154
+ DEFINE_WAKE_Q(wake_q);
1155
+ bool already_on_list = false;
1156
+
1157
+ /* do optimistic spinning and steal lock if possible */
1158
+ if (rwsem_can_spin_on_owner(sem, RWSEM_WR_NONSPINNABLE) &&
1159
+ rwsem_optimistic_spin(sem, true)) {
1160
+ /* rwsem_optimistic_spin() implies ACQUIRE on success */
1161
+ trace_android_vh_record_rwsem_lock_starttime(current, jiffies);
1162
+ return sem;
1163
+ }
1164
+
1165
+ /*
1166
+ * Disable reader optimistic spinning for this rwsem after
1167
+ * acquiring the write lock when the setting of the nonspinnable
1168
+ * bits is observed.
1169
+ */
1170
+ disable_rspin = atomic_long_read(&sem->owner) & RWSEM_NONSPINNABLE;
1171
+
1172
+ /*
1173
+ * Optimistic spinning failed, proceed to the slowpath
1174
+ * and block until we can acquire the sem.
1175
+ */
1176
+ waiter.task = current;
1177
+ waiter.type = RWSEM_WAITING_FOR_WRITE;
1178
+ waiter.timeout = jiffies + RWSEM_WAIT_TIMEOUT;
1179
+
1180
+ raw_spin_lock_irq(&sem->wait_lock);
1181
+
1182
+ /* account for this before adding a new element to the list */
1183
+ wstate = list_empty(&sem->wait_list) ? WRITER_FIRST : WRITER_NOT_FIRST;
1184
+
1185
+ trace_android_vh_alter_rwsem_list_add(
1186
+ &waiter,
1187
+ sem, &already_on_list);
1188
+ if (!already_on_list)
1189
+ list_add_tail(&waiter.list, &sem->wait_list);
1190
+
1191
+ /* we're now waiting on the lock */
1192
+ if (wstate == WRITER_NOT_FIRST) {
1193
+ count = atomic_long_read(&sem->count);
1194
+
1195
+ /*
1196
+ * If there were already threads queued before us and:
1197
+ * 1) there are no active locks, wake the front
1198
+ * queued process(es) as the handoff bit might be set.
1199
+ * 2) there are no active writers and some readers, the lock
1200
+ * must be read owned; so we try to wake any read lock
1201
+ * waiters that were queued ahead of us.
1202
+ */
1203
+ if (count & RWSEM_WRITER_MASK)
1204
+ goto wait;
1205
+
1206
+ rwsem_mark_wake(sem, (count & RWSEM_READER_MASK)
1207
+ ? RWSEM_WAKE_READERS
1208
+ : RWSEM_WAKE_ANY, &wake_q);
1209
+
1210
+ if (!wake_q_empty(&wake_q)) {
1211
+ /*
1212
+ * We want to minimize wait_lock hold time especially
1213
+ * when a large number of readers are to be woken up.
1214
+ */
1215
+ raw_spin_unlock_irq(&sem->wait_lock);
1216
+ wake_up_q(&wake_q);
1217
+ wake_q_init(&wake_q); /* Used again, reinit */
1218
+ raw_spin_lock_irq(&sem->wait_lock);
1219
+ }
1220
+ } else {
1221
+ atomic_long_or(RWSEM_FLAG_WAITERS, &sem->count);
1222
+ }
1223
+
1224
+wait:
1225
+ trace_android_vh_rwsem_wake(sem);
1226
+ /* wait until we successfully acquire the lock */
1227
+ trace_android_vh_rwsem_write_wait_start(sem);
1228
+ set_current_state(state);
1229
+ for (;;) {
1230
+ if (rwsem_try_write_lock(sem, wstate)) {
1231
+ /* rwsem_try_write_lock() implies ACQUIRE on success */
1232
+ break;
1233
+ }
1234
+
1235
+ raw_spin_unlock_irq(&sem->wait_lock);
1236
+
1237
+ /*
1238
+ * After setting the handoff bit and failing to acquire
1239
+ * the lock, attempt to spin on owner to accelerate lock
1240
+ * transfer. If the previous owner is an on-cpu writer and it
1241
+ * has just released the lock, OWNER_NULL will be returned.
1242
+ * In this case, we attempt to acquire the lock again
1243
+ * without sleeping.
1244
+ */
1245
+ if (wstate == WRITER_HANDOFF &&
1246
+ rwsem_spin_on_owner(sem, RWSEM_NONSPINNABLE) == OWNER_NULL)
1247
+ goto trylock_again;
1248
+
1249
+ /* Block until there are no active lockers. */
1250
+ for (;;) {
1251
+ if (signal_pending_state(state, current))
1252
+ goto out_nolock;
1253
+
1254
+ schedule();
1255
+ lockevent_inc(rwsem_sleep_writer);
1256
+ set_current_state(state);
1257
+ /*
1258
+ * If HANDOFF bit is set, unconditionally do
1259
+ * a trylock.
1260
+ */
1261
+ if (wstate == WRITER_HANDOFF)
1262
+ break;
1263
+
1264
+ if ((wstate == WRITER_NOT_FIRST) &&
1265
+ (rwsem_first_waiter(sem) == &waiter))
1266
+ wstate = WRITER_FIRST;
1267
+
1268
+ count = atomic_long_read(&sem->count);
1269
+ if (!(count & RWSEM_LOCK_MASK))
1270
+ break;
1271
+
1272
+ /*
1273
+ * The setting of the handoff bit is deferred
1274
+ * until rwsem_try_write_lock() is called.
1275
+ */
1276
+ if ((wstate == WRITER_FIRST) && (rt_task(current) ||
1277
+ time_after(jiffies, waiter.timeout))) {
1278
+ wstate = WRITER_HANDOFF;
1279
+ lockevent_inc(rwsem_wlock_handoff);
1280
+ break;
1281
+ }
1282
+ }
1283
+trylock_again:
1284
+ raw_spin_lock_irq(&sem->wait_lock);
1285
+ }
1286
+ __set_current_state(TASK_RUNNING);
1287
+ trace_android_vh_rwsem_write_wait_finish(sem);
1288
+ list_del(&waiter.list);
1289
+ rwsem_disable_reader_optspin(sem, disable_rspin);
1290
+ raw_spin_unlock_irq(&sem->wait_lock);
1291
+ lockevent_inc(rwsem_wlock);
1292
+ trace_android_vh_record_rwsem_lock_starttime(current, jiffies);
1293
+ return ret;
1294
+
1295
+out_nolock:
1296
+ __set_current_state(TASK_RUNNING);
1297
+ trace_android_vh_rwsem_write_wait_finish(sem);
1298
+ raw_spin_lock_irq(&sem->wait_lock);
1299
+ list_del(&waiter.list);
1300
+
1301
+ if (unlikely(wstate == WRITER_HANDOFF))
1302
+ atomic_long_andnot(RWSEM_FLAG_HANDOFF, &sem->count);
1303
+
1304
+ if (list_empty(&sem->wait_list))
1305
+ atomic_long_andnot(RWSEM_FLAG_WAITERS, &sem->count);
1306
+ else
1307
+ rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);
1308
+ raw_spin_unlock_irq(&sem->wait_lock);
1309
+ wake_up_q(&wake_q);
1310
+ lockevent_inc(rwsem_wlock_fail);
1311
+
1312
+ return ERR_PTR(-EINTR);
1313
+}
1314
+
1315
+/*
1316
+ * handle waking up a waiter on the semaphore
1317
+ * - up_read/up_write has decremented the active part of count if we come here
1318
+ */
1319
+static struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem, long count)
1320
+{
1321
+ unsigned long flags;
1322
+ DEFINE_WAKE_Q(wake_q);
1323
+
1324
+ raw_spin_lock_irqsave(&sem->wait_lock, flags);
1325
+
1326
+ if (!list_empty(&sem->wait_list))
1327
+ rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);
1328
+ trace_android_vh_rwsem_wake_finish(sem);
1329
+
1330
+ raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
1331
+ wake_up_q(&wake_q);
1332
+
1333
+ return sem;
1334
+}
1335
+
1336
+/*
1337
+ * downgrade a write lock into a read lock
1338
+ * - caller incremented waiting part of count and discovered it still negative
1339
+ * - just wake up any readers at the front of the queue
1340
+ */
1341
+static struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem)
1342
+{
1343
+ unsigned long flags;
1344
+ DEFINE_WAKE_Q(wake_q);
1345
+
1346
+ raw_spin_lock_irqsave(&sem->wait_lock, flags);
1347
+
1348
+ if (!list_empty(&sem->wait_list))
1349
+ rwsem_mark_wake(sem, RWSEM_WAKE_READ_OWNED, &wake_q);
1350
+
1351
+ raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
1352
+ wake_up_q(&wake_q);
1353
+
1354
+ return sem;
1355
+}
1356
+
1357
+/*
1358
+ * lock for reading
1359
+ */
1360
+static inline void __down_read(struct rw_semaphore *sem)
1361
+{
1362
+ if (!rwsem_read_trylock(sem)) {
1363
+ rwsem_down_read_slowpath(sem, TASK_UNINTERRUPTIBLE);
1364
+ DEBUG_RWSEMS_WARN_ON(!is_rwsem_reader_owned(sem), sem);
1365
+ } else {
1366
+ rwsem_set_reader_owned(sem);
1367
+ }
1368
+}
1369
+
1370
+static inline int __down_read_interruptible(struct rw_semaphore *sem)
1371
+{
1372
+ if (!rwsem_read_trylock(sem)) {
1373
+ if (IS_ERR(rwsem_down_read_slowpath(sem, TASK_INTERRUPTIBLE)))
1374
+ return -EINTR;
1375
+ DEBUG_RWSEMS_WARN_ON(!is_rwsem_reader_owned(sem), sem);
1376
+ } else {
1377
+ rwsem_set_reader_owned(sem);
1378
+ }
1379
+ return 0;
1380
+}
1381
+
1382
+static inline int __down_read_killable(struct rw_semaphore *sem)
1383
+{
1384
+ if (!rwsem_read_trylock(sem)) {
1385
+ if (IS_ERR(rwsem_down_read_slowpath(sem, TASK_KILLABLE)))
1386
+ return -EINTR;
1387
+ DEBUG_RWSEMS_WARN_ON(!is_rwsem_reader_owned(sem), sem);
1388
+ } else {
1389
+ rwsem_set_reader_owned(sem);
1390
+ }
1391
+ return 0;
1392
+}
1393
+
1394
+static inline int __down_read_trylock(struct rw_semaphore *sem)
1395
+{
1396
+ long tmp;
1397
+
1398
+ DEBUG_RWSEMS_WARN_ON(sem->magic != sem, sem);
1399
+
1400
+ /*
1401
+ * Optimize for the case when the rwsem is not locked at all.
1402
+ */
1403
+ tmp = RWSEM_UNLOCKED_VALUE;
1404
+ do {
1405
+ if (atomic_long_try_cmpxchg_acquire(&sem->count, &tmp,
1406
+ tmp + RWSEM_READER_BIAS)) {
1407
+ rwsem_set_reader_owned(sem);
1408
+ trace_android_vh_record_rwsem_lock_starttime(current, jiffies);
1409
+ return 1;
1410
+ }
1411
+ } while (!(tmp & RWSEM_READ_FAILED_MASK));
1412
+ return 0;
1413
+}
1414
+
1415
+/*
1416
+ * lock for writing
1417
+ */
1418
+static inline void __down_write(struct rw_semaphore *sem)
1419
+{
1420
+ long tmp = RWSEM_UNLOCKED_VALUE;
1421
+
1422
+ if (unlikely(!atomic_long_try_cmpxchg_acquire(&sem->count, &tmp,
1423
+ RWSEM_WRITER_LOCKED))) {
1424
+ rwsem_down_write_slowpath(sem, TASK_UNINTERRUPTIBLE);
1425
+ } else {
1426
+ trace_android_vh_record_rwsem_lock_starttime(current, jiffies);
1427
+ rwsem_set_owner(sem);
1428
+ }
1429
+}
1430
+
1431
+static inline int __down_write_killable(struct rw_semaphore *sem)
1432
+{
1433
+ long tmp = RWSEM_UNLOCKED_VALUE;
1434
+
1435
+ if (unlikely(!atomic_long_try_cmpxchg_acquire(&sem->count, &tmp,
1436
+ RWSEM_WRITER_LOCKED))) {
1437
+ if (IS_ERR(rwsem_down_write_slowpath(sem, TASK_KILLABLE)))
1438
+ return -EINTR;
1439
+ } else {
1440
+ trace_android_vh_record_rwsem_lock_starttime(current, jiffies);
1441
+ rwsem_set_owner(sem);
1442
+ }
1443
+ return 0;
1444
+}
1445
+
1446
+static inline int __down_write_trylock(struct rw_semaphore *sem)
1447
+{
1448
+ long tmp;
1449
+
1450
+ DEBUG_RWSEMS_WARN_ON(sem->magic != sem, sem);
1451
+
1452
+ tmp = RWSEM_UNLOCKED_VALUE;
1453
+ if (atomic_long_try_cmpxchg_acquire(&sem->count, &tmp,
1454
+ RWSEM_WRITER_LOCKED)) {
1455
+ rwsem_set_owner(sem);
1456
+ trace_android_vh_record_rwsem_lock_starttime(current, jiffies);
1457
+ return true;
1458
+ }
1459
+ return false;
1460
+}
1461
+
1462
+/*
1463
+ * unlock after reading
1464
+ */
1465
+static inline void __up_read(struct rw_semaphore *sem)
1466
+{
1467
+ long tmp;
1468
+
1469
+ DEBUG_RWSEMS_WARN_ON(sem->magic != sem, sem);
1470
+ DEBUG_RWSEMS_WARN_ON(!is_rwsem_reader_owned(sem), sem);
1471
+
1472
+ trace_android_vh_record_rwsem_lock_starttime(current, 0);
1473
+ rwsem_clear_reader_owned(sem);
1474
+ tmp = atomic_long_add_return_release(-RWSEM_READER_BIAS, &sem->count);
1475
+ DEBUG_RWSEMS_WARN_ON(tmp < 0, sem);
1476
+ if (unlikely((tmp & (RWSEM_LOCK_MASK|RWSEM_FLAG_WAITERS)) ==
1477
+ RWSEM_FLAG_WAITERS)) {
1478
+ clear_wr_nonspinnable(sem);
1479
+ rwsem_wake(sem, tmp);
1480
+ }
1481
+ trace_android_vh_rwsem_up_read_end(sem);
1482
+}
1483
+
1484
+/*
1485
+ * unlock after writing
1486
+ */
1487
+static inline void __up_write(struct rw_semaphore *sem)
1488
+{
1489
+ long tmp;
1490
+
1491
+ DEBUG_RWSEMS_WARN_ON(sem->magic != sem, sem);
1492
+ /*
1493
+ * sem->owner may differ from current if the ownership is transferred
1494
+ * to an anonymous writer by setting the RWSEM_NONSPINNABLE bits.
1495
+ */
1496
+ DEBUG_RWSEMS_WARN_ON((rwsem_owner(sem) != current) &&
1497
+ !rwsem_test_oflags(sem, RWSEM_NONSPINNABLE), sem);
1498
+
1499
+ trace_android_vh_record_rwsem_lock_starttime(current, 0);
1500
+ rwsem_clear_owner(sem);
1501
+ tmp = atomic_long_fetch_add_release(-RWSEM_WRITER_LOCKED, &sem->count);
1502
+ if (unlikely(tmp & RWSEM_FLAG_WAITERS))
1503
+ rwsem_wake(sem, tmp);
1504
+ trace_android_vh_rwsem_up_write_end(sem);
1505
+}
1506
+
1507
+/*
1508
+ * downgrade write lock to read lock
1509
+ */
1510
+static inline void __downgrade_write(struct rw_semaphore *sem)
1511
+{
1512
+ long tmp;
1513
+
1514
+ /*
1515
+ * When downgrading from exclusive to shared ownership,
1516
+ * anything inside the write-locked region cannot leak
1517
+ * into the read side. In contrast, anything in the
1518
+ * read-locked region is ok to be re-ordered into the
1519
+ * write side. As such, rely on RELEASE semantics.
1520
+ */
1521
+ DEBUG_RWSEMS_WARN_ON(rwsem_owner(sem) != current, sem);
1522
+ tmp = atomic_long_fetch_add_release(
1523
+ -RWSEM_WRITER_LOCKED+RWSEM_READER_BIAS, &sem->count);
1524
+ rwsem_set_reader_owned(sem);
1525
+ if (tmp & RWSEM_FLAG_WAITERS)
1526
+ rwsem_downgrade_wake(sem);
1527
+}
1528
+#endif
171529
181530 /*
191531 * lock for reading
....@@ -24,10 +1536,22 @@
241536 rwsem_acquire_read(&sem->dep_map, 0, 0, _RET_IP_);
251537
261538 LOCK_CONTENDED(sem, __down_read_trylock, __down_read);
27
- rwsem_set_reader_owned(sem);
281539 }
29
-
301540 EXPORT_SYMBOL(down_read);
1541
+
1542
+int __sched down_read_interruptible(struct rw_semaphore *sem)
1543
+{
1544
+ might_sleep();
1545
+ rwsem_acquire_read(&sem->dep_map, 0, 0, _RET_IP_);
1546
+
1547
+ if (LOCK_CONTENDED_RETURN(sem, __down_read_trylock, __down_read_interruptible)) {
1548
+ rwsem_release(&sem->dep_map, _RET_IP_);
1549
+ return -EINTR;
1550
+ }
1551
+
1552
+ return 0;
1553
+}
1554
+EXPORT_SYMBOL(down_read_interruptible);
311555
321556 int __sched down_read_killable(struct rw_semaphore *sem)
331557 {
....@@ -35,14 +1559,12 @@
351559 rwsem_acquire_read(&sem->dep_map, 0, 0, _RET_IP_);
361560
371561 if (LOCK_CONTENDED_RETURN(sem, __down_read_trylock, __down_read_killable)) {
38
- rwsem_release(&sem->dep_map, 1, _RET_IP_);
1562
+ rwsem_release(&sem->dep_map, _RET_IP_);
391563 return -EINTR;
401564 }
411565
42
- rwsem_set_reader_owned(sem);
431566 return 0;
441567 }
45
-
461568 EXPORT_SYMBOL(down_read_killable);
471569
481570 /*
....@@ -52,13 +1574,10 @@
521574 {
531575 int ret = __down_read_trylock(sem);
541576
55
- if (ret == 1) {
1577
+ if (ret == 1)
561578 rwsem_acquire_read(&sem->dep_map, 0, 1, _RET_IP_);
57
- rwsem_set_reader_owned(sem);
58
- }
591579 return ret;
601580 }
61
-
621581 EXPORT_SYMBOL(down_read_trylock);
631582
641583 /*
....@@ -68,11 +1587,8 @@
681587 {
691588 might_sleep();
701589 rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_);
71
-
721590 LOCK_CONTENDED(sem, __down_write_trylock, __down_write);
73
- rwsem_set_owner(sem);
741591 }
75
-
761592 EXPORT_SYMBOL(down_write);
771593
781594 /*
....@@ -83,15 +1599,14 @@
831599 might_sleep();
841600 rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_);
851601
86
- if (LOCK_CONTENDED_RETURN(sem, __down_write_trylock, __down_write_killable)) {
87
- rwsem_release(&sem->dep_map, 1, _RET_IP_);
1602
+ if (LOCK_CONTENDED_RETURN(sem, __down_write_trylock,
1603
+ __down_write_killable)) {
1604
+ rwsem_release(&sem->dep_map, _RET_IP_);
881605 return -EINTR;
891606 }
901607
91
- rwsem_set_owner(sem);
921608 return 0;
931609 }
94
-
951610 EXPORT_SYMBOL(down_write_killable);
961611
971612 /*
....@@ -101,14 +1616,11 @@
1011616 {
1021617 int ret = __down_write_trylock(sem);
1031618
104
- if (ret == 1) {
1619
+ if (ret == 1)
1051620 rwsem_acquire(&sem->dep_map, 0, 1, _RET_IP_);
106
- rwsem_set_owner(sem);
107
- }
1081621
1091622 return ret;
1101623 }
111
-
1121624 EXPORT_SYMBOL(down_write_trylock);
1131625
1141626 /*
....@@ -116,12 +1628,9 @@
1161628 */
1171629 void up_read(struct rw_semaphore *sem)
1181630 {
119
- rwsem_release(&sem->dep_map, 1, _RET_IP_);
120
- DEBUG_RWSEMS_WARN_ON(sem->owner != RWSEM_READER_OWNED);
121
-
1631
+ rwsem_release(&sem->dep_map, _RET_IP_);
1221632 __up_read(sem);
1231633 }
124
-
1251634 EXPORT_SYMBOL(up_read);
1261635
1271636 /*
....@@ -129,13 +1638,9 @@
1291638 */
1301639 void up_write(struct rw_semaphore *sem)
1311640 {
132
- rwsem_release(&sem->dep_map, 1, _RET_IP_);
133
- DEBUG_RWSEMS_WARN_ON(sem->owner != current);
134
-
135
- rwsem_clear_owner(sem);
1641
+ rwsem_release(&sem->dep_map, _RET_IP_);
1361642 __up_write(sem);
1371643 }
138
-
1391644 EXPORT_SYMBOL(up_write);
1401645
1411646 /*
....@@ -144,12 +1649,8 @@
1441649 void downgrade_write(struct rw_semaphore *sem)
1451650 {
1461651 lock_downgrade(&sem->dep_map, _RET_IP_);
147
- DEBUG_RWSEMS_WARN_ON(sem->owner != current);
148
-
149
- rwsem_set_reader_owned(sem);
1501652 __downgrade_write(sem);
1511653 }
152
-
1531654 EXPORT_SYMBOL(downgrade_write);
1541655
1551656 #ifdef CONFIG_DEBUG_LOCK_ALLOC
....@@ -158,43 +1659,48 @@
1581659 {
1591660 might_sleep();
1601661 rwsem_acquire_read(&sem->dep_map, subclass, 0, _RET_IP_);
161
-
1621662 LOCK_CONTENDED(sem, __down_read_trylock, __down_read);
163
- rwsem_set_reader_owned(sem);
1641663 }
165
-
1661664 EXPORT_SYMBOL(down_read_nested);
1665
+
1666
+int down_read_killable_nested(struct rw_semaphore *sem, int subclass)
1667
+{
1668
+ might_sleep();
1669
+ rwsem_acquire_read(&sem->dep_map, subclass, 0, _RET_IP_);
1670
+
1671
+ if (LOCK_CONTENDED_RETURN(sem, __down_read_trylock, __down_read_killable)) {
1672
+ rwsem_release(&sem->dep_map, _RET_IP_);
1673
+ return -EINTR;
1674
+ }
1675
+
1676
+ return 0;
1677
+}
1678
+EXPORT_SYMBOL(down_read_killable_nested);
1671679
1681680 void _down_write_nest_lock(struct rw_semaphore *sem, struct lockdep_map *nest)
1691681 {
1701682 might_sleep();
1711683 rwsem_acquire_nest(&sem->dep_map, 0, 0, nest, _RET_IP_);
172
-
1731684 LOCK_CONTENDED(sem, __down_write_trylock, __down_write);
174
- rwsem_set_owner(sem);
1751685 }
176
-
1771686 EXPORT_SYMBOL(_down_write_nest_lock);
1781687
1791688 void down_read_non_owner(struct rw_semaphore *sem)
1801689 {
1811690 might_sleep();
182
-
1831691 __down_read(sem);
184
- rwsem_set_reader_owned(sem);
1692
+#ifndef CONFIG_PREEMPT_RT
1693
+ __rwsem_set_reader_owned(sem, NULL);
1694
+#endif
1851695 }
186
-
1871696 EXPORT_SYMBOL(down_read_non_owner);
1881697
1891698 void down_write_nested(struct rw_semaphore *sem, int subclass)
1901699 {
1911700 might_sleep();
1921701 rwsem_acquire(&sem->dep_map, subclass, 0, _RET_IP_);
193
-
1941702 LOCK_CONTENDED(sem, __down_write_trylock, __down_write);
195
- rwsem_set_owner(sem);
1961703 }
197
-
1981704 EXPORT_SYMBOL(down_write_nested);
1991705
2001706 int __sched down_write_killable_nested(struct rw_semaphore *sem, int subclass)
....@@ -202,23 +1708,23 @@
2021708 might_sleep();
2031709 rwsem_acquire(&sem->dep_map, subclass, 0, _RET_IP_);
2041710
205
- if (LOCK_CONTENDED_RETURN(sem, __down_write_trylock, __down_write_killable)) {
206
- rwsem_release(&sem->dep_map, 1, _RET_IP_);
1711
+ if (LOCK_CONTENDED_RETURN(sem, __down_write_trylock,
1712
+ __down_write_killable)) {
1713
+ rwsem_release(&sem->dep_map, _RET_IP_);
2071714 return -EINTR;
2081715 }
2091716
210
- rwsem_set_owner(sem);
2111717 return 0;
2121718 }
213
-
2141719 EXPORT_SYMBOL(down_write_killable_nested);
2151720
2161721 void up_read_non_owner(struct rw_semaphore *sem)
2171722 {
218
- DEBUG_RWSEMS_WARN_ON(sem->owner != RWSEM_READER_OWNED);
1723
+#ifndef CONFIG_PREEMPT_RT
1724
+ DEBUG_RWSEMS_WARN_ON(!is_rwsem_reader_owned(sem), sem);
1725
+#endif
2191726 __up_read(sem);
2201727 }
221
-
2221728 EXPORT_SYMBOL(up_read_non_owner);
2231729
2241730 #endif