2024-02-20 102a0743326a03cd1a1202ceda21e175b7d3575c
kernel/kernel/locking/rwsem.c
@@ -3,17 +3,1542 @@
33 *
44 * Written by David Howells (dhowells@redhat.com).
55 * Derived from asm-i386/semaphore.h
6
+ *
7
+ * Writer lock-stealing by Alex Shi <alex.shi@intel.com>
8
+ * and Michel Lespinasse <walken@google.com>
9
+ *
10
+ * Optimistic spinning by Tim Chen <tim.c.chen@intel.com>
11
+ * and Davidlohr Bueso <davidlohr@hp.com>. Based on mutexes.
12
+ *
13
+ * Rwsem count bit fields re-definition and rwsem rearchitecture by
14
+ * Waiman Long <longman@redhat.com> and
15
+ * Peter Zijlstra <peterz@infradead.org>.
616 */
717
818 #include <linux/types.h>
919 #include <linux/kernel.h>
1020 #include <linux/sched.h>
21
+#include <linux/sched/rt.h>
22
+#include <linux/sched/task.h>
1123 #include <linux/sched/debug.h>
24
+#include <linux/sched/wake_q.h>
25
+#include <linux/sched/signal.h>
26
+#include <linux/sched/clock.h>
1227 #include <linux/export.h>
1328 #include <linux/rwsem.h>
1429 #include <linux/atomic.h>
1530
16
-#include "rwsem.h"
31
+#include "lock_events.h"
32
+#include <trace/hooks/rwsem.h>
33
+#include <trace/hooks/dtask.h>
34
+
35
+/*
36
+ * The least significant 3 bits of the owner value have the following
37
+ * meanings when set.
38
+ * - Bit 0: RWSEM_READER_OWNED - The rwsem is owned by readers
39
+ * - Bit 1: RWSEM_RD_NONSPINNABLE - Readers cannot spin on this lock.
40
+ * - Bit 2: RWSEM_WR_NONSPINNABLE - Writers cannot spin on this lock.
41
+ *
42
+ * When the rwsem is either owned by an anonymous writer, or it is
43
+ * reader-owned, but a spinning writer has timed out, both nonspinnable
44
+ * bits will be set to disable optimistic spinning by readers and writers.
45
+ * In the latter case, the last unlocking reader should then check the
46
+ * writer nonspinnable bit and clear it only to give writers preference
47
+ * to acquire the lock via optimistic spinning, but not readers. Similar
48
+ * action is also done in the reader slowpath.
49
+ *
50
+ * When a writer acquires a rwsem, it puts its task_struct pointer
51
+ * into the owner field. It is cleared after an unlock.
52
+ *
53
+ * When a reader acquires a rwsem, it will also put its task_struct
54
+ * pointer into the owner field with the RWSEM_READER_OWNED bit set.
55
+ * On unlock, the owner field will largely be left untouched. So
56
+ * for a free or reader-owned rwsem, the owner value may contain
57
+ * information about the last reader that acquired the rwsem.
58
+ *
59
+ * That information may be helpful in debugging cases where the system
60
+ * seems to hang on a reader owned rwsem especially if only one reader
61
+ * is involved. Ideally we would like to track all the readers that own
62
+ * a rwsem, but the overhead is simply too big.
63
+ *
64
+ * Reader optimistic spinning is helpful when the reader critical section
65
+ * is short and there aren't that many readers around. It makes readers
66
+ * relatively more preferred than writers. When a writer times out spinning
67
+ * on a reader-owned lock and sets the nonspinnable bits, there are two main
68
+ * reasons for that.
69
+ *
70
+ * 1) The reader critical section is long, perhaps the task sleeps after
71
+ * acquiring the read lock.
72
+ * 2) There are just too many readers contending the lock causing it to
73
+ * take a while to service all of them.
74
+ *
75
+ * In the former case, a long reader critical section will impede the progress
76
+ * of writers, which is usually more important for system performance. In
77
+ * the latter case, reader optimistic spinning tends to make the reader
78
+ * groups that contain readers that acquire the lock together smaller
79
+ * leading to more of them. That may hurt performance in some cases. In
80
+ * other words, the setting of nonspinnable bits indicates that reader
81
+ * optimistic spinning may not be helpful for those workloads that cause
82
+ * it.
83
+ *
84
+ * Therefore, any writers that had observed the setting of the writer
85
+ * nonspinnable bit for a given rwsem after they fail to acquire the lock
86
+ * via optimistic spinning will set the reader nonspinnable bit once they
87
+ * acquire the write lock. Similarly, readers that observe the setting
88
+ * of reader nonspinnable bit at slowpath entry will set the reader
89
+ * nonspinnable bits when they acquire the read lock via the wakeup path.
90
+ *
91
+ * Once the reader nonspinnable bit is on, it will only be reset when
92
+ * a writer is able to acquire the rwsem in the fast path or somehow a
93
+ * reader or writer in the slowpath doesn't observe the nonspinnable bit.
94
+ *
95
+ * This is to discourage reader optimistic spinning on that particular
96
+ * rwsem and make writers more preferred. This adaptive disabling of reader
97
+ * optimistic spinning will alleviate the negative side effect of this
98
+ * feature.
99
+ */
100
+#define RWSEM_READER_OWNED (1UL << 0)
101
+#define RWSEM_RD_NONSPINNABLE (1UL << 1)
102
+#define RWSEM_WR_NONSPINNABLE (1UL << 2)
103
+#define RWSEM_NONSPINNABLE (RWSEM_RD_NONSPINNABLE | RWSEM_WR_NONSPINNABLE)
104
+#define RWSEM_OWNER_FLAGS_MASK (RWSEM_READER_OWNED | RWSEM_NONSPINNABLE)
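To make the owner-field layout above concrete, here is a minimal illustrative helper (a sketch only; the name rwsem_print_owner_bits is made up and is not part of this patch) that splits an owner snapshot into its task pointer and flag bits using the masks just defined:

static void rwsem_print_owner_bits(struct rw_semaphore *sem)
{
	unsigned long owner = atomic_long_read(&sem->owner);

	/* The low 3 bits are flags; the rest is the task_struct pointer. */
	pr_info("owner task=%px reader_owned=%d rd_nonspin=%d wr_nonspin=%d\n",
		(void *)(owner & ~RWSEM_OWNER_FLAGS_MASK),
		!!(owner & RWSEM_READER_OWNED),
		!!(owner & RWSEM_RD_NONSPINNABLE),
		!!(owner & RWSEM_WR_NONSPINNABLE));
}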
105
+
106
+#ifdef CONFIG_DEBUG_RWSEMS
107
+# define DEBUG_RWSEMS_WARN_ON(c, sem) do { \
108
+ if (!debug_locks_silent && \
109
+ WARN_ONCE(c, "DEBUG_RWSEMS_WARN_ON(%s): count = 0x%lx, magic = 0x%lx, owner = 0x%lx, curr 0x%lx, list %sempty\n",\
110
+ #c, atomic_long_read(&(sem)->count), \
111
+ (unsigned long) sem->magic, \
112
+ atomic_long_read(&(sem)->owner), (long)current, \
113
+ list_empty(&(sem)->wait_list) ? "" : "not ")) \
114
+ debug_locks_off(); \
115
+ } while (0)
116
+#else
117
+# define DEBUG_RWSEMS_WARN_ON(c, sem)
118
+#endif
119
+
120
+/*
121
+ * On 64-bit architectures, the bit definitions of the count are:
122
+ *
123
+ * Bit 0 - writer locked bit
124
+ * Bit 1 - waiters present bit
125
+ * Bit 2 - lock handoff bit
126
+ * Bits 3-7 - reserved
127
+ * Bits 8-62 - 55-bit reader count
128
+ * Bit 63 - read fail bit
129
+ *
130
+ * On 32-bit architectures, the bit definitions of the count are:
131
+ *
132
+ * Bit 0 - writer locked bit
133
+ * Bit 1 - waiters present bit
134
+ * Bit 2 - lock handoff bit
135
+ * Bits 3-7 - reserved
136
+ * Bits 8-30 - 23-bit reader count
137
+ * Bit 31 - read fail bit
138
+ *
139
+ * It is not likely that the most significant bit (read fail bit) will ever
140
+ * be set. This guard bit is still checked anyway in the down_read() fastpath
141
+ * just in case we need to use up more of the reader bits for other purposes
142
+ * in the future.
143
+ *
144
+ * atomic_long_fetch_add() is used to obtain reader lock, whereas
145
+ * atomic_long_cmpxchg() will be used to obtain writer lock.
146
+ *
147
+ * There are three places where the lock handoff bit may be set or cleared.
148
+ * 1) rwsem_mark_wake() for readers.
149
+ * 2) rwsem_try_write_lock() for writers.
150
+ * 3) Error path of rwsem_down_write_slowpath().
151
+ *
152
+ * For all the above cases, wait_lock will be held. A writer must also
153
+ * be the first one in the wait_list to be eligible for setting the handoff
154
+ * bit. So concurrent setting/clearing of handoff bit is not possible.
155
+ */
156
+#define RWSEM_WRITER_LOCKED (1UL << 0)
157
+#define RWSEM_FLAG_WAITERS (1UL << 1)
158
+#define RWSEM_FLAG_HANDOFF (1UL << 2)
159
+#define RWSEM_FLAG_READFAIL (1UL << (BITS_PER_LONG - 1))
160
+
161
+#define RWSEM_READER_SHIFT 8
162
+#define RWSEM_READER_BIAS (1UL << RWSEM_READER_SHIFT)
163
+#define RWSEM_READER_MASK (~(RWSEM_READER_BIAS - 1))
164
+#define RWSEM_WRITER_MASK RWSEM_WRITER_LOCKED
165
+#define RWSEM_LOCK_MASK (RWSEM_WRITER_MASK|RWSEM_READER_MASK)
166
+#define RWSEM_READ_FAILED_MASK (RWSEM_WRITER_MASK|RWSEM_FLAG_WAITERS|\
167
+ RWSEM_FLAG_HANDOFF|RWSEM_FLAG_READFAIL)
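As a rough sketch of the count layout above (illustrative only; rwsem_print_count_bits is a made-up name, and the reader-count extraction assumes the read-fail bit is clear), a count snapshot decomposes like this:

static void rwsem_print_count_bits(struct rw_semaphore *sem)
{
	long count = atomic_long_read(&sem->count);

	/*
	 * Example: count == (2 << RWSEM_READER_SHIFT) | RWSEM_FLAG_WAITERS
	 * means two readers hold the lock and at least one waiter is queued.
	 */
	pr_info("writer=%d waiters=%d handoff=%d readers=%ld\n",
		!!(count & RWSEM_WRITER_LOCKED),
		!!(count & RWSEM_FLAG_WAITERS),
		!!(count & RWSEM_FLAG_HANDOFF),
		(long)((count & RWSEM_READER_MASK) >> RWSEM_READER_SHIFT));
}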
168
+
169
+/*
170
+ * All writes to owner are protected by WRITE_ONCE() to make sure that
171
+ * store tearing can't happen as optimistic spinners may read and use
172
+ * the owner value concurrently without lock. Read from owner, however,
173
+ * may not need READ_ONCE() as long as the pointer value is only used
174
+ * for comparison and isn't being dereferenced.
175
+ */
176
+static inline void rwsem_set_owner(struct rw_semaphore *sem)
177
+{
178
+ atomic_long_set(&sem->owner, (long)current);
179
+ trace_android_vh_rwsem_set_owner(sem);
180
+}
181
+
182
+static inline void rwsem_clear_owner(struct rw_semaphore *sem)
183
+{
184
+ atomic_long_set(&sem->owner, 0);
185
+}
186
+
187
+/*
188
+ * Test the flags in the owner field.
189
+ */
190
+static inline bool rwsem_test_oflags(struct rw_semaphore *sem, long flags)
191
+{
192
+ return atomic_long_read(&sem->owner) & flags;
193
+}
194
+
195
+/*
196
+ * The task_struct pointer of the last owning reader will be left in
197
+ * the owner field.
198
+ *
199
+ * Note that the owner value just indicates the task has owned the rwsem
200
+ * previously; it may not be the real owner or one of the real owners
201
+ * anymore when that field is examined, so take it with a grain of salt.
202
+ *
203
+ * The reader non-spinnable bit is preserved.
204
+ */
205
+static inline void __rwsem_set_reader_owned(struct rw_semaphore *sem,
206
+ struct task_struct *owner)
207
+{
208
+ unsigned long val = (unsigned long)owner | RWSEM_READER_OWNED |
209
+ (atomic_long_read(&sem->owner) & RWSEM_RD_NONSPINNABLE);
210
+
211
+ atomic_long_set(&sem->owner, val);
212
+}
213
+
214
+static inline void rwsem_set_reader_owned(struct rw_semaphore *sem)
215
+{
216
+ __rwsem_set_reader_owned(sem, current);
217
+ trace_android_vh_rwsem_set_reader_owned(sem);
218
+}
219
+
220
+/*
221
+ * Return true if the rwsem is owned by a reader.
222
+ */
223
+static inline bool is_rwsem_reader_owned(struct rw_semaphore *sem)
224
+{
225
+#ifdef CONFIG_DEBUG_RWSEMS
226
+ /*
227
+ * Check the count to see if it is write-locked.
228
+ */
229
+ long count = atomic_long_read(&sem->count);
230
+
231
+ if (count & RWSEM_WRITER_MASK)
232
+ return false;
233
+#endif
234
+ return rwsem_test_oflags(sem, RWSEM_READER_OWNED);
235
+}
236
+
237
+#ifdef CONFIG_DEBUG_RWSEMS
238
+/*
239
+ * With CONFIG_DEBUG_RWSEMS configured, it will make sure that if there
240
+ * is a task pointer in the owner field of a reader-owned rwsem, it will be the
241
+ * real owner or one of the real owners. The only exception is when the
242
+ * unlock is done by up_read_non_owner().
243
+ */
244
+static inline void rwsem_clear_reader_owned(struct rw_semaphore *sem)
245
+{
246
+ unsigned long val = atomic_long_read(&sem->owner);
247
+
248
+ while ((val & ~RWSEM_OWNER_FLAGS_MASK) == (unsigned long)current) {
249
+ if (atomic_long_try_cmpxchg(&sem->owner, &val,
250
+ val & RWSEM_OWNER_FLAGS_MASK))
251
+ return;
252
+ }
253
+}
254
+#else
255
+static inline void rwsem_clear_reader_owned(struct rw_semaphore *sem)
256
+{
257
+}
258
+#endif
259
+
260
+/*
261
+ * Set the RWSEM_NONSPINNABLE bits if the RWSEM_READER_OWNED flag
262
+ * remains set. Otherwise, the operation will be aborted.
263
+ */
264
+static inline void rwsem_set_nonspinnable(struct rw_semaphore *sem)
265
+{
266
+ unsigned long owner = atomic_long_read(&sem->owner);
267
+
268
+ do {
269
+ if (!(owner & RWSEM_READER_OWNED))
270
+ break;
271
+ if (owner & RWSEM_NONSPINNABLE)
272
+ break;
273
+ } while (!atomic_long_try_cmpxchg(&sem->owner, &owner,
274
+ owner | RWSEM_NONSPINNABLE));
275
+}
276
+
277
+static inline bool rwsem_read_trylock(struct rw_semaphore *sem)
278
+{
279
+ long cnt = atomic_long_add_return_acquire(RWSEM_READER_BIAS, &sem->count);
280
+ if (WARN_ON_ONCE(cnt < 0))
281
+ rwsem_set_nonspinnable(sem);
282
+
283
+ if ((cnt & RWSEM_READ_FAILED_MASK) == 0)
284
+ trace_android_vh_record_rwsem_lock_starttime(current, jiffies);
285
+
286
+ return !(cnt & RWSEM_READ_FAILED_MASK);
287
+}
288
+
289
+/*
290
+ * Return just the real task structure pointer of the owner
291
+ */
292
+static inline struct task_struct *rwsem_owner(struct rw_semaphore *sem)
293
+{
294
+ return (struct task_struct *)
295
+ (atomic_long_read(&sem->owner) & ~RWSEM_OWNER_FLAGS_MASK);
296
+}
297
+
298
+/*
299
+ * Return the real task structure pointer of the owner and the embedded
300
+ * flags in the owner. pflags must be non-NULL.
301
+ */
302
+static inline struct task_struct *
303
+rwsem_owner_flags(struct rw_semaphore *sem, unsigned long *pflags)
304
+{
305
+ unsigned long owner = atomic_long_read(&sem->owner);
306
+
307
+ *pflags = owner & RWSEM_OWNER_FLAGS_MASK;
308
+ return (struct task_struct *)(owner & ~RWSEM_OWNER_FLAGS_MASK);
309
+}
310
+
311
+/*
312
+ * Guide to the rw_semaphore's count field.
313
+ *
314
+ * When the RWSEM_WRITER_LOCKED bit in count is set, the lock is owned
315
+ * by a writer.
316
+ *
317
+ * The lock is owned by readers when
318
+ * (1) the RWSEM_WRITER_LOCKED isn't set in count,
319
+ * (2) some of the reader bits are set in count, and
320
+ * (3) the owner field has RWSEM_READER_OWNED bit set.
321
+ *
322
+ * Having some reader bits set is not enough to guarantee a reader-owned
323
+ * lock as the readers may be in the process of backing out from the count
324
+ * and a writer has just released the lock. So another writer may steal
325
+ * the lock immediately after that.
326
+ */
327
+
328
+/*
329
+ * Initialize an rwsem:
330
+ */
331
+void __init_rwsem(struct rw_semaphore *sem, const char *name,
332
+ struct lock_class_key *key)
333
+{
334
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
335
+ /*
336
+ * Make sure we are not reinitializing a held semaphore:
337
+ */
338
+ debug_check_no_locks_freed((void *)sem, sizeof(*sem));
339
+ lockdep_init_map_wait(&sem->dep_map, name, key, 0, LD_WAIT_SLEEP);
340
+#endif
341
+#ifdef CONFIG_DEBUG_RWSEMS
342
+ sem->magic = sem;
343
+#endif
344
+ atomic_long_set(&sem->count, RWSEM_UNLOCKED_VALUE);
345
+ raw_spin_lock_init(&sem->wait_lock);
346
+ INIT_LIST_HEAD(&sem->wait_list);
347
+ atomic_long_set(&sem->owner, 0L);
348
+#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
349
+ osq_lock_init(&sem->osq);
350
+#endif
351
+ trace_android_vh_rwsem_init(sem);
352
+}
353
+EXPORT_SYMBOL(__init_rwsem);
354
+
355
+#define rwsem_first_waiter(sem) \
356
+ list_first_entry(&sem->wait_list, struct rwsem_waiter, list)
357
+
358
+enum rwsem_wake_type {
359
+ RWSEM_WAKE_ANY, /* Wake whatever's at head of wait list */
360
+ RWSEM_WAKE_READERS, /* Wake readers only */
361
+ RWSEM_WAKE_READ_OWNED /* Waker thread holds the read lock */
362
+};
363
+
364
+enum writer_wait_state {
365
+ WRITER_NOT_FIRST, /* Writer is not first in wait list */
366
+ WRITER_FIRST, /* Writer is first in wait list */
367
+ WRITER_HANDOFF /* Writer is first & handoff needed */
368
+};
369
+
370
+/*
371
+ * The typical HZ value is either 250 or 1000. So set the minimum waiting
372
+ * time to at least 4ms or 1 jiffy (if it is higher than 4ms) in the wait
373
+ * queue before initiating the handoff protocol.
374
+ */
375
+#define RWSEM_WAIT_TIMEOUT DIV_ROUND_UP(HZ, 250)
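For example, with HZ=1000 this evaluates to DIV_ROUND_UP(1000, 250) = 4 jiffies (4 ms); with HZ=250 it is 1 jiffy (also 4 ms); and with HZ=100 it rounds up to 1 jiffy, i.e. 10 ms.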
376
+
377
+/*
378
+ * Magic number to batch-wakeup waiting readers, even when writers are
379
+ * also present in the queue. This both limits the amount of work the
380
+ * waking thread must do and also prevents any potential counter overflow,
381
+ * however unlikely.
382
+ */
383
+#define MAX_READERS_WAKEUP 0x100
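Waking at most 0x100 (256) readers per call adds at most 256 * RWSEM_READER_BIAS = 0x10000 to the count, which is comfortably below even the 23-bit reader field available on 32-bit architectures.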
384
+
385
+/*
386
+ * handle the lock release when there are processes blocked on it that can now run
387
+ * - if we come here from up_xxxx(), then the RWSEM_FLAG_WAITERS bit must
388
+ * have been set.
389
+ * - there must be someone on the queue
390
+ * - the wait_lock must be held by the caller
391
+ * - tasks are marked for wakeup, the caller must later invoke wake_up_q()
392
+ * to actually wakeup the blocked task(s) and drop the reference count,
393
+ * preferably when the wait_lock is released
394
+ * - woken process blocks are discarded from the list after having task zeroed
395
+ * - writers are only marked woken if downgrading is false
396
+ */
397
+static void rwsem_mark_wake(struct rw_semaphore *sem,
398
+ enum rwsem_wake_type wake_type,
399
+ struct wake_q_head *wake_q)
400
+{
401
+ struct rwsem_waiter *waiter, *tmp;
402
+ long oldcount, woken = 0, adjustment = 0;
403
+ struct list_head wlist;
404
+
405
+ lockdep_assert_held(&sem->wait_lock);
406
+
407
+ /*
408
+ * Take a peek at the queue head waiter such that we can determine
409
+ * the wakeup(s) to perform.
410
+ */
411
+ waiter = rwsem_first_waiter(sem);
412
+
413
+ if (waiter->type == RWSEM_WAITING_FOR_WRITE) {
414
+ if (wake_type == RWSEM_WAKE_ANY) {
415
+ /*
416
+ * Mark writer at the front of the queue for wakeup.
417
+ * Until the task is actually awoken later by
418
+ * the caller, other writers are able to steal it.
419
+ * Readers, on the other hand, will block as they
420
+ * will notice the queued writer.
421
+ */
422
+ wake_q_add(wake_q, waiter->task);
423
+ lockevent_inc(rwsem_wake_writer);
424
+ }
425
+
426
+ return;
427
+ }
428
+
429
+ /*
430
+ * No reader wakeup if there are too many of them already.
431
+ */
432
+ if (unlikely(atomic_long_read(&sem->count) < 0))
433
+ return;
434
+
435
+ /*
436
+ * Writers might steal the lock before we grant it to the next reader.
437
+ * We prefer to do the first reader grant before counting readers
438
+ * so we can bail out early if a writer stole the lock.
439
+ */
440
+ if (wake_type != RWSEM_WAKE_READ_OWNED) {
441
+ struct task_struct *owner;
442
+
443
+ adjustment = RWSEM_READER_BIAS;
444
+ oldcount = atomic_long_fetch_add(adjustment, &sem->count);
445
+ if (unlikely(oldcount & RWSEM_WRITER_MASK)) {
446
+ /*
447
+ * When we've been waiting "too" long (for writers
448
+ * to give up the lock), request a HANDOFF to
449
+ * force the issue.
450
+ */
451
+ if (!(oldcount & RWSEM_FLAG_HANDOFF) &&
452
+ time_after(jiffies, waiter->timeout)) {
453
+ adjustment -= RWSEM_FLAG_HANDOFF;
454
+ lockevent_inc(rwsem_rlock_handoff);
455
+ }
456
+
457
+ atomic_long_add(-adjustment, &sem->count);
458
+ return;
459
+ }
460
+ /*
461
+ * Set it to reader-owned to give spinners an early
462
+ * indication that readers now have the lock.
463
+ * The reader nonspinnable bit seen at slowpath entry of
464
+ * the reader is copied over.
465
+ */
466
+ owner = waiter->task;
467
+ if (waiter->last_rowner & RWSEM_RD_NONSPINNABLE) {
468
+ owner = (void *)((unsigned long)owner | RWSEM_RD_NONSPINNABLE);
469
+ lockevent_inc(rwsem_opt_norspin);
470
+ }
471
+ __rwsem_set_reader_owned(sem, owner);
472
+ }
473
+
474
+ /*
475
+ * Grant up to MAX_READERS_WAKEUP read locks to all the readers in the
476
+ * queue. We know that the woken count will be at least 1 as we accounted
477
+ * for above. Note we increment the 'active part' of the count by the
478
+ * number of readers before waking any processes up.
479
+ *
480
+ * This is an adaptation of the phase-fair R/W locks where at the
481
+ * reader phase (first waiter is a reader), all readers are eligible
482
+ * to acquire the lock at the same time irrespective of their order
483
+ * in the queue. The writers acquire the lock according to their
484
+ * order in the queue.
485
+ *
486
+ * We have to do wakeup in 2 passes to prevent the possibility that
487
+ * the reader count may be decremented before it is incremented. It
488
+ * is because the to-be-woken waiter may not have slept yet. So it
489
+ * may see waiter->task got cleared, finish its critical section and
490
+ * do an unlock before the reader count increment.
491
+ *
492
+ * 1) Collect the read-waiters in a separate list, count them and
493
+ * fully increment the reader count in rwsem.
494
+ * 2) For each waiter in the new list, clear waiter->task and
495
+ * put them into wake_q to be woken up later.
496
+ */
497
+ INIT_LIST_HEAD(&wlist);
498
+ list_for_each_entry_safe(waiter, tmp, &sem->wait_list, list) {
499
+ if (waiter->type == RWSEM_WAITING_FOR_WRITE)
500
+ continue;
501
+
502
+ woken++;
503
+ list_move_tail(&waiter->list, &wlist);
504
+
505
+ trace_android_vh_rwsem_mark_wake_readers(sem, waiter);
506
+ /*
507
+ * Limit # of readers that can be woken up per wakeup call.
508
+ */
509
+ if (woken >= MAX_READERS_WAKEUP)
510
+ break;
511
+ }
512
+
513
+ adjustment = woken * RWSEM_READER_BIAS - adjustment;
514
+ lockevent_cond_inc(rwsem_wake_reader, woken);
515
+ if (list_empty(&sem->wait_list)) {
516
+ /* hit end of list above */
517
+ adjustment -= RWSEM_FLAG_WAITERS;
518
+ }
519
+
520
+ /*
521
+ * When we've woken a reader, we no longer need to force writers
522
+ * to give up the lock and we can clear HANDOFF.
523
+ */
524
+ if (woken && (atomic_long_read(&sem->count) & RWSEM_FLAG_HANDOFF))
525
+ adjustment -= RWSEM_FLAG_HANDOFF;
526
+
527
+ if (adjustment)
528
+ atomic_long_add(adjustment, &sem->count);
529
+
530
+ /* 2nd pass */
531
+ list_for_each_entry_safe(waiter, tmp, &wlist, list) {
532
+ struct task_struct *tsk;
533
+
534
+ tsk = waiter->task;
535
+ get_task_struct(tsk);
536
+
537
+ /*
538
+ * Ensure calling get_task_struct() before setting the reader
539
+ * waiter to nil such that rwsem_down_read_slowpath() cannot
540
+ * race with do_exit() by always holding a reference count
541
+ * to the task to wakeup.
542
+ */
543
+ smp_store_release(&waiter->task, NULL);
544
+ /*
545
+ * Ensure issuing the wakeup (either by us or someone else)
546
+ * after setting the reader waiter to nil.
547
+ */
548
+ wake_q_add_safe(wake_q, tsk);
549
+ }
550
+}
551
+
552
+/*
553
+ * This function must be called with the sem->wait_lock held to prevent
554
+ * race conditions between checking the rwsem wait list and setting the
555
+ * sem->count accordingly.
556
+ *
557
+ * If wstate is WRITER_HANDOFF, it will make sure that either the handoff
558
+ * bit is set or the lock is acquired with handoff bit cleared.
559
+ */
560
+static inline bool rwsem_try_write_lock(struct rw_semaphore *sem,
561
+ enum writer_wait_state wstate)
562
+{
563
+ long count, new;
564
+
565
+ lockdep_assert_held(&sem->wait_lock);
566
+
567
+ count = atomic_long_read(&sem->count);
568
+ do {
569
+ bool has_handoff = !!(count & RWSEM_FLAG_HANDOFF);
570
+
571
+ if (has_handoff && wstate == WRITER_NOT_FIRST)
572
+ return false;
573
+
574
+ new = count;
575
+
576
+ if (count & RWSEM_LOCK_MASK) {
577
+ if (has_handoff || (wstate != WRITER_HANDOFF))
578
+ return false;
579
+
580
+ new |= RWSEM_FLAG_HANDOFF;
581
+ } else {
582
+ new |= RWSEM_WRITER_LOCKED;
583
+ new &= ~RWSEM_FLAG_HANDOFF;
584
+
585
+ if (list_is_singular(&sem->wait_list))
586
+ new &= ~RWSEM_FLAG_WAITERS;
587
+ }
588
+ } while (!atomic_long_try_cmpxchg_acquire(&sem->count, &count, new));
589
+
590
+ /*
591
+ * We have either acquired the lock with handoff bit cleared or
592
+ * set the handoff bit.
593
+ */
594
+ if (new & RWSEM_FLAG_HANDOFF)
595
+ return false;
596
+
597
+ rwsem_set_owner(sem);
598
+ return true;
599
+}
600
+
601
+#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
602
+/*
603
+ * Try to acquire read lock before the reader is put on wait queue.
604
+ * Lock acquisition isn't allowed if the rwsem is locked or a writer handoff
605
+ * is ongoing.
606
+ */
607
+static inline bool rwsem_try_read_lock_unqueued(struct rw_semaphore *sem)
608
+{
609
+ long count = atomic_long_read(&sem->count);
610
+
611
+ if (count & (RWSEM_WRITER_MASK | RWSEM_FLAG_HANDOFF))
612
+ return false;
613
+
614
+ count = atomic_long_fetch_add_acquire(RWSEM_READER_BIAS, &sem->count);
615
+ if (!(count & (RWSEM_WRITER_MASK | RWSEM_FLAG_HANDOFF))) {
616
+ rwsem_set_reader_owned(sem);
617
+ lockevent_inc(rwsem_opt_rlock);
618
+ return true;
619
+ }
620
+
621
+ /* Back out the change */
622
+ atomic_long_add(-RWSEM_READER_BIAS, &sem->count);
623
+ return false;
624
+}
625
+
626
+/*
627
+ * Try to acquire write lock before the writer has been put on wait queue.
628
+ */
629
+static inline bool rwsem_try_write_lock_unqueued(struct rw_semaphore *sem)
630
+{
631
+ long count = atomic_long_read(&sem->count);
632
+
633
+ while (!(count & (RWSEM_LOCK_MASK|RWSEM_FLAG_HANDOFF))) {
634
+ if (atomic_long_try_cmpxchg_acquire(&sem->count, &count,
635
+ count | RWSEM_WRITER_LOCKED)) {
636
+ rwsem_set_owner(sem);
637
+ lockevent_inc(rwsem_opt_wlock);
638
+ return true;
639
+ }
640
+ }
641
+ return false;
642
+}
643
+
644
+static inline bool owner_on_cpu(struct task_struct *owner)
645
+{
646
+ /*
647
+ * Due to lock holder preemption, we skip spinning if the
648
+ * task is not on a CPU or its CPU is preempted.
649
+ */
650
+ return owner->on_cpu && !vcpu_is_preempted(task_cpu(owner));
651
+}
652
+
653
+static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem,
654
+ unsigned long nonspinnable)
655
+{
656
+ struct task_struct *owner;
657
+ unsigned long flags;
658
+ bool ret = true;
659
+
660
+ if (need_resched()) {
661
+ lockevent_inc(rwsem_opt_fail);
662
+ return false;
663
+ }
664
+
665
+ preempt_disable();
666
+ rcu_read_lock();
667
+ owner = rwsem_owner_flags(sem, &flags);
668
+ /*
669
+ * Don't check the read-owner as the entry may be stale.
670
+ */
671
+ if ((flags & nonspinnable) ||
672
+ (owner && !(flags & RWSEM_READER_OWNED) && !owner_on_cpu(owner)))
673
+ ret = false;
674
+ rcu_read_unlock();
675
+ preempt_enable();
676
+ trace_android_vh_rwsem_can_spin_on_owner(sem, &ret, nonspinnable == RWSEM_WR_NONSPINNABLE);
677
+
678
+ lockevent_cond_inc(rwsem_opt_fail, !ret);
679
+ return ret;
680
+}
681
+
682
+/*
683
+ * The rwsem_spin_on_owner() function returns the following 4 values
684
+ * depending on the lock owner state.
685
+ * OWNER_NULL : owner is currently NULL
686
+ * OWNER_WRITER: when owner changes and is a writer
687
+ * OWNER_READER: when owner changes and the new owner may be a reader.
688
+ * OWNER_NONSPINNABLE:
689
+ * when optimistic spinning has to stop because either the
690
+ * owner stops running, is unknown, or its timeslice has
691
+ * been used up.
692
+ */
693
+enum owner_state {
694
+ OWNER_NULL = 1 << 0,
695
+ OWNER_WRITER = 1 << 1,
696
+ OWNER_READER = 1 << 2,
697
+ OWNER_NONSPINNABLE = 1 << 3,
698
+};
699
+#define OWNER_SPINNABLE (OWNER_NULL | OWNER_WRITER | OWNER_READER)
700
+
701
+static inline enum owner_state
702
+rwsem_owner_state(struct task_struct *owner, unsigned long flags, unsigned long nonspinnable)
703
+{
704
+ if (flags & nonspinnable)
705
+ return OWNER_NONSPINNABLE;
706
+
707
+ if (flags & RWSEM_READER_OWNED)
708
+ return OWNER_READER;
709
+
710
+ return owner ? OWNER_WRITER : OWNER_NULL;
711
+}
712
+
713
+static noinline enum owner_state
714
+rwsem_spin_on_owner(struct rw_semaphore *sem, unsigned long nonspinnable)
715
+{
716
+ struct task_struct *new, *owner;
717
+ unsigned long flags, new_flags;
718
+ enum owner_state state;
719
+ int cnt = 0;
720
+ bool time_out = false;
721
+
722
+ owner = rwsem_owner_flags(sem, &flags);
723
+ state = rwsem_owner_state(owner, flags, nonspinnable);
724
+ if (state != OWNER_WRITER)
725
+ return state;
726
+
727
+ rcu_read_lock();
728
+ for (;;) {
729
+ trace_android_vh_rwsem_opt_spin_start(sem, &time_out, &cnt, true);
730
+ if (time_out)
731
+ break;
732
+ /*
733
+ * When a waiting writer sets the handoff flag, it may spin
734
+ * on the owner as well. Once that writer acquires the lock,
735
+ * we can spin on it. So we don't need to quit even when the
736
+ * handoff bit is set.
737
+ */
738
+ new = rwsem_owner_flags(sem, &new_flags);
739
+ if ((new != owner) || (new_flags != flags)) {
740
+ state = rwsem_owner_state(new, new_flags, nonspinnable);
741
+ break;
742
+ }
743
+
744
+ /*
745
+ * Ensure we emit the owner->on_cpu dereference _after_
746
+ * checking sem->owner still matches owner. If that fails,
747
+ * owner might point to free()d memory; if it still matches,
748
+ * the rcu_read_lock() ensures the memory stays valid.
749
+ */
750
+ barrier();
751
+
752
+ if (need_resched() || !owner_on_cpu(owner)) {
753
+ state = OWNER_NONSPINNABLE;
754
+ break;
755
+ }
756
+
757
+ cpu_relax();
758
+ }
759
+ rcu_read_unlock();
760
+
761
+ return state;
762
+}
763
+
764
+/*
765
+ * Calculate reader-owned rwsem spinning threshold for writer
766
+ *
767
+ * The more readers own the rwsem, the longer it will take for them to
768
+ * wind down and free the rwsem. So the empirical formula used to
769
+ * determine the actual spinning time limit here is:
770
+ *
771
+ * Spinning threshold = (10 + nr_readers/2)us
772
+ *
773
+ * The limit is capped to a maximum of 25us (30 readers). This is just
774
+ * a heuristic and is subject to change in the future.
775
+ */
776
+static inline u64 rwsem_rspin_threshold(struct rw_semaphore *sem)
777
+{
778
+ long count = atomic_long_read(&sem->count);
779
+ int readers = count >> RWSEM_READER_SHIFT;
780
+ u64 delta;
781
+
782
+ if (readers > 30)
783
+ readers = 30;
784
+ delta = (20 + readers) * NSEC_PER_USEC / 2;
785
+
786
+ return sched_clock() + delta;
787
+}
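Working through the formula above: with 10 readers, delta = (20 + 10) * NSEC_PER_USEC / 2 = 15 us, i.e. (10 + 10/2) us; with 30 or more readers the cap yields (20 + 30) * NSEC_PER_USEC / 2 = 25 us.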
788
+
789
+static bool rwsem_optimistic_spin(struct rw_semaphore *sem, bool wlock)
790
+{
791
+ bool taken = false;
792
+ int prev_owner_state = OWNER_NULL;
793
+ int loop = 0;
794
+ u64 rspin_threshold = 0;
795
+ int cnt = 0;
796
+ bool time_out = false;
797
+ unsigned long nonspinnable = wlock ? RWSEM_WR_NONSPINNABLE
798
+ : RWSEM_RD_NONSPINNABLE;
799
+
800
+ preempt_disable();
801
+
802
+ /* sem->wait_lock should not be held when doing optimistic spinning */
803
+ if (!osq_lock(&sem->osq))
804
+ goto done;
805
+
806
+ /*
807
+ * Optimistically spin on the owner field and attempt to acquire the
808
+ * lock whenever the owner changes. Spinning will be stopped when:
809
+ * 1) the owning writer isn't running; or
810
+ * 2) readers own the lock and spinning time has exceeded limit.
811
+ */
812
+ for (;;) {
813
+ enum owner_state owner_state;
814
+
815
+ trace_android_vh_rwsem_opt_spin_start(sem, &time_out, &cnt, false);
816
+ if (time_out)
817
+ break;
818
+
819
+ owner_state = rwsem_spin_on_owner(sem, nonspinnable);
820
+ if (!(owner_state & OWNER_SPINNABLE))
821
+ break;
822
+
823
+ /*
824
+ * Try to acquire the lock
825
+ */
826
+ taken = wlock ? rwsem_try_write_lock_unqueued(sem)
827
+ : rwsem_try_read_lock_unqueued(sem);
828
+
829
+ if (taken)
830
+ break;
831
+
832
+ /*
833
+ * Time-based reader-owned rwsem optimistic spinning
834
+ */
835
+ if (wlock && (owner_state == OWNER_READER)) {
836
+ /*
837
+ * Re-initialize rspin_threshold every time
838
+ * the owner state changes from non-reader to reader.
839
+ * This allows a writer to steal the lock in between
840
+ * 2 reader phases and have the threshold reset at
841
+ * the beginning of the 2nd reader phase.
842
+ */
843
+ if (prev_owner_state != OWNER_READER) {
844
+ if (rwsem_test_oflags(sem, nonspinnable))
845
+ break;
846
+ rspin_threshold = rwsem_rspin_threshold(sem);
847
+ loop = 0;
848
+ }
849
+
850
+ /*
851
+ * Check time threshold once every 16 iterations to
852
+ * avoid calling sched_clock() too frequently so
853
+ * as to reduce the average latency between the times
854
+ * when the lock becomes free and when the spinner
855
+ * is ready to do a trylock.
856
+ */
857
+ else if (!(++loop & 0xf) && (sched_clock() > rspin_threshold)) {
858
+ rwsem_set_nonspinnable(sem);
859
+ lockevent_inc(rwsem_opt_nospin);
860
+ break;
861
+ }
862
+ }
863
+
864
+ /*
865
+ * An RT task cannot do optimistic spinning if it cannot
866
+ * be sure the lock holder is running or live-lock may
867
+ * happen if the current task and the lock holder happen
868
+ * to run on the same CPU. However, aborting optimistic
869
+ * spinning while a NULL owner is detected may miss some
870
+ * opportunity where spinning can continue without causing
871
+ * a problem.
872
+ *
873
+ * There are 2 possible cases where an RT task may be able
874
+ * to continue spinning.
875
+ *
876
+ * 1) The lock owner is in the process of releasing the
877
+ * lock, sem->owner is cleared but the lock has not
878
+ * been released yet.
879
+ * 2) The lock was free and owner cleared, but another
880
+ * task just comes in and acquires the lock before
881
+ * we try to get it. The new owner may be a spinnable
882
+ * writer.
883
+ *
884
+ * To take advantage of the two scenarios listed above, the RT
885
+ * task is made to retry one more time to see if it can
886
+ * acquire the lock or continue spinning on the new owning
887
+ * writer. Of course, if the time lag is long enough or the
888
+ * new owner is not a writer or spinnable, the RT task will
889
+ * quit spinning.
890
+ *
891
+ * If the owner is a writer, the need_resched() check is
892
+ * done inside rwsem_spin_on_owner(). If the owner is not
893
+ * a writer, need_resched() check needs to be done here.
894
+ */
895
+ if (owner_state != OWNER_WRITER) {
896
+ if (need_resched())
897
+ break;
898
+ if (rt_task(current) &&
899
+ (prev_owner_state != OWNER_WRITER))
900
+ break;
901
+ }
902
+ prev_owner_state = owner_state;
903
+
904
+ /*
905
+ * The cpu_relax() call is a compiler barrier which forces
906
+ * everything in this loop to be re-loaded. We don't need
907
+ * memory barriers as we'll eventually observe the right
908
+ * values at the cost of a few extra spins.
909
+ */
910
+ cpu_relax();
911
+ }
912
+ osq_unlock(&sem->osq);
913
+ trace_android_vh_rwsem_opt_spin_finish(sem, taken, wlock);
914
+done:
915
+ preempt_enable();
916
+ lockevent_cond_inc(rwsem_opt_fail, !taken);
917
+ return taken;
918
+}
919
+
920
+/*
921
+ * Clear the owner's RWSEM_WR_NONSPINNABLE bit if it is set. This should
922
+ * only be called when the reader count reaches 0.
923
+ *
924
+ * This gives writers a better chance to acquire the rwsem before
925
+ * readers when the rwsem was being held by readers for a relatively long
926
+ * period of time. A race can happen in which an optimistic spinner has
927
+ * just stolen the rwsem and set the owner, but just clearing the
928
+ * RWSEM_WR_NONSPINNABLE bit will do no harm anyway.
929
+ */
930
+static inline void clear_wr_nonspinnable(struct rw_semaphore *sem)
931
+{
932
+ if (rwsem_test_oflags(sem, RWSEM_WR_NONSPINNABLE))
933
+ atomic_long_andnot(RWSEM_WR_NONSPINNABLE, &sem->owner);
934
+}
935
+
936
+/*
937
+ * This function is called when the reader fails to acquire the lock via
938
+ * optimistic spinning. In this case we will still attempt to do a trylock
939
+ * if comparing the current rwsem state with the state when entering
940
+ * the slowpath indicates that the reader is still in a valid reader phase.
941
+ * This happens when the following conditions are true:
942
+ *
943
+ * 1) The lock is currently reader owned, and
944
+ * 2) The lock was previously not reader-owned or the last read owner has changed.
945
+ *
946
+ * In the former case, we have transitioned from a writer phase to a
947
+ * reader-phase while spinning. In the latter case, it means the reader
948
+ * phase hasn't ended when we entered the optimistic spinning loop. In
949
+ * both cases, the reader is eligible to acquire the lock. This is the
950
+ * secondary path where a read lock is acquired optimistically.
951
+ *
952
+ * The reader non-spinnable bit wasn't set at time of entry, or the reader
953
+ * would not be here at all.
954
+ */
955
+static inline bool rwsem_reader_phase_trylock(struct rw_semaphore *sem,
956
+ unsigned long last_rowner)
957
+{
958
+ unsigned long owner = atomic_long_read(&sem->owner);
959
+
960
+ if (!(owner & RWSEM_READER_OWNED))
961
+ return false;
962
+
963
+ if (((owner ^ last_rowner) & ~RWSEM_OWNER_FLAGS_MASK) &&
964
+ rwsem_try_read_lock_unqueued(sem)) {
965
+ lockevent_inc(rwsem_opt_rlock2);
966
+ lockevent_add(rwsem_opt_fail, -1);
967
+ return true;
968
+ }
969
+ return false;
970
+}
971
+#else
972
+static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem,
973
+ unsigned long nonspinnable)
974
+{
975
+ return false;
976
+}
977
+
978
+static inline bool rwsem_optimistic_spin(struct rw_semaphore *sem, bool wlock)
979
+{
980
+ return false;
981
+}
982
+
983
+static inline void clear_wr_nonspinnable(struct rw_semaphore *sem) { }
984
+
985
+static inline bool rwsem_reader_phase_trylock(struct rw_semaphore *sem,
986
+ unsigned long last_rowner)
987
+{
988
+ return false;
989
+}
990
+
991
+static inline int
992
+rwsem_spin_on_owner(struct rw_semaphore *sem, unsigned long nonspinnable)
993
+{
994
+ return 0;
995
+}
996
+#define OWNER_NULL 1
997
+#endif
998
+
999
+/*
1000
+ * Wait for the read lock to be granted
1001
+ */
1002
+static struct rw_semaphore __sched *
1003
+rwsem_down_read_slowpath(struct rw_semaphore *sem, int state)
1004
+{
1005
+ long count, adjustment = -RWSEM_READER_BIAS;
1006
+ struct rwsem_waiter waiter;
1007
+ DEFINE_WAKE_Q(wake_q);
1008
+ bool wake = false;
1009
+ bool already_on_list = false;
1010
+
1011
+ /*
1012
+ * Save the current read-owner of rwsem, if available, and the
1013
+ * reader nonspinnable bit.
1014
+ */
1015
+ waiter.last_rowner = atomic_long_read(&sem->owner);
1016
+ if (!(waiter.last_rowner & RWSEM_READER_OWNED))
1017
+ waiter.last_rowner &= RWSEM_RD_NONSPINNABLE;
1018
+
1019
+ if (!rwsem_can_spin_on_owner(sem, RWSEM_RD_NONSPINNABLE))
1020
+ goto queue;
1021
+
1022
+ /*
1023
+ * Undo read bias from down_read() and do optimistic spinning.
1024
+ */
1025
+ atomic_long_add(-RWSEM_READER_BIAS, &sem->count);
1026
+ adjustment = 0;
1027
+ if (rwsem_optimistic_spin(sem, false)) {
1028
+ /* rwsem_optimistic_spin() implies ACQUIRE on success */
1029
+ /*
1030
+ * Wake up other readers in the wait list if the front
1031
+ * waiter is a reader.
1032
+ */
1033
+ if ((atomic_long_read(&sem->count) & RWSEM_FLAG_WAITERS)) {
1034
+ raw_spin_lock_irq(&sem->wait_lock);
1035
+ if (!list_empty(&sem->wait_list))
1036
+ rwsem_mark_wake(sem, RWSEM_WAKE_READ_OWNED,
1037
+ &wake_q);
1038
+ raw_spin_unlock_irq(&sem->wait_lock);
1039
+ wake_up_q(&wake_q);
1040
+ }
1041
+ trace_android_vh_record_rwsem_lock_starttime(current, jiffies);
1042
+ return sem;
1043
+ } else if (rwsem_reader_phase_trylock(sem, waiter.last_rowner)) {
1044
+ /* rwsem_reader_phase_trylock() implies ACQUIRE on success */
1045
+ trace_android_vh_record_rwsem_lock_starttime(current, jiffies);
1046
+ return sem;
1047
+ }
1048
+
1049
+queue:
1050
+ waiter.task = current;
1051
+ waiter.type = RWSEM_WAITING_FOR_READ;
1052
+ waiter.timeout = jiffies + RWSEM_WAIT_TIMEOUT;
1053
+
1054
+ raw_spin_lock_irq(&sem->wait_lock);
1055
+ if (list_empty(&sem->wait_list)) {
1056
+ /*
1057
+ * In case the wait queue is empty and the lock isn't owned
1058
+ * by a writer and doesn't have the handoff bit set, this reader can
1059
+ * exit the slowpath and return immediately as its
1060
+ * RWSEM_READER_BIAS has already been set in the count.
1061
+ */
1062
+ if (adjustment && !(atomic_long_read(&sem->count) &
1063
+ (RWSEM_WRITER_MASK | RWSEM_FLAG_HANDOFF))) {
1064
+ /* Provide lock ACQUIRE */
1065
+ smp_acquire__after_ctrl_dep();
1066
+ raw_spin_unlock_irq(&sem->wait_lock);
1067
+ rwsem_set_reader_owned(sem);
1068
+ lockevent_inc(rwsem_rlock_fast);
1069
+ trace_android_vh_record_rwsem_lock_starttime(
1070
+ current, jiffies);
1071
+ return sem;
1072
+ }
1073
+ adjustment += RWSEM_FLAG_WAITERS;
1074
+ }
1075
+ trace_android_vh_alter_rwsem_list_add(
1076
+ &waiter,
1077
+ sem, &already_on_list);
1078
+ if (!already_on_list)
1079
+ list_add_tail(&waiter.list, &sem->wait_list);
1080
+
1081
+ /* we're now waiting on the lock, but no longer actively locking */
1082
+ if (adjustment)
1083
+ count = atomic_long_add_return(adjustment, &sem->count);
1084
+ else
1085
+ count = atomic_long_read(&sem->count);
1086
+
1087
+ /*
1088
+ * If there are no active locks, wake the front queued process(es).
1089
+ *
1090
+ * If there are no writers and we are first in the queue,
1091
+ * wake our own waiter to join the existing active readers!
1092
+ */
1093
+ if (!(count & RWSEM_LOCK_MASK)) {
1094
+ clear_wr_nonspinnable(sem);
1095
+ wake = true;
1096
+ }
1097
+ if (wake || (!(count & RWSEM_WRITER_MASK) &&
1098
+ (adjustment & RWSEM_FLAG_WAITERS)))
1099
+ rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);
1100
+
1101
+ trace_android_vh_rwsem_wake(sem);
1102
+ raw_spin_unlock_irq(&sem->wait_lock);
1103
+ wake_up_q(&wake_q);
1104
+
1105
+ /* wait to be given the lock */
1106
+ trace_android_vh_rwsem_read_wait_start(sem);
1107
+ for (;;) {
1108
+ set_current_state(state);
1109
+ if (!smp_load_acquire(&waiter.task)) {
1110
+ /* Matches rwsem_mark_wake()'s smp_store_release(). */
1111
+ break;
1112
+ }
1113
+ if (signal_pending_state(state, current)) {
1114
+ raw_spin_lock_irq(&sem->wait_lock);
1115
+ if (waiter.task)
1116
+ goto out_nolock;
1117
+ raw_spin_unlock_irq(&sem->wait_lock);
1118
+ /* Ordered by sem->wait_lock against rwsem_mark_wake(). */
1119
+ break;
1120
+ }
1121
+ schedule();
1122
+ lockevent_inc(rwsem_sleep_reader);
1123
+ }
1124
+
1125
+ __set_current_state(TASK_RUNNING);
1126
+ trace_android_vh_rwsem_read_wait_finish(sem);
1127
+ lockevent_inc(rwsem_rlock);
1128
+ trace_android_vh_record_rwsem_lock_starttime(current, jiffies);
1129
+ return sem;
1130
+
1131
+out_nolock:
1132
+ list_del(&waiter.list);
1133
+ if (list_empty(&sem->wait_list)) {
1134
+ atomic_long_andnot(RWSEM_FLAG_WAITERS|RWSEM_FLAG_HANDOFF,
1135
+ &sem->count);
1136
+ }
1137
+ raw_spin_unlock_irq(&sem->wait_lock);
1138
+ __set_current_state(TASK_RUNNING);
1139
+ trace_android_vh_rwsem_read_wait_finish(sem);
1140
+ lockevent_inc(rwsem_rlock_fail);
1141
+ return ERR_PTR(-EINTR);
1142
+}
1143
+
1144
+/*
1145
+ * This function is called by a write lock owner, so the owner value
1146
+ * won't get changed by others.
1147
+ */
1148
+static inline void rwsem_disable_reader_optspin(struct rw_semaphore *sem,
1149
+ bool disable)
1150
+{
1151
+ if (unlikely(disable)) {
1152
+ atomic_long_or(RWSEM_RD_NONSPINNABLE, &sem->owner);
1153
+ lockevent_inc(rwsem_opt_norspin);
1154
+ }
1155
+}
1156
+
1157
+/*
1158
+ * Wait until we successfully acquire the write lock
1159
+ */
1160
+static struct rw_semaphore *
1161
+rwsem_down_write_slowpath(struct rw_semaphore *sem, int state)
1162
+{
1163
+ long count;
1164
+ bool disable_rspin;
1165
+ enum writer_wait_state wstate;
1166
+ struct rwsem_waiter waiter;
1167
+ struct rw_semaphore *ret = sem;
1168
+ DEFINE_WAKE_Q(wake_q);
1169
+ bool already_on_list = false;
1170
+
1171
+ /* do optimistic spinning and steal lock if possible */
1172
+ if (rwsem_can_spin_on_owner(sem, RWSEM_WR_NONSPINNABLE) &&
1173
+ rwsem_optimistic_spin(sem, true)) {
1174
+ /* rwsem_optimistic_spin() implies ACQUIRE on success */
1175
+ trace_android_vh_record_rwsem_lock_starttime(current, jiffies);
1176
+ return sem;
1177
+ }
1178
+
1179
+ /*
1180
+ * Disable reader optimistic spinning for this rwsem after
1181
+ * acquiring the write lock when the setting of the nonspinnable
1182
+ * bits are observed.
1183
+ */
1184
+ disable_rspin = atomic_long_read(&sem->owner) & RWSEM_NONSPINNABLE;
1185
+
1186
+ /*
1187
+ * Optimistic spinning failed, proceed to the slowpath
1188
+ * and block until we can acquire the sem.
1189
+ */
1190
+ waiter.task = current;
1191
+ waiter.type = RWSEM_WAITING_FOR_WRITE;
1192
+ waiter.timeout = jiffies + RWSEM_WAIT_TIMEOUT;
1193
+
1194
+ raw_spin_lock_irq(&sem->wait_lock);
1195
+
1196
+ /* account for this before adding a new element to the list */
1197
+ wstate = list_empty(&sem->wait_list) ? WRITER_FIRST : WRITER_NOT_FIRST;
1198
+
1199
+ trace_android_vh_alter_rwsem_list_add(
1200
+ &waiter,
1201
+ sem, &already_on_list);
1202
+ if (!already_on_list)
1203
+ list_add_tail(&waiter.list, &sem->wait_list);
1204
+
1205
+ /* we're now waiting on the lock */
1206
+ if (wstate == WRITER_NOT_FIRST) {
1207
+ count = atomic_long_read(&sem->count);
1208
+
1209
+ /*
1210
+ * If there were already threads queued before us and:
1211
+ * 1) there are no active locks, wake the front
1212
+ * queued process(es) as the handoff bit might be set.
1213
+ * 2) there are no active writers and some readers, the lock
1214
+ * must be read owned; so we try to wake any read lock
1215
+ * waiters that were queued ahead of us.
1216
+ */
1217
+ if (count & RWSEM_WRITER_MASK)
1218
+ goto wait;
1219
+
1220
+ rwsem_mark_wake(sem, (count & RWSEM_READER_MASK)
1221
+ ? RWSEM_WAKE_READERS
1222
+ : RWSEM_WAKE_ANY, &wake_q);
1223
+
1224
+ if (!wake_q_empty(&wake_q)) {
1225
+ /*
1226
+ * We want to minimize wait_lock hold time especially
1227
+ * when a large number of readers are to be woken up.
1228
+ */
1229
+ raw_spin_unlock_irq(&sem->wait_lock);
1230
+ wake_up_q(&wake_q);
1231
+ wake_q_init(&wake_q); /* Used again, reinit */
1232
+ raw_spin_lock_irq(&sem->wait_lock);
1233
+ }
1234
+ } else {
1235
+ atomic_long_or(RWSEM_FLAG_WAITERS, &sem->count);
1236
+ }
1237
+
1238
+wait:
1239
+ trace_android_vh_rwsem_wake(sem);
1240
+ /* wait until we successfully acquire the lock */
1241
+ trace_android_vh_rwsem_write_wait_start(sem);
1242
+ set_current_state(state);
1243
+ for (;;) {
1244
+ if (rwsem_try_write_lock(sem, wstate)) {
1245
+ /* rwsem_try_write_lock() implies ACQUIRE on success */
1246
+ break;
1247
+ }
1248
+
1249
+ raw_spin_unlock_irq(&sem->wait_lock);
1250
+
1251
+ /*
1252
+ * After setting the handoff bit and failing to acquire
1253
+ * the lock, attempt to spin on owner to accelerate lock
1254
+ * transfer. If the previous owner is an on-cpu writer and it
1255
+ * has just released the lock, OWNER_NULL will be returned.
1256
+ * In this case, we attempt to acquire the lock again
1257
+ * without sleeping.
1258
+ */
1259
+ if (wstate == WRITER_HANDOFF &&
1260
+ rwsem_spin_on_owner(sem, RWSEM_NONSPINNABLE) == OWNER_NULL)
1261
+ goto trylock_again;
1262
+
1263
+ /* Block until there are no active lockers. */
1264
+ for (;;) {
1265
+ if (signal_pending_state(state, current))
1266
+ goto out_nolock;
1267
+
1268
+ schedule();
1269
+ lockevent_inc(rwsem_sleep_writer);
1270
+ set_current_state(state);
1271
+ /*
1272
+ * If HANDOFF bit is set, unconditionally do
1273
+ * a trylock.
1274
+ */
1275
+ if (wstate == WRITER_HANDOFF)
1276
+ break;
1277
+
1278
+ if ((wstate == WRITER_NOT_FIRST) &&
1279
+ (rwsem_first_waiter(sem) == &waiter))
1280
+ wstate = WRITER_FIRST;
1281
+
1282
+ count = atomic_long_read(&sem->count);
1283
+ if (!(count & RWSEM_LOCK_MASK))
1284
+ break;
1285
+
1286
+ /*
1287
+ * The setting of the handoff bit is deferred
1288
+ * until rwsem_try_write_lock() is called.
1289
+ */
1290
+ if ((wstate == WRITER_FIRST) && (rt_task(current) ||
1291
+ time_after(jiffies, waiter.timeout))) {
1292
+ wstate = WRITER_HANDOFF;
1293
+ lockevent_inc(rwsem_wlock_handoff);
1294
+ break;
1295
+ }
1296
+ }
1297
+trylock_again:
1298
+ raw_spin_lock_irq(&sem->wait_lock);
1299
+ }
1300
+ __set_current_state(TASK_RUNNING);
1301
+ trace_android_vh_rwsem_write_wait_finish(sem);
1302
+ list_del(&waiter.list);
1303
+ rwsem_disable_reader_optspin(sem, disable_rspin);
1304
+ raw_spin_unlock_irq(&sem->wait_lock);
1305
+ lockevent_inc(rwsem_wlock);
1306
+ trace_android_vh_record_rwsem_lock_starttime(current, jiffies);
1307
+ return ret;
1308
+
1309
+out_nolock:
1310
+ __set_current_state(TASK_RUNNING);
1311
+ trace_android_vh_rwsem_write_wait_finish(sem);
1312
+ raw_spin_lock_irq(&sem->wait_lock);
1313
+ list_del(&waiter.list);
1314
+
1315
+ if (unlikely(wstate == WRITER_HANDOFF))
1316
+ atomic_long_andnot(RWSEM_FLAG_HANDOFF, &sem->count);
1317
+
1318
+ if (list_empty(&sem->wait_list))
1319
+ atomic_long_andnot(RWSEM_FLAG_WAITERS, &sem->count);
1320
+ else
1321
+ rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);
1322
+ raw_spin_unlock_irq(&sem->wait_lock);
1323
+ wake_up_q(&wake_q);
1324
+ lockevent_inc(rwsem_wlock_fail);
1325
+
1326
+ return ERR_PTR(-EINTR);
1327
+}
1328
+
1329
+/*
1330
+ * handle waking up a waiter on the semaphore
1331
+ * - up_read/up_write has decremented the active part of count if we come here
1332
+ */
1333
+static struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem, long count)
1334
+{
1335
+ unsigned long flags;
1336
+ DEFINE_WAKE_Q(wake_q);
1337
+
1338
+ raw_spin_lock_irqsave(&sem->wait_lock, flags);
1339
+
1340
+ if (!list_empty(&sem->wait_list))
1341
+ rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);
1342
+ trace_android_vh_rwsem_wake_finish(sem);
1343
+
1344
+ raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
1345
+ wake_up_q(&wake_q);
1346
+
1347
+ return sem;
1348
+}
1349
+
1350
+/*
1351
+ * downgrade a write lock into a read lock
1352
+ * - caller incremented waiting part of count and discovered it still negative
1353
+ * - just wake up any readers at the front of the queue
1354
+ */
1355
+static struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem)
1356
+{
1357
+ unsigned long flags;
1358
+ DEFINE_WAKE_Q(wake_q);
1359
+
1360
+ raw_spin_lock_irqsave(&sem->wait_lock, flags);
1361
+
1362
+ if (!list_empty(&sem->wait_list))
1363
+ rwsem_mark_wake(sem, RWSEM_WAKE_READ_OWNED, &wake_q);
1364
+
1365
+ raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
1366
+ wake_up_q(&wake_q);
1367
+
1368
+ return sem;
1369
+}
1370
+
1371
+/*
1372
+ * lock for reading
1373
+ */
1374
+static inline void __down_read(struct rw_semaphore *sem)
1375
+{
1376
+ if (!rwsem_read_trylock(sem)) {
1377
+ rwsem_down_read_slowpath(sem, TASK_UNINTERRUPTIBLE);
1378
+ DEBUG_RWSEMS_WARN_ON(!is_rwsem_reader_owned(sem), sem);
1379
+ } else {
1380
+ rwsem_set_reader_owned(sem);
1381
+ }
1382
+}
1383
+
1384
+static inline int __down_read_interruptible(struct rw_semaphore *sem)
1385
+{
1386
+ if (!rwsem_read_trylock(sem)) {
1387
+ if (IS_ERR(rwsem_down_read_slowpath(sem, TASK_INTERRUPTIBLE)))
1388
+ return -EINTR;
1389
+ DEBUG_RWSEMS_WARN_ON(!is_rwsem_reader_owned(sem), sem);
1390
+ } else {
1391
+ rwsem_set_reader_owned(sem);
1392
+ }
1393
+ return 0;
1394
+}
1395
+
1396
+static inline int __down_read_killable(struct rw_semaphore *sem)
1397
+{
1398
+ if (!rwsem_read_trylock(sem)) {
1399
+ if (IS_ERR(rwsem_down_read_slowpath(sem, TASK_KILLABLE)))
1400
+ return -EINTR;
1401
+ DEBUG_RWSEMS_WARN_ON(!is_rwsem_reader_owned(sem), sem);
1402
+ } else {
1403
+ rwsem_set_reader_owned(sem);
1404
+ }
1405
+ return 0;
1406
+}
1407
+
1408
+static inline int __down_read_trylock(struct rw_semaphore *sem)
1409
+{
1410
+ long tmp;
1411
+
1412
+ DEBUG_RWSEMS_WARN_ON(sem->magic != sem, sem);
1413
+
1414
+ /*
1415
+ * Optimize for the case when the rwsem is not locked at all.
1416
+ */
1417
+ tmp = RWSEM_UNLOCKED_VALUE;
1418
+ do {
1419
+ if (atomic_long_try_cmpxchg_acquire(&sem->count, &tmp,
1420
+ tmp + RWSEM_READER_BIAS)) {
1421
+ rwsem_set_reader_owned(sem);
1422
+ trace_android_vh_record_rwsem_lock_starttime(current, jiffies);
1423
+ return 1;
1424
+ }
1425
+ } while (!(tmp & RWSEM_READ_FAILED_MASK));
1426
+ return 0;
1427
+}
1428
+
1429
+/*
1430
+ * lock for writing
1431
+ */
1432
+static inline void __down_write(struct rw_semaphore *sem)
1433
+{
1434
+ long tmp = RWSEM_UNLOCKED_VALUE;
1435
+
1436
+ if (unlikely(!atomic_long_try_cmpxchg_acquire(&sem->count, &tmp,
1437
+ RWSEM_WRITER_LOCKED))) {
1438
+ rwsem_down_write_slowpath(sem, TASK_UNINTERRUPTIBLE);
1439
+ } else {
1440
+ trace_android_vh_record_rwsem_lock_starttime(current, jiffies);
1441
+ rwsem_set_owner(sem);
1442
+ }
1443
+}
1444
+
1445
+static inline int __down_write_killable(struct rw_semaphore *sem)
1446
+{
1447
+ long tmp = RWSEM_UNLOCKED_VALUE;
1448
+
1449
+ if (unlikely(!atomic_long_try_cmpxchg_acquire(&sem->count, &tmp,
1450
+ RWSEM_WRITER_LOCKED))) {
1451
+ if (IS_ERR(rwsem_down_write_slowpath(sem, TASK_KILLABLE)))
1452
+ return -EINTR;
1453
+ } else {
1454
+ trace_android_vh_record_rwsem_lock_starttime(current, jiffies);
1455
+ rwsem_set_owner(sem);
1456
+ }
1457
+ return 0;
1458
+}
1459
+
1460
+static inline int __down_write_trylock(struct rw_semaphore *sem)
1461
+{
1462
+ long tmp;
1463
+
1464
+ DEBUG_RWSEMS_WARN_ON(sem->magic != sem, sem);
1465
+
1466
+ tmp = RWSEM_UNLOCKED_VALUE;
1467
+ if (atomic_long_try_cmpxchg_acquire(&sem->count, &tmp,
1468
+ RWSEM_WRITER_LOCKED)) {
1469
+ rwsem_set_owner(sem);
1470
+ trace_android_vh_record_rwsem_lock_starttime(current, jiffies);
1471
+ return true;
1472
+ }
1473
+ return false;
1474
+}
1475
+
1476
+/*
1477
+ * unlock after reading
1478
+ */
1479
+static inline void __up_read(struct rw_semaphore *sem)
1480
+{
1481
+ long tmp;
1482
+
1483
+ DEBUG_RWSEMS_WARN_ON(sem->magic != sem, sem);
1484
+ DEBUG_RWSEMS_WARN_ON(!is_rwsem_reader_owned(sem), sem);
1485
+
1486
+ trace_android_vh_record_rwsem_lock_starttime(current, 0);
1487
+ rwsem_clear_reader_owned(sem);
1488
+ tmp = atomic_long_add_return_release(-RWSEM_READER_BIAS, &sem->count);
1489
+ DEBUG_RWSEMS_WARN_ON(tmp < 0, sem);
1490
+ if (unlikely((tmp & (RWSEM_LOCK_MASK|RWSEM_FLAG_WAITERS)) ==
1491
+ RWSEM_FLAG_WAITERS)) {
1492
+ clear_wr_nonspinnable(sem);
1493
+ rwsem_wake(sem, tmp);
1494
+ }
1495
+ trace_android_vh_rwsem_up_read_end(sem);
1496
+}
1497
+
1498
+/*
1499
+ * unlock after writing
1500
+ */
1501
+static inline void __up_write(struct rw_semaphore *sem)
1502
+{
1503
+ long tmp;
1504
+
1505
+ DEBUG_RWSEMS_WARN_ON(sem->magic != sem, sem);
1506
+ /*
1507
+ * sem->owner may differ from current if the ownership is transferred
1508
+ * to an anonymous writer by setting the RWSEM_NONSPINNABLE bits.
1509
+ */
1510
+ DEBUG_RWSEMS_WARN_ON((rwsem_owner(sem) != current) &&
1511
+ !rwsem_test_oflags(sem, RWSEM_NONSPINNABLE), sem);
1512
+
1513
+ trace_android_vh_record_rwsem_lock_starttime(current, 0);
1514
+ rwsem_clear_owner(sem);
1515
+ tmp = atomic_long_fetch_add_release(-RWSEM_WRITER_LOCKED, &sem->count);
1516
+ if (unlikely(tmp & RWSEM_FLAG_WAITERS))
1517
+ rwsem_wake(sem, tmp);
1518
+ trace_android_vh_rwsem_up_write_end(sem);
1519
+}
1520
+
1521
+/*
1522
+ * downgrade write lock to read lock
1523
+ */
1524
+static inline void __downgrade_write(struct rw_semaphore *sem)
1525
+{
1526
+ long tmp;
1527
+
1528
+ /*
1529
+ * When downgrading from exclusive to shared ownership,
1530
+ * anything inside the write-locked region cannot leak
1531
+ * into the read side. In contrast, anything in the
1532
+ * read-locked region is ok to be re-ordered into the
1533
+ * write side. As such, rely on RELEASE semantics.
1534
+ */
1535
+ DEBUG_RWSEMS_WARN_ON(rwsem_owner(sem) != current, sem);
1536
+ tmp = atomic_long_fetch_add_release(
1537
+ -RWSEM_WRITER_LOCKED+RWSEM_READER_BIAS, &sem->count);
1538
+ rwsem_set_reader_owned(sem);
1539
+ if (tmp & RWSEM_FLAG_WAITERS)
1540
+ rwsem_downgrade_wake(sem);
1541
+}
171542
181543 /*
191544 * lock for reading
@@ -24,10 +1549,22 @@
241549 rwsem_acquire_read(&sem->dep_map, 0, 0, _RET_IP_);
251550
261551 LOCK_CONTENDED(sem, __down_read_trylock, __down_read);
27
- rwsem_set_reader_owned(sem);
281552 }
29
-
301553 EXPORT_SYMBOL(down_read);
1554
+
1555
+int __sched down_read_interruptible(struct rw_semaphore *sem)
1556
+{
1557
+ might_sleep();
1558
+ rwsem_acquire_read(&sem->dep_map, 0, 0, _RET_IP_);
1559
+
1560
+ if (LOCK_CONTENDED_RETURN(sem, __down_read_trylock, __down_read_interruptible)) {
1561
+ rwsem_release(&sem->dep_map, _RET_IP_);
1562
+ return -EINTR;
1563
+ }
1564
+
1565
+ return 0;
1566
+}
1567
+EXPORT_SYMBOL(down_read_interruptible);
311568
321569 int __sched down_read_killable(struct rw_semaphore *sem)
331570 {
@@ -35,14 +1572,12 @@
351572 rwsem_acquire_read(&sem->dep_map, 0, 0, _RET_IP_);
361573
371574 if (LOCK_CONTENDED_RETURN(sem, __down_read_trylock, __down_read_killable)) {
38
- rwsem_release(&sem->dep_map, 1, _RET_IP_);
1575
+ rwsem_release(&sem->dep_map, _RET_IP_);
391576 return -EINTR;
401577 }
411578
42
- rwsem_set_reader_owned(sem);
431579 return 0;
441580 }
45
-
461581 EXPORT_SYMBOL(down_read_killable);
471582
481583 /*
@@ -52,13 +1587,10 @@
521587 {
531588 int ret = __down_read_trylock(sem);
541589
55
- if (ret == 1) {
1590
+ if (ret == 1)
561591 rwsem_acquire_read(&sem->dep_map, 0, 1, _RET_IP_);
57
- rwsem_set_reader_owned(sem);
58
- }
591592 return ret;
601593 }
61
-
621594 EXPORT_SYMBOL(down_read_trylock);
631595
641596 /*
@@ -68,11 +1600,8 @@
681600 {
691601 might_sleep();
701602 rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_);
71
-
721603 LOCK_CONTENDED(sem, __down_write_trylock, __down_write);
73
- rwsem_set_owner(sem);
741604 }
75
-
761605 EXPORT_SYMBOL(down_write);
771606
781607 /*
@@ -83,15 +1612,14 @@
831612 might_sleep();
841613 rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_);
851614
86
- if (LOCK_CONTENDED_RETURN(sem, __down_write_trylock, __down_write_killable)) {
87
- rwsem_release(&sem->dep_map, 1, _RET_IP_);
1615
+ if (LOCK_CONTENDED_RETURN(sem, __down_write_trylock,
1616
+ __down_write_killable)) {
1617
+ rwsem_release(&sem->dep_map, _RET_IP_);
881618 return -EINTR;
891619 }
901620
91
- rwsem_set_owner(sem);
921621 return 0;
931622 }
94
-
951623 EXPORT_SYMBOL(down_write_killable);
961624
971625 /*
@@ -101,14 +1629,11 @@
1011629 {
1021630 int ret = __down_write_trylock(sem);
1031631
104
- if (ret == 1) {
1632
+ if (ret == 1)
1051633 rwsem_acquire(&sem->dep_map, 0, 1, _RET_IP_);
106
- rwsem_set_owner(sem);
107
- }
1081634
1091635 return ret;
1101636 }
111
-
1121637 EXPORT_SYMBOL(down_write_trylock);
1131638
1141639 /*
@@ -116,12 +1641,9 @@
1161641 */
1171642 void up_read(struct rw_semaphore *sem)
1181643 {
119
- rwsem_release(&sem->dep_map, 1, _RET_IP_);
120
- DEBUG_RWSEMS_WARN_ON(sem->owner != RWSEM_READER_OWNED);
121
-
1644
+ rwsem_release(&sem->dep_map, _RET_IP_);
1221645 __up_read(sem);
1231646 }
124
-
1251647 EXPORT_SYMBOL(up_read);
1261648
1271649 /*
@@ -129,13 +1651,10 @@
1291651 */
1301652 void up_write(struct rw_semaphore *sem)
1311653 {
132
- rwsem_release(&sem->dep_map, 1, _RET_IP_);
133
- DEBUG_RWSEMS_WARN_ON(sem->owner != current);
134
-
135
- rwsem_clear_owner(sem);
1654
+ rwsem_release(&sem->dep_map, _RET_IP_);
1655
+ trace_android_vh_rwsem_write_finished(sem);
1361656 __up_write(sem);
1371657 }
138
-
1391658 EXPORT_SYMBOL(up_write);
1401659
1411660 /*
@@ -144,12 +1663,9 @@
1441663 void downgrade_write(struct rw_semaphore *sem)
1451664 {
1461665 lock_downgrade(&sem->dep_map, _RET_IP_);
147
- DEBUG_RWSEMS_WARN_ON(sem->owner != current);
148
-
149
- rwsem_set_reader_owned(sem);
1666
+ trace_android_vh_rwsem_write_finished(sem);
1501667 __downgrade_write(sem);
1511668 }
152
-
1531669 EXPORT_SYMBOL(downgrade_write);
1541670
1551671 #ifdef CONFIG_DEBUG_LOCK_ALLOC
@@ -158,43 +1674,46 @@
1581674 {
1591675 might_sleep();
1601676 rwsem_acquire_read(&sem->dep_map, subclass, 0, _RET_IP_);
161
-
1621677 LOCK_CONTENDED(sem, __down_read_trylock, __down_read);
163
- rwsem_set_reader_owned(sem);
1641678 }
165
-
1661679 EXPORT_SYMBOL(down_read_nested);
1680
+
1681
+int down_read_killable_nested(struct rw_semaphore *sem, int subclass)
1682
+{
1683
+ might_sleep();
1684
+ rwsem_acquire_read(&sem->dep_map, subclass, 0, _RET_IP_);
1685
+
1686
+ if (LOCK_CONTENDED_RETURN(sem, __down_read_trylock, __down_read_killable)) {
1687
+ rwsem_release(&sem->dep_map, _RET_IP_);
1688
+ return -EINTR;
1689
+ }
1690
+
1691
+ return 0;
1692
+}
1693
+EXPORT_SYMBOL(down_read_killable_nested);
1671694
1681695 void _down_write_nest_lock(struct rw_semaphore *sem, struct lockdep_map *nest)
1691696 {
1701697 might_sleep();
1711698 rwsem_acquire_nest(&sem->dep_map, 0, 0, nest, _RET_IP_);
172
-
1731699 LOCK_CONTENDED(sem, __down_write_trylock, __down_write);
174
- rwsem_set_owner(sem);
1751700 }
176
-
1771701 EXPORT_SYMBOL(_down_write_nest_lock);
1781702
1791703 void down_read_non_owner(struct rw_semaphore *sem)
1801704 {
1811705 might_sleep();
182
-
1831706 __down_read(sem);
184
- rwsem_set_reader_owned(sem);
1707
+ __rwsem_set_reader_owned(sem, NULL);
1851708 }
186
-
1871709 EXPORT_SYMBOL(down_read_non_owner);
1881710
1891711 void down_write_nested(struct rw_semaphore *sem, int subclass)
1901712 {
1911713 might_sleep();
1921714 rwsem_acquire(&sem->dep_map, subclass, 0, _RET_IP_);
193
-
1941715 LOCK_CONTENDED(sem, __down_write_trylock, __down_write);
195
- rwsem_set_owner(sem);
1961716 }
197
-
1981717 EXPORT_SYMBOL(down_write_nested);
1991718
2001719 int __sched down_write_killable_nested(struct rw_semaphore *sem, int subclass)
@@ -202,23 +1721,21 @@
2021721 might_sleep();
2031722 rwsem_acquire(&sem->dep_map, subclass, 0, _RET_IP_);
2041723
205
- if (LOCK_CONTENDED_RETURN(sem, __down_write_trylock, __down_write_killable)) {
206
- rwsem_release(&sem->dep_map, 1, _RET_IP_);
1724
+ if (LOCK_CONTENDED_RETURN(sem, __down_write_trylock,
1725
+ __down_write_killable)) {
1726
+ rwsem_release(&sem->dep_map, _RET_IP_);
2071727 return -EINTR;
2081728 }
2091729
210
- rwsem_set_owner(sem);
2111730 return 0;
2121731 }
213
-
2141732 EXPORT_SYMBOL(down_write_killable_nested);
2151733
2161734 void up_read_non_owner(struct rw_semaphore *sem)
2171735 {
218
- DEBUG_RWSEMS_WARN_ON(sem->owner != RWSEM_READER_OWNED);
1736
+ DEBUG_RWSEMS_WARN_ON(!is_rwsem_reader_owned(sem), sem);
2191737 __up_read(sem);
2201738 }
221
-
2221739 EXPORT_SYMBOL(up_read_non_owner);
2231740
2241741 #endif