| .. | .. | 
|---|
| 3 | 3 |   * | 
|---|
| 4 | 4 |   * Written by David Howells (dhowells@redhat.com). | 
|---|
| 5 | 5 |   * Derived from asm-i386/semaphore.h | 
|---|
 | 6 | + *  | 
|---|
 | 7 | + * Writer lock-stealing by Alex Shi <alex.shi@intel.com>  | 
|---|
 | 8 | + * and Michel Lespinasse <walken@google.com>  | 
|---|
 | 9 | + *  | 
|---|
 | 10 | + * Optimistic spinning by Tim Chen <tim.c.chen@intel.com>  | 
|---|
 | 11 | + * and Davidlohr Bueso <davidlohr@hp.com>. Based on mutexes.  | 
|---|
 | 12 | + *  | 
|---|
 | 13 | + * Rwsem count bit fields re-definition and rwsem rearchitecture by  | 
|---|
 | 14 | + * Waiman Long <longman@redhat.com> and  | 
|---|
 | 15 | + * Peter Zijlstra <peterz@infradead.org>.  | 
|---|
| 6 | 16 |   */ | 
|---|
| 7 | 17 |   | 
|---|
| 8 | 18 |  #include <linux/types.h> | 
|---|
| 9 | 19 |  #include <linux/kernel.h> | 
|---|
| 10 | 20 |  #include <linux/sched.h> | 
|---|
 | 21 | +#include <linux/sched/rt.h>  | 
|---|
 | 22 | +#include <linux/sched/task.h>  | 
|---|
| 11 | 23 |  #include <linux/sched/debug.h> | 
|---|
 | 24 | +#include <linux/sched/wake_q.h>  | 
|---|
 | 25 | +#include <linux/sched/signal.h>  | 
|---|
 | 26 | +#include <linux/sched/clock.h>  | 
|---|
| 12 | 27 |  #include <linux/export.h> | 
|---|
| 13 | 28 |  #include <linux/rwsem.h> | 
|---|
| 14 | 29 |  #include <linux/atomic.h> | 
|---|
| 15 | 30 |   | 
|---|
| 16 |  | -#include "rwsem.h"  | 
|---|
 | 31 | +#ifndef CONFIG_PREEMPT_RT  | 
|---|
 | 32 | +#include "lock_events.h"  | 
|---|
 | 33 | +#include <trace/hooks/rwsem.h>  | 
|---|
 | 34 | +#include <trace/hooks/dtask.h>  | 
|---|
 | 35 | +  | 
|---|
 | 36 | +/*  | 
|---|
 | 37 | + * The least significant 3 bits of the owner value have the following  | 
|---|
 | 38 | + * meanings when set.  | 
|---|
 | 39 | + *  - Bit 0: RWSEM_READER_OWNED - The rwsem is owned by readers  | 
|---|
 | 40 | + *  - Bit 1: RWSEM_RD_NONSPINNABLE - Readers cannot spin on this lock.  | 
|---|
 | 41 | + *  - Bit 2: RWSEM_WR_NONSPINNABLE - Writers cannot spin on this lock.  | 
|---|
 | 42 | + *  | 
|---|
 | 43 | + * When the rwsem is either owned by an anonymous writer, or it is  | 
|---|
 | 44 | + * reader-owned, but a spinning writer has timed out, both nonspinnable  | 
|---|
 | 45 | + * bits will be set to disable optimistic spinning by readers and writers.  | 
|---|
 | 46 | + * In the latter case, the last unlocking reader should then check the  | 
|---|
 | 47 | + * writer nonspinnable bit and clear only that bit, giving writers, but  | 
|---|
 | 48 | + * not readers, preference to acquire the lock via optimistic spinning. Similar  | 
|---|
 | 49 | + * action is also done in the reader slowpath.  | 
|---|
 | 50 | + *  | 
|---|
 | 51 | + * When a writer acquires a rwsem, it puts its task_struct pointer  | 
|---|
 | 52 | + * into the owner field. It is cleared after an unlock.  | 
|---|
 | 53 | + *  | 
|---|
 | 54 | + * When a reader acquires a rwsem, it will also put its task_struct  | 
|---|
 | 55 | + * pointer into the owner field with the RWSEM_READER_OWNED bit set.  | 
|---|
 | 56 | + * On unlock, the owner field will largely be left untouched. So  | 
|---|
 | 57 | + * for a free or reader-owned rwsem, the owner value may contain  | 
|---|
 | 58 | + * information about the last reader that acquired the rwsem.  | 
|---|
 | 59 | + *  | 
|---|
 | 60 | + * That information may be helpful in debugging cases where the system  | 
|---|
 | 61 | + * seems to hang on a reader-owned rwsem, especially if only one reader  | 
|---|
 | 62 | + * is involved. Ideally we would like to track all the readers that own  | 
|---|
 | 63 | + * a rwsem, but the overhead is simply too big.  | 
|---|
 | 64 | + *  | 
|---|
 | 65 | + * Reader optimistic spinning is helpful when the reader critical section  | 
|---|
 | 66 | + * is short and there aren't that many readers around. It makes readers  | 
|---|
 | 67 | + * relatively more preferred than writers. When a writer times out spinning  | 
|---|
 | 68 | + * on a reader-owned lock and sets the nonspinnable bits, there are two main  | 
|---|
 | 69 | + * reasons for that.  | 
|---|
 | 70 | + *  | 
|---|
 | 71 | + *  1) The reader critical section is long, perhaps the task sleeps after  | 
|---|
 | 72 | + *     acquiring the read lock.  | 
|---|
 | 73 | + *  2) There are just too many readers contending the lock causing it to  | 
|---|
 | 74 | + *     take a while to service all of them.  | 
|---|
 | 75 | + *  | 
|---|
 | 76 | + * In the former case, a long reader critical section will impede the progress  | 
|---|
 | 77 | + * of writers, which is usually more important for system performance. In  | 
|---|
 | 78 | + * the latter case, reader optimistic spinning tends to make the reader  | 
|---|
 | 79 | + * groups that contain readers that acquire the lock together smaller  | 
|---|
 | 80 | + * leading to more of them. That may hurt performance in some cases. In  | 
|---|
 | 81 | + * other words, the setting of nonspinnable bits indicates that reader  | 
|---|
 | 82 | + * optimistic spinning may not be helpful for those workloads that cause  | 
|---|
 | 83 | + * it.  | 
|---|
 | 84 | + *  | 
|---|
 | 85 | + * Therefore, any writers that had observed the setting of the writer  | 
|---|
 | 86 | + * nonspinnable bit for a given rwsem after they fail to acquire the lock  | 
|---|
 | 87 | + * via optimistic spinning will set the reader nonspinnable bit once they  | 
|---|
 | 88 | + * acquire the write lock. Similarly, readers that observe the setting  | 
|---|
 | 89 | + * of reader nonspinnable bit at slowpath entry will set the reader  | 
|---|
 | 90 | + * nonspinnable bits when they acquire the read lock via the wakeup path.  | 
|---|
 | 91 | + *  | 
|---|
 | 92 | + * Once the reader nonspinnable bit is on, it will only be reset when  | 
|---|
 | 93 | + * a writer is able to acquire the rwsem in the fast path or somehow a  | 
|---|
 | 94 | + * reader or writer in the slowpath doesn't observe the nonspinnable bit.  | 
|---|
 | 95 | + *  | 
|---|
 | 96 | + * This is to discourage reader optimistic spinning on that particular  | 
|---|
 | 97 | + * rwsem and give writers preference. This adaptive disabling of reader  | 
|---|
 | 98 | + * optimistic spinning will alleviate the negative side effect of this  | 
|---|
 | 99 | + * feature.  | 
|---|
 | 100 | + */  | 
|---|
 | 101 | +#define RWSEM_READER_OWNED	(1UL << 0)  | 
|---|
 | 102 | +#define RWSEM_RD_NONSPINNABLE	(1UL << 1)  | 
|---|
 | 103 | +#define RWSEM_WR_NONSPINNABLE	(1UL << 2)  | 
|---|
 | 104 | +#define RWSEM_NONSPINNABLE	(RWSEM_RD_NONSPINNABLE | RWSEM_WR_NONSPINNABLE)  | 
|---|
 | 105 | +#define RWSEM_OWNER_FLAGS_MASK	(RWSEM_READER_OWNED | RWSEM_NONSPINNABLE)  | 
|---|
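
As an aside, here is a minimal stand-alone sketch of how an owner word built from the flag bits above decodes back into a task pointer plus flags (the low 3 bits are free because task_struct is always at least 8-byte aligned). The file's own rwsem_owner_flags() further down performs the same split, so this is purely illustrative and the address used is hypothetical.

```c
#include <stdio.h>

/* Flag bits mirrored from the kernel definitions above (illustrative only). */
#define RWSEM_READER_OWNED	(1UL << 0)
#define RWSEM_RD_NONSPINNABLE	(1UL << 1)
#define RWSEM_WR_NONSPINNABLE	(1UL << 2)
#define RWSEM_NONSPINNABLE	(RWSEM_RD_NONSPINNABLE | RWSEM_WR_NONSPINNABLE)
#define RWSEM_OWNER_FLAGS_MASK	(RWSEM_READER_OWNED | RWSEM_NONSPINNABLE)

int main(void)
{
	/* Hypothetical (8-byte aligned) task_struct address of the last reader. */
	unsigned long owner = 0x12345600UL | RWSEM_READER_OWNED;

	unsigned long flags = owner & RWSEM_OWNER_FLAGS_MASK;	/* low 3 bits   */
	unsigned long task  = owner & ~RWSEM_OWNER_FLAGS_MASK;	/* task pointer */

	printf("task=%#lx reader_owned=%d rd_nospin=%d wr_nospin=%d\n",
	       task,
	       !!(flags & RWSEM_READER_OWNED),
	       !!(flags & RWSEM_RD_NONSPINNABLE),
	       !!(flags & RWSEM_WR_NONSPINNABLE));
	return 0;
}
```
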
 | 106 | +  | 
|---|
 | 107 | +#ifdef CONFIG_DEBUG_RWSEMS  | 
|---|
 | 108 | +# define DEBUG_RWSEMS_WARN_ON(c, sem)	do {			\  | 
|---|
 | 109 | +	if (!debug_locks_silent &&				\  | 
|---|
 | 110 | +	    WARN_ONCE(c, "DEBUG_RWSEMS_WARN_ON(%s): count = 0x%lx, magic = 0x%lx, owner = 0x%lx, curr 0x%lx, list %sempty\n",\  | 
|---|
 | 111 | +		#c, atomic_long_read(&(sem)->count),		\  | 
|---|
 | 112 | +		(unsigned long) sem->magic,			\  | 
|---|
 | 113 | +		atomic_long_read(&(sem)->owner), (long)current,	\  | 
|---|
 | 114 | +		list_empty(&(sem)->wait_list) ? "" : "not "))	\  | 
|---|
 | 115 | +			debug_locks_off();			\  | 
|---|
 | 116 | +	} while (0)  | 
|---|
 | 117 | +#else  | 
|---|
 | 118 | +# define DEBUG_RWSEMS_WARN_ON(c, sem)  | 
|---|
 | 119 | +#endif  | 
|---|
 | 120 | +  | 
|---|
 | 121 | +/*  | 
|---|
 | 122 | + * On 64-bit architectures, the bit definitions of the count are:  | 
|---|
 | 123 | + *  | 
|---|
 | 124 | + * Bit  0    - writer locked bit  | 
|---|
 | 125 | + * Bit  1    - waiters present bit  | 
|---|
 | 126 | + * Bit  2    - lock handoff bit  | 
|---|
 | 127 | + * Bits 3-7  - reserved  | 
|---|
 | 128 | + * Bits 8-62 - 55-bit reader count  | 
|---|
 | 129 | + * Bit  63   - read fail bit  | 
|---|
 | 130 | + *  | 
|---|
 | 131 | + * On 32-bit architectures, the bit definitions of the count are:  | 
|---|
 | 132 | + *  | 
|---|
 | 133 | + * Bit  0    - writer locked bit  | 
|---|
 | 134 | + * Bit  1    - waiters present bit  | 
|---|
 | 135 | + * Bit  2    - lock handoff bit  | 
|---|
 | 136 | + * Bits 3-7  - reserved  | 
|---|
 | 137 | + * Bits 8-30 - 23-bit reader count  | 
|---|
 | 138 | + * Bit  31   - read fail bit  | 
|---|
 | 139 | + *  | 
|---|
 | 140 | + * It is not likely that the most significant bit (read fail bit) will ever  | 
|---|
 | 141 | + * be set. This guard bit is still checked anyway in the down_read() fastpath  | 
|---|
 | 142 | + * just in case we need to use up more of the reader bits for other purposes  | 
|---|
 | 143 | + * in the future.  | 
|---|
 | 144 | + *  | 
|---|
 | 145 | + * atomic_long_fetch_add() is used to obtain reader lock, whereas  | 
|---|
 | 146 | + * atomic_long_cmpxchg() will be used to obtain writer lock.  | 
|---|
 | 147 | + *  | 
|---|
 | 148 | + * There are three places where the lock handoff bit may be set or cleared.  | 
|---|
 | 149 | + * 1) rwsem_mark_wake() for readers.  | 
|---|
 | 150 | + * 2) rwsem_try_write_lock() for writers.  | 
|---|
 | 151 | + * 3) Error path of rwsem_down_write_slowpath().  | 
|---|
 | 152 | + *  | 
|---|
 | 153 | + * For all the above cases, wait_lock will be held. A writer must also  | 
|---|
 | 154 | + * be the first one in the wait_list to be eligible for setting the handoff  | 
|---|
 | 155 | + * bit. So concurrent setting/clearing of handoff bit is not possible.  | 
|---|
 | 156 | + */  | 
|---|
 | 157 | +#define RWSEM_WRITER_LOCKED	(1UL << 0)  | 
|---|
 | 158 | +#define RWSEM_FLAG_WAITERS	(1UL << 1)  | 
|---|
 | 159 | +#define RWSEM_FLAG_HANDOFF	(1UL << 2)  | 
|---|
 | 160 | +#define RWSEM_FLAG_READFAIL	(1UL << (BITS_PER_LONG - 1))  | 
|---|
 | 161 | +  | 
|---|
 | 162 | +#define RWSEM_READER_SHIFT	8  | 
|---|
 | 163 | +#define RWSEM_READER_BIAS	(1UL << RWSEM_READER_SHIFT)  | 
|---|
 | 164 | +#define RWSEM_READER_MASK	(~(RWSEM_READER_BIAS - 1))  | 
|---|
 | 165 | +#define RWSEM_WRITER_MASK	RWSEM_WRITER_LOCKED  | 
|---|
 | 166 | +#define RWSEM_LOCK_MASK		(RWSEM_WRITER_MASK|RWSEM_READER_MASK)  | 
|---|
 | 167 | +#define RWSEM_READ_FAILED_MASK	(RWSEM_WRITER_MASK|RWSEM_FLAG_WAITERS|\  | 
|---|
 | 168 | +				 RWSEM_FLAG_HANDOFF|RWSEM_FLAG_READFAIL)  | 
|---|
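
To make the count layout above concrete, here is a small stand-alone sketch (64-bit layout assumed, macros mirrored from above) that splits one example count value into its flag bits and reader count:

```c
#include <stdio.h>

/* Bit definitions mirrored from the kernel macros above (64-bit layout). */
#define RWSEM_WRITER_LOCKED	(1UL << 0)
#define RWSEM_FLAG_WAITERS	(1UL << 1)
#define RWSEM_FLAG_HANDOFF	(1UL << 2)
#define RWSEM_READER_SHIFT	8
#define RWSEM_READER_BIAS	(1UL << RWSEM_READER_SHIFT)

int main(void)
{
	/* Example: three readers hold the lock and at least one waiter is queued. */
	unsigned long count = 3 * RWSEM_READER_BIAS | RWSEM_FLAG_WAITERS;

	printf("writer locked: %d\n", !!(count & RWSEM_WRITER_LOCKED));
	printf("waiters      : %d\n", !!(count & RWSEM_FLAG_WAITERS));
	printf("handoff      : %d\n", !!(count & RWSEM_FLAG_HANDOFF));
	printf("readers      : %lu\n", count >> RWSEM_READER_SHIFT);
	return 0;
}
```
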
 | 169 | +  | 
|---|
 | 170 | +/*  | 
|---|
 | 171 | + * All writes to owner are protected by WRITE_ONCE() to make sure that  | 
|---|
 | 172 | + * store tearing can't happen as optimistic spinners may read and use  | 
|---|
 | 173 | + * the owner value concurrently without lock. Read from owner, however,  | 
|---|
 | 174 | + * may not need READ_ONCE() as long as the pointer value is only used  | 
|---|
 | 175 | + * for comparison and isn't being dereferenced.  | 
|---|
 | 176 | + */  | 
|---|
 | 177 | +static inline void rwsem_set_owner(struct rw_semaphore *sem)  | 
|---|
 | 178 | +{  | 
|---|
 | 179 | +	atomic_long_set(&sem->owner, (long)current);  | 
|---|
 | 180 | +	trace_android_vh_rwsem_set_owner(sem);  | 
|---|
 | 181 | +}  | 
|---|
 | 182 | +  | 
|---|
 | 183 | +static inline void rwsem_clear_owner(struct rw_semaphore *sem)  | 
|---|
 | 184 | +{  | 
|---|
 | 185 | +	atomic_long_set(&sem->owner, 0);  | 
|---|
 | 186 | +}  | 
|---|
 | 187 | +  | 
|---|
 | 188 | +/*  | 
|---|
 | 189 | + * Test the flags in the owner field.  | 
|---|
 | 190 | + */  | 
|---|
 | 191 | +static inline bool rwsem_test_oflags(struct rw_semaphore *sem, long flags)  | 
|---|
 | 192 | +{  | 
|---|
 | 193 | +	return atomic_long_read(&sem->owner) & flags;  | 
|---|
 | 194 | +}  | 
|---|
 | 195 | +  | 
|---|
 | 196 | +/*  | 
|---|
 | 197 | + * The task_struct pointer of the last owning reader will be left in  | 
|---|
 | 198 | + * the owner field.  | 
|---|
 | 199 | + *  | 
|---|
 | 200 | + * Note that the owner value just indicates the task has owned the rwsem  | 
|---|
 | 201 | + * previously, it may not be the real owner or one of the real owners  | 
|---|
 | 202 | + * anymore when that field is examined, so take it with a grain of salt.  | 
|---|
 | 203 | + *  | 
|---|
 | 204 | + * The reader non-spinnable bit is preserved.  | 
|---|
 | 205 | + */  | 
|---|
 | 206 | +static inline void __rwsem_set_reader_owned(struct rw_semaphore *sem,  | 
|---|
 | 207 | +					    struct task_struct *owner)  | 
|---|
 | 208 | +{  | 
|---|
 | 209 | +	unsigned long val = (unsigned long)owner | RWSEM_READER_OWNED |  | 
|---|
 | 210 | +		(atomic_long_read(&sem->owner) & RWSEM_RD_NONSPINNABLE);  | 
|---|
 | 211 | +  | 
|---|
 | 212 | +	atomic_long_set(&sem->owner, val);  | 
|---|
 | 213 | +}  | 
|---|
 | 214 | +  | 
|---|
 | 215 | +static inline void rwsem_set_reader_owned(struct rw_semaphore *sem)  | 
|---|
 | 216 | +{  | 
|---|
 | 217 | +	__rwsem_set_reader_owned(sem, current);  | 
|---|
 | 218 | +	trace_android_vh_rwsem_set_reader_owned(sem);  | 
|---|
 | 219 | +}  | 
|---|
 | 220 | +  | 
|---|
 | 221 | +/*  | 
|---|
 | 222 | + * Return true if the rwsem is owned by a reader.  | 
|---|
 | 223 | + */  | 
|---|
 | 224 | +static inline bool is_rwsem_reader_owned(struct rw_semaphore *sem)  | 
|---|
 | 225 | +{  | 
|---|
 | 226 | +#ifdef CONFIG_DEBUG_RWSEMS  | 
|---|
 | 227 | +	/*  | 
|---|
 | 228 | +	 * Check the count to see if it is write-locked.  | 
|---|
 | 229 | +	 */  | 
|---|
 | 230 | +	long count = atomic_long_read(&sem->count);  | 
|---|
 | 231 | +  | 
|---|
 | 232 | +	if (count & RWSEM_WRITER_MASK)  | 
|---|
 | 233 | +		return false;  | 
|---|
 | 234 | +#endif  | 
|---|
 | 235 | +	return rwsem_test_oflags(sem, RWSEM_READER_OWNED);  | 
|---|
 | 236 | +}  | 
|---|
 | 237 | +  | 
|---|
 | 238 | +#ifdef CONFIG_DEBUG_RWSEMS  | 
|---|
 | 239 | +/*  | 
|---|
 | 240 | + * With CONFIG_DEBUG_RWSEMS configured, it will make sure that if there  | 
|---|
 | 241 | + * is a task pointer in owner of a reader-owned rwsem, it will be the  | 
|---|
 | 242 | + * real owner or one of the real owners. The only exception is when the  | 
|---|
 | 243 | + * unlock is done by up_read_non_owner().  | 
|---|
 | 244 | + */  | 
|---|
 | 245 | +static inline void rwsem_clear_reader_owned(struct rw_semaphore *sem)  | 
|---|
 | 246 | +{  | 
|---|
 | 247 | +	unsigned long val = atomic_long_read(&sem->owner);  | 
|---|
 | 248 | +  | 
|---|
 | 249 | +	while ((val & ~RWSEM_OWNER_FLAGS_MASK) == (unsigned long)current) {  | 
|---|
 | 250 | +		if (atomic_long_try_cmpxchg(&sem->owner, &val,  | 
|---|
 | 251 | +					    val & RWSEM_OWNER_FLAGS_MASK))  | 
|---|
 | 252 | +			return;  | 
|---|
 | 253 | +	}  | 
|---|
 | 254 | +}  | 
|---|
 | 255 | +#else  | 
|---|
 | 256 | +static inline void rwsem_clear_reader_owned(struct rw_semaphore *sem)  | 
|---|
 | 257 | +{  | 
|---|
 | 258 | +}  | 
|---|
 | 259 | +#endif  | 
|---|
 | 260 | +  | 
|---|
 | 261 | +/*  | 
|---|
 | 262 | + * Set the RWSEM_NONSPINNABLE bits if the RWSEM_READER_OWNED flag  | 
|---|
 | 263 | + * remains set. Otherwise, the operation will be aborted.  | 
|---|
 | 264 | + */  | 
|---|
 | 265 | +static inline void rwsem_set_nonspinnable(struct rw_semaphore *sem)  | 
|---|
 | 266 | +{  | 
|---|
 | 267 | +	unsigned long owner = atomic_long_read(&sem->owner);  | 
|---|
 | 268 | +  | 
|---|
 | 269 | +	do {  | 
|---|
 | 270 | +		if (!(owner & RWSEM_READER_OWNED))  | 
|---|
 | 271 | +			break;  | 
|---|
 | 272 | +		if (owner & RWSEM_NONSPINNABLE)  | 
|---|
 | 273 | +			break;  | 
|---|
 | 274 | +	} while (!atomic_long_try_cmpxchg(&sem->owner, &owner,  | 
|---|
 | 275 | +					  owner | RWSEM_NONSPINNABLE));  | 
|---|
 | 276 | +}  | 
|---|
 | 277 | +  | 
|---|
 | 278 | +static inline bool rwsem_read_trylock(struct rw_semaphore *sem)  | 
|---|
 | 279 | +{  | 
|---|
 | 280 | +	long cnt = atomic_long_add_return_acquire(RWSEM_READER_BIAS, &sem->count);  | 
|---|
 | 281 | +	if (WARN_ON_ONCE(cnt < 0))  | 
|---|
 | 282 | +		rwsem_set_nonspinnable(sem);  | 
|---|
 | 283 | +  | 
|---|
 | 284 | +	if ((cnt & RWSEM_READ_FAILED_MASK) == 0)  | 
|---|
 | 285 | +		trace_android_vh_record_rwsem_lock_starttime(current, jiffies);  | 
|---|
 | 286 | +  | 
|---|
 | 287 | +	return !(cnt & RWSEM_READ_FAILED_MASK);  | 
|---|
 | 288 | +}  | 
|---|
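
For context, rwsem_read_trylock() above is the fast path used by the down_read() family: the reader bias has already been added, so on success only reader ownership needs to be recorded, while on failure the slowpath is taken. The following is a simplified, illustrative sketch; the real caller lives elsewhere in this file and is not part of this hunk.

```c
/*
 * Simplified sketch of a down_read() fast path built on rwsem_read_trylock();
 * illustrative only, not the exact caller from this patch.
 */
static inline void __down_read_sketch(struct rw_semaphore *sem)
{
	if (!rwsem_read_trylock(sem)) {
		/* A writer, a pending handoff or the read-fail bit was seen. */
		rwsem_down_read_slowpath(sem, TASK_UNINTERRUPTIBLE);
		DEBUG_RWSEMS_WARN_ON(!is_rwsem_reader_owned(sem), sem);
	} else {
		/* Reader bias already added by the trylock; record ownership. */
		rwsem_set_reader_owned(sem);
	}
}
```
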
 | 289 | +  | 
|---|
 | 290 | +/*  | 
|---|
 | 291 | + * Return just the real task structure pointer of the owner  | 
|---|
 | 292 | + */  | 
|---|
 | 293 | +static inline struct task_struct *rwsem_owner(struct rw_semaphore *sem)  | 
|---|
 | 294 | +{  | 
|---|
 | 295 | +	return (struct task_struct *)  | 
|---|
 | 296 | +		(atomic_long_read(&sem->owner) & ~RWSEM_OWNER_FLAGS_MASK);  | 
|---|
 | 297 | +}  | 
|---|
 | 298 | +  | 
|---|
 | 299 | +/*  | 
|---|
 | 300 | + * Return the real task structure pointer of the owner and the embedded  | 
|---|
 | 301 | + * flags in the owner. pflags must be non-NULL.  | 
|---|
 | 302 | + */  | 
|---|
 | 303 | +static inline struct task_struct *  | 
|---|
 | 304 | +rwsem_owner_flags(struct rw_semaphore *sem, unsigned long *pflags)  | 
|---|
 | 305 | +{  | 
|---|
 | 306 | +	unsigned long owner = atomic_long_read(&sem->owner);  | 
|---|
 | 307 | +  | 
|---|
 | 308 | +	*pflags = owner & RWSEM_OWNER_FLAGS_MASK;  | 
|---|
 | 309 | +	return (struct task_struct *)(owner & ~RWSEM_OWNER_FLAGS_MASK);  | 
|---|
 | 310 | +}  | 
|---|
 | 311 | +  | 
|---|
 | 312 | +/*  | 
|---|
 | 313 | + * Guide to the rw_semaphore's count field.  | 
|---|
 | 314 | + *  | 
|---|
 | 315 | + * When the RWSEM_WRITER_LOCKED bit in count is set, the lock is owned  | 
|---|
 | 316 | + * by a writer.  | 
|---|
 | 317 | + *  | 
|---|
 | 318 | + * The lock is owned by readers when  | 
|---|
 | 319 | + * (1) the RWSEM_WRITER_LOCKED isn't set in count,  | 
|---|
 | 320 | + * (2) some of the reader bits are set in count, and  | 
|---|
 | 321 | + * (3) the owner field has the RWSEM_READER_OWNED bit set.  | 
|---|
 | 322 | + *  | 
|---|
 | 323 | + * Having some reader bits set is not enough to guarantee a reader-owned  | 
|---|
 | 324 | + * lock as the readers may be in the process of backing out from the count  | 
|---|
 | 325 | + * and a writer has just released the lock. So another writer may steal  | 
|---|
 | 326 | + * the lock immediately after that.  | 
|---|
 | 327 | + */  | 
|---|
 | 328 | +  | 
|---|
 | 329 | +/*  | 
|---|
 | 330 | + * Initialize an rwsem:  | 
|---|
 | 331 | + */  | 
|---|
 | 332 | +void __init_rwsem(struct rw_semaphore *sem, const char *name,  | 
|---|
 | 333 | +		  struct lock_class_key *key)  | 
|---|
 | 334 | +{  | 
|---|
 | 335 | +#ifdef CONFIG_DEBUG_LOCK_ALLOC  | 
|---|
 | 336 | +	/*  | 
|---|
 | 337 | +	 * Make sure we are not reinitializing a held semaphore:  | 
|---|
 | 338 | +	 */  | 
|---|
 | 339 | +	debug_check_no_locks_freed((void *)sem, sizeof(*sem));  | 
|---|
 | 340 | +	lockdep_init_map_wait(&sem->dep_map, name, key, 0, LD_WAIT_SLEEP);  | 
|---|
 | 341 | +#endif  | 
|---|
 | 342 | +#ifdef CONFIG_DEBUG_RWSEMS  | 
|---|
 | 343 | +	sem->magic = sem;  | 
|---|
 | 344 | +#endif  | 
|---|
 | 345 | +	atomic_long_set(&sem->count, RWSEM_UNLOCKED_VALUE);  | 
|---|
 | 346 | +	raw_spin_lock_init(&sem->wait_lock);  | 
|---|
 | 347 | +	INIT_LIST_HEAD(&sem->wait_list);  | 
|---|
 | 348 | +	atomic_long_set(&sem->owner, 0L);  | 
|---|
 | 349 | +#ifdef CONFIG_RWSEM_SPIN_ON_OWNER  | 
|---|
 | 350 | +	osq_lock_init(&sem->osq);  | 
|---|
 | 351 | +#endif  | 
|---|
 | 352 | +	trace_android_vh_rwsem_init(sem);  | 
|---|
 | 353 | +}  | 
|---|
 | 354 | +EXPORT_SYMBOL(__init_rwsem);  | 
|---|
 | 355 | +  | 
|---|
 | 356 | +#define rwsem_first_waiter(sem) \  | 
|---|
 | 357 | +	list_first_entry(&sem->wait_list, struct rwsem_waiter, list)  | 
|---|
 | 358 | +  | 
|---|
 | 359 | +enum rwsem_wake_type {  | 
|---|
 | 360 | +	RWSEM_WAKE_ANY,		/* Wake whatever's at head of wait list */  | 
|---|
 | 361 | +	RWSEM_WAKE_READERS,	/* Wake readers only */  | 
|---|
 | 362 | +	RWSEM_WAKE_READ_OWNED	/* Waker thread holds the read lock */  | 
|---|
 | 363 | +};  | 
|---|
 | 364 | +  | 
|---|
 | 365 | +enum writer_wait_state {  | 
|---|
 | 366 | +	WRITER_NOT_FIRST,	/* Writer is not first in wait list */  | 
|---|
 | 367 | +	WRITER_FIRST,		/* Writer is first in wait list     */  | 
|---|
 | 368 | +	WRITER_HANDOFF		/* Writer is first & handoff needed */  | 
|---|
 | 369 | +};  | 
|---|
 | 370 | +  | 
|---|
 | 371 | +/*  | 
|---|
 | 372 | + * The typical HZ value is either 250 or 1000. So set the minimum waiting  | 
|---|
 | 373 | + * time to at least 4ms or 1 jiffy (if it is higher than 4ms) in the wait  | 
|---|
 | 374 | + * queue before initiating the handoff protocol.  | 
|---|
 | 375 | + */  | 
|---|
 | 376 | +#define RWSEM_WAIT_TIMEOUT	DIV_ROUND_UP(HZ, 250)  | 
|---|
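
To make the comment above concrete, DIV_ROUND_UP(HZ, 250) yields at least 4 ms of queue-wait time, or one jiffy when a jiffy is already longer than 4 ms. A tiny user-space sketch of the arithmetic for common HZ values:

```c
#include <stdio.h>

#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))

int main(void)
{
	int hz_values[] = { 100, 250, 300, 1000 };

	for (unsigned int i = 0; i < sizeof(hz_values) / sizeof(hz_values[0]); i++) {
		int hz = hz_values[i];
		int timeout = DIV_ROUND_UP(hz, 250);	/* RWSEM_WAIT_TIMEOUT */

		/* One jiffy lasts 1000/HZ milliseconds. */
		printf("HZ=%4d -> %d jiffy(ies) = %d ms\n",
		       hz, timeout, timeout * 1000 / hz);
	}
	return 0;
}
```
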
 | 377 | +  | 
|---|
 | 378 | +/*  | 
|---|
 | 379 | + * Magic number to batch-wakeup waiting readers, even when writers are  | 
|---|
 | 380 | + * also present in the queue. This both limits the amount of work the  | 
|---|
 | 381 | + * waking thread must do and also prevents any potential counter overflow,  | 
|---|
 | 382 | + * however unlikely.  | 
|---|
 | 383 | + */  | 
|---|
 | 384 | +#define MAX_READERS_WAKEUP	0x100  | 
|---|
 | 385 | +  | 
|---|
 | 386 | +/*  | 
|---|
 | 387 | + * handle the lock release when processes blocked on it that can now run  | 
|---|
 | 388 | + * - if we come here from up_xxxx(), then the RWSEM_FLAG_WAITERS bit must  | 
|---|
 | 389 | + *   have been set.  | 
|---|
 | 390 | + * - there must be someone on the queue  | 
|---|
 | 391 | + * - the wait_lock must be held by the caller  | 
|---|
 | 392 | + * - tasks are marked for wakeup, the caller must later invoke wake_up_q()  | 
|---|
 | 393 | + *   to actually wakeup the blocked task(s) and drop the reference count,  | 
|---|
 | 394 | + *   preferably when the wait_lock is released  | 
|---|
 | 395 | + * - woken process blocks are discarded from the list after having task zeroed  | 
|---|
 | 396 | + * - writers are only marked woken if downgrading is false  | 
|---|
 | 397 | + */  | 
|---|
 | 398 | +static void rwsem_mark_wake(struct rw_semaphore *sem,  | 
|---|
 | 399 | +			    enum rwsem_wake_type wake_type,  | 
|---|
 | 400 | +			    struct wake_q_head *wake_q)  | 
|---|
 | 401 | +{  | 
|---|
 | 402 | +	struct rwsem_waiter *waiter, *tmp;  | 
|---|
 | 403 | +	long oldcount, woken = 0, adjustment = 0;  | 
|---|
 | 404 | +	struct list_head wlist;  | 
|---|
 | 405 | +  | 
|---|
 | 406 | +	lockdep_assert_held(&sem->wait_lock);  | 
|---|
 | 407 | +  | 
|---|
 | 408 | +	/*  | 
|---|
 | 409 | +	 * Take a peek at the queue head waiter such that we can determine  | 
|---|
 | 410 | +	 * the wakeup(s) to perform.  | 
|---|
 | 411 | +	 */  | 
|---|
 | 412 | +	waiter = rwsem_first_waiter(sem);  | 
|---|
 | 413 | +  | 
|---|
 | 414 | +	if (waiter->type == RWSEM_WAITING_FOR_WRITE) {  | 
|---|
 | 415 | +		if (wake_type == RWSEM_WAKE_ANY) {  | 
|---|
 | 416 | +			/*  | 
|---|
 | 417 | +			 * Mark writer at the front of the queue for wakeup.  | 
|---|
 | 418 | +			 * Until the task is actually awoken later by  | 
|---|
 | 419 | +			 * the caller, other writers are able to steal it.  | 
|---|
 | 420 | +			 * Readers, on the other hand, will block as they  | 
|---|
 | 421 | +			 * will notice the queued writer.  | 
|---|
 | 422 | +			 */  | 
|---|
 | 423 | +			wake_q_add(wake_q, waiter->task);  | 
|---|
 | 424 | +			lockevent_inc(rwsem_wake_writer);  | 
|---|
 | 425 | +		}  | 
|---|
 | 426 | +  | 
|---|
 | 427 | +		return;  | 
|---|
 | 428 | +	}  | 
|---|
 | 429 | +  | 
|---|
 | 430 | +	/*  | 
|---|
 | 431 | +	 * No reader wakeup if there are too many of them already.  | 
|---|
 | 432 | +	 */  | 
|---|
 | 433 | +	if (unlikely(atomic_long_read(&sem->count) < 0))  | 
|---|
 | 434 | +		return;  | 
|---|
 | 435 | +  | 
|---|
 | 436 | +	/*  | 
|---|
 | 437 | +	 * Writers might steal the lock before we grant it to the next reader.  | 
|---|
 | 438 | +	 * We prefer to do the first reader grant before counting readers  | 
|---|
 | 439 | +	 * so we can bail out early if a writer stole the lock.  | 
|---|
 | 440 | +	 */  | 
|---|
 | 441 | +	if (wake_type != RWSEM_WAKE_READ_OWNED) {  | 
|---|
 | 442 | +		struct task_struct *owner;  | 
|---|
 | 443 | +  | 
|---|
 | 444 | +		adjustment = RWSEM_READER_BIAS;  | 
|---|
 | 445 | +		oldcount = atomic_long_fetch_add(adjustment, &sem->count);  | 
|---|
 | 446 | +		if (unlikely(oldcount & RWSEM_WRITER_MASK)) {  | 
|---|
 | 447 | +			/*  | 
|---|
 | 448 | +			 * When we've been waiting "too" long (for writers  | 
|---|
 | 449 | +			 * to give up the lock), request a HANDOFF to  | 
|---|
 | 450 | +			 * force the issue.  | 
|---|
 | 451 | +			 */  | 
|---|
 | 452 | +			if (!(oldcount & RWSEM_FLAG_HANDOFF) &&  | 
|---|
 | 453 | +			    time_after(jiffies, waiter->timeout)) {  | 
|---|
 | 454 | +				adjustment -= RWSEM_FLAG_HANDOFF;  | 
|---|
 | 455 | +				lockevent_inc(rwsem_rlock_handoff);  | 
|---|
 | 456 | +			}  | 
|---|
 | 457 | +  | 
|---|
 | 458 | +			atomic_long_add(-adjustment, &sem->count);  | 
|---|
 | 459 | +			return;  | 
|---|
 | 460 | +		}  | 
|---|
 | 461 | +		/*  | 
|---|
 | 462 | +		 * Set it to reader-owned to give spinners an early  | 
|---|
 | 463 | +		 * indication that readers now have the lock.  | 
|---|
 | 464 | +		 * The reader nonspinnable bit seen at slowpath entry of  | 
|---|
 | 465 | +		 * the reader is copied over.  | 
|---|
 | 466 | +		 */  | 
|---|
 | 467 | +		owner = waiter->task;  | 
|---|
 | 468 | +		if (waiter->last_rowner & RWSEM_RD_NONSPINNABLE) {  | 
|---|
 | 469 | +			owner = (void *)((unsigned long)owner | RWSEM_RD_NONSPINNABLE);  | 
|---|
 | 470 | +			lockevent_inc(rwsem_opt_norspin);  | 
|---|
 | 471 | +		}  | 
|---|
 | 472 | +		__rwsem_set_reader_owned(sem, owner);  | 
|---|
 | 473 | +	}  | 
|---|
 | 474 | +  | 
|---|
 | 475 | +	/*  | 
|---|
 | 476 | +	 * Grant up to MAX_READERS_WAKEUP read locks to all the readers in the  | 
|---|
 | 477 | +	 * queue. We know that the woken will be at least 1 as we accounted  | 
|---|
 | 478 | +	 * for above. Note we increment the 'active part' of the count by the  | 
|---|
 | 479 | +	 * number of readers before waking any processes up.  | 
|---|
 | 480 | +	 *  | 
|---|
 | 481 | +	 * This is an adaptation of the phase-fair R/W locks where at the  | 
|---|
 | 482 | +	 * reader phase (first waiter is a reader), all readers are eligible  | 
|---|
 | 483 | +	 * to acquire the lock at the same time irrespective of their order  | 
|---|
 | 484 | +	 * in the queue. The writers acquire the lock according to their  | 
|---|
 | 485 | +	 * order in the queue.  | 
|---|
 | 486 | +	 *  | 
|---|
 | 487 | +	 * We have to do wakeup in 2 passes to prevent the possibility that  | 
|---|
 | 488 | +	 * the reader count may be decremented before it is incremented. It  | 
|---|
 | 489 | +	 * is because the to-be-woken waiter may not have slept yet. So it  | 
|---|
 | 490 | +	 * may see waiter->task got cleared, finish its critical section and  | 
|---|
 | 491 | +	 * do an unlock before the reader count increment.  | 
|---|
 | 492 | +	 *  | 
|---|
 | 493 | +	 * 1) Collect the read-waiters in a separate list, count them and  | 
|---|
 | 494 | +	 *    fully increment the reader count in rwsem.  | 
|---|
 | 495 | +	 * 2) For each waiter in the new list, clear waiter->task and  | 
|---|
 | 496 | +	 *    put them into wake_q to be woken up later.  | 
|---|
 | 497 | +	 */  | 
|---|
 | 498 | +	INIT_LIST_HEAD(&wlist);  | 
|---|
 | 499 | +	list_for_each_entry_safe(waiter, tmp, &sem->wait_list, list) {  | 
|---|
 | 500 | +		if (waiter->type == RWSEM_WAITING_FOR_WRITE)  | 
|---|
 | 501 | +			continue;  | 
|---|
 | 502 | +  | 
|---|
 | 503 | +		woken++;  | 
|---|
 | 504 | +		list_move_tail(&waiter->list, &wlist);  | 
|---|
 | 505 | +  | 
|---|
 | 506 | +		trace_android_vh_rwsem_mark_wake_readers(sem, waiter);  | 
|---|
 | 507 | +		/*  | 
|---|
 | 508 | +		 * Limit # of readers that can be woken up per wakeup call.  | 
|---|
 | 509 | +		 */  | 
|---|
 | 510 | +		if (woken >= MAX_READERS_WAKEUP)  | 
|---|
 | 511 | +			break;  | 
|---|
 | 512 | +	}  | 
|---|
 | 513 | +  | 
|---|
 | 514 | +	adjustment = woken * RWSEM_READER_BIAS - adjustment;  | 
|---|
 | 515 | +	lockevent_cond_inc(rwsem_wake_reader, woken);  | 
|---|
 | 516 | +	if (list_empty(&sem->wait_list)) {  | 
|---|
 | 517 | +		/* hit end of list above */  | 
|---|
 | 518 | +		adjustment -= RWSEM_FLAG_WAITERS;  | 
|---|
 | 519 | +	}  | 
|---|
 | 520 | +  | 
|---|
 | 521 | +	/*  | 
|---|
 | 522 | +	 * When we've woken a reader, we no longer need to force writers  | 
|---|
 | 523 | +	 * to give up the lock and we can clear HANDOFF.  | 
|---|
 | 524 | +	 */  | 
|---|
 | 525 | +	if (woken && (atomic_long_read(&sem->count) & RWSEM_FLAG_HANDOFF))  | 
|---|
 | 526 | +		adjustment -= RWSEM_FLAG_HANDOFF;  | 
|---|
 | 527 | +  | 
|---|
 | 528 | +	if (adjustment)  | 
|---|
 | 529 | +		atomic_long_add(adjustment, &sem->count);  | 
|---|
 | 530 | +  | 
|---|
 | 531 | +	/* 2nd pass */  | 
|---|
 | 532 | +	list_for_each_entry_safe(waiter, tmp, &wlist, list) {  | 
|---|
 | 533 | +		struct task_struct *tsk;  | 
|---|
 | 534 | +  | 
|---|
 | 535 | +		tsk = waiter->task;  | 
|---|
 | 536 | +		get_task_struct(tsk);  | 
|---|
 | 537 | +  | 
|---|
 | 538 | +		/*  | 
|---|
 | 539 | +		 * Ensure calling get_task_struct() before setting the reader  | 
|---|
 | 540 | +		 * waiter to nil such that rwsem_down_read_slowpath() cannot  | 
|---|
 | 541 | +		 * race with do_exit() by always holding a reference count  | 
|---|
 | 542 | +		 * to the task to wakeup.  | 
|---|
 | 543 | +		 */  | 
|---|
 | 544 | +		smp_store_release(&waiter->task, NULL);  | 
|---|
 | 545 | +		/*  | 
|---|
 | 546 | +		 * Ensure issuing the wakeup (either by us or someone else)  | 
|---|
 | 547 | +		 * after setting the reader waiter to nil.  | 
|---|
 | 548 | +		 */  | 
|---|
 | 549 | +		wake_q_add_safe(wake_q, tsk);  | 
|---|
 | 550 | +	}  | 
|---|
 | 551 | +}  | 
|---|
 | 552 | +  | 
|---|
 | 553 | +/*  | 
|---|
 | 554 | + * This function must be called with the sem->wait_lock held to prevent  | 
|---|
 | 555 | + * race conditions between checking the rwsem wait list and setting the  | 
|---|
 | 556 | + * sem->count accordingly.  | 
|---|
 | 557 | + *  | 
|---|
 | 558 | + * If wstate is WRITER_HANDOFF, it will make sure that either the handoff  | 
|---|
 | 559 | + * bit is set or the lock is acquired with handoff bit cleared.  | 
|---|
 | 560 | + */  | 
|---|
 | 561 | +static inline bool rwsem_try_write_lock(struct rw_semaphore *sem,  | 
|---|
 | 562 | +					enum writer_wait_state wstate)  | 
|---|
 | 563 | +{  | 
|---|
 | 564 | +	long count, new;  | 
|---|
 | 565 | +  | 
|---|
 | 566 | +	lockdep_assert_held(&sem->wait_lock);  | 
|---|
 | 567 | +  | 
|---|
 | 568 | +	count = atomic_long_read(&sem->count);  | 
|---|
 | 569 | +	do {  | 
|---|
 | 570 | +		bool has_handoff = !!(count & RWSEM_FLAG_HANDOFF);  | 
|---|
 | 571 | +  | 
|---|
 | 572 | +		if (has_handoff && wstate == WRITER_NOT_FIRST)  | 
|---|
 | 573 | +			return false;  | 
|---|
 | 574 | +  | 
|---|
 | 575 | +		new = count;  | 
|---|
 | 576 | +  | 
|---|
 | 577 | +		if (count & RWSEM_LOCK_MASK) {  | 
|---|
 | 578 | +			if (has_handoff || (wstate != WRITER_HANDOFF))  | 
|---|
 | 579 | +				return false;  | 
|---|
 | 580 | +  | 
|---|
 | 581 | +			new |= RWSEM_FLAG_HANDOFF;  | 
|---|
 | 582 | +		} else {  | 
|---|
 | 583 | +			new |= RWSEM_WRITER_LOCKED;  | 
|---|
 | 584 | +			new &= ~RWSEM_FLAG_HANDOFF;  | 
|---|
 | 585 | +  | 
|---|
 | 586 | +			if (list_is_singular(&sem->wait_list))  | 
|---|
 | 587 | +				new &= ~RWSEM_FLAG_WAITERS;  | 
|---|
 | 588 | +		}  | 
|---|
 | 589 | +	} while (!atomic_long_try_cmpxchg_acquire(&sem->count, &count, new));  | 
|---|
 | 590 | +  | 
|---|
 | 591 | +	/*  | 
|---|
 | 592 | +	 * We have either acquired the lock with handoff bit cleared or  | 
|---|
 | 593 | +	 * set the handoff bit.  | 
|---|
 | 594 | +	 */  | 
|---|
 | 595 | +	if (new & RWSEM_FLAG_HANDOFF)  | 
|---|
 | 596 | +		return false;  | 
|---|
 | 597 | +  | 
|---|
 | 598 | +	rwsem_set_owner(sem);  | 
|---|
 | 599 | +	return true;  | 
|---|
 | 600 | +}  | 
|---|
 | 601 | +  | 
|---|
 | 602 | +#ifdef CONFIG_RWSEM_SPIN_ON_OWNER  | 
|---|
 | 603 | +/*  | 
|---|
 | 604 | + * Try to acquire read lock before the reader is put on wait queue.  | 
|---|
 | 605 | + * Lock acquisition isn't allowed if the rwsem is locked or a writer handoff  | 
|---|
 | 606 | + * is ongoing.  | 
|---|
 | 607 | + */  | 
|---|
 | 608 | +static inline bool rwsem_try_read_lock_unqueued(struct rw_semaphore *sem)  | 
|---|
 | 609 | +{  | 
|---|
 | 610 | +	long count = atomic_long_read(&sem->count);  | 
|---|
 | 611 | +  | 
|---|
 | 612 | +	if (count & (RWSEM_WRITER_MASK | RWSEM_FLAG_HANDOFF))  | 
|---|
 | 613 | +		return false;  | 
|---|
 | 614 | +  | 
|---|
 | 615 | +	count = atomic_long_fetch_add_acquire(RWSEM_READER_BIAS, &sem->count);  | 
|---|
 | 616 | +	if (!(count & (RWSEM_WRITER_MASK | RWSEM_FLAG_HANDOFF))) {  | 
|---|
 | 617 | +		rwsem_set_reader_owned(sem);  | 
|---|
 | 618 | +		lockevent_inc(rwsem_opt_rlock);  | 
|---|
 | 619 | +		return true;  | 
|---|
 | 620 | +	}  | 
|---|
 | 621 | +  | 
|---|
 | 622 | +	/* Back out the change */  | 
|---|
 | 623 | +	atomic_long_add(-RWSEM_READER_BIAS, &sem->count);  | 
|---|
 | 624 | +	return false;  | 
|---|
 | 625 | +}  | 
|---|
 | 626 | +  | 
|---|
 | 627 | +/*  | 
|---|
 | 628 | + * Try to acquire write lock before the writer has been put on wait queue.  | 
|---|
 | 629 | + */  | 
|---|
 | 630 | +static inline bool rwsem_try_write_lock_unqueued(struct rw_semaphore *sem)  | 
|---|
 | 631 | +{  | 
|---|
 | 632 | +	long count = atomic_long_read(&sem->count);  | 
|---|
 | 633 | +  | 
|---|
 | 634 | +	while (!(count & (RWSEM_LOCK_MASK|RWSEM_FLAG_HANDOFF))) {  | 
|---|
 | 635 | +		if (atomic_long_try_cmpxchg_acquire(&sem->count, &count,  | 
|---|
 | 636 | +					count | RWSEM_WRITER_LOCKED)) {  | 
|---|
 | 637 | +			rwsem_set_owner(sem);  | 
|---|
 | 638 | +			lockevent_inc(rwsem_opt_wlock);  | 
|---|
 | 639 | +			return true;  | 
|---|
 | 640 | +		}  | 
|---|
 | 641 | +	}  | 
|---|
 | 642 | +	return false;  | 
|---|
 | 643 | +}  | 
|---|
 | 644 | +  | 
|---|
 | 645 | +static inline bool owner_on_cpu(struct task_struct *owner)  | 
|---|
 | 646 | +{  | 
|---|
 | 647 | +	/*  | 
|---|
 | 648 | +	 * Due to the lock holder preemption issue, we skip spinning if the  | 
|---|
 | 649 | +	 * owning task is not running on a CPU or its CPU is preempted.  | 
|---|
 | 650 | +	 */  | 
|---|
 | 651 | +	return owner->on_cpu && !vcpu_is_preempted(task_cpu(owner));  | 
|---|
 | 652 | +}  | 
|---|
 | 653 | +  | 
|---|
 | 654 | +static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem,  | 
|---|
 | 655 | +					   unsigned long nonspinnable)  | 
|---|
 | 656 | +{  | 
|---|
 | 657 | +	struct task_struct *owner;  | 
|---|
 | 658 | +	unsigned long flags;  | 
|---|
 | 659 | +	bool ret = true;  | 
|---|
 | 660 | +  | 
|---|
 | 661 | +	if (need_resched()) {  | 
|---|
 | 662 | +		lockevent_inc(rwsem_opt_fail);  | 
|---|
 | 663 | +		return false;  | 
|---|
 | 664 | +	}  | 
|---|
 | 665 | +  | 
|---|
 | 666 | +	preempt_disable();  | 
|---|
 | 667 | +	rcu_read_lock();  | 
|---|
 | 668 | +	owner = rwsem_owner_flags(sem, &flags);  | 
|---|
 | 669 | +	/*  | 
|---|
 | 670 | +	 * Don't check the read-owner as the entry may be stale.  | 
|---|
 | 671 | +	 */  | 
|---|
 | 672 | +	if ((flags & nonspinnable) ||  | 
|---|
 | 673 | +	    (owner && !(flags & RWSEM_READER_OWNED) && !owner_on_cpu(owner)))  | 
|---|
 | 674 | +		ret = false;  | 
|---|
 | 675 | +	rcu_read_unlock();  | 
|---|
 | 676 | +	preempt_enable();  | 
|---|
 | 677 | +  | 
|---|
 | 678 | +	lockevent_cond_inc(rwsem_opt_fail, !ret);  | 
|---|
 | 679 | +	return ret;  | 
|---|
 | 680 | +}  | 
|---|
 | 681 | +  | 
|---|
 | 682 | +/*  | 
|---|
 | 683 | + * The rwsem_spin_on_owner() function returns the following 4 values  | 
|---|
 | 684 | + * depending on the lock owner state.  | 
|---|
 | 685 | + *   OWNER_NULL  : owner is currently NULL  | 
|---|
 | 686 | + *   OWNER_WRITER: when owner changes and is a writer  | 
|---|
 | 687 | + *   OWNER_READER: when owner changes and the new owner may be a reader.  | 
|---|
 | 688 | + *   OWNER_NONSPINNABLE:  | 
|---|
 | 689 | + *		   when optimistic spinning has to stop because either the  | 
|---|
 | 690 | + *		   owner stops running, is unknown, or its timeslice has  | 
|---|
 | 691 | + *		   been used up.  | 
|---|
 | 692 | + */  | 
|---|
 | 693 | +enum owner_state {  | 
|---|
 | 694 | +	OWNER_NULL		= 1 << 0,  | 
|---|
 | 695 | +	OWNER_WRITER		= 1 << 1,  | 
|---|
 | 696 | +	OWNER_READER		= 1 << 2,  | 
|---|
 | 697 | +	OWNER_NONSPINNABLE	= 1 << 3,  | 
|---|
 | 698 | +};  | 
|---|
 | 699 | +#define OWNER_SPINNABLE		(OWNER_NULL | OWNER_WRITER | OWNER_READER)  | 
|---|
 | 700 | +  | 
|---|
 | 701 | +static inline enum owner_state  | 
|---|
 | 702 | +rwsem_owner_state(struct task_struct *owner, unsigned long flags, unsigned long nonspinnable)  | 
|---|
 | 703 | +{  | 
|---|
 | 704 | +	if (flags & nonspinnable)  | 
|---|
 | 705 | +		return OWNER_NONSPINNABLE;  | 
|---|
 | 706 | +  | 
|---|
 | 707 | +	if (flags & RWSEM_READER_OWNED)  | 
|---|
 | 708 | +		return OWNER_READER;  | 
|---|
 | 709 | +  | 
|---|
 | 710 | +	return owner ? OWNER_WRITER : OWNER_NULL;  | 
|---|
 | 711 | +}  | 
|---|
 | 712 | +  | 
|---|
 | 713 | +static noinline enum owner_state  | 
|---|
 | 714 | +rwsem_spin_on_owner(struct rw_semaphore *sem, unsigned long nonspinnable)  | 
|---|
 | 715 | +{  | 
|---|
 | 716 | +	struct task_struct *new, *owner;  | 
|---|
 | 717 | +	unsigned long flags, new_flags;  | 
|---|
 | 718 | +	enum owner_state state;  | 
|---|
 | 719 | +  | 
|---|
 | 720 | +	owner = rwsem_owner_flags(sem, &flags);  | 
|---|
 | 721 | +	state = rwsem_owner_state(owner, flags, nonspinnable);  | 
|---|
 | 722 | +	if (state != OWNER_WRITER)  | 
|---|
 | 723 | +		return state;  | 
|---|
 | 724 | +  | 
|---|
 | 725 | +	rcu_read_lock();  | 
|---|
 | 726 | +	for (;;) {  | 
|---|
 | 727 | +		/*  | 
|---|
 | 728 | +		 * When a waiting writer sets the handoff flag, it may spin  | 
|---|
 | 729 | +		 * on the owner as well. Once that writer acquires the lock,  | 
|---|
 | 730 | +		 * we can spin on it. So we don't need to quit even when the  | 
|---|
 | 731 | +		 * handoff bit is set.  | 
|---|
 | 732 | +		 */  | 
|---|
 | 733 | +		new = rwsem_owner_flags(sem, &new_flags);  | 
|---|
 | 734 | +		if ((new != owner) || (new_flags != flags)) {  | 
|---|
 | 735 | +			state = rwsem_owner_state(new, new_flags, nonspinnable);  | 
|---|
 | 736 | +			break;  | 
|---|
 | 737 | +		}  | 
|---|
 | 738 | +  | 
|---|
 | 739 | +		/*  | 
|---|
 | 740 | +		 * Ensure we emit the owner->on_cpu, dereference _after_  | 
|---|
 | 741 | +		 * checking sem->owner still matches owner, if that fails,  | 
|---|
 | 742 | +		 * owner might point to free()d memory, if it still matches,  | 
|---|
 | 743 | +		 * the rcu_read_lock() ensures the memory stays valid.  | 
|---|
 | 744 | +		 */  | 
|---|
 | 745 | +		barrier();  | 
|---|
 | 746 | +  | 
|---|
 | 747 | +		if (need_resched() || !owner_on_cpu(owner)) {  | 
|---|
 | 748 | +			state = OWNER_NONSPINNABLE;  | 
|---|
 | 749 | +			break;  | 
|---|
 | 750 | +		}  | 
|---|
 | 751 | +  | 
|---|
 | 752 | +		cpu_relax();  | 
|---|
 | 753 | +	}  | 
|---|
 | 754 | +	rcu_read_unlock();  | 
|---|
 | 755 | +  | 
|---|
 | 756 | +	return state;  | 
|---|
 | 757 | +}  | 
|---|
 | 758 | +  | 
|---|
 | 759 | +/*  | 
|---|
 | 760 | + * Calculate reader-owned rwsem spinning threshold for writer  | 
|---|
 | 761 | + *  | 
|---|
 | 762 | + * The more readers own the rwsem, the longer it will take for them to  | 
|---|
 | 763 | + * wind down and free the rwsem. So the empirical formula used to  | 
|---|
 | 764 | + * determine the actual spinning time limit here is:  | 
|---|
 | 765 | + *  | 
|---|
 | 766 | + *   Spinning threshold = (10 + nr_readers/2)us  | 
|---|
 | 767 | + *  | 
|---|
 | 768 | + * The limit is capped to a maximum of 25us (30 readers). This is just  | 
|---|
 | 769 | + * a heuristic and is subject to change in the future.  | 
|---|
 | 770 | + */  | 
|---|
 | 771 | +static inline u64 rwsem_rspin_threshold(struct rw_semaphore *sem)  | 
|---|
 | 772 | +{  | 
|---|
 | 773 | +	long count = atomic_long_read(&sem->count);  | 
|---|
 | 774 | +	int readers = count >> RWSEM_READER_SHIFT;  | 
|---|
 | 775 | +	u64 delta;  | 
|---|
 | 776 | +  | 
|---|
 | 777 | +	if (readers > 30)  | 
|---|
 | 778 | +		readers = 30;  | 
|---|
 | 779 | +	delta = (20 + readers) * NSEC_PER_USEC / 2;  | 
|---|
 | 780 | +  | 
|---|
 | 781 | +	return sched_clock() + delta;  | 
|---|
 | 782 | +}  | 
|---|
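
Plugging numbers into the formula above, delta = (20 + readers) * NSEC_PER_USEC / 2 gives about 10.5 us for a single reader, 15 us for 10 readers, and the 25 us cap from 30 readers onward. A stand-alone sketch of the same arithmetic:

```c
#include <stdio.h>

#define NSEC_PER_USEC	1000ULL

int main(void)
{
	int samples[] = { 1, 10, 30, 100 };

	for (unsigned int i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
		int readers = samples[i];

		if (readers > 30)	/* same cap as rwsem_rspin_threshold() */
			readers = 30;

		unsigned long long delta = (20 + readers) * NSEC_PER_USEC / 2;

		printf("%3d readers -> spin budget %llu ns (%.1f us)\n",
		       samples[i], delta, delta / 1000.0);
	}
	return 0;
}
```
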
 | 783 | +  | 
|---|
 | 784 | +static bool rwsem_optimistic_spin(struct rw_semaphore *sem, bool wlock)  | 
|---|
 | 785 | +{  | 
|---|
 | 786 | +	bool taken = false;  | 
|---|
 | 787 | +	int prev_owner_state = OWNER_NULL;  | 
|---|
 | 788 | +	int loop = 0;  | 
|---|
 | 789 | +	u64 rspin_threshold = 0;  | 
|---|
 | 790 | +	unsigned long nonspinnable = wlock ? RWSEM_WR_NONSPINNABLE  | 
|---|
 | 791 | +					   : RWSEM_RD_NONSPINNABLE;  | 
|---|
 | 792 | +  | 
|---|
 | 793 | +	preempt_disable();  | 
|---|
 | 794 | +  | 
|---|
 | 795 | +	/* sem->wait_lock should not be held when doing optimistic spinning */  | 
|---|
 | 796 | +	if (!osq_lock(&sem->osq))  | 
|---|
 | 797 | +		goto done;  | 
|---|
 | 798 | +  | 
|---|
 | 799 | +	/*  | 
|---|
 | 800 | +	 * Optimistically spin on the owner field and attempt to acquire the  | 
|---|
 | 801 | +	 * lock whenever the owner changes. Spinning will be stopped when:  | 
|---|
 | 802 | +	 *  1) the owning writer isn't running; or  | 
|---|
 | 803 | +	 *  2) readers own the lock and spinning time has exceeded limit.  | 
|---|
 | 804 | +	 */  | 
|---|
 | 805 | +	for (;;) {  | 
|---|
 | 806 | +		enum owner_state owner_state;  | 
|---|
 | 807 | +  | 
|---|
 | 808 | +		owner_state = rwsem_spin_on_owner(sem, nonspinnable);  | 
|---|
 | 809 | +		if (!(owner_state & OWNER_SPINNABLE))  | 
|---|
 | 810 | +			break;  | 
|---|
 | 811 | +  | 
|---|
 | 812 | +		/*  | 
|---|
 | 813 | +		 * Try to acquire the lock  | 
|---|
 | 814 | +		 */  | 
|---|
 | 815 | +		taken = wlock ? rwsem_try_write_lock_unqueued(sem)  | 
|---|
 | 816 | +			      : rwsem_try_read_lock_unqueued(sem);  | 
|---|
 | 817 | +  | 
|---|
 | 818 | +		if (taken)  | 
|---|
 | 819 | +			break;  | 
|---|
 | 820 | +  | 
|---|
 | 821 | +		/*  | 
|---|
 | 822 | +		 * Time-based reader-owned rwsem optimistic spinning  | 
|---|
 | 823 | +		 */  | 
|---|
 | 824 | +		if (wlock && (owner_state == OWNER_READER)) {  | 
|---|
 | 825 | +			/*  | 
|---|
 | 826 | +			 * Re-initialize rspin_threshold every time  | 
|---|
 | 827 | +			 * the owner state changes from non-reader to reader.  | 
|---|
 | 828 | +			 * This allows a writer to steal the lock in between  | 
|---|
 | 829 | +			 * 2 reader phases and have the threshold reset at  | 
|---|
 | 830 | +			 * the beginning of the 2nd reader phase.  | 
|---|
 | 831 | +			 */  | 
|---|
 | 832 | +			if (prev_owner_state != OWNER_READER) {  | 
|---|
 | 833 | +				if (rwsem_test_oflags(sem, nonspinnable))  | 
|---|
 | 834 | +					break;  | 
|---|
 | 835 | +				rspin_threshold = rwsem_rspin_threshold(sem);  | 
|---|
 | 836 | +				loop = 0;  | 
|---|
 | 837 | +			}  | 
|---|
 | 838 | +  | 
|---|
 | 839 | +			/*  | 
|---|
 | 840 | +			 * Check time threshold once every 16 iterations to  | 
|---|
 | 841 | +			 * avoid calling sched_clock() too frequently so  | 
|---|
 | 842 | +			 * as to reduce the average latency between the times  | 
|---|
 | 843 | +			 * when the lock becomes free and when the spinner  | 
|---|
 | 844 | +			 * is ready to do a trylock.  | 
|---|
 | 845 | +			 */  | 
|---|
 | 846 | +			else if (!(++loop & 0xf) && (sched_clock() > rspin_threshold)) {  | 
|---|
 | 847 | +				rwsem_set_nonspinnable(sem);  | 
|---|
 | 848 | +				lockevent_inc(rwsem_opt_nospin);  | 
|---|
 | 849 | +				break;  | 
|---|
 | 850 | +			}  | 
|---|
 | 851 | +		}  | 
|---|
 | 852 | +  | 
|---|
 | 853 | +		/*  | 
|---|
 | 854 | +		 * An RT task cannot do optimistic spinning if it cannot  | 
|---|
 | 855 | +		 * be sure the lock holder is running or live-lock may  | 
|---|
 | 856 | +		 * happen if the current task and the lock holder happen  | 
|---|
 | 857 | +		 * to run in the same CPU. However, aborting optimistic  | 
|---|
 | 858 | +		 * spinning while a NULL owner is detected may miss some  | 
|---|
 | 859 | +		 * opportunity where spinning can continue without causing  | 
|---|
 | 860 | +		 * problem.  | 
|---|
 | 861 | +		 *  | 
|---|
 | 862 | +		 * There are 2 possible cases where an RT task may be able  | 
|---|
 | 863 | +		 * to continue spinning.  | 
|---|
 | 864 | +		 *  | 
|---|
 | 865 | +		 * 1) The lock owner is in the process of releasing the  | 
|---|
 | 866 | +		 *    lock, sem->owner is cleared but the lock has not  | 
|---|
 | 867 | +		 *    been released yet.  | 
|---|
 | 868 | +		 * 2) The lock was free and owner cleared, but another  | 
|---|
 | 869 | +		 *    task just comes in and acquires the lock before  | 
|---|
 | 870 | +		 *    we try to get it. The new owner may be a spinnable  | 
|---|
 | 871 | +		 *    writer.  | 
|---|
 | 872 | +		 *  | 
|---|
 | 873 | +		 * To take advantage of the two scenarios listed above, the RT  | 
|---|
 | 874 | +		 * task is made to retry one more time to see if it can  | 
|---|
 | 875 | +		 * acquire the lock or continue spinning on the new owning  | 
|---|
 | 876 | +		 * writer. Of course, if the time lag is long enough or the  | 
|---|
 | 877 | +		 * new owner is not a writer or spinnable, the RT task will  | 
|---|
 | 878 | +		 * quit spinning.  | 
|---|
 | 879 | +		 *  | 
|---|
 | 880 | +		 * If the owner is a writer, the need_resched() check is  | 
|---|
 | 881 | +		 * done inside rwsem_spin_on_owner(). If the owner is not  | 
|---|
 | 882 | +		 * a writer, need_resched() check needs to be done here.  | 
|---|
 | 883 | +		 */  | 
|---|
 | 884 | +		if (owner_state != OWNER_WRITER) {  | 
|---|
 | 885 | +			if (need_resched())  | 
|---|
 | 886 | +				break;  | 
|---|
 | 887 | +			if (rt_task(current) &&  | 
|---|
 | 888 | +			   (prev_owner_state != OWNER_WRITER))  | 
|---|
 | 889 | +				break;  | 
|---|
 | 890 | +		}  | 
|---|
 | 891 | +		prev_owner_state = owner_state;  | 
|---|
 | 892 | +  | 
|---|
 | 893 | +		/*  | 
|---|
 | 894 | +		 * The cpu_relax() call is a compiler barrier which forces  | 
|---|
 | 895 | +		 * everything in this loop to be re-loaded. We don't need  | 
|---|
 | 896 | +		 * memory barriers as we'll eventually observe the right  | 
|---|
 | 897 | +		 * values at the cost of a few extra spins.  | 
|---|
 | 898 | +		 */  | 
|---|
 | 899 | +		cpu_relax();  | 
|---|
 | 900 | +	}  | 
|---|
 | 901 | +	osq_unlock(&sem->osq);  | 
|---|
 | 902 | +done:  | 
|---|
 | 903 | +	preempt_enable();  | 
|---|
 | 904 | +	lockevent_cond_inc(rwsem_opt_fail, !taken);  | 
|---|
 | 905 | +	return taken;  | 
|---|
 | 906 | +}  | 
|---|
 | 907 | +  | 
|---|
 | 908 | +/*  | 
|---|
 | 909 | + * Clear the owner's RWSEM_WR_NONSPINNABLE bit if it is set. This should  | 
|---|
 | 910 | + * only be called when the reader count reaches 0.  | 
|---|
 | 911 | + *  | 
|---|
 | 912 | + * This gives writers a better chance to acquire the rwsem before  | 
|---|
 | 913 | + * readers when the rwsem was being held by readers for a relatively long  | 
|---|
 | 914 | + * period of time. A race can happen where an optimistic spinner may have  | 
|---|
 | 915 | + * just stolen the rwsem and set the owner, but just clearing the  | 
|---|
 | 916 | + * RWSEM_WR_NONSPINNABLE bit will do no harm anyway.  | 
|---|
 | 917 | + */  | 
|---|
 | 918 | +static inline void clear_wr_nonspinnable(struct rw_semaphore *sem)  | 
|---|
 | 919 | +{  | 
|---|
 | 920 | +	if (rwsem_test_oflags(sem, RWSEM_WR_NONSPINNABLE))  | 
|---|
 | 921 | +		atomic_long_andnot(RWSEM_WR_NONSPINNABLE, &sem->owner);  | 
|---|
 | 922 | +}  | 
|---|
 | 923 | +  | 
|---|
 | 924 | +/*  | 
|---|
 | 925 | + * This function is called when the reader fails to acquire the lock via  | 
|---|
 | 926 | + * optimistic spinning. In this case we will still attempt to do a trylock  | 
|---|
 | 927 | + * when comparing the rwsem state right now with the state when entering  | 
|---|
 | 928 | + * the slowpath indicates that the reader is still in a valid reader phase.  | 
|---|
 | 929 | + * This happens when the following conditions are true:  | 
|---|
 | 930 | + *  | 
|---|
 | 931 | + * 1) The lock is currently reader owned, and  | 
|---|
 | 932 | + * 2) The lock was previously not reader-owned or the last read owner has changed.  | 
|---|
 | 933 | + *  | 
|---|
 | 934 | + * In the former case, we have transitioned from a writer phase to a  | 
|---|
 | 935 | + * reader-phase while spinning. In the latter case, it means the reader  | 
|---|
 | 936 | + * phase hasn't ended when we entered the optimistic spinning loop. In  | 
|---|
 | 937 | + * both cases, the reader is eligible to acquire the lock. This is the  | 
|---|
 | 938 | + * secondary path where a read lock is acquired optimistically.  | 
|---|
 | 939 | + *  | 
|---|
 | 940 | + * The reader nonspinnable bit wasn't set at the time of entry, or the  | 
|---|
 | 941 | + * reader would not be here at all.  | 
|---|
 | 942 | + */  | 
|---|
 | 943 | +static inline bool rwsem_reader_phase_trylock(struct rw_semaphore *sem,  | 
|---|
 | 944 | +					      unsigned long last_rowner)  | 
|---|
 | 945 | +{  | 
|---|
 | 946 | +	unsigned long owner = atomic_long_read(&sem->owner);  | 
|---|
 | 947 | +  | 
|---|
 | 948 | +	if (!(owner & RWSEM_READER_OWNED))  | 
|---|
 | 949 | +		return false;  | 
|---|
 | 950 | +  | 
|---|
 | 951 | +	if (((owner ^ last_rowner) & ~RWSEM_OWNER_FLAGS_MASK) &&  | 
|---|
 | 952 | +	    rwsem_try_read_lock_unqueued(sem)) {  | 
|---|
 | 953 | +		lockevent_inc(rwsem_opt_rlock2);  | 
|---|
 | 954 | +		lockevent_add(rwsem_opt_fail, -1);  | 
|---|
 | 955 | +		return true;  | 
|---|
 | 956 | +	}  | 
|---|
 | 957 | +	return false;  | 
|---|
 | 958 | +}  | 
|---|
 | 959 | +#else  | 
|---|
 | 960 | +static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem,  | 
|---|
 | 961 | +					   unsigned long nonspinnable)  | 
|---|
 | 962 | +{  | 
|---|
 | 963 | +	return false;  | 
|---|
 | 964 | +}  | 
|---|
 | 965 | +  | 
|---|
 | 966 | +static inline bool rwsem_optimistic_spin(struct rw_semaphore *sem, bool wlock)  | 
|---|
 | 967 | +{  | 
|---|
 | 968 | +	return false;  | 
|---|
 | 969 | +}  | 
|---|
 | 970 | +  | 
|---|
 | 971 | +static inline void clear_wr_nonspinnable(struct rw_semaphore *sem) { }  | 
|---|
 | 972 | +  | 
|---|
 | 973 | +static inline bool rwsem_reader_phase_trylock(struct rw_semaphore *sem,  | 
|---|
 | 974 | +					      unsigned long last_rowner)  | 
|---|
 | 975 | +{  | 
|---|
 | 976 | +	return false;  | 
|---|
 | 977 | +}  | 
|---|
 | 978 | +  | 
|---|
 | 979 | +static inline int  | 
|---|
 | 980 | +rwsem_spin_on_owner(struct rw_semaphore *sem, unsigned long nonspinnable)  | 
|---|
 | 981 | +{  | 
|---|
 | 982 | +	return 0;  | 
|---|
 | 983 | +}  | 
|---|
 | 984 | +#define OWNER_NULL	1  | 
|---|
 | 985 | +#endif  | 
|---|
 | 986 | +  | 
|---|
 | 987 | +/*  | 
|---|
 | 988 | + * Wait for the read lock to be granted  | 
|---|
 | 989 | + */  | 
|---|
 | 990 | +static struct rw_semaphore __sched *  | 
|---|
 | 991 | +rwsem_down_read_slowpath(struct rw_semaphore *sem, int state)  | 
|---|
 | 992 | +{  | 
|---|
 | 993 | +	long count, adjustment = -RWSEM_READER_BIAS;  | 
|---|
 | 994 | +	struct rwsem_waiter waiter;  | 
|---|
 | 995 | +	DEFINE_WAKE_Q(wake_q);  | 
|---|
 | 996 | +	bool wake = false;  | 
|---|
 | 997 | +	bool already_on_list = false;  | 
|---|
 | 998 | +  | 
|---|
 | 999 | +	/*  | 
|---|
 | 1000 | +	 * Save the current read-owner of rwsem, if available, and the  | 
|---|
 | 1001 | +	 * reader nonspinnable bit.  | 
|---|
 | 1002 | +	 */  | 
|---|
 | 1003 | +	waiter.last_rowner = atomic_long_read(&sem->owner);  | 
|---|
 | 1004 | +	if (!(waiter.last_rowner & RWSEM_READER_OWNED))  | 
|---|
 | 1005 | +		waiter.last_rowner &= RWSEM_RD_NONSPINNABLE;  | 
|---|
 | 1006 | +  | 
|---|
 | 1007 | +	if (!rwsem_can_spin_on_owner(sem, RWSEM_RD_NONSPINNABLE))  | 
|---|
 | 1008 | +		goto queue;  | 
|---|
 | 1009 | +  | 
|---|
 | 1010 | +	/*  | 
|---|
 | 1011 | +	 * Undo read bias from down_read() and do optimistic spinning.  | 
|---|
 | 1012 | +	 */  | 
|---|
 | 1013 | +	atomic_long_add(-RWSEM_READER_BIAS, &sem->count);  | 
|---|
 | 1014 | +	adjustment = 0;  | 
|---|
 | 1015 | +	if (rwsem_optimistic_spin(sem, false)) {  | 
|---|
 | 1016 | +		/* rwsem_optimistic_spin() implies ACQUIRE on success */  | 
|---|
 | 1017 | +		/*  | 
|---|
 | 1018 | +		 * Wake up other readers in the wait list if the front  | 
|---|
 | 1019 | +		 * waiter is a reader.  | 
|---|
 | 1020 | +		 */  | 
|---|
 | 1021 | +		if ((atomic_long_read(&sem->count) & RWSEM_FLAG_WAITERS)) {  | 
|---|
 | 1022 | +			raw_spin_lock_irq(&sem->wait_lock);  | 
|---|
 | 1023 | +			if (!list_empty(&sem->wait_list))  | 
|---|
 | 1024 | +				rwsem_mark_wake(sem, RWSEM_WAKE_READ_OWNED,  | 
|---|
 | 1025 | +						&wake_q);  | 
|---|
 | 1026 | +			raw_spin_unlock_irq(&sem->wait_lock);  | 
|---|
 | 1027 | +			wake_up_q(&wake_q);  | 
|---|
 | 1028 | +		}  | 
|---|
 | 1029 | +		trace_android_vh_record_rwsem_lock_starttime(current, jiffies);  | 
|---|
 | 1030 | +		return sem;  | 
|---|
 | 1031 | +	} else if (rwsem_reader_phase_trylock(sem, waiter.last_rowner)) {  | 
|---|
 | 1032 | +		/* rwsem_reader_phase_trylock() implies ACQUIRE on success */  | 
|---|
 | 1033 | +		trace_android_vh_record_rwsem_lock_starttime(current, jiffies);  | 
|---|
 | 1034 | +		return sem;  | 
|---|
 | 1035 | +	}  | 
|---|
 | 1036 | +  | 
|---|
 | 1037 | +queue:  | 
|---|
 | 1038 | +	waiter.task = current;  | 
|---|
 | 1039 | +	waiter.type = RWSEM_WAITING_FOR_READ;  | 
|---|
 | 1040 | +	waiter.timeout = jiffies + RWSEM_WAIT_TIMEOUT;  | 
|---|
 | 1041 | +  | 
|---|
 | 1042 | +	raw_spin_lock_irq(&sem->wait_lock);  | 
|---|
 | 1043 | +	if (list_empty(&sem->wait_list)) {  | 
|---|
 | 1044 | +		/*  | 
|---|
 | 1045 | +		 * In case the wait queue is empty and the lock isn't owned  | 
|---|
 | 1046 | +		 * by a writer or has the handoff bit set, this reader can  | 
|---|
 | 1047 | +		 * exit the slowpath and return immediately as its  | 
|---|
 | 1048 | +		 * RWSEM_READER_BIAS has already been set in the count.  | 
|---|
 | 1049 | +		 */  | 
|---|
 | 1050 | +		if (adjustment && !(atomic_long_read(&sem->count) &  | 
|---|
 | 1051 | +		     (RWSEM_WRITER_MASK | RWSEM_FLAG_HANDOFF))) {  | 
|---|
 | 1052 | +			/* Provide lock ACQUIRE */  | 
|---|
 | 1053 | +			smp_acquire__after_ctrl_dep();  | 
|---|
 | 1054 | +			raw_spin_unlock_irq(&sem->wait_lock);  | 
|---|
 | 1055 | +			rwsem_set_reader_owned(sem);  | 
|---|
 | 1056 | +			lockevent_inc(rwsem_rlock_fast);  | 
|---|
 | 1057 | +			return sem;  | 
|---|
 | 1058 | +		}  | 
|---|
 | 1059 | +		adjustment += RWSEM_FLAG_WAITERS;  | 
|---|
 | 1060 | +	}  | 
|---|
 | 1061 | +	trace_android_vh_alter_rwsem_list_add(  | 
|---|
 | 1062 | +					&waiter,  | 
|---|
 | 1063 | +					sem, &already_on_list);  | 
|---|
 | 1064 | +	if (!already_on_list)  | 
|---|
 | 1065 | +		list_add_tail(&waiter.list, &sem->wait_list);  | 
|---|
 | 1066 | +  | 
|---|
 | 1067 | +	/* we're now waiting on the lock, but no longer actively locking */  | 
|---|
 | 1068 | +	if (adjustment)  | 
|---|
 | 1069 | +		count = atomic_long_add_return(adjustment, &sem->count);  | 
|---|
 | 1070 | +	else  | 
|---|
 | 1071 | +		count = atomic_long_read(&sem->count);  | 
|---|
 | 1072 | +  | 
|---|
 | 1073 | +	/*  | 
|---|
 | 1074 | +	 * If there are no active locks, wake the front queued process(es).  | 
|---|
 | 1075 | +	 *  | 
|---|
 | 1076 | +	 * If there are no writers and we are first in the queue,  | 
|---|
 | 1077 | +	 * wake our own waiter to join the existing active readers !  | 
|---|
 | 1078 | +	 */  | 
|---|
 | 1079 | +	if (!(count & RWSEM_LOCK_MASK)) {  | 
|---|
 | 1080 | +		clear_wr_nonspinnable(sem);  | 
|---|
 | 1081 | +		wake = true;  | 
|---|
 | 1082 | +	}  | 
|---|
 | 1083 | +	if (wake || (!(count & RWSEM_WRITER_MASK) &&  | 
|---|
 | 1084 | +		    (adjustment & RWSEM_FLAG_WAITERS)))  | 
|---|
 | 1085 | +		rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);  | 
|---|
 | 1086 | +  | 
|---|
 | 1087 | +	trace_android_vh_rwsem_wake(sem);  | 
|---|
 | 1088 | +	raw_spin_unlock_irq(&sem->wait_lock);  | 
|---|
 | 1089 | +	wake_up_q(&wake_q);  | 
|---|
 | 1090 | +  | 
|---|
 | 1091 | +	/* wait to be given the lock */  | 
|---|
 | 1092 | +	trace_android_vh_rwsem_read_wait_start(sem);  | 
|---|
 | 1093 | +	for (;;) {  | 
|---|
 | 1094 | +		set_current_state(state);  | 
|---|
 | 1095 | +		if (!smp_load_acquire(&waiter.task)) {  | 
|---|
 | 1096 | +			/* Matches rwsem_mark_wake()'s smp_store_release(). */  | 
|---|
 | 1097 | +			break;  | 
|---|
 | 1098 | +		}  | 
|---|
 | 1099 | +		if (signal_pending_state(state, current)) {  | 
|---|
 | 1100 | +			raw_spin_lock_irq(&sem->wait_lock);  | 
|---|
 | 1101 | +			if (waiter.task)  | 
|---|
 | 1102 | +				goto out_nolock;  | 
|---|
 | 1103 | +			raw_spin_unlock_irq(&sem->wait_lock);  | 
|---|
 | 1104 | +			/* Ordered by sem->wait_lock against rwsem_mark_wake(). */  | 
|---|
 | 1105 | +			break;  | 
|---|
 | 1106 | +		}  | 
|---|
 | 1107 | +		schedule();  | 
|---|
 | 1108 | +		lockevent_inc(rwsem_sleep_reader);  | 
|---|
 | 1109 | +	}  | 
|---|
 | 1110 | +  | 
|---|
 | 1111 | +	__set_current_state(TASK_RUNNING);  | 
|---|
 | 1112 | +	trace_android_vh_rwsem_read_wait_finish(sem);  | 
|---|
 | 1113 | +	lockevent_inc(rwsem_rlock);  | 
|---|
 | 1114 | +	trace_android_vh_record_rwsem_lock_starttime(current, jiffies);  | 
|---|
 | 1115 | +	return sem;  | 
|---|
 | 1116 | +  | 
|---|
 | 1117 | +out_nolock:  | 
|---|
 | 1118 | +	list_del(&waiter.list);  | 
|---|
 | 1119 | +	if (list_empty(&sem->wait_list)) {  | 
|---|
 | 1120 | +		atomic_long_andnot(RWSEM_FLAG_WAITERS|RWSEM_FLAG_HANDOFF,  | 
|---|
 | 1121 | +				   &sem->count);  | 
|---|
 | 1122 | +	}  | 
|---|
 | 1123 | +	raw_spin_unlock_irq(&sem->wait_lock);  | 
|---|
 | 1124 | +	__set_current_state(TASK_RUNNING);  | 
|---|
 | 1125 | +	trace_android_vh_rwsem_read_wait_finish(sem);  | 
|---|
 | 1126 | +	lockevent_inc(rwsem_rlock_fail);  | 
|---|
 | 1127 | +	return ERR_PTR(-EINTR);  | 
|---|
 | 1128 | +}  | 
|---|
 | 1129 | +  | 
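The wait loop above uses a small publish/consume handshake: the waker grants the lock first, then clears waiter.task with a release store, and the sleeping reader only leaves the loop once an acquire load observes the cleared pointer. A minimal, self-contained sketch of that pattern (illustrative only, not the rwsem code; the example_* names are made up and the usual scheduler headers are assumed):

	struct example_waiter {
		struct task_struct *task;	/* NULL once the lock has been granted */
	};

	/* waker side: grant first, then publish, then wake */
	static void example_grant(struct example_waiter *w)
	{
		struct task_struct *t = w->task;

		get_task_struct(t);			/* the real code also pins the task */
		smp_store_release(&w->task, NULL);	/* pairs with the acquire below */
		wake_up_process(t);
		put_task_struct(t);
	}

	/* sleeper side: only trust the grant after the acquire load sees NULL */
	static void example_wait(struct example_waiter *w)
	{
		for (;;) {
			set_current_state(TASK_UNINTERRUPTIBLE);
			if (!smp_load_acquire(&w->task))
				break;
			schedule();
		}
		__set_current_state(TASK_RUNNING);
	}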
|---|
 | 1130 | +/*  | 
|---|
 | 1131 | + * This function is called by a write lock owner. So the owner value  | 
|---|
 | 1132 | + * won't get changed by others.  | 
|---|
 | 1133 | + */  | 
|---|
 | 1134 | +static inline void rwsem_disable_reader_optspin(struct rw_semaphore *sem,  | 
|---|
 | 1135 | +						bool disable)  | 
|---|
 | 1136 | +{  | 
|---|
 | 1137 | +	if (unlikely(disable)) {  | 
|---|
 | 1138 | +		atomic_long_or(RWSEM_RD_NONSPINNABLE, &sem->owner);  | 
|---|
 | 1139 | +		lockevent_inc(rwsem_opt_norspin);  | 
|---|
 | 1140 | +	}  | 
|---|
 | 1141 | +}  | 
|---|
 | 1142 | +  | 
|---|
 | 1143 | +/*  | 
|---|
 | 1144 | + * Wait until we successfully acquire the write lock  | 
|---|
 | 1145 | + */  | 
|---|
 | 1146 | +static struct rw_semaphore *  | 
|---|
 | 1147 | +rwsem_down_write_slowpath(struct rw_semaphore *sem, int state)  | 
|---|
 | 1148 | +{  | 
|---|
 | 1149 | +	long count;  | 
|---|
 | 1150 | +	bool disable_rspin;  | 
|---|
 | 1151 | +	enum writer_wait_state wstate;  | 
|---|
 | 1152 | +	struct rwsem_waiter waiter;  | 
|---|
 | 1153 | +	struct rw_semaphore *ret = sem;  | 
|---|
 | 1154 | +	DEFINE_WAKE_Q(wake_q);  | 
|---|
 | 1155 | +	bool already_on_list = false;  | 
|---|
 | 1156 | +  | 
|---|
 | 1157 | +	/* do optimistic spinning and steal lock if possible */  | 
|---|
 | 1158 | +	if (rwsem_can_spin_on_owner(sem, RWSEM_WR_NONSPINNABLE) &&  | 
|---|
 | 1159 | +	    rwsem_optimistic_spin(sem, true)) {  | 
|---|
 | 1160 | +		/* rwsem_optimistic_spin() implies ACQUIRE on success */  | 
|---|
 | 1161 | +		trace_android_vh_record_rwsem_lock_starttime(current, jiffies);  | 
|---|
 | 1162 | +		return sem;  | 
|---|
 | 1163 | +	}  | 
|---|
 | 1164 | +  | 
|---|
 | 1165 | +	/*  | 
|---|
 | 1166 | +	 * Disable reader optimistic spinning for this rwsem after  | 
|---|
 | 1167 | +	 * acquiring the write lock when the setting of the nonspinnable  | 
|---|
 | 1168 | +	 * bits is observed.  | 
|---|
 | 1169 | +	 */  | 
|---|
 | 1170 | +	disable_rspin = atomic_long_read(&sem->owner) & RWSEM_NONSPINNABLE;  | 
|---|
 | 1171 | +  | 
|---|
 | 1172 | +	/*  | 
|---|
 | 1173 | +	 * Optimistic spinning failed, proceed to the slowpath  | 
|---|
 | 1174 | +	 * and block until we can acquire the sem.  | 
|---|
 | 1175 | +	 */  | 
|---|
 | 1176 | +	waiter.task = current;  | 
|---|
 | 1177 | +	waiter.type = RWSEM_WAITING_FOR_WRITE;  | 
|---|
 | 1178 | +	waiter.timeout = jiffies + RWSEM_WAIT_TIMEOUT;  | 
|---|
 | 1179 | +  | 
|---|
 | 1180 | +	raw_spin_lock_irq(&sem->wait_lock);  | 
|---|
 | 1181 | +  | 
|---|
 | 1182 | +	/* account for this before adding a new element to the list */  | 
|---|
 | 1183 | +	wstate = list_empty(&sem->wait_list) ? WRITER_FIRST : WRITER_NOT_FIRST;  | 
|---|
 | 1184 | +  | 
|---|
 | 1185 | +	trace_android_vh_alter_rwsem_list_add(  | 
|---|
 | 1186 | +					&waiter,  | 
|---|
 | 1187 | +					sem, &already_on_list);  | 
|---|
 | 1188 | +	if (!already_on_list)  | 
|---|
 | 1189 | +		list_add_tail(&waiter.list, &sem->wait_list);  | 
|---|
 | 1190 | +  | 
|---|
 | 1191 | +	/* we're now waiting on the lock */  | 
|---|
 | 1192 | +	if (wstate == WRITER_NOT_FIRST) {  | 
|---|
 | 1193 | +		count = atomic_long_read(&sem->count);  | 
|---|
 | 1194 | +  | 
|---|
 | 1195 | +		/*  | 
|---|
 | 1196 | +		 * If there were already threads queued before us and:  | 
|---|
 | 1197 | +		 *  1) there are no active locks, wake the front  | 
|---|
 | 1198 | +		 *     queued process(es) as the handoff bit might be set.  | 
|---|
 | 1199 | +		 *  2) there are no active writers but some readers, the lock  | 
|---|
 | 1200 | +		 *     must be read owned; so we try to wake any read lock  | 
|---|
 | 1201 | +		 *     waiters that were queued ahead of us.  | 
|---|
 | 1202 | +		 */  | 
|---|
 | 1203 | +		if (count & RWSEM_WRITER_MASK)  | 
|---|
 | 1204 | +			goto wait;  | 
|---|
 | 1205 | +  | 
|---|
 | 1206 | +		rwsem_mark_wake(sem, (count & RWSEM_READER_MASK)  | 
|---|
 | 1207 | +					? RWSEM_WAKE_READERS  | 
|---|
 | 1208 | +					: RWSEM_WAKE_ANY, &wake_q);  | 
|---|
 | 1209 | +  | 
|---|
 | 1210 | +		if (!wake_q_empty(&wake_q)) {  | 
|---|
 | 1211 | +			/*  | 
|---|
 | 1212 | +			 * We want to minimize wait_lock hold time especially  | 
|---|
 | 1213 | +			 * when a large number of readers are to be woken up.  | 
|---|
 | 1214 | +			 */  | 
|---|
 | 1215 | +			raw_spin_unlock_irq(&sem->wait_lock);  | 
|---|
 | 1216 | +			wake_up_q(&wake_q);  | 
|---|
 | 1217 | +			wake_q_init(&wake_q);	/* Used again, reinit */  | 
|---|
 | 1218 | +			raw_spin_lock_irq(&sem->wait_lock);  | 
|---|
 | 1219 | +		}  | 
|---|
 | 1220 | +	} else {  | 
|---|
 | 1221 | +		atomic_long_or(RWSEM_FLAG_WAITERS, &sem->count);  | 
|---|
 | 1222 | +	}  | 
|---|
 | 1223 | +  | 
|---|
 | 1224 | +wait:  | 
|---|
 | 1225 | +	trace_android_vh_rwsem_wake(sem);  | 
|---|
 | 1226 | +	/* wait until we successfully acquire the lock */  | 
|---|
 | 1227 | +	trace_android_vh_rwsem_write_wait_start(sem);  | 
|---|
 | 1228 | +	set_current_state(state);  | 
|---|
 | 1229 | +	for (;;) {  | 
|---|
 | 1230 | +		if (rwsem_try_write_lock(sem, wstate)) {  | 
|---|
 | 1231 | +			/* rwsem_try_write_lock() implies ACQUIRE on success */  | 
|---|
 | 1232 | +			break;  | 
|---|
 | 1233 | +		}  | 
|---|
 | 1234 | +  | 
|---|
 | 1235 | +		raw_spin_unlock_irq(&sem->wait_lock);  | 
|---|
 | 1236 | +  | 
|---|
 | 1237 | +		/*  | 
|---|
 | 1238 | +		 * After setting the handoff bit and failing to acquire  | 
|---|
 | 1239 | +		 * the lock, attempt to spin on owner to accelerate lock  | 
|---|
 | 1240 | +		 * transfer. If the previous owner is an on-cpu writer and it  | 
|---|
 | 1241 | +		 * has just released the lock, OWNER_NULL will be returned.  | 
|---|
 | 1242 | +		 * In this case, we attempt to acquire the lock again  | 
|---|
 | 1243 | +		 * without sleeping.  | 
|---|
 | 1244 | +		 */  | 
|---|
 | 1245 | +		if (wstate == WRITER_HANDOFF &&  | 
|---|
 | 1246 | +		    rwsem_spin_on_owner(sem, RWSEM_NONSPINNABLE) == OWNER_NULL)  | 
|---|
 | 1247 | +			goto trylock_again;  | 
|---|
 | 1248 | +  | 
|---|
 | 1249 | +		/* Block until there are no active lockers. */  | 
|---|
 | 1250 | +		for (;;) {  | 
|---|
 | 1251 | +			if (signal_pending_state(state, current))  | 
|---|
 | 1252 | +				goto out_nolock;  | 
|---|
 | 1253 | +  | 
|---|
 | 1254 | +			schedule();  | 
|---|
 | 1255 | +			lockevent_inc(rwsem_sleep_writer);  | 
|---|
 | 1256 | +			set_current_state(state);  | 
|---|
 | 1257 | +			/*  | 
|---|
 | 1258 | +			 * If HANDOFF bit is set, unconditionally do  | 
|---|
 | 1259 | +			 * a trylock.  | 
|---|
 | 1260 | +			 */  | 
|---|
 | 1261 | +			if (wstate == WRITER_HANDOFF)  | 
|---|
 | 1262 | +				break;  | 
|---|
 | 1263 | +  | 
|---|
 | 1264 | +			if ((wstate == WRITER_NOT_FIRST) &&  | 
|---|
 | 1265 | +			    (rwsem_first_waiter(sem) == &waiter))  | 
|---|
 | 1266 | +				wstate = WRITER_FIRST;  | 
|---|
 | 1267 | +  | 
|---|
 | 1268 | +			count = atomic_long_read(&sem->count);  | 
|---|
 | 1269 | +			if (!(count & RWSEM_LOCK_MASK))  | 
|---|
 | 1270 | +				break;  | 
|---|
 | 1271 | +  | 
|---|
 | 1272 | +			/*  | 
|---|
 | 1273 | +			 * The setting of the handoff bit is deferred  | 
|---|
 | 1274 | +			 * until rwsem_try_write_lock() is called.  | 
|---|
 | 1275 | +			 */  | 
|---|
 | 1276 | +			if ((wstate == WRITER_FIRST) && (rt_task(current) ||  | 
|---|
 | 1277 | +			    time_after(jiffies, waiter.timeout))) {  | 
|---|
 | 1278 | +				wstate = WRITER_HANDOFF;  | 
|---|
 | 1279 | +				lockevent_inc(rwsem_wlock_handoff);  | 
|---|
 | 1280 | +				break;  | 
|---|
 | 1281 | +			}  | 
|---|
 | 1282 | +		}  | 
|---|
 | 1283 | +trylock_again:  | 
|---|
 | 1284 | +		raw_spin_lock_irq(&sem->wait_lock);  | 
|---|
 | 1285 | +	}  | 
|---|
 | 1286 | +	__set_current_state(TASK_RUNNING);  | 
|---|
 | 1287 | +	trace_android_vh_rwsem_write_wait_finish(sem);  | 
|---|
 | 1288 | +	list_del(&waiter.list);  | 
|---|
 | 1289 | +	rwsem_disable_reader_optspin(sem, disable_rspin);  | 
|---|
 | 1290 | +	raw_spin_unlock_irq(&sem->wait_lock);  | 
|---|
 | 1291 | +	lockevent_inc(rwsem_wlock);  | 
|---|
 | 1292 | +	trace_android_vh_record_rwsem_lock_starttime(current, jiffies);  | 
|---|
 | 1293 | +	return ret;  | 
|---|
 | 1294 | +  | 
|---|
 | 1295 | +out_nolock:  | 
|---|
 | 1296 | +	__set_current_state(TASK_RUNNING);  | 
|---|
 | 1297 | +	trace_android_vh_rwsem_write_wait_finish(sem);  | 
|---|
 | 1298 | +	raw_spin_lock_irq(&sem->wait_lock);  | 
|---|
 | 1299 | +	list_del(&waiter.list);  | 
|---|
 | 1300 | +  | 
|---|
 | 1301 | +	if (unlikely(wstate == WRITER_HANDOFF))  | 
|---|
 | 1302 | +		atomic_long_andnot(RWSEM_FLAG_HANDOFF, &sem->count);  | 
|---|
 | 1303 | +  | 
|---|
 | 1304 | +	if (list_empty(&sem->wait_list))  | 
|---|
 | 1305 | +		atomic_long_andnot(RWSEM_FLAG_WAITERS, &sem->count);  | 
|---|
 | 1306 | +	else  | 
|---|
 | 1307 | +		rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);  | 
|---|
 | 1308 | +	raw_spin_unlock_irq(&sem->wait_lock);  | 
|---|
 | 1309 | +	wake_up_q(&wake_q);  | 
|---|
 | 1310 | +	lockevent_inc(rwsem_wlock_fail);  | 
|---|
 | 1311 | +  | 
|---|
 | 1312 | +	return ERR_PTR(-EINTR);  | 
|---|
 | 1313 | +}  | 
|---|
 | 1314 | +  | 
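For reference, the writer slowpath above is driven by a three-value wait state; the recap below only restates the transitions visible in the code (the enum itself is declared earlier in this file):

	/*
	 * WRITER_NOT_FIRST -- queued behind other waiters; on entry it may wake
	 *                     readers (or the front waiter) queued ahead of it.
	 * WRITER_FIRST     -- now at the head of the wait list; once the waiter's
	 *                     RWSEM_WAIT_TIMEOUT expires (or right away for an
	 *                     RT task) it escalates to WRITER_HANDOFF.
	 * WRITER_HANDOFF   -- a handoff has been requested; rwsem_try_write_lock()
	 *                     is retried unconditionally after every wakeup, and
	 *                     the waiter may spin on the owner to speed up the
	 *                     transfer.
	 */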
|---|
 | 1315 | +/*  | 
|---|
 | 1316 | + * handle waking up a waiter on the semaphore  | 
|---|
 | 1317 | + * - up_read/up_write has decremented the active part of count if we come here  | 
|---|
 | 1318 | + */  | 
|---|
 | 1319 | +static struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem, long count)  | 
|---|
 | 1320 | +{  | 
|---|
 | 1321 | +	unsigned long flags;  | 
|---|
 | 1322 | +	DEFINE_WAKE_Q(wake_q);  | 
|---|
 | 1323 | +  | 
|---|
 | 1324 | +	raw_spin_lock_irqsave(&sem->wait_lock, flags);  | 
|---|
 | 1325 | +  | 
|---|
 | 1326 | +	if (!list_empty(&sem->wait_list))  | 
|---|
 | 1327 | +		rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);  | 
|---|
 | 1328 | +	trace_android_vh_rwsem_wake_finish(sem);  | 
|---|
 | 1329 | +  | 
|---|
 | 1330 | +	raw_spin_unlock_irqrestore(&sem->wait_lock, flags);  | 
|---|
 | 1331 | +	wake_up_q(&wake_q);  | 
|---|
 | 1332 | +  | 
|---|
 | 1333 | +	return sem;  | 
|---|
 | 1334 | +}  | 
|---|
 | 1335 | +  | 
|---|
 | 1336 | +/*  | 
|---|
 | 1337 | + * downgrade a write lock into a read lock  | 
|---|
 | 1338 | + * - caller converted its writer bias into a reader bias and saw RWSEM_FLAG_WAITERS set  | 
|---|
 | 1339 | + * - just wake up any readers at the front of the queue  | 
|---|
 | 1340 | + */  | 
|---|
 | 1341 | +static struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem)  | 
|---|
 | 1342 | +{  | 
|---|
 | 1343 | +	unsigned long flags;  | 
|---|
 | 1344 | +	DEFINE_WAKE_Q(wake_q);  | 
|---|
 | 1345 | +  | 
|---|
 | 1346 | +	raw_spin_lock_irqsave(&sem->wait_lock, flags);  | 
|---|
 | 1347 | +  | 
|---|
 | 1348 | +	if (!list_empty(&sem->wait_list))  | 
|---|
 | 1349 | +		rwsem_mark_wake(sem, RWSEM_WAKE_READ_OWNED, &wake_q);  | 
|---|
 | 1350 | +  | 
|---|
 | 1351 | +	raw_spin_unlock_irqrestore(&sem->wait_lock, flags);  | 
|---|
 | 1352 | +	wake_up_q(&wake_q);  | 
|---|
 | 1353 | +  | 
|---|
 | 1354 | +	return sem;  | 
|---|
 | 1355 | +}  | 
|---|
 | 1356 | +  | 
|---|
 | 1357 | +/*  | 
|---|
 | 1358 | + * lock for reading  | 
|---|
 | 1359 | + */  | 
|---|
 | 1360 | +static inline void __down_read(struct rw_semaphore *sem)  | 
|---|
 | 1361 | +{  | 
|---|
 | 1362 | +	if (!rwsem_read_trylock(sem)) {  | 
|---|
 | 1363 | +		rwsem_down_read_slowpath(sem, TASK_UNINTERRUPTIBLE);  | 
|---|
 | 1364 | +		DEBUG_RWSEMS_WARN_ON(!is_rwsem_reader_owned(sem), sem);  | 
|---|
 | 1365 | +	} else {  | 
|---|
 | 1366 | +		rwsem_set_reader_owned(sem);  | 
|---|
 | 1367 | +	}  | 
|---|
 | 1368 | +}  | 
|---|
 | 1369 | +  | 
|---|
 | 1370 | +static inline int __down_read_interruptible(struct rw_semaphore *sem)  | 
|---|
 | 1371 | +{  | 
|---|
 | 1372 | +	if (!rwsem_read_trylock(sem)) {  | 
|---|
 | 1373 | +		if (IS_ERR(rwsem_down_read_slowpath(sem, TASK_INTERRUPTIBLE)))  | 
|---|
 | 1374 | +			return -EINTR;  | 
|---|
 | 1375 | +		DEBUG_RWSEMS_WARN_ON(!is_rwsem_reader_owned(sem), sem);  | 
|---|
 | 1376 | +	} else {  | 
|---|
 | 1377 | +		rwsem_set_reader_owned(sem);  | 
|---|
 | 1378 | +	}  | 
|---|
 | 1379 | +	return 0;  | 
|---|
 | 1380 | +}  | 
|---|
 | 1381 | +  | 
|---|
 | 1382 | +static inline int __down_read_killable(struct rw_semaphore *sem)  | 
|---|
 | 1383 | +{  | 
|---|
 | 1384 | +	if (!rwsem_read_trylock(sem)) {  | 
|---|
 | 1385 | +		if (IS_ERR(rwsem_down_read_slowpath(sem, TASK_KILLABLE)))  | 
|---|
 | 1386 | +			return -EINTR;  | 
|---|
 | 1387 | +		DEBUG_RWSEMS_WARN_ON(!is_rwsem_reader_owned(sem), sem);  | 
|---|
 | 1388 | +	} else {  | 
|---|
 | 1389 | +		rwsem_set_reader_owned(sem);  | 
|---|
 | 1390 | +	}  | 
|---|
 | 1391 | +	return 0;  | 
|---|
 | 1392 | +}  | 
|---|
 | 1393 | +  | 
|---|
 | 1394 | +static inline int __down_read_trylock(struct rw_semaphore *sem)  | 
|---|
 | 1395 | +{  | 
|---|
 | 1396 | +	long tmp;  | 
|---|
 | 1397 | +  | 
|---|
 | 1398 | +	DEBUG_RWSEMS_WARN_ON(sem->magic != sem, sem);  | 
|---|
 | 1399 | +  | 
|---|
 | 1400 | +	/*  | 
|---|
 | 1401 | +	 * Optimize for the case when the rwsem is not locked at all.  | 
|---|
 | 1402 | +	 */  | 
|---|
 | 1403 | +	tmp = RWSEM_UNLOCKED_VALUE;  | 
|---|
 | 1404 | +	do {  | 
|---|
 | 1405 | +		if (atomic_long_try_cmpxchg_acquire(&sem->count, &tmp,  | 
|---|
 | 1406 | +					tmp + RWSEM_READER_BIAS)) {  | 
|---|
 | 1407 | +			rwsem_set_reader_owned(sem);  | 
|---|
 | 1408 | +			trace_android_vh_record_rwsem_lock_starttime(current, jiffies);  | 
|---|
 | 1409 | +			return 1;  | 
|---|
 | 1410 | +		}  | 
|---|
 | 1411 | +	} while (!(tmp & RWSEM_READ_FAILED_MASK));  | 
|---|
 | 1412 | +	return 0;  | 
|---|
 | 1413 | +}  | 
|---|
 | 1414 | +  | 
|---|
 | 1415 | +/*  | 
|---|
 | 1416 | + * lock for writing  | 
|---|
 | 1417 | + */  | 
|---|
 | 1418 | +static inline void __down_write(struct rw_semaphore *sem)  | 
|---|
 | 1419 | +{  | 
|---|
 | 1420 | +	long tmp = RWSEM_UNLOCKED_VALUE;  | 
|---|
 | 1421 | +  | 
|---|
 | 1422 | +	if (unlikely(!atomic_long_try_cmpxchg_acquire(&sem->count, &tmp,  | 
|---|
 | 1423 | +						      RWSEM_WRITER_LOCKED))) {  | 
|---|
 | 1424 | +		rwsem_down_write_slowpath(sem, TASK_UNINTERRUPTIBLE);  | 
|---|
 | 1425 | +	} else {  | 
|---|
 | 1426 | +		trace_android_vh_record_rwsem_lock_starttime(current, jiffies);  | 
|---|
 | 1427 | +		rwsem_set_owner(sem);  | 
|---|
 | 1428 | +	}  | 
|---|
 | 1429 | +}  | 
|---|
 | 1430 | +  | 
|---|
 | 1431 | +static inline int __down_write_killable(struct rw_semaphore *sem)  | 
|---|
 | 1432 | +{  | 
|---|
 | 1433 | +	long tmp = RWSEM_UNLOCKED_VALUE;  | 
|---|
 | 1434 | +  | 
|---|
 | 1435 | +	if (unlikely(!atomic_long_try_cmpxchg_acquire(&sem->count, &tmp,  | 
|---|
 | 1436 | +						      RWSEM_WRITER_LOCKED))) {  | 
|---|
 | 1437 | +		if (IS_ERR(rwsem_down_write_slowpath(sem, TASK_KILLABLE)))  | 
|---|
 | 1438 | +			return -EINTR;  | 
|---|
 | 1439 | +	} else {  | 
|---|
 | 1440 | +		trace_android_vh_record_rwsem_lock_starttime(current, jiffies);  | 
|---|
 | 1441 | +		rwsem_set_owner(sem);  | 
|---|
 | 1442 | +	}  | 
|---|
 | 1443 | +	return 0;  | 
|---|
 | 1444 | +}  | 
|---|
 | 1445 | +  | 
|---|
 | 1446 | +static inline int __down_write_trylock(struct rw_semaphore *sem)  | 
|---|
 | 1447 | +{  | 
|---|
 | 1448 | +	long tmp;  | 
|---|
 | 1449 | +  | 
|---|
 | 1450 | +	DEBUG_RWSEMS_WARN_ON(sem->magic != sem, sem);  | 
|---|
 | 1451 | +  | 
|---|
 | 1452 | +	tmp  = RWSEM_UNLOCKED_VALUE;  | 
|---|
 | 1453 | +	if (atomic_long_try_cmpxchg_acquire(&sem->count, &tmp,  | 
|---|
 | 1454 | +					    RWSEM_WRITER_LOCKED)) {  | 
|---|
 | 1455 | +		rwsem_set_owner(sem);  | 
|---|
 | 1456 | +		trace_android_vh_record_rwsem_lock_starttime(current, jiffies);  | 
|---|
 | 1457 | +		return true;  | 
|---|
 | 1458 | +	}  | 
|---|
 | 1459 | +	return false;  | 
|---|
 | 1460 | +}  | 
|---|
 | 1461 | +  | 
|---|
 | 1462 | +/*  | 
|---|
 | 1463 | + * unlock after reading  | 
|---|
 | 1464 | + */  | 
|---|
 | 1465 | +static inline void __up_read(struct rw_semaphore *sem)  | 
|---|
 | 1466 | +{  | 
|---|
 | 1467 | +	long tmp;  | 
|---|
 | 1468 | +  | 
|---|
 | 1469 | +	DEBUG_RWSEMS_WARN_ON(sem->magic != sem, sem);  | 
|---|
 | 1470 | +	DEBUG_RWSEMS_WARN_ON(!is_rwsem_reader_owned(sem), sem);  | 
|---|
 | 1471 | +  | 
|---|
 | 1472 | +	trace_android_vh_record_rwsem_lock_starttime(current, 0);  | 
|---|
 | 1473 | +	rwsem_clear_reader_owned(sem);  | 
|---|
 | 1474 | +	tmp = atomic_long_add_return_release(-RWSEM_READER_BIAS, &sem->count);  | 
|---|
 | 1475 | +	DEBUG_RWSEMS_WARN_ON(tmp < 0, sem);  | 
|---|
 | 1476 | +	if (unlikely((tmp & (RWSEM_LOCK_MASK|RWSEM_FLAG_WAITERS)) ==  | 
|---|
 | 1477 | +		      RWSEM_FLAG_WAITERS)) {  | 
|---|
 | 1478 | +		clear_wr_nonspinnable(sem);  | 
|---|
 | 1479 | +		rwsem_wake(sem, tmp);  | 
|---|
 | 1480 | +	}  | 
|---|
 | 1481 | +	trace_android_vh_rwsem_up_read_end(sem);  | 
|---|
 | 1482 | +}  | 
|---|
 | 1483 | +  | 
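As a worked example of the count handling above (illustrative; it relies on the count-field layout defined earlier in this file):

	/*
	 *   count == 0                         unlocked
	 *   + RWSEM_READER_BIAS  (reader A)    one reader, no flag bits
	 *   + RWSEM_READER_BIAS  (reader B)    two readers
	 *   a writer arrives: the try_cmpxchg in __down_write() fails, so it
	 *   enters rwsem_down_write_slowpath() and sets RWSEM_FLAG_WAITERS
	 *   - RWSEM_READER_BIAS  (reader A)    one reader + RWSEM_FLAG_WAITERS
	 *   - RWSEM_READER_BIAS  (reader B)    only RWSEM_FLAG_WAITERS is left,
	 *                                      so the check in __up_read() fires
	 *                                      and rwsem_wake() hands the lock
	 *                                      to the queued writer
	 */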
|---|
 | 1484 | +/*  | 
|---|
 | 1485 | + * unlock after writing  | 
|---|
 | 1486 | + */  | 
|---|
 | 1487 | +static inline void __up_write(struct rw_semaphore *sem)  | 
|---|
 | 1488 | +{  | 
|---|
 | 1489 | +	long tmp;  | 
|---|
 | 1490 | +  | 
|---|
 | 1491 | +	DEBUG_RWSEMS_WARN_ON(sem->magic != sem, sem);  | 
|---|
 | 1492 | +	/*  | 
|---|
 | 1493 | +	 * sem->owner may differ from current if the ownership is transferred  | 
|---|
 | 1494 | +	 * to an anonymous writer by setting the RWSEM_NONSPINNABLE bits.  | 
|---|
 | 1495 | +	 */  | 
|---|
 | 1496 | +	DEBUG_RWSEMS_WARN_ON((rwsem_owner(sem) != current) &&  | 
|---|
 | 1497 | +			    !rwsem_test_oflags(sem, RWSEM_NONSPINNABLE), sem);  | 
|---|
 | 1498 | +  | 
|---|
 | 1499 | +	trace_android_vh_record_rwsem_lock_starttime(current, 0);  | 
|---|
 | 1500 | +	rwsem_clear_owner(sem);  | 
|---|
 | 1501 | +	tmp = atomic_long_fetch_add_release(-RWSEM_WRITER_LOCKED, &sem->count);  | 
|---|
 | 1502 | +	if (unlikely(tmp & RWSEM_FLAG_WAITERS))  | 
|---|
 | 1503 | +		rwsem_wake(sem, tmp);  | 
|---|
 | 1504 | +	trace_android_vh_rwsem_up_write_end(sem);  | 
|---|
 | 1505 | +}  | 
|---|
 | 1506 | +  | 
|---|
 | 1507 | +/*  | 
|---|
 | 1508 | + * downgrade write lock to read lock  | 
|---|
 | 1509 | + */  | 
|---|
 | 1510 | +static inline void __downgrade_write(struct rw_semaphore *sem)  | 
|---|
 | 1511 | +{  | 
|---|
 | 1512 | +	long tmp;  | 
|---|
 | 1513 | +  | 
|---|
 | 1514 | +	/*  | 
|---|
 | 1515 | +	 * When downgrading from exclusive to shared ownership,  | 
|---|
 | 1516 | +	 * anything inside the write-locked region cannot leak  | 
|---|
 | 1517 | +	 * into the read side. In contrast, anything in the  | 
|---|
 | 1518 | +	 * read-locked region is ok to be re-ordered into the  | 
|---|
 | 1519 | +	 * write side. As such, rely on RELEASE semantics.  | 
|---|
 | 1520 | +	 */  | 
|---|
 | 1521 | +	DEBUG_RWSEMS_WARN_ON(rwsem_owner(sem) != current, sem);  | 
|---|
 | 1522 | +	tmp = atomic_long_fetch_add_release(  | 
|---|
 | 1523 | +		-RWSEM_WRITER_LOCKED+RWSEM_READER_BIAS, &sem->count);  | 
|---|
 | 1524 | +	rwsem_set_reader_owned(sem);  | 
|---|
 | 1525 | +	if (tmp & RWSEM_FLAG_WAITERS)  | 
|---|
 | 1526 | +		rwsem_downgrade_wake(sem);  | 
|---|
 | 1527 | +}  | 
|---|
 | 1528 | +#endif  | 
|---|
| 17 | 1529 |   | 
|---|
| 18 | 1530 |  /* | 
|---|
| 19 | 1531 |   * lock for reading | 
|---|
| .. | .. | 
|---|
| 24 | 1536 |  	rwsem_acquire_read(&sem->dep_map, 0, 0, _RET_IP_); | 
|---|
| 25 | 1537 |   | 
|---|
| 26 | 1538 |  	LOCK_CONTENDED(sem, __down_read_trylock, __down_read); | 
|---|
| 27 |  | -	rwsem_set_reader_owned(sem);  | 
|---|
| 28 | 1539 |  } | 
|---|
| 29 |  | -  | 
|---|
| 30 | 1540 |  EXPORT_SYMBOL(down_read); | 
|---|
 | 1541 | +  | 
|---|
 | 1542 | +int __sched down_read_interruptible(struct rw_semaphore *sem)  | 
|---|
 | 1543 | +{  | 
|---|
 | 1544 | +	might_sleep();  | 
|---|
 | 1545 | +	rwsem_acquire_read(&sem->dep_map, 0, 0, _RET_IP_);  | 
|---|
 | 1546 | +  | 
|---|
 | 1547 | +	if (LOCK_CONTENDED_RETURN(sem, __down_read_trylock, __down_read_interruptible)) {  | 
|---|
 | 1548 | +		rwsem_release(&sem->dep_map, _RET_IP_);  | 
|---|
 | 1549 | +		return -EINTR;  | 
|---|
 | 1550 | +	}  | 
|---|
 | 1551 | +  | 
|---|
 | 1552 | +	return 0;  | 
|---|
 | 1553 | +}  | 
|---|
 | 1554 | +EXPORT_SYMBOL(down_read_interruptible);  | 
|---|
| 31 | 1555 |   | 
|---|
| 32 | 1556 |  int __sched down_read_killable(struct rw_semaphore *sem) | 
|---|
| 33 | 1557 |  { | 
|---|
| .. | .. | 
|---|
| 35 | 1559 |  	rwsem_acquire_read(&sem->dep_map, 0, 0, _RET_IP_); | 
|---|
| 36 | 1560 |   | 
|---|
| 37 | 1561 |  	if (LOCK_CONTENDED_RETURN(sem, __down_read_trylock, __down_read_killable)) { | 
|---|
| 38 |  | -		rwsem_release(&sem->dep_map, 1, _RET_IP_);  | 
|---|
 | 1562 | +		rwsem_release(&sem->dep_map, _RET_IP_);  | 
|---|
| 39 | 1563 |  		return -EINTR; | 
|---|
| 40 | 1564 |  	} | 
|---|
| 41 | 1565 |   | 
|---|
| 42 |  | -	rwsem_set_reader_owned(sem);  | 
|---|
| 43 | 1566 |  	return 0; | 
|---|
| 44 | 1567 |  } | 
|---|
| 45 |  | -  | 
|---|
| 46 | 1568 |  EXPORT_SYMBOL(down_read_killable); | 
|---|
| 47 | 1569 |   | 
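A caller-side sketch of the interruptible/killable read variants (hypothetical function, not part of this file): a non-zero return means the rwsem was never acquired, so the caller must not release it.

	static int example_read_locked_op(struct rw_semaphore *sem)
	{
		int ret;

		ret = down_read_killable(sem);	/* or down_read_interruptible(sem) */
		if (ret)
			return ret;		/* -EINTR: the lock was NOT taken */

		/* ... read-side critical section ... */

		up_read(sem);
		return 0;
	}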
|---|
| 48 | 1570 |  /* | 
|---|
| .. | .. | 
|---|
| 52 | 1574 |  { | 
|---|
| 53 | 1575 |  	int ret = __down_read_trylock(sem); | 
|---|
| 54 | 1576 |   | 
|---|
| 55 |  | -	if (ret == 1) {  | 
|---|
 | 1577 | +	if (ret == 1)  | 
|---|
| 56 | 1578 |  		rwsem_acquire_read(&sem->dep_map, 0, 1, _RET_IP_); | 
|---|
| 57 |  | -		rwsem_set_reader_owned(sem);  | 
|---|
| 58 |  | -	}  | 
|---|
| 59 | 1579 |  	return ret; | 
|---|
| 60 | 1580 |  } | 
|---|
| 61 |  | -  | 
|---|
| 62 | 1581 |  EXPORT_SYMBOL(down_read_trylock); | 
|---|
| 63 | 1582 |   | 
|---|
| 64 | 1583 |  /* | 
|---|
| .. | .. | 
|---|
| 68 | 1587 |  { | 
|---|
| 69 | 1588 |  	might_sleep(); | 
|---|
| 70 | 1589 |  	rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_); | 
|---|
| 71 |  | -  | 
|---|
| 72 | 1590 |  	LOCK_CONTENDED(sem, __down_write_trylock, __down_write); | 
|---|
| 73 |  | -	rwsem_set_owner(sem);  | 
|---|
| 74 | 1591 |  } | 
|---|
| 75 |  | -  | 
|---|
| 76 | 1592 |  EXPORT_SYMBOL(down_write); | 
|---|
| 77 | 1593 |   | 
|---|
| 78 | 1594 |  /* | 
|---|
| .. | .. | 
|---|
| 83 | 1599 |  	might_sleep(); | 
|---|
| 84 | 1600 |  	rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_); | 
|---|
| 85 | 1601 |   | 
|---|
| 86 |  | -	if (LOCK_CONTENDED_RETURN(sem, __down_write_trylock, __down_write_killable)) {  | 
|---|
| 87 |  | -		rwsem_release(&sem->dep_map, 1, _RET_IP_);  | 
|---|
 | 1602 | +	if (LOCK_CONTENDED_RETURN(sem, __down_write_trylock,  | 
|---|
 | 1603 | +				  __down_write_killable)) {  | 
|---|
 | 1604 | +		rwsem_release(&sem->dep_map, _RET_IP_);  | 
|---|
| 88 | 1605 |  		return -EINTR; | 
|---|
| 89 | 1606 |  	} | 
|---|
| 90 | 1607 |   | 
|---|
| 91 |  | -	rwsem_set_owner(sem);  | 
|---|
| 92 | 1608 |  	return 0; | 
|---|
| 93 | 1609 |  } | 
|---|
| 94 |  | -  | 
|---|
| 95 | 1610 |  EXPORT_SYMBOL(down_write_killable); | 
|---|
| 96 | 1611 |   | 
|---|
| 97 | 1612 |  /* | 
|---|
| .. | .. | 
|---|
| 101 | 1616 |  { | 
|---|
| 102 | 1617 |  	int ret = __down_write_trylock(sem); | 
|---|
| 103 | 1618 |   | 
|---|
| 104 |  | -	if (ret == 1) {  | 
|---|
 | 1619 | +	if (ret == 1)  | 
|---|
| 105 | 1620 |  		rwsem_acquire(&sem->dep_map, 0, 1, _RET_IP_); | 
|---|
| 106 |  | -		rwsem_set_owner(sem);  | 
|---|
| 107 |  | -	}  | 
|---|
| 108 | 1621 |   | 
|---|
| 109 | 1622 |  	return ret; | 
|---|
| 110 | 1623 |  } | 
|---|
| 111 |  | -  | 
|---|
| 112 | 1624 |  EXPORT_SYMBOL(down_write_trylock); | 
|---|
| 113 | 1625 |   | 
|---|
| 114 | 1626 |  /* | 
|---|
| .. | .. | 
|---|
| 116 | 1628 |   */ | 
|---|
| 117 | 1629 |  void up_read(struct rw_semaphore *sem) | 
|---|
| 118 | 1630 |  { | 
|---|
| 119 |  | -	rwsem_release(&sem->dep_map, 1, _RET_IP_);  | 
|---|
| 120 |  | -	DEBUG_RWSEMS_WARN_ON(sem->owner != RWSEM_READER_OWNED);  | 
|---|
| 121 |  | -  | 
|---|
 | 1631 | +	rwsem_release(&sem->dep_map, _RET_IP_);  | 
|---|
| 122 | 1632 |  	__up_read(sem); | 
|---|
| 123 | 1633 |  } | 
|---|
| 124 |  | -  | 
|---|
| 125 | 1634 |  EXPORT_SYMBOL(up_read); | 
|---|
| 126 | 1635 |   | 
|---|
| 127 | 1636 |  /* | 
|---|
| .. | .. | 
|---|
| 129 | 1638 |   */ | 
|---|
| 130 | 1639 |  void up_write(struct rw_semaphore *sem) | 
|---|
| 131 | 1640 |  { | 
|---|
| 132 |  | -	rwsem_release(&sem->dep_map, 1, _RET_IP_);  | 
|---|
| 133 |  | -	DEBUG_RWSEMS_WARN_ON(sem->owner != current);  | 
|---|
| 134 |  | -  | 
|---|
| 135 |  | -	rwsem_clear_owner(sem);  | 
|---|
 | 1641 | +	rwsem_release(&sem->dep_map, _RET_IP_);  | 
|---|
| 136 | 1642 |  	__up_write(sem); | 
|---|
| 137 | 1643 |  } | 
|---|
| 138 |  | -  | 
|---|
| 139 | 1644 |  EXPORT_SYMBOL(up_write); | 
|---|
| 140 | 1645 |   | 
|---|
| 141 | 1646 |  /* | 
|---|
| .. | .. | 
|---|
| 144 | 1649 |  void downgrade_write(struct rw_semaphore *sem) | 
|---|
| 145 | 1650 |  { | 
|---|
| 146 | 1651 |  	lock_downgrade(&sem->dep_map, _RET_IP_); | 
|---|
| 147 |  | -	DEBUG_RWSEMS_WARN_ON(sem->owner != current);  | 
|---|
| 148 |  | -  | 
|---|
| 149 |  | -	rwsem_set_reader_owned(sem);  | 
|---|
| 150 | 1652 |  	__downgrade_write(sem); | 
|---|
| 151 | 1653 |  } | 
|---|
| 152 |  | -  | 
|---|
| 153 | 1654 |  EXPORT_SYMBOL(downgrade_write); | 
|---|
| 154 | 1655 |   | 
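A hypothetical usage sketch of downgrade_write() (struct foo and the function name are illustrative): publish data under the write lock, then downgrade so that other readers can run concurrently while this task keeps read-side access.

	static void example_publish_then_read(struct rw_semaphore *sem, struct foo *f)
	{
		down_write(sem);
		/* exclusive section: initialize and publish f */
		downgrade_write(sem);	/* atomically become a read-lock holder */
		/* shared section: other readers may enter, writers remain blocked */
		up_read(sem);
	}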
|---|
| 155 | 1656 |  #ifdef CONFIG_DEBUG_LOCK_ALLOC | 
|---|
| .. | .. | 
|---|
| 158 | 1659 |  { | 
|---|
| 159 | 1660 |  	might_sleep(); | 
|---|
| 160 | 1661 |  	rwsem_acquire_read(&sem->dep_map, subclass, 0, _RET_IP_); | 
|---|
| 161 |  | -  | 
|---|
| 162 | 1662 |  	LOCK_CONTENDED(sem, __down_read_trylock, __down_read); | 
|---|
| 163 |  | -	rwsem_set_reader_owned(sem);  | 
|---|
| 164 | 1663 |  } | 
|---|
| 165 |  | -  | 
|---|
| 166 | 1664 |  EXPORT_SYMBOL(down_read_nested); | 
|---|
 | 1665 | +  | 
|---|
 | 1666 | +int down_read_killable_nested(struct rw_semaphore *sem, int subclass)  | 
|---|
 | 1667 | +{  | 
|---|
 | 1668 | +	might_sleep();  | 
|---|
 | 1669 | +	rwsem_acquire_read(&sem->dep_map, subclass, 0, _RET_IP_);  | 
|---|
 | 1670 | +  | 
|---|
 | 1671 | +	if (LOCK_CONTENDED_RETURN(sem, __down_read_trylock, __down_read_killable)) {  | 
|---|
 | 1672 | +		rwsem_release(&sem->dep_map, _RET_IP_);  | 
|---|
 | 1673 | +		return -EINTR;  | 
|---|
 | 1674 | +	}  | 
|---|
 | 1675 | +  | 
|---|
 | 1676 | +	return 0;  | 
|---|
 | 1677 | +}  | 
|---|
 | 1678 | +EXPORT_SYMBOL(down_read_killable_nested);  | 
|---|
| 167 | 1679 |   | 
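A hypothetical sketch of the _nested() variants (names and lock ordering are illustrative): when two rwsems of the same lockdep class must be held at once, the second acquisition carries a subclass annotation so lockdep does not report a false self-deadlock.

	static void example_lock_pair(struct rw_semaphore *a, struct rw_semaphore *b)
	{
		/* a and b belong to the same lock class, e.g. two instances of one struct */
		down_read(a);
		down_read_nested(b, SINGLE_DEPTH_NESTING);
		/* ... */
		up_read(b);
		up_read(a);
	}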
|---|
| 168 | 1680 |  void _down_write_nest_lock(struct rw_semaphore *sem, struct lockdep_map *nest) | 
|---|
| 169 | 1681 |  { | 
|---|
| 170 | 1682 |  	might_sleep(); | 
|---|
| 171 | 1683 |  	rwsem_acquire_nest(&sem->dep_map, 0, 0, nest, _RET_IP_); | 
|---|
| 172 |  | -  | 
|---|
| 173 | 1684 |  	LOCK_CONTENDED(sem, __down_write_trylock, __down_write); | 
|---|
| 174 |  | -	rwsem_set_owner(sem);  | 
|---|
| 175 | 1685 |  } | 
|---|
| 176 |  | -  | 
|---|
| 177 | 1686 |  EXPORT_SYMBOL(_down_write_nest_lock); | 
|---|
| 178 | 1687 |   | 
|---|
| 179 | 1688 |  void down_read_non_owner(struct rw_semaphore *sem) | 
|---|
| 180 | 1689 |  { | 
|---|
| 181 | 1690 |  	might_sleep(); | 
|---|
| 182 |  | -  | 
|---|
| 183 | 1691 |  	__down_read(sem); | 
|---|
| 184 |  | -	rwsem_set_reader_owned(sem);  | 
|---|
 | 1692 | +#ifndef CONFIG_PREEMPT_RT  | 
|---|
 | 1693 | +	__rwsem_set_reader_owned(sem, NULL);  | 
|---|
 | 1694 | +#endif  | 
|---|
| 185 | 1695 |  } | 
|---|
| 186 |  | -  | 
|---|
| 187 | 1696 |  EXPORT_SYMBOL(down_read_non_owner); | 
|---|
| 188 | 1697 |   | 
|---|
| 189 | 1698 |  void down_write_nested(struct rw_semaphore *sem, int subclass) | 
|---|
| 190 | 1699 |  { | 
|---|
| 191 | 1700 |  	might_sleep(); | 
|---|
| 192 | 1701 |  	rwsem_acquire(&sem->dep_map, subclass, 0, _RET_IP_); | 
|---|
| 193 |  | -  | 
|---|
| 194 | 1702 |  	LOCK_CONTENDED(sem, __down_write_trylock, __down_write); | 
|---|
| 195 |  | -	rwsem_set_owner(sem);  | 
|---|
| 196 | 1703 |  } | 
|---|
| 197 |  | -  | 
|---|
| 198 | 1704 |  EXPORT_SYMBOL(down_write_nested); | 
|---|
| 199 | 1705 |   | 
|---|
| 200 | 1706 |  int __sched down_write_killable_nested(struct rw_semaphore *sem, int subclass) | 
|---|
| .. | .. | 
|---|
| 202 | 1708 |  	might_sleep(); | 
|---|
| 203 | 1709 |  	rwsem_acquire(&sem->dep_map, subclass, 0, _RET_IP_); | 
|---|
| 204 | 1710 |   | 
|---|
| 205 |  | -	if (LOCK_CONTENDED_RETURN(sem, __down_write_trylock, __down_write_killable)) {  | 
|---|
| 206 |  | -		rwsem_release(&sem->dep_map, 1, _RET_IP_);  | 
|---|
 | 1711 | +	if (LOCK_CONTENDED_RETURN(sem, __down_write_trylock,  | 
|---|
 | 1712 | +				  __down_write_killable)) {  | 
|---|
 | 1713 | +		rwsem_release(&sem->dep_map, _RET_IP_);  | 
|---|
| 207 | 1714 |  		return -EINTR; | 
|---|
| 208 | 1715 |  	} | 
|---|
| 209 | 1716 |   | 
|---|
| 210 |  | -	rwsem_set_owner(sem);  | 
|---|
| 211 | 1717 |  	return 0; | 
|---|
| 212 | 1718 |  } | 
|---|
| 213 |  | -  | 
|---|
| 214 | 1719 |  EXPORT_SYMBOL(down_write_killable_nested); | 
|---|
| 215 | 1720 |   | 
|---|
| 216 | 1721 |  void up_read_non_owner(struct rw_semaphore *sem) | 
|---|
| 217 | 1722 |  { | 
|---|
| 218 |  | -	DEBUG_RWSEMS_WARN_ON(sem->owner != RWSEM_READER_OWNED);  | 
|---|
 | 1723 | +#ifndef CONFIG_PREEMPT_RT  | 
|---|
 | 1724 | +	DEBUG_RWSEMS_WARN_ON(!is_rwsem_reader_owned(sem), sem);  | 
|---|
 | 1725 | +#endif  | 
|---|
| 219 | 1726 |  	__up_read(sem); | 
|---|
| 220 | 1727 |  } | 
|---|
| 221 |  | -  | 
|---|
| 222 | 1728 |  EXPORT_SYMBOL(up_read_non_owner); | 
|---|
| 223 | 1729 |   | 
|---|
| 224 | 1730 |  #endif | 
|---|