@@ -1,24 +1,45 @@
+// SPDX-License-Identifier: GPL-2.0-only
 #include <linux/atomic.h>
-#include <linux/rwsem.h>
 #include <linux/percpu.h>
+#include <linux/wait.h>
 #include <linux/lockdep.h>
 #include <linux/percpu-rwsem.h>
 #include <linux/rcupdate.h>
 #include <linux/sched.h>
+#include <linux/sched/task.h>
+#include <linux/slab.h>
 #include <linux/errno.h>
 
+#include <trace/hooks/dtask.h>
+
+/*
+ * Calling trace_android_vh_record_pcpu_rwsem_starttime from
+ * include/linux/percpu-rwsem.h would require including include/hooks/dtask.h
+ * there, which would cause a build error, so we provide
+ * _trace_android_vh_record_pcpu_rwsem_starttime() for percpu-rwsem.h to call.
+ */
+void _trace_android_vh_record_pcpu_rwsem_starttime(struct task_struct *tsk,
+		unsigned long settime)
+{
+	trace_android_vh_record_pcpu_rwsem_starttime(tsk, settime);
+}
+EXPORT_SYMBOL_GPL(_trace_android_vh_record_pcpu_rwsem_starttime);
+
 int __percpu_init_rwsem(struct percpu_rw_semaphore *sem,
-			const char *name, struct lock_class_key *rwsem_key)
+			const char *name, struct lock_class_key *key)
 {
 	sem->read_count = alloc_percpu(int);
 	if (unlikely(!sem->read_count))
 		return -ENOMEM;
 
-	/* ->rw_sem represents the whole percpu_rw_semaphore for lockdep */
-	rcu_sync_init(&sem->rss, RCU_SCHED_SYNC);
-	__init_rwsem(&sem->rw_sem, name, rwsem_key);
+	rcu_sync_init(&sem->rss);
 	rcuwait_init(&sem->writer);
-	sem->readers_block = 0;
+	init_waitqueue_head(&sem->waiters);
+	atomic_set(&sem->block, 0);
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+	debug_check_no_locks_freed((void *)sem, sizeof(*sem));
+	lockdep_init_map(&sem->dep_map, name, key, 0);
+#endif
 	return 0;
 }
 EXPORT_SYMBOL_GPL(__percpu_init_rwsem);
@@ -38,73 +59,141 @@
 }
 EXPORT_SYMBOL_GPL(percpu_free_rwsem);
 
-int __percpu_down_read(struct percpu_rw_semaphore *sem, int try)
+static bool __percpu_down_read_trylock(struct percpu_rw_semaphore *sem)
 {
+	this_cpu_inc(*sem->read_count);
+
 	/*
 	 * Due to having preemption disabled the decrement happens on
 	 * the same CPU as the increment, avoiding the
 	 * increment-on-one-CPU-and-decrement-on-another problem.
 	 *
-	 * If the reader misses the writer's assignment of readers_block, then
-	 * the writer is guaranteed to see the reader's increment.
+	 * If the reader misses the writer's assignment of sem->block, then the
+	 * writer is guaranteed to see the reader's increment.
 	 *
 	 * Conversely, any readers that increment their sem->read_count after
-	 * the writer looks are guaranteed to see the readers_block value,
-	 * which in turn means that they are guaranteed to immediately
-	 * decrement their sem->read_count, so that it doesn't matter that the
-	 * writer missed them.
+	 * the writer looks are guaranteed to see the sem->block value, which
+	 * in turn means that they are guaranteed to immediately decrement
+	 * their sem->read_count, so that it doesn't matter that the writer
+	 * missed them.
 	 */
 
 	smp_mb(); /* A matches D */
 
 	/*
-	 * If !readers_block the critical section starts here, matched by the
+	 * If !sem->block the critical section starts here, matched by the
 	 * release in percpu_up_write().
 	 */
-	if (likely(!smp_load_acquire(&sem->readers_block)))
+	if (likely(!atomic_read_acquire(&sem->block)))
+		return true;
+
+	this_cpu_dec(*sem->read_count);
+
+	/* Prod writer to re-evaluate readers_active_check() */
+	rcuwait_wake_up(&sem->writer);
+
+	return false;
+}
+
+static inline bool __percpu_down_write_trylock(struct percpu_rw_semaphore *sem)
+{
+	if (atomic_read(&sem->block))
+		return false;
+
+	return atomic_xchg(&sem->block, 1) == 0;
+}
+
+static bool __percpu_rwsem_trylock(struct percpu_rw_semaphore *sem, bool reader)
+{
+	if (reader) {
+		bool ret;
+
+		preempt_disable();
+		ret = __percpu_down_read_trylock(sem);
+		preempt_enable();
+
+		return ret;
+	}
+	return __percpu_down_write_trylock(sem);
+}
+
+/*
+ * The return value of wait_queue_entry::func means:
+ *
+ *  <0 - error, wakeup is terminated and the error is returned
+ *   0 - no wakeup, a next waiter is tried
+ *  >0 - woken, if EXCLUSIVE, counted towards @nr_exclusive.
+ *
+ * We use EXCLUSIVE for both readers and writers to preserve FIFO order,
+ * and play games with the return value to allow waking multiple readers.
+ *
+ * Specifically, we wake readers until we've woken a single writer, or until a
+ * trylock fails.
+ */
+static int percpu_rwsem_wake_function(struct wait_queue_entry *wq_entry,
+				      unsigned int mode, int wake_flags,
+				      void *key)
+{
+	bool reader = wq_entry->flags & WQ_FLAG_CUSTOM;
+	struct percpu_rw_semaphore *sem = key;
+	struct task_struct *p;
+
+	/* concurrent against percpu_down_write(), can get stolen */
+	if (!__percpu_rwsem_trylock(sem, reader))
 		return 1;
 
+	p = get_task_struct(wq_entry->private);
+	list_del_init(&wq_entry->entry);
+	smp_store_release(&wq_entry->private, NULL);
+
+	wake_up_process(p);
+	put_task_struct(p);
+
+	return !reader; /* wake (readers until) 1 writer */
+}
+
+static void percpu_rwsem_wait(struct percpu_rw_semaphore *sem, bool reader)
+{
+	DEFINE_WAIT_FUNC(wq_entry, percpu_rwsem_wake_function);
+	bool wait;
+
+	spin_lock_irq(&sem->waiters.lock);
 	/*
-	 * Per the above comment; we still have preemption disabled and
-	 * will thus decrement on the same CPU as we incremented.
+	 * Serialize against the wakeup in percpu_up_write(), if we fail
+	 * the trylock, the wakeup must see us on the list.
 	 */
-	__percpu_up_read(sem);
+	wait = !__percpu_rwsem_trylock(sem, reader);
+	if (wait) {
+		wq_entry.flags |= WQ_FLAG_EXCLUSIVE | reader * WQ_FLAG_CUSTOM;
+		__add_wait_queue_entry_tail(&sem->waiters, &wq_entry);
+		trace_android_vh_percpu_rwsem_wq_add(sem, reader);
+	}
+	spin_unlock_irq(&sem->waiters.lock);
+
+	while (wait) {
+		set_current_state(TASK_UNINTERRUPTIBLE);
+		if (!smp_load_acquire(&wq_entry.private))
+			break;
+		schedule();
+	}
+	__set_current_state(TASK_RUNNING);
+}
+
+bool __percpu_down_read(struct percpu_rw_semaphore *sem, bool try)
+{
+	if (__percpu_down_read_trylock(sem))
+		return true;
 
 	if (try)
-		return 0;
+		return false;
 
-	/*
-	 * We either call schedule() in the wait, or we'll fall through
-	 * and reschedule on the preempt_enable() in percpu_down_read().
-	 */
-	preempt_enable_no_resched();
-
-	/*
-	 * Avoid lockdep for the down/up_read() we already have them.
-	 */
-	__down_read(&sem->rw_sem);
-	this_cpu_inc(*sem->read_count);
-	__up_read(&sem->rw_sem);
-
+	preempt_enable();
+	percpu_rwsem_wait(sem, /* .reader = */ true);
 	preempt_disable();
-	return 1;
+
+	return true;
 }
 EXPORT_SYMBOL_GPL(__percpu_down_read);
-
-void __percpu_up_read(struct percpu_rw_semaphore *sem)
-{
-	smp_mb(); /* B matches C */
-	/*
-	 * In other words, if they see our decrement (presumably to aggregate
-	 * zero, as that is the only time it matters) they will also see our
-	 * critical section.
-	 */
-	__this_cpu_dec(*sem->read_count);
-
-	/* Prod writer to recheck readers_active */
-	rcuwait_wake_up(&sem->writer);
-}
-EXPORT_SYMBOL_GPL(__percpu_up_read);
 
 #define per_cpu_sum(var)						\
 ({									\
@@ -121,6 +210,8 @@
  * zero. If this sum is zero, then it is stable due to the fact that if any
  * newly arriving readers increment a given counter, they will immediately
  * decrement that same counter.
+ *
+ * Assumes sem->block is set.
  */
 static bool readers_active_check(struct percpu_rw_semaphore *sem)
 {
@@ -139,32 +230,37 @@
 
 void percpu_down_write(struct percpu_rw_semaphore *sem)
 {
+	might_sleep();
+	rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_);
+
 	/* Notify readers to take the slow path. */
 	rcu_sync_enter(&sem->rss);
 
-	down_write(&sem->rw_sem);
+	/*
+	 * Try set sem->block; this provides writer-writer exclusion.
+	 * Having sem->block set makes new readers block.
+	 */
+	if (!__percpu_down_write_trylock(sem))
+		percpu_rwsem_wait(sem, /* .reader = */ false);
+
+	/* smp_mb() implied by __percpu_down_write_trylock() on success -- D matches A */
 
 	/*
-	 * Notify new readers to block; up until now, and thus throughout the
-	 * longish rcu_sync_enter() above, new readers could still come in.
-	 */
-	WRITE_ONCE(sem->readers_block, 1);
-
-	smp_mb(); /* D matches A */
-
-	/*
-	 * If they don't see our writer of readers_block, then we are
-	 * guaranteed to see their sem->read_count increment, and therefore
-	 * will wait for them.
+	 * If they don't see our store of sem->block, then we are guaranteed to
+	 * see their sem->read_count increment, and therefore will wait for
+	 * them.
 	 */
 
-	/* Wait for all now active readers to complete. */
-	rcuwait_wait_event(&sem->writer, readers_active_check(sem));
+	/* Wait for all active readers to complete. */
+	rcuwait_wait_event(&sem->writer, readers_active_check(sem), TASK_UNINTERRUPTIBLE);
+	trace_android_vh_record_pcpu_rwsem_starttime(current, jiffies);
 }
 EXPORT_SYMBOL_GPL(percpu_down_write);
 
 void percpu_up_write(struct percpu_rw_semaphore *sem)
 {
+	rwsem_release(&sem->dep_map, _RET_IP_);
+
 	/*
 	 * Signal the writer is done, no fast path yet.
 	 *
@@ -175,12 +271,12 @@
 	 * Therefore we force it through the slow path which guarantees an
 	 * acquire and thereby guarantees the critical section's consistency.
 	 */
-	smp_store_release(&sem->readers_block, 0);
+	atomic_set_release(&sem->block, 0);
 
 	/*
-	 * Release the write lock, this will allow readers back in the game.
+	 * Prod any pending reader/writer to make progress.
 	 */
-	up_write(&sem->rw_sem);
+	__wake_up(&sem->waiters, TASK_NORMAL, 1, sem);
 
 	/*
 	 * Once this completes (at least one RCU-sched grace period hence) the
@@ -188,5 +284,37 @@
 	 * exclusive write lock because its counting.
 	 */
 	rcu_sync_exit(&sem->rss);
+	trace_android_vh_record_pcpu_rwsem_starttime(current, 0);
 }
 EXPORT_SYMBOL_GPL(percpu_up_write);
+
+static LIST_HEAD(destroy_list);
+static DEFINE_SPINLOCK(destroy_list_lock);
+
+static void destroy_list_workfn(struct work_struct *work)
+{
+	struct percpu_rw_semaphore_atomic *sem, *sem2;
+	LIST_HEAD(to_destroy);
+
+	spin_lock(&destroy_list_lock);
+	list_splice_init(&destroy_list, &to_destroy);
+	spin_unlock(&destroy_list_lock);
+
+	if (list_empty(&to_destroy))
+		return;
+
+	list_for_each_entry_safe(sem, sem2, &to_destroy, destroy_list_entry) {
+		percpu_free_rwsem(&sem->rw_sem);
+		kfree(sem);
+	}
+}
+
+static DECLARE_WORK(destroy_list_work, destroy_list_workfn);
+
+void percpu_rwsem_async_destroy(struct percpu_rw_semaphore_atomic *sem)
+{
+	spin_lock(&destroy_list_lock);
+	list_add_tail(&sem->destroy_list_entry, &destroy_list);
+	spin_unlock(&destroy_list_lock);
+	schedule_work(&destroy_list_work);
+}
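
For reference, the functions patched above back the reader/writer entry points declared in include/linux/percpu-rwsem.h. The sketch below is a minimal, hypothetical use of that public API (the lock stats_sem, the variable stats_value, and both helpers are illustrative and not part of this patch): readers stay on the cheap per-CPU fast path until a writer sets sem->block, after which they fall back to the wait-queue slow path added in this change.

/* Illustrative sketch only; stats_sem and these helpers are hypothetical. */
#include <linux/percpu-rwsem.h>

DEFINE_STATIC_PERCPU_RWSEM(stats_sem);

static unsigned long stats_value;

/* Hot path: essentially a per-CPU counter increment, no shared cacheline contention. */
static unsigned long stats_read(void)
{
	unsigned long v;

	percpu_down_read(&stats_sem);	/* fast path unless a writer holds sem->block */
	v = stats_value;
	percpu_up_read(&stats_sem);

	return v;
}

/* Cold path: waits for an RCU grace period and for all active readers to drain. */
static void stats_reset(void)
{
	percpu_down_write(&stats_sem);	/* sets sem->block; new readers take the slow path */
	stats_value = 0;
	percpu_up_write(&stats_sem);	/* wakes waiters queued on sem->waiters */
}

A caller like this needs no changes to be observed by the Android vendor hooks added here: trace_android_vh_record_pcpu_rwsem_starttime and trace_android_vh_percpu_rwsem_wq_add fire inside percpu_down_write()/percpu_up_write() and percpu_rwsem_wait() respectively.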