hc
2023-12-11 d2ccde1c8e90d38cee87a1b0309ad2827f3fd30d
kernel/kernel/futex.c
....@@ -1,3 +1,4 @@
1
+// SPDX-License-Identifier: GPL-2.0-or-later
12 /*
23 * Fast Userspace Mutexes (which I call "Futexes!").
34 * (C) Rusty Russell, IBM 2002
....@@ -29,49 +30,20 @@
2930 *
3031 * "The futexes are also cursed."
3132 * "But they come in a choice of three flavours!"
32
- *
33
- * This program is free software; you can redistribute it and/or modify
34
- * it under the terms of the GNU General Public License as published by
35
- * the Free Software Foundation; either version 2 of the License, or
36
- * (at your option) any later version.
37
- *
38
- * This program is distributed in the hope that it will be useful,
39
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
40
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
41
- * GNU General Public License for more details.
42
- *
43
- * You should have received a copy of the GNU General Public License
44
- * along with this program; if not, write to the Free Software
45
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
4633 */
4734 #include <linux/compat.h>
48
-#include <linux/slab.h>
49
-#include <linux/poll.h>
50
-#include <linux/fs.h>
51
-#include <linux/file.h>
5235 #include <linux/jhash.h>
53
-#include <linux/init.h>
54
-#include <linux/futex.h>
55
-#include <linux/mount.h>
5636 #include <linux/pagemap.h>
5737 #include <linux/syscalls.h>
58
-#include <linux/signal.h>
59
-#include <linux/export.h>
60
-#include <linux/magic.h>
61
-#include <linux/pid.h>
62
-#include <linux/nsproxy.h>
63
-#include <linux/ptrace.h>
64
-#include <linux/sched/rt.h>
65
-#include <linux/sched/wake_q.h>
66
-#include <linux/sched/mm.h>
67
-#include <linux/hugetlb.h>
6838 #include <linux/freezer.h>
69
-#include <linux/bootmem.h>
39
+#include <linux/memblock.h>
7040 #include <linux/fault-inject.h>
41
+#include <linux/time_namespace.h>
7142
7243 #include <asm/futex.h>
7344
7445 #include "locking/rtmutex_common.h"
46
+#include <trace/hooks/futex.h>
7547
7648 /*
7749 * READ this before attempting to hack on futexes!
....@@ -147,8 +119,7 @@
147119 *
148120 * Where (A) orders the waiters increment and the futex value read through
149121 * atomic operations (see hb_waiters_inc) and where (B) orders the write
150
- * to futex and the waiters read -- this is done by the barriers for both
151
- * shared and private futexes in get_futex_key_refs().
122
+ * to futex and the waiters read (see hb_waiters_pending()).
152123 *
153124 * This yields the following case (where X:=waiters, Y:=futex):
154125 *
....@@ -212,7 +183,7 @@
212183 struct rt_mutex pi_mutex;
213184
214185 struct task_struct *owner;
215
- atomic_t refcount;
186
+ refcount_t refcount;
216187
217188 union futex_key key;
218189 } __randomize_layout;
....@@ -321,12 +292,8 @@
321292 if (IS_ERR(dir))
322293 return PTR_ERR(dir);
323294
324
- if (!debugfs_create_bool("ignore-private", mode, dir,
325
- &fail_futex.ignore_private)) {
326
- debugfs_remove_recursive(dir);
327
- return -ENOMEM;
328
- }
329
-
295
+ debugfs_create_bool("ignore-private", mode, dir,
296
+ &fail_futex.ignore_private);
330297 return 0;
331298 }
332299
....@@ -346,17 +313,6 @@
346313 #else
347314 static inline void compat_exit_robust_list(struct task_struct *curr) { }
348315 #endif
349
-
350
-static inline void futex_get_mm(union futex_key *key)
351
-{
352
- mmgrab(key->private.mm);
353
- /*
354
- * Ensure futex_get_mm() implies a full barrier such that
355
- * get_futex_key() implies a full barrier. This is relied upon
356
- * as smp_mb(); (B), see the ordering comment above.
357
- */
358
- smp_mb__after_atomic();
359
-}
360316
361317 /*
362318 * Reflects a new waiter being added to the waitqueue.
....@@ -386,6 +342,10 @@
386342 static inline int hb_waiters_pending(struct futex_hash_bucket *hb)
387343 {
388344 #ifdef CONFIG_SMP
345
+ /*
346
+ * Full barrier (B), see the ordering comment above.
347
+ */
348
+ smp_mb();
389349 return atomic_read(&hb->waiters);
390350 #else
391351 return 1;
....@@ -423,67 +383,38 @@
423383 && key1->both.offset == key2->both.offset);
424384 }
425385
426
-/*
427
- * Take a reference to the resource addressed by a key.
428
- * Can be called while holding spinlocks.
386
+enum futex_access {
387
+ FUTEX_READ,
388
+ FUTEX_WRITE
389
+};
390
+
391
+/**
392
+ * futex_setup_timer - set up the sleeping hrtimer.
393
+ * @time: ptr to the given timeout value
394
+ * @timeout: the hrtimer_sleeper structure to be set up
395
+ * @flags: futex flags
396
+ * @range_ns: optional range in ns
429397 *
398
+ * Return: Initialized hrtimer_sleeper structure or NULL if no timeout
399
+ * value given
430400 */
431
-static void get_futex_key_refs(union futex_key *key)
401
+static inline struct hrtimer_sleeper *
402
+futex_setup_timer(ktime_t *time, struct hrtimer_sleeper *timeout,
403
+ int flags, u64 range_ns)
432404 {
433
- if (!key->both.ptr)
434
- return;
405
+ if (!time)
406
+ return NULL;
435407
408
+ hrtimer_init_sleeper_on_stack(timeout, (flags & FLAGS_CLOCKRT) ?
409
+ CLOCK_REALTIME : CLOCK_MONOTONIC,
410
+ HRTIMER_MODE_ABS);
436411 /*
437
- * On MMU less systems futexes are always "private" as there is no per
438
- * process address space. We need the smp wmb nevertheless - yes,
439
- * arch/blackfin has MMU less SMP ...
412
+ * If range_ns is 0, calling hrtimer_set_expires_range_ns() is
413
+ * effectively the same as calling hrtimer_set_expires().
440414 */
441
- if (!IS_ENABLED(CONFIG_MMU)) {
442
- smp_mb(); /* explicit smp_mb(); (B) */
443
- return;
444
- }
415
+ hrtimer_set_expires_range_ns(&timeout->timer, *time, range_ns);
445416
446
- switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
447
- case FUT_OFF_INODE:
448
- smp_mb(); /* explicit smp_mb(); (B) */
449
- break;
450
- case FUT_OFF_MMSHARED:
451
- futex_get_mm(key); /* implies smp_mb(); (B) */
452
- break;
453
- default:
454
- /*
455
- * Private futexes do not hold reference on an inode or
456
- * mm, therefore the only purpose of calling get_futex_key_refs
457
- * is because we need the barrier for the lockless waiter check.
458
- */
459
- smp_mb(); /* explicit smp_mb(); (B) */
460
- }
461
-}
462
-
463
-/*
464
- * Drop a reference to the resource addressed by a key.
465
- * The hash bucket spinlock must not be held. This is
466
- * a no-op for private futexes, see comment in the get
467
- * counterpart.
468
- */
469
-static void drop_futex_key_refs(union futex_key *key)
470
-{
471
- if (!key->both.ptr) {
472
- /* If we're here then we tried to put a key we failed to get */
473
- WARN_ON_ONCE(1);
474
- return;
475
- }
476
-
477
- if (!IS_ENABLED(CONFIG_MMU))
478
- return;
479
-
480
- switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
481
- case FUT_OFF_INODE:
482
- break;
483
- case FUT_OFF_MMSHARED:
484
- mmdrop(key->private.mm);
485
- break;
486
- }
417
+ return timeout;
487418 }
488419
489420 /*
....@@ -529,20 +460,23 @@
529460 /**
530461 * get_futex_key() - Get parameters which are the keys for a futex
531462 * @uaddr: virtual address of the futex
532
- * @fshared: 0 for a PROCESS_PRIVATE futex, 1 for PROCESS_SHARED
463
+ * @fshared: false for a PROCESS_PRIVATE futex, true for PROCESS_SHARED
533464 * @key: address where result is stored.
534
- * @rw: mapping needs to be read/write (values: VERIFY_READ,
535
- * VERIFY_WRITE)
465
+ * @rw: mapping needs to be read/write (values: FUTEX_READ,
466
+ * FUTEX_WRITE)
536467 *
537468 * Return: a negative error code or 0
538469 *
539470 * The key words are stored in @key on success.
540471 *
541472 * For shared mappings (when @fshared), the key is:
473
+ *
542474 * ( inode->i_sequence, page->index, offset_within_page )
475
+ *
543476 * [ also see get_inode_sequence_number() ]
544477 *
545478 * For private mappings (or when !@fshared), the key is:
479
+ *
546480 * ( current->mm, address, 0 )
547481 *
548482 * This allows (cross process, where applicable) identification of the futex
....@@ -550,8 +484,8 @@
550484 *
551485 * lock_page() might sleep, the caller should not hold a spinlock.
552486 */
553
-static int
554
-get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw)
487
+static int get_futex_key(u32 __user *uaddr, bool fshared, union futex_key *key,
488
+ enum futex_access rw)
555489 {
556490 unsigned long address = (unsigned long)uaddr;
557491 struct mm_struct *mm = current->mm;
....@@ -567,7 +501,7 @@
567501 return -EINVAL;
568502 address -= key->both.offset;
569503
570
- if (unlikely(!access_ok(rw, uaddr, sizeof(u32))))
504
+ if (unlikely(!access_ok(uaddr, sizeof(u32))))
571505 return -EFAULT;
572506
573507 if (unlikely(should_fail_futex(fshared)))
....@@ -583,21 +517,20 @@
583517 if (!fshared) {
584518 key->private.mm = mm;
585519 key->private.address = address;
586
- get_futex_key_refs(key); /* implies smp_mb(); (B) */
587520 return 0;
588521 }
589522
590523 again:
591524 /* Ignore any VERIFY_READ mapping (futex common case) */
592
- if (unlikely(should_fail_futex(fshared)))
525
+ if (unlikely(should_fail_futex(true)))
593526 return -EFAULT;
594527
595
- err = get_user_pages_fast(address, 1, 1, &page);
528
+ err = get_user_pages_fast(address, 1, FOLL_WRITE, &page);
596529 /*
597530 * If write access is not required (eg. FUTEX_WAIT), try
598531 * and get read-only access.
599532 */
600
- if (err == -EFAULT && rw == VERIFY_READ) {
533
+ if (err == -EFAULT && rw == FUTEX_READ) {
601534 err = get_user_pages_fast(address, 1, 0, &page);
602535 ro = 1;
603536 }
....@@ -654,7 +587,7 @@
654587 lock_page(page);
655588 shmem_swizzled = PageSwapCache(page) || page->mapping;
656589 unlock_page(page);
657
- put_page(page);
590
+ put_user_page(page);
658591
659592 if (shmem_swizzled)
660593 goto again;
....@@ -677,7 +610,7 @@
677610 * A RO anonymous page will never change and thus doesn't make
678611 * sense for futex operations.
679612 */
680
- if (unlikely(should_fail_futex(fshared)) || ro) {
613
+ if (unlikely(should_fail_futex(true)) || ro) {
681614 err = -EFAULT;
682615 goto out;
683616 }
....@@ -704,7 +637,7 @@
704637
705638 if (READ_ONCE(page->mapping) != mapping) {
706639 rcu_read_unlock();
707
- put_page(page);
640
+ put_user_page(page);
708641
709642 goto again;
710643 }
....@@ -712,7 +645,7 @@
712645 inode = READ_ONCE(mapping->host);
713646 if (!inode) {
714647 rcu_read_unlock();
715
- put_page(page);
648
+ put_user_page(page);
716649
717650 goto again;
718651 }
....@@ -723,16 +656,9 @@
723656 rcu_read_unlock();
724657 }
725658
726
- get_futex_key_refs(key); /* implies smp_mb(); (B) */
727
-
728659 out:
729
- put_page(page);
660
+ put_user_page(page);
730661 return err;
731
-}
732
-
733
-static inline void put_futex_key(union futex_key *key)
734
-{
735
- drop_futex_key_refs(key);
736662 }
737663
738664 /**
....@@ -752,10 +678,10 @@
752678 struct mm_struct *mm = current->mm;
753679 int ret;
754680
755
- down_read(&mm->mmap_sem);
756
- ret = fixup_user_fault(current, mm, (unsigned long)uaddr,
681
+ mmap_read_lock(mm);
682
+ ret = fixup_user_fault(mm, (unsigned long)uaddr,
757683 FAULT_FLAG_WRITE, NULL);
758
- up_read(&mm->mmap_sem);
684
+ mmap_read_unlock(mm);
759685
760686 return ret < 0 ? ret : 0;
761687 }
....@@ -821,7 +747,7 @@
821747 INIT_LIST_HEAD(&pi_state->list);
822748 /* pi_mutex gets initialized later */
823749 pi_state->owner = NULL;
824
- atomic_set(&pi_state->refcount, 1);
750
+ refcount_set(&pi_state->refcount, 1);
825751 pi_state->key = FUTEX_KEY_INIT;
826752
827753 current->pi_state_cache = pi_state;
....@@ -864,7 +790,7 @@
864790
865791 static void get_pi_state(struct futex_pi_state *pi_state)
866792 {
867
- WARN_ON_ONCE(!atomic_inc_not_zero(&pi_state->refcount));
793
+ WARN_ON_ONCE(!refcount_inc_not_zero(&pi_state->refcount));
868794 }
869795
870796 /*
....@@ -876,7 +802,7 @@
876802 if (!pi_state)
877803 return;
878804
879
- if (!atomic_dec_and_test(&pi_state->refcount))
805
+ if (!refcount_dec_and_test(&pi_state->refcount))
880806 return;
881807
882808 /*
....@@ -901,7 +827,7 @@
901827 * refcount is at 0 - put it back to 1.
902828 */
903829 pi_state->owner = NULL;
904
- atomic_set(&pi_state->refcount, 1);
830
+ refcount_set(&pi_state->refcount, 1);
905831 current->pi_state_cache = pi_state;
906832 }
907833 }
....@@ -944,7 +870,7 @@
944870 * In that case; drop the locks to let put_pi_state() make
945871 * progress and retry the loop.
946872 */
947
- if (!atomic_inc_not_zero(&pi_state->refcount)) {
873
+ if (!refcount_inc_not_zero(&pi_state->refcount)) {
948874 raw_spin_unlock_irq(&curr->pi_lock);
949875 cpu_relax();
950876 raw_spin_lock_irq(&curr->pi_lock);
....@@ -1009,7 +935,7 @@
1009935 * [10] Found | Found | task | !=taskTID | 0/1 | Invalid
1010936 *
1011937 * [1] Indicates that the kernel can acquire the futex atomically. We
1012
- * came came here due to a stale FUTEX_WAITERS/FUTEX_OWNER_DIED bit.
938
+ * came here due to a stale FUTEX_WAITERS/FUTEX_OWNER_DIED bit.
1013939 *
1014940 * [2] Valid, if TID does not belong to a kernel thread. If no matching
1015941 * thread is found then it indicates that the owner TID has died.
....@@ -1102,7 +1028,7 @@
11021028 * and futex_wait_requeue_pi() as it cannot go to 0 and consequently
11031029 * free pi_state before we can take a reference ourselves.
11041030 */
1105
- WARN_ON(!atomic_read(&pi_state->refcount));
1031
+ WARN_ON(!refcount_read(&pi_state->refcount));
11061032
11071033 /*
11081034 * Now that we have a pi_state, we can acquire wait_lock
....@@ -1196,6 +1122,7 @@
11961122
11971123 /**
11981124 * wait_for_owner_exiting - Block until the owner has exited
1125
+ * @ret: owner's current futex lock status
11991126 * @exiting: Pointer to the exiting task
12001127 *
12011128 * Caller must hold a refcount on @exiting.
....@@ -1398,7 +1325,7 @@
13981325 static int lock_pi_update_atomic(u32 __user *uaddr, u32 uval, u32 newval)
13991326 {
14001327 int err;
1401
- u32 uninitialized_var(curval);
1328
+ u32 curval;
14021329
14031330 if (unlikely(should_fail_futex(true)))
14041331 return -EFAULT;
....@@ -1523,9 +1450,9 @@
15231450 {
15241451 struct futex_hash_bucket *hb;
15251452
1526
- if (WARN_ON_SMP(!q->lock_ptr || !spin_is_locked(q->lock_ptr))
1527
- || WARN_ON(plist_node_empty(&q->list)))
1453
+ if (WARN_ON_SMP(!q->lock_ptr) || WARN_ON(plist_node_empty(&q->list)))
15281454 return;
1455
+ lockdep_assert_held(q->lock_ptr);
15291456
15301457 hb = container_of(q->lock_ptr, struct futex_hash_bucket, lock);
15311458 plist_del(&q->list, &hb->chain);
....@@ -1558,10 +1485,9 @@
15581485
15591486 /*
15601487 * Queue the task for later wakeup for after we've released
1561
- * the hb->lock. wake_q_add() grabs reference to p.
1488
+ * the hb->lock.
15621489 */
1563
- wake_q_add(wake_q, p);
1564
- put_task_struct(p);
1490
+ wake_q_add_safe(wake_q, p);
15651491 }
15661492
15671493 /*
....@@ -1569,10 +1495,11 @@
15691495 */
15701496 static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_pi_state *pi_state)
15711497 {
1572
- u32 uninitialized_var(curval), newval;
1498
+ u32 curval, newval;
15731499 struct task_struct *new_owner;
15741500 bool postunlock = false;
15751501 DEFINE_WAKE_Q(wake_q);
1502
+ DEFINE_WAKE_Q(wake_sleeper_q);
15761503 int ret = 0;
15771504
15781505 new_owner = rt_mutex_next_owner(&pi_state->pi_mutex);
....@@ -1622,14 +1549,15 @@
16221549 * not fail.
16231550 */
16241551 pi_state_update_owner(pi_state, new_owner);
1625
- postunlock = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q);
1552
+ postunlock = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q,
1553
+ &wake_sleeper_q);
16261554 }
16271555
16281556 out_unlock:
16291557 raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
16301558
16311559 if (postunlock)
1632
- rt_mutex_postunlock(&wake_q);
1560
+ rt_mutex_postunlock(&wake_q, &wake_sleeper_q);
16331561
16341562 return ret;
16351563 }
....@@ -1668,23 +1596,25 @@
16681596 struct futex_q *this, *next;
16691597 union futex_key key = FUTEX_KEY_INIT;
16701598 int ret;
1599
+ int target_nr;
16711600 DEFINE_WAKE_Q(wake_q);
16721601
16731602 if (!bitset)
16741603 return -EINVAL;
16751604
1676
- ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key, VERIFY_READ);
1605
+ ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key, FUTEX_READ);
16771606 if (unlikely(ret != 0))
1678
- goto out;
1607
+ return ret;
16791608
16801609 hb = hash_futex(&key);
16811610
16821611 /* Make sure we really have tasks to wakeup */
16831612 if (!hb_waiters_pending(hb))
1684
- goto out_put_key;
1613
+ return ret;
16851614
16861615 spin_lock(&hb->lock);
16871616
1617
+ trace_android_vh_futex_wake_traverse_plist(&hb->chain, &target_nr, key, bitset);
16881618 plist_for_each_entry_safe(this, next, &hb->chain, list) {
16891619 if (match_futex (&this->key, &key)) {
16901620 if (this->pi_state || this->rt_waiter) {
....@@ -1696,6 +1626,7 @@
16961626 if (!(this->bitset & bitset))
16971627 continue;
16981628
1629
+ trace_android_vh_futex_wake_this(ret, nr_wake, target_nr, this->task);
16991630 mark_wake_futex(&wake_q, this);
17001631 if (++ret >= nr_wake)
17011632 break;
....@@ -1704,9 +1635,7 @@
17041635
17051636 spin_unlock(&hb->lock);
17061637 wake_up_q(&wake_q);
1707
-out_put_key:
1708
- put_futex_key(&key);
1709
-out:
1638
+ trace_android_vh_futex_wake_up_q_finish(nr_wake, target_nr);
17101639 return ret;
17111640 }
17121641
....@@ -1732,10 +1661,9 @@
17321661 oparg = 1 << oparg;
17331662 }
17341663
1735
- if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
1736
- return -EFAULT;
1737
-
1664
+ pagefault_disable();
17381665 ret = arch_futex_atomic_op_inuser(op, oparg, &oldval, uaddr);
1666
+ pagefault_enable();
17391667 if (ret)
17401668 return ret;
17411669
....@@ -1772,12 +1700,12 @@
17721700 DEFINE_WAKE_Q(wake_q);
17731701
17741702 retry:
1775
- ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, VERIFY_READ);
1703
+ ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, FUTEX_READ);
17761704 if (unlikely(ret != 0))
1777
- goto out;
1778
- ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, VERIFY_WRITE);
1705
+ return ret;
1706
+ ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, FUTEX_WRITE);
17791707 if (unlikely(ret != 0))
1780
- goto out_put_key1;
1708
+ return ret;
17811709
17821710 hb1 = hash_futex(&key1);
17831711 hb2 = hash_futex(&key2);
....@@ -1795,13 +1723,13 @@
17951723 * an MMU, but we might get them from range checking
17961724 */
17971725 ret = op_ret;
1798
- goto out_put_keys;
1726
+ return ret;
17991727 }
18001728
18011729 if (op_ret == -EFAULT) {
18021730 ret = fault_in_user_writeable(uaddr2);
18031731 if (ret)
1804
- goto out_put_keys;
1732
+ return ret;
18051733 }
18061734
18071735 if (!(flags & FLAGS_SHARED)) {
....@@ -1809,8 +1737,6 @@
18091737 goto retry_private;
18101738 }
18111739
1812
- put_futex_key(&key2);
1813
- put_futex_key(&key1);
18141740 cond_resched();
18151741 goto retry;
18161742 }
....@@ -1846,11 +1772,6 @@
18461772 out_unlock:
18471773 double_unlock_hb(hb1, hb2);
18481774 wake_up_q(&wake_q);
1849
-out_put_keys:
1850
- put_futex_key(&key2);
1851
-out_put_key1:
1852
- put_futex_key(&key1);
1853
-out:
18541775 return ret;
18551776 }
18561777
....@@ -1877,7 +1798,6 @@
18771798 plist_add(&q->list, &hb2->chain);
18781799 q->lock_ptr = &hb2->lock;
18791800 }
1880
- get_futex_key_refs(key2);
18811801 q->key = *key2;
18821802 }
18831803
....@@ -1899,7 +1819,6 @@
18991819 void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key,
19001820 struct futex_hash_bucket *hb)
19011821 {
1902
- get_futex_key_refs(key);
19031822 q->key = *key;
19041823
19051824 __unqueue_futex(q);
....@@ -2010,7 +1929,7 @@
20101929 u32 *cmpval, int requeue_pi)
20111930 {
20121931 union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
2013
- int drop_count = 0, task_count = 0, ret;
1932
+ int task_count = 0, ret;
20141933 struct futex_pi_state *pi_state = NULL;
20151934 struct futex_hash_bucket *hb1, *hb2;
20161935 struct futex_q *this, *next;
....@@ -2057,22 +1976,20 @@
20571976 }
20581977
20591978 retry:
2060
- ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, VERIFY_READ);
1979
+ ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, FUTEX_READ);
20611980 if (unlikely(ret != 0))
2062
- goto out;
1981
+ return ret;
20631982 ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2,
2064
- requeue_pi ? VERIFY_WRITE : VERIFY_READ);
1983
+ requeue_pi ? FUTEX_WRITE : FUTEX_READ);
20651984 if (unlikely(ret != 0))
2066
- goto out_put_key1;
1985
+ return ret;
20671986
20681987 /*
20691988 * The check above which compares uaddrs is not sufficient for
20701989 * shared futexes. We need to compare the keys:
20711990 */
2072
- if (requeue_pi && match_futex(&key1, &key2)) {
2073
- ret = -EINVAL;
2074
- goto out_put_keys;
2075
- }
1991
+ if (requeue_pi && match_futex(&key1, &key2))
1992
+ return -EINVAL;
20761993
20771994 hb1 = hash_futex(&key1);
20781995 hb2 = hash_futex(&key2);
....@@ -2092,13 +2009,11 @@
20922009
20932010 ret = get_user(curval, uaddr1);
20942011 if (ret)
2095
- goto out_put_keys;
2012
+ return ret;
20962013
20972014 if (!(flags & FLAGS_SHARED))
20982015 goto retry_private;
20992016
2100
- put_futex_key(&key2);
2101
- put_futex_key(&key1);
21022017 goto retry;
21032018 }
21042019 if (curval != *cmpval) {
....@@ -2131,7 +2046,6 @@
21312046 */
21322047 if (ret > 0) {
21332048 WARN_ON(pi_state);
2134
- drop_count++;
21352049 task_count++;
21362050 /*
21372051 * If we acquired the lock, then the user space value
....@@ -2158,12 +2072,10 @@
21582072 case -EFAULT:
21592073 double_unlock_hb(hb1, hb2);
21602074 hb_waiters_dec(hb2);
2161
- put_futex_key(&key2);
2162
- put_futex_key(&key1);
21632075 ret = fault_in_user_writeable(uaddr2);
21642076 if (!ret)
21652077 goto retry;
2166
- goto out;
2078
+ return ret;
21672079 case -EBUSY:
21682080 case -EAGAIN:
21692081 /*
....@@ -2174,8 +2086,6 @@
21742086 */
21752087 double_unlock_hb(hb1, hb2);
21762088 hb_waiters_dec(hb2);
2177
- put_futex_key(&key2);
2178
- put_futex_key(&key1);
21792089 /*
21802090 * Handle the case where the owner is in the middle of
21812091 * exiting. Wait for the exit to complete otherwise
....@@ -2251,7 +2161,6 @@
22512161 * doing so.
22522162 */
22532163 requeue_pi_wake_futex(this, &key2, hb2);
2254
- drop_count++;
22552164 continue;
22562165 } else if (ret) {
22572166 /*
....@@ -2272,7 +2181,6 @@
22722181 }
22732182 }
22742183 requeue_futex(this, hb1, hb2, &key2);
2275
- drop_count++;
22762184 }
22772185
22782186 /*
....@@ -2286,21 +2194,6 @@
22862194 double_unlock_hb(hb1, hb2);
22872195 wake_up_q(&wake_q);
22882196 hb_waiters_dec(hb2);
2289
-
2290
- /*
2291
- * drop_futex_key_refs() must be called outside the spinlocks. During
2292
- * the requeue we moved futex_q's from the hash bucket at key1 to the
2293
- * one at key2 and updated their key pointer. We no longer need to
2294
- * hold the references to key1.
2295
- */
2296
- while (--drop_count >= 0)
2297
- drop_futex_key_refs(&key1);
2298
-
2299
-out_put_keys:
2300
- put_futex_key(&key2);
2301
-out_put_key1:
2302
- put_futex_key(&key1);
2303
-out:
23042197 return ret ? ret : task_count;
23052198 }
23062199
....@@ -2320,11 +2213,11 @@
23202213 * decrement the counter at queue_unlock() when some error has
23212214 * occurred and we don't end up adding the task to the list.
23222215 */
2323
- hb_waiters_inc(hb);
2216
+ hb_waiters_inc(hb); /* implies smp_mb(); (A) */
23242217
23252218 q->lock_ptr = &hb->lock;
23262219
2327
- spin_lock(&hb->lock); /* implies smp_mb(); (A) */
2220
+ spin_lock(&hb->lock);
23282221 return hb;
23292222 }
23302223
....@@ -2339,6 +2232,7 @@
23392232 static inline void __queue_me(struct futex_q *q, struct futex_hash_bucket *hb)
23402233 {
23412234 int prio;
2235
+ bool already_on_hb = false;
23422236
23432237 /*
23442238 * The priority used to register this element is
....@@ -2351,7 +2245,9 @@
23512245 prio = min(current->normal_prio, MAX_RT_PRIO);
23522246
23532247 plist_node_init(&q->list, prio);
2354
- plist_add(&q->list, &hb->chain);
2248
+ trace_android_vh_alter_futex_plist_add(&q->list, &hb->chain, &already_on_hb);
2249
+ if (!already_on_hb)
2250
+ plist_add(&q->list, &hb->chain);
23552251 q->task = current;
23562252 }
23572253
....@@ -2425,7 +2321,6 @@
24252321 ret = 1;
24262322 }
24272323
2428
- drop_futex_key_refs(&q->key);
24292324 return ret;
24302325 }
24312326
....@@ -2449,9 +2344,9 @@
24492344 static int __fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
24502345 struct task_struct *argowner)
24512346 {
2452
- u32 uval, uninitialized_var(curval), newval, newtid;
24532347 struct futex_pi_state *pi_state = q->pi_state;
24542348 struct task_struct *oldowner, *newowner;
2349
+ u32 uval, curval, newval, newtid;
24552350 int err = 0;
24562351
24572352 oldowner = pi_state->owner;
....@@ -2706,7 +2601,7 @@
27062601
27072602 /* Arm the timer */
27082603 if (timeout)
2709
- hrtimer_start_expires(&timeout->timer, HRTIMER_MODE_ABS);
2604
+ hrtimer_sleeper_start_expires(timeout, HRTIMER_MODE_ABS);
27102605
27112606 /*
27122607 * If we have been removed from the hash list, then another task
....@@ -2718,8 +2613,10 @@
27182613 * flagged for rescheduling. Only call schedule if there
27192614 * is no timeout, or if it has yet to expire.
27202615 */
2721
- if (!timeout || timeout->task)
2616
+ if (!timeout || timeout->task) {
2617
+ trace_android_vh_futex_sleep_start(current);
27222618 freezable_schedule();
2619
+ }
27232620 }
27242621 __set_current_state(TASK_RUNNING);
27252622 }
....@@ -2766,7 +2663,7 @@
27662663 * while the syscall executes.
27672664 */
27682665 retry:
2769
- ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q->key, VERIFY_READ);
2666
+ ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q->key, FUTEX_READ);
27702667 if (unlikely(ret != 0))
27712668 return ret;
27722669
....@@ -2780,12 +2677,11 @@
27802677
27812678 ret = get_user(uval, uaddr);
27822679 if (ret)
2783
- goto out;
2680
+ return ret;
27842681
27852682 if (!(flags & FLAGS_SHARED))
27862683 goto retry_private;
27872684
2788
- put_futex_key(&q->key);
27892685 goto retry;
27902686 }
27912687
....@@ -2794,16 +2690,13 @@
27942690 ret = -EWOULDBLOCK;
27952691 }
27962692
2797
-out:
2798
- if (ret)
2799
- put_futex_key(&q->key);
28002693 return ret;
28012694 }
28022695
28032696 static int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val,
28042697 ktime_t *abs_time, u32 bitset)
28052698 {
2806
- struct hrtimer_sleeper timeout, *to = NULL;
2699
+ struct hrtimer_sleeper timeout, *to;
28072700 struct restart_block *restart;
28082701 struct futex_hash_bucket *hb;
28092702 struct futex_q q = futex_q_init;
....@@ -2812,18 +2705,10 @@
28122705 if (!bitset)
28132706 return -EINVAL;
28142707 q.bitset = bitset;
2708
+ trace_android_vh_futex_wait_start(flags, bitset);
28152709
2816
- if (abs_time) {
2817
- to = &timeout;
2818
-
2819
- hrtimer_init_on_stack(&to->timer, (flags & FLAGS_CLOCKRT) ?
2820
- CLOCK_REALTIME : CLOCK_MONOTONIC,
2821
- HRTIMER_MODE_ABS);
2822
- hrtimer_init_sleeper(to, current);
2823
- hrtimer_set_expires_range_ns(&to->timer, *abs_time,
2824
- current->timer_slack_ns);
2825
- }
2826
-
2710
+ to = futex_setup_timer(abs_time, &timeout, flags,
2711
+ current->timer_slack_ns);
28272712 retry:
28282713 /*
28292714 * Prepare to wait on uaddr. On success, holds hb lock and increments
....@@ -2870,6 +2755,7 @@
28702755 hrtimer_cancel(&to->timer);
28712756 destroy_hrtimer_on_stack(&to->timer);
28722757 }
2758
+ trace_android_vh_futex_wait_end(flags, bitset);
28732759 return ret;
28742760 }
28752761
....@@ -2902,7 +2788,7 @@
29022788 static int futex_lock_pi(u32 __user *uaddr, unsigned int flags,
29032789 ktime_t *time, int trylock)
29042790 {
2905
- struct hrtimer_sleeper timeout, *to = NULL;
2791
+ struct hrtimer_sleeper timeout, *to;
29062792 struct task_struct *exiting = NULL;
29072793 struct rt_mutex_waiter rt_waiter;
29082794 struct futex_hash_bucket *hb;
....@@ -2915,16 +2801,10 @@
29152801 if (refill_pi_state_cache())
29162802 return -ENOMEM;
29172803
2918
- if (time) {
2919
- to = &timeout;
2920
- hrtimer_init_on_stack(&to->timer, CLOCK_REALTIME,
2921
- HRTIMER_MODE_ABS);
2922
- hrtimer_init_sleeper(to, current);
2923
- hrtimer_set_expires(&to->timer, *time);
2924
- }
2804
+ to = futex_setup_timer(time, &timeout, FLAGS_CLOCKRT, 0);
29252805
29262806 retry:
2927
- ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q.key, VERIFY_WRITE);
2807
+ ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q.key, FUTEX_WRITE);
29282808 if (unlikely(ret != 0))
29292809 goto out;
29302810
....@@ -2954,7 +2834,6 @@
29542834 * - EAGAIN: The user space value changed.
29552835 */
29562836 queue_unlock(hb);
2957
- put_futex_key(&q.key);
29582837 /*
29592838 * Handle the case where the owner is in the middle of
29602839 * exiting. Wait for the exit to complete otherwise
....@@ -2982,7 +2861,7 @@
29822861 goto no_block;
29832862 }
29842863
2985
- rt_mutex_init_waiter(&rt_waiter);
2864
+ rt_mutex_init_waiter(&rt_waiter, false);
29862865
29872866 /*
29882867 * On PREEMPT_RT_FULL, when hb->lock becomes an rt_mutex, we must not
....@@ -3014,7 +2893,7 @@
30142893 }
30152894
30162895 if (unlikely(to))
3017
- hrtimer_start_expires(&to->timer, HRTIMER_MODE_ABS);
2896
+ hrtimer_sleeper_start_expires(to, HRTIMER_MODE_ABS);
30182897
30192898 ret = rt_mutex_wait_proxy_lock(&q.pi_state->pi_mutex, to, &rt_waiter);
30202899
....@@ -3047,14 +2926,11 @@
30472926
30482927 /* Unqueue and drop the lock */
30492928 unqueue_me_pi(&q);
3050
-
3051
- goto out_put_key;
2929
+ goto out;
30522930
30532931 out_unlock_put_key:
30542932 queue_unlock(hb);
30552933
3056
-out_put_key:
3057
- put_futex_key(&q.key);
30582934 out:
30592935 if (to) {
30602936 hrtimer_cancel(&to->timer);
....@@ -3067,12 +2943,11 @@
30672943
30682944 ret = fault_in_user_writeable(uaddr);
30692945 if (ret)
3070
- goto out_put_key;
2946
+ goto out;
30712947
30722948 if (!(flags & FLAGS_SHARED))
30732949 goto retry_private;
30742950
3075
- put_futex_key(&q.key);
30762951 goto retry;
30772952 }
30782953
....@@ -3083,7 +2958,7 @@
30832958 */
30842959 static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags)
30852960 {
3086
- u32 uninitialized_var(curval), uval, vpid = task_pid_vnr(current);
2961
+ u32 curval, uval, vpid = task_pid_vnr(current);
30872962 union futex_key key = FUTEX_KEY_INIT;
30882963 struct futex_hash_bucket *hb;
30892964 struct futex_q *top_waiter;
....@@ -3101,7 +2976,7 @@
31012976 if ((uval & FUTEX_TID_MASK) != vpid)
31022977 return -EPERM;
31032978
3104
- ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key, VERIFY_WRITE);
2979
+ ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key, FUTEX_WRITE);
31052980 if (ret)
31062981 return ret;
31072982
....@@ -3201,16 +3076,13 @@
32013076 out_unlock:
32023077 spin_unlock(&hb->lock);
32033078 out_putkey:
3204
- put_futex_key(&key);
32053079 return ret;
32063080
32073081 pi_retry:
3208
- put_futex_key(&key);
32093082 cond_resched();
32103083 goto retry;
32113084
32123085 pi_faulted:
3213
- put_futex_key(&key);
32143086
32153087 ret = fault_in_user_writeable(uaddr);
32163088 if (!ret)
....@@ -3312,7 +3184,7 @@
33123184 u32 val, ktime_t *abs_time, u32 bitset,
33133185 u32 __user *uaddr2)
33143186 {
3315
- struct hrtimer_sleeper timeout, *to = NULL;
3187
+ struct hrtimer_sleeper timeout, *to;
33163188 struct rt_mutex_waiter rt_waiter;
33173189 struct futex_hash_bucket *hb;
33183190 union futex_key key2 = FUTEX_KEY_INIT;
....@@ -3328,23 +3200,16 @@
33283200 if (!bitset)
33293201 return -EINVAL;
33303202
3331
- if (abs_time) {
3332
- to = &timeout;
3333
- hrtimer_init_on_stack(&to->timer, (flags & FLAGS_CLOCKRT) ?
3334
- CLOCK_REALTIME : CLOCK_MONOTONIC,
3335
- HRTIMER_MODE_ABS);
3336
- hrtimer_init_sleeper(to, current);
3337
- hrtimer_set_expires_range_ns(&to->timer, *abs_time,
3338
- current->timer_slack_ns);
3339
- }
3203
+ to = futex_setup_timer(abs_time, &timeout, flags,
3204
+ current->timer_slack_ns);
33403205
33413206 /*
33423207 * The waiter is allocated on our stack, manipulated by the requeue
33433208 * code while we sleep on uaddr.
33443209 */
3345
- rt_mutex_init_waiter(&rt_waiter);
3210
+ rt_mutex_init_waiter(&rt_waiter, false);
33463211
3347
- ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, VERIFY_WRITE);
3212
+ ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, FUTEX_WRITE);
33483213 if (unlikely(ret != 0))
33493214 goto out;
33503215
....@@ -3358,7 +3223,7 @@
33583223 */
33593224 ret = futex_wait_setup(uaddr, val, flags, &q, &hb);
33603225 if (ret)
3361
- goto out_key2;
3226
+ goto out;
33623227
33633228 /*
33643229 * The check above which compares uaddrs is not sufficient for
....@@ -3367,7 +3232,7 @@
33673232 if (match_futex(&q.key, &key2)) {
33683233 queue_unlock(hb);
33693234 ret = -EINVAL;
3370
- goto out_put_keys;
3235
+ goto out;
33713236 }
33723237
33733238 /* Queue the futex_q, drop the hb lock, wait for wakeup. */
....@@ -3377,7 +3242,7 @@
33773242 ret = handle_early_requeue_pi_wakeup(hb, &q, &key2, to);
33783243 spin_unlock(&hb->lock);
33793244 if (ret)
3380
- goto out_put_keys;
3245
+ goto out;
33813246
33823247 /*
33833248 * In order for us to be here, we know our q.key == key2, and since
....@@ -3452,11 +3317,6 @@
34523317 */
34533318 ret = -EWOULDBLOCK;
34543319 }
3455
-
3456
-out_put_keys:
3457
- put_futex_key(&q.key);
3458
-out_key2:
3459
- put_futex_key(&key2);
34603320
34613321 out:
34623322 if (to) {
....@@ -3558,7 +3418,7 @@
35583418 static int handle_futex_death(u32 __user *uaddr, struct task_struct *curr,
35593419 bool pi, bool pending_op)
35603420 {
3561
- u32 uval, uninitialized_var(nval), mval;
3421
+ u32 uval, nval, mval;
35623422 int err;
35633423
35643424 /* Futex address must be 32bit aligned */
....@@ -3688,7 +3548,7 @@
36883548 struct robust_list_head __user *head = curr->robust_list;
36893549 struct robust_list __user *entry, *next_entry, *pending;
36903550 unsigned int limit = ROBUST_LIST_LIMIT, pi, pip;
3691
- unsigned int uninitialized_var(next_pi);
3551
+ unsigned int next_pi;
36923552 unsigned long futex_offset;
36933553 int rc;
36943554
....@@ -3881,15 +3741,16 @@
38813741 return -ENOSYS;
38823742 }
38833743
3744
+ trace_android_vh_do_futex(cmd, &flags, uaddr2);
38843745 switch (cmd) {
38853746 case FUTEX_WAIT:
38863747 val3 = FUTEX_BITSET_MATCH_ANY;
3887
- /* fall through */
3748
+ fallthrough;
38883749 case FUTEX_WAIT_BITSET:
38893750 return futex_wait(uaddr, flags, val, timeout, val3);
38903751 case FUTEX_WAKE:
38913752 val3 = FUTEX_BITSET_MATCH_ANY;
3892
- /* fall through */
3753
+ fallthrough;
38933754 case FUTEX_WAKE_BITSET:
38943755 return futex_wake(uaddr, flags, val, val3);
38953756 case FUTEX_REQUEUE:
....@@ -3916,10 +3777,10 @@
39163777
39173778
39183779 SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
3919
- struct timespec __user *, utime, u32 __user *, uaddr2,
3780
+ struct __kernel_timespec __user *, utime, u32 __user *, uaddr2,
39203781 u32, val3)
39213782 {
3922
- struct timespec ts;
3783
+ struct timespec64 ts;
39233784 ktime_t t, *tp = NULL;
39243785 u32 val2 = 0;
39253786 int cmd = op & FUTEX_CMD_MASK;
....@@ -3929,14 +3790,16 @@
39293790 cmd == FUTEX_WAIT_REQUEUE_PI)) {
39303791 if (unlikely(should_fail_futex(!(op & FUTEX_PRIVATE_FLAG))))
39313792 return -EFAULT;
3932
- if (copy_from_user(&ts, utime, sizeof(ts)) != 0)
3793
+ if (get_timespec64(&ts, utime))
39333794 return -EFAULT;
3934
- if (!timespec_valid(&ts))
3795
+ if (!timespec64_valid(&ts))
39353796 return -EINVAL;
39363797
3937
- t = timespec_to_ktime(ts);
3798
+ t = timespec64_to_ktime(ts);
39383799 if (cmd == FUTEX_WAIT)
39393800 t = ktime_add_safe(ktime_get(), t);
3801
+ else if (cmd != FUTEX_LOCK_PI && !(op & FUTEX_CLOCK_REALTIME))
3802
+ t = timens_ktime_to_host(CLOCK_MONOTONIC, t);
39403803 tp = &t;
39413804 }
39423805 /*
....@@ -3987,7 +3850,7 @@
39873850 struct compat_robust_list_head __user *head = curr->compat_robust_list;
39883851 struct robust_list __user *entry, *next_entry, *pending;
39893852 unsigned int limit = ROBUST_LIST_LIMIT, pi, pip;
3990
- unsigned int uninitialized_var(next_pi);
3853
+ unsigned int next_pi;
39913854 compat_uptr_t uentry, next_uentry, upending;
39923855 compat_long_t futex_offset;
39933856 int rc;
....@@ -4106,12 +3969,14 @@
41063969
41073970 return ret;
41083971 }
3972
+#endif /* CONFIG_COMPAT */
41093973
4110
-COMPAT_SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
3974
+#ifdef CONFIG_COMPAT_32BIT_TIME
3975
+SYSCALL_DEFINE6(futex_time32, u32 __user *, uaddr, int, op, u32, val,
41113976 struct old_timespec32 __user *, utime, u32 __user *, uaddr2,
41123977 u32, val3)
41133978 {
4114
- struct timespec ts;
3979
+ struct timespec64 ts;
41153980 ktime_t t, *tp = NULL;
41163981 int val2 = 0;
41173982 int cmd = op & FUTEX_CMD_MASK;
....@@ -4119,14 +3984,16 @@
41193984 if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI ||
41203985 cmd == FUTEX_WAIT_BITSET ||
41213986 cmd == FUTEX_WAIT_REQUEUE_PI)) {
4122
- if (compat_get_timespec(&ts, utime))
3987
+ if (get_old_timespec32(&ts, utime))
41233988 return -EFAULT;
4124
- if (!timespec_valid(&ts))
3989
+ if (!timespec64_valid(&ts))
41253990 return -EINVAL;
41263991
4127
- t = timespec_to_ktime(ts);
3992
+ t = timespec64_to_ktime(ts);
41283993 if (cmd == FUTEX_WAIT)
41293994 t = ktime_add_safe(ktime_get(), t);
3995
+ else if (cmd != FUTEX_LOCK_PI && !(op & FUTEX_CLOCK_REALTIME))
3996
+ t = timens_ktime_to_host(CLOCK_MONOTONIC, t);
41303997 tp = &t;
41313998 }
41323999 if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE ||
....@@ -4135,7 +4002,7 @@
41354002
41364003 return do_futex(uaddr, op, val, tp, uaddr2, val2, val3);
41374004 }
4138
-#endif /* CONFIG_COMPAT */
4005
+#endif /* CONFIG_COMPAT_32BIT_TIME */
41394006
41404007 static void __init futex_detect_cmpxchg(void)
41414008 {