hc
2023-12-11 6778948f9de86c3cfaf36725a7c87dcff9ba247f
kernel/kernel/futex.c
....@@ -1,3 +1,4 @@
1
+// SPDX-License-Identifier: GPL-2.0-or-later
12 /*
23 * Fast Userspace Mutexes (which I call "Futexes!").
34 * (C) Rusty Russell, IBM 2002
....@@ -29,49 +30,20 @@
2930 *
3031 * "The futexes are also cursed."
3132 * "But they come in a choice of three flavours!"
32
- *
33
- * This program is free software; you can redistribute it and/or modify
34
- * it under the terms of the GNU General Public License as published by
35
- * the Free Software Foundation; either version 2 of the License, or
36
- * (at your option) any later version.
37
- *
38
- * This program is distributed in the hope that it will be useful,
39
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
40
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
41
- * GNU General Public License for more details.
42
- *
43
- * You should have received a copy of the GNU General Public License
44
- * along with this program; if not, write to the Free Software
45
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
4633 */
4734 #include <linux/compat.h>
48
-#include <linux/slab.h>
49
-#include <linux/poll.h>
50
-#include <linux/fs.h>
51
-#include <linux/file.h>
5235 #include <linux/jhash.h>
53
-#include <linux/init.h>
54
-#include <linux/futex.h>
55
-#include <linux/mount.h>
5636 #include <linux/pagemap.h>
5737 #include <linux/syscalls.h>
58
-#include <linux/signal.h>
59
-#include <linux/export.h>
60
-#include <linux/magic.h>
61
-#include <linux/pid.h>
62
-#include <linux/nsproxy.h>
63
-#include <linux/ptrace.h>
64
-#include <linux/sched/rt.h>
65
-#include <linux/sched/wake_q.h>
66
-#include <linux/sched/mm.h>
67
-#include <linux/hugetlb.h>
6838 #include <linux/freezer.h>
69
-#include <linux/bootmem.h>
39
+#include <linux/memblock.h>
7040 #include <linux/fault-inject.h>
41
+#include <linux/time_namespace.h>
7142
7243 #include <asm/futex.h>
7344
7445 #include "locking/rtmutex_common.h"
46
+#include <trace/hooks/futex.h>
7547
7648 /*
7749 * READ this before attempting to hack on futexes!
....@@ -147,8 +119,7 @@
147119 *
148120 * Where (A) orders the waiters increment and the futex value read through
149121 * atomic operations (see hb_waiters_inc) and where (B) orders the write
150
- * to futex and the waiters read -- this is done by the barriers for both
151
- * shared and private futexes in get_futex_key_refs().
122
+ * to futex and the waiters read (see hb_waiters_pending()).
152123 *
153124 * This yields the following case (where X:=waiters, Y:=futex):
154125 *
....@@ -212,7 +183,7 @@
212183 struct rt_mutex pi_mutex;
213184
214185 struct task_struct *owner;
215
- atomic_t refcount;
186
+ refcount_t refcount;
216187
217188 union futex_key key;
218189 } __randomize_layout;
....@@ -321,12 +292,8 @@
321292 if (IS_ERR(dir))
322293 return PTR_ERR(dir);
323294
324
- if (!debugfs_create_bool("ignore-private", mode, dir,
325
- &fail_futex.ignore_private)) {
326
- debugfs_remove_recursive(dir);
327
- return -ENOMEM;
328
- }
329
-
295
+ debugfs_create_bool("ignore-private", mode, dir,
296
+ &fail_futex.ignore_private);
330297 return 0;
331298 }
332299
....@@ -346,17 +313,6 @@
346313 #else
347314 static inline void compat_exit_robust_list(struct task_struct *curr) { }
348315 #endif
349
-
350
-static inline void futex_get_mm(union futex_key *key)
351
-{
352
- mmgrab(key->private.mm);
353
- /*
354
- * Ensure futex_get_mm() implies a full barrier such that
355
- * get_futex_key() implies a full barrier. This is relied upon
356
- * as smp_mb(); (B), see the ordering comment above.
357
- */
358
- smp_mb__after_atomic();
359
-}
360316
361317 /*
362318 * Reflects a new waiter being added to the waitqueue.
....@@ -386,6 +342,10 @@
386342 static inline int hb_waiters_pending(struct futex_hash_bucket *hb)
387343 {
388344 #ifdef CONFIG_SMP
345
+ /*
346
+ * Full barrier (B), see the ordering comment above.
347
+ */
348
+ smp_mb();
389349 return atomic_read(&hb->waiters);
390350 #else
391351 return 1;
....@@ -423,67 +383,38 @@
423383 && key1->both.offset == key2->both.offset);
424384 }
425385
426
-/*
427
- * Take a reference to the resource addressed by a key.
428
- * Can be called while holding spinlocks.
386
+enum futex_access {
387
+ FUTEX_READ,
388
+ FUTEX_WRITE
389
+};
390
+
391
+/**
392
+ * futex_setup_timer - set up the sleeping hrtimer.
393
+ * @time: ptr to the given timeout value
394
+ * @timeout: the hrtimer_sleeper structure to be set up
395
+ * @flags: futex flags
396
+ * @range_ns: optional range in ns
429397 *
398
+ * Return: Initialized hrtimer_sleeper structure or NULL if no timeout
399
+ * value given
430400 */
431
-static void get_futex_key_refs(union futex_key *key)
401
+static inline struct hrtimer_sleeper *
402
+futex_setup_timer(ktime_t *time, struct hrtimer_sleeper *timeout,
403
+ int flags, u64 range_ns)
432404 {
433
- if (!key->both.ptr)
434
- return;
405
+ if (!time)
406
+ return NULL;
435407
408
+ hrtimer_init_sleeper_on_stack(timeout, (flags & FLAGS_CLOCKRT) ?
409
+ CLOCK_REALTIME : CLOCK_MONOTONIC,
410
+ HRTIMER_MODE_ABS);
436411 /*
437
- * On MMU less systems futexes are always "private" as there is no per
438
- * process address space. We need the smp wmb nevertheless - yes,
439
- * arch/blackfin has MMU less SMP ...
412
+ * If range_ns is 0, calling hrtimer_set_expires_range_ns() is
413
+ * effectively the same as calling hrtimer_set_expires().
440414 */
441
- if (!IS_ENABLED(CONFIG_MMU)) {
442
- smp_mb(); /* explicit smp_mb(); (B) */
443
- return;
444
- }
415
+ hrtimer_set_expires_range_ns(&timeout->timer, *time, range_ns);
445416
446
- switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
447
- case FUT_OFF_INODE:
448
- smp_mb(); /* explicit smp_mb(); (B) */
449
- break;
450
- case FUT_OFF_MMSHARED:
451
- futex_get_mm(key); /* implies smp_mb(); (B) */
452
- break;
453
- default:
454
- /*
455
- * Private futexes do not hold reference on an inode or
456
- * mm, therefore the only purpose of calling get_futex_key_refs
457
- * is because we need the barrier for the lockless waiter check.
458
- */
459
- smp_mb(); /* explicit smp_mb(); (B) */
460
- }
461
-}
462
-
463
-/*
464
- * Drop a reference to the resource addressed by a key.
465
- * The hash bucket spinlock must not be held. This is
466
- * a no-op for private futexes, see comment in the get
467
- * counterpart.
468
- */
469
-static void drop_futex_key_refs(union futex_key *key)
470
-{
471
- if (!key->both.ptr) {
472
- /* If we're here then we tried to put a key we failed to get */
473
- WARN_ON_ONCE(1);
474
- return;
475
- }
476
-
477
- if (!IS_ENABLED(CONFIG_MMU))
478
- return;
479
-
480
- switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
481
- case FUT_OFF_INODE:
482
- break;
483
- case FUT_OFF_MMSHARED:
484
- mmdrop(key->private.mm);
485
- break;
486
- }
417
+ return timeout;
487418 }
488419
489420 /*
....@@ -529,20 +460,23 @@
529460 /**
530461 * get_futex_key() - Get parameters which are the keys for a futex
531462 * @uaddr: virtual address of the futex
532
- * @fshared: 0 for a PROCESS_PRIVATE futex, 1 for PROCESS_SHARED
463
+ * @fshared: false for a PROCESS_PRIVATE futex, true for PROCESS_SHARED
533464 * @key: address where result is stored.
534
- * @rw: mapping needs to be read/write (values: VERIFY_READ,
535
- * VERIFY_WRITE)
465
+ * @rw: mapping needs to be read/write (values: FUTEX_READ,
466
+ * FUTEX_WRITE)
536467 *
537468 * Return: a negative error code or 0
538469 *
539470 * The key words are stored in @key on success.
540471 *
541472 * For shared mappings (when @fshared), the key is:
473
+ *
542474 * ( inode->i_sequence, page->index, offset_within_page )
475
+ *
543476 * [ also see get_inode_sequence_number() ]
544477 *
545478 * For private mappings (or when !@fshared), the key is:
479
+ *
546480 * ( current->mm, address, 0 )
547481 *
548482 * This allows (cross process, where applicable) identification of the futex
....@@ -550,8 +484,8 @@
550484 *
551485 * lock_page() might sleep, the caller should not hold a spinlock.
552486 */
553
-static int
554
-get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw)
487
+static int get_futex_key(u32 __user *uaddr, bool fshared, union futex_key *key,
488
+ enum futex_access rw)
555489 {
556490 unsigned long address = (unsigned long)uaddr;
557491 struct mm_struct *mm = current->mm;
....@@ -567,7 +501,7 @@
567501 return -EINVAL;
568502 address -= key->both.offset;
569503
570
- if (unlikely(!access_ok(rw, uaddr, sizeof(u32))))
504
+ if (unlikely(!access_ok(uaddr, sizeof(u32))))
571505 return -EFAULT;
572506
573507 if (unlikely(should_fail_futex(fshared)))
....@@ -583,21 +517,20 @@
583517 if (!fshared) {
584518 key->private.mm = mm;
585519 key->private.address = address;
586
- get_futex_key_refs(key); /* implies smp_mb(); (B) */
587520 return 0;
588521 }
589522
590523 again:
591524 /* Ignore any VERIFY_READ mapping (futex common case) */
592
- if (unlikely(should_fail_futex(fshared)))
525
+ if (unlikely(should_fail_futex(true)))
593526 return -EFAULT;
594527
595
- err = get_user_pages_fast(address, 1, 1, &page);
528
+ err = get_user_pages_fast(address, 1, FOLL_WRITE, &page);
596529 /*
597530 * If write access is not required (eg. FUTEX_WAIT), try
598531 * and get read-only access.
599532 */
600
- if (err == -EFAULT && rw == VERIFY_READ) {
533
+ if (err == -EFAULT && rw == FUTEX_READ) {
601534 err = get_user_pages_fast(address, 1, 0, &page);
602535 ro = 1;
603536 }
....@@ -654,7 +587,7 @@
654587 lock_page(page);
655588 shmem_swizzled = PageSwapCache(page) || page->mapping;
656589 unlock_page(page);
657
- put_page(page);
590
+ put_user_page(page);
658591
659592 if (shmem_swizzled)
660593 goto again;
....@@ -677,7 +610,7 @@
677610 * A RO anonymous page will never change and thus doesn't make
678611 * sense for futex operations.
679612 */
680
- if (unlikely(should_fail_futex(fshared)) || ro) {
613
+ if (unlikely(should_fail_futex(true)) || ro) {
681614 err = -EFAULT;
682615 goto out;
683616 }
....@@ -704,7 +637,7 @@
704637
705638 if (READ_ONCE(page->mapping) != mapping) {
706639 rcu_read_unlock();
707
- put_page(page);
640
+ put_user_page(page);
708641
709642 goto again;
710643 }
....@@ -712,7 +645,7 @@
712645 inode = READ_ONCE(mapping->host);
713646 if (!inode) {
714647 rcu_read_unlock();
715
- put_page(page);
648
+ put_user_page(page);
716649
717650 goto again;
718651 }
....@@ -723,16 +656,9 @@
723656 rcu_read_unlock();
724657 }
725658
726
- get_futex_key_refs(key); /* implies smp_mb(); (B) */
727
-
728659 out:
729
- put_page(page);
660
+ put_user_page(page);
730661 return err;
731
-}
732
-
733
-static inline void put_futex_key(union futex_key *key)
734
-{
735
- drop_futex_key_refs(key);
736662 }
737663
738664 /**
....@@ -752,10 +678,10 @@
752678 struct mm_struct *mm = current->mm;
753679 int ret;
754680
755
- down_read(&mm->mmap_sem);
756
- ret = fixup_user_fault(current, mm, (unsigned long)uaddr,
681
+ mmap_read_lock(mm);
682
+ ret = fixup_user_fault(mm, (unsigned long)uaddr,
757683 FAULT_FLAG_WRITE, NULL);
758
- up_read(&mm->mmap_sem);
684
+ mmap_read_unlock(mm);
759685
760686 return ret < 0 ? ret : 0;
761687 }
....@@ -821,7 +747,7 @@
821747 INIT_LIST_HEAD(&pi_state->list);
822748 /* pi_mutex gets initialized later */
823749 pi_state->owner = NULL;
824
- atomic_set(&pi_state->refcount, 1);
750
+ refcount_set(&pi_state->refcount, 1);
825751 pi_state->key = FUTEX_KEY_INIT;
826752
827753 current->pi_state_cache = pi_state;
....@@ -864,7 +790,7 @@
864790
865791 static void get_pi_state(struct futex_pi_state *pi_state)
866792 {
867
- WARN_ON_ONCE(!atomic_inc_not_zero(&pi_state->refcount));
793
+ WARN_ON_ONCE(!refcount_inc_not_zero(&pi_state->refcount));
868794 }
869795
870796 /*
....@@ -876,7 +802,7 @@
876802 if (!pi_state)
877803 return;
878804
879
- if (!atomic_dec_and_test(&pi_state->refcount))
805
+ if (!refcount_dec_and_test(&pi_state->refcount))
880806 return;
881807
882808 /*
....@@ -901,7 +827,7 @@
901827 * refcount is at 0 - put it back to 1.
902828 */
903829 pi_state->owner = NULL;
904
- atomic_set(&pi_state->refcount, 1);
830
+ refcount_set(&pi_state->refcount, 1);
905831 current->pi_state_cache = pi_state;
906832 }
907833 }
....@@ -944,7 +870,7 @@
944870 * In that case; drop the locks to let put_pi_state() make
945871 * progress and retry the loop.
946872 */
947
- if (!atomic_inc_not_zero(&pi_state->refcount)) {
873
+ if (!refcount_inc_not_zero(&pi_state->refcount)) {
948874 raw_spin_unlock_irq(&curr->pi_lock);
949875 cpu_relax();
950876 raw_spin_lock_irq(&curr->pi_lock);
....@@ -1009,7 +935,7 @@
1009935 * [10] Found | Found | task | !=taskTID | 0/1 | Invalid
1010936 *
1011937 * [1] Indicates that the kernel can acquire the futex atomically. We
1012
- * came came here due to a stale FUTEX_WAITERS/FUTEX_OWNER_DIED bit.
938
+ * came here due to a stale FUTEX_WAITERS/FUTEX_OWNER_DIED bit.
1013939 *
1014940 * [2] Valid, if TID does not belong to a kernel thread. If no matching
1015941 * thread is found then it indicates that the owner TID has died.
....@@ -1102,7 +1028,7 @@
11021028 * and futex_wait_requeue_pi() as it cannot go to 0 and consequently
11031029 * free pi_state before we can take a reference ourselves.
11041030 */
1105
- WARN_ON(!atomic_read(&pi_state->refcount));
1031
+ WARN_ON(!refcount_read(&pi_state->refcount));
11061032
11071033 /*
11081034 * Now that we have a pi_state, we can acquire wait_lock
....@@ -1196,6 +1122,7 @@
11961122
11971123 /**
11981124 * wait_for_owner_exiting - Block until the owner has exited
1125
+ * @ret: owner's current futex lock status
11991126 * @exiting: Pointer to the exiting task
12001127 *
12011128 * Caller must hold a refcount on @exiting.
....@@ -1398,7 +1325,7 @@
13981325 static int lock_pi_update_atomic(u32 __user *uaddr, u32 uval, u32 newval)
13991326 {
14001327 int err;
1401
- u32 uninitialized_var(curval);
1328
+ u32 curval;
14021329
14031330 if (unlikely(should_fail_futex(true)))
14041331 return -EFAULT;
....@@ -1523,9 +1450,9 @@
15231450 {
15241451 struct futex_hash_bucket *hb;
15251452
1526
- if (WARN_ON_SMP(!q->lock_ptr || !spin_is_locked(q->lock_ptr))
1527
- || WARN_ON(plist_node_empty(&q->list)))
1453
+ if (WARN_ON_SMP(!q->lock_ptr) || WARN_ON(plist_node_empty(&q->list)))
15281454 return;
1455
+ lockdep_assert_held(q->lock_ptr);
15291456
15301457 hb = container_of(q->lock_ptr, struct futex_hash_bucket, lock);
15311458 plist_del(&q->list, &hb->chain);
....@@ -1558,10 +1485,9 @@
15581485
15591486 /*
15601487 * Queue the task for later wakeup for after we've released
1561
- * the hb->lock. wake_q_add() grabs reference to p.
1488
+ * the hb->lock.
15621489 */
1563
- wake_q_add(wake_q, p);
1564
- put_task_struct(p);
1490
+ wake_q_add_safe(wake_q, p);
15651491 }
15661492
15671493 /*
....@@ -1569,7 +1495,7 @@
15691495 */
15701496 static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_pi_state *pi_state)
15711497 {
1572
- u32 uninitialized_var(curval), newval;
1498
+ u32 curval, newval;
15731499 struct task_struct *new_owner;
15741500 bool postunlock = false;
15751501 DEFINE_WAKE_Q(wake_q);
....@@ -1668,23 +1594,25 @@
16681594 struct futex_q *this, *next;
16691595 union futex_key key = FUTEX_KEY_INIT;
16701596 int ret;
1597
+ int target_nr;
16711598 DEFINE_WAKE_Q(wake_q);
16721599
16731600 if (!bitset)
16741601 return -EINVAL;
16751602
1676
- ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key, VERIFY_READ);
1603
+ ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key, FUTEX_READ);
16771604 if (unlikely(ret != 0))
1678
- goto out;
1605
+ return ret;
16791606
16801607 hb = hash_futex(&key);
16811608
16821609 /* Make sure we really have tasks to wakeup */
16831610 if (!hb_waiters_pending(hb))
1684
- goto out_put_key;
1611
+ return ret;
16851612
16861613 spin_lock(&hb->lock);
16871614
1615
+ trace_android_vh_futex_wake_traverse_plist(&hb->chain, &target_nr, key, bitset);
16881616 plist_for_each_entry_safe(this, next, &hb->chain, list) {
16891617 if (match_futex (&this->key, &key)) {
16901618 if (this->pi_state || this->rt_waiter) {
....@@ -1696,6 +1624,7 @@
16961624 if (!(this->bitset & bitset))
16971625 continue;
16981626
1627
+ trace_android_vh_futex_wake_this(ret, nr_wake, target_nr, this->task);
16991628 mark_wake_futex(&wake_q, this);
17001629 if (++ret >= nr_wake)
17011630 break;
....@@ -1704,9 +1633,7 @@
17041633
17051634 spin_unlock(&hb->lock);
17061635 wake_up_q(&wake_q);
1707
-out_put_key:
1708
- put_futex_key(&key);
1709
-out:
1636
+ trace_android_vh_futex_wake_up_q_finish(nr_wake, target_nr);
17101637 return ret;
17111638 }
17121639
....@@ -1732,10 +1659,9 @@
17321659 oparg = 1 << oparg;
17331660 }
17341661
1735
- if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
1736
- return -EFAULT;
1737
-
1662
+ pagefault_disable();
17381663 ret = arch_futex_atomic_op_inuser(op, oparg, &oldval, uaddr);
1664
+ pagefault_enable();
17391665 if (ret)
17401666 return ret;
17411667
....@@ -1772,12 +1698,12 @@
17721698 DEFINE_WAKE_Q(wake_q);
17731699
17741700 retry:
1775
- ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, VERIFY_READ);
1701
+ ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, FUTEX_READ);
17761702 if (unlikely(ret != 0))
1777
- goto out;
1778
- ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, VERIFY_WRITE);
1703
+ return ret;
1704
+ ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, FUTEX_WRITE);
17791705 if (unlikely(ret != 0))
1780
- goto out_put_key1;
1706
+ return ret;
17811707
17821708 hb1 = hash_futex(&key1);
17831709 hb2 = hash_futex(&key2);
....@@ -1795,13 +1721,13 @@
17951721 * an MMU, but we might get them from range checking
17961722 */
17971723 ret = op_ret;
1798
- goto out_put_keys;
1724
+ return ret;
17991725 }
18001726
18011727 if (op_ret == -EFAULT) {
18021728 ret = fault_in_user_writeable(uaddr2);
18031729 if (ret)
1804
- goto out_put_keys;
1730
+ return ret;
18051731 }
18061732
18071733 if (!(flags & FLAGS_SHARED)) {
....@@ -1809,8 +1735,6 @@
18091735 goto retry_private;
18101736 }
18111737
1812
- put_futex_key(&key2);
1813
- put_futex_key(&key1);
18141738 cond_resched();
18151739 goto retry;
18161740 }
....@@ -1846,11 +1770,6 @@
18461770 out_unlock:
18471771 double_unlock_hb(hb1, hb2);
18481772 wake_up_q(&wake_q);
1849
-out_put_keys:
1850
- put_futex_key(&key2);
1851
-out_put_key1:
1852
- put_futex_key(&key1);
1853
-out:
18541773 return ret;
18551774 }
18561775
....@@ -1877,7 +1796,6 @@
18771796 plist_add(&q->list, &hb2->chain);
18781797 q->lock_ptr = &hb2->lock;
18791798 }
1880
- get_futex_key_refs(key2);
18811799 q->key = *key2;
18821800 }
18831801
....@@ -1899,7 +1817,6 @@
18991817 void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key,
19001818 struct futex_hash_bucket *hb)
19011819 {
1902
- get_futex_key_refs(key);
19031820 q->key = *key;
19041821
19051822 __unqueue_futex(q);
....@@ -2010,7 +1927,7 @@
20101927 u32 *cmpval, int requeue_pi)
20111928 {
20121929 union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
2013
- int drop_count = 0, task_count = 0, ret;
1930
+ int task_count = 0, ret;
20141931 struct futex_pi_state *pi_state = NULL;
20151932 struct futex_hash_bucket *hb1, *hb2;
20161933 struct futex_q *this, *next;
....@@ -2057,22 +1974,20 @@
20571974 }
20581975
20591976 retry:
2060
- ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, VERIFY_READ);
1977
+ ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, FUTEX_READ);
20611978 if (unlikely(ret != 0))
2062
- goto out;
1979
+ return ret;
20631980 ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2,
2064
- requeue_pi ? VERIFY_WRITE : VERIFY_READ);
1981
+ requeue_pi ? FUTEX_WRITE : FUTEX_READ);
20651982 if (unlikely(ret != 0))
2066
- goto out_put_key1;
1983
+ return ret;
20671984
20681985 /*
20691986 * The check above which compares uaddrs is not sufficient for
20701987 * shared futexes. We need to compare the keys:
20711988 */
2072
- if (requeue_pi && match_futex(&key1, &key2)) {
2073
- ret = -EINVAL;
2074
- goto out_put_keys;
2075
- }
1989
+ if (requeue_pi && match_futex(&key1, &key2))
1990
+ return -EINVAL;
20761991
20771992 hb1 = hash_futex(&key1);
20781993 hb2 = hash_futex(&key2);
....@@ -2092,13 +2007,11 @@
20922007
20932008 ret = get_user(curval, uaddr1);
20942009 if (ret)
2095
- goto out_put_keys;
2010
+ return ret;
20962011
20972012 if (!(flags & FLAGS_SHARED))
20982013 goto retry_private;
20992014
2100
- put_futex_key(&key2);
2101
- put_futex_key(&key1);
21022015 goto retry;
21032016 }
21042017 if (curval != *cmpval) {
....@@ -2131,7 +2044,6 @@
21312044 */
21322045 if (ret > 0) {
21332046 WARN_ON(pi_state);
2134
- drop_count++;
21352047 task_count++;
21362048 /*
21372049 * If we acquired the lock, then the user space value
....@@ -2158,12 +2070,10 @@
21582070 case -EFAULT:
21592071 double_unlock_hb(hb1, hb2);
21602072 hb_waiters_dec(hb2);
2161
- put_futex_key(&key2);
2162
- put_futex_key(&key1);
21632073 ret = fault_in_user_writeable(uaddr2);
21642074 if (!ret)
21652075 goto retry;
2166
- goto out;
2076
+ return ret;
21672077 case -EBUSY:
21682078 case -EAGAIN:
21692079 /*
....@@ -2174,8 +2084,6 @@
21742084 */
21752085 double_unlock_hb(hb1, hb2);
21762086 hb_waiters_dec(hb2);
2177
- put_futex_key(&key2);
2178
- put_futex_key(&key1);
21792087 /*
21802088 * Handle the case where the owner is in the middle of
21812089 * exiting. Wait for the exit to complete otherwise
....@@ -2251,7 +2159,6 @@
22512159 * doing so.
22522160 */
22532161 requeue_pi_wake_futex(this, &key2, hb2);
2254
- drop_count++;
22552162 continue;
22562163 } else if (ret) {
22572164 /*
....@@ -2272,7 +2179,6 @@
22722179 }
22732180 }
22742181 requeue_futex(this, hb1, hb2, &key2);
2275
- drop_count++;
22762182 }
22772183
22782184 /*
....@@ -2286,21 +2192,6 @@
22862192 double_unlock_hb(hb1, hb2);
22872193 wake_up_q(&wake_q);
22882194 hb_waiters_dec(hb2);
2289
-
2290
- /*
2291
- * drop_futex_key_refs() must be called outside the spinlocks. During
2292
- * the requeue we moved futex_q's from the hash bucket at key1 to the
2293
- * one at key2 and updated their key pointer. We no longer need to
2294
- * hold the references to key1.
2295
- */
2296
- while (--drop_count >= 0)
2297
- drop_futex_key_refs(&key1);
2298
-
2299
-out_put_keys:
2300
- put_futex_key(&key2);
2301
-out_put_key1:
2302
- put_futex_key(&key1);
2303
-out:
23042195 return ret ? ret : task_count;
23052196 }
23062197
....@@ -2320,11 +2211,11 @@
23202211 * decrement the counter at queue_unlock() when some error has
23212212 * occurred and we don't end up adding the task to the list.
23222213 */
2323
- hb_waiters_inc(hb);
2214
+ hb_waiters_inc(hb); /* implies smp_mb(); (A) */
23242215
23252216 q->lock_ptr = &hb->lock;
23262217
2327
- spin_lock(&hb->lock); /* implies smp_mb(); (A) */
2218
+ spin_lock(&hb->lock);
23282219 return hb;
23292220 }
23302221
....@@ -2339,6 +2230,7 @@
23392230 static inline void __queue_me(struct futex_q *q, struct futex_hash_bucket *hb)
23402231 {
23412232 int prio;
2233
+ bool already_on_hb = false;
23422234
23432235 /*
23442236 * The priority used to register this element is
....@@ -2351,7 +2243,9 @@
23512243 prio = min(current->normal_prio, MAX_RT_PRIO);
23522244
23532245 plist_node_init(&q->list, prio);
2354
- plist_add(&q->list, &hb->chain);
2246
+ trace_android_vh_alter_futex_plist_add(&q->list, &hb->chain, &already_on_hb);
2247
+ if (!already_on_hb)
2248
+ plist_add(&q->list, &hb->chain);
23552249 q->task = current;
23562250 }
23572251
....@@ -2425,7 +2319,6 @@
24252319 ret = 1;
24262320 }
24272321
2428
- drop_futex_key_refs(&q->key);
24292322 return ret;
24302323 }
24312324
....@@ -2449,9 +2342,9 @@
24492342 static int __fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
24502343 struct task_struct *argowner)
24512344 {
2452
- u32 uval, uninitialized_var(curval), newval, newtid;
24532345 struct futex_pi_state *pi_state = q->pi_state;
24542346 struct task_struct *oldowner, *newowner;
2347
+ u32 uval, curval, newval, newtid;
24552348 int err = 0;
24562349
24572350 oldowner = pi_state->owner;
....@@ -2706,7 +2599,7 @@
27062599
27072600 /* Arm the timer */
27082601 if (timeout)
2709
- hrtimer_start_expires(&timeout->timer, HRTIMER_MODE_ABS);
2602
+ hrtimer_sleeper_start_expires(timeout, HRTIMER_MODE_ABS);
27102603
27112604 /*
27122605 * If we have been removed from the hash list, then another task
....@@ -2718,8 +2611,10 @@
27182611 * flagged for rescheduling. Only call schedule if there
27192612 * is no timeout, or if it has yet to expire.
27202613 */
2721
- if (!timeout || timeout->task)
2614
+ if (!timeout || timeout->task) {
2615
+ trace_android_vh_futex_sleep_start(current);
27222616 freezable_schedule();
2617
+ }
27232618 }
27242619 __set_current_state(TASK_RUNNING);
27252620 }
....@@ -2766,7 +2661,7 @@
27662661 * while the syscall executes.
27672662 */
27682663 retry:
2769
- ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q->key, VERIFY_READ);
2664
+ ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q->key, FUTEX_READ);
27702665 if (unlikely(ret != 0))
27712666 return ret;
27722667
....@@ -2780,12 +2675,11 @@
27802675
27812676 ret = get_user(uval, uaddr);
27822677 if (ret)
2783
- goto out;
2678
+ return ret;
27842679
27852680 if (!(flags & FLAGS_SHARED))
27862681 goto retry_private;
27872682
2788
- put_futex_key(&q->key);
27892683 goto retry;
27902684 }
27912685
....@@ -2794,16 +2688,13 @@
27942688 ret = -EWOULDBLOCK;
27952689 }
27962690
2797
-out:
2798
- if (ret)
2799
- put_futex_key(&q->key);
28002691 return ret;
28012692 }
28022693
28032694 static int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val,
28042695 ktime_t *abs_time, u32 bitset)
28052696 {
2806
- struct hrtimer_sleeper timeout, *to = NULL;
2697
+ struct hrtimer_sleeper timeout, *to;
28072698 struct restart_block *restart;
28082699 struct futex_hash_bucket *hb;
28092700 struct futex_q q = futex_q_init;
....@@ -2812,18 +2703,10 @@
28122703 if (!bitset)
28132704 return -EINVAL;
28142705 q.bitset = bitset;
2706
+ trace_android_vh_futex_wait_start(flags, bitset);
28152707
2816
- if (abs_time) {
2817
- to = &timeout;
2818
-
2819
- hrtimer_init_on_stack(&to->timer, (flags & FLAGS_CLOCKRT) ?
2820
- CLOCK_REALTIME : CLOCK_MONOTONIC,
2821
- HRTIMER_MODE_ABS);
2822
- hrtimer_init_sleeper(to, current);
2823
- hrtimer_set_expires_range_ns(&to->timer, *abs_time,
2824
- current->timer_slack_ns);
2825
- }
2826
-
2708
+ to = futex_setup_timer(abs_time, &timeout, flags,
2709
+ current->timer_slack_ns);
28272710 retry:
28282711 /*
28292712 * Prepare to wait on uaddr. On success, holds hb lock and increments
....@@ -2870,6 +2753,7 @@
28702753 hrtimer_cancel(&to->timer);
28712754 destroy_hrtimer_on_stack(&to->timer);
28722755 }
2756
+ trace_android_vh_futex_wait_end(flags, bitset);
28732757 return ret;
28742758 }
28752759
....@@ -2902,7 +2786,7 @@
29022786 static int futex_lock_pi(u32 __user *uaddr, unsigned int flags,
29032787 ktime_t *time, int trylock)
29042788 {
2905
- struct hrtimer_sleeper timeout, *to = NULL;
2789
+ struct hrtimer_sleeper timeout, *to;
29062790 struct task_struct *exiting = NULL;
29072791 struct rt_mutex_waiter rt_waiter;
29082792 struct futex_hash_bucket *hb;
....@@ -2915,16 +2799,10 @@
29152799 if (refill_pi_state_cache())
29162800 return -ENOMEM;
29172801
2918
- if (time) {
2919
- to = &timeout;
2920
- hrtimer_init_on_stack(&to->timer, CLOCK_REALTIME,
2921
- HRTIMER_MODE_ABS);
2922
- hrtimer_init_sleeper(to, current);
2923
- hrtimer_set_expires(&to->timer, *time);
2924
- }
2802
+ to = futex_setup_timer(time, &timeout, FLAGS_CLOCKRT, 0);
29252803
29262804 retry:
2927
- ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q.key, VERIFY_WRITE);
2805
+ ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q.key, FUTEX_WRITE);
29282806 if (unlikely(ret != 0))
29292807 goto out;
29302808
....@@ -2954,7 +2832,6 @@
29542832 * - EAGAIN: The user space value changed.
29552833 */
29562834 queue_unlock(hb);
2957
- put_futex_key(&q.key);
29582835 /*
29592836 * Handle the case where the owner is in the middle of
29602837 * exiting. Wait for the exit to complete otherwise
....@@ -3014,7 +2891,7 @@
30142891 }
30152892
30162893 if (unlikely(to))
3017
- hrtimer_start_expires(&to->timer, HRTIMER_MODE_ABS);
2894
+ hrtimer_sleeper_start_expires(to, HRTIMER_MODE_ABS);
30182895
30192896 ret = rt_mutex_wait_proxy_lock(&q.pi_state->pi_mutex, to, &rt_waiter);
30202897
....@@ -3047,14 +2924,11 @@
30472924
30482925 /* Unqueue and drop the lock */
30492926 unqueue_me_pi(&q);
3050
-
3051
- goto out_put_key;
2927
+ goto out;
30522928
30532929 out_unlock_put_key:
30542930 queue_unlock(hb);
30552931
3056
-out_put_key:
3057
- put_futex_key(&q.key);
30582932 out:
30592933 if (to) {
30602934 hrtimer_cancel(&to->timer);
....@@ -3067,12 +2941,11 @@
30672941
30682942 ret = fault_in_user_writeable(uaddr);
30692943 if (ret)
3070
- goto out_put_key;
2944
+ goto out;
30712945
30722946 if (!(flags & FLAGS_SHARED))
30732947 goto retry_private;
30742948
3075
- put_futex_key(&q.key);
30762949 goto retry;
30772950 }
30782951
....@@ -3083,7 +2956,7 @@
30832956 */
30842957 static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags)
30852958 {
3086
- u32 uninitialized_var(curval), uval, vpid = task_pid_vnr(current);
2959
+ u32 curval, uval, vpid = task_pid_vnr(current);
30872960 union futex_key key = FUTEX_KEY_INIT;
30882961 struct futex_hash_bucket *hb;
30892962 struct futex_q *top_waiter;
....@@ -3101,7 +2974,7 @@
31012974 if ((uval & FUTEX_TID_MASK) != vpid)
31022975 return -EPERM;
31032976
3104
- ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key, VERIFY_WRITE);
2977
+ ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key, FUTEX_WRITE);
31052978 if (ret)
31062979 return ret;
31072980
....@@ -3201,16 +3074,13 @@
32013074 out_unlock:
32023075 spin_unlock(&hb->lock);
32033076 out_putkey:
3204
- put_futex_key(&key);
32053077 return ret;
32063078
32073079 pi_retry:
3208
- put_futex_key(&key);
32093080 cond_resched();
32103081 goto retry;
32113082
32123083 pi_faulted:
3213
- put_futex_key(&key);
32143084
32153085 ret = fault_in_user_writeable(uaddr);
32163086 if (!ret)
....@@ -3312,7 +3182,7 @@
33123182 u32 val, ktime_t *abs_time, u32 bitset,
33133183 u32 __user *uaddr2)
33143184 {
3315
- struct hrtimer_sleeper timeout, *to = NULL;
3185
+ struct hrtimer_sleeper timeout, *to;
33163186 struct rt_mutex_waiter rt_waiter;
33173187 struct futex_hash_bucket *hb;
33183188 union futex_key key2 = FUTEX_KEY_INIT;
....@@ -3328,15 +3198,8 @@
33283198 if (!bitset)
33293199 return -EINVAL;
33303200
3331
- if (abs_time) {
3332
- to = &timeout;
3333
- hrtimer_init_on_stack(&to->timer, (flags & FLAGS_CLOCKRT) ?
3334
- CLOCK_REALTIME : CLOCK_MONOTONIC,
3335
- HRTIMER_MODE_ABS);
3336
- hrtimer_init_sleeper(to, current);
3337
- hrtimer_set_expires_range_ns(&to->timer, *abs_time,
3338
- current->timer_slack_ns);
3339
- }
3201
+ to = futex_setup_timer(abs_time, &timeout, flags,
3202
+ current->timer_slack_ns);
33403203
33413204 /*
33423205 * The waiter is allocated on our stack, manipulated by the requeue
....@@ -3344,7 +3207,7 @@
33443207 */
33453208 rt_mutex_init_waiter(&rt_waiter);
33463209
3347
- ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, VERIFY_WRITE);
3210
+ ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, FUTEX_WRITE);
33483211 if (unlikely(ret != 0))
33493212 goto out;
33503213
....@@ -3358,7 +3221,7 @@
33583221 */
33593222 ret = futex_wait_setup(uaddr, val, flags, &q, &hb);
33603223 if (ret)
3361
- goto out_key2;
3224
+ goto out;
33623225
33633226 /*
33643227 * The check above which compares uaddrs is not sufficient for
....@@ -3367,7 +3230,7 @@
33673230 if (match_futex(&q.key, &key2)) {
33683231 queue_unlock(hb);
33693232 ret = -EINVAL;
3370
- goto out_put_keys;
3233
+ goto out;
33713234 }
33723235
33733236 /* Queue the futex_q, drop the hb lock, wait for wakeup. */
....@@ -3377,7 +3240,7 @@
33773240 ret = handle_early_requeue_pi_wakeup(hb, &q, &key2, to);
33783241 spin_unlock(&hb->lock);
33793242 if (ret)
3380
- goto out_put_keys;
3243
+ goto out;
33813244
33823245 /*
33833246 * In order for us to be here, we know our q.key == key2, and since
....@@ -3452,11 +3315,6 @@
34523315 */
34533316 ret = -EWOULDBLOCK;
34543317 }
3455
-
3456
-out_put_keys:
3457
- put_futex_key(&q.key);
3458
-out_key2:
3459
- put_futex_key(&key2);
34603318
34613319 out:
34623320 if (to) {
....@@ -3558,7 +3416,7 @@
35583416 static int handle_futex_death(u32 __user *uaddr, struct task_struct *curr,
35593417 bool pi, bool pending_op)
35603418 {
3561
- u32 uval, uninitialized_var(nval), mval;
3419
+ u32 uval, nval, mval;
35623420 int err;
35633421
35643422 /* Futex address must be 32bit aligned */
....@@ -3688,7 +3546,7 @@
36883546 struct robust_list_head __user *head = curr->robust_list;
36893547 struct robust_list __user *entry, *next_entry, *pending;
36903548 unsigned int limit = ROBUST_LIST_LIMIT, pi, pip;
3691
- unsigned int uninitialized_var(next_pi);
3549
+ unsigned int next_pi;
36923550 unsigned long futex_offset;
36933551 int rc;
36943552
....@@ -3881,15 +3739,16 @@
38813739 return -ENOSYS;
38823740 }
38833741
3742
+ trace_android_vh_do_futex(cmd, &flags, uaddr2);
38843743 switch (cmd) {
38853744 case FUTEX_WAIT:
38863745 val3 = FUTEX_BITSET_MATCH_ANY;
3887
- /* fall through */
3746
+ fallthrough;
38883747 case FUTEX_WAIT_BITSET:
38893748 return futex_wait(uaddr, flags, val, timeout, val3);
38903749 case FUTEX_WAKE:
38913750 val3 = FUTEX_BITSET_MATCH_ANY;
3892
- /* fall through */
3751
+ fallthrough;
38933752 case FUTEX_WAKE_BITSET:
38943753 return futex_wake(uaddr, flags, val, val3);
38953754 case FUTEX_REQUEUE:
....@@ -3916,10 +3775,10 @@
39163775
39173776
39183777 SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
3919
- struct timespec __user *, utime, u32 __user *, uaddr2,
3778
+ struct __kernel_timespec __user *, utime, u32 __user *, uaddr2,
39203779 u32, val3)
39213780 {
3922
- struct timespec ts;
3781
+ struct timespec64 ts;
39233782 ktime_t t, *tp = NULL;
39243783 u32 val2 = 0;
39253784 int cmd = op & FUTEX_CMD_MASK;
....@@ -3929,14 +3788,16 @@
39293788 cmd == FUTEX_WAIT_REQUEUE_PI)) {
39303789 if (unlikely(should_fail_futex(!(op & FUTEX_PRIVATE_FLAG))))
39313790 return -EFAULT;
3932
- if (copy_from_user(&ts, utime, sizeof(ts)) != 0)
3791
+ if (get_timespec64(&ts, utime))
39333792 return -EFAULT;
3934
- if (!timespec_valid(&ts))
3793
+ if (!timespec64_valid(&ts))
39353794 return -EINVAL;
39363795
3937
- t = timespec_to_ktime(ts);
3796
+ t = timespec64_to_ktime(ts);
39383797 if (cmd == FUTEX_WAIT)
39393798 t = ktime_add_safe(ktime_get(), t);
3799
+ else if (cmd != FUTEX_LOCK_PI && !(op & FUTEX_CLOCK_REALTIME))
3800
+ t = timens_ktime_to_host(CLOCK_MONOTONIC, t);
39403801 tp = &t;
39413802 }
39423803 /*
....@@ -3987,7 +3848,7 @@
39873848 struct compat_robust_list_head __user *head = curr->compat_robust_list;
39883849 struct robust_list __user *entry, *next_entry, *pending;
39893850 unsigned int limit = ROBUST_LIST_LIMIT, pi, pip;
3990
- unsigned int uninitialized_var(next_pi);
3851
+ unsigned int next_pi;
39913852 compat_uptr_t uentry, next_uentry, upending;
39923853 compat_long_t futex_offset;
39933854 int rc;
....@@ -4106,12 +3967,14 @@
41063967
41073968 return ret;
41083969 }
3970
+#endif /* CONFIG_COMPAT */
41093971
4110
-COMPAT_SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
3972
+#ifdef CONFIG_COMPAT_32BIT_TIME
3973
+SYSCALL_DEFINE6(futex_time32, u32 __user *, uaddr, int, op, u32, val,
41113974 struct old_timespec32 __user *, utime, u32 __user *, uaddr2,
41123975 u32, val3)
41133976 {
4114
- struct timespec ts;
3977
+ struct timespec64 ts;
41153978 ktime_t t, *tp = NULL;
41163979 int val2 = 0;
41173980 int cmd = op & FUTEX_CMD_MASK;
....@@ -4119,14 +3982,16 @@
41193982 if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI ||
41203983 cmd == FUTEX_WAIT_BITSET ||
41213984 cmd == FUTEX_WAIT_REQUEUE_PI)) {
4122
- if (compat_get_timespec(&ts, utime))
3985
+ if (get_old_timespec32(&ts, utime))
41233986 return -EFAULT;
4124
- if (!timespec_valid(&ts))
3987
+ if (!timespec64_valid(&ts))
41253988 return -EINVAL;
41263989
4127
- t = timespec_to_ktime(ts);
3990
+ t = timespec64_to_ktime(ts);
41283991 if (cmd == FUTEX_WAIT)
41293992 t = ktime_add_safe(ktime_get(), t);
3993
+ else if (cmd != FUTEX_LOCK_PI && !(op & FUTEX_CLOCK_REALTIME))
3994
+ t = timens_ktime_to_host(CLOCK_MONOTONIC, t);
41303995 tp = &t;
41313996 }
41323997 if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE ||
....@@ -4135,7 +4000,7 @@
41354000
41364001 return do_futex(uaddr, op, val, tp, uaddr2, val2, val3);
41374002 }
4138
-#endif /* CONFIG_COMPAT */
4003
+#endif /* CONFIG_COMPAT_32BIT_TIME */
41394004
41404005 static void __init futex_detect_cmpxchg(void)
41414006 {