@@ -962,7 +962,9 @@ exit_pi_state_list()
 		if (head->next != next) {
 			/* retain curr->pi_lock for the loop invariant */
 			raw_spin_unlock(&pi_state->pi_mutex.wait_lock);
+			raw_spin_unlock_irq(&curr->pi_lock);
 			spin_unlock(&hb->lock);
+			raw_spin_lock_irq(&curr->pi_lock);
 			put_pi_state(pi_state);
 			continue;
 		}
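For context: a later hunk's own comment notes that "on PREEMPT_RT_FULL, hb->lock becomes an rt_mutex", i.e. a sleeping lock, while raw_spinlock_t stays a true spinlock. A sleeping-lock operation must not be nested inside a held raw spinlock, which is presumably why this hunk drops curr->pi_lock around the hb->lock release. A minimal sketch of the resulting ordering (annotation only, not the kernel's definitions):

```c
/* RT rule, sketched: raw spinlocks stay atomic, spinlock_t may sleep. */
raw_spin_unlock_irq(&curr->pi_lock);	/* drop the raw lock first ...       */
spin_unlock(&hb->lock);			/* ... so the rtmutex op may sleep   */
raw_spin_lock_irq(&curr->pi_lock);	/* retake it for the loop invariant  */
```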
@@ -1573,6 +1575,7 @@ wake_futex_pi()
 	struct task_struct *new_owner;
 	bool postunlock = false;
 	DEFINE_WAKE_Q(wake_q);
+	DEFINE_WAKE_Q(wake_sleeper_q);
 	int ret = 0;
 
 	new_owner = rt_mutex_next_owner(&pi_state->pi_mutex);
@@ -1622,14 +1625,15 @@ wake_futex_pi()
 	 * not fail.
 	 */
 	pi_state_update_owner(pi_state, new_owner);
-	postunlock = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q);
+	postunlock = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q,
+					     &wake_sleeper_q);
 	}
 
 out_unlock:
 	raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
 
 	if (postunlock)
-		rt_mutex_postunlock(&wake_q);
+		rt_mutex_postunlock(&wake_q, &wake_sleeper_q);
 
 	return ret;
 }
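The second wake queue exists because, on RT, tasks blocked on a spinlock_t-turned-rtmutex sleep in a special "sleeper" state and need a dedicated wakeup helper. A sketch of what the RT tree's rt_mutex_postunlock() presumably does with the extra queue (wake_up_q_sleeper() is the RT-specific helper assumed here):

```c
/* Sketch, assuming the RT tree's two-queue wakeup scheme. */
void rt_mutex_postunlock(struct wake_q_head *wake_q,
			 struct wake_q_head *wake_sleeper_q)
{
	wake_up_q(wake_q);			/* ordinary waiters          */
	wake_up_q_sleeper(wake_sleeper_q);	/* "sleeping lock" waiters   */
	preempt_enable();			/* pairs with the unlock path */
}
```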
@@ -2253,6 +2257,16 @@ futex_requeue()
 				requeue_pi_wake_futex(this, &key2, hb2);
 				drop_count++;
 				continue;
+			} else if (ret == -EAGAIN) {
+				/*
+				 * Waiter was woken by timeout or
+				 * signal and has set pi_blocked_on to
+				 * PI_WAKEUP_INPROGRESS before we
+				 * tried to enqueue it on the rtmutex.
+				 */
+				this->pi_state = NULL;
+				put_pi_state(pi_state);
+				continue;
 			} else if (ret) {
 				/*
 				 * rt_mutex_start_proxy_lock() detected a
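The -EAGAIN branch pairs with a check on the rtmutex side: once the waiter has begun its wakeup and published PI_WAKEUP_INPROGRESS, the proxy-lock enqueue must back off. Roughly how the RT tree is assumed to produce the error in the blocking path:

```c
	/* Sketch of the assumed producer of -EAGAIN (RT tree). */
	raw_spin_lock(&task->pi_lock);
	if (task->pi_blocked_on == PI_WAKEUP_INPROGRESS) {
		/* Waiter already woke up; don't enqueue it on the rtmutex. */
		raw_spin_unlock(&task->pi_lock);
		return -EAGAIN;
	}
```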
@@ -2816,10 +2830,9 @@ futex_wait()
 	if (abs_time) {
 		to = &timeout;
 
-		hrtimer_init_on_stack(&to->timer, (flags & FLAGS_CLOCKRT) ?
-				      CLOCK_REALTIME : CLOCK_MONOTONIC,
-				      HRTIMER_MODE_ABS);
-		hrtimer_init_sleeper(to, current);
+		hrtimer_init_sleeper_on_stack(to, (flags & FLAGS_CLOCKRT) ?
+					      CLOCK_REALTIME : CLOCK_MONOTONIC,
+					      HRTIMER_MODE_ABS, current);
 		hrtimer_set_expires_range_ns(&to->timer, *abs_time,
 					     current->timer_slack_ns);
 	}
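The conversion from hrtimer_init_on_stack() plus hrtimer_init_sleeper() to a single hrtimer_init_sleeper_on_stack() call is mechanical; the combined helper is assumed to simply fuse the two older calls (a sketch, not the actual definition):

```c
/* Sketch: assumed behavior of the fused helper. */
static inline void
hrtimer_init_sleeper_on_stack(struct hrtimer_sleeper *sl, clockid_t clock_id,
			      enum hrtimer_mode mode, struct task_struct *task)
{
	hrtimer_init_on_stack(&sl->timer, clock_id, mode);
	hrtimer_init_sleeper(sl, task);
}
```

The same substitution repeats in futex_lock_pi() and futex_wait_requeue_pi() below.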
@@ -2917,9 +2930,8 @@ futex_lock_pi()
 
 	if (time) {
 		to = &timeout;
-		hrtimer_init_on_stack(&to->timer, CLOCK_REALTIME,
-				      HRTIMER_MODE_ABS);
-		hrtimer_init_sleeper(to, current);
+		hrtimer_init_sleeper_on_stack(to, CLOCK_REALTIME,
+					      HRTIMER_MODE_ABS, current);
 		hrtimer_set_expires(&to->timer, *time);
 	}
 
@@ -2982,7 +2994,7 @@ futex_lock_pi()
 		goto no_block;
 	}
 
-	rt_mutex_init_waiter(&rt_waiter);
+	rt_mutex_init_waiter(&rt_waiter, false);
 
 	/*
 	 * On PREEMPT_RT_FULL, when hb->lock becomes an rt_mutex, we must not
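The new second argument to rt_mutex_init_waiter() is assumed to be the RT "savestate" flag: waiters blocking on spinlock_t-converted rtmutexes save and restore the task state across the block, whereas futex waiters (as here) pass false. A sketch of the assumed initializer:

```c
/* Sketch: assumed RT variant of the waiter initializer. */
static inline void rt_mutex_init_waiter(struct rt_mutex_waiter *waiter,
					bool savestate)
{
	debug_rt_mutex_init_waiter(waiter);
	RB_CLEAR_NODE(&waiter->pi_tree_entry);
	RB_CLEAR_NODE(&waiter->tree_entry);
	waiter->task      = NULL;
	waiter->savestate = savestate;	/* true only for sleeping-spinlock waiters */
}
```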
@@ -2998,6 +3010,14 @@ futex_lock_pi()
 	 * before __rt_mutex_start_proxy_lock() is done.
 	 */
 	raw_spin_lock_irq(&q.pi_state->pi_mutex.wait_lock);
+	/*
+	 * the migrate_disable() here disables migration in the in_atomic() fast
+	 * path which is enabled again in the following spin_unlock(). We have
+	 * one migrate_disable() pending in the slow-path which is reversed
+	 * after the raw_spin_unlock_irq() where we leave the atomic context.
+	 */
+	migrate_disable();
+
 	spin_unlock(q.lock_ptr);
 	/*
 	 * __rt_mutex_start_proxy_lock() unconditionally enqueues the @rt_waiter
@@ -3006,6 +3026,7 @@ futex_lock_pi()
 	 */
 	ret = __rt_mutex_start_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter, current);
 	raw_spin_unlock_irq(&q.pi_state->pi_mutex.wait_lock);
+	migrate_enable();
 
 	if (ret) {
 		if (ret == 1)
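The comment in the previous hunk is easier to follow as a ledger. On RT, spin_lock() is assumed to issue an internal migrate_disable() and spin_unlock() the matching migrate_enable(); the explicit calls keep the per-task count balanced even though lock and unlock run in different atomic contexts:

```c
	spin_lock(q.lock_ptr);		/* migrate-disable count: 0 -> 1 (internal) */
	raw_spin_lock_irq(&q.pi_state->pi_mutex.wait_lock);
	migrate_disable();		/* 1 -> 2 (explicit)                        */
	spin_unlock(q.lock_ptr);	/* 2 -> 1 (internal migrate_enable)         */
	ret = __rt_mutex_start_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter, current);
	raw_spin_unlock_irq(&q.pi_state->pi_mutex.wait_lock);
	migrate_enable();		/* 1 -> 0 (explicit)                        */
```

The "magic trickery" block in futex_unlock_pi() below balances the same counter in the mirror-image direction.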
@@ -3140,10 +3161,19 @@ futex_unlock_pi()
 		 * rt_waiter. Also see the WARN in wake_futex_pi().
 		 */
 		raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
+		/*
+		 * Magic trickery for now to make the RT migrate disable
+		 * logic happy. The following spin_unlock() happens with
+		 * interrupts disabled so the internal migrate_enable()
+		 * won't undo the migrate_disable() which was issued when
+		 * locking hb->lock.
+		 */
+		migrate_disable();
 		spin_unlock(&hb->lock);
 
 		/* drops pi_state->pi_mutex.wait_lock */
 		ret = wake_futex_pi(uaddr, uval, pi_state);
+		migrate_enable();
 
 		put_pi_state(pi_state);
 
@@ -3314,7 +3344,7 @@ futex_wait_requeue_pi()
 {
 	struct hrtimer_sleeper timeout, *to = NULL;
 	struct rt_mutex_waiter rt_waiter;
-	struct futex_hash_bucket *hb;
+	struct futex_hash_bucket *hb, *hb2;
 	union futex_key key2 = FUTEX_KEY_INIT;
 	struct futex_q q = futex_q_init;
 	int res, ret;
@@ -3330,10 +3360,9 @@ futex_wait_requeue_pi()
 
 	if (abs_time) {
 		to = &timeout;
-		hrtimer_init_on_stack(&to->timer, (flags & FLAGS_CLOCKRT) ?
-				      CLOCK_REALTIME : CLOCK_MONOTONIC,
-				      HRTIMER_MODE_ABS);
-		hrtimer_init_sleeper(to, current);
+		hrtimer_init_sleeper_on_stack(to, (flags & FLAGS_CLOCKRT) ?
+					      CLOCK_REALTIME : CLOCK_MONOTONIC,
+					      HRTIMER_MODE_ABS, current);
 		hrtimer_set_expires_range_ns(&to->timer, *abs_time,
 					     current->timer_slack_ns);
 	}
@@ -3342,7 +3371,7 @@ futex_wait_requeue_pi()
 	 * The waiter is allocated on our stack, manipulated by the requeue
 	 * code while we sleep on uaddr.
 	 */
-	rt_mutex_init_waiter(&rt_waiter);
+	rt_mutex_init_waiter(&rt_waiter, false);
 
 	ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, VERIFY_WRITE);
 	if (unlikely(ret != 0))
@@ -3373,20 +3402,55 @@ futex_wait_requeue_pi()
 	/* Queue the futex_q, drop the hb lock, wait for wakeup. */
 	futex_wait_queue_me(hb, &q, to);
 
-	spin_lock(&hb->lock);
-	ret = handle_early_requeue_pi_wakeup(hb, &q, &key2, to);
-	spin_unlock(&hb->lock);
-	if (ret)
-		goto out_put_keys;
+	/*
+	 * On RT we must avoid races with requeue and trying to block
+	 * on two mutexes (hb->lock and uaddr2's rtmutex) by
+	 * serializing access to pi_blocked_on with pi_lock.
+	 */
+	raw_spin_lock_irq(&current->pi_lock);
+	if (current->pi_blocked_on) {
+		/*
+		 * We have been requeued or are in the process of
+		 * being requeued.
+		 */
+		raw_spin_unlock_irq(&current->pi_lock);
+	} else {
+		/*
+		 * Setting pi_blocked_on to PI_WAKEUP_INPROGRESS
+		 * prevents a concurrent requeue from moving us to the
+		 * uaddr2 rtmutex. After that we can safely acquire
+		 * (and possibly block on) hb->lock.
+		 */
+		current->pi_blocked_on = PI_WAKEUP_INPROGRESS;
+		raw_spin_unlock_irq(&current->pi_lock);
+
+		spin_lock(&hb->lock);
+
+		/*
+		 * Clean up pi_blocked_on. We might leak it otherwise
+		 * when we succeeded with the hb->lock in the fast
+		 * path.
+		 */
+		raw_spin_lock_irq(&current->pi_lock);
+		current->pi_blocked_on = NULL;
+		raw_spin_unlock_irq(&current->pi_lock);
+
+		ret = handle_early_requeue_pi_wakeup(hb, &q, &key2, to);
+		spin_unlock(&hb->lock);
+		if (ret)
+			goto out_put_keys;
+	}
 
 	/*
-	 * In order for us to be here, we know our q.key == key2, and since
-	 * we took the hb->lock above, we also know that futex_requeue() has
-	 * completed and we no longer have to concern ourselves with a wakeup
-	 * race with the atomic proxy lock acquisition by the requeue code. The
-	 * futex_requeue dropped our key1 reference and incremented our key2
-	 * reference count.
+	 * In order to be here, we have either been requeued, are in
+	 * the process of being requeued, or requeue successfully
+	 * acquired uaddr2 on our behalf. If pi_blocked_on was
+	 * non-null above, we may be racing with a requeue. Do not
+	 * rely on q->lock_ptr to be hb2->lock until after blocking on
+	 * hb->lock or hb2->lock. The futex_requeue dropped our key1
+	 * reference and incremented our key2 reference count.
 	 */
+	hb2 = hash_futex(&key2);
 
 	/* Check if the requeue code acquired the second futex for us. */
 	if (!q.rt_waiter) {
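PI_WAKEUP_INPROGRESS is not a real waiter but a sentinel pointer stored in pi_blocked_on; code that walks the PI chain is assumed to filter it out with a helper before treating the field as a waiter. A sketch of the assumed definitions:

```c
/* Sketch: assumed sentinel and helper from the RT tree. */
#define PI_WAKEUP_INPROGRESS	((struct rt_mutex_waiter *) 1)

static inline bool rt_mutex_real_waiter(struct rt_mutex_waiter *waiter)
{
	return waiter && waiter != PI_WAKEUP_INPROGRESS;
}
```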
@@ -3395,14 +3459,15 @@ futex_wait_requeue_pi()
 		 * did a lock-steal - fix up the PI-state in that case.
 		 */
 		if (q.pi_state && (q.pi_state->owner != current)) {
-			spin_lock(q.lock_ptr);
+			spin_lock(&hb2->lock);
+			BUG_ON(&hb2->lock != q.lock_ptr);
 			ret = fixup_pi_state_owner(uaddr2, &q, current);
 			/*
 			 * Drop the reference to the pi state which
 			 * the requeue_pi() code acquired for us.
 			 */
 			put_pi_state(q.pi_state);
-			spin_unlock(q.lock_ptr);
+			spin_unlock(&hb2->lock);
 			/*
 			 * Adjust the return value. It's either -EFAULT or
 			 * success (1) but the caller expects 0 for success.
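Taking &hb2->lock instead of q.lock_ptr matters because the requeue path rewrites q->lock_ptr while this task may still be racing with it, whereas hb2 = hash_futex(&key2) is a deterministic function of the key and needs no lock to compute. The BUG_ON then asserts that by the time we block here the requeue has completed and both names denote the same lock. A sketch of the invariant being checked:

```c
	/*
	 * Sketch: the requeue side is assumed to publish the new lock
	 * pointer while holding both hash-bucket locks, roughly
	 *
	 *	q->lock_ptr = &hb2->lock;
	 *
	 * so once the waiter blocks on hb2->lock itself, the requeue
	 * must have finished and the two names must agree:
	 */
	spin_lock(&hb2->lock);
	BUG_ON(&hb2->lock != q.lock_ptr);
```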
@@ -3421,7 +3486,8 @@ futex_wait_requeue_pi()
 		pi_mutex = &q.pi_state->pi_mutex;
 		ret = rt_mutex_wait_proxy_lock(pi_mutex, to, &rt_waiter);
 
-		spin_lock(q.lock_ptr);
+		spin_lock(&hb2->lock);
+		BUG_ON(&hb2->lock != q.lock_ptr);
 		if (ret && !rt_mutex_cleanup_proxy_lock(pi_mutex, &rt_waiter))
 			ret = 0;
 
|---|