.. | .. |
962 | 962 | if (head->next != next) { |
963 | 963 | /* retain curr->pi_lock for the loop invariant */ |
964 | 964 | raw_spin_unlock(&pi_state->pi_mutex.wait_lock); |
| 965 | + raw_spin_unlock_irq(&curr->pi_lock); |
965 | 966 | spin_unlock(&hb->lock); |
| 967 | + raw_spin_lock_irq(&curr->pi_lock); |
966 | 968 | put_pi_state(pi_state); |
967 | 969 | continue; |
968 | 970 | } |
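The hunk above has to drop curr->pi_lock around the spin_unlock(&hb->lock) because on PREEMPT_RT a spinlock_t is substituted by a sleeping rt_mutex, which must not be locked or unlocked while a raw spinlock is held with interrupts off. A minimal kernel-style sketch of that rule (illustration only, not code from the patch):

```c
/*
 * Illustration: curr->pi_lock is a raw_spinlock_t (always atomic,
 * IRQs off); hb->lock is a spinlock_t, which PREEMPT_RT turns into a
 * sleeping rt_mutex.  Touching the sleeping lock inside the raw,
 * IRQ-off section would be a might-sleep violation, hence:
 */
raw_spin_unlock_irq(&curr->pi_lock);	/* leave the atomic section        */
spin_unlock(&hb->lock);			/* may sleep on PREEMPT_RT         */
raw_spin_lock_irq(&curr->pi_lock);	/* re-establish the loop invariant */
```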
.. | .. |
1573 | 1575 | struct task_struct *new_owner; |
1574 | 1576 | bool postunlock = false; |
1575 | 1577 | DEFINE_WAKE_Q(wake_q); |
| 1578 | + DEFINE_WAKE_Q(wake_sleeper_q); |
1576 | 1579 | int ret = 0; |
1577 | 1580 | |
1578 | 1581 | new_owner = rt_mutex_next_owner(&pi_state->pi_mutex); |
.. | .. |
1622 | 1625 | * not fail. |
1623 | 1626 | */ |
1624 | 1627 | pi_state_update_owner(pi_state, new_owner); |
1625 | | - postunlock = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q); |
| 1628 | + postunlock = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q, |
| 1629 | + &wake_sleeper_q); |
1626 | 1630 | } |
1627 | 1631 | |
1628 | 1632 | out_unlock: |
1629 | 1633 | raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); |
1630 | 1634 | |
1631 | 1635 | if (postunlock) |
1632 | | - rt_mutex_postunlock(&wake_q); |
| 1636 | + rt_mutex_postunlock(&wake_q, &wake_sleeper_q); |
1633 | 1637 | |
1634 | 1638 | return ret; |
1635 | 1639 | } |
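wake_futex_pi() now builds two deferred-wakeup lists: regular waiters stay on wake_q, while the RT tree adds wake_sleeper_q for waiters blocked on its "sleeping spinlock" substitutions. The three-argument __rt_mutex_futex_unlock() and two-argument rt_mutex_postunlock() are RT-tree variants; beyond what the hunks show, the behavior described here is an assumption. A condensed sketch of the shape:

```c
static bool wake_futex_pi_sketch(struct futex_pi_state *pi_state)
{
	DEFINE_WAKE_Q(wake_q);		/* ordinary task wakeups             */
	DEFINE_WAKE_Q(wake_sleeper_q);	/* RT "sleeping spinlock" waiters    */
	bool postunlock;

	raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
	/* ... select the new owner, update pi_state ownership ... */
	postunlock = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q,
					     &wake_sleeper_q);
	raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);

	/* Actual wakeups are issued only after every lock is dropped. */
	if (postunlock)
		rt_mutex_postunlock(&wake_q, &wake_sleeper_q);
	return postunlock;
}
```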
.. | .. |
2253 | 2257 | requeue_pi_wake_futex(this, &key2, hb2); |
2254 | 2258 | drop_count++; |
2255 | 2259 | continue; |
| 2260 | + } else if (ret == -EAGAIN) { |
| 2261 | + /* |
| 2262 | + * Waiter was woken by timeout or |
| 2263 | + * signal and has set pi_blocked_on to |
| 2264 | + * PI_WAKEUP_INPROGRESS before we |
| 2265 | + * tried to enqueue it on the rtmutex. |
| 2266 | + */ |
| 2267 | + this->pi_state = NULL; |
| 2268 | + put_pi_state(pi_state); |
| 2269 | + continue; |
2256 | 2270 | } else if (ret) { |
2257 | 2271 | /* |
2258 | 2272 | * rt_mutex_start_proxy_lock() detected a |
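The new -EAGAIN branch is the requeue-side half of a handshake with futex_wait_requeue_pi() (see the large hunk further down): a waiter that is already waking up marks itself with the PI_WAKEUP_INPROGRESS sentinel under its pi_lock, rt_mutex_start_proxy_lock() then refuses to enqueue it, and the requeue loop drops its references and moves on. Condensed, with both sides taken from this patch (PI_WAKEUP_INPROGRESS itself is an RT-tree sentinel value):

```c
/* Waiter side, futex_wait_requeue_pi() (later hunk): */
raw_spin_lock_irq(&current->pi_lock);
if (!current->pi_blocked_on)
	current->pi_blocked_on = PI_WAKEUP_INPROGRESS;	/* "do not requeue me" */
raw_spin_unlock_irq(&current->pi_lock);

/* Requeue side, futex_requeue() (this hunk): */
ret = rt_mutex_start_proxy_lock(&pi_state->pi_mutex,
				this->rt_waiter, this->task);
if (ret == -EAGAIN) {		/* waiter already woken by timeout/signal */
	this->pi_state = NULL;
	put_pi_state(pi_state);
	continue;		/* skip this waiter, keep requeueing */
}
```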
.. | .. |
2816 | 2830 | if (abs_time) { |
2817 | 2831 | to = &timeout; |
2818 | 2832 | |
2819 | | - hrtimer_init_on_stack(&to->timer, (flags & FLAGS_CLOCKRT) ? |
2820 | | - CLOCK_REALTIME : CLOCK_MONOTONIC, |
2821 | | - HRTIMER_MODE_ABS); |
2822 | | - hrtimer_init_sleeper(to, current); |
| 2833 | + hrtimer_init_sleeper_on_stack(to, (flags & FLAGS_CLOCKRT) ? |
| 2834 | + CLOCK_REALTIME : CLOCK_MONOTONIC, |
| 2835 | + HRTIMER_MODE_ABS, current); |
2823 | 2836 | hrtimer_set_expires_range_ns(&to->timer, *abs_time, |
2824 | 2837 | current->timer_slack_ns); |
2825 | 2838 | } |
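The timeout setup here, in futex_lock_pi() and in futex_wait_requeue_pi() below, switches from the two-step on-stack init to the combined hrtimer_init_sleeper_on_stack(); the four-argument form shown is the one this tree uses. Side by side (clock selection elided):

```c
/* Old: timer and sleeper initialized in two steps */
hrtimer_init_on_stack(&to->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
hrtimer_init_sleeper(to, current);

/* New (as in this patch): one call initializes both */
hrtimer_init_sleeper_on_stack(to, CLOCK_MONOTONIC, HRTIMER_MODE_ABS, current);
hrtimer_set_expires_range_ns(&to->timer, *abs_time, current->timer_slack_ns);
```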
.. | .. |
2917 | 2930 | |
2918 | 2931 | if (time) { |
2919 | 2932 | to = &timeout; |
2920 | | - hrtimer_init_on_stack(&to->timer, CLOCK_REALTIME, |
2921 | | - HRTIMER_MODE_ABS); |
2922 | | - hrtimer_init_sleeper(to, current); |
| 2933 | + hrtimer_init_sleeper_on_stack(to, CLOCK_REALTIME, |
| 2934 | + HRTIMER_MODE_ABS, current); |
2923 | 2935 | hrtimer_set_expires(&to->timer, *time); |
2924 | 2936 | } |
2925 | 2937 | |
.. | .. |
2982 | 2994 | goto no_block; |
2983 | 2995 | } |
2984 | 2996 | |
2985 | | - rt_mutex_init_waiter(&rt_waiter); |
| 2997 | + rt_mutex_init_waiter(&rt_waiter, false); |
2986 | 2998 | |
2987 | 2999 | /* |
2988 | 3000 | * On PREEMPT_RT_FULL, when hb->lock becomes an rt_mutex, we must not |
.. | .. |
2998 | 3010 | * before __rt_mutex_start_proxy_lock() is done. |
2999 | 3011 | */ |
3000 | 3012 | raw_spin_lock_irq(&q.pi_state->pi_mutex.wait_lock); |
| 3013 | + /* |
| 3014 | + * the migrate_disable() here disables migration in the in_atomic() fast |
| 3015 | + * path which is enabled again in the following spin_unlock(). We have |
| 3016 | + * one migrate_disable() pending in the slow-path which is reversed |
| 3017 | + * after the raw_spin_unlock_irq() where we leave the atomic context. |
| 3018 | + */ |
| 3019 | + migrate_disable(); |
| 3020 | + |
3001 | 3021 | spin_unlock(q.lock_ptr); |
3002 | 3022 | /* |
3003 | 3023 | * __rt_mutex_start_proxy_lock() unconditionally enqueues the @rt_waiter |
.. | .. |
3006 | 3026 | */ |
3007 | 3027 | ret = __rt_mutex_start_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter, current); |
3008 | 3028 | raw_spin_unlock_irq(&q.pi_state->pi_mutex.wait_lock); |
| 3029 | + migrate_enable(); |
3009 | 3030 | |
3010 | 3031 | if (ret) { |
3011 | 3032 | if (ret == 1) |
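The migrate_disable()/migrate_enable() pair added here follows the new comment: in this RT tree the sleeping spin_lock()/spin_unlock() manage migration internally, so the explicit migrate_disable() is consumed by the implicit enable inside the following spin_unlock(), and the migrate_disable() still pending from the slow path is only reversed by the explicit migrate_enable() after raw_spin_unlock_irq() has left the atomic context. Condensed view (calls as in the hunk; the implicit migration handling of RT's sleeping locks is assumed from the comment):

```c
raw_spin_lock_irq(&q.pi_state->pi_mutex.wait_lock);

migrate_disable();		/* pre-pays the enable inside spin_unlock()     */
spin_unlock(q.lock_ptr);	/* hb->lock: a sleeping lock on RT              */

ret = __rt_mutex_start_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter, current);

raw_spin_unlock_irq(&q.pi_state->pi_mutex.wait_lock);
migrate_enable();		/* reverses the disable left over from the slow path */
```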
.. | .. |
3140 | 3161 | * rt_waiter. Also see the WARN in wake_futex_pi(). |
3141 | 3162 | */ |
3142 | 3163 | raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock); |
| 3164 | + /* |
| 3165 | + * Magic trickery for now to make the RT migrate disable |
| 3166 | + * logic happy. The following spin_unlock() happens with |
| 3167 | + * interrupts disabled so the internal migrate_enable() |
| 3168 | + * won't undo the migrate_disable() which was issued when |
| 3169 | + * locking hb->lock. |
| 3170 | + */ |
| 3171 | + migrate_disable(); |
3143 | 3172 | spin_unlock(&hb->lock); |
3144 | 3173 | |
3145 | 3174 | /* drops pi_state->pi_mutex.wait_lock */ |
3146 | 3175 | ret = wake_futex_pi(uaddr, uval, pi_state); |
| 3176 | + migrate_enable(); |
3147 | 3177 | |
3148 | 3178 | put_pi_state(pi_state); |
3149 | 3179 | |
.. | .. |
3314 | 3344 | { |
3315 | 3345 | struct hrtimer_sleeper timeout, *to = NULL; |
3316 | 3346 | struct rt_mutex_waiter rt_waiter; |
3317 | | - struct futex_hash_bucket *hb; |
| 3347 | + struct futex_hash_bucket *hb, *hb2; |
3318 | 3348 | union futex_key key2 = FUTEX_KEY_INIT; |
3319 | 3349 | struct futex_q q = futex_q_init; |
3320 | 3350 | int res, ret; |
.. | .. |
3330 | 3360 | |
3331 | 3361 | if (abs_time) { |
3332 | 3362 | to = &timeout; |
3333 | | - hrtimer_init_on_stack(&to->timer, (flags & FLAGS_CLOCKRT) ? |
3334 | | - CLOCK_REALTIME : CLOCK_MONOTONIC, |
3335 | | - HRTIMER_MODE_ABS); |
3336 | | - hrtimer_init_sleeper(to, current); |
| 3363 | + hrtimer_init_sleeper_on_stack(to, (flags & FLAGS_CLOCKRT) ? |
| 3364 | + CLOCK_REALTIME : CLOCK_MONOTONIC, |
| 3365 | + HRTIMER_MODE_ABS, current); |
3337 | 3366 | hrtimer_set_expires_range_ns(&to->timer, *abs_time, |
3338 | 3367 | current->timer_slack_ns); |
3339 | 3368 | } |
.. | .. |
3342 | 3371 | * The waiter is allocated on our stack, manipulated by the requeue |
3343 | 3372 | * code while we sleep on uaddr. |
3344 | 3373 | */ |
3345 | | - rt_mutex_init_waiter(&rt_waiter); |
| 3374 | + rt_mutex_init_waiter(&rt_waiter, false); |
3346 | 3375 | |
3347 | 3376 | ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, VERIFY_WRITE); |
3348 | 3377 | if (unlikely(ret != 0)) |
.. | .. |
3373 | 3402 | /* Queue the futex_q, drop the hb lock, wait for wakeup. */ |
3374 | 3403 | futex_wait_queue_me(hb, &q, to); |
3375 | 3404 | |
3376 | | - spin_lock(&hb->lock); |
3377 | | - ret = handle_early_requeue_pi_wakeup(hb, &q, &key2, to); |
3378 | | - spin_unlock(&hb->lock); |
3379 | | - if (ret) |
3380 | | - goto out_put_keys; |
| 3405 | + /* |
| 3406 | + * On RT we must avoid races with requeue and trying to block |
| 3407 | + * on two mutexes (hb->lock and uaddr2's rtmutex) by |
| 3408 | + * serializing access to pi_blocked_on with pi_lock. |
| 3409 | + */ |
| 3410 | + raw_spin_lock_irq(&current->pi_lock); |
| 3411 | + if (current->pi_blocked_on) { |
| 3412 | + /* |
| 3413 | + * We have been requeued or are in the process of |
| 3414 | + * being requeued. |
| 3415 | + */ |
| 3416 | + raw_spin_unlock_irq(&current->pi_lock); |
| 3417 | + } else { |
| 3418 | + /* |
| 3419 | + * Setting pi_blocked_on to PI_WAKEUP_INPROGRESS |
| 3420 | + * prevents a concurrent requeue from moving us to the |
| 3421 | + * uaddr2 rtmutex. After that we can safely acquire |
| 3422 | + * (and possibly block on) hb->lock. |
| 3423 | + */ |
| 3424 | + current->pi_blocked_on = PI_WAKEUP_INPROGRESS; |
| 3425 | + raw_spin_unlock_irq(&current->pi_lock); |
| 3426 | + |
| 3427 | + spin_lock(&hb->lock); |
| 3428 | + |
| 3429 | + /* |
| 3430 | + * Clean up pi_blocked_on. We might leak it otherwise |
| 3431 | + * when we succeeded with the hb->lock in the fast |
| 3432 | + * path. |
| 3433 | + */ |
| 3434 | + raw_spin_lock_irq(&current->pi_lock); |
| 3435 | + current->pi_blocked_on = NULL; |
| 3436 | + raw_spin_unlock_irq(&current->pi_lock); |
| 3437 | + |
| 3438 | + ret = handle_early_requeue_pi_wakeup(hb, &q, &key2, to); |
| 3439 | + spin_unlock(&hb->lock); |
| 3440 | + if (ret) |
| 3441 | + goto out_put_keys; |
| 3442 | + } |
3381 | 3443 | |
3382 | 3444 | /* |
3383 | | - * In order for us to be here, we know our q.key == key2, and since |
3384 | | - * we took the hb->lock above, we also know that futex_requeue() has |
3385 | | - * completed and we no longer have to concern ourselves with a wakeup |
3386 | | - * race with the atomic proxy lock acquisition by the requeue code. The |
3387 | | - * futex_requeue dropped our key1 reference and incremented our key2 |
3388 | | - * reference count. |
| 3445 | + * In order to be here, we have either been requeued, are in |
| 3446 | + * the process of being requeued, or requeue successfully |
| 3447 | + * acquired uaddr2 on our behalf. If pi_blocked_on was |
| 3448 | + * non-null above, we may be racing with a requeue. Do not |
| 3449 | + * rely on q->lock_ptr to be hb2->lock until after blocking on |
| 3450 | + * hb->lock or hb2->lock. The futex_requeue dropped our key1 |
| 3451 | + * reference and incremented our key2 reference count. |
3389 | 3452 | */ |
| 3453 | + hb2 = hash_futex(&key2); |
3390 | 3454 | |
3391 | 3455 | /* Check if the requeue code acquired the second futex for us. */ |
3392 | 3456 | if (!q.rt_waiter) { |
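Because the waiter may still be racing with futex_requeue() at this point, q.lock_ptr is not trustworthy until the task has blocked on hb->lock or hb2->lock (per the rewritten comment above). The tail of the function therefore derives the second bucket from key2 itself and only cross-checks q.lock_ptr once the requeue must have settled; condensed from this and the following hunks:

```c
hb2 = hash_futex(&key2);	/* bucket for uaddr2, independent of q.lock_ptr */

spin_lock(&hb2->lock);
BUG_ON(&hb2->lock != q.lock_ptr);	/* requeue has settled by now */
/* ... fixup_pi_state_owner() / rt_mutex_cleanup_proxy_lock() ... */
spin_unlock(&hb2->lock);
```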
.. | .. |
3395 | 3459 | * did a lock-steal - fix up the PI-state in that case. |
3396 | 3460 | */ |
3397 | 3461 | if (q.pi_state && (q.pi_state->owner != current)) { |
3398 | | - spin_lock(q.lock_ptr); |
| 3462 | + spin_lock(&hb2->lock); |
| 3463 | + BUG_ON(&hb2->lock != q.lock_ptr); |
3399 | 3464 | ret = fixup_pi_state_owner(uaddr2, &q, current); |
3400 | 3465 | /* |
3401 | 3466 | * Drop the reference to the pi state which |
3402 | 3467 | * the requeue_pi() code acquired for us. |
3403 | 3468 | */ |
3404 | 3469 | put_pi_state(q.pi_state); |
3405 | | - spin_unlock(q.lock_ptr); |
| 3470 | + spin_unlock(&hb2->lock); |
3406 | 3471 | /* |
3407 | 3472 | * Adjust the return value. It's either -EFAULT or |
3408 | 3473 | * success (1) but the caller expects 0 for success. |
.. | .. |
3421 | 3486 | pi_mutex = &q.pi_state->pi_mutex; |
3422 | 3487 | ret = rt_mutex_wait_proxy_lock(pi_mutex, to, &rt_waiter); |
3423 | 3488 | |
3424 | | - spin_lock(q.lock_ptr); |
| 3489 | + spin_lock(&hb2->lock); |
| 3490 | + BUG_ON(&hb2->lock != q.lock_ptr); |
3425 | 3491 | if (ret && !rt_mutex_cleanup_proxy_lock(pi_mutex, &rt_waiter)) |
3426 | 3492 | ret = 0; |
3427 | 3493 | |