@@ -1072,12 +1072,43 @@ static int attach_to_pi_state(u32 uval, struct futex_pi_state *pi_state,
return 0;
}
+/**
+ * wait_for_owner_exiting - Block until the owner has exited
+ * @exiting: Pointer to the exiting task
+ *
+ * Caller must hold a refcount on @exiting.
+ */
+static void wait_for_owner_exiting(int ret, struct task_struct *exiting)
+{
+ if (ret != -EBUSY) {
+ WARN_ON_ONCE(exiting);
+ return;
+ }
+
+ if (WARN_ON_ONCE(ret == -EBUSY && !exiting))
+ return;
+
+ mutex_lock(&exiting->futex_exit_mutex);
+ /*
+ * No point in doing state checking here. If the waiter got here
+ * while the task was in exec()->exec_futex_release() then it can
+ * have any FUTEX_STATE_* value when the waiter has acquired the
+ * mutex. OK, if running, EXITING or DEAD if it reached exit()
+ * already. Highly unlikely and not a problem. Just one more round
+ * through the futex maze.
+ */
+ mutex_unlock(&exiting->futex_exit_mutex);
+
+ put_task_struct(exiting);
+}
+
/*
* Lookup the task for the TID provided from user space and attach to
* it after doing proper sanity checks.
*/
static int attach_to_pi_owner(u32 uval, union futex_key *key,
- struct futex_pi_state **ps)
+ struct futex_pi_state **ps,
+ struct task_struct **exiting)
{
pid_t pid = uval & FUTEX_TID_MASK;
struct futex_pi_state *pi_state;
@@ -1113,7 +1144,19 @@ static int attach_to_pi_owner(u32 uval, union futex_key *key,
int ret = (p->futex_state = FUTEX_STATE_DEAD) ? -ESRCH : -EAGAIN;
raw_spin_unlock_irq(&p->pi_lock);
- put_task_struct(p);
+ /*
+ * If the owner task is between FUTEX_STATE_EXITING and
+ * FUTEX_STATE_DEAD then store the task pointer and keep
+ * the reference on the task struct. The calling code will
+ * drop all locks, wait for the task to reach
+ * FUTEX_STATE_DEAD and then drop the refcount. This is
+ * required to prevent a live lock when the current task
+ * preempted the exiting task between the two states.
+ */
+ if (ret == -EBUSY)
+ *exiting = p;
+ else
+ put_task_struct(p);
return ret;
}
@@ -1144,7 +1187,8 @@ static int attach_to_pi_owner(u32 uval, union futex_key *key,
}
static int lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
- union futex_key *key, struct futex_pi_state **ps)
+ union futex_key *key, struct futex_pi_state **ps,
+ struct task_struct **exiting)
{
struct futex_q *match = futex_top_waiter(hb, key);
@@ -1159,7 +1203,7 @@ static int lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
* We are the first waiter - try to look up the owner based on
* @uval and attach to it.
*/
- return attach_to_pi_owner(uval, key, ps);
+ return attach_to_pi_owner(uval, key, ps, exiting);
}
static int lock_pi_update_atomic(u32 __user *uaddr, u32 uval, u32 newval)
@@ -1185,6 +1229,8 @@ static int lock_pi_update_atomic(u32 __user *uaddr, u32 uval, u32 newval)
* lookup
* @task: the task to perform the atomic lock work for. This will
* be "current" except in the case of requeue pi.
+ * @exiting: Pointer to store the task pointer of the owner task
+ * which is in the middle of exiting
* @set_waiters: force setting the FUTEX_WAITERS bit (1) or not (0)
*
* Return:
@@ -1193,11 +1239,17 @@ static int lock_pi_update_atomic(u32 __user *uaddr, u32 uval, u32 newval)
* <0 - error
*
* The hb->lock and futex_key refs shall be held by the caller.
+ *
+ * @exiting is only set when the return value is -EBUSY. If so, this holds
+ * a refcount on the exiting task on return and the caller needs to drop it
+ * after waiting for the exit to complete.
*/
static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb,
union futex_key *key,
struct futex_pi_state **ps,
- struct task_struct *task, int set_waiters)
+ struct task_struct *task,
+ struct task_struct **exiting,
+ int set_waiters)
{
u32 uval, newval, vpid = task_pid_vnr(task);
struct futex_q *match;
@@ -1267,7 +1319,7 @@ static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb,
* attach to the owner. If that fails, no harm done, we only
* set the FUTEX_WAITERS bit in the user space variable.
*/
- return attach_to_pi_owner(uval, key, ps);
+ return attach_to_pi_owner(uval, key, ps, exiting);
}
/**
@@ -1693,6 +1745,8 @@ void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key,
* @key1: the from futex key
* @key2: the to futex key
* @ps: address to store the pi_state pointer
+ * @exiting: Pointer to store the task pointer of the owner task
+ * which is in the middle of exiting
* @set_waiters: force setting the FUTEX_WAITERS bit (1) or not (0)
*
* Try and get the lock on behalf of the top waiter if we can do it atomically.
@@ -1700,16 +1754,20 @@ void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key,
* then direct futex_lock_pi_atomic() to force setting the FUTEX_WAITERS bit.
* hb1 and hb2 must be held by the caller.
*
+ * @exiting is only set when the return value is -EBUSY. If so, this holds
+ * a refcount on the exiting task on return and the caller needs to drop it
+ * after waiting for the exit to complete.
+ *
* Return:
* 0 - failed to acquire the lock atomically;
* >0 - acquired the lock, return value is vpid of the top_waiter
* <0 - error
*/
-static int futex_proxy_trylock_atomic(u32 __user *pifutex,
- struct futex_hash_bucket *hb1,
- struct futex_hash_bucket *hb2,
- union futex_key *key1, union futex_key *key2,
- struct futex_pi_state **ps, int set_waiters)
+static int
+futex_proxy_trylock_atomic(u32 __user *pifutex, struct futex_hash_bucket *hb1,
+ struct futex_hash_bucket *hb2, union futex_key *key1,
+ union futex_key *key2, struct futex_pi_state **ps,
+ struct task_struct **exiting, int set_waiters)
{
struct futex_q *top_waiter = NULL;
u32 curval;
@@ -1746,7 +1804,7 @@ static int futex_proxy_trylock_atomic(u32 __user *pifutex,
*/
vpid = task_pid_vnr(top_waiter->task);
ret = futex_lock_pi_atomic(pifutex, hb2, key2, ps, top_waiter->task,
- set_waiters);
+ exiting, set_waiters);
if (ret == 1) {
requeue_pi_wake_futex(top_waiter, key2, hb2);
return vpid;
@@ -1866,6 +1924,8 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
}
if (requeue_pi && (task_count - nr_wake < nr_requeue)) {
+ struct task_struct *exiting = NULL;
+
/*
* Attempt to acquire uaddr2 and wake the top waiter. If we
* intend to requeue waiters, force setting the FUTEX_WAITERS
@@ -1873,7 +1933,8 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
* faults rather in the requeue loop below.
*/
ret = futex_proxy_trylock_atomic(uaddr2, hb1, hb2, &key1,
- &key2, &pi_state, nr_requeue);
+ &key2, &pi_state,
+ &exiting, nr_requeue);
/*
* At this point the top_waiter has either taken uaddr2 or is
@@ -1900,7 +1961,8 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
* If that call succeeds then we have pi_state and an
* initial refcount on it.
*/
- ret = lookup_pi_state(ret, hb2, &key2, &pi_state);
+ ret = lookup_pi_state(ret, hb2, &key2,
+ &pi_state, &exiting);
}
switch (ret) {
@@ -1930,6 +1992,12 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
hb_waiters_dec(hb2);
put_futex_key(&key2);
put_futex_key(&key1);
+ /*
+ * Handle the case where the owner is in the middle of
+ * exiting. Wait for the exit to complete otherwise
+ * this task might loop forever, aka. live lock.
+ */
+ wait_for_owner_exiting(ret, exiting);
cond_resched();
goto retry;
default:
@@ -2580,6 +2648,7 @@ static int futex_lock_pi(u32 __user *uaddr, unsigned int flags,
ktime_t *time, int trylock)
{
struct hrtimer_sleeper timeout, *to = NULL;
+ struct task_struct *exiting = NULL;
struct futex_hash_bucket *hb;
struct futex_q q = futex_q_init;
int res, ret;
@@ -2603,7 +2672,8 @@ static int futex_lock_pi(u32 __user *uaddr, unsigned int flags,
retry_private:
hb = queue_lock(&q);
- ret = futex_lock_pi_atomic(uaddr, hb, &q.key, &q.pi_state, current, 0);
+ ret = futex_lock_pi_atomic(uaddr, hb, &q.key, &q.pi_state, current,
+ &exiting, 0);
if (unlikely(ret)) {
/*
* Atomic work succeeded and we got the lock,
@@ -2626,6 +2696,12 @@ static int futex_lock_pi(u32 __user *uaddr, unsigned int flags,
*/
queue_unlock(hb);
put_futex_key(&q.key);
+ /*
+ * Handle the case where the owner is in the middle of
+ * exiting. Wait for the exit to complete otherwise
+ * this task might loop forever, aka. live lock.
+ */
+ wait_for_owner_exiting(ret, exiting);
cond_resched();
goto retry;
default: