commit 849a4ece87943933826b5afff5f8d9bcc8bd03b5
Author: Torvald Riegel <triegel@redhat.com>
Date: Sat Dec 24 00:40:46 2016 +0100
Add compiler barriers around modifications of the robust mutex list.
Changes to the per-thread list of currently acquired robust mutexes, as well
as to the pending-operations entry, are not simply sequential code: they are
effectively concurrent with any actions the kernel takes when it tries to
clean up after a crash. This is not quite like multi-thread concurrency but
more like signal-handler concurrency.
This patch fixes latent bugs by adding compiler barriers where necessary,
ensuring that the kernel's crash handling sees consistent data.
* nptl/descr.h (ENQUEUE_MUTEX_BOTH, DEQUEUE_MUTEX): Add compiler
barriers and comments.
* nptl/pthread_mutex_lock.c (__pthread_mutex_lock_full): Likewise.
* nptl/pthread_mutex_timedlock.c (pthread_mutex_timedlock): Likewise.
* nptl/pthread_mutex_unlock.c (__pthread_mutex_unlock_full): Likewise.
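The barrier used throughout the patch, __asm ("" ::: "memory"), constrains
only the compiler; since the commit message compares this to signal-handler
concurrency rather than cross-CPU concurrency, a compiler barrier is what the
patch relies on. The FIXME added to descr.h points toward expressing the same
constraint with relaxed MO atomics and a signal fence. A minimal sketch of the
two equivalent forms (the type and function names are illustrative, not glibc
identifiers):

    /* Sketch only; nothing below is a glibc identifier.  */
    #include <stdatomic.h>

    struct entry { struct entry *next; };

    static struct entry *list_head;        /* Stands in for robust_head.list.  */
    static _Atomic (struct entry *) atomic_list_head;

    /* Variant used by the patch: two plain stores separated by a compiler
       barrier, so the entry is fully initialized before it becomes
       reachable from the list head.  */
    static void
    enqueue_with_asm_barrier (struct entry *e)
    {
      e->next = list_head;
      __asm ("" ::: "memory");
      list_head = e;
    }

    /* The direction the FIXME points at: relaxed MO atomic accesses plus
       a signal fence, which constrains the compiler in the same way while
       expressing the intent within the C11 memory model.  */
    static void
    enqueue_with_signal_fence (struct entry *e)
    {
      e->next = atomic_load_explicit (&atomic_list_head, memory_order_relaxed);
      atomic_signal_fence (memory_order_release);
      atomic_store_explicit (&atomic_list_head, e, memory_order_relaxed);
    }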
diff --git a/nptl/descr.h b/nptl/descr.h
--- a/nptl/descr.h
+++ b/nptl/descr.h
@@ -179,7 +179,16 @@ struct pthread
but the pointer to the next/previous element of the list points
in the middle of the object, the __next element. Whenever
casting to __pthread_list_t we need to adjust the pointer
- first. */
+ first.
+ These operations are effectively concurrent code in that the thread
+ can get killed at any point in time and the kernel then takes over.
+ Thus, the __next elements form a kind of concurrent list, and we must
+ use compiler barriers to ensure that the individual operations are
+ ordered such that the kernel always sees a consistent list. The
+ backward links (i.e., the __prev elements) are not used by the kernel.
+ FIXME We should use relaxed MO atomic operations here and signal
+ fences because this kind of concurrency is similar to synchronizing
+ with a signal handler. */
# define QUEUE_PTR_ADJUST (offsetof (__pthread_list_t, __next))
# define ENQUEUE_MUTEX_BOTH(mutex, val) \
@@ -191,6 +200,8 @@ struct pthread
mutex->__data.__list.__next = THREAD_GETMEM (THREAD_SELF, \
robust_head.list); \
mutex->__data.__list.__prev = (void *) &THREAD_SELF->robust_head; \
+ /* Ensure that the new list entry is ready before we insert it. */ \
+ __asm ("" ::: "memory"); \
THREAD_SETMEM (THREAD_SELF, robust_head.list, \
(void *) (((uintptr_t) &mutex->__data.__list.__next) \
| val)); \
@@ -205,6 +216,9 @@ struct pthread
((char *) (((uintptr_t) mutex->__data.__list.__prev) & ~1ul) \
- QUEUE_PTR_ADJUST); \
prev->__next = mutex->__data.__list.__next; \
+ /* Ensure that we remove the entry from the list before we change the \
+ __next pointer of the entry, which is read by the kernel. */ \
+ __asm ("" ::: "memory"); \
mutex->__data.__list.__prev = NULL; \
mutex->__data.__list.__next = NULL; \
} while (0)
@@ -219,6 +233,8 @@ struct pthread
do { \
mutex->__data.__list.__next \
= THREAD_GETMEM (THREAD_SELF, robust_list.__next); \
+ /* Ensure that the new list entry is ready before we insert it. */ \
+ __asm ("" ::: "memory"); \
THREAD_SETMEM (THREAD_SELF, robust_list.__next, \
(void *) (((uintptr_t) &mutex->__data.__list) | val)); \
} while (0)
@@ -239,6 +255,9 @@ struct pthread
} \
\
runp->__next = next->__next; \
+ /* Ensure that we remove the entry from the list before we change the \
+ __next pointer of the entry, which is read by the kernel. */ \
+ __asm ("" ::: "memory"); \
mutex->__data.__list.__next = NULL; \
} \
} while (0)
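The barriers in ENQUEUE_MUTEX_BOTH and DEQUEUE_MUTEX above enforce a single
invariant: wherever the thread may die, the __next chain that the kernel
walks must describe a complete list. A minimal model of the two operations,
with the states the kernel could observe spelled out in comments (node and
the function names are illustrative, not the actual macros):

    #include <stddef.h>

    struct node { struct node *next; };

    /* Model of ENQUEUE_MUTEX_BOTH: prepare the entry, then publish it
       with a single store.  */
    static void
    model_enqueue (struct node **head, struct node *entry)
    {
      /* If the thread dies here, the entry is not yet reachable from
         *head, so the kernel simply does not see it.  */
      entry->next = *head;
      __asm ("" ::: "memory");
      /* If the thread dies after this store, the kernel sees a complete
         list that includes the entry.  Without the barrier, the compiler
         could legally reorder the two stores and briefly expose an entry
         whose next pointer is garbage.  */
      *head = entry;
    }

    /* Model of DEQUEUE_MUTEX: unlink first, clear the entry's own next
       pointer only afterwards.  */
    static void
    model_dequeue (struct node **prev_next, struct node *entry)
    {
      /* Unlinking is a single store, so the list stays consistent whether
         or not it has taken effect when the thread dies.  */
      *prev_next = entry->next;
      __asm ("" ::: "memory");
      /* Clearing next any earlier could truncate the list as the kernel
         sees it, because the kernel may still reach this entry, e.g.
         through list_op_pending.  */
      entry->next = NULL;
    }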
diff --git a/nptl/pthread_mutex_lock.c b/nptl/pthread_mutex_lock.c
--- a/nptl/pthread_mutex_lock.c
+++ b/nptl/pthread_mutex_lock.c
@@ -180,6 +180,9 @@ __pthread_mutex_lock_full (pthread_mutex_t *mutex)
case PTHREAD_MUTEX_ROBUST_ADAPTIVE_NP:
THREAD_SETMEM (THREAD_SELF, robust_head.list_op_pending,
&mutex->__data.__list.__next);
+ /* We need to set op_pending before starting the operation. Also
+ see comments at ENQUEUE_MUTEX. */
+ __asm ("" ::: "memory");
oldval = mutex->__data.__lock;
/* This is set to FUTEX_WAITERS iff we might have shared the
@@ -227,7 +230,12 @@ __pthread_mutex_lock_full (pthread_mutex_t *mutex)
/* But it is inconsistent unless marked otherwise. */
mutex->__data.__owner = PTHREAD_MUTEX_INCONSISTENT;
+ /* We must not enqueue the mutex before we have acquired it.
+ Also see comments at ENQUEUE_MUTEX. */
+ __asm ("" ::: "memory");
ENQUEUE_MUTEX (mutex);
+ /* We need to clear op_pending after we enqueue the mutex. */
+ __asm ("" ::: "memory");
THREAD_SETMEM (THREAD_SELF, robust_head.list_op_pending, NULL);
/* Note that we deliberately exit here. If we fall
@@ -249,6 +257,8 @@ __pthread_mutex_lock_full (pthread_mutex_t *mutex)
int kind = PTHREAD_MUTEX_TYPE (mutex);
if (kind == PTHREAD_MUTEX_ROBUST_ERRORCHECK_NP)
{
+ /* We do not need to ensure ordering wrt another memory
+ access. Also see comments at ENQUEUE_MUTEX. */
THREAD_SETMEM (THREAD_SELF, robust_head.list_op_pending,
NULL);
return EDEADLK;
@@ -256,6 +266,8 @@ __pthread_mutex_lock_full (pthread_mutex_t *mutex)
if (kind == PTHREAD_MUTEX_ROBUST_RECURSIVE_NP)
{
+ /* We do not need to ensure ordering wrt another memory
+ access. */
THREAD_SETMEM (THREAD_SELF, robust_head.list_op_pending,
NULL);
@@ -308,12 +320,19 @@ __pthread_mutex_lock_full (pthread_mutex_t *mutex)
mutex->__data.__count = 0;
int private = PTHREAD_ROBUST_MUTEX_PSHARED (mutex);
lll_unlock (mutex->__data.__lock, private);
+ /* FIXME This violates the mutex destruction requirements. See
+ __pthread_mutex_unlock_full. */
THREAD_SETMEM (THREAD_SELF, robust_head.list_op_pending, NULL);
return ENOTRECOVERABLE;
}
mutex->__data.__count = 1;
+ /* We must not enqueue the mutex before we have acquired it.
+ Also see comments at ENQUEUE_MUTEX. */
+ __asm ("" ::: "memory");
ENQUEUE_MUTEX (mutex);
+ /* We need to clear op_pending after we enqueue the mutex. */
+ __asm ("" ::: "memory");
THREAD_SETMEM (THREAD_SELF, robust_head.list_op_pending, NULL);
break;
@@ -334,10 +353,15 @@ __pthread_mutex_lock_full (pthread_mutex_t *mutex)
int robust = mutex->__data.__kind & PTHREAD_MUTEX_ROBUST_NORMAL_NP;
if (robust)
- /* Note: robust PI futexes are signaled by setting bit 0. */
- THREAD_SETMEM (THREAD_SELF, robust_head.list_op_pending,
- (void *) (((uintptr_t) &mutex->__data.__list.__next)
- | 1));
+ {
+ /* Note: robust PI futexes are signaled by setting bit 0. */
+ THREAD_SETMEM (THREAD_SELF, robust_head.list_op_pending,
+ (void *) (((uintptr_t) &mutex->__data.__list.__next)
+ | 1));
+ /* We need to set op_pending before starting the operation. Also
+ see comments at ENQUEUE_MUTEX. */
+ __asm ("" ::: "memory");
+ }
oldval = mutex->__data.__lock;
@@ -346,12 +370,16 @@ __pthread_mutex_lock_full (pthread_mutex_t *mutex)
{
if (kind == PTHREAD_MUTEX_ERRORCHECK_NP)
{
+ /* We do not need to ensure ordering wrt another memory
+ access. */
THREAD_SETMEM (THREAD_SELF, robust_head.list_op_pending, NULL);
return EDEADLK;
}
if (kind == PTHREAD_MUTEX_RECURSIVE_NP)
{
+ /* We do not need to ensure ordering wrt another memory
+ access. */
THREAD_SETMEM (THREAD_SELF, robust_head.list_op_pending, NULL);
/* Just bump the counter. */
@@ -414,7 +442,12 @@ __pthread_mutex_lock_full (pthread_mutex_t *mutex)
/* But it is inconsistent unless marked otherwise. */
mutex->__data.__owner = PTHREAD_MUTEX_INCONSISTENT;
+ /* We must not enqueue the mutex before we have acquired it.
+ Also see comments at ENQUEUE_MUTEX. */
+ __asm ("" ::: "memory");
ENQUEUE_MUTEX_PI (mutex);
+ /* We need to clear op_pending after we enqueue the mutex. */
+ __asm ("" ::: "memory");
THREAD_SETMEM (THREAD_SELF, robust_head.list_op_pending, NULL);
/* Note that we deliberately exit here. If we fall
@@ -442,6 +475,8 @@ __pthread_mutex_lock_full (pthread_mutex_t *mutex)
PTHREAD_ROBUST_MUTEX_PSHARED (mutex)),
0, 0);
+ /* To the kernel, this will be visible after the kernel has
+ acquired the mutex in the syscall. */
THREAD_SETMEM (THREAD_SELF, robust_head.list_op_pending, NULL);
return ENOTRECOVERABLE;
}
@@ -449,7 +484,12 @@ __pthread_mutex_lock_full (pthread_mutex_t *mutex)
mutex->__data.__count = 1;
if (robust)
{
+ /* We must not enqueue the mutex before we have acquired it.
+ Also see comments at ENQUEUE_MUTEX. */
+ __asm ("" ::: "memory");
ENQUEUE_MUTEX_PI (mutex);
+ /* We need to clear op_pending after we enqueue the mutex. */
+ __asm ("" ::: "memory");
THREAD_SETMEM (THREAD_SELF, robust_head.list_op_pending, NULL);
}
}
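The hunks above repeat one small protocol around robust_head.list_op_pending:
announce the operation, acquire the futex, enqueue the mutex, then retract
the announcement, with a compiler barrier between every pair of steps whose
order the kernel relies on. A condensed skeleton of that protocol (all names
are placeholders for this sketch, not glibc identifiers):

    #include <stddef.h>

    struct model_mutex { void *list_next; int lock; };

    static void *op_pending;

    static void set_op_pending (void *p) { op_pending = p; }
    static void clear_op_pending (void) { op_pending = NULL; }
    static void enqueue_mutex (struct model_mutex *m) { (void) m; }
    static int try_acquire (struct model_mutex *m) { (void) m; return 0; }

    static int
    robust_lock_skeleton (struct model_mutex *m)
    {
      /* 1. Announce the operation: if the thread dies after acquiring
         the futex but before enqueueing it, the kernel recovers the
         mutex via list_op_pending.  */
      set_op_pending (&m->list_next);
      __asm ("" ::: "memory");  /* op_pending must be set before step 2.  */

      /* 2. Acquire the futex (the real code may also see EOWNERDEAD,
         EDEADLK, timeouts, and so on).  */
      int err = try_acquire (m);
      if (err != 0)
        {
          /* Nothing was enqueued, so clearing op_pending needs no
             ordering wrt another memory access.  */
          clear_op_pending ();
          return err;
        }

      /* 3. Enqueue only after the acquisition, and clear op_pending only
         after the enqueue; a compiler barrier separates each pair of
         steps the kernel must see in this order.  */
      __asm ("" ::: "memory");
      enqueue_mutex (m);
      __asm ("" ::: "memory");
      clear_op_pending ();
      return 0;
    }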
diff --git a/nptl/pthread_mutex_timedlock.c b/nptl/pthread_mutex_timedlock.c
--- a/nptl/pthread_mutex_timedlock.c
+++ b/nptl/pthread_mutex_timedlock.c
@@ -140,6 +140,9 @@ pthread_mutex_timedlock (pthread_mutex_t *mutex,
case PTHREAD_MUTEX_ROBUST_ADAPTIVE_NP:
THREAD_SETMEM (THREAD_SELF, robust_head.list_op_pending,
&mutex->__data.__list.__next);
+ /* We need to set op_pending before starting the operation. Also
+ see comments at ENQUEUE_MUTEX. */
+ __asm ("" ::: "memory");
oldval = mutex->__data.__lock;
/* This is set to FUTEX_WAITERS iff we might have shared the
@@ -177,7 +180,12 @@ pthread_mutex_timedlock (pthread_mutex_t *mutex,
/* But it is inconsistent unless marked otherwise. */
mutex->__data.__owner = PTHREAD_MUTEX_INCONSISTENT;
+ /* We must not enqueue the mutex before we have acquired it.
+ Also see comments at ENQUEUE_MUTEX. */
+ __asm ("" ::: "memory");
ENQUEUE_MUTEX (mutex);
+ /* We need to clear op_pending after we enqueue the mutex. */
+ __asm ("" ::: "memory");
THREAD_SETMEM (THREAD_SELF, robust_head.list_op_pending, NULL);
/* Note that we deliberately exit here. If we fall
@@ -193,6 +201,8 @@ pthread_mutex_timedlock (pthread_mutex_t *mutex,
int kind = PTHREAD_MUTEX_TYPE (mutex);
if (kind == PTHREAD_MUTEX_ROBUST_ERRORCHECK_NP)
{
+ /* We do not need to ensure ordering wrt another memory
+ access. Also see comments at ENQUEUE_MUTEX. */
THREAD_SETMEM (THREAD_SELF, robust_head.list_op_pending,
NULL);
return EDEADLK;
@@ -200,6 +210,8 @@ pthread_mutex_timedlock (pthread_mutex_t *mutex,
if (kind == PTHREAD_MUTEX_ROBUST_RECURSIVE_NP)
{
+ /* We do not need to ensure ordering wrt another memory
+ access. */
THREAD_SETMEM (THREAD_SELF, robust_head.list_op_pending,
NULL);
@@ -294,12 +306,19 @@ pthread_mutex_timedlock (pthread_mutex_t *mutex,
mutex->__data.__count = 0;
int private = PTHREAD_ROBUST_MUTEX_PSHARED (mutex);
lll_unlock (mutex->__data.__lock, private);
+ /* FIXME This violates the mutex destruction requirements. See
+ __pthread_mutex_unlock_full. */
THREAD_SETMEM (THREAD_SELF, robust_head.list_op_pending, NULL);
return ENOTRECOVERABLE;
}
mutex->__data.__count = 1;
+ /* We must not enqueue the mutex before we have acquired it.
+ Also see comments at ENQUEUE_MUTEX. */
+ __asm ("" ::: "memory");
ENQUEUE_MUTEX (mutex);
+ /* We need to clear op_pending after we enqueue the mutex. */
+ __asm ("" ::: "memory");
THREAD_SETMEM (THREAD_SELF, robust_head.list_op_pending, NULL);
break;
@@ -320,10 +339,15 @@ pthread_mutex_timedlock (pthread_mutex_t *mutex,
int robust = mutex->__data.__kind & PTHREAD_MUTEX_ROBUST_NORMAL_NP;
if (robust)
- /* Note: robust PI futexes are signaled by setting bit 0. */
- THREAD_SETMEM (THREAD_SELF, robust_head.list_op_pending,
- (void *) (((uintptr_t) &mutex->__data.__list.__next)
- | 1));
+ {
+ /* Note: robust PI futexes are signaled by setting bit 0. */
+ THREAD_SETMEM (THREAD_SELF, robust_head.list_op_pending,
+ (void *) (((uintptr_t) &mutex->__data.__list.__next)
+ | 1));
+ /* We need to set op_pending before starting the operation. Also
+ see comments at ENQUEUE_MUTEX. */
+ __asm ("" ::: "memory");
+ }
oldval = mutex->__data.__lock;
@@ -332,12 +356,16 @@ pthread_mutex_timedlock (pthread_mutex_t *mutex,
{
if (kind == PTHREAD_MUTEX_ERRORCHECK_NP)
{
+ /* We do not need to ensure ordering wrt another memory
+ access. */
THREAD_SETMEM (THREAD_SELF, robust_head.list_op_pending, NULL);
return EDEADLK;
}
if (kind == PTHREAD_MUTEX_RECURSIVE_NP)
{
+ /* We do not need to ensure ordering wrt another memory
+ access. */
THREAD_SETMEM (THREAD_SELF, robust_head.list_op_pending, NULL);
/* Just bump the counter. */
@@ -424,7 +452,12 @@ pthread_mutex_timedlock (pthread_mutex_t *mutex,
/* But it is inconsistent unless marked otherwise. */
mutex->__data.__owner = PTHREAD_MUTEX_INCONSISTENT;
+ /* We must not enqueue the mutex before we have acquired it.
+ Also see comments at ENQUEUE_MUTEX. */
+ __asm ("" ::: "memory");
ENQUEUE_MUTEX_PI (mutex);
+ /* We need to clear op_pending after we enqueue the mutex. */
+ __asm ("" ::: "memory");
THREAD_SETMEM (THREAD_SELF, robust_head.list_op_pending, NULL);
/* Note that we deliberately exit here. If we fall
@@ -447,6 +480,8 @@ pthread_mutex_timedlock (pthread_mutex_t *mutex,
PTHREAD_ROBUST_MUTEX_PSHARED (mutex)),
0, 0);
+ /* To the kernel, this will be visible after the kernel has
+ acquired the mutex in the syscall. */
THREAD_SETMEM (THREAD_SELF, robust_head.list_op_pending, NULL);
return ENOTRECOVERABLE;
}
@@ -454,7 +489,12 @@ pthread_mutex_timedlock (pthread_mutex_t *mutex,
mutex->__data.__count = 1;
if (robust)
{
+ /* We must not enqueue the mutex before we have acquired it.
+ Also see comments at ENQUEUE_MUTEX. */
+ __asm ("" ::: "memory");
ENQUEUE_MUTEX_PI (mutex);
+ /* We need to clear op_pending after we enqueue the mutex. */
+ __asm ("" ::: "memory");
THREAD_SETMEM (THREAD_SELF, robust_head.list_op_pending, NULL);
}
}
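The comments keep referring to what the kernel sees. Roughly, that is the
following walk over the dead thread's robust list, shown here as a strongly
simplified user-space model of the robust-futex ABI (the constants mirror
FUTEX_WAITERS, FUTEX_OWNER_DIED, and FUTEX_TID_MASK; this is not the kernel
code, and it ignores details such as bit 0 of the entry pointers, which marks
PI futexes, and glibc's use of pointers to the __next member together with a
matching negative futex_offset):

    #include <stddef.h>
    #include <stdint.h>

    #define MODEL_FUTEX_WAITERS     0x80000000u
    #define MODEL_FUTEX_OWNER_DIED  0x40000000u
    #define MODEL_FUTEX_TID_MASK    0x3fffffffu

    struct model_list { struct model_list *next; };
    struct model_list_head
    {
      struct model_list *list;       /* Points back to the head when empty.  */
      long futex_offset;             /* Offset from an entry to its futex word.  */
      struct model_list *list_op_pending;
    };

    static void
    model_handle_entry (struct model_list *entry, long futex_offset,
                        uint32_t tid)
    {
      uint32_t *futex = (uint32_t *) ((char *) entry + futex_offset);
      /* The kernel acts only if the dead thread still owns the futex;
         the real kernel does this atomically and also wakes a waiter if
         MODEL_FUTEX_WAITERS is set.  */
      if ((*futex & MODEL_FUTEX_TID_MASK) == tid)
        *futex |= MODEL_FUTEX_OWNER_DIED;
    }

    static void
    model_cleanup_walk (struct model_list_head *head, uint32_t tid)
    {
      /* Walk the __next chain; the barriers added by this patch ensure
         that this chain is consistent no matter where the thread died.  */
      for (struct model_list *e = head->list;
           e != (struct model_list *) head; e = e->next)
        model_handle_entry (e, head->futex_offset, tid);

      /* An operation that was interrupted midway is covered by the
         pending-operations entry.  */
      if (head->list_op_pending != NULL)
        model_handle_entry (head->list_op_pending, head->futex_offset, tid);
    }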
diff --git a/nptl/pthread_mutex_unlock.c b/nptl/pthread_mutex_unlock.c
--- a/nptl/pthread_mutex_unlock.c
+++ b/nptl/pthread_mutex_unlock.c
@@ -143,6 +143,9 @@ __pthread_mutex_unlock_full (pthread_mutex_t *mutex, int decr)
/* Remove mutex from the list. */
THREAD_SETMEM (THREAD_SELF, robust_head.list_op_pending,
&mutex->__data.__list.__next);
+ /* We must set op_pending before we dequeue the mutex. Also see
+ comments at ENQUEUE_MUTEX. */
+ __asm ("" ::: "memory");
DEQUEUE_MUTEX (mutex);
mutex->__data.__owner = newowner;
@@ -159,6 +162,14 @@ __pthread_mutex_unlock_full (pthread_mutex_t *mutex, int decr)
& FUTEX_WAITERS) != 0))
lll_futex_wake (&mutex->__data.__lock, 1, private);
+ /* We must clear op_pending after we release the mutex.
+ FIXME However, this violates the mutex destruction requirements
+ because another thread could acquire the mutex, destroy it, and
+ reuse the memory for something else; then, if this thread crashes,
+ and the memory happens to have a value equal to the TID, the kernel
+ will believe it is still related to the mutex (which has been
+ destroyed already) and will modify some other random object. */
+ __asm ("" ::: "memory");
THREAD_SETMEM (THREAD_SELF, robust_head.list_op_pending, NULL);
break;
@@ -227,6 +238,9 @@ __pthread_mutex_unlock_full (pthread_mutex_t *mutex, int decr)
THREAD_SETMEM (THREAD_SELF, robust_head.list_op_pending,
(void *) (((uintptr_t) &mutex->__data.__list.__next)
| 1));
+ /* We must set op_pending before we dequeue the mutex. Also see
+ comments at ENQUEUE_MUTEX. */
+ __asm ("" ::: "memory");
DEQUEUE_MUTEX (mutex);
}
@@ -262,6 +276,9 @@ __pthread_mutex_unlock_full (pthread_mutex_t *mutex, int decr)
while (!atomic_compare_exchange_weak_release (&mutex->__data.__lock,
&l, 0));
+ /* This happens after the kernel releases the mutex but violates the
+ mutex destruction requirements; see comments in the code handling
+ PTHREAD_MUTEX_ROBUST_NORMAL_NP. */
THREAD_SETMEM (THREAD_SELF, robust_head.list_op_pending, NULL);
break;
#endif /* __NR_futex. */
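Finally, the FIXMEs about the mutex destruction requirements describe a
window that compiler barriers alone cannot close. A compressed sketch of the
unlock ordering with that window marked (the names are placeholders, not
glibc identifiers):

    #include <stddef.h>

    struct model_rmutex { void *list_next; unsigned int lock; };

    static void *pending;

    static void
    robust_unlock_skeleton (struct model_rmutex *m)
    {
      pending = &m->list_next;        /* Announce the dequeue ...  */
      __asm ("" ::: "memory");        /* ... before performing it.  */
      /* DEQUEUE_MUTEX would run here.  */

      m->lock = 0;                    /* Release the futex word; the real
                                         code uses an atomic exchange and
                                         may wake a waiter.  */

      /* FIXME window: between the release above and the store below,
         another thread may acquire, destroy, and reuse this memory.  If
         this thread then dies, the kernel still follows the stale
         pending pointer and, should the reused memory happen to contain
         this thread's TID, it would modify an unrelated object.  */
      __asm ("" ::: "memory");
      pending = NULL;
    }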