diff mbox

[API-NEXT,PATCHv2] linux-generic: queue: yield trying to obtain multiple locks

Message ID 1446237185-13264-1-git-send-email-bill.fischofer@linaro.org
State New
Headers show

Commit Message

Bill Fischofer Oct. 30, 2015, 8:33 p.m. UTC
To avoid deadlock, especially on a single core, force an explicit
yield while not holding either lock when attempting to acquire multiple
locks for ordered queue processing. Also handle enqueues to self, since
in this case the origin and target queues share a single lock.

This addresses the aspect of Bug
https://bugs.linaro.org/show_bug.cgi?id=1879
relating to deadlock in unicore systems.

Signed-off-by: Bill Fischofer <bill.fischofer@linaro.org>
---
 platform/linux-generic/odp_queue.c | 47 ++++++++++++++++++++++++++++++--------
 1 file changed, 38 insertions(+), 9 deletions(-)

Comments

Maxim Uvarov Nov. 2, 2015, 8:14 a.m. UTC | #1
I applied that on api-next and still see fails:

$while true; do ./test/validation/scheduler/scheduler_main 2>&1 |grep 
FAIL; done
   Test: scheduler_test_mq_mt_prio_o ...FAILED
   Test: scheduler_test_multi_mq_mt_prio_o ...FAILED
   Test: scheduler_test_multi_mq_mt_prio_o ...FAILED
   Test: scheduler_test_mq_mt_prio_o ...FAILED
   Test: scheduler_test_mq_mt_prio_o ...FAILED
   Test: scheduler_test_mq_mt_prio_o ...FAILED
   Test: scheduler_test_multi_mq_mt_prio_o ...FAILED
   Test: scheduler_test_mq_mt_prio_o ...FAILED
   Test: scheduler_test_mq_mt_prio_o ...FAILED


On 10/30/2015 23:33, Bill Fischofer wrote:
> To avoid deadlock, especially on a single core, force an explicit
> yield while not holding either lock when attempting to acquire multiple
> locks for ordered queue processing. Also handle enqueues to self as in
> this case the origin and target queues share a single lock.
>
> This addresses the aspect of Bug
> https://bugs.linaro.org/show_bug.cgi?id=1879
> relating to deadlock in unicore systems.
>
> Signed-off-by: Bill Fischofer <bill.fischofer@linaro.org>
> ---
>   platform/linux-generic/odp_queue.c | 47 ++++++++++++++++++++++++++++++--------
>   1 file changed, 38 insertions(+), 9 deletions(-)
>
> diff --git a/platform/linux-generic/odp_queue.c b/platform/linux-generic/odp_queue.c
> index a27af0b..4366683 100644
> --- a/platform/linux-generic/odp_queue.c
> +++ b/platform/linux-generic/odp_queue.c
> @@ -48,6 +48,36 @@ typedef struct queue_table_t {
>   static queue_table_t *queue_tbl;
>   
>   
> +static inline void get_qe_locks(queue_entry_t *qe1, queue_entry_t *qe2)
> +{
> +	int i;
> +
> +	/* Special case: enq to self */
> +	if (qe1 == qe2) {
> +		LOCK(&qe1->s.lock);
> +		return;
> +	}
> +
> +       /* Enq to another queue. Issue is that since any queue can be either
> +	* origin or target we can't have a static lock hierarchy. Strategy is
> +	* to get one lock then attempt to get the other. If the second lock
> +	* attempt fails, release the first and try again.  Note that in single
> +	* CPU mode we require the explicit yield since otherwise we may never
> +	* resolve unless the scheduler happens to timeslice exactly when we
> +	* hold no lock.
> +	*/
> +	while (1) {
> +		for (i = 0; i < 10; i++) {
> +			LOCK(&qe1->s.lock);
> +			if (LOCK_TRY(&qe2->s.lock))
> +				return;
> +			UNLOCK(&qe1->s.lock);
> +			odp_sync_stores();
> +		}
> +		sched_yield();
> +	}
> +}
> +
>   queue_entry_t *get_qentry(uint32_t queue_id)
>   {
>   	return &queue_tbl->queue[queue_id];
> @@ -370,14 +400,11 @@ int queue_enq(queue_entry_t *queue, odp_buffer_hdr_t *buf_hdr, int sustain)
>   
>   	/* Need two locks for enq operations from ordered queues */
>   	if (origin_qe) {
> -		LOCK(&origin_qe->s.lock);
> -		while (!LOCK_TRY(&queue->s.lock)) {
> -			UNLOCK(&origin_qe->s.lock);
> -			LOCK(&origin_qe->s.lock);
> -		}
> +		get_qe_locks(origin_qe, queue);
>   		if (odp_unlikely(origin_qe->s.status < QUEUE_STATUS_READY)) {
>   			UNLOCK(&queue->s.lock);
> -			UNLOCK(&origin_qe->s.lock);
> +			if (origin_qe != queue)
> +				UNLOCK(&origin_qe->s.lock);
>   			ODP_ERR("Bad origin queue status\n");
>   			ODP_ERR("queue = %s, origin q = %s, buf = %p\n",
>   				queue->s.name, origin_qe->s.name, buf_hdr);
> @@ -389,7 +416,7 @@ int queue_enq(queue_entry_t *queue, odp_buffer_hdr_t *buf_hdr, int sustain)
>   
>   	if (odp_unlikely(queue->s.status < QUEUE_STATUS_READY)) {
>   		UNLOCK(&queue->s.lock);
> -		if (origin_qe)
> +		if (origin_qe && origin_qe != queue)
>   			UNLOCK(&origin_qe->s.lock);
>   		ODP_ERR("Bad queue status\n");
>   		return -1;
> @@ -405,7 +432,8 @@ int queue_enq(queue_entry_t *queue, odp_buffer_hdr_t *buf_hdr, int sustain)
>   			 * we're done here.
>   			 */
>   			UNLOCK(&queue->s.lock);
> -			UNLOCK(&origin_qe->s.lock);
> +			if (origin_qe != queue)
> +				UNLOCK(&origin_qe->s.lock);
>   			return 0;
>   		}
>   
> @@ -477,7 +505,8 @@ int queue_enq(queue_entry_t *queue, odp_buffer_hdr_t *buf_hdr, int sustain)
>   		/* Now handle any unblocked complete buffers destined for
>   		 * other queues, appending placeholder bufs as needed.
>   		 */
> -		UNLOCK(&queue->s.lock);
> +		if (origin_qe != queue)
> +			UNLOCK(&queue->s.lock);
>   		reorder_complete(origin_qe, &reorder_buf, &placeholder_buf,
>   				 1, 0);
>   		UNLOCK(&origin_qe->s.lock);
Maxim Uvarov Nov. 2, 2015, 8:49 a.m. UTC | #2
Debug says that the order of those two packets is wrong:


   Test: scheduler_test_mq_mt_prio_o ...bctx 64 seq 63
bctx 63 seq 64

On 10/30/2015 23:33, Bill Fischofer wrote:
> To avoid deadlock, especially on a single core, force an explicit
> yield while not holding either lock when attempting to acquire multiple
> locks for ordered queue processing. Also handle enqueues to self as in
> this case the origin and target queues share a single lock.
>
> This addresses the aspect of Bug
> https://bugs.linaro.org/show_bug.cgi?id=1879
> relating to deadlock in unicore systems.
>
> Signed-off-by: Bill Fischofer <bill.fischofer@linaro.org>
> ---
>   platform/linux-generic/odp_queue.c | 47 ++++++++++++++++++++++++++++++--------
>   1 file changed, 38 insertions(+), 9 deletions(-)
>
> diff --git a/platform/linux-generic/odp_queue.c b/platform/linux-generic/odp_queue.c
> index a27af0b..4366683 100644
> --- a/platform/linux-generic/odp_queue.c
> +++ b/platform/linux-generic/odp_queue.c
> @@ -48,6 +48,36 @@ typedef struct queue_table_t {
>   static queue_table_t *queue_tbl;
>   
>   
> +static inline void get_qe_locks(queue_entry_t *qe1, queue_entry_t *qe2)
> +{
> +	int i;
> +
> +	/* Special case: enq to self */
> +	if (qe1 == qe2) {
> +		LOCK(&qe1->s.lock);
> +		return;
> +	}
> +
> +       /* Enq to another queue. Issue is that since any queue can be either
> +	* origin or target we can't have a static lock hierarchy. Strategy is
> +	* to get one lock then attempt to get the other. If the second lock
> +	* attempt fails, release the first and try again.  Note that in single
> +	* CPU mode we require the explicit yield since otherwise we may never
> +	* resolve unless the scheduler happens to timeslice exactly when we
> +	* hold no lock.
> +	*/
> +	while (1) {
> +		for (i = 0; i < 10; i++) {
> +			LOCK(&qe1->s.lock);
> +			if (LOCK_TRY(&qe2->s.lock))
> +				return;
> +			UNLOCK(&qe1->s.lock);
> +			odp_sync_stores();
> +		}
> +		sched_yield();
> +	}
> +}
> +
>   queue_entry_t *get_qentry(uint32_t queue_id)
>   {
>   	return &queue_tbl->queue[queue_id];
> @@ -370,14 +400,11 @@ int queue_enq(queue_entry_t *queue, odp_buffer_hdr_t *buf_hdr, int sustain)
>   
>   	/* Need two locks for enq operations from ordered queues */
>   	if (origin_qe) {
> -		LOCK(&origin_qe->s.lock);
> -		while (!LOCK_TRY(&queue->s.lock)) {
> -			UNLOCK(&origin_qe->s.lock);
> -			LOCK(&origin_qe->s.lock);
> -		}
> +		get_qe_locks(origin_qe, queue);
>   		if (odp_unlikely(origin_qe->s.status < QUEUE_STATUS_READY)) {
>   			UNLOCK(&queue->s.lock);
> -			UNLOCK(&origin_qe->s.lock);
> +			if (origin_qe != queue)
> +				UNLOCK(&origin_qe->s.lock);
>   			ODP_ERR("Bad origin queue status\n");
>   			ODP_ERR("queue = %s, origin q = %s, buf = %p\n",
>   				queue->s.name, origin_qe->s.name, buf_hdr);
> @@ -389,7 +416,7 @@ int queue_enq(queue_entry_t *queue, odp_buffer_hdr_t *buf_hdr, int sustain)
>   
>   	if (odp_unlikely(queue->s.status < QUEUE_STATUS_READY)) {
>   		UNLOCK(&queue->s.lock);
> -		if (origin_qe)
> +		if (origin_qe && origin_qe != queue)
>   			UNLOCK(&origin_qe->s.lock);
>   		ODP_ERR("Bad queue status\n");
>   		return -1;
> @@ -405,7 +432,8 @@ int queue_enq(queue_entry_t *queue, odp_buffer_hdr_t *buf_hdr, int sustain)
>   			 * we're done here.
>   			 */
>   			UNLOCK(&queue->s.lock);
> -			UNLOCK(&origin_qe->s.lock);
> +			if (origin_qe != queue)
> +				UNLOCK(&origin_qe->s.lock);
>   			return 0;
>   		}
>   
> @@ -477,7 +505,8 @@ int queue_enq(queue_entry_t *queue, odp_buffer_hdr_t *buf_hdr, int sustain)
>   		/* Now handle any unblocked complete buffers destined for
>   		 * other queues, appending placeholder bufs as needed.
>   		 */
> -		UNLOCK(&queue->s.lock);
> +		if (origin_qe != queue)
> +			UNLOCK(&queue->s.lock);
>   		reorder_complete(origin_qe, &reorder_buf, &placeholder_buf,
>   				 1, 0);
>   		UNLOCK(&origin_qe->s.lock);
Bill Fischofer Nov. 2, 2015, 12:44 p.m. UTC | #3
That particular patch is designed to address Carl's deadlock issue; it
doesn't address the reorder glitch (still investigating that one).

On Mon, Nov 2, 2015 at 2:49 AM, Maxim Uvarov <maxim.uvarov@linaro.org>
wrote:

> Debug says that order of that two packets is wrong:

>

>

>   Test: scheduler_test_mq_mt_prio_o ...bctx 64 seq 63

> bctx 63 seq 64

>

> On 10/30/2015 23:33, Bill Fischofer wrote:

>

>> To avoid deadlock, especially on a single core, force an explicit

>> yield while not holding either lock when attempting to acquire multiple

>> locks for ordered queue processing. Also handle enqueues to self as in

>> this case the origin and target queues share a single lock.

>>

>> This addresses the aspect of Bug

>> https://bugs.linaro.org/show_bug.cgi?id=1879

>> relating to deadlock in unicore systems.

>>

>> Signed-off-by: Bill Fischofer <bill.fischofer@linaro.org>

>> ---

>>   platform/linux-generic/odp_queue.c | 47

>> ++++++++++++++++++++++++++++++--------

>>   1 file changed, 38 insertions(+), 9 deletions(-)

>>

>> diff --git a/platform/linux-generic/odp_queue.c

>> b/platform/linux-generic/odp_queue.c

>> index a27af0b..4366683 100644

>> --- a/platform/linux-generic/odp_queue.c

>> +++ b/platform/linux-generic/odp_queue.c

>> @@ -48,6 +48,36 @@ typedef struct queue_table_t {

>>   static queue_table_t *queue_tbl;

>>     +static inline void get_qe_locks(queue_entry_t *qe1, queue_entry_t

>> *qe2)

>> +{

>> +       int i;

>> +

>> +       /* Special case: enq to self */

>> +       if (qe1 == qe2) {

>> +               LOCK(&qe1->s.lock);

>> +               return;

>> +       }

>> +

>> +       /* Enq to another queue. Issue is that since any queue can be

>> either

>> +       * origin or target we can't have a static lock hierarchy.

>> Strategy is

>> +       * to get one lock then attempt to get the other. If the second

>> lock

>> +       * attempt fails, release the first and try again.  Note that in

>> single

>> +       * CPU mode we require the explicit yield since otherwise we may

>> never

>> +       * resolve unless the scheduler happens to timeslice exactly when

>> we

>> +       * hold no lock.

>> +       */

>> +       while (1) {

>> +               for (i = 0; i < 10; i++) {

>> +                       LOCK(&qe1->s.lock);

>> +                       if (LOCK_TRY(&qe2->s.lock))

>> +                               return;

>> +                       UNLOCK(&qe1->s.lock);

>> +                       odp_sync_stores();

>> +               }

>> +               sched_yield();

>> +       }

>> +}

>> +

>>   queue_entry_t *get_qentry(uint32_t queue_id)

>>   {

>>         return &queue_tbl->queue[queue_id];

>> @@ -370,14 +400,11 @@ int queue_enq(queue_entry_t *queue,

>> odp_buffer_hdr_t *buf_hdr, int sustain)

>>         /* Need two locks for enq operations from ordered queues */

>>         if (origin_qe) {

>> -               LOCK(&origin_qe->s.lock);

>> -               while (!LOCK_TRY(&queue->s.lock)) {

>> -                       UNLOCK(&origin_qe->s.lock);

>> -                       LOCK(&origin_qe->s.lock);

>> -               }

>> +               get_qe_locks(origin_qe, queue);

>>                 if (odp_unlikely(origin_qe->s.status <

>> QUEUE_STATUS_READY)) {

>>                         UNLOCK(&queue->s.lock);

>> -                       UNLOCK(&origin_qe->s.lock);

>> +                       if (origin_qe != queue)

>> +                               UNLOCK(&origin_qe->s.lock);

>>                         ODP_ERR("Bad origin queue status\n");

>>                         ODP_ERR("queue = %s, origin q = %s, buf = %p\n",

>>                                 queue->s.name, origin_qe->s.name,

>> buf_hdr);

>> @@ -389,7 +416,7 @@ int queue_enq(queue_entry_t *queue, odp_buffer_hdr_t

>> *buf_hdr, int sustain)

>>         if (odp_unlikely(queue->s.status < QUEUE_STATUS_READY)) {

>>                 UNLOCK(&queue->s.lock);

>> -               if (origin_qe)

>> +               if (origin_qe && origin_qe != queue)

>>                         UNLOCK(&origin_qe->s.lock);

>>                 ODP_ERR("Bad queue status\n");

>>                 return -1;

>> @@ -405,7 +432,8 @@ int queue_enq(queue_entry_t *queue, odp_buffer_hdr_t

>> *buf_hdr, int sustain)

>>                          * we're done here.

>>                          */

>>                         UNLOCK(&queue->s.lock);

>> -                       UNLOCK(&origin_qe->s.lock);

>> +                       if (origin_qe != queue)

>> +                               UNLOCK(&origin_qe->s.lock);

>>                         return 0;

>>                 }

>>   @@ -477,7 +505,8 @@ int queue_enq(queue_entry_t *queue,

>> odp_buffer_hdr_t *buf_hdr, int sustain)

>>                 /* Now handle any unblocked complete buffers destined for

>>                  * other queues, appending placeholder bufs as needed.

>>                  */

>> -               UNLOCK(&queue->s.lock);

>> +               if (origin_qe != queue)

>> +                       UNLOCK(&queue->s.lock);

>>                 reorder_complete(origin_qe, &reorder_buf,

>> &placeholder_buf,

>>                                  1, 0);

>>                 UNLOCK(&origin_qe->s.lock);

>>

>

> _______________________________________________

> lng-odp mailing list

> lng-odp@lists.linaro.org

> https://lists.linaro.org/mailman/listinfo/lng-odp

>
Bill Fischofer Nov. 2, 2015, 3:45 p.m. UTC | #4
Interesting suggestion.  I'll take a look at that and send a v3 for Carl to
try.

On Mon, Nov 2, 2015 at 8:14 AM, Nicolas Morey-Chaisemartin <nmorey@kalray.eu
> wrote:


>

>

> On 10/30/2015 09:33 PM, Bill Fischofer wrote:

> > To avoid deadlock, especially on a single core, force an explicit

> > yield while not holding either lock when attempting to acquire multiple

> > locks for ordered queue processing. Also handle enqueues to self as in

> > this case the origin and target queues share a single lock.

> >

> > This addresses the aspect of Bug

> > https://bugs.linaro.org/show_bug.cgi?id=1879

> > relating to deadlock in unicore systems.

> >

> > Signed-off-by: Bill Fischofer <bill.fischofer@linaro.org>

> > ---

> >  platform/linux-generic/odp_queue.c | 47

> ++++++++++++++++++++++++++++++--------

> >  1 file changed, 38 insertions(+), 9 deletions(-)

> >

> > diff --git a/platform/linux-generic/odp_queue.c

> b/platform/linux-generic/odp_queue.c

> > index a27af0b..4366683 100644

> > --- a/platform/linux-generic/odp_queue.c

> > +++ b/platform/linux-generic/odp_queue.c

> > @@ -48,6 +48,36 @@ typedef struct queue_table_t {

> >  static queue_table_t *queue_tbl;

> >

> >

> > +static inline void get_qe_locks(queue_entry_t *qe1, queue_entry_t *qe2)

> > +{

> > +     int i;

> > +

> > +     /* Special case: enq to self */

> > +     if (qe1 == qe2) {

> > +             LOCK(&qe1->s.lock);

> > +             return;

> > +     }

> > +

> > +       /* Enq to another queue. Issue is that since any queue can be

> either

> > +     * origin or target we can't have a static lock hierarchy. Strategy

> is

> > +     * to get one lock then attempt to get the other. If the second lock

> > +     * attempt fails, release the first and try again.  Note that in

> single

> > +     * CPU mode we require the explicit yield since otherwise we may

> never

> > +     * resolve unless the scheduler happens to timeslice exactly when we

> > +     * hold no lock.

> > +     */

> > +     while (1) {

> > +             for (i = 0; i < 10; i++) {

> > +                     LOCK(&qe1->s.lock);

> > +                     if (LOCK_TRY(&qe2->s.lock))

> > +                             return;

> > +                     UNLOCK(&qe1->s.lock);

> > +                     odp_sync_stores();

> > +             }

> > +             sched_yield();

> > +     }

> > +}

> > +

>

> Quick question: why do we do that lock/trylock/unlock loop instead of
 
> acquiring locks "in order" ?

> If we sort all the required locks in some universal way (pointer, id), the
 
> code can be rewritten as 2 simple LOCK() calls

>

>

>
diff mbox

Patch

diff --git a/platform/linux-generic/odp_queue.c b/platform/linux-generic/odp_queue.c
index a27af0b..4366683 100644
--- a/platform/linux-generic/odp_queue.c
+++ b/platform/linux-generic/odp_queue.c
@@ -48,6 +48,36 @@  typedef struct queue_table_t {
 static queue_table_t *queue_tbl;
 
 
+static inline void get_qe_locks(queue_entry_t *qe1, queue_entry_t *qe2)
+{
+	int i;
+
+	/* Special case: enq to self */
+	if (qe1 == qe2) {
+		LOCK(&qe1->s.lock);
+		return;
+	}
+
+       /* Enq to another queue. Issue is that since any queue can be either
+	* origin or target we can't have a static lock hierarchy. Strategy is
+	* to get one lock then attempt to get the other. If the second lock
+	* attempt fails, release the first and try again.  Note that in single
+	* CPU mode we require the explicit yield since otherwise we may never
+	* resolve unless the scheduler happens to timeslice exactly when we
+	* hold no lock.
+	*/
+	while (1) {
+		for (i = 0; i < 10; i++) {
+			LOCK(&qe1->s.lock);
+			if (LOCK_TRY(&qe2->s.lock))
+				return;
+			UNLOCK(&qe1->s.lock);
+			odp_sync_stores();
+		}
+		sched_yield();
+	}
+}
+
 queue_entry_t *get_qentry(uint32_t queue_id)
 {
 	return &queue_tbl->queue[queue_id];
@@ -370,14 +400,11 @@  int queue_enq(queue_entry_t *queue, odp_buffer_hdr_t *buf_hdr, int sustain)
 
 	/* Need two locks for enq operations from ordered queues */
 	if (origin_qe) {
-		LOCK(&origin_qe->s.lock);
-		while (!LOCK_TRY(&queue->s.lock)) {
-			UNLOCK(&origin_qe->s.lock);
-			LOCK(&origin_qe->s.lock);
-		}
+		get_qe_locks(origin_qe, queue);
 		if (odp_unlikely(origin_qe->s.status < QUEUE_STATUS_READY)) {
 			UNLOCK(&queue->s.lock);
-			UNLOCK(&origin_qe->s.lock);
+			if (origin_qe != queue)
+				UNLOCK(&origin_qe->s.lock);
 			ODP_ERR("Bad origin queue status\n");
 			ODP_ERR("queue = %s, origin q = %s, buf = %p\n",
 				queue->s.name, origin_qe->s.name, buf_hdr);
@@ -389,7 +416,7 @@  int queue_enq(queue_entry_t *queue, odp_buffer_hdr_t *buf_hdr, int sustain)
 
 	if (odp_unlikely(queue->s.status < QUEUE_STATUS_READY)) {
 		UNLOCK(&queue->s.lock);
-		if (origin_qe)
+		if (origin_qe && origin_qe != queue)
 			UNLOCK(&origin_qe->s.lock);
 		ODP_ERR("Bad queue status\n");
 		return -1;
@@ -405,7 +432,8 @@  int queue_enq(queue_entry_t *queue, odp_buffer_hdr_t *buf_hdr, int sustain)
 			 * we're done here.
 			 */
 			UNLOCK(&queue->s.lock);
-			UNLOCK(&origin_qe->s.lock);
+			if (origin_qe != queue)
+				UNLOCK(&origin_qe->s.lock);
 			return 0;
 		}
 
@@ -477,7 +505,8 @@  int queue_enq(queue_entry_t *queue, odp_buffer_hdr_t *buf_hdr, int sustain)
 		/* Now handle any unblocked complete buffers destined for
 		 * other queues, appending placeholder bufs as needed.
 		 */
-		UNLOCK(&queue->s.lock);
+		if (origin_qe != queue)
+			UNLOCK(&queue->s.lock);
 		reorder_complete(origin_qe, &reorder_buf, &placeholder_buf,
 				 1, 0);
 		UNLOCK(&origin_qe->s.lock);