diff mbox

[PATCHv2] linux-generic: odp_ticketlock.c: performance regression, added comments

Message ID 1417713606-7920-1-git-send-email-ola.liljedahl@linaro.org
State Accepted
Commit 7f536744a3d4d1b5da67ac2ffe7d6ecbc1176163
Headers show

Commit Message

Ola Liljedahl Dec. 4, 2014, 5:20 p.m. UTC
Signed-off-by: Ola Liljedahl <ola.liljedahl@linaro.org>
---
When releasing a ticket lock, replace the atomic increment operation with
load-relaxed and store-release as this avaoids an (unnecessary) atomic RMW
operation which is expensive on some architectures.
Add descriptive comments for all ticketlock operations.

 platform/linux-generic/odp_ticketlock.c | 20 +++++++++++++++++++-
 1 file changed, 19 insertions(+), 1 deletion(-)

Comments

Bill Fischofer Dec. 4, 2014, 5:31 p.m. UTC | #1
On Thu, Dec 4, 2014 at 11:20 AM, Ola Liljedahl <ola.liljedahl@linaro.org>
wrote:

> Signed-off-by: Ola Liljedahl <ola.liljedahl@linaro.org>
>

Reviewed-by: Bill Fischofer <bill.fischofer@linaro.org>


> ---
> When releasing a ticket lock, replace the atomic increment operation with
> load-relaxed and store-release as this avaoids an (unnecessary) atomic RMW
> operation which is expensive on some architectures.
> Add descriptive comments for all ticketlock operations.
>
>  platform/linux-generic/odp_ticketlock.c | 20 +++++++++++++++++++-
>  1 file changed, 19 insertions(+), 1 deletion(-)
>
> diff --git a/platform/linux-generic/odp_ticketlock.c
> b/platform/linux-generic/odp_ticketlock.c
> index 6c5e74e..682b01b 100644
> --- a/platform/linux-generic/odp_ticketlock.c
> +++ b/platform/linux-generic/odp_ticketlock.c
> @@ -22,8 +22,13 @@ void odp_ticketlock_lock(odp_ticketlock_t *ticketlock)
>  {
>         uint32_t ticket;
>
> +       /* Take a ticket using an atomic increment of 'next_ticket'.
> +        * This can be a relaxed operation but it cannot have the
> +        * acquire semantics since we haven't acquired the lock yet */
>         ticket = odp_atomic_fetch_inc_u32(&ticketlock->next_ticket);
>
> +       /* Spin waiting for our turn. Use load-acquire so that we acquire
> +        * all stores from the previous lock owner */
>         while (ticket != _odp_atomic_u32_load_mm(&ticketlock->cur_ticket,
>                                                  _ODP_MEMMODEL_ACQ))
>                 odp_spin();
> @@ -32,7 +37,15 @@ void odp_ticketlock_lock(odp_ticketlock_t *ticketlock)
>
>  void odp_ticketlock_unlock(odp_ticketlock_t *ticketlock)
>  {
> -       _odp_atomic_u32_add_mm(&ticketlock->cur_ticket, 1,
> _ODP_MEMMODEL_RLS);
> +       /* Release the lock by incrementing 'cur_ticket'. As we are the
> +        * lock owner and thus the only thread that is allowed to write
> +        * 'cur_ticket', we don't need to do this with an (expensive)
> +        * atomic RMW operation. Instead load-relaxed the current value
> +        * and a store-release of the incremented value */
> +       uint32_t cur = _odp_atomic_u32_load_mm(&ticketlock->cur_ticket,
> +                                              _ODP_MEMMODEL_RLX);
> +       _odp_atomic_u32_store_mm(&ticketlock->cur_ticket, cur + 1,
> +                                _ODP_MEMMODEL_RLS);
>
>  #if defined __OCTEON__
>         odp_sync_stores(); /* SYNCW to flush write buffer */
> @@ -42,6 +55,11 @@ void odp_ticketlock_unlock(odp_ticketlock_t *ticketlock)
>
>  int odp_ticketlock_is_locked(odp_ticketlock_t *ticketlock)
>  {
> +       /* Compare 'cur_ticket' with 'next_ticket'. Ideally we should read
> +        * both variables atomically but the information can become stale
> +        * immediately anyway so the function can only be used reliably in
> +        * a quiescent system where non-atomic loads should not pose a
> +        * problem */
>         return odp_atomic_load_u32(&ticketlock->cur_ticket) !=
>                 odp_atomic_load_u32(&ticketlock->next_ticket);
>  }
> --
> 1.9.1
>
>
> _______________________________________________
> lng-odp mailing list
> lng-odp@lists.linaro.org
> http://lists.linaro.org/mailman/listinfo/lng-odp
>
Maxim Uvarov Dec. 4, 2014, 8:21 p.m. UTC | #2
Merged this patch!

  with a little rewording:
"s/added comments//" in subject.

And commit description should be above "---" to be included to git commit.

Maxim.

On 12/04/2014 08:20 PM, Ola Liljedahl wrote:
> Signed-off-by: Ola Liljedahl <ola.liljedahl@linaro.org>
> ---
> When releasing a ticket lock, replace the atomic increment operation with
> load-relaxed and store-release as this avaoids an (unnecessary) atomic RMW
> operation which is expensive on some architectures.
> Add descriptive comments for all ticketlock operations.
>
>   platform/linux-generic/odp_ticketlock.c | 20 +++++++++++++++++++-
>   1 file changed, 19 insertions(+), 1 deletion(-)
>
> diff --git a/platform/linux-generic/odp_ticketlock.c b/platform/linux-generic/odp_ticketlock.c
> index 6c5e74e..682b01b 100644
> --- a/platform/linux-generic/odp_ticketlock.c
> +++ b/platform/linux-generic/odp_ticketlock.c
> @@ -22,8 +22,13 @@ void odp_ticketlock_lock(odp_ticketlock_t *ticketlock)
>   {
>   	uint32_t ticket;
>   
> +	/* Take a ticket using an atomic increment of 'next_ticket'.
> +	 * This can be a relaxed operation but it cannot have the
> +	 * acquire semantics since we haven't acquired the lock yet */
>   	ticket = odp_atomic_fetch_inc_u32(&ticketlock->next_ticket);
>   
> +	/* Spin waiting for our turn. Use load-acquire so that we acquire
> +	 * all stores from the previous lock owner */
>   	while (ticket != _odp_atomic_u32_load_mm(&ticketlock->cur_ticket,
>   						 _ODP_MEMMODEL_ACQ))
>   		odp_spin();
> @@ -32,7 +37,15 @@ void odp_ticketlock_lock(odp_ticketlock_t *ticketlock)
>   
>   void odp_ticketlock_unlock(odp_ticketlock_t *ticketlock)
>   {
> -	_odp_atomic_u32_add_mm(&ticketlock->cur_ticket, 1, _ODP_MEMMODEL_RLS);
> +	/* Release the lock by incrementing 'cur_ticket'. As we are the
> +	 * lock owner and thus the only thread that is allowed to write
> +	 * 'cur_ticket', we don't need to do this with an (expensive)
> +	 * atomic RMW operation. Instead load-relaxed the current value
> +	 * and a store-release of the incremented value */
> +	uint32_t cur = _odp_atomic_u32_load_mm(&ticketlock->cur_ticket,
> +					       _ODP_MEMMODEL_RLX);
> +	_odp_atomic_u32_store_mm(&ticketlock->cur_ticket, cur + 1,
> +				 _ODP_MEMMODEL_RLS);
>   
>   #if defined __OCTEON__
>   	odp_sync_stores(); /* SYNCW to flush write buffer */
> @@ -42,6 +55,11 @@ void odp_ticketlock_unlock(odp_ticketlock_t *ticketlock)
>   
>   int odp_ticketlock_is_locked(odp_ticketlock_t *ticketlock)
>   {
> +	/* Compare 'cur_ticket' with 'next_ticket'. Ideally we should read
> +	 * both variables atomically but the information can become stale
> +	 * immediately anyway so the function can only be used reliably in
> +	 * a quiescent system where non-atomic loads should not pose a
> +	 * problem */
>   	return odp_atomic_load_u32(&ticketlock->cur_ticket) !=
>   		odp_atomic_load_u32(&ticketlock->next_ticket);
>   }
diff mbox

Patch

diff --git a/platform/linux-generic/odp_ticketlock.c b/platform/linux-generic/odp_ticketlock.c
index 6c5e74e..682b01b 100644
--- a/platform/linux-generic/odp_ticketlock.c
+++ b/platform/linux-generic/odp_ticketlock.c
@@ -22,8 +22,13 @@  void odp_ticketlock_lock(odp_ticketlock_t *ticketlock)
 {
 	uint32_t ticket;
 
+	/* Take a ticket using an atomic increment of 'next_ticket'.
+	 * This can be a relaxed operation but it cannot have the
+	 * acquire semantics since we haven't acquired the lock yet */
 	ticket = odp_atomic_fetch_inc_u32(&ticketlock->next_ticket);
 
+	/* Spin waiting for our turn. Use load-acquire so that we acquire
+	 * all stores from the previous lock owner */
 	while (ticket != _odp_atomic_u32_load_mm(&ticketlock->cur_ticket,
 						 _ODP_MEMMODEL_ACQ))
 		odp_spin();
@@ -32,7 +37,15 @@  void odp_ticketlock_lock(odp_ticketlock_t *ticketlock)
 
 void odp_ticketlock_unlock(odp_ticketlock_t *ticketlock)
 {
-	_odp_atomic_u32_add_mm(&ticketlock->cur_ticket, 1, _ODP_MEMMODEL_RLS);
+	/* Release the lock by incrementing 'cur_ticket'. As we are the
+	 * lock owner and thus the only thread that is allowed to write
+	 * 'cur_ticket', we don't need to do this with an (expensive)
+	 * atomic RMW operation. Instead load-relaxed the current value
+	 * and a store-release of the incremented value */
+	uint32_t cur = _odp_atomic_u32_load_mm(&ticketlock->cur_ticket,
+					       _ODP_MEMMODEL_RLX);
+	_odp_atomic_u32_store_mm(&ticketlock->cur_ticket, cur + 1,
+				 _ODP_MEMMODEL_RLS);
 
 #if defined __OCTEON__
 	odp_sync_stores(); /* SYNCW to flush write buffer */
@@ -42,6 +55,11 @@  void odp_ticketlock_unlock(odp_ticketlock_t *ticketlock)
 
 int odp_ticketlock_is_locked(odp_ticketlock_t *ticketlock)
 {
+	/* Compare 'cur_ticket' with 'next_ticket'. Ideally we should read
+	 * both variables atomically but the information can become stale
+	 * immediately anyway so the function can only be used reliably in
+	 * a quiescent system where non-atomic loads should not pose a
+	 * problem */
 	return odp_atomic_load_u32(&ticketlock->cur_ticket) !=
 		odp_atomic_load_u32(&ticketlock->next_ticket);
 }