@@ -467,16 +467,15 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
* Otherwise, we only need to grab the lock.
*/
- /* In the PV case we might already have _Q_LOCKED_VAL set */
- if ((val & _Q_TAIL_MASK) == tail) {
- /*
- * The atomic_cond_read_acquire() call above has provided the
- * necessary acquire semantics required for locking.
- */
- old = atomic_cmpxchg_relaxed(&lock->val, val, _Q_LOCKED_VAL);
- if (old == val)
- goto release; /* No contention */
- }
+ /*
+ * In the PV case we might already have _Q_LOCKED_VAL set.
+ *
+ * The atomic_cond_read_acquire() call above has provided the
+ * necessary acquire semantics required for locking.
+ */
+ if (((val & _Q_TAIL_MASK) == tail) &&
+ atomic_try_cmpxchg_relaxed(&lock->val, &val, _Q_LOCKED_VAL))
+ goto release; /* No contention */
/* Either somebody is queued behind us or _Q_PENDING_VAL is set */
set_locked(lock);
When reaching the head of an uncontended queue on the qspinlock slow-path, using a try_cmpxchg instead of a cmpxchg operation to transition the lock work to _Q_LOCKED_VAL generates slightly better code for x86 and pretty much identical code for arm64. Cc: Ingo Molnar <mingo@kernel.org> Reported-by: Peter Zijlstra <peterz@infradead.org> Signed-off-by: Will Deacon <will.deacon@arm.com> --- kernel/locking/qspinlock.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) -- 2.1.4