Message ID | 1463426107-24816-1-git-send-email-maxim.uvarov@linaro.org |
---|---|
State | New |
Headers | show |
On Mon, May 16, 2016 at 2:15 PM, Maxim Uvarov <maxim.uvarov@linaro.org> wrote: > Remove totally untested branch with 128 bit optimizations > for timer code. > This also fixes static assert bug: > https://bugs.linaro.org/show_bug.cgi?id=2211 > > Signed-off-by: Maxim Uvarov <maxim.uvarov@linaro.org> > Reviewed-and-tested-by: Bill Fischofer <bill.fischofer@linaro.org> > --- > platform/linux-generic/odp_timer.c | 107 > ------------------------------------- > 1 file changed, 107 deletions(-) > > diff --git a/platform/linux-generic/odp_timer.c > b/platform/linux-generic/odp_timer.c > index 6b84309..be28da3 100644 > --- a/platform/linux-generic/odp_timer.c > +++ b/platform/linux-generic/odp_timer.c > @@ -11,15 +11,7 @@ > * > */ > > -/* Check if compiler supports 16-byte atomics. GCC needs -mcx16 flag on > x86 */ > -/* Using spin lock actually seems faster on Core2 */ > -#ifdef ODP_ATOMIC_U128 > -/* TB_NEEDS_PAD defined if sizeof(odp_buffer_t) != 8 */ > -#define TB_NEEDS_PAD > -#define TB_SET_PAD(x) ((x).pad = 0) > -#else > #define TB_SET_PAD(x) (void)(x) > -#endif > > #include <odp_posix_extensions.h> > > @@ -70,11 +62,9 @@ > * Mutual exclusion in the absence of CAS16 > > *****************************************************************************/ > > -#ifndef ODP_ATOMIC_U128 > #define NUM_LOCKS 1024 > static _odp_atomic_flag_t locks[NUM_LOCKS]; /* Multiple locks per cache > line! */ > #define IDX2LOCK(idx) (&locks[(idx) % NUM_LOCKS]) > -#endif > > > /****************************************************************************** > * Translation between timeout buffer and timeout header > @@ -98,17 +88,9 @@ static odp_timeout_hdr_t *timeout_hdr(odp_timeout_t tmo) > typedef struct tick_buf_s { > odp_atomic_u64_t exp_tck;/* Expiration tick or TMO_xxx */ > odp_buffer_t tmo_buf;/* ODP_BUFFER_INVALID if timer not active */ > -#ifdef TB_NEEDS_PAD > - uint32_t pad;/* Need to be able to access padding for successful > CAS */ > -#endif > } tick_buf_t > -#ifdef ODP_ATOMIC_U128 > -ODP_ALIGNED(16) /* 16-byte atomic operations need properly aligned > addresses */ > -#endif > ; > > -ODP_STATIC_ASSERT(sizeof(tick_buf_t) == 16, "sizeof(tick_buf_t) == 16"); > - > typedef struct odp_timer_s { > void *user_ptr; > odp_queue_t queue;/* Used for free list when timer is free */ > @@ -378,34 +360,6 @@ static bool timer_reset(uint32_t idx, > tick_buf_t *tb = &tp->tick_buf[idx]; > > if (tmo_buf == NULL || *tmo_buf == ODP_BUFFER_INVALID) { > -#ifdef ODP_ATOMIC_U128 > - tick_buf_t new, old; > - do { > - /* Relaxed and non-atomic read of current values */ > - old.exp_tck.v = tb->exp_tck.v; > - old.tmo_buf = tb->tmo_buf; > - TB_SET_PAD(old); > - /* Check if there actually is a timeout buffer > - * present */ > - if (old.tmo_buf == ODP_BUFFER_INVALID) { > - /* Cannot reset a timer with neither old > nor > - * new timeout buffer */ > - success = false; > - break; > - } > - /* Set up new values */ > - new.exp_tck.v = abs_tck; > - new.tmo_buf = old.tmo_buf; > - TB_SET_PAD(new); > - /* Atomic CAS will fail if we experienced torn > reads, > - * retry update sequence until CAS succeeds */ > - } while (!_odp_atomic_u128_cmp_xchg_mm( > - (_odp_atomic_u128_t *)tb, > - (_uint128_t *)&old, > - (_uint128_t *)&new, > - _ODP_MEMMODEL_RLS, > - _ODP_MEMMODEL_RLX)); > -#else > #ifdef __ARM_ARCH > /* Since barriers are not good for C-A15, we take an > * alternative approach using relaxed memory model */ > @@ -453,7 +407,6 @@ static bool timer_reset(uint32_t idx, > /* Release the lock */ > _odp_atomic_flag_clear(IDX2LOCK(idx)); > #endif > -#endif > } else { > /* We have a new timeout buffer which replaces any old one > */ > /* Fill in some (constant) header fields for timeout > events */ > @@ -468,19 +421,6 @@ static bool timer_reset(uint32_t idx, > } > /* Else ignore buffers of other types */ > odp_buffer_t old_buf = ODP_BUFFER_INVALID; > -#ifdef ODP_ATOMIC_U128 > - tick_buf_t new, old; > - new.exp_tck.v = abs_tck; > - new.tmo_buf = *tmo_buf; > - TB_SET_PAD(new); > - /* We are releasing the new timeout buffer to some other > - * thread */ > - _odp_atomic_u128_xchg_mm((_odp_atomic_u128_t *)tb, > - (_uint128_t *)&new, > - (_uint128_t *)&old, > - _ODP_MEMMODEL_ACQ_RLS); > - old_buf = old.tmo_buf; > -#else > /* Take a related lock */ > while (_odp_atomic_flag_tas(IDX2LOCK(idx))) > /* While lock is taken, spin using relaxed loads */ > @@ -496,7 +436,6 @@ static bool timer_reset(uint32_t idx, > > /* Release the lock */ > _odp_atomic_flag_clear(IDX2LOCK(idx)); > -#endif > /* Return old timeout buffer */ > *tmo_buf = old_buf; > } > @@ -510,18 +449,6 @@ static odp_buffer_t timer_cancel(odp_timer_pool *tp, > tick_buf_t *tb = &tp->tick_buf[idx]; > odp_buffer_t old_buf; > > -#ifdef ODP_ATOMIC_U128 > - tick_buf_t new, old; > - /* Update the timer state (e.g. cancel the current timeout) */ > - new.exp_tck.v = new_state; > - /* Swap out the old buffer */ > - new.tmo_buf = ODP_BUFFER_INVALID; > - TB_SET_PAD(new); > - _odp_atomic_u128_xchg_mm((_odp_atomic_u128_t *)tb, > - (_uint128_t *)&new, (_uint128_t *)&old, > - _ODP_MEMMODEL_RLX); > - old_buf = old.tmo_buf; > -#else > /* Take a related lock */ > while (_odp_atomic_flag_tas(IDX2LOCK(idx))) > /* While lock is taken, spin using relaxed loads */ > @@ -537,7 +464,6 @@ static odp_buffer_t timer_cancel(odp_timer_pool *tp, > > /* Release the lock */ > _odp_atomic_flag_clear(IDX2LOCK(idx)); > -#endif > /* Return the old buffer */ > return old_buf; > } > @@ -548,34 +474,6 @@ static unsigned timer_expire(odp_timer_pool *tp, > uint32_t idx, uint64_t tick) > tick_buf_t *tb = &tp->tick_buf[idx]; > odp_buffer_t tmo_buf = ODP_BUFFER_INVALID; > uint64_t exp_tck; > -#ifdef ODP_ATOMIC_U128 > - /* Atomic re-read for correctness */ > - exp_tck = _odp_atomic_u64_load_mm(&tb->exp_tck, _ODP_MEMMODEL_RLX); > - /* Re-check exp_tck */ > - if (odp_likely(exp_tck <= tick)) { > - /* Attempt to grab timeout buffer, replace with inactive > timer > - * and invalid buffer */ > - tick_buf_t new, old; > - old.exp_tck.v = exp_tck; > - old.tmo_buf = tb->tmo_buf; > - TB_SET_PAD(old); > - /* Set the inactive/expired bit keeping the expiration > tick so > - * that we can check against the expiration tick of the > timeout > - * when it is received */ > - new.exp_tck.v = exp_tck | TMO_INACTIVE; > - new.tmo_buf = ODP_BUFFER_INVALID; > - TB_SET_PAD(new); > - int succ = _odp_atomic_u128_cmp_xchg_mm( > - (_odp_atomic_u128_t *)tb, > - (_uint128_t *)&old, (_uint128_t *)&new, > - _ODP_MEMMODEL_RLS, _ODP_MEMMODEL_RLX); > - if (succ) > - tmo_buf = old.tmo_buf; > - /* Else CAS failed, something changed => skip timer > - * this tick, it will be checked again next tick */ > - } > - /* Else false positive, ignore */ > -#else > /* Take a related lock */ > while (_odp_atomic_flag_tas(IDX2LOCK(idx))) > /* While lock is taken, spin using relaxed loads */ > @@ -600,7 +498,6 @@ static unsigned timer_expire(odp_timer_pool *tp, > uint32_t idx, uint64_t tick) > /* Else false positive, ignore */ > /* Release the lock */ > _odp_atomic_flag_clear(IDX2LOCK(idx)); > -#endif > if (odp_likely(tmo_buf != ODP_BUFFER_INVALID)) { > /* Fill in expiration tick for timeout events */ > if (odp_event_type(odp_buffer_to_event(tmo_buf)) == > @@ -974,13 +871,9 @@ void odp_timeout_free(odp_timeout_t tmo) > > int odp_timer_init_global(void) > { > -#ifndef ODP_ATOMIC_U128 > uint32_t i; > for (i = 0; i < NUM_LOCKS; i++) > _odp_atomic_flag_clear(&locks[i]); > -#else > - ODP_DBG("Using lock-less timer implementation\n"); > -#endif > odp_atomic_init_u32(&num_timer_pools, 0); > > block_sigalarm(); > -- > 2.7.1.250.gff4ea60 > > _______________________________________________ > lng-odp mailing list > lng-odp@lists.linaro.org > https://lists.linaro.org/mailman/listinfo/lng-odp >
I disagree. The 128-bit support is important because that's the lock-free timer implementation which was the whole idea. The lock-based code is just a work-around. We should rather add support in configure to detect compiler support for the -mcx16 flag which enables 128-bit CAS on x86-64. Now when I have an ARM64 target, I can actually write and test 128-bit CAS support on ARM (A64 ISA in AArch64 mode) and contribute that in another patch. I am not sure whether common compilers (e.g. GCC) properly supports 128-bit CAS on ARM64, might have to use LDXP/STXP (load/store exclusive pair) directly. -- Ola On 16 May 2016 at 22:43, Bill Fischofer <bill.fischofer@linaro.org> wrote: > On Mon, May 16, 2016 at 2:15 PM, Maxim Uvarov <maxim.uvarov@linaro.org> > wrote: > > > Remove totally untested branch with 128 bit optimizations > > for timer code. > > This also fixes static assert bug: > > https://bugs.linaro.org/show_bug.cgi?id=2211 > > > > Signed-off-by: Maxim Uvarov <maxim.uvarov@linaro.org> > > > > Reviewed-and-tested-by: Bill Fischofer <bill.fischofer@linaro.org> > > > > --- > > platform/linux-generic/odp_timer.c | 107 > > ------------------------------------- > > 1 file changed, 107 deletions(-) > > > > diff --git a/platform/linux-generic/odp_timer.c > > b/platform/linux-generic/odp_timer.c > > index 6b84309..be28da3 100644 > > --- a/platform/linux-generic/odp_timer.c > > +++ b/platform/linux-generic/odp_timer.c > > @@ -11,15 +11,7 @@ > > * > > */ > > > > -/* Check if compiler supports 16-byte atomics. GCC needs -mcx16 flag on > > x86 */ > > -/* Using spin lock actually seems faster on Core2 */ > > -#ifdef ODP_ATOMIC_U128 > > -/* TB_NEEDS_PAD defined if sizeof(odp_buffer_t) != 8 */ > > -#define TB_NEEDS_PAD > > -#define TB_SET_PAD(x) ((x).pad = 0) > > -#else > > #define TB_SET_PAD(x) (void)(x) > > -#endif > > > > #include <odp_posix_extensions.h> > > > > @@ -70,11 +62,9 @@ > > * Mutual exclusion in the absence of CAS16 > > > > > *****************************************************************************/ > > > > -#ifndef ODP_ATOMIC_U128 > > #define NUM_LOCKS 1024 > > static _odp_atomic_flag_t locks[NUM_LOCKS]; /* Multiple locks per cache > > line! */ > > #define IDX2LOCK(idx) (&locks[(idx) % NUM_LOCKS]) > > -#endif > > > > > > > /****************************************************************************** > > * Translation between timeout buffer and timeout header > > @@ -98,17 +88,9 @@ static odp_timeout_hdr_t *timeout_hdr(odp_timeout_t > tmo) > > typedef struct tick_buf_s { > > odp_atomic_u64_t exp_tck;/* Expiration tick or TMO_xxx */ > > odp_buffer_t tmo_buf;/* ODP_BUFFER_INVALID if timer not active */ > > -#ifdef TB_NEEDS_PAD > > - uint32_t pad;/* Need to be able to access padding for successful > > CAS */ > > -#endif > > } tick_buf_t > > -#ifdef ODP_ATOMIC_U128 > > -ODP_ALIGNED(16) /* 16-byte atomic operations need properly aligned > > addresses */ > > -#endif > > ; > > > > -ODP_STATIC_ASSERT(sizeof(tick_buf_t) == 16, "sizeof(tick_buf_t) == 16"); > > - > > typedef struct odp_timer_s { > > void *user_ptr; > > odp_queue_t queue;/* Used for free list when timer is free */ > > @@ -378,34 +360,6 @@ static bool timer_reset(uint32_t idx, > > tick_buf_t *tb = &tp->tick_buf[idx]; > > > > if (tmo_buf == NULL || *tmo_buf == ODP_BUFFER_INVALID) { > > -#ifdef ODP_ATOMIC_U128 > > - tick_buf_t new, old; > > - do { > > - /* Relaxed and non-atomic read of current values > */ > > - old.exp_tck.v = tb->exp_tck.v; > > - old.tmo_buf = tb->tmo_buf; > > - TB_SET_PAD(old); > > - /* Check if there actually is a timeout buffer > > - * present */ > > - if (old.tmo_buf == ODP_BUFFER_INVALID) { > > - /* Cannot reset a timer with neither old > > nor > > - * new timeout buffer */ > > - success = false; > > - break; > > - } > > - /* Set up new values */ > > - new.exp_tck.v = abs_tck; > > - new.tmo_buf = old.tmo_buf; > > - TB_SET_PAD(new); > > - /* Atomic CAS will fail if we experienced torn > > reads, > > - * retry update sequence until CAS succeeds */ > > - } while (!_odp_atomic_u128_cmp_xchg_mm( > > - (_odp_atomic_u128_t *)tb, > > - (_uint128_t *)&old, > > - (_uint128_t *)&new, > > - _ODP_MEMMODEL_RLS, > > - _ODP_MEMMODEL_RLX)); > > -#else > > #ifdef __ARM_ARCH > > /* Since barriers are not good for C-A15, we take an > > * alternative approach using relaxed memory model */ > > @@ -453,7 +407,6 @@ static bool timer_reset(uint32_t idx, > > /* Release the lock */ > > _odp_atomic_flag_clear(IDX2LOCK(idx)); > > #endif > > -#endif > > } else { > > /* We have a new timeout buffer which replaces any old > one > > */ > > /* Fill in some (constant) header fields for timeout > > events */ > > @@ -468,19 +421,6 @@ static bool timer_reset(uint32_t idx, > > } > > /* Else ignore buffers of other types */ > > odp_buffer_t old_buf = ODP_BUFFER_INVALID; > > -#ifdef ODP_ATOMIC_U128 > > - tick_buf_t new, old; > > - new.exp_tck.v = abs_tck; > > - new.tmo_buf = *tmo_buf; > > - TB_SET_PAD(new); > > - /* We are releasing the new timeout buffer to some other > > - * thread */ > > - _odp_atomic_u128_xchg_mm((_odp_atomic_u128_t *)tb, > > - (_uint128_t *)&new, > > - (_uint128_t *)&old, > > - _ODP_MEMMODEL_ACQ_RLS); > > - old_buf = old.tmo_buf; > > -#else > > /* Take a related lock */ > > while (_odp_atomic_flag_tas(IDX2LOCK(idx))) > > /* While lock is taken, spin using relaxed loads > */ > > @@ -496,7 +436,6 @@ static bool timer_reset(uint32_t idx, > > > > /* Release the lock */ > > _odp_atomic_flag_clear(IDX2LOCK(idx)); > > -#endif > > /* Return old timeout buffer */ > > *tmo_buf = old_buf; > > } > > @@ -510,18 +449,6 @@ static odp_buffer_t timer_cancel(odp_timer_pool *tp, > > tick_buf_t *tb = &tp->tick_buf[idx]; > > odp_buffer_t old_buf; > > > > -#ifdef ODP_ATOMIC_U128 > > - tick_buf_t new, old; > > - /* Update the timer state (e.g. cancel the current timeout) */ > > - new.exp_tck.v = new_state; > > - /* Swap out the old buffer */ > > - new.tmo_buf = ODP_BUFFER_INVALID; > > - TB_SET_PAD(new); > > - _odp_atomic_u128_xchg_mm((_odp_atomic_u128_t *)tb, > > - (_uint128_t *)&new, (_uint128_t *)&old, > > - _ODP_MEMMODEL_RLX); > > - old_buf = old.tmo_buf; > > -#else > > /* Take a related lock */ > > while (_odp_atomic_flag_tas(IDX2LOCK(idx))) > > /* While lock is taken, spin using relaxed loads */ > > @@ -537,7 +464,6 @@ static odp_buffer_t timer_cancel(odp_timer_pool *tp, > > > > /* Release the lock */ > > _odp_atomic_flag_clear(IDX2LOCK(idx)); > > -#endif > > /* Return the old buffer */ > > return old_buf; > > } > > @@ -548,34 +474,6 @@ static unsigned timer_expire(odp_timer_pool *tp, > > uint32_t idx, uint64_t tick) > > tick_buf_t *tb = &tp->tick_buf[idx]; > > odp_buffer_t tmo_buf = ODP_BUFFER_INVALID; > > uint64_t exp_tck; > > -#ifdef ODP_ATOMIC_U128 > > - /* Atomic re-read for correctness */ > > - exp_tck = _odp_atomic_u64_load_mm(&tb->exp_tck, > _ODP_MEMMODEL_RLX); > > - /* Re-check exp_tck */ > > - if (odp_likely(exp_tck <= tick)) { > > - /* Attempt to grab timeout buffer, replace with inactive > > timer > > - * and invalid buffer */ > > - tick_buf_t new, old; > > - old.exp_tck.v = exp_tck; > > - old.tmo_buf = tb->tmo_buf; > > - TB_SET_PAD(old); > > - /* Set the inactive/expired bit keeping the expiration > > tick so > > - * that we can check against the expiration tick of the > > timeout > > - * when it is received */ > > - new.exp_tck.v = exp_tck | TMO_INACTIVE; > > - new.tmo_buf = ODP_BUFFER_INVALID; > > - TB_SET_PAD(new); > > - int succ = _odp_atomic_u128_cmp_xchg_mm( > > - (_odp_atomic_u128_t *)tb, > > - (_uint128_t *)&old, (_uint128_t *)&new, > > - _ODP_MEMMODEL_RLS, _ODP_MEMMODEL_RLX); > > - if (succ) > > - tmo_buf = old.tmo_buf; > > - /* Else CAS failed, something changed => skip timer > > - * this tick, it will be checked again next tick */ > > - } > > - /* Else false positive, ignore */ > > -#else > > /* Take a related lock */ > > while (_odp_atomic_flag_tas(IDX2LOCK(idx))) > > /* While lock is taken, spin using relaxed loads */ > > @@ -600,7 +498,6 @@ static unsigned timer_expire(odp_timer_pool *tp, > > uint32_t idx, uint64_t tick) > > /* Else false positive, ignore */ > > /* Release the lock */ > > _odp_atomic_flag_clear(IDX2LOCK(idx)); > > -#endif > > if (odp_likely(tmo_buf != ODP_BUFFER_INVALID)) { > > /* Fill in expiration tick for timeout events */ > > if (odp_event_type(odp_buffer_to_event(tmo_buf)) == > > @@ -974,13 +871,9 @@ void odp_timeout_free(odp_timeout_t tmo) > > > > int odp_timer_init_global(void) > > { > > -#ifndef ODP_ATOMIC_U128 > > uint32_t i; > > for (i = 0; i < NUM_LOCKS; i++) > > _odp_atomic_flag_clear(&locks[i]); > > -#else > > - ODP_DBG("Using lock-less timer implementation\n"); > > -#endif > > odp_atomic_init_u32(&num_timer_pools, 0); > > > > block_sigalarm(); > > -- > > 2.7.1.250.gff4ea60 > > > > _______________________________________________ > > lng-odp mailing list > > lng-odp@lists.linaro.org > > https://lists.linaro.org/mailman/listinfo/lng-odp > > > _______________________________________________ > lng-odp mailing list > lng-odp@lists.linaro.org > https://lists.linaro.org/mailman/listinfo/lng-odp >
Thanks Ola. The original bug is that this fails compiling with clang on 32-bit systems and the reason is that the struct in that environment winds up being 24 bytes rather than 16 bytes long, so the ODP_STATIC_ASSERT() fails. My original (simple) patch is at http://patches.opendataplane.org/patch/5932/ however Maxim didn't like it. Perhaps you can offer a compromise? On Mon, May 16, 2016 at 4:21 PM, Ola Liljedahl <ola.liljedahl@linaro.org> wrote: > I disagree. The 128-bit support is important because that's the lock-free > timer implementation which was the whole idea. The lock-based code is just > a work-around. We should rather add support in configure to detect compiler > support for the -mcx16 flag which enables 128-bit CAS on x86-64. Now when I > have an ARM64 target, I can actually write and test 128-bit CAS support on > ARM (A64 ISA in AArch64 mode) and contribute that in another patch. I am > not sure whether common compilers (e.g. GCC) properly supports 128-bit CAS > on ARM64, might have to use LDXP/STXP (load/store exclusive pair) directly. > > -- Ola > > On 16 May 2016 at 22:43, Bill Fischofer <bill.fischofer@linaro.org> wrote: > >> On Mon, May 16, 2016 at 2:15 PM, Maxim Uvarov <maxim.uvarov@linaro.org> >> wrote: >> >> > Remove totally untested branch with 128 bit optimizations >> > for timer code. >> > This also fixes static assert bug: >> > https://bugs.linaro.org/show_bug.cgi?id=2211 >> > >> > Signed-off-by: Maxim Uvarov <maxim.uvarov@linaro.org> >> > >> >> Reviewed-and-tested-by: Bill Fischofer <bill.fischofer@linaro.org> >> >> >> > --- >> > platform/linux-generic/odp_timer.c | 107 >> > ------------------------------------- >> > 1 file changed, 107 deletions(-) >> > >> > diff --git a/platform/linux-generic/odp_timer.c >> > b/platform/linux-generic/odp_timer.c >> > index 6b84309..be28da3 100644 >> > --- a/platform/linux-generic/odp_timer.c >> > +++ b/platform/linux-generic/odp_timer.c >> > @@ -11,15 +11,7 @@ >> > * >> > */ >> > >> > -/* Check if compiler supports 16-byte atomics. GCC needs -mcx16 flag on >> > x86 */ >> > -/* Using spin lock actually seems faster on Core2 */ >> > -#ifdef ODP_ATOMIC_U128 >> > -/* TB_NEEDS_PAD defined if sizeof(odp_buffer_t) != 8 */ >> > -#define TB_NEEDS_PAD >> > -#define TB_SET_PAD(x) ((x).pad = 0) >> > -#else >> > #define TB_SET_PAD(x) (void)(x) >> > -#endif >> > >> > #include <odp_posix_extensions.h> >> > >> > @@ -70,11 +62,9 @@ >> > * Mutual exclusion in the absence of CAS16 >> > >> > >> *****************************************************************************/ >> > >> > -#ifndef ODP_ATOMIC_U128 >> > #define NUM_LOCKS 1024 >> > static _odp_atomic_flag_t locks[NUM_LOCKS]; /* Multiple locks per cache >> > line! */ >> > #define IDX2LOCK(idx) (&locks[(idx) % NUM_LOCKS]) >> > -#endif >> > >> > >> > >> /****************************************************************************** >> > * Translation between timeout buffer and timeout header >> > @@ -98,17 +88,9 @@ static odp_timeout_hdr_t *timeout_hdr(odp_timeout_t >> tmo) >> > typedef struct tick_buf_s { >> > odp_atomic_u64_t exp_tck;/* Expiration tick or TMO_xxx */ >> > odp_buffer_t tmo_buf;/* ODP_BUFFER_INVALID if timer not active >> */ >> > -#ifdef TB_NEEDS_PAD >> > - uint32_t pad;/* Need to be able to access padding for successful >> > CAS */ >> > -#endif >> > } tick_buf_t >> > -#ifdef ODP_ATOMIC_U128 >> > -ODP_ALIGNED(16) /* 16-byte atomic operations need properly aligned >> > addresses */ >> > -#endif >> > ; >> > >> > -ODP_STATIC_ASSERT(sizeof(tick_buf_t) == 16, "sizeof(tick_buf_t) == >> 16"); >> > - >> > typedef struct odp_timer_s { >> > void *user_ptr; >> > odp_queue_t queue;/* Used for free list when timer is free */ >> > @@ -378,34 +360,6 @@ static bool timer_reset(uint32_t idx, >> > tick_buf_t *tb = &tp->tick_buf[idx]; >> > >> > if (tmo_buf == NULL || *tmo_buf == ODP_BUFFER_INVALID) { >> > -#ifdef ODP_ATOMIC_U128 >> > - tick_buf_t new, old; >> > - do { >> > - /* Relaxed and non-atomic read of current >> values */ >> > - old.exp_tck.v = tb->exp_tck.v; >> > - old.tmo_buf = tb->tmo_buf; >> > - TB_SET_PAD(old); >> > - /* Check if there actually is a timeout buffer >> > - * present */ >> > - if (old.tmo_buf == ODP_BUFFER_INVALID) { >> > - /* Cannot reset a timer with neither old >> > nor >> > - * new timeout buffer */ >> > - success = false; >> > - break; >> > - } >> > - /* Set up new values */ >> > - new.exp_tck.v = abs_tck; >> > - new.tmo_buf = old.tmo_buf; >> > - TB_SET_PAD(new); >> > - /* Atomic CAS will fail if we experienced torn >> > reads, >> > - * retry update sequence until CAS succeeds */ >> > - } while (!_odp_atomic_u128_cmp_xchg_mm( >> > - (_odp_atomic_u128_t *)tb, >> > - (_uint128_t *)&old, >> > - (_uint128_t *)&new, >> > - _ODP_MEMMODEL_RLS, >> > - _ODP_MEMMODEL_RLX)); >> > -#else >> > #ifdef __ARM_ARCH >> > /* Since barriers are not good for C-A15, we take an >> > * alternative approach using relaxed memory model */ >> > @@ -453,7 +407,6 @@ static bool timer_reset(uint32_t idx, >> > /* Release the lock */ >> > _odp_atomic_flag_clear(IDX2LOCK(idx)); >> > #endif >> > -#endif >> > } else { >> > /* We have a new timeout buffer which replaces any old >> one >> > */ >> > /* Fill in some (constant) header fields for timeout >> > events */ >> > @@ -468,19 +421,6 @@ static bool timer_reset(uint32_t idx, >> > } >> > /* Else ignore buffers of other types */ >> > odp_buffer_t old_buf = ODP_BUFFER_INVALID; >> > -#ifdef ODP_ATOMIC_U128 >> > - tick_buf_t new, old; >> > - new.exp_tck.v = abs_tck; >> > - new.tmo_buf = *tmo_buf; >> > - TB_SET_PAD(new); >> > - /* We are releasing the new timeout buffer to some other >> > - * thread */ >> > - _odp_atomic_u128_xchg_mm((_odp_atomic_u128_t *)tb, >> > - (_uint128_t *)&new, >> > - (_uint128_t *)&old, >> > - _ODP_MEMMODEL_ACQ_RLS); >> > - old_buf = old.tmo_buf; >> > -#else >> > /* Take a related lock */ >> > while (_odp_atomic_flag_tas(IDX2LOCK(idx))) >> > /* While lock is taken, spin using relaxed >> loads */ >> > @@ -496,7 +436,6 @@ static bool timer_reset(uint32_t idx, >> > >> > /* Release the lock */ >> > _odp_atomic_flag_clear(IDX2LOCK(idx)); >> > -#endif >> > /* Return old timeout buffer */ >> > *tmo_buf = old_buf; >> > } >> > @@ -510,18 +449,6 @@ static odp_buffer_t timer_cancel(odp_timer_pool >> *tp, >> > tick_buf_t *tb = &tp->tick_buf[idx]; >> > odp_buffer_t old_buf; >> > >> > -#ifdef ODP_ATOMIC_U128 >> > - tick_buf_t new, old; >> > - /* Update the timer state (e.g. cancel the current timeout) */ >> > - new.exp_tck.v = new_state; >> > - /* Swap out the old buffer */ >> > - new.tmo_buf = ODP_BUFFER_INVALID; >> > - TB_SET_PAD(new); >> > - _odp_atomic_u128_xchg_mm((_odp_atomic_u128_t *)tb, >> > - (_uint128_t *)&new, (_uint128_t *)&old, >> > - _ODP_MEMMODEL_RLX); >> > - old_buf = old.tmo_buf; >> > -#else >> > /* Take a related lock */ >> > while (_odp_atomic_flag_tas(IDX2LOCK(idx))) >> > /* While lock is taken, spin using relaxed loads */ >> > @@ -537,7 +464,6 @@ static odp_buffer_t timer_cancel(odp_timer_pool *tp, >> > >> > /* Release the lock */ >> > _odp_atomic_flag_clear(IDX2LOCK(idx)); >> > -#endif >> > /* Return the old buffer */ >> > return old_buf; >> > } >> > @@ -548,34 +474,6 @@ static unsigned timer_expire(odp_timer_pool *tp, >> > uint32_t idx, uint64_t tick) >> > tick_buf_t *tb = &tp->tick_buf[idx]; >> > odp_buffer_t tmo_buf = ODP_BUFFER_INVALID; >> > uint64_t exp_tck; >> > -#ifdef ODP_ATOMIC_U128 >> > - /* Atomic re-read for correctness */ >> > - exp_tck = _odp_atomic_u64_load_mm(&tb->exp_tck, >> _ODP_MEMMODEL_RLX); >> > - /* Re-check exp_tck */ >> > - if (odp_likely(exp_tck <= tick)) { >> > - /* Attempt to grab timeout buffer, replace with inactive >> > timer >> > - * and invalid buffer */ >> > - tick_buf_t new, old; >> > - old.exp_tck.v = exp_tck; >> > - old.tmo_buf = tb->tmo_buf; >> > - TB_SET_PAD(old); >> > - /* Set the inactive/expired bit keeping the expiration >> > tick so >> > - * that we can check against the expiration tick of the >> > timeout >> > - * when it is received */ >> > - new.exp_tck.v = exp_tck | TMO_INACTIVE; >> > - new.tmo_buf = ODP_BUFFER_INVALID; >> > - TB_SET_PAD(new); >> > - int succ = _odp_atomic_u128_cmp_xchg_mm( >> > - (_odp_atomic_u128_t *)tb, >> > - (_uint128_t *)&old, (_uint128_t *)&new, >> > - _ODP_MEMMODEL_RLS, _ODP_MEMMODEL_RLX); >> > - if (succ) >> > - tmo_buf = old.tmo_buf; >> > - /* Else CAS failed, something changed => skip timer >> > - * this tick, it will be checked again next tick */ >> > - } >> > - /* Else false positive, ignore */ >> > -#else >> > /* Take a related lock */ >> > while (_odp_atomic_flag_tas(IDX2LOCK(idx))) >> > /* While lock is taken, spin using relaxed loads */ >> > @@ -600,7 +498,6 @@ static unsigned timer_expire(odp_timer_pool *tp, >> > uint32_t idx, uint64_t tick) >> > /* Else false positive, ignore */ >> > /* Release the lock */ >> > _odp_atomic_flag_clear(IDX2LOCK(idx)); >> > -#endif >> > if (odp_likely(tmo_buf != ODP_BUFFER_INVALID)) { >> > /* Fill in expiration tick for timeout events */ >> > if (odp_event_type(odp_buffer_to_event(tmo_buf)) == >> > @@ -974,13 +871,9 @@ void odp_timeout_free(odp_timeout_t tmo) >> > >> > int odp_timer_init_global(void) >> > { >> > -#ifndef ODP_ATOMIC_U128 >> > uint32_t i; >> > for (i = 0; i < NUM_LOCKS; i++) >> > _odp_atomic_flag_clear(&locks[i]); >> > -#else >> > - ODP_DBG("Using lock-less timer implementation\n"); >> > -#endif >> > odp_atomic_init_u32(&num_timer_pools, 0); >> > >> > block_sigalarm(); >> > -- >> > 2.7.1.250.gff4ea60 >> > >> > _______________________________________________ >> > lng-odp mailing list >> > lng-odp@lists.linaro.org >> > https://lists.linaro.org/mailman/listinfo/lng-odp >> > >> _______________________________________________ >> lng-odp mailing list >> lng-odp@lists.linaro.org >> https://lists.linaro.org/mailman/listinfo/lng-odp >> > >
On 16 May 2016 at 23:31, Bill Fischofer <bill.fischofer@linaro.org> wrote: > Thanks Ola. The original bug is that this fails compiling with clang on > 32-bit systems and the reason is that the struct in that environment winds > up being 24 bytes rather than 16 bytes long, so the ODP_STATIC_ASSERT() > fails. > The timer code should stop using odp_atomic_u64_t (which will include the spinlock for systems that do not support atomic operations on 64-bit locations, ARMv7A is one of the few 32-bit platforms which do support 64-bit atomics natively). Instead use a plain 64-bit datatype (uint64_t) on such systems. For systems which do not support 128-bit (or 64-bit?) atomic operations (which is the ideal but I think I managed to get something working with only 64-bit atomics), we should use locks instead. I think the code does use a separate array of locks which is indexed through a hash of the timer handle or something similar. This locks makes the additional lock in odp_atomic_u64_t unused. Which platforms use the 64-bit atomic code in the timer? ARMv7A (32-bit but with 64-bit atomics support - ldrexd/strexd) OCTEON/MIPS64 (with 64-bit atomics support - lld/scd) 64-bit POWER? (with 64--bit atomics support - ldarx/stdcx.) Se we need a couple of things: 1) 128-bit CAS support on e.g. x86 (requires that configure detects and enables -mcx16 flag). 2) 128-bit ldxp/stxp support on ARM64 (A64 ISA in AArch64 mode). I don't think common compilers support 128-bit CAS on ARM64 (it's a bit tricky) so need to use inline assembly. 3) Some cleanup in the timer code not to use odp_atomic_u64_t on systems which do not support 64-bit atomic operations (and instead uses the spin lock array). This will avoid the failed static assert. > My original (simple) patch is at > http://patches.opendataplane.org/patch/5932/ however Maxim didn't like > it. Perhaps you can offer a compromise? > > On Mon, May 16, 2016 at 4:21 PM, Ola Liljedahl <ola.liljedahl@linaro.org> > wrote: > >> I disagree. The 128-bit support is important because that's the lock-free >> timer implementation which was the whole idea. The lock-based code is just >> a work-around. We should rather add support in configure to detect compiler >> support for the -mcx16 flag which enables 128-bit CAS on x86-64. Now when I >> have an ARM64 target, I can actually write and test 128-bit CAS support on >> ARM (A64 ISA in AArch64 mode) and contribute that in another patch. I am >> not sure whether common compilers (e.g. GCC) properly supports 128-bit CAS >> on ARM64, might have to use LDXP/STXP (load/store exclusive pair) directly. >> >> -- Ola >> >> On 16 May 2016 at 22:43, Bill Fischofer <bill.fischofer@linaro.org> >> wrote: >> >>> On Mon, May 16, 2016 at 2:15 PM, Maxim Uvarov <maxim.uvarov@linaro.org> >>> wrote: >>> >>> > Remove totally untested branch with 128 bit optimizations >>> > for timer code. >>> > This also fixes static assert bug: >>> > https://bugs.linaro.org/show_bug.cgi?id=2211 >>> > >>> > Signed-off-by: Maxim Uvarov <maxim.uvarov@linaro.org> >>> > >>> >>> Reviewed-and-tested-by: Bill Fischofer <bill.fischofer@linaro.org> >>> >>> >>> > --- >>> > platform/linux-generic/odp_timer.c | 107 >>> > ------------------------------------- >>> > 1 file changed, 107 deletions(-) >>> > >>> > diff --git a/platform/linux-generic/odp_timer.c >>> > b/platform/linux-generic/odp_timer.c >>> > index 6b84309..be28da3 100644 >>> > --- a/platform/linux-generic/odp_timer.c >>> > +++ b/platform/linux-generic/odp_timer.c >>> > @@ -11,15 +11,7 @@ >>> > * >>> > */ >>> > >>> > -/* Check if compiler supports 16-byte atomics. GCC needs -mcx16 flag >>> on >>> > x86 */ >>> > -/* Using spin lock actually seems faster on Core2 */ >>> > -#ifdef ODP_ATOMIC_U128 >>> > -/* TB_NEEDS_PAD defined if sizeof(odp_buffer_t) != 8 */ >>> > -#define TB_NEEDS_PAD >>> > -#define TB_SET_PAD(x) ((x).pad = 0) >>> > -#else >>> > #define TB_SET_PAD(x) (void)(x) >>> > -#endif >>> > >>> > #include <odp_posix_extensions.h> >>> > >>> > @@ -70,11 +62,9 @@ >>> > * Mutual exclusion in the absence of CAS16 >>> > >>> > >>> *****************************************************************************/ >>> > >>> > -#ifndef ODP_ATOMIC_U128 >>> > #define NUM_LOCKS 1024 >>> > static _odp_atomic_flag_t locks[NUM_LOCKS]; /* Multiple locks per >>> cache >>> > line! */ >>> > #define IDX2LOCK(idx) (&locks[(idx) % NUM_LOCKS]) >>> > -#endif >>> > >>> > >>> > >>> /****************************************************************************** >>> > * Translation between timeout buffer and timeout header >>> > @@ -98,17 +88,9 @@ static odp_timeout_hdr_t *timeout_hdr(odp_timeout_t >>> tmo) >>> > typedef struct tick_buf_s { >>> > odp_atomic_u64_t exp_tck;/* Expiration tick or TMO_xxx */ >>> > odp_buffer_t tmo_buf;/* ODP_BUFFER_INVALID if timer not active >>> */ >>> > -#ifdef TB_NEEDS_PAD >>> > - uint32_t pad;/* Need to be able to access padding for >>> successful >>> > CAS */ >>> > -#endif >>> > } tick_buf_t >>> > -#ifdef ODP_ATOMIC_U128 >>> > -ODP_ALIGNED(16) /* 16-byte atomic operations need properly aligned >>> > addresses */ >>> > -#endif >>> > ; >>> > >>> > -ODP_STATIC_ASSERT(sizeof(tick_buf_t) == 16, "sizeof(tick_buf_t) == >>> 16"); >>> > - >>> > typedef struct odp_timer_s { >>> > void *user_ptr; >>> > odp_queue_t queue;/* Used for free list when timer is free */ >>> > @@ -378,34 +360,6 @@ static bool timer_reset(uint32_t idx, >>> > tick_buf_t *tb = &tp->tick_buf[idx]; >>> > >>> > if (tmo_buf == NULL || *tmo_buf == ODP_BUFFER_INVALID) { >>> > -#ifdef ODP_ATOMIC_U128 >>> > - tick_buf_t new, old; >>> > - do { >>> > - /* Relaxed and non-atomic read of current >>> values */ >>> > - old.exp_tck.v = tb->exp_tck.v; >>> > - old.tmo_buf = tb->tmo_buf; >>> > - TB_SET_PAD(old); >>> > - /* Check if there actually is a timeout buffer >>> > - * present */ >>> > - if (old.tmo_buf == ODP_BUFFER_INVALID) { >>> > - /* Cannot reset a timer with neither >>> old >>> > nor >>> > - * new timeout buffer */ >>> > - success = false; >>> > - break; >>> > - } >>> > - /* Set up new values */ >>> > - new.exp_tck.v = abs_tck; >>> > - new.tmo_buf = old.tmo_buf; >>> > - TB_SET_PAD(new); >>> > - /* Atomic CAS will fail if we experienced torn >>> > reads, >>> > - * retry update sequence until CAS succeeds */ >>> > - } while (!_odp_atomic_u128_cmp_xchg_mm( >>> > - (_odp_atomic_u128_t *)tb, >>> > - (_uint128_t *)&old, >>> > - (_uint128_t *)&new, >>> > - _ODP_MEMMODEL_RLS, >>> > - _ODP_MEMMODEL_RLX)); >>> > -#else >>> > #ifdef __ARM_ARCH >>> > /* Since barriers are not good for C-A15, we take an >>> > * alternative approach using relaxed memory model */ >>> > @@ -453,7 +407,6 @@ static bool timer_reset(uint32_t idx, >>> > /* Release the lock */ >>> > _odp_atomic_flag_clear(IDX2LOCK(idx)); >>> > #endif >>> > -#endif >>> > } else { >>> > /* We have a new timeout buffer which replaces any old >>> one >>> > */ >>> > /* Fill in some (constant) header fields for timeout >>> > events */ >>> > @@ -468,19 +421,6 @@ static bool timer_reset(uint32_t idx, >>> > } >>> > /* Else ignore buffers of other types */ >>> > odp_buffer_t old_buf = ODP_BUFFER_INVALID; >>> > -#ifdef ODP_ATOMIC_U128 >>> > - tick_buf_t new, old; >>> > - new.exp_tck.v = abs_tck; >>> > - new.tmo_buf = *tmo_buf; >>> > - TB_SET_PAD(new); >>> > - /* We are releasing the new timeout buffer to some >>> other >>> > - * thread */ >>> > - _odp_atomic_u128_xchg_mm((_odp_atomic_u128_t *)tb, >>> > - (_uint128_t *)&new, >>> > - (_uint128_t *)&old, >>> > - _ODP_MEMMODEL_ACQ_RLS); >>> > - old_buf = old.tmo_buf; >>> > -#else >>> > /* Take a related lock */ >>> > while (_odp_atomic_flag_tas(IDX2LOCK(idx))) >>> > /* While lock is taken, spin using relaxed >>> loads */ >>> > @@ -496,7 +436,6 @@ static bool timer_reset(uint32_t idx, >>> > >>> > /* Release the lock */ >>> > _odp_atomic_flag_clear(IDX2LOCK(idx)); >>> > -#endif >>> > /* Return old timeout buffer */ >>> > *tmo_buf = old_buf; >>> > } >>> > @@ -510,18 +449,6 @@ static odp_buffer_t timer_cancel(odp_timer_pool >>> *tp, >>> > tick_buf_t *tb = &tp->tick_buf[idx]; >>> > odp_buffer_t old_buf; >>> > >>> > -#ifdef ODP_ATOMIC_U128 >>> > - tick_buf_t new, old; >>> > - /* Update the timer state (e.g. cancel the current timeout) */ >>> > - new.exp_tck.v = new_state; >>> > - /* Swap out the old buffer */ >>> > - new.tmo_buf = ODP_BUFFER_INVALID; >>> > - TB_SET_PAD(new); >>> > - _odp_atomic_u128_xchg_mm((_odp_atomic_u128_t *)tb, >>> > - (_uint128_t *)&new, (_uint128_t >>> *)&old, >>> > - _ODP_MEMMODEL_RLX); >>> > - old_buf = old.tmo_buf; >>> > -#else >>> > /* Take a related lock */ >>> > while (_odp_atomic_flag_tas(IDX2LOCK(idx))) >>> > /* While lock is taken, spin using relaxed loads */ >>> > @@ -537,7 +464,6 @@ static odp_buffer_t timer_cancel(odp_timer_pool >>> *tp, >>> > >>> > /* Release the lock */ >>> > _odp_atomic_flag_clear(IDX2LOCK(idx)); >>> > -#endif >>> > /* Return the old buffer */ >>> > return old_buf; >>> > } >>> > @@ -548,34 +474,6 @@ static unsigned timer_expire(odp_timer_pool *tp, >>> > uint32_t idx, uint64_t tick) >>> > tick_buf_t *tb = &tp->tick_buf[idx]; >>> > odp_buffer_t tmo_buf = ODP_BUFFER_INVALID; >>> > uint64_t exp_tck; >>> > -#ifdef ODP_ATOMIC_U128 >>> > - /* Atomic re-read for correctness */ >>> > - exp_tck = _odp_atomic_u64_load_mm(&tb->exp_tck, >>> _ODP_MEMMODEL_RLX); >>> > - /* Re-check exp_tck */ >>> > - if (odp_likely(exp_tck <= tick)) { >>> > - /* Attempt to grab timeout buffer, replace with >>> inactive >>> > timer >>> > - * and invalid buffer */ >>> > - tick_buf_t new, old; >>> > - old.exp_tck.v = exp_tck; >>> > - old.tmo_buf = tb->tmo_buf; >>> > - TB_SET_PAD(old); >>> > - /* Set the inactive/expired bit keeping the expiration >>> > tick so >>> > - * that we can check against the expiration tick of the >>> > timeout >>> > - * when it is received */ >>> > - new.exp_tck.v = exp_tck | TMO_INACTIVE; >>> > - new.tmo_buf = ODP_BUFFER_INVALID; >>> > - TB_SET_PAD(new); >>> > - int succ = _odp_atomic_u128_cmp_xchg_mm( >>> > - (_odp_atomic_u128_t *)tb, >>> > - (_uint128_t *)&old, (_uint128_t *)&new, >>> > - _ODP_MEMMODEL_RLS, _ODP_MEMMODEL_RLX); >>> > - if (succ) >>> > - tmo_buf = old.tmo_buf; >>> > - /* Else CAS failed, something changed => skip timer >>> > - * this tick, it will be checked again next tick */ >>> > - } >>> > - /* Else false positive, ignore */ >>> > -#else >>> > /* Take a related lock */ >>> > while (_odp_atomic_flag_tas(IDX2LOCK(idx))) >>> > /* While lock is taken, spin using relaxed loads */ >>> > @@ -600,7 +498,6 @@ static unsigned timer_expire(odp_timer_pool *tp, >>> > uint32_t idx, uint64_t tick) >>> > /* Else false positive, ignore */ >>> > /* Release the lock */ >>> > _odp_atomic_flag_clear(IDX2LOCK(idx)); >>> > -#endif >>> > if (odp_likely(tmo_buf != ODP_BUFFER_INVALID)) { >>> > /* Fill in expiration tick for timeout events */ >>> > if (odp_event_type(odp_buffer_to_event(tmo_buf)) == >>> > @@ -974,13 +871,9 @@ void odp_timeout_free(odp_timeout_t tmo) >>> > >>> > int odp_timer_init_global(void) >>> > { >>> > -#ifndef ODP_ATOMIC_U128 >>> > uint32_t i; >>> > for (i = 0; i < NUM_LOCKS; i++) >>> > _odp_atomic_flag_clear(&locks[i]); >>> > -#else >>> > - ODP_DBG("Using lock-less timer implementation\n"); >>> > -#endif >>> > odp_atomic_init_u32(&num_timer_pools, 0); >>> > >>> > block_sigalarm(); >>> > -- >>> > 2.7.1.250.gff4ea60 >>> > >>> > _______________________________________________ >>> > lng-odp mailing list >>> > lng-odp@lists.linaro.org >>> > https://lists.linaro.org/mailman/listinfo/lng-odp >>> > >>> _______________________________________________ >>> lng-odp mailing list >>> lng-odp@lists.linaro.org >>> https://lists.linaro.org/mailman/listinfo/lng-odp >>> >> >> >
On Mon, May 16, 2016 at 4:55 PM, Ola Liljedahl <ola.liljedahl@linaro.org> wrote: > On 16 May 2016 at 23:31, Bill Fischofer <bill.fischofer@linaro.org> wrote: > >> Thanks Ola. The original bug is that this fails compiling with clang on >> 32-bit systems and the reason is that the struct in that environment winds >> up being 24 bytes rather than 16 bytes long, so the ODP_STATIC_ASSERT() >> fails. >> > The timer code should stop using odp_atomic_u64_t (which will include the > spinlock for systems that do not support atomic operations on 64-bit > locations, ARMv7A is one of the few 32-bit platforms which do support > 64-bit atomics natively). Instead use a plain 64-bit datatype (uint64_t) on > such systems. For systems which do not support 128-bit (or 64-bit?) atomic > operations (which is the ideal but I think I managed to get something > working with only 64-bit atomics), we should use locks instead. I think the > code does use a separate array of locks which is indexed through a hash of > the timer handle or something similar. This locks makes the additional lock > in odp_atomic_u64_t unused. > > Which platforms use the 64-bit atomic code in the timer? > ARMv7A (32-bit but with 64-bit atomics support - ldrexd/strexd) > OCTEON/MIPS64 (with 64-bit atomics support - lld/scd) > 64-bit POWER? (with 64--bit atomics support - ldarx/stdcx.) > > Se we need a couple of things: > 1) 128-bit CAS support on e.g. x86 (requires that configure detects and > enables -mcx16 flag). > 2) 128-bit ldxp/stxp support on ARM64 (A64 ISA in AArch64 mode). I don't > think common compilers support 128-bit CAS on ARM64 (it's a bit tricky) so > need to use inline assembly. > 3) Some cleanup in the timer code not to use odp_atomic_u64_t on systems > which do not support 64-bit atomic operations (and instead uses the spin > lock array). This will avoid the failed static assert. > Unless you'd like to submit that patch this week, we need to do something on at least a temporary basis to close out Monarch RC3. Sounds like this is an area we should revisit for restructure/cleanup for Tiger Moth. Given the divergence between near-term need and the (better) longer-term solution you outline, what do you recommend for now? > > >> My original (simple) patch is at >> http://patches.opendataplane.org/patch/5932/ however Maxim didn't like >> it. Perhaps you can offer a compromise? >> >> On Mon, May 16, 2016 at 4:21 PM, Ola Liljedahl <ola.liljedahl@linaro.org> >> wrote: >> >>> I disagree. The 128-bit support is important because that's the >>> lock-free timer implementation which was the whole idea. The lock-based >>> code is just a work-around. We should rather add support in configure to >>> detect compiler support for the -mcx16 flag which enables 128-bit CAS on >>> x86-64. Now when I have an ARM64 target, I can actually write and test >>> 128-bit CAS support on ARM (A64 ISA in AArch64 mode) and contribute that in >>> another patch. I am not sure whether common compilers (e.g. GCC) properly >>> supports 128-bit CAS on ARM64, might have to use LDXP/STXP (load/store >>> exclusive pair) directly. >>> >>> -- Ola >>> >>> On 16 May 2016 at 22:43, Bill Fischofer <bill.fischofer@linaro.org> >>> wrote: >>> >>>> On Mon, May 16, 2016 at 2:15 PM, Maxim Uvarov <maxim.uvarov@linaro.org> >>>> wrote: >>>> >>>> > Remove totally untested branch with 128 bit optimizations >>>> > for timer code. >>>> > This also fixes static assert bug: >>>> > https://bugs.linaro.org/show_bug.cgi?id=2211 >>>> > >>>> > Signed-off-by: Maxim Uvarov <maxim.uvarov@linaro.org> >>>> > >>>> >>>> Reviewed-and-tested-by: Bill Fischofer <bill.fischofer@linaro.org> >>>> >>>> >>>> > --- >>>> > platform/linux-generic/odp_timer.c | 107 >>>> > ------------------------------------- >>>> > 1 file changed, 107 deletions(-) >>>> > >>>> > diff --git a/platform/linux-generic/odp_timer.c >>>> > b/platform/linux-generic/odp_timer.c >>>> > index 6b84309..be28da3 100644 >>>> > --- a/platform/linux-generic/odp_timer.c >>>> > +++ b/platform/linux-generic/odp_timer.c >>>> > @@ -11,15 +11,7 @@ >>>> > * >>>> > */ >>>> > >>>> > -/* Check if compiler supports 16-byte atomics. GCC needs -mcx16 flag >>>> on >>>> > x86 */ >>>> > -/* Using spin lock actually seems faster on Core2 */ >>>> > -#ifdef ODP_ATOMIC_U128 >>>> > -/* TB_NEEDS_PAD defined if sizeof(odp_buffer_t) != 8 */ >>>> > -#define TB_NEEDS_PAD >>>> > -#define TB_SET_PAD(x) ((x).pad = 0) >>>> > -#else >>>> > #define TB_SET_PAD(x) (void)(x) >>>> > -#endif >>>> > >>>> > #include <odp_posix_extensions.h> >>>> > >>>> > @@ -70,11 +62,9 @@ >>>> > * Mutual exclusion in the absence of CAS16 >>>> > >>>> > >>>> *****************************************************************************/ >>>> > >>>> > -#ifndef ODP_ATOMIC_U128 >>>> > #define NUM_LOCKS 1024 >>>> > static _odp_atomic_flag_t locks[NUM_LOCKS]; /* Multiple locks per >>>> cache >>>> > line! */ >>>> > #define IDX2LOCK(idx) (&locks[(idx) % NUM_LOCKS]) >>>> > -#endif >>>> > >>>> > >>>> > >>>> /****************************************************************************** >>>> > * Translation between timeout buffer and timeout header >>>> > @@ -98,17 +88,9 @@ static odp_timeout_hdr_t >>>> *timeout_hdr(odp_timeout_t tmo) >>>> > typedef struct tick_buf_s { >>>> > odp_atomic_u64_t exp_tck;/* Expiration tick or TMO_xxx */ >>>> > odp_buffer_t tmo_buf;/* ODP_BUFFER_INVALID if timer not >>>> active */ >>>> > -#ifdef TB_NEEDS_PAD >>>> > - uint32_t pad;/* Need to be able to access padding for >>>> successful >>>> > CAS */ >>>> > -#endif >>>> > } tick_buf_t >>>> > -#ifdef ODP_ATOMIC_U128 >>>> > -ODP_ALIGNED(16) /* 16-byte atomic operations need properly aligned >>>> > addresses */ >>>> > -#endif >>>> > ; >>>> > >>>> > -ODP_STATIC_ASSERT(sizeof(tick_buf_t) == 16, "sizeof(tick_buf_t) == >>>> 16"); >>>> > - >>>> > typedef struct odp_timer_s { >>>> > void *user_ptr; >>>> > odp_queue_t queue;/* Used for free list when timer is free */ >>>> > @@ -378,34 +360,6 @@ static bool timer_reset(uint32_t idx, >>>> > tick_buf_t *tb = &tp->tick_buf[idx]; >>>> > >>>> > if (tmo_buf == NULL || *tmo_buf == ODP_BUFFER_INVALID) { >>>> > -#ifdef ODP_ATOMIC_U128 >>>> > - tick_buf_t new, old; >>>> > - do { >>>> > - /* Relaxed and non-atomic read of current >>>> values */ >>>> > - old.exp_tck.v = tb->exp_tck.v; >>>> > - old.tmo_buf = tb->tmo_buf; >>>> > - TB_SET_PAD(old); >>>> > - /* Check if there actually is a timeout buffer >>>> > - * present */ >>>> > - if (old.tmo_buf == ODP_BUFFER_INVALID) { >>>> > - /* Cannot reset a timer with neither >>>> old >>>> > nor >>>> > - * new timeout buffer */ >>>> > - success = false; >>>> > - break; >>>> > - } >>>> > - /* Set up new values */ >>>> > - new.exp_tck.v = abs_tck; >>>> > - new.tmo_buf = old.tmo_buf; >>>> > - TB_SET_PAD(new); >>>> > - /* Atomic CAS will fail if we experienced torn >>>> > reads, >>>> > - * retry update sequence until CAS succeeds */ >>>> > - } while (!_odp_atomic_u128_cmp_xchg_mm( >>>> > - (_odp_atomic_u128_t *)tb, >>>> > - (_uint128_t *)&old, >>>> > - (_uint128_t *)&new, >>>> > - _ODP_MEMMODEL_RLS, >>>> > - _ODP_MEMMODEL_RLX)); >>>> > -#else >>>> > #ifdef __ARM_ARCH >>>> > /* Since barriers are not good for C-A15, we take an >>>> > * alternative approach using relaxed memory model */ >>>> > @@ -453,7 +407,6 @@ static bool timer_reset(uint32_t idx, >>>> > /* Release the lock */ >>>> > _odp_atomic_flag_clear(IDX2LOCK(idx)); >>>> > #endif >>>> > -#endif >>>> > } else { >>>> > /* We have a new timeout buffer which replaces any >>>> old one >>>> > */ >>>> > /* Fill in some (constant) header fields for timeout >>>> > events */ >>>> > @@ -468,19 +421,6 @@ static bool timer_reset(uint32_t idx, >>>> > } >>>> > /* Else ignore buffers of other types */ >>>> > odp_buffer_t old_buf = ODP_BUFFER_INVALID; >>>> > -#ifdef ODP_ATOMIC_U128 >>>> > - tick_buf_t new, old; >>>> > - new.exp_tck.v = abs_tck; >>>> > - new.tmo_buf = *tmo_buf; >>>> > - TB_SET_PAD(new); >>>> > - /* We are releasing the new timeout buffer to some >>>> other >>>> > - * thread */ >>>> > - _odp_atomic_u128_xchg_mm((_odp_atomic_u128_t *)tb, >>>> > - (_uint128_t *)&new, >>>> > - (_uint128_t *)&old, >>>> > - _ODP_MEMMODEL_ACQ_RLS); >>>> > - old_buf = old.tmo_buf; >>>> > -#else >>>> > /* Take a related lock */ >>>> > while (_odp_atomic_flag_tas(IDX2LOCK(idx))) >>>> > /* While lock is taken, spin using relaxed >>>> loads */ >>>> > @@ -496,7 +436,6 @@ static bool timer_reset(uint32_t idx, >>>> > >>>> > /* Release the lock */ >>>> > _odp_atomic_flag_clear(IDX2LOCK(idx)); >>>> > -#endif >>>> > /* Return old timeout buffer */ >>>> > *tmo_buf = old_buf; >>>> > } >>>> > @@ -510,18 +449,6 @@ static odp_buffer_t timer_cancel(odp_timer_pool >>>> *tp, >>>> > tick_buf_t *tb = &tp->tick_buf[idx]; >>>> > odp_buffer_t old_buf; >>>> > >>>> > -#ifdef ODP_ATOMIC_U128 >>>> > - tick_buf_t new, old; >>>> > - /* Update the timer state (e.g. cancel the current timeout) */ >>>> > - new.exp_tck.v = new_state; >>>> > - /* Swap out the old buffer */ >>>> > - new.tmo_buf = ODP_BUFFER_INVALID; >>>> > - TB_SET_PAD(new); >>>> > - _odp_atomic_u128_xchg_mm((_odp_atomic_u128_t *)tb, >>>> > - (_uint128_t *)&new, (_uint128_t >>>> *)&old, >>>> > - _ODP_MEMMODEL_RLX); >>>> > - old_buf = old.tmo_buf; >>>> > -#else >>>> > /* Take a related lock */ >>>> > while (_odp_atomic_flag_tas(IDX2LOCK(idx))) >>>> > /* While lock is taken, spin using relaxed loads */ >>>> > @@ -537,7 +464,6 @@ static odp_buffer_t timer_cancel(odp_timer_pool >>>> *tp, >>>> > >>>> > /* Release the lock */ >>>> > _odp_atomic_flag_clear(IDX2LOCK(idx)); >>>> > -#endif >>>> > /* Return the old buffer */ >>>> > return old_buf; >>>> > } >>>> > @@ -548,34 +474,6 @@ static unsigned timer_expire(odp_timer_pool *tp, >>>> > uint32_t idx, uint64_t tick) >>>> > tick_buf_t *tb = &tp->tick_buf[idx]; >>>> > odp_buffer_t tmo_buf = ODP_BUFFER_INVALID; >>>> > uint64_t exp_tck; >>>> > -#ifdef ODP_ATOMIC_U128 >>>> > - /* Atomic re-read for correctness */ >>>> > - exp_tck = _odp_atomic_u64_load_mm(&tb->exp_tck, >>>> _ODP_MEMMODEL_RLX); >>>> > - /* Re-check exp_tck */ >>>> > - if (odp_likely(exp_tck <= tick)) { >>>> > - /* Attempt to grab timeout buffer, replace with >>>> inactive >>>> > timer >>>> > - * and invalid buffer */ >>>> > - tick_buf_t new, old; >>>> > - old.exp_tck.v = exp_tck; >>>> > - old.tmo_buf = tb->tmo_buf; >>>> > - TB_SET_PAD(old); >>>> > - /* Set the inactive/expired bit keeping the expiration >>>> > tick so >>>> > - * that we can check against the expiration tick of >>>> the >>>> > timeout >>>> > - * when it is received */ >>>> > - new.exp_tck.v = exp_tck | TMO_INACTIVE; >>>> > - new.tmo_buf = ODP_BUFFER_INVALID; >>>> > - TB_SET_PAD(new); >>>> > - int succ = _odp_atomic_u128_cmp_xchg_mm( >>>> > - (_odp_atomic_u128_t *)tb, >>>> > - (_uint128_t *)&old, (_uint128_t >>>> *)&new, >>>> > - _ODP_MEMMODEL_RLS, _ODP_MEMMODEL_RLX); >>>> > - if (succ) >>>> > - tmo_buf = old.tmo_buf; >>>> > - /* Else CAS failed, something changed => skip timer >>>> > - * this tick, it will be checked again next tick */ >>>> > - } >>>> > - /* Else false positive, ignore */ >>>> > -#else >>>> > /* Take a related lock */ >>>> > while (_odp_atomic_flag_tas(IDX2LOCK(idx))) >>>> > /* While lock is taken, spin using relaxed loads */ >>>> > @@ -600,7 +498,6 @@ static unsigned timer_expire(odp_timer_pool *tp, >>>> > uint32_t idx, uint64_t tick) >>>> > /* Else false positive, ignore */ >>>> > /* Release the lock */ >>>> > _odp_atomic_flag_clear(IDX2LOCK(idx)); >>>> > -#endif >>>> > if (odp_likely(tmo_buf != ODP_BUFFER_INVALID)) { >>>> > /* Fill in expiration tick for timeout events */ >>>> > if (odp_event_type(odp_buffer_to_event(tmo_buf)) == >>>> > @@ -974,13 +871,9 @@ void odp_timeout_free(odp_timeout_t tmo) >>>> > >>>> > int odp_timer_init_global(void) >>>> > { >>>> > -#ifndef ODP_ATOMIC_U128 >>>> > uint32_t i; >>>> > for (i = 0; i < NUM_LOCKS; i++) >>>> > _odp_atomic_flag_clear(&locks[i]); >>>> > -#else >>>> > - ODP_DBG("Using lock-less timer implementation\n"); >>>> > -#endif >>>> > odp_atomic_init_u32(&num_timer_pools, 0); >>>> > >>>> > block_sigalarm(); >>>> > -- >>>> > 2.7.1.250.gff4ea60 >>>> > >>>> > _______________________________________________ >>>> > lng-odp mailing list >>>> > lng-odp@lists.linaro.org >>>> > https://lists.linaro.org/mailman/listinfo/lng-odp >>>> > >>>> _______________________________________________ >>>> lng-odp mailing list >>>> lng-odp@lists.linaro.org >>>> https://lists.linaro.org/mailman/listinfo/lng-odp >>>> >>> >>> >> >
On 17 May 2016 at 00:45, Bill Fischofer <bill.fischofer@linaro.org> wrote: > > > On Mon, May 16, 2016 at 4:55 PM, Ola Liljedahl <ola.liljedahl@linaro.org> > wrote: > >> On 16 May 2016 at 23:31, Bill Fischofer <bill.fischofer@linaro.org> >> wrote: >> >>> Thanks Ola. The original bug is that this fails compiling with clang on >>> 32-bit systems and the reason is that the struct in that environment winds >>> up being 24 bytes rather than 16 bytes long, so the ODP_STATIC_ASSERT() >>> fails. >>> >> The timer code should stop using odp_atomic_u64_t (which will include the >> spinlock for systems that do not support atomic operations on 64-bit >> locations, ARMv7A is one of the few 32-bit platforms which do support >> 64-bit atomics natively). Instead use a plain 64-bit datatype (uint64_t) on >> such systems. For systems which do not support 128-bit (or 64-bit?) atomic >> operations (which is the ideal but I think I managed to get something >> working with only 64-bit atomics), we should use locks instead. I think the >> code does use a separate array of locks which is indexed through a hash of >> the timer handle or something similar. This locks makes the additional lock >> in odp_atomic_u64_t unused. >> >> Which platforms use the 64-bit atomic code in the timer? >> ARMv7A (32-bit but with 64-bit atomics support - ldrexd/strexd) >> OCTEON/MIPS64 (with 64-bit atomics support - lld/scd) >> 64-bit POWER? (with 64--bit atomics support - ldarx/stdcx.) >> >> Se we need a couple of things: >> 1) 128-bit CAS support on e.g. x86 (requires that configure detects and >> enables -mcx16 flag). >> 2) 128-bit ldxp/stxp support on ARM64 (A64 ISA in AArch64 mode). I don't >> think common compilers support 128-bit CAS on ARM64 (it's a bit tricky) so >> need to use inline assembly. >> 3) Some cleanup in the timer code not to use odp_atomic_u64_t on systems >> which do not support 64-bit atomic operations (and instead uses the spin >> lock array). This will avoid the failed static assert. >> > > Unless you'd like to submit that patch this week, we need to do something > on at least a temporary basis to close out Monarch RC3. Sounds like this is > an area we should revisit for restructure/cleanup for Tiger Moth. Given the > divergence between near-term need and the (better) longer-term solution you > outline, what do you recommend for now? > Fix the static assert problem on certain 32-bit targets by changing odp_atomic_u64_t to uint64_t on those targets. > > >> >> >>> My original (simple) patch is at >>> http://patches.opendataplane.org/patch/5932/ however Maxim didn't like >>> it. Perhaps you can offer a compromise? >>> >>> On Mon, May 16, 2016 at 4:21 PM, Ola Liljedahl <ola.liljedahl@linaro.org >>> > wrote: >>> >>>> I disagree. The 128-bit support is important because that's the >>>> lock-free timer implementation which was the whole idea. The lock-based >>>> code is just a work-around. We should rather add support in configure to >>>> detect compiler support for the -mcx16 flag which enables 128-bit CAS on >>>> x86-64. Now when I have an ARM64 target, I can actually write and test >>>> 128-bit CAS support on ARM (A64 ISA in AArch64 mode) and contribute that in >>>> another patch. I am not sure whether common compilers (e.g. GCC) properly >>>> supports 128-bit CAS on ARM64, might have to use LDXP/STXP (load/store >>>> exclusive pair) directly. >>>> >>>> -- Ola >>>> >>>> On 16 May 2016 at 22:43, Bill Fischofer <bill.fischofer@linaro.org> >>>> wrote: >>>> >>>>> On Mon, May 16, 2016 at 2:15 PM, Maxim Uvarov <maxim.uvarov@linaro.org >>>>> > >>>>> wrote: >>>>> >>>>> > Remove totally untested branch with 128 bit optimizations >>>>> > for timer code. >>>>> > This also fixes static assert bug: >>>>> > https://bugs.linaro.org/show_bug.cgi?id=2211 >>>>> > >>>>> > Signed-off-by: Maxim Uvarov <maxim.uvarov@linaro.org> >>>>> > >>>>> >>>>> Reviewed-and-tested-by: Bill Fischofer <bill.fischofer@linaro.org> >>>>> >>>>> >>>>> > --- >>>>> > platform/linux-generic/odp_timer.c | 107 >>>>> > ------------------------------------- >>>>> > 1 file changed, 107 deletions(-) >>>>> > >>>>> > diff --git a/platform/linux-generic/odp_timer.c >>>>> > b/platform/linux-generic/odp_timer.c >>>>> > index 6b84309..be28da3 100644 >>>>> > --- a/platform/linux-generic/odp_timer.c >>>>> > +++ b/platform/linux-generic/odp_timer.c >>>>> > @@ -11,15 +11,7 @@ >>>>> > * >>>>> > */ >>>>> > >>>>> > -/* Check if compiler supports 16-byte atomics. GCC needs -mcx16 >>>>> flag on >>>>> > x86 */ >>>>> > -/* Using spin lock actually seems faster on Core2 */ >>>>> > -#ifdef ODP_ATOMIC_U128 >>>>> > -/* TB_NEEDS_PAD defined if sizeof(odp_buffer_t) != 8 */ >>>>> > -#define TB_NEEDS_PAD >>>>> > -#define TB_SET_PAD(x) ((x).pad = 0) >>>>> > -#else >>>>> > #define TB_SET_PAD(x) (void)(x) >>>>> > -#endif >>>>> > >>>>> > #include <odp_posix_extensions.h> >>>>> > >>>>> > @@ -70,11 +62,9 @@ >>>>> > * Mutual exclusion in the absence of CAS16 >>>>> > >>>>> > >>>>> *****************************************************************************/ >>>>> > >>>>> > -#ifndef ODP_ATOMIC_U128 >>>>> > #define NUM_LOCKS 1024 >>>>> > static _odp_atomic_flag_t locks[NUM_LOCKS]; /* Multiple locks per >>>>> cache >>>>> > line! */ >>>>> > #define IDX2LOCK(idx) (&locks[(idx) % NUM_LOCKS]) >>>>> > -#endif >>>>> > >>>>> > >>>>> > >>>>> /****************************************************************************** >>>>> > * Translation between timeout buffer and timeout header >>>>> > @@ -98,17 +88,9 @@ static odp_timeout_hdr_t >>>>> *timeout_hdr(odp_timeout_t tmo) >>>>> > typedef struct tick_buf_s { >>>>> > odp_atomic_u64_t exp_tck;/* Expiration tick or TMO_xxx */ >>>>> > odp_buffer_t tmo_buf;/* ODP_BUFFER_INVALID if timer not >>>>> active */ >>>>> > -#ifdef TB_NEEDS_PAD >>>>> > - uint32_t pad;/* Need to be able to access padding for >>>>> successful >>>>> > CAS */ >>>>> > -#endif >>>>> > } tick_buf_t >>>>> > -#ifdef ODP_ATOMIC_U128 >>>>> > -ODP_ALIGNED(16) /* 16-byte atomic operations need properly aligned >>>>> > addresses */ >>>>> > -#endif >>>>> > ; >>>>> > >>>>> > -ODP_STATIC_ASSERT(sizeof(tick_buf_t) == 16, "sizeof(tick_buf_t) == >>>>> 16"); >>>>> > - >>>>> > typedef struct odp_timer_s { >>>>> > void *user_ptr; >>>>> > odp_queue_t queue;/* Used for free list when timer is free */ >>>>> > @@ -378,34 +360,6 @@ static bool timer_reset(uint32_t idx, >>>>> > tick_buf_t *tb = &tp->tick_buf[idx]; >>>>> > >>>>> > if (tmo_buf == NULL || *tmo_buf == ODP_BUFFER_INVALID) { >>>>> > -#ifdef ODP_ATOMIC_U128 >>>>> > - tick_buf_t new, old; >>>>> > - do { >>>>> > - /* Relaxed and non-atomic read of current >>>>> values */ >>>>> > - old.exp_tck.v = tb->exp_tck.v; >>>>> > - old.tmo_buf = tb->tmo_buf; >>>>> > - TB_SET_PAD(old); >>>>> > - /* Check if there actually is a timeout >>>>> buffer >>>>> > - * present */ >>>>> > - if (old.tmo_buf == ODP_BUFFER_INVALID) { >>>>> > - /* Cannot reset a timer with neither >>>>> old >>>>> > nor >>>>> > - * new timeout buffer */ >>>>> > - success = false; >>>>> > - break; >>>>> > - } >>>>> > - /* Set up new values */ >>>>> > - new.exp_tck.v = abs_tck; >>>>> > - new.tmo_buf = old.tmo_buf; >>>>> > - TB_SET_PAD(new); >>>>> > - /* Atomic CAS will fail if we experienced >>>>> torn >>>>> > reads, >>>>> > - * retry update sequence until CAS succeeds >>>>> */ >>>>> > - } while (!_odp_atomic_u128_cmp_xchg_mm( >>>>> > - (_odp_atomic_u128_t *)tb, >>>>> > - (_uint128_t *)&old, >>>>> > - (_uint128_t *)&new, >>>>> > - _ODP_MEMMODEL_RLS, >>>>> > - _ODP_MEMMODEL_RLX)); >>>>> > -#else >>>>> > #ifdef __ARM_ARCH >>>>> > /* Since barriers are not good for C-A15, we take an >>>>> > * alternative approach using relaxed memory model */ >>>>> > @@ -453,7 +407,6 @@ static bool timer_reset(uint32_t idx, >>>>> > /* Release the lock */ >>>>> > _odp_atomic_flag_clear(IDX2LOCK(idx)); >>>>> > #endif >>>>> > -#endif >>>>> > } else { >>>>> > /* We have a new timeout buffer which replaces any >>>>> old one >>>>> > */ >>>>> > /* Fill in some (constant) header fields for timeout >>>>> > events */ >>>>> > @@ -468,19 +421,6 @@ static bool timer_reset(uint32_t idx, >>>>> > } >>>>> > /* Else ignore buffers of other types */ >>>>> > odp_buffer_t old_buf = ODP_BUFFER_INVALID; >>>>> > -#ifdef ODP_ATOMIC_U128 >>>>> > - tick_buf_t new, old; >>>>> > - new.exp_tck.v = abs_tck; >>>>> > - new.tmo_buf = *tmo_buf; >>>>> > - TB_SET_PAD(new); >>>>> > - /* We are releasing the new timeout buffer to some >>>>> other >>>>> > - * thread */ >>>>> > - _odp_atomic_u128_xchg_mm((_odp_atomic_u128_t *)tb, >>>>> > - (_uint128_t *)&new, >>>>> > - (_uint128_t *)&old, >>>>> > - _ODP_MEMMODEL_ACQ_RLS); >>>>> > - old_buf = old.tmo_buf; >>>>> > -#else >>>>> > /* Take a related lock */ >>>>> > while (_odp_atomic_flag_tas(IDX2LOCK(idx))) >>>>> > /* While lock is taken, spin using relaxed >>>>> loads */ >>>>> > @@ -496,7 +436,6 @@ static bool timer_reset(uint32_t idx, >>>>> > >>>>> > /* Release the lock */ >>>>> > _odp_atomic_flag_clear(IDX2LOCK(idx)); >>>>> > -#endif >>>>> > /* Return old timeout buffer */ >>>>> > *tmo_buf = old_buf; >>>>> > } >>>>> > @@ -510,18 +449,6 @@ static odp_buffer_t timer_cancel(odp_timer_pool >>>>> *tp, >>>>> > tick_buf_t *tb = &tp->tick_buf[idx]; >>>>> > odp_buffer_t old_buf; >>>>> > >>>>> > -#ifdef ODP_ATOMIC_U128 >>>>> > - tick_buf_t new, old; >>>>> > - /* Update the timer state (e.g. cancel the current timeout) >>>>> */ >>>>> > - new.exp_tck.v = new_state; >>>>> > - /* Swap out the old buffer */ >>>>> > - new.tmo_buf = ODP_BUFFER_INVALID; >>>>> > - TB_SET_PAD(new); >>>>> > - _odp_atomic_u128_xchg_mm((_odp_atomic_u128_t *)tb, >>>>> > - (_uint128_t *)&new, (_uint128_t >>>>> *)&old, >>>>> > - _ODP_MEMMODEL_RLX); >>>>> > - old_buf = old.tmo_buf; >>>>> > -#else >>>>> > /* Take a related lock */ >>>>> > while (_odp_atomic_flag_tas(IDX2LOCK(idx))) >>>>> > /* While lock is taken, spin using relaxed loads */ >>>>> > @@ -537,7 +464,6 @@ static odp_buffer_t timer_cancel(odp_timer_pool >>>>> *tp, >>>>> > >>>>> > /* Release the lock */ >>>>> > _odp_atomic_flag_clear(IDX2LOCK(idx)); >>>>> > -#endif >>>>> > /* Return the old buffer */ >>>>> > return old_buf; >>>>> > } >>>>> > @@ -548,34 +474,6 @@ static unsigned timer_expire(odp_timer_pool *tp, >>>>> > uint32_t idx, uint64_t tick) >>>>> > tick_buf_t *tb = &tp->tick_buf[idx]; >>>>> > odp_buffer_t tmo_buf = ODP_BUFFER_INVALID; >>>>> > uint64_t exp_tck; >>>>> > -#ifdef ODP_ATOMIC_U128 >>>>> > - /* Atomic re-read for correctness */ >>>>> > - exp_tck = _odp_atomic_u64_load_mm(&tb->exp_tck, >>>>> _ODP_MEMMODEL_RLX); >>>>> > - /* Re-check exp_tck */ >>>>> > - if (odp_likely(exp_tck <= tick)) { >>>>> > - /* Attempt to grab timeout buffer, replace with >>>>> inactive >>>>> > timer >>>>> > - * and invalid buffer */ >>>>> > - tick_buf_t new, old; >>>>> > - old.exp_tck.v = exp_tck; >>>>> > - old.tmo_buf = tb->tmo_buf; >>>>> > - TB_SET_PAD(old); >>>>> > - /* Set the inactive/expired bit keeping the >>>>> expiration >>>>> > tick so >>>>> > - * that we can check against the expiration tick of >>>>> the >>>>> > timeout >>>>> > - * when it is received */ >>>>> > - new.exp_tck.v = exp_tck | TMO_INACTIVE; >>>>> > - new.tmo_buf = ODP_BUFFER_INVALID; >>>>> > - TB_SET_PAD(new); >>>>> > - int succ = _odp_atomic_u128_cmp_xchg_mm( >>>>> > - (_odp_atomic_u128_t *)tb, >>>>> > - (_uint128_t *)&old, (_uint128_t >>>>> *)&new, >>>>> > - _ODP_MEMMODEL_RLS, >>>>> _ODP_MEMMODEL_RLX); >>>>> > - if (succ) >>>>> > - tmo_buf = old.tmo_buf; >>>>> > - /* Else CAS failed, something changed => skip timer >>>>> > - * this tick, it will be checked again next tick */ >>>>> > - } >>>>> > - /* Else false positive, ignore */ >>>>> > -#else >>>>> > /* Take a related lock */ >>>>> > while (_odp_atomic_flag_tas(IDX2LOCK(idx))) >>>>> > /* While lock is taken, spin using relaxed loads */ >>>>> > @@ -600,7 +498,6 @@ static unsigned timer_expire(odp_timer_pool *tp, >>>>> > uint32_t idx, uint64_t tick) >>>>> > /* Else false positive, ignore */ >>>>> > /* Release the lock */ >>>>> > _odp_atomic_flag_clear(IDX2LOCK(idx)); >>>>> > -#endif >>>>> > if (odp_likely(tmo_buf != ODP_BUFFER_INVALID)) { >>>>> > /* Fill in expiration tick for timeout events */ >>>>> > if (odp_event_type(odp_buffer_to_event(tmo_buf)) == >>>>> > @@ -974,13 +871,9 @@ void odp_timeout_free(odp_timeout_t tmo) >>>>> > >>>>> > int odp_timer_init_global(void) >>>>> > { >>>>> > -#ifndef ODP_ATOMIC_U128 >>>>> > uint32_t i; >>>>> > for (i = 0; i < NUM_LOCKS; i++) >>>>> > _odp_atomic_flag_clear(&locks[i]); >>>>> > -#else >>>>> > - ODP_DBG("Using lock-less timer implementation\n"); >>>>> > -#endif >>>>> > odp_atomic_init_u32(&num_timer_pools, 0); >>>>> > >>>>> > block_sigalarm(); >>>>> > -- >>>>> > 2.7.1.250.gff4ea60 >>>>> > >>>>> > _______________________________________________ >>>>> > lng-odp mailing list >>>>> > lng-odp@lists.linaro.org >>>>> > https://lists.linaro.org/mailman/listinfo/lng-odp >>>>> > >>>>> _______________________________________________ >>>>> lng-odp mailing list >>>>> lng-odp@lists.linaro.org >>>>> https://lists.linaro.org/mailman/listinfo/lng-odp >>>>> >>>> >>>> >>> >> >
Coding would be much simpler if one could use GCC __atomic builtins directly instead of ODP public and internal atomic API's. The problem with odp_atomic_u64_t sometimes (i.e. for most 32-bit architectures, ARMv7a and x86-32 the exceptions) containing a lock would go away. -- Ola On 17 May 2016 at 09:00, Ola Liljedahl <ola.liljedahl@linaro.org> wrote: > > > On 17 May 2016 at 00:45, Bill Fischofer <bill.fischofer@linaro.org> wrote: > >> >> >> On Mon, May 16, 2016 at 4:55 PM, Ola Liljedahl <ola.liljedahl@linaro.org> >> wrote: >> >>> On 16 May 2016 at 23:31, Bill Fischofer <bill.fischofer@linaro.org> >>> wrote: >>> >>>> Thanks Ola. The original bug is that this fails compiling with clang >>>> on 32-bit systems and the reason is that the struct in that environment >>>> winds up being 24 bytes rather than 16 bytes long, so the >>>> ODP_STATIC_ASSERT() fails. >>>> >>> The timer code should stop using odp_atomic_u64_t (which will include >>> the spinlock for systems that do not support atomic operations on 64-bit >>> locations, ARMv7A is one of the few 32-bit platforms which do support >>> 64-bit atomics natively). Instead use a plain 64-bit datatype (uint64_t) on >>> such systems. For systems which do not support 128-bit (or 64-bit?) atomic >>> operations (which is the ideal but I think I managed to get something >>> working with only 64-bit atomics), we should use locks instead. I think the >>> code does use a separate array of locks which is indexed through a hash of >>> the timer handle or something similar. This locks makes the additional lock >>> in odp_atomic_u64_t unused. >>> >>> Which platforms use the 64-bit atomic code in the timer? >>> ARMv7A (32-bit but with 64-bit atomics support - ldrexd/strexd) >>> OCTEON/MIPS64 (with 64-bit atomics support - lld/scd) >>> 64-bit POWER? (with 64--bit atomics support - ldarx/stdcx.) >>> >>> Se we need a couple of things: >>> 1) 128-bit CAS support on e.g. x86 (requires that configure detects and >>> enables -mcx16 flag). >>> 2) 128-bit ldxp/stxp support on ARM64 (A64 ISA in AArch64 mode). I don't >>> think common compilers support 128-bit CAS on ARM64 (it's a bit tricky) so >>> need to use inline assembly. >>> 3) Some cleanup in the timer code not to use odp_atomic_u64_t on systems >>> which do not support 64-bit atomic operations (and instead uses the spin >>> lock array). This will avoid the failed static assert. >>> >> >> Unless you'd like to submit that patch this week, we need to do something >> on at least a temporary basis to close out Monarch RC3. Sounds like this is >> an area we should revisit for restructure/cleanup for Tiger Moth. Given the >> divergence between near-term need and the (better) longer-term solution you >> outline, what do you recommend for now? >> > Fix the static assert problem on certain 32-bit targets by changing > odp_atomic_u64_t to uint64_t on those targets. > > >> >> >>> >>> >>>> My original (simple) patch is at >>>> http://patches.opendataplane.org/patch/5932/ however Maxim didn't like >>>> it. Perhaps you can offer a compromise? >>>> >>>> On Mon, May 16, 2016 at 4:21 PM, Ola Liljedahl < >>>> ola.liljedahl@linaro.org> wrote: >>>> >>>>> I disagree. The 128-bit support is important because that's the >>>>> lock-free timer implementation which was the whole idea. The lock-based >>>>> code is just a work-around. We should rather add support in configure to >>>>> detect compiler support for the -mcx16 flag which enables 128-bit CAS on >>>>> x86-64. Now when I have an ARM64 target, I can actually write and test >>>>> 128-bit CAS support on ARM (A64 ISA in AArch64 mode) and contribute that in >>>>> another patch. I am not sure whether common compilers (e.g. GCC) properly >>>>> supports 128-bit CAS on ARM64, might have to use LDXP/STXP (load/store >>>>> exclusive pair) directly. >>>>> >>>>> -- Ola >>>>> >>>>> On 16 May 2016 at 22:43, Bill Fischofer <bill.fischofer@linaro.org> >>>>> wrote: >>>>> >>>>>> On Mon, May 16, 2016 at 2:15 PM, Maxim Uvarov < >>>>>> maxim.uvarov@linaro.org> >>>>>> wrote: >>>>>> >>>>>> > Remove totally untested branch with 128 bit optimizations >>>>>> > for timer code. >>>>>> > This also fixes static assert bug: >>>>>> > https://bugs.linaro.org/show_bug.cgi?id=2211 >>>>>> > >>>>>> > Signed-off-by: Maxim Uvarov <maxim.uvarov@linaro.org> >>>>>> > >>>>>> >>>>>> Reviewed-and-tested-by: Bill Fischofer <bill.fischofer@linaro.org> >>>>>> >>>>>> >>>>>> > --- >>>>>> > platform/linux-generic/odp_timer.c | 107 >>>>>> > ------------------------------------- >>>>>> > 1 file changed, 107 deletions(-) >>>>>> > >>>>>> > diff --git a/platform/linux-generic/odp_timer.c >>>>>> > b/platform/linux-generic/odp_timer.c >>>>>> > index 6b84309..be28da3 100644 >>>>>> > --- a/platform/linux-generic/odp_timer.c >>>>>> > +++ b/platform/linux-generic/odp_timer.c >>>>>> > @@ -11,15 +11,7 @@ >>>>>> > * >>>>>> > */ >>>>>> > >>>>>> > -/* Check if compiler supports 16-byte atomics. GCC needs -mcx16 >>>>>> flag on >>>>>> > x86 */ >>>>>> > -/* Using spin lock actually seems faster on Core2 */ >>>>>> > -#ifdef ODP_ATOMIC_U128 >>>>>> > -/* TB_NEEDS_PAD defined if sizeof(odp_buffer_t) != 8 */ >>>>>> > -#define TB_NEEDS_PAD >>>>>> > -#define TB_SET_PAD(x) ((x).pad = 0) >>>>>> > -#else >>>>>> > #define TB_SET_PAD(x) (void)(x) >>>>>> > -#endif >>>>>> > >>>>>> > #include <odp_posix_extensions.h> >>>>>> > >>>>>> > @@ -70,11 +62,9 @@ >>>>>> > * Mutual exclusion in the absence of CAS16 >>>>>> > >>>>>> > >>>>>> *****************************************************************************/ >>>>>> > >>>>>> > -#ifndef ODP_ATOMIC_U128 >>>>>> > #define NUM_LOCKS 1024 >>>>>> > static _odp_atomic_flag_t locks[NUM_LOCKS]; /* Multiple locks per >>>>>> cache >>>>>> > line! */ >>>>>> > #define IDX2LOCK(idx) (&locks[(idx) % NUM_LOCKS]) >>>>>> > -#endif >>>>>> > >>>>>> > >>>>>> > >>>>>> /****************************************************************************** >>>>>> > * Translation between timeout buffer and timeout header >>>>>> > @@ -98,17 +88,9 @@ static odp_timeout_hdr_t >>>>>> *timeout_hdr(odp_timeout_t tmo) >>>>>> > typedef struct tick_buf_s { >>>>>> > odp_atomic_u64_t exp_tck;/* Expiration tick or TMO_xxx */ >>>>>> > odp_buffer_t tmo_buf;/* ODP_BUFFER_INVALID if timer not >>>>>> active */ >>>>>> > -#ifdef TB_NEEDS_PAD >>>>>> > - uint32_t pad;/* Need to be able to access padding for >>>>>> successful >>>>>> > CAS */ >>>>>> > -#endif >>>>>> > } tick_buf_t >>>>>> > -#ifdef ODP_ATOMIC_U128 >>>>>> > -ODP_ALIGNED(16) /* 16-byte atomic operations need properly aligned >>>>>> > addresses */ >>>>>> > -#endif >>>>>> > ; >>>>>> > >>>>>> > -ODP_STATIC_ASSERT(sizeof(tick_buf_t) == 16, "sizeof(tick_buf_t) == >>>>>> 16"); >>>>>> > - >>>>>> > typedef struct odp_timer_s { >>>>>> > void *user_ptr; >>>>>> > odp_queue_t queue;/* Used for free list when timer is free >>>>>> */ >>>>>> > @@ -378,34 +360,6 @@ static bool timer_reset(uint32_t idx, >>>>>> > tick_buf_t *tb = &tp->tick_buf[idx]; >>>>>> > >>>>>> > if (tmo_buf == NULL || *tmo_buf == ODP_BUFFER_INVALID) { >>>>>> > -#ifdef ODP_ATOMIC_U128 >>>>>> > - tick_buf_t new, old; >>>>>> > - do { >>>>>> > - /* Relaxed and non-atomic read of current >>>>>> values */ >>>>>> > - old.exp_tck.v = tb->exp_tck.v; >>>>>> > - old.tmo_buf = tb->tmo_buf; >>>>>> > - TB_SET_PAD(old); >>>>>> > - /* Check if there actually is a timeout >>>>>> buffer >>>>>> > - * present */ >>>>>> > - if (old.tmo_buf == ODP_BUFFER_INVALID) { >>>>>> > - /* Cannot reset a timer with >>>>>> neither old >>>>>> > nor >>>>>> > - * new timeout buffer */ >>>>>> > - success = false; >>>>>> > - break; >>>>>> > - } >>>>>> > - /* Set up new values */ >>>>>> > - new.exp_tck.v = abs_tck; >>>>>> > - new.tmo_buf = old.tmo_buf; >>>>>> > - TB_SET_PAD(new); >>>>>> > - /* Atomic CAS will fail if we experienced >>>>>> torn >>>>>> > reads, >>>>>> > - * retry update sequence until CAS succeeds >>>>>> */ >>>>>> > - } while (!_odp_atomic_u128_cmp_xchg_mm( >>>>>> > - (_odp_atomic_u128_t *)tb, >>>>>> > - (_uint128_t *)&old, >>>>>> > - (_uint128_t *)&new, >>>>>> > - _ODP_MEMMODEL_RLS, >>>>>> > - _ODP_MEMMODEL_RLX)); >>>>>> > -#else >>>>>> > #ifdef __ARM_ARCH >>>>>> > /* Since barriers are not good for C-A15, we take an >>>>>> > * alternative approach using relaxed memory model >>>>>> */ >>>>>> > @@ -453,7 +407,6 @@ static bool timer_reset(uint32_t idx, >>>>>> > /* Release the lock */ >>>>>> > _odp_atomic_flag_clear(IDX2LOCK(idx)); >>>>>> > #endif >>>>>> > -#endif >>>>>> > } else { >>>>>> > /* We have a new timeout buffer which replaces any >>>>>> old one >>>>>> > */ >>>>>> > /* Fill in some (constant) header fields for timeout >>>>>> > events */ >>>>>> > @@ -468,19 +421,6 @@ static bool timer_reset(uint32_t idx, >>>>>> > } >>>>>> > /* Else ignore buffers of other types */ >>>>>> > odp_buffer_t old_buf = ODP_BUFFER_INVALID; >>>>>> > -#ifdef ODP_ATOMIC_U128 >>>>>> > - tick_buf_t new, old; >>>>>> > - new.exp_tck.v = abs_tck; >>>>>> > - new.tmo_buf = *tmo_buf; >>>>>> > - TB_SET_PAD(new); >>>>>> > - /* We are releasing the new timeout buffer to some >>>>>> other >>>>>> > - * thread */ >>>>>> > - _odp_atomic_u128_xchg_mm((_odp_atomic_u128_t *)tb, >>>>>> > - (_uint128_t *)&new, >>>>>> > - (_uint128_t *)&old, >>>>>> > - _ODP_MEMMODEL_ACQ_RLS); >>>>>> > - old_buf = old.tmo_buf; >>>>>> > -#else >>>>>> > /* Take a related lock */ >>>>>> > while (_odp_atomic_flag_tas(IDX2LOCK(idx))) >>>>>> > /* While lock is taken, spin using relaxed >>>>>> loads */ >>>>>> > @@ -496,7 +436,6 @@ static bool timer_reset(uint32_t idx, >>>>>> > >>>>>> > /* Release the lock */ >>>>>> > _odp_atomic_flag_clear(IDX2LOCK(idx)); >>>>>> > -#endif >>>>>> > /* Return old timeout buffer */ >>>>>> > *tmo_buf = old_buf; >>>>>> > } >>>>>> > @@ -510,18 +449,6 @@ static odp_buffer_t >>>>>> timer_cancel(odp_timer_pool *tp, >>>>>> > tick_buf_t *tb = &tp->tick_buf[idx]; >>>>>> > odp_buffer_t old_buf; >>>>>> > >>>>>> > -#ifdef ODP_ATOMIC_U128 >>>>>> > - tick_buf_t new, old; >>>>>> > - /* Update the timer state (e.g. cancel the current timeout) >>>>>> */ >>>>>> > - new.exp_tck.v = new_state; >>>>>> > - /* Swap out the old buffer */ >>>>>> > - new.tmo_buf = ODP_BUFFER_INVALID; >>>>>> > - TB_SET_PAD(new); >>>>>> > - _odp_atomic_u128_xchg_mm((_odp_atomic_u128_t *)tb, >>>>>> > - (_uint128_t *)&new, (_uint128_t >>>>>> *)&old, >>>>>> > - _ODP_MEMMODEL_RLX); >>>>>> > - old_buf = old.tmo_buf; >>>>>> > -#else >>>>>> > /* Take a related lock */ >>>>>> > while (_odp_atomic_flag_tas(IDX2LOCK(idx))) >>>>>> > /* While lock is taken, spin using relaxed loads */ >>>>>> > @@ -537,7 +464,6 @@ static odp_buffer_t timer_cancel(odp_timer_pool >>>>>> *tp, >>>>>> > >>>>>> > /* Release the lock */ >>>>>> > _odp_atomic_flag_clear(IDX2LOCK(idx)); >>>>>> > -#endif >>>>>> > /* Return the old buffer */ >>>>>> > return old_buf; >>>>>> > } >>>>>> > @@ -548,34 +474,6 @@ static unsigned timer_expire(odp_timer_pool >>>>>> *tp, >>>>>> > uint32_t idx, uint64_t tick) >>>>>> > tick_buf_t *tb = &tp->tick_buf[idx]; >>>>>> > odp_buffer_t tmo_buf = ODP_BUFFER_INVALID; >>>>>> > uint64_t exp_tck; >>>>>> > -#ifdef ODP_ATOMIC_U128 >>>>>> > - /* Atomic re-read for correctness */ >>>>>> > - exp_tck = _odp_atomic_u64_load_mm(&tb->exp_tck, >>>>>> _ODP_MEMMODEL_RLX); >>>>>> > - /* Re-check exp_tck */ >>>>>> > - if (odp_likely(exp_tck <= tick)) { >>>>>> > - /* Attempt to grab timeout buffer, replace with >>>>>> inactive >>>>>> > timer >>>>>> > - * and invalid buffer */ >>>>>> > - tick_buf_t new, old; >>>>>> > - old.exp_tck.v = exp_tck; >>>>>> > - old.tmo_buf = tb->tmo_buf; >>>>>> > - TB_SET_PAD(old); >>>>>> > - /* Set the inactive/expired bit keeping the >>>>>> expiration >>>>>> > tick so >>>>>> > - * that we can check against the expiration tick of >>>>>> the >>>>>> > timeout >>>>>> > - * when it is received */ >>>>>> > - new.exp_tck.v = exp_tck | TMO_INACTIVE; >>>>>> > - new.tmo_buf = ODP_BUFFER_INVALID; >>>>>> > - TB_SET_PAD(new); >>>>>> > - int succ = _odp_atomic_u128_cmp_xchg_mm( >>>>>> > - (_odp_atomic_u128_t *)tb, >>>>>> > - (_uint128_t *)&old, (_uint128_t >>>>>> *)&new, >>>>>> > - _ODP_MEMMODEL_RLS, >>>>>> _ODP_MEMMODEL_RLX); >>>>>> > - if (succ) >>>>>> > - tmo_buf = old.tmo_buf; >>>>>> > - /* Else CAS failed, something changed => skip timer >>>>>> > - * this tick, it will be checked again next tick */ >>>>>> > - } >>>>>> > - /* Else false positive, ignore */ >>>>>> > -#else >>>>>> > /* Take a related lock */ >>>>>> > while (_odp_atomic_flag_tas(IDX2LOCK(idx))) >>>>>> > /* While lock is taken, spin using relaxed loads */ >>>>>> > @@ -600,7 +498,6 @@ static unsigned timer_expire(odp_timer_pool *tp, >>>>>> > uint32_t idx, uint64_t tick) >>>>>> > /* Else false positive, ignore */ >>>>>> > /* Release the lock */ >>>>>> > _odp_atomic_flag_clear(IDX2LOCK(idx)); >>>>>> > -#endif >>>>>> > if (odp_likely(tmo_buf != ODP_BUFFER_INVALID)) { >>>>>> > /* Fill in expiration tick for timeout events */ >>>>>> > if (odp_event_type(odp_buffer_to_event(tmo_buf)) == >>>>>> > @@ -974,13 +871,9 @@ void odp_timeout_free(odp_timeout_t tmo) >>>>>> > >>>>>> > int odp_timer_init_global(void) >>>>>> > { >>>>>> > -#ifndef ODP_ATOMIC_U128 >>>>>> > uint32_t i; >>>>>> > for (i = 0; i < NUM_LOCKS; i++) >>>>>> > _odp_atomic_flag_clear(&locks[i]); >>>>>> > -#else >>>>>> > - ODP_DBG("Using lock-less timer implementation\n"); >>>>>> > -#endif >>>>>> > odp_atomic_init_u32(&num_timer_pools, 0); >>>>>> > >>>>>> > block_sigalarm(); >>>>>> > -- >>>>>> > 2.7.1.250.gff4ea60 >>>>>> > >>>>>> > _______________________________________________ >>>>>> > lng-odp mailing list >>>>>> > lng-odp@lists.linaro.org >>>>>> > https://lists.linaro.org/mailman/listinfo/lng-odp >>>>>> > >>>>>> _______________________________________________ >>>>>> lng-odp mailing list >>>>>> lng-odp@lists.linaro.org >>>>>> https://lists.linaro.org/mailman/listinfo/lng-odp >>>>>> >>>>> >>>>> >>>> >>> >> >
diff --git a/platform/linux-generic/odp_timer.c b/platform/linux-generic/odp_timer.c index 6b84309..be28da3 100644 --- a/platform/linux-generic/odp_timer.c +++ b/platform/linux-generic/odp_timer.c @@ -11,15 +11,7 @@ * */ -/* Check if compiler supports 16-byte atomics. GCC needs -mcx16 flag on x86 */ -/* Using spin lock actually seems faster on Core2 */ -#ifdef ODP_ATOMIC_U128 -/* TB_NEEDS_PAD defined if sizeof(odp_buffer_t) != 8 */ -#define TB_NEEDS_PAD -#define TB_SET_PAD(x) ((x).pad = 0) -#else #define TB_SET_PAD(x) (void)(x) -#endif #include <odp_posix_extensions.h> @@ -70,11 +62,9 @@ * Mutual exclusion in the absence of CAS16 *****************************************************************************/ -#ifndef ODP_ATOMIC_U128 #define NUM_LOCKS 1024 static _odp_atomic_flag_t locks[NUM_LOCKS]; /* Multiple locks per cache line! */ #define IDX2LOCK(idx) (&locks[(idx) % NUM_LOCKS]) -#endif /****************************************************************************** * Translation between timeout buffer and timeout header @@ -98,17 +88,9 @@ static odp_timeout_hdr_t *timeout_hdr(odp_timeout_t tmo) typedef struct tick_buf_s { odp_atomic_u64_t exp_tck;/* Expiration tick or TMO_xxx */ odp_buffer_t tmo_buf;/* ODP_BUFFER_INVALID if timer not active */ -#ifdef TB_NEEDS_PAD - uint32_t pad;/* Need to be able to access padding for successful CAS */ -#endif } tick_buf_t -#ifdef ODP_ATOMIC_U128 -ODP_ALIGNED(16) /* 16-byte atomic operations need properly aligned addresses */ -#endif ; -ODP_STATIC_ASSERT(sizeof(tick_buf_t) == 16, "sizeof(tick_buf_t) == 16"); - typedef struct odp_timer_s { void *user_ptr; odp_queue_t queue;/* Used for free list when timer is free */ @@ -378,34 +360,6 @@ static bool timer_reset(uint32_t idx, tick_buf_t *tb = &tp->tick_buf[idx]; if (tmo_buf == NULL || *tmo_buf == ODP_BUFFER_INVALID) { -#ifdef ODP_ATOMIC_U128 - tick_buf_t new, old; - do { - /* Relaxed and non-atomic read of current values */ - old.exp_tck.v = tb->exp_tck.v; - old.tmo_buf = tb->tmo_buf; - TB_SET_PAD(old); - /* Check if there actually is a timeout buffer - * present */ - if (old.tmo_buf == ODP_BUFFER_INVALID) { - /* Cannot reset a timer with neither old nor - * new timeout buffer */ - success = false; - break; - } - /* Set up new values */ - new.exp_tck.v = abs_tck; - new.tmo_buf = old.tmo_buf; - TB_SET_PAD(new); - /* Atomic CAS will fail if we experienced torn reads, - * retry update sequence until CAS succeeds */ - } while (!_odp_atomic_u128_cmp_xchg_mm( - (_odp_atomic_u128_t *)tb, - (_uint128_t *)&old, - (_uint128_t *)&new, - _ODP_MEMMODEL_RLS, - _ODP_MEMMODEL_RLX)); -#else #ifdef __ARM_ARCH /* Since barriers are not good for C-A15, we take an * alternative approach using relaxed memory model */ @@ -453,7 +407,6 @@ static bool timer_reset(uint32_t idx, /* Release the lock */ _odp_atomic_flag_clear(IDX2LOCK(idx)); #endif -#endif } else { /* We have a new timeout buffer which replaces any old one */ /* Fill in some (constant) header fields for timeout events */ @@ -468,19 +421,6 @@ static bool timer_reset(uint32_t idx, } /* Else ignore buffers of other types */ odp_buffer_t old_buf = ODP_BUFFER_INVALID; -#ifdef ODP_ATOMIC_U128 - tick_buf_t new, old; - new.exp_tck.v = abs_tck; - new.tmo_buf = *tmo_buf; - TB_SET_PAD(new); - /* We are releasing the new timeout buffer to some other - * thread */ - _odp_atomic_u128_xchg_mm((_odp_atomic_u128_t *)tb, - (_uint128_t *)&new, - (_uint128_t *)&old, - _ODP_MEMMODEL_ACQ_RLS); - old_buf = old.tmo_buf; -#else /* Take a related lock */ while (_odp_atomic_flag_tas(IDX2LOCK(idx))) /* While lock is taken, spin using relaxed loads */ @@ -496,7 +436,6 @@ static bool timer_reset(uint32_t idx, /* Release the lock */ _odp_atomic_flag_clear(IDX2LOCK(idx)); -#endif /* Return old timeout buffer */ *tmo_buf = old_buf; } @@ -510,18 +449,6 @@ static odp_buffer_t timer_cancel(odp_timer_pool *tp, tick_buf_t *tb = &tp->tick_buf[idx]; odp_buffer_t old_buf; -#ifdef ODP_ATOMIC_U128 - tick_buf_t new, old; - /* Update the timer state (e.g. cancel the current timeout) */ - new.exp_tck.v = new_state; - /* Swap out the old buffer */ - new.tmo_buf = ODP_BUFFER_INVALID; - TB_SET_PAD(new); - _odp_atomic_u128_xchg_mm((_odp_atomic_u128_t *)tb, - (_uint128_t *)&new, (_uint128_t *)&old, - _ODP_MEMMODEL_RLX); - old_buf = old.tmo_buf; -#else /* Take a related lock */ while (_odp_atomic_flag_tas(IDX2LOCK(idx))) /* While lock is taken, spin using relaxed loads */ @@ -537,7 +464,6 @@ static odp_buffer_t timer_cancel(odp_timer_pool *tp, /* Release the lock */ _odp_atomic_flag_clear(IDX2LOCK(idx)); -#endif /* Return the old buffer */ return old_buf; } @@ -548,34 +474,6 @@ static unsigned timer_expire(odp_timer_pool *tp, uint32_t idx, uint64_t tick) tick_buf_t *tb = &tp->tick_buf[idx]; odp_buffer_t tmo_buf = ODP_BUFFER_INVALID; uint64_t exp_tck; -#ifdef ODP_ATOMIC_U128 - /* Atomic re-read for correctness */ - exp_tck = _odp_atomic_u64_load_mm(&tb->exp_tck, _ODP_MEMMODEL_RLX); - /* Re-check exp_tck */ - if (odp_likely(exp_tck <= tick)) { - /* Attempt to grab timeout buffer, replace with inactive timer - * and invalid buffer */ - tick_buf_t new, old; - old.exp_tck.v = exp_tck; - old.tmo_buf = tb->tmo_buf; - TB_SET_PAD(old); - /* Set the inactive/expired bit keeping the expiration tick so - * that we can check against the expiration tick of the timeout - * when it is received */ - new.exp_tck.v = exp_tck | TMO_INACTIVE; - new.tmo_buf = ODP_BUFFER_INVALID; - TB_SET_PAD(new); - int succ = _odp_atomic_u128_cmp_xchg_mm( - (_odp_atomic_u128_t *)tb, - (_uint128_t *)&old, (_uint128_t *)&new, - _ODP_MEMMODEL_RLS, _ODP_MEMMODEL_RLX); - if (succ) - tmo_buf = old.tmo_buf; - /* Else CAS failed, something changed => skip timer - * this tick, it will be checked again next tick */ - } - /* Else false positive, ignore */ -#else /* Take a related lock */ while (_odp_atomic_flag_tas(IDX2LOCK(idx))) /* While lock is taken, spin using relaxed loads */ @@ -600,7 +498,6 @@ static unsigned timer_expire(odp_timer_pool *tp, uint32_t idx, uint64_t tick) /* Else false positive, ignore */ /* Release the lock */ _odp_atomic_flag_clear(IDX2LOCK(idx)); -#endif if (odp_likely(tmo_buf != ODP_BUFFER_INVALID)) { /* Fill in expiration tick for timeout events */ if (odp_event_type(odp_buffer_to_event(tmo_buf)) == @@ -974,13 +871,9 @@ void odp_timeout_free(odp_timeout_t tmo) int odp_timer_init_global(void) { -#ifndef ODP_ATOMIC_U128 uint32_t i; for (i = 0; i < NUM_LOCKS; i++) _odp_atomic_flag_clear(&locks[i]); -#else - ODP_DBG("Using lock-less timer implementation\n"); -#endif odp_atomic_init_u32(&num_timer_pools, 0); block_sigalarm();
Remove totally untested branch with 128 bit optimizations for timer code. This also fixes static assert bug: https://bugs.linaro.org/show_bug.cgi?id=2211 Signed-off-by: Maxim Uvarov <maxim.uvarov@linaro.org> --- platform/linux-generic/odp_timer.c | 107 ------------------------------------- 1 file changed, 107 deletions(-)