From patchwork Tue Oct  8 21:12:19 2019
X-Patchwork-Submitter: Honnappa Nagarahalli
X-Patchwork-Id: 175581
From: Honnappa Nagarahalli
To: honnappa.nagarahalli@arm.com, david.marchand@redhat.com, konstantin.ananyev@intel.com
Cc: dev@dpdk.org, ruifeng.wang@arm.com, stable@dpdk.org, nd@arm.com
Date: Tue, 8 Oct 2019 16:12:19 -0500
Message-Id: <20191008211220.31586-7-honnappa.nagarahalli@arm.com>
X-Mailer: git-send-email 2.17.1
In-Reply-To: <20191008211220.31586-1-honnappa.nagarahalli@arm.com>
References: <20191008211220.31586-1-honnappa.nagarahalli@arm.com>
Subject: [dpdk-dev] [PATCH v2 6/7] lib/rcu: add least acknowledged token optimization

When the rte_rcu_qsbr_check API is called, it is possible to calculate
the least-valued token acknowledged by all the readers. On subsequent
calls, the readers' token counters do not need to be scanned if the
token being queried is less than or equal to the least token already
acknowledged. This avoids cache line bounces between the readers and
the writer.
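
For illustration only (this snippet is not part of the patch; the struct
and function names qsbr_model and qsbr_check_model are made up), the
effect of the new acked_token fast path can be modelled as:

#include <stdint.h>
#include <stdio.h>

struct qsbr_model {
	uint64_t token;       /* latest token handed out to writers */
	uint64_t acked_token; /* least token seen acknowledged by all readers */
};

/* Returns 1 when the grace period for token 't' has completed. */
static int
qsbr_check_model(struct qsbr_model *v, uint64_t t)
{
	/* Fast path: if every reader already acknowledged a token that is
	 * at least 't', there is no need to scan the per-reader counters
	 * (and bounce their cache lines) again.
	 */
	if (t <= v->acked_token)
		return 1;

	/* Slow path placeholder: the real implementation scans all reader
	 * counters here and refreshes acked_token with the minimum value.
	 */
	return 0;
}

int main(void)
{
	struct qsbr_model v = { .token = 10, .acked_token = 7 };

	printf("token 5 done? %d\n", qsbr_check_model(&v, 5)); /* 1: fast path */
	printf("token 9 done? %d\n", qsbr_check_model(&v, 9)); /* 0: needs a scan */
	return 0;
}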
Fixes: 64994b56cfd7 ("rcu: add RCU library supporting QSBR mechanism")
Cc: stable@dpdk.org

Signed-off-by: Honnappa Nagarahalli
Reviewed-by: Gavin Hu
---
 lib/librte_rcu/rte_rcu_qsbr.c |  4 ++++
 lib/librte_rcu/rte_rcu_qsbr.h | 42 +++++++++++++++++++++++++++++++++++
 2 files changed, 46 insertions(+)

--
2.17.1

diff --git a/lib/librte_rcu/rte_rcu_qsbr.c b/lib/librte_rcu/rte_rcu_qsbr.c
index ce7f93dd3..c9ca66aaa 100644
--- a/lib/librte_rcu/rte_rcu_qsbr.c
+++ b/lib/librte_rcu/rte_rcu_qsbr.c
@@ -73,6 +73,7 @@ rte_rcu_qsbr_init(struct rte_rcu_qsbr *v, uint32_t max_threads)
 			__RTE_QSBR_THRID_ARRAY_ELM_SIZE) /
 			__RTE_QSBR_THRID_ARRAY_ELM_SIZE;
 	v->token = __RTE_QSBR_CNT_INIT;
+	v->acked_token = __RTE_QSBR_CNT_INIT - 1;
 
 	return 0;
 }
@@ -245,6 +246,9 @@ rte_rcu_qsbr_dump(FILE *f, struct rte_rcu_qsbr *v)
 	fprintf(f, " Token = %"PRIu64"\n",
 			__atomic_load_n(&v->token, __ATOMIC_ACQUIRE));
 
+	fprintf(f, " Least Acknowledged Token = %"PRIu64"\n",
+			__atomic_load_n(&v->acked_token, __ATOMIC_ACQUIRE));
+
 	fprintf(f, "Quiescent State Counts for readers:\n");
 	for (i = 0; i < v->num_elems; i++) {
 		bmap = __atomic_load_n(__RTE_QSBR_THRID_ARRAY_ELM(v, i),
diff --git a/lib/librte_rcu/rte_rcu_qsbr.h b/lib/librte_rcu/rte_rcu_qsbr.h
index c80f15c00..3f445ba6c 100644
--- a/lib/librte_rcu/rte_rcu_qsbr.h
+++ b/lib/librte_rcu/rte_rcu_qsbr.h
@@ -83,6 +83,7 @@ struct rte_rcu_qsbr_cnt {
 
 #define __RTE_QSBR_CNT_THR_OFFLINE 0
 #define __RTE_QSBR_CNT_INIT 1
+#define __RTE_QSBR_CNT_MAX ((uint64_t)~0)
 
 /* RTE Quiescent State variable structure.
  * This structure has two elements that vary in size based on the
@@ -93,6 +94,10 @@ struct rte_rcu_qsbr_cnt {
 struct rte_rcu_qsbr {
 	uint64_t token __rte_cache_aligned;
 	/**< Counter to allow for multiple concurrent quiescent state queries */
+	uint64_t acked_token;
+	/**< Least token acked by all the threads in the last call to
+	 * rte_rcu_qsbr_check API.
+	 */
 
 	uint32_t num_elems __rte_cache_aligned;
 	/**< Number of elements in the thread ID array */
@@ -472,6 +477,7 @@ __rte_rcu_qsbr_check_selective(struct rte_rcu_qsbr *v, uint64_t t, bool wait)
 	uint64_t bmap;
 	uint64_t c;
 	uint64_t *reg_thread_id;
+	uint64_t acked_token = __RTE_QSBR_CNT_MAX;
 
 	for (i = 0, reg_thread_id = __RTE_QSBR_THRID_ARRAY_ELM(v, 0);
 		i < v->num_elems;
@@ -493,6 +499,7 @@ __rte_rcu_qsbr_check_selective(struct rte_rcu_qsbr *v, uint64_t t, bool wait)
 			__RTE_RCU_DP_LOG(DEBUG,
 				"%s: status: token = %"PRIu64", wait = %d, Thread QS cnt = %"PRIu64", Thread ID = %d",
 				__func__, t, wait, c, id+j);
+
 			/* Counter is not checked for wrap-around condition
 			 * as it is a 64b counter.
 			 */
@@ -512,10 +519,25 @@ __rte_rcu_qsbr_check_selective(struct rte_rcu_qsbr *v, uint64_t t, bool wait)
 				continue;
 			}
 
+			/* This thread is in quiescent state. Use the counter
+			 * to find the least acknowledged token among all the
+			 * readers.
+			 */
+			if (c != __RTE_QSBR_CNT_THR_OFFLINE && acked_token > c)
+				acked_token = c;
+
 			bmap &= ~(1UL << j);
 		}
 	}
 
+	/* All readers are checked, update least acknowledged token.
+	 * There might be multiple writers trying to update this. There is
+	 * no need to update this very accurately using compare-and-swap.
+	 */
+	if (acked_token != __RTE_QSBR_CNT_MAX)
+		__atomic_store_n(&v->acked_token, acked_token,
+			__ATOMIC_RELAXED);
+
 	return 1;
 }
 
@@ -528,6 +550,7 @@ __rte_rcu_qsbr_check_all(struct rte_rcu_qsbr *v, uint64_t t, bool wait)
 	uint32_t i;
 	struct rte_rcu_qsbr_cnt *cnt;
 	uint64_t c;
+	uint64_t acked_token = __RTE_QSBR_CNT_MAX;
 
 	for (i = 0, cnt = v->qsbr_cnt; i < v->max_threads; i++, cnt++) {
 		__RTE_RCU_DP_LOG(DEBUG,
@@ -538,6 +561,7 @@ __rte_rcu_qsbr_check_all(struct rte_rcu_qsbr *v, uint64_t t, bool wait)
 			__RTE_RCU_DP_LOG(DEBUG,
 				"%s: status: token = %"PRIu64", wait = %d, Thread QS cnt = %"PRIu64", Thread ID = %d",
 				__func__, t, wait, c, i);
+
 		/* Counter is not checked for wrap-around condition
 		 * as it is a 64b counter.
 		 */
@@ -550,8 +574,22 @@ __rte_rcu_qsbr_check_all(struct rte_rcu_qsbr *v, uint64_t t, bool wait)
 
 			rte_pause();
 		}
+
+		/* This thread is in quiescent state. Use the counter to find
+		 * the least acknowledged token among all the readers.
+		 */
+		if (likely(c != __RTE_QSBR_CNT_THR_OFFLINE && acked_token > c))
+			acked_token = c;
 	}
 
+	/* All readers are checked, update least acknowledged token.
+	 * There might be multiple writers trying to update this. There is
+	 * no need to update this very accurately using compare-and-swap.
+	 */
+	if (acked_token != __RTE_QSBR_CNT_MAX)
+		__atomic_store_n(&v->acked_token, acked_token,
+			__ATOMIC_RELAXED);
+
 	return 1;
 }
 
@@ -595,6 +633,10 @@ rte_rcu_qsbr_check(struct rte_rcu_qsbr *v, uint64_t t, bool wait)
 {
 	RTE_ASSERT(v != NULL);
 
+	/* Check if all the readers have already acknowledged this token */
+	if (likely(t <= v->acked_token))
+		return 1;
+
 	if (likely(v->num_threads == v->max_threads))
 		return __rte_rcu_qsbr_check_all(v, t, wait);
 	else
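
Usage note (not part of the patch): the fast path added above mainly
benefits writers that poll rte_rcu_qsbr_check() without blocking, or that
check several outstanding tokens, since only the first check of a given
grace period has to scan the readers' counters and touch their cache
lines. A minimal writer-side sketch follows; EAL setup, reader
registration and the actual removal from the shared data structure are
assumed to happen elsewhere, and free_removed_element() is a hypothetical
helper, not a DPDK API:

#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>

#include <rte_pause.h>
#include <rte_rcu_qsbr.h>

/* Hypothetical reclamation callback for this sketch. */
static void
free_removed_element(void *elem)
{
	free(elem);
}

/* Writer side: after unlinking an element from the reader-visible data
 * structure, wait out the grace period before freeing it. The first
 * successful rte_rcu_qsbr_check() for a token also refreshes
 * v->acked_token, so later checks of older tokens return on the fast
 * path without scanning the per-reader counters.
 */
static void
writer_reclaim(struct rte_rcu_qsbr *v, void *removed_elem)
{
	uint64_t token = rte_rcu_qsbr_start(v);

	/* Non-blocking poll; unrelated useful work could go here instead. */
	while (rte_rcu_qsbr_check(v, token, false) == 0)
		rte_pause();

	free_removed_element(removed_elem);
}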