From patchwork Sun Dec 20 00:11:18 2015
X-Patchwork-Submitter: Andrew Pinski <apinski@cavium.com>
X-Patchwork-Id: 58766
From: Andrew Pinski <apinski@cavium.com>
To: pinsia@gmail.com,
	linux-arm-kernel@lists.infradead.org,
	linux-kernel@vger.kernel.org
Cc: Andrew Pinski <apinski@cavium.com>
Subject: [PATCH] ARM64: Improve copy_page for 128 byte cache line sizes.
Date: Sat, 19 Dec 2015 16:11:18 -0800
Message-Id: <1450570278-19404-1-git-send-email-apinski@cavium.com>
X-Mailer: git-send-email 1.7.2.5

Adding a check for the cache line size is not much overhead, so special
case cache line sizes of 128 bytes or more.  This improves copy_page by
85% on ThunderX compared to the original implementation, and improves
LMBench results by 4-10%.

Signed-off-by: Andrew Pinski <apinski@cavium.com>
---
 arch/arm64/lib/copy_page.S |   39 +++++++++++++++++++++++++++++++++++++++
 1 files changed, 39 insertions(+), 0 deletions(-)

diff --git a/arch/arm64/lib/copy_page.S b/arch/arm64/lib/copy_page.S
index 512b9a7..4c28789 100644
--- a/arch/arm64/lib/copy_page.S
+++ b/arch/arm64/lib/copy_page.S
@@ -18,6 +18,7 @@
 #include <linux/const.h>
 #include <asm/assembler.h>
 #include <asm/page.h>
+#include <asm/cachetype.h>
 
 /*
  * Copy a page from src to dest (both are page aligned)
@@ -27,8 +28,17 @@
  * x1 - src
  */
 ENTRY(copy_page)
+	/* Special case 128 byte or more cache lines. */
+	mrs	x2, ctr_el0
+	lsr	x2, x2, CTR_CWG_SHIFT
+	and	w2, w2, CTR_CWG_MASK
+	cmp	w2, 5
+	b.ge	2f
+
 	/* Assume cache line size is 64 bytes. */
 	prfm	pldl1strm, [x1, #64]
+	/* Align the loop so it fits in one cache line. */
+	.balign 64
 1:	ldp	x2, x3, [x1]
 	ldp	x4, x5, [x1, #16]
 	ldp	x6, x7, [x1, #32]
@@ -43,4 +53,33 @@ ENTRY(copy_page)
 	tst	x1, #(PAGE_SIZE - 1)
 	b.ne	1b
 	ret
+
+2:
+	/* The cache line size is at least 128 bytes. */
+	prfm	pldl1strm, [x1, #128]
+	/* Align the loop so it fits in one cache line. */
+	.balign 128
+1:	prfm	pldl1strm, [x1, #256]
+	ldp	x2, x3, [x1]
+	ldp	x4, x5, [x1, #16]
+	ldp	x6, x7, [x1, #32]
+	ldp	x8, x9, [x1, #48]
+	stnp	x2, x3, [x0]
+	stnp	x4, x5, [x0, #16]
+	stnp	x6, x7, [x0, #32]
+	stnp	x8, x9, [x0, #48]
+
+	ldp	x2, x3, [x1, #64]
+	ldp	x4, x5, [x1, #80]
+	ldp	x6, x7, [x1, #96]
+	ldp	x8, x9, [x1, #112]
+	add	x1, x1, #128
+	stnp	x2, x3, [x0, #64]
+	stnp	x4, x5, [x0, #80]
+	stnp	x6, x7, [x0, #96]
+	stnp	x8, x9, [x0, #112]
+	add	x0, x0, #128
+	tst	x1, #(PAGE_SIZE - 1)
+	b.ne	1b
+	ret
 ENDPROC(copy_page)
-- 
1.7.2.5
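
For reference: CTR_EL0.CWG (bits 27:24) encodes log2 of the cache
writeback granule in 4-byte words, so the granule is (4 << CWG) bytes,
and CWG >= 5 means 128 bytes or more; that is what the cmp w2, 5 /
b.ge 2f sequence above selects on.  The standalone C sketch below shows
the same decoding.  It is not part of the patch: it copies the
CTR_CWG_SHIFT / CTR_CWG_MASK values from asm/cachetype.h of that era,
and assumes it runs somewhere a CTR_EL0 read does not trap (EL1, or a
kernel that emulates EL0 reads of CTR_EL0).

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Same field layout as arch/arm64/include/asm/cachetype.h (circa 4.4). */
#define CTR_CWG_SHIFT	24
#define CTR_CWG_MASK	15

int main(void)
{
	uint64_t ctr, cwg, bytes;

	/* Read the cache type register; may trap at EL0 on old kernels. */
	asm volatile("mrs %0, ctr_el0" : "=r" (ctr));

	cwg = (ctr >> CTR_CWG_SHIFT) & CTR_CWG_MASK;
	/*
	 * CWG is log2 of the writeback granule in 4-byte words, so the
	 * granule is 4 << CWG bytes.  A value of 0 means "not provided";
	 * the patch only takes the 128-byte path when CWG >= 5.
	 */
	bytes = cwg ? (4ULL << cwg) : 0;

	printf("CWG=%" PRIu64 " => writeback granule %" PRIu64 " bytes\n",
	       cwg, bytes);
	printf("copy_page would use the %s loop\n",
	       cwg >= 5 ? "128-byte" : "64-byte");
	return 0;
}

On a part with 128-byte cache lines such as ThunderX, this should
report CWG >= 5, i.e. the case the new 2: loop targets; on 64-byte
parts the branch falls through to the original loop.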