From patchwork Wed Oct 19 10:48:41 2016 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Eelis van der Weegen X-Patchwork-Id: 78220 Delivered-To: patch@linaro.org Received: by 10.140.97.247 with SMTP id m110csp178730qge; Wed, 19 Oct 2016 03:49:11 -0700 (PDT) X-Received: by 10.98.202.156 with SMTP id y28mr10169007pfk.130.1476874151012; Wed, 19 Oct 2016 03:49:11 -0700 (PDT) Return-Path: Received: from sourceware.org (server1.sourceware.org. [209.132.180.131]) by mx.google.com with ESMTPS id o9si36530967pgc.284.2016.10.19.03.49.10 for (version=TLS1_2 cipher=ECDHE-RSA-AES128-GCM-SHA256 bits=128/128); Wed, 19 Oct 2016 03:49:11 -0700 (PDT) Received-SPF: pass (google.com: domain of gcc-patches-return-439004-patch=linaro.org@gcc.gnu.org designates 209.132.180.131 as permitted sender) client-ip=209.132.180.131; Authentication-Results: mx.google.com; dkim=pass header.i=@gcc.gnu.org; spf=pass (google.com: domain of gcc-patches-return-439004-patch=linaro.org@gcc.gnu.org designates 209.132.180.131 as permitted sender) smtp.mailfrom=gcc-patches-return-439004-patch=linaro.org@gcc.gnu.org DomainKey-Signature: a=rsa-sha1; c=nofws; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender:to :from:subject:message-id:date:mime-version:content-type; q=dns; s=default; b=lsCjcYbuvzKF2D+vafVzlyX97MFBLvvuZv1M1VSoFKWe8TqhT6 3DG+XSn9qesIikWTyA6o9UOaez1KKueeokN8RIYjF8/5shbsh31e8MbDQ0lptfxt 2Gxfa86vbv/MJPFGjffW3JvAbrKanLlermArU/ICDkmya4HUHXgK7v2WA= DKIM-Signature: v=1; a=rsa-sha1; c=relaxed; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender:to :from:subject:message-id:date:mime-version:content-type; s= default; bh=sG8u8MoRQ4JzOqd0ubS1GkNiEsA=; b=ZtXyPhrSa3kvm4CFb9gH sL5ALWWaK90vbBztJmhAMDRJXp7knf/IALXmp6o6Eqd9lSn8/gBXVMGjNRNVE1Qf SRkXvzYqSuWUsPbm8SRih5B1IoAHmbAyaBc/E+dxvSu8zmcaaAh3JXQOEpE3Yibd YeVxOWFxyhXUEwuKCHGidtg= Received: (qmail 59257 invoked by alias); 19 Oct 2016 10:48:52 -0000 Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Delivered-To: mailing list gcc-patches@gcc.gnu.org Received: (qmail 59211 invoked by uid 89); 19 Oct 2016 10:48:51 -0000 Authentication-Results: sourceware.org; auth=none X-Virus-Found: No X-Spam-SWARE-Status: No, score=-1.6 required=5.0 tests=AWL, BAYES_00, KAM_LAZY_DOMAIN_SECURITY, RCVD_IN_DNSWL_LOW autolearn=no version=3.3.2 spammy=uniformly, 241299, H*RU:esmtpa, Hx-spam-relays-external:esmtpa X-Spam-User: qpsmtpd, 2 recipients X-HELO: smtpq4.tb.mail.iss.as9143.net Received: from smtpq4.tb.mail.iss.as9143.net (HELO smtpq4.tb.mail.iss.as9143.net) (212.54.42.167) by sourceware.org (qpsmtpd/0.93/v0.84-503-g423c35a) with ESMTP; Wed, 19 Oct 2016 10:48:48 +0000 Received: from [212.54.42.118] (helo=lsmtp4.tb.mail.iss.as9143.net) by smtpq4.tb.mail.iss.as9143.net with esmtp (Exim 4.86_2) (envelope-from ) id 1bwoQQ-0002v3-DM; Wed, 19 Oct 2016 12:48:46 +0200 Received: from i23017.upc-i.chello.nl ([62.195.23.17] helo=[192.168.0.16]) by lsmtp4.tb.mail.iss.as9143.net with esmtpa (Exim 4.82) (envelope-from ) id 1bwoQQ-0006z1-A9; Wed, 19 Oct 2016 12:48:46 +0200 To: libstdc++@gcc.gnu.org, GCC Patches From: Eelis van der Weegen Subject: [patch, libstdc++] Optimize selection sampling by using generator to get two values at once Message-ID: <58074F89.4050403@eelis.net> Date: Wed, 19 Oct 2016 12:48:41 +0200 User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:38.0) Gecko/20100101 Thunderbird/38.7.0 MIME-Version: 1.0 X-Ziggo-spambar: / X-Ziggo-spamscore: 0.0 X-Ziggo-spamreport: CMAE Analysis: v=2.1 cv=B99Eq7ZM c=1 sm=0 tr=0 a=9+rZDBEiDlHhcck0kWbJtElFXBc=:19 a=CH0kA5CcgfcA:10 a=r77TgQKjGQsHNAKrUKIA:9 a=de-Ec-xMLL1rah3KFlMA:9 a=QEXdDO2ut3YA:10 a=9beazMEsHtwA:10 a=2ze0Mziqef8wrtP3e24A:9 xcat=Undefined/Undefined none X-Ziggo-Spam-Status: No This is the same optimization as was recently applied to std::shuffle. It reduces the runtime of the following program by 20% on my machine: int main() { std::vector a(10000), b(1000); std::mt19937 gen; uint64_t c = 0; for (int i = 0; i != 1000; ++i) { std::sample(a.begin(), a.end(), b.begin(), b.size(), gen); c += uint64_t(b[32]); } std::cout << c; } Index: bits/stl_algo.h =================================================================== --- bits/stl_algo.h (revision 241299) +++ bits/stl_algo.h (working copy) @@ -3741,6 +3741,37 @@ #ifdef _GLIBCXX_USE_C99_STDINT_TR1 /** + * @brief Generate two uniformly distributed integers using a + * single distribution invocation. + * @param __b0 The upper bound for the first integer. + * @param __b1 The upper bound for the second integer. + * @param __g A UniformRandomBitGenerator. + * @return A pair (i, j) with i and j uniformly distributed + * over [0, __b0) and [0, __b1), respectively. + * + * Requires: __b0 * __b1 <= __g.max() - __g.min(). + * + * Using uniform_int_distribution with a range that is very + * small relative to the range of the generator ends up wasting + * potentially expensively generated randomness, since + * uniform_int_distribution does not store leftover randomness + * between invocations. + * + * If we know we want two integers in ranges that are sufficiently + * small, we can compose the ranges, use a single distribution + * invocation, and significantly reduce the waste. + */ + template + std::pair<_IntType, _IntType> + __gen_two_uniform_ints(_IntType __b0, _IntType __b1, + _UniformRandomBitGenerator&& __g) + { + _IntType __x = uniform_int_distribution<_IntType>{0, (__b0 * __b1) - 1}(__g); + return std::make_pair(__x / __b1, __x % __b1); + } + + /** * @brief Shuffle the elements of a sequence using a uniform random * number generator. * @ingroup mutating_algorithms @@ -3801,13 +3832,12 @@ while (__i != __last) { const __uc_type __swap_range = __uc_type(__i - __first) + 1; - const __uc_type __comp_range = __swap_range * (__swap_range + 1); - std::uniform_int_distribution<__uc_type> __d{0, __comp_range - 1}; - const __uc_type __pospos = __d(__g); + const std::pair<__uc_type, __uc_type> __pospos = + __gen_two_uniform_ints(__swap_range, __swap_range + 1); - std::iter_swap(__i++, __first + (__pospos % __swap_range)); - std::iter_swap(__i++, __first + (__pospos / __swap_range)); + std::iter_swap(__i++, __first + __pospos.first); + std::iter_swap(__i++, __first + __pospos.second); } return; @@ -5695,9 +5725,51 @@ { using __distrib_type = uniform_int_distribution<_Size>; using __param_type = typename __distrib_type::param_type; + using _USize = typename std::make_unsigned<_Size>::type; + using _Gen = typename std::remove_reference<_UniformRandomBitGenerator>::type; + using __uc_type = typename std::common_type::type; + __distrib_type __d{}; _Size __unsampled_sz = std::distance(__first, __last); - for (__n = std::min(__n, __unsampled_sz); __n != 0; ++__first) + __n = std::min(__n, __unsampled_sz); + + // If possible, we use __gen_two_uniform_ints to efficiently produce + // two random numbers using a single distribution invocation: + + const __uc_type __urngrange = __g.max() - __g.min(); + if (__urngrange / __uc_type(__unsampled_sz) >= __uc_type(__unsampled_sz)) + // I.e. (__urngrange >= __unsampled_sz * __unsampled_sz) but without wrap issues. + { + while (__n != 0 && __unsampled_sz >= 2) + { + const std::pair<_Size, _Size> p = + __gen_two_uniform_ints(__unsampled_sz, __unsampled_sz - 1, __g); + + --__unsampled_sz; + if (p.first < __n) + { + *__out++ = *__first; + --__n; + } + + ++__first; + + if (__n == 0) break; + + --__unsampled_sz; + if (p.second < __n) + { + *__out++ = *__first; + --__n; + } + + ++__first; + } + } + + // The loop above is otherwise equivalent to this one-at-a-time version: + + for (; __n != 0; ++__first) if (__d(__g, __param_type{0, --__unsampled_sz}) < __n) { *__out++ = *__first;