RandUtil: Add Mersenne Twister engine to thread-local storage to avoid
the overhead associated with seeding the generator. One can do ~18 M (~26 M) calls per second when using uniform_int_distribution on top of the Mersenne Twister engine versus <1M (~8M) calls per second when using the current implementation, on POSIX and Windows respectively. Note that these figures are obtained when the generator is seeded once; otherwise the figure obtained for the suggested implementation would be <1M (this is for POSIX; we don't have the figure for Windows). To that end, we use thread-local storage so that the generator is seeded only once per thread. Change-Id: I628d2e0ab882a9703e52b4007d53fa5cf49d4d47 Reviewed-on: https://chromium-review.googlesource.com/982115 Commit-Queue: Farah Charab <farahcharab@chromium.org> Reviewed-by: Gabriel Charette <gab@chromium.org> Reviewed-by: Alexander Timin <altimin@chromium.org> Cr-Commit-Position: refs/heads/master@{#550496}
This commit is contained in:
@ -1897,6 +1897,7 @@ test("base_perftests") {
|
||||
sources = [
|
||||
"message_loop/message_loop_perftest.cc",
|
||||
"message_loop/message_pump_perftest.cc",
|
||||
"rand_util_perftest.cc",
|
||||
|
||||
# "test/run_all_unittests.cc",
|
||||
"json/json_perftest.cc",
|
||||
|
@ -7,12 +7,64 @@
|
||||
#include <limits.h>
|
||||
#include <math.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <ctime>
|
||||
#include <limits>
|
||||
#include <random>
|
||||
|
||||
#include "base/lazy_instance.h"
|
||||
#include "base/logging.h"
|
||||
#include "base/strings/string_util.h"
|
||||
#include "base/threading/thread_local.h"
|
||||
|
||||
namespace {
|
||||
|
||||
class PrngThreadLocalStorage;
|
||||
|
||||
base::LazyInstance<
|
||||
base::ThreadLocalUniquePointer<PrngThreadLocalStorage>>::Leaky prng_tls =
|
||||
LAZY_INSTANCE_INITIALIZER;
|
||||
|
||||
// PrngThreadLocalStorage stores a pointer to the thread's seeded random number
|
||||
// generator in the thread's local storage. Note that at most one generator can
|
||||
// be bound to each thread at a time.
|
||||
// Example Usage:
|
||||
// prng = PrngThreadLocalStorage::GetGenerator();
|
||||
// prng -> GetRandomInteger(0,20);
|
||||
class PrngThreadLocalStorage {
|
||||
public:
|
||||
PrngThreadLocalStorage() : prng_(time(nullptr)){};
|
||||
|
||||
~PrngThreadLocalStorage() = default;
|
||||
|
||||
// Returns true if a pseudo-random number generator has been assigned to
|
||||
// the current thread.
|
||||
static bool IsSet() { return prng_tls.Get().Get(); }
|
||||
|
||||
// Returns the random generator bound to the current thread. If no such
|
||||
// generator exist, it creates an instance and binds it to the thread.
|
||||
static PrngThreadLocalStorage* GetGenerator() {
|
||||
PrngThreadLocalStorage* instance = prng_tls.Get().Get();
|
||||
if (!instance) {
|
||||
prng_tls.Get().Set(std::make_unique<PrngThreadLocalStorage>());
|
||||
instance = prng_tls.Get().Get();
|
||||
}
|
||||
return instance;
|
||||
}
|
||||
|
||||
// Returns a uniformly distributed random integer in the range [start,end].
|
||||
int GetRandomInteger(int start, int end) {
|
||||
std::uniform_int_distribution<> distribution(start, end);
|
||||
return distribution(prng_);
|
||||
}
|
||||
|
||||
private:
|
||||
std::mt19937 prng_;
|
||||
|
||||
DISALLOW_COPY_AND_ASSIGN(PrngThreadLocalStorage);
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
namespace base {
|
||||
|
||||
@ -25,11 +77,9 @@ uint64_t RandUint64() {
|
||||
int RandInt(int min, int max) {
|
||||
DCHECK_LE(min, max);
|
||||
|
||||
uint64_t range = static_cast<uint64_t>(max) - min + 1;
|
||||
// |range| is at most UINT_MAX + 1, so the result of RandGenerator(range)
|
||||
// is at most UINT_MAX. Hence it's safe to cast it from uint64_t to int64_t.
|
||||
int result =
|
||||
static_cast<int>(min + static_cast<int64_t>(base::RandGenerator(range)));
|
||||
PrngThreadLocalStorage* prng = PrngThreadLocalStorage::GetGenerator();
|
||||
int result = prng->GetRandomInteger(min, max);
|
||||
|
||||
DCHECK_GE(result, min);
|
||||
DCHECK_LE(result, max);
|
||||
return result;
|
||||
|
91
base/rand_util_perftest.cc
Normal file
91
base/rand_util_perftest.cc
Normal file
@ -0,0 +1,91 @@
|
||||
// Copyright 2018 The Chromium Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
#include "base/rand_util.h"
|
||||
|
||||
#include <ctime>
|
||||
#include <random>
|
||||
|
||||
#include "base/time/time.h"
|
||||
#include "testing/gtest/include/gtest/gtest.h"
|
||||
#include "testing/perf/perf_test.h"
|
||||
|
||||
namespace {
|
||||
|
||||
// Deprecated. Needed to benchmark the performance of the previous
|
||||
// implementation of base::RandInt().
|
||||
int RandIntDeprecated(int min, int max) {
|
||||
DCHECK_LE(min, max);
|
||||
|
||||
uint64_t range = static_cast<uint64_t>(max) - min + 1;
|
||||
// |range| is at most UINT_MAX + 1, so the result of RandGenerator(range)
|
||||
// is at most UINT_MAX. Hence it's safe to cast it from uint64_t to int64_t.
|
||||
int result =
|
||||
static_cast<int>(min + static_cast<int64_t>(base::RandGenerator(range)));
|
||||
|
||||
DCHECK_GE(result, min);
|
||||
DCHECK_LE(result, max);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
namespace base {
|
||||
|
||||
// Logs the average number of calls per second to base::RandInt(). Time is
|
||||
// measured every 1000 calls and the total number of calls is normalized to
|
||||
// reflect the average number of calls per second.
|
||||
TEST(RandUtilPerfTest, AverageNumberOfCallsPerSecondToGetRandInt) {
|
||||
base::TimeTicks start = base::TimeTicks::Now();
|
||||
base::TimeTicks now;
|
||||
|
||||
size_t num_rounds = 0;
|
||||
constexpr int kBatchSize = 1000;
|
||||
|
||||
do {
|
||||
int num_calls = 0;
|
||||
while (num_calls < kBatchSize) {
|
||||
RandInt(0, 1000);
|
||||
++num_calls;
|
||||
}
|
||||
now = base::TimeTicks::Now();
|
||||
++num_rounds;
|
||||
} while (now - start < base::TimeDelta::FromSeconds(1));
|
||||
|
||||
perf_test::PrintResult(
|
||||
"Task", " (Time is measured after every 1000 function calls)",
|
||||
"Average number of calls per second to base::GetRandInt",
|
||||
static_cast<double>(num_rounds * kBatchSize) / (now - start).InSecondsF(),
|
||||
"number of calls per second", true);
|
||||
}
|
||||
|
||||
// Logs the average number of calls per second to RandIntDeprecated(). Time is
|
||||
// measured every 1000 calls and the total number of calls is normalized to
|
||||
// reflect the average number of calls per second.
|
||||
TEST(RandUtilPerfTest, AverageNumberOfCallsPerSecondToRandIntDeprecated) {
|
||||
base::TimeTicks start = base::TimeTicks::Now();
|
||||
base::TimeTicks now;
|
||||
|
||||
size_t num_rounds = 0;
|
||||
constexpr int kBatchSize = 1000;
|
||||
|
||||
do {
|
||||
int num_calls = 0;
|
||||
while (num_calls < kBatchSize) {
|
||||
RandIntDeprecated(0, 1000);
|
||||
++num_calls;
|
||||
}
|
||||
now = base::TimeTicks::Now();
|
||||
++num_rounds;
|
||||
} while (now - start < base::TimeDelta::FromSeconds(1));
|
||||
|
||||
perf_test::PrintResult(
|
||||
"Task", " (Time is measured after every 1000 function calls)",
|
||||
"Average number of calls per second to RandIntDeprecated()",
|
||||
static_cast<double>(num_rounds * kBatchSize) / (now - start).InSecondsF(),
|
||||
"number of calls per second", true);
|
||||
}
|
||||
|
||||
} // namespace base
|
@ -50,6 +50,8 @@
|
||||
#ifndef BASE_THREADING_THREAD_LOCAL_H_
|
||||
#define BASE_THREADING_THREAD_LOCAL_H_
|
||||
|
||||
#include <memory>
|
||||
|
||||
#include "base/macros.h"
|
||||
#include "base/threading/thread_local_storage.h"
|
||||
|
||||
@ -61,9 +63,7 @@ class ThreadLocalPointer {
|
||||
ThreadLocalPointer() = default;
|
||||
~ThreadLocalPointer() = default;
|
||||
|
||||
Type* Get() {
|
||||
return static_cast<Type*>(slot_.Get());
|
||||
}
|
||||
Type* Get() { return static_cast<Type*>(slot_.Get()); }
|
||||
|
||||
void Set(Type* ptr) {
|
||||
slot_.Set(const_cast<void*>(static_cast<const void*>(ptr)));
|
||||
@ -75,18 +75,38 @@ class ThreadLocalPointer {
|
||||
DISALLOW_COPY_AND_ASSIGN(ThreadLocalPointer<Type>);
|
||||
};
|
||||
|
||||
template <typename Type>
|
||||
class ThreadLocalUniquePointer {
|
||||
public:
|
||||
ThreadLocalUniquePointer()
|
||||
: slot_(ThreadLocalUniquePointer::ThreadLocalUniquePointerDestructor) {}
|
||||
|
||||
~ThreadLocalUniquePointer() = default;
|
||||
|
||||
Type* Get() { return static_cast<Type*>(slot_.Get()); }
|
||||
|
||||
void Set(std::unique_ptr<Type> ptr) {
|
||||
slot_.Set(const_cast<void*>(static_cast<const void*>(ptr.release())));
|
||||
}
|
||||
|
||||
private:
|
||||
static void ThreadLocalUniquePointerDestructor(void* value) {
|
||||
std::unique_ptr<Type> to_be_deleted(static_cast<Type*>(value));
|
||||
}
|
||||
|
||||
ThreadLocalStorage::Slot slot_;
|
||||
|
||||
DISALLOW_COPY_AND_ASSIGN(ThreadLocalUniquePointer<Type>);
|
||||
};
|
||||
|
||||
class ThreadLocalBoolean {
|
||||
public:
|
||||
ThreadLocalBoolean() = default;
|
||||
~ThreadLocalBoolean() = default;
|
||||
|
||||
bool Get() {
|
||||
return tlp_.Get() != nullptr;
|
||||
}
|
||||
bool Get() { return tlp_.Get() != nullptr; }
|
||||
|
||||
void Set(bool val) {
|
||||
tlp_.Set(val ? this : nullptr);
|
||||
}
|
||||
void Set(bool val) { tlp_.Set(val ? this : nullptr); }
|
||||
|
||||
private:
|
||||
ThreadLocalPointer<void> tlp_;
|
||||
|
Reference in New Issue
Block a user