0

Add DumpWithoutCrashing throttling variants.

When using the legacy DwC function, we observe circumstances where
DwC is run hundreds or thousands of times in the course of few
minutes. This may leads to browser unresponsiveness as OS struggles
to keep up with the dump requests because of the frequency with which
it is called. To prevent, this CL introduces new variants of the
DumpWithoutCrashing function that would limit the frequency with
which the dumps are taken.

The throttling variant, which will also be the default, is named
DumpWithoutCrashing() and most of the usage will be steered towards
the default variant. Additionally, another variant, which uses unique
identifier, will be DumpWithoutCrashingWithUniqueId() whereas the
unthrottled version will be named DumpWithoutCrashingUnthrottled().

In addition, this CL also includes:
1. Addition of DumpWithoutCrashingUnthrottled() to banned function in
PRESUBMIT script which will restrict users from using unthrottled DwC.
2. A histogram to track the frequency of throttled vs uploaded dump
count.
3. Unittests coverage to test the throttling variants.

Bug: chromium:1259381
Change-Id: Ib217a8ce28d82927ba39cb10f774987ed6bdb5ba
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/3218378
Reviewed-by: Charlie Harrison <csharrison@chromium.org>
Reviewed-by: Daniel Cheng <dcheng@chromium.org>
Reviewed-by: Brian White <bcwhite@chromium.org>
Reviewed-by: Sean Kau <skau@chromium.org>
Reviewed-by: Will Harris <wfh@chromium.org>
Commit-Queue: Will Harris <wfh@chromium.org>
Cr-Commit-Position: refs/heads/main@{#969294}
This commit is contained in:
Aditya Kushwah
2022-02-10 04:54:43 +00:00
committed by Chromium LUCI CQ
parent 5b18e0c904
commit 5a286b7d1a
10 changed files with 329 additions and 14 deletions

@ -973,6 +973,16 @@ _BANNED_CPP_FUNCTIONS = (
r'^base/tracing/.*',
),
),
(
r'/\bbase::debug::DumpWithoutCrashingUnthrottled[(][)]',
(
'base::debug::DumpWithoutCrashingUnthrottled() does not throttle',
'dumps and may spam crash reports. Consider if the throttled',
'variants suffice instead.',
),
False,
(),
),
(
'RoInitialize',
(

@ -3089,6 +3089,7 @@ test("base_unittests") {
"debug/alias_unittest.cc",
"debug/crash_logging_unittest.cc",
"debug/debugger_unittest.cc",
"debug/dump_without_crashing_unittest.cc",
"debug/stack_trace_unittest.cc",
"debug/task_trace_unittest.cc",
"environment_unittest.cc",

@ -5,22 +5,66 @@
#include "base/debug/dump_without_crashing.h"
#include "base/check.h"
#include "base/metrics/histogram_functions.h"
#include "base/no_destructor.h"
#include "base/trace_event/base_tracing.h"
namespace {
// Pointer to the function that's called by DumpWithoutCrashing() to dump the
// Pointer to the function that's called by DumpWithoutCrashing* to dump the
// process's memory.
void(CDECL* dump_without_crashing_function_)() = nullptr;
template <typename Map, typename Key>
bool ShouldDump(Map& map, Key& key, base::TimeDelta time_between_dumps) {
static base::NoDestructor<base::Lock> lock;
base::AutoLock auto_lock(*lock);
base::TimeTicks now = base::TimeTicks::Now();
auto [it, inserted] = map.emplace(key, now);
if (inserted) {
return true;
}
if (now - it->second >= time_between_dumps) {
it->second = now;
return true;
}
return false;
}
// This function takes `location` and `time_between_dumps` as an input
// and checks if DumpWithoutCrashing() meets the requirements to take the dump
// or not.
bool ShouldDumpWithoutCrashWithLocation(const base::Location& location,
base::TimeDelta time_between_dumps) {
static base::NoDestructor<std::map<base::Location, base::TimeTicks>>
location_to_timestamp;
return ShouldDump(*location_to_timestamp, location, time_between_dumps);
}
// Pair of `location` and `unique_identifier` creates a unique key and checks
// if DumpWithoutCrashingWithUniqueId() meets the requirements to take dump or
// not.
bool ShouldDumpWithoutCrashWithLocationAndUniqueId(
size_t unique_identifier,
const base::Location& location,
base::TimeDelta time_between_dumps) {
static base::NoDestructor<
std::map<std::pair<base::Location, size_t>, base::TimeTicks>>
location_and_unique_identifier_to_timestamp;
std::pair<base::Location, size_t> key(location, unique_identifier);
return ShouldDump(*location_and_unique_identifier_to_timestamp, key,
time_between_dumps);
}
} // namespace
namespace base {
namespace debug {
bool DumpWithoutCrashing() {
TRACE_EVENT0("base", "DumpWithoutCrashing");
bool DumpWithoutCrashingUnthrottled() {
TRACE_EVENT0("base", "DumpWithoutCrashingUnthrottled");
if (dump_without_crashing_function_) {
(*dump_without_crashing_function_)();
return true;
@ -28,6 +72,38 @@ bool DumpWithoutCrashing() {
return false;
}
bool DumpWithoutCrashing(const base::Location& location,
base::TimeDelta time_between_dumps) {
TRACE_EVENT0("base", "DumpWithoutCrashing");
if (dump_without_crashing_function_ &&
ShouldDumpWithoutCrashWithLocation(location, time_between_dumps)) {
(*dump_without_crashing_function_)();
base::UmaHistogramEnumeration("Stability.DumpWithoutCrashingStatus",
DumpWithoutCrashingStatus::kUploaded);
return true;
}
base::UmaHistogramEnumeration("Stability.DumpWithoutCrashingStatus",
DumpWithoutCrashingStatus::kThrottled);
return false;
}
bool DumpWithoutCrashingWithUniqueId(size_t unique_identifier,
const base::Location& location,
base::TimeDelta time_between_dumps) {
TRACE_EVENT0("base", "DumpWithoutCrashingWithUniqueId");
if (dump_without_crashing_function_ &&
ShouldDumpWithoutCrashWithLocationAndUniqueId(unique_identifier, location,
time_between_dumps)) {
(*dump_without_crashing_function_)();
base::UmaHistogramEnumeration("Stability.DumpWithoutCrashingStatus",
DumpWithoutCrashingStatus::kUploaded);
return true;
}
base::UmaHistogramEnumeration("Stability.DumpWithoutCrashingStatus",
DumpWithoutCrashingStatus::kThrottled);
return false;
}
void SetDumpWithoutCrashingFunction(void (CDECL *function)()) {
#if !defined(COMPONENT_BUILD)
// In component builds, the same base is shared between modules
@ -39,5 +115,4 @@ void SetDumpWithoutCrashingFunction(void (CDECL *function)()) {
}
} // namespace debug
} // namespace base

@ -7,8 +7,18 @@
#include "base/base_export.h"
#include "base/compiler_specific.h"
#include "base/location.h"
#include "base/time/time.h"
#include "build/build_config.h"
// These values are persisted to logs. Entries should not be renumbered and
// numeric values should never be reused.
enum class DumpWithoutCrashingStatus {
kThrottled,
kUploaded,
kMaxValue = kUploaded
};
namespace base {
namespace debug {
@ -28,15 +38,46 @@ namespace debug {
// This function must not be called with a tail call because that would cause
// the caller to be omitted from the call stack in the crash dump, and that is
// confusing and omits what is likely the most important context.
BASE_EXPORT bool NOT_TAIL_CALLED DumpWithoutCrashing();
//
// Note: Calls to this function will not be throttled. To avoid performance
// problems if this is called many times in quick succession, prefer using one
// of the below variants.
BASE_EXPORT bool NOT_TAIL_CALLED DumpWithoutCrashingUnthrottled();
// Handler to silently dump the current process without crashing, that keeps
// track of call location so some throttling can be applied to avoid very
// frequent dump captures, which can have side-effects.
// `location` Location of the file from where the function is called.
// `time_between_dumps` Time until the next dump should be captured.
BASE_EXPORT bool NOT_TAIL_CALLED
DumpWithoutCrashing(const base::Location& location = base::Location::Current(),
base::TimeDelta time_between_dumps = base::Minutes(5));
// Handler to silently dump the current process without crashing that takes a
// location and unique id to keep a track and apply throttling. This function
// should be used when a domain wishes to capture dumps for multiple, unique
// reasons from a single location. An example would be unique bad mojo
// messages, or a value outside an expected range and the value should be
// considered interesting in the dump. The goal is to allow a single call to
// generate multiple dumps as needed and throttle future instances of the same
// identifiers for a short period of time.
// `unique_identifier` Hash to uniquely identify the function call. Consider
// using base::FastHash to generate the hash.
// `location` Location of the file from where the function is called.
// `time_between_dumps` Time until the next dump should be captured.
// Note: The unique identifier, as of now, is not comparable across different
// runs or builds and is stable only for a process lifetime.
BASE_EXPORT bool NOT_TAIL_CALLED DumpWithoutCrashingWithUniqueId(
size_t unique_identifier,
const base::Location& location = base::Location::Current(),
base::TimeDelta time_between_dumps = base::Minutes(5));
// Sets a function that'll be invoked to dump the current process when
// DumpWithoutCrashing() is called. May be called with null to remove a
// DumpWithoutCrashing* is called. May be called with null to remove a
// previously set function.
BASE_EXPORT void SetDumpWithoutCrashingFunction(void (CDECL *function)());
} // namespace debug
} // namespace base
#endif // BASE_DEBUG_DUMP_WITHOUT_CRASHING_H_

@ -0,0 +1,161 @@
// Copyright 2021 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "base/debug/dump_without_crashing.h"
#include "base/hash/hash.h"
#include "base/location.h"
#include "base/test/metrics/histogram_tester.h"
#include "base/test/scoped_mock_clock_override.h"
#include "build/build_config.h"
#include "testing/gtest/include/gtest/gtest.h"
namespace base {
namespace debug {
class DumpWithoutCrashingTest : public testing::Test {
public:
static void DummyDumpWithoutCrashing() { number_of_dump_calls_++; }
static int number_of_dump_calls() { return number_of_dump_calls_; }
protected:
void SetUp() override {
SetDumpWithoutCrashingFunction(
&DumpWithoutCrashingTest::DummyDumpWithoutCrashing);
number_of_dump_calls_ = 0;
}
void TearDown() override { SetDumpWithoutCrashingFunction(nullptr); }
// Set override.
ScopedMockClockOverride clock_;
const base::HistogramTester histogram_tester_;
const base::Location location1_ = FROM_HERE;
const base::Location location2_ = FROM_HERE;
const size_t unique_identifier1_ =
base::FastHash("DumpWithoutCrashingFirstTest");
const size_t unique_identifier2_ =
base::FastHash("DumpWithoutCrashingSecondTest");
private:
static int number_of_dump_calls_;
};
int DumpWithoutCrashingTest::number_of_dump_calls_ = 0;
// TODO(crbug.com/1295506) This test is flaky on iOS.
#if BUILDFLAG(IS_IOS)
#define MAYBE_DumpWithoutCrashingWithLocation \
DISABLED_DumpWithoutCrashingWithLocation
#else
#define MAYBE_DumpWithoutCrashingWithLocation DumpWithoutCrashingWithLocation
#endif
TEST_F(DumpWithoutCrashingTest, MAYBE_DumpWithoutCrashingWithLocation) {
EXPECT_EQ(0, DumpWithoutCrashingTest::number_of_dump_calls());
histogram_tester_.ExpectBucketCount("Stability.DumpWithoutCrashingStatus",
DumpWithoutCrashingStatus::kThrottled, 0);
histogram_tester_.ExpectBucketCount("Stability.DumpWithoutCrashingStatus",
DumpWithoutCrashingStatus::kUploaded, 0);
// The first call to DumpWithoutCrashing will always capture the dump and
// will return true
EXPECT_TRUE(DumpWithoutCrashing(location1_, base::Seconds(1)));
EXPECT_EQ(1, DumpWithoutCrashingTest::number_of_dump_calls());
histogram_tester_.ExpectBucketCount("Stability.DumpWithoutCrashingStatus",
DumpWithoutCrashingStatus::kThrottled, 0);
histogram_tester_.ExpectBucketCount("Stability.DumpWithoutCrashingStatus",
DumpWithoutCrashingStatus::kUploaded, 1);
// If DumpWithoutCrashing is called within 1 second, expected result is false.
EXPECT_FALSE(DumpWithoutCrashing(location1_, base::Seconds(1)));
EXPECT_EQ(1, DumpWithoutCrashingTest::number_of_dump_calls());
// For testing, the time for capturing dump again is 1 second and if the
// function is called after 1 second, it will return true.
clock_.Advance(Seconds(2));
EXPECT_TRUE(DumpWithoutCrashing(location1_, base::Seconds(1)));
EXPECT_EQ(2, DumpWithoutCrashingTest::number_of_dump_calls());
histogram_tester_.ExpectBucketCount("Stability.DumpWithoutCrashingStatus",
DumpWithoutCrashingStatus::kThrottled, 1);
histogram_tester_.ExpectBucketCount("Stability.DumpWithoutCrashingStatus",
DumpWithoutCrashingStatus::kUploaded, 2);
EXPECT_TRUE(DumpWithoutCrashing(location2_, base::Seconds(1)));
EXPECT_EQ(3, DumpWithoutCrashingTest::number_of_dump_calls());
EXPECT_FALSE(DumpWithoutCrashing(location2_, base::Seconds(1)));
EXPECT_EQ(3, DumpWithoutCrashingTest::number_of_dump_calls());
histogram_tester_.ExpectBucketCount("Stability.DumpWithoutCrashingStatus",
DumpWithoutCrashingStatus::kThrottled, 2);
histogram_tester_.ExpectBucketCount("Stability.DumpWithoutCrashingStatus",
DumpWithoutCrashingStatus::kUploaded, 3);
}
// TODO(crbug.com/1295506) This test is flaky on iOS.
#if BUILDFLAG(IS_IOS)
#define MAYBE_DumpWithoutCrashingWithLocationAndUniqueId \
DISABLED_DumpWithoutCrashingWithLocationAndUniqueId
#else
#define MAYBE_DumpWithoutCrashingWithLocationAndUniqueId \
DumpWithoutCrashingWithLocationAndUniqueId
#endif
TEST_F(DumpWithoutCrashingTest,
MAYBE_DumpWithoutCrashingWithLocationAndUniqueId) {
EXPECT_EQ(0, DumpWithoutCrashingTest::number_of_dump_calls());
histogram_tester_.ExpectBucketCount("Stability.DumpWithoutCrashingStatus",
DumpWithoutCrashingStatus::kThrottled, 0);
histogram_tester_.ExpectBucketCount("Stability.DumpWithoutCrashingStatus",
DumpWithoutCrashingStatus::kUploaded, 0);
// Test the variant of DumpWithoutCrashingWithUniqueId where the function
// takes a location and unique id.
EXPECT_TRUE(DumpWithoutCrashingWithUniqueId(unique_identifier1_, location1_,
base::Seconds(1)));
EXPECT_EQ(1, DumpWithoutCrashingTest::number_of_dump_calls());
histogram_tester_.ExpectBucketCount("Stability.DumpWithoutCrashingStatus",
DumpWithoutCrashingStatus::kThrottled, 0);
histogram_tester_.ExpectBucketCount("Stability.DumpWithoutCrashingStatus",
DumpWithoutCrashingStatus::kUploaded, 1);
// If DumpWithoutCrashingWithUniqueId called within 1 second, expected result
// is false.
EXPECT_FALSE(DumpWithoutCrashingWithUniqueId(unique_identifier1_, location1_,
base::Seconds(1)));
EXPECT_EQ(1, DumpWithoutCrashingTest::number_of_dump_calls());
// For testing, the time for capturing dump again is 1 second and if the
// function is called after 1 second, it will return true.
clock_.Advance(Seconds(2));
EXPECT_TRUE(DumpWithoutCrashingWithUniqueId(unique_identifier1_, location1_,
base::Seconds(1)));
EXPECT_EQ(2, DumpWithoutCrashingTest::number_of_dump_calls());
histogram_tester_.ExpectBucketCount("Stability.DumpWithoutCrashingStatus",
DumpWithoutCrashingStatus::kThrottled, 1);
histogram_tester_.ExpectBucketCount("Stability.DumpWithoutCrashingStatus",
DumpWithoutCrashingStatus::kUploaded, 2);
EXPECT_TRUE(DumpWithoutCrashingWithUniqueId(unique_identifier2_, location2_,
base::Seconds(1)));
EXPECT_EQ(3, DumpWithoutCrashingTest::number_of_dump_calls());
EXPECT_FALSE(DumpWithoutCrashingWithUniqueId(unique_identifier2_, location2_,
base::Seconds(1)));
EXPECT_EQ(3, DumpWithoutCrashingTest::number_of_dump_calls());
histogram_tester_.ExpectBucketCount("Stability.DumpWithoutCrashingStatus",
DumpWithoutCrashingStatus::kThrottled, 2);
histogram_tester_.ExpectBucketCount("Stability.DumpWithoutCrashingStatus",
DumpWithoutCrashingStatus::kUploaded, 3);
}
} // namespace debug
} // namespace base

@ -55,6 +55,12 @@ class BASE_EXPORT Location {
return program_counter_ == other.program_counter_;
}
// Comparator is necessary to use location object within an ordered container
// type (eg. std::map).
bool operator<(const Location& other) const {
return program_counter_ < other.program_counter_;
}
// Returns true if there is source code location info. If this is false,
// the Location object only contains a program counter or is
// default-initialized (the program counter is also null).

@ -28,7 +28,8 @@ class PrintersSyncBridgeTest : public testing::Test {
bridge_ = std::make_unique<PrintersSyncBridge>(
syncer::ModelTypeStoreTestUtil::FactoryForInMemoryStoreForTest(),
base::BindRepeating(
base::IgnoreResult(&base::debug::DumpWithoutCrashing)));
base::IgnoreResult(&base::debug::DumpWithoutCrashing), FROM_HERE,
base::Minutes(5)));
}
protected:

@ -60,12 +60,13 @@ class LoggingObserver : public SyncedPrintersManager::Observer {
class SyncedPrintersManagerTest : public testing::Test {
protected:
SyncedPrintersManagerTest()
: manager_(SyncedPrintersManager::Create(
std::make_unique<PrintersSyncBridge>(
syncer::ModelTypeStoreTestUtil::
FactoryForInMemoryStoreForTest(),
base::BindRepeating(
base::IgnoreResult(&base::debug::DumpWithoutCrashing))))) {
: manager_(SyncedPrintersManager::Create(std::make_unique<
PrintersSyncBridge>(
syncer::ModelTypeStoreTestUtil::FactoryForInMemoryStoreForTest(),
base::BindRepeating(
base::IgnoreResult(&base::debug::DumpWithoutCrashing),
FROM_HERE,
base::Minutes(5))))) {
base::RunLoop().RunUntilIdle();
}

@ -25267,6 +25267,11 @@ Called by update_document_policy_enum.py.-->
<int value="3" label="Success from Breakpad"/>
</enum>
<enum name="DumpWithoutCrashingStatus">
<int value="0" label="Dump throttled"/>
<int value="1" label="Dump uploaded"/>
</enum>
<enum name="DuplicateBookmarkEntityOnRemoteUpdateCondition">
<int value="0" label="Entity with server ID is a tombstone"/>
<int value="1" label="Only entity with temporary ID is a tombstone"/>

@ -292,6 +292,20 @@ chromium-metrics-reviews@google.com.
</summary>
</histogram>
<histogram name="Stability.DumpWithoutCrashingStatus"
enum="DumpWithoutCrashingStatus" expires_after="never">
<!-- expires-never: Used for monitoring the throttling frequency of
DumpWithoutCrashing. -->
<owner>dcheng@chromium.org</owner>
<owner>wfh@chromium.org</owner>
<owner>adkushwa@microsoft.com</owner>
<summary>
Keeps a count of number of times uploading the dump was throttled when
calling DumpWithoutCrashing.
</summary>
</histogram>
<histogram name="Stability.Experimental.PageLoads" enum="StabilityPageLoadType"
expires_after="2022-06-12">
<owner>fdoray@chromium.org</owner>