0

Run HangWatcher in zygote children

On Linux and ChromeOS, the HangWatcher never runs in zygote children
(renderers, gpu process, and some utility processes) because it's
only called in ContentMainRunnerImpl::RunBrowser() and
RunOtherNamedProcessTypeMain(), neither of which run in zygote children.

It does get initialized on the main thread because that is run in
ChromeMainDelegate::CommonEarlyInitialization(), which is called
from RunZygote().

The Linux sandbox cannot be started in a multithreaded process, so
the children that need to apply a sandbox will only start the
HangWatcher after applying the sandbox.

Use of the HangWatcher in zygote children is gated by a new
EnableHangWatcherInZygoteChildren base::Feature.

Bug: 1290880, 1079808
Change-Id: I553004b2002f5c193e7366c9699b891714f687e8
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/4194031
Code-Coverage: findit-for-me@appspot.gserviceaccount.com <findit-for-me@appspot.gserviceaccount.com>
Reviewed-by: Lei Zhang <thestig@chromium.org>
Reviewed-by: Olivier Li <olivierli@chromium.org>
Commit-Queue: Matthew Denton <mpdenton@chromium.org>
Reviewed-by: John Abd-El-Malek <jam@chromium.org>
Cr-Commit-Position: refs/heads/main@{#1187409}
This commit is contained in:
Matthew Denton
2023-08-23 18:49:46 +00:00
committed by Chromium LUCI CQ
parent 93dd1489c9
commit 15b5ea3d27
12 changed files with 191 additions and 31 deletions

@ -164,6 +164,10 @@ BASE_FEATURE(kEnableHangWatcher,
"EnableHangWatcher",
FEATURE_ENABLED_BY_DEFAULT);
BASE_FEATURE(kEnableHangWatcherInZygoteChildren,
"EnableHangWatcherInZygoteChildren",
FEATURE_ENABLED_BY_DEFAULT);
// Browser process.
constexpr base::FeatureParam<int> kIOThreadLogLevel{
&kEnableHangWatcher, "io_thread_log_level",
@ -318,7 +322,8 @@ WatchHangsInScope::~WatchHangsInScope() {
}
// static
void HangWatcher::InitializeOnMainThread(ProcessType process_type) {
void HangWatcher::InitializeOnMainThread(ProcessType process_type,
bool is_zygote_child) {
DCHECK(!g_use_hang_watcher);
DCHECK(g_io_thread_log_level == LoggingLevel::kNone);
DCHECK(g_main_thread_log_level == LoggingLevel::kNone);
@ -326,6 +331,14 @@ void HangWatcher::InitializeOnMainThread(ProcessType process_type) {
bool enable_hang_watcher = base::FeatureList::IsEnabled(kEnableHangWatcher);
#if BUILDFLAG(IS_LINUX) || BUILDFLAG(IS_CHROMEOS)
if (is_zygote_child) {
enable_hang_watcher =
enable_hang_watcher &&
base::FeatureList::IsEnabled(kEnableHangWatcherInZygoteChildren);
}
#endif
// Do not start HangWatcher in the GPU process until the issue related to
// invalid magic signature in the GPU WatchDog is fixed
// (https://crbug.com/1297760).
@ -544,12 +557,14 @@ HangWatcher::~HangWatcher() {
void HangWatcher::Start() {
thread_.Start();
thread_started_ = true;
}
void HangWatcher::Stop() {
g_keep_monitoring.store(false, std::memory_order_relaxed);
should_monitor_.Signal();
thread_.Join();
thread_started_ = false;
// In production HangWatcher is always leaked but during testing it's possibly
// stopped and restarted using a new instance. This makes sure the next call

@ -150,7 +150,8 @@ class BASE_EXPORT HangWatcher : public DelegateSimpleThread::Delegate {
// Initializes HangWatcher. Must be called once on the main thread during
// startup while single-threaded.
static void InitializeOnMainThread(ProcessType process_type);
static void InitializeOnMainThread(ProcessType process_type,
bool is_zygote_child);
// Returns the values that were set through InitializeOnMainThread() to their
// default value. Used for testing since in prod initialization should happen
@ -244,6 +245,10 @@ class BASE_EXPORT HangWatcher : public DelegateSimpleThread::Delegate {
// Begin executing the monitoring loop on the HangWatcher thread.
void Start();
// Returns true if Start() has been called and Stop() has not been called
// since.
bool IsStarted() const { return thread_started_; }
// Returns the value of the crash key with the time since last system power
// resume.
std::string GetTimeSinceLastSystemPowerResumeCrashKeyValue() const;
@ -377,6 +382,7 @@ class BASE_EXPORT HangWatcher : public DelegateSimpleThread::Delegate {
GUARDED_BY_CONTEXT(hang_watcher_thread_checker_);
base::DelegateSimpleThread thread_;
bool thread_started_;
RepeatingClosure after_monitor_closure_for_testing_;
RepeatingClosure on_hang_closure_for_testing_;

@ -11,6 +11,7 @@
#include "base/functional/callback.h"
#include "base/functional/callback_helpers.h"
#include "base/memory/raw_ptr.h"
#include "base/metrics/field_trial_params.h"
#include "base/run_loop.h"
#include "base/strings/string_number_conversions.h"
#include "base/synchronization/lock.h"
@ -57,6 +58,51 @@ constexpr uint64_t kAllZeros = 0x0000000000000000u;
constexpr uint64_t kOnesThenZeroes = 0xAAAAAAAAAAAAAAAAu;
constexpr uint64_t kZeroesThenOnes = 0x5555555555555555u;
#if BUILDFLAG(IS_LINUX) || BUILDFLAG(IS_CHROMEOS)
class HangWatcherEnabledInZygoteChildTest
: public testing::TestWithParam<std::tuple<bool, bool>> {
public:
HangWatcherEnabledInZygoteChildTest() {
std::vector<base::test::FeatureRefAndParams> enabled_features =
kFeatureAndParams;
std::vector<test::FeatureRef> disabled_features;
if (std::get<0>(GetParam())) {
enabled_features.push_back(test::FeatureRefAndParams(
base::kEnableHangWatcherInZygoteChildren, {}));
} else {
disabled_features.push_back(base::kEnableHangWatcherInZygoteChildren);
}
feature_list_.InitWithFeaturesAndParameters(enabled_features,
disabled_features);
HangWatcher::InitializeOnMainThread(
HangWatcher::ProcessType::kUtilityProcess,
/*is_zygote_child=*/std::get<1>(GetParam()));
}
void TearDown() override { HangWatcher::UnitializeOnMainThreadForTesting(); }
HangWatcherEnabledInZygoteChildTest(
const HangWatcherEnabledInZygoteChildTest& other) = delete;
HangWatcherEnabledInZygoteChildTest& operator=(
const HangWatcherEnabledInZygoteChildTest& other) = delete;
protected:
base::test::ScopedFeatureList feature_list_;
};
TEST_P(HangWatcherEnabledInZygoteChildTest, IsEnabled) {
// If the kEnableHangWatcherInZygoteChildren feature is disabled and
// InitializeOnMainThread is called with is_zygote_child==true, IsEnabled()
// should return false. It should return true in all other situations.
ASSERT_EQ(std::get<0>(GetParam()) || !std::get<1>(GetParam()),
HangWatcher::IsEnabled());
}
INSTANTIATE_TEST_SUITE_P(HangWatcherZygoteTest,
HangWatcherEnabledInZygoteChildTest,
testing::Combine(testing::Bool(), testing::Bool()));
#endif // BUILDFLAG(IS_LINUX) || BUILDFLAG(IS_CHROMEOS)
// Waits on provided WaitableEvent before executing and signals when done.
class BlockingThread : public DelegateSimpleThread::Delegate {
public:
@ -117,8 +163,8 @@ class HangWatcherTest : public testing::Test {
HangWatcherTest() {
feature_list_.InitWithFeaturesAndParameters(kFeatureAndParams, {});
hang_watcher_.InitializeOnMainThread(
HangWatcher::ProcessType::kBrowserProcess);
HangWatcher::InitializeOnMainThread(
HangWatcher::ProcessType::kBrowserProcess, false);
hang_watcher_.SetAfterMonitorClosureForTesting(base::BindRepeating(
&WaitableEvent::Signal, base::Unretained(&monitor_event_)));
@ -134,7 +180,7 @@ class HangWatcherTest : public testing::Test {
hang_watcher_.Start();
}
void TearDown() override { hang_watcher_.UnitializeOnMainThreadForTesting(); }
void TearDown() override { HangWatcher::UnitializeOnMainThreadForTesting(); }
HangWatcherTest(const HangWatcherTest& other) = delete;
HangWatcherTest& operator=(const HangWatcherTest& other) = delete;
@ -516,15 +562,15 @@ class HangWatcherSnapshotTest : public testing::Test {
public:
void SetUp() override {
feature_list_.InitWithFeaturesAndParameters(kFeatureAndParams, {});
hang_watcher_.InitializeOnMainThread(
HangWatcher::ProcessType::kBrowserProcess);
HangWatcher::InitializeOnMainThread(
HangWatcher::ProcessType::kBrowserProcess, false);
// The monitoring loop behavior is not verified in this test so we want to
// trigger monitoring manually.
hang_watcher_.SetMonitoringPeriodForTesting(kVeryLongDelta);
}
void TearDown() override { hang_watcher_.UnitializeOnMainThreadForTesting(); }
void TearDown() override { HangWatcher::UnitializeOnMainThreadForTesting(); }
HangWatcherSnapshotTest() = default;
HangWatcherSnapshotTest(const HangWatcherSnapshotTest& other) = delete;
@ -779,7 +825,7 @@ class HangWatcherPeriodicMonitoringTest : public testing::Test {
public:
HangWatcherPeriodicMonitoringTest() {
hang_watcher_.InitializeOnMainThread(
HangWatcher::ProcessType::kBrowserProcess);
HangWatcher::ProcessType::kBrowserProcess, false);
hang_watcher_.SetMonitoringPeriodForTesting(kMonitoringPeriod);
hang_watcher_.SetOnHangClosureForTesting(base::BindRepeating(
@ -935,8 +981,8 @@ class WatchHangsInScopeBlockingTest : public testing::Test {
public:
WatchHangsInScopeBlockingTest() {
feature_list_.InitWithFeaturesAndParameters(kFeatureAndParams, {});
hang_watcher_.InitializeOnMainThread(
HangWatcher::ProcessType::kBrowserProcess);
HangWatcher::InitializeOnMainThread(
HangWatcher::ProcessType::kBrowserProcess, false);
hang_watcher_.SetOnHangClosureForTesting(base::BindLambdaForTesting([&] {
capture_started_.Signal();
@ -964,7 +1010,7 @@ class WatchHangsInScopeBlockingTest : public testing::Test {
HangWatcher::RegisterThread(base::HangWatcher::ThreadType::kMainThread);
}
void TearDown() override { hang_watcher_.UnitializeOnMainThreadForTesting(); }
void TearDown() override { HangWatcher::UnitializeOnMainThreadForTesting(); }
WatchHangsInScopeBlockingTest(const WatchHangsInScopeBlockingTest& other) =
delete;

@ -31,6 +31,7 @@ BASE_EXPORT BASE_DECLARE_FEATURE(kAboveNormalCompositingBrowserWin);
#endif
BASE_EXPORT BASE_DECLARE_FEATURE(kEnableHangWatcher);
BASE_EXPORT BASE_DECLARE_FEATURE(kEnableHangWatcherInZygoteChildren);
} // namespace base

@ -17,6 +17,7 @@
#include "base/files/file_path.h"
#include "base/files/file_util.h"
#include "base/functional/bind.h"
#include "base/functional/overloaded.h"
#include "base/i18n/rtl.h"
#include "base/immediate_crash.h"
#include "base/lazy_instance.h"
@ -674,7 +675,7 @@ absl::optional<int> ChromeMainDelegate::PostEarlyInitialization(
const auto* invoked_in_browser =
absl::get_if<InvokedInBrowserProcess>(&invoked_in);
if (!invoked_in_browser) {
CommonEarlyInitialization();
CommonEarlyInitialization(invoked_in);
return absl::nullopt;
}
@ -839,7 +840,7 @@ absl::optional<int> ChromeMainDelegate::PostEarlyInitialization(
}
#endif // BUILDFLAG(IS_CHROMEOS_LACROS)
CommonEarlyInitialization();
CommonEarlyInitialization(invoked_in);
// Initializes the resource bundle and determines the locale.
std::string actual_locale = LoadLocalState(
@ -896,7 +897,7 @@ bool ChromeMainDelegate::ShouldInitializeMojo(InvokedIn invoked_in) {
return ShouldCreateFeatureList(invoked_in);
}
void ChromeMainDelegate::CommonEarlyInitialization() {
void ChromeMainDelegate::CommonEarlyInitialization(InvokedIn invoked_in) {
const base::CommandLine* const command_line =
base::CommandLine::ForCurrentProcess();
std::string process_type =
@ -946,7 +947,16 @@ void ChromeMainDelegate::CommonEarlyInitialization() {
} else {
hang_watcher_process_type = base::HangWatcher::ProcessType::kUnknownProcess;
}
base::HangWatcher::InitializeOnMainThread(hang_watcher_process_type);
bool is_zygote_child = absl::visit(
base::Overloaded{[](const InvokedInBrowserProcess& invoked_in_browser) {
return false;
},
[](const InvokedInChildProcess& invoked_in_child) {
return invoked_in_child.is_zygote_child;
}},
invoked_in);
base::HangWatcher::InitializeOnMainThread(
hang_watcher_process_type, /*is_zygote_child=*/is_zygote_child);
base::InitializeCpuReductionExperiment();
base::sequence_manager::internal::SequenceManagerImpl::InitializeFeatures();

@ -78,7 +78,7 @@ class ChromeMainDelegate : public content::ContentMainDelegate {
content::ContentUtilityClient* CreateContentUtilityClient() override;
// Initialization that happens in all process types.
void CommonEarlyInitialization();
void CommonEarlyInitialization(InvokedIn invoked_in);
// Initializes |tracing_sampler_profiler_|. Deletes any existing
// |tracing_sampler_profiler_| as well.

@ -631,27 +631,46 @@ int NO_STACK_PROTECTOR RunZygote(ContentMainDelegate* delegate) {
ContentClientInitializer::Set(process_type, delegate);
MainFunctionParams main_params(command_line);
main_params.zygote_child = true;
main_params.needs_startup_tracing_after_mojo_init = true;
if (delegate->ShouldCreateFeatureList(
ContentMainDelegate::InvokedInChildProcess())) {
const ContentMainDelegate::InvokedInChildProcess invoked_in_child{
.is_zygote_child = true};
if (delegate->ShouldCreateFeatureList(invoked_in_child)) {
InitializeFieldTrialAndFeatureList();
}
if (delegate->ShouldInitializeMojo(
ContentMainDelegate::InvokedInChildProcess())) {
if (delegate->ShouldInitializeMojo(invoked_in_child)) {
InitializeMojoCore();
}
delegate->PostEarlyInitialization(
ContentMainDelegate::InvokedInChildProcess());
delegate->PostEarlyInitialization(invoked_in_child);
base::allocator::PartitionAllocSupport::Get()
->ReconfigureAfterFeatureListInit(process_type);
for (size_t i = 0; i < std::size(kMainFunctions); ++i) {
if (process_type == kMainFunctions[i].name)
return kMainFunctions[i].function(std::move(main_params));
MainFunctionParams main_params(command_line);
main_params.zygote_child = true;
main_params.needs_startup_tracing_after_mojo_init = true;
// The hang watcher needs to be created once the feature list is available
// but before the IO thread is started.
base::ScopedClosureRunner unregister_thread_closure;
if (base::HangWatcher::IsEnabled()) {
base::HangWatcher::CreateHangWatcherInstance();
unregister_thread_closure = base::HangWatcher::RegisterThread(
base::HangWatcher::ThreadType::kMainThread);
// If the process is unsandboxed the HangWatcher can start now. Otherwise,
// the sandbox can't be initialized with multiple threads, so the
// HangWatcher will be started after the sandbox is initialized.
if (sandbox::policy::IsUnsandboxedSandboxType(
sandbox::policy::SandboxTypeFromCommandLine(*command_line))) {
base::HangWatcher::GetInstance()->Start();
} else {
main_params.hang_watcher_not_started_time = base::TimeTicks::Now();
}
}
for (auto& kMainFunction : kMainFunctions) {
if (process_type == kMainFunction.name) {
return kMainFunction.function(std::move(main_params));
}
}
auto exit_code = delegate->RunProcess(process_type, std::move(main_params));
@ -731,6 +750,9 @@ RunOtherNamedProcessTypeMain(const std::string& process_type,
#endif // BUILDFLAG(IS_LINUX) || BUILDFLAG(IS_CHROMEOS)
if (start_hang_watcher_now) {
base::HangWatcher::GetInstance()->Start();
} else {
main_function_params.hang_watcher_not_started_time =
base::TimeTicks::Now();
}
}

@ -39,7 +39,10 @@ class CONTENT_EXPORT ContentMainDelegate {
// Indicates the delegate is being invoked in a child process. The
// `kProcessType` switch will hold the precise child process type.
struct InvokedInChildProcess {};
struct InvokedInChildProcess {
// True if the child process was forked from one of the browser's zygotes.
bool is_zygote_child = false;
};
// The context in which a delegate method is invoked, including the process
// type and whether it is in a test harness. Can distinguish between

@ -11,8 +11,10 @@
#include "base/functional/callback.h"
#include "base/memory/raw_ptr.h"
#include "base/memory/raw_ptr_exclusion.h"
#include "base/time/time.h"
#include "build/build_config.h"
#include "content/common/content_export.h"
#include "third_party/abseil-cpp/absl/types/optional.h"
#if BUILDFLAG(IS_WIN)
namespace sandbox {
@ -60,6 +62,10 @@ struct CONTENT_EXPORT MainFunctionParams {
// tracing after initializing Mojo.
bool needs_startup_tracing_after_mojo_init = false;
// If non-null, this is the time the HangWatcher would have started if not
// delayed until after sandbox initialization.
absl::optional<base::TimeTicks> hang_watcher_not_started_time;
// Used by BrowserTestBase. If set, BrowserMainLoop runs this task instead of
// the main message loop.
base::OnceClosure ui_task;

@ -22,6 +22,7 @@
#include "base/strings/string_number_conversions.h"
#include "base/system/sys_info.h"
#include "base/task/sequence_manager/sequence_manager.h"
#include "base/threading/hang_watcher.h"
#include "base/threading/platform_thread.h"
#include "base/timer/hi_res_timer_manager.h"
#include "base/trace_event/trace_event.h"
@ -317,6 +318,20 @@ int RendererMain(MainFunctionParams parameters) {
}
}
// Start the HangWatcher now that the sandbox is engaged, if it hasn't
// already been started.
if (base::HangWatcher::IsEnabled() &&
!base::HangWatcher::GetInstance()->IsStarted()) {
DCHECK(parameters.hang_watcher_not_started_time.has_value());
base::TimeDelta uncovered_hang_watcher_time =
base::TimeTicks::Now() -
parameters.hang_watcher_not_started_time.value();
base::UmaHistogramTimes(
"HangWatcher.RendererProcess.UncoveredStartupTime",
uncovered_hang_watcher_time);
base::HangWatcher::GetInstance()->Start();
}
#if BUILDFLAG(MOJO_RANDOM_DELAYS_ENABLED)
mojo::BeginRandomMojoDelays();
#endif

@ -7,10 +7,13 @@
#include "base/debug/leak_annotations.h"
#include "base/functional/bind.h"
#include "base/message_loop/message_pump_type.h"
#include "base/metrics/histogram_functions.h"
#include "base/power_monitor/power_monitor.h"
#include "base/run_loop.h"
#include "base/task/single_thread_task_executor.h"
#include "base/threading/hang_watcher.h"
#include "base/threading/platform_thread.h"
#include "base/time/time.h"
#include "base/timer/hi_res_timer_manager.h"
#include "build/build_config.h"
#include "build/chromeos_buildflags.h"
@ -300,6 +303,20 @@ int UtilityMain(MainFunctionParams parameters) {
sandbox::policy::Sandbox::Initialize(
sandbox_type, std::move(pre_sandbox_hook), sandbox_options);
}
// Start the HangWatcher now that the sandbox is engaged, if it hasn't
// already been started.
if (base::HangWatcher::IsEnabled() &&
!base::HangWatcher::GetInstance()->IsStarted()) {
DCHECK(parameters.hang_watcher_not_started_time.has_value());
base::TimeDelta uncovered_hang_watcher_time =
base::TimeTicks::Now() -
parameters.hang_watcher_not_started_time.value();
base::UmaHistogramTimes("HangWatcher.UtilityProcess.UncoveredStartupTime",
uncovered_hang_watcher_time);
base::HangWatcher::GetInstance()->Start();
}
#elif BUILDFLAG(IS_WIN)
g_utility_target_services = parameters.sandbox_info->target_services;

@ -42,6 +42,7 @@ chromium-metrics-reviews@google.com.
<variants name="ProcessType">
<variant name="BrowserProcess"/>
<variant name="RendererProcess"/>
<variant name="UtilityProcess"/>
</variants>
<histogram name="HangWatcher.IsThreadHung.{ProcessAndThreadType}"
@ -85,6 +86,24 @@ chromium-metrics-reviews@google.com.
<token key="ProcessType" variants="ProcessType"/>
</histogram>
<histogram name="HangWatcher.{ProcessType}.UncoveredStartupTime" units="ms"
expires_after="2023-12-22">
<owner>olivierli@chromium.org</owner>
<owner>catan-team@chromium.org</owner>
<summary>
Because HangWatcher start is delayed until the sandbox is initialized on
Linux and ChromeOS, this measures the delta between the time HangWatcher
would have started in an unsandboxed process and the time the HangWatcher
actually started. The metric is emitted in the process type's main function
(e.g. RendererMain, UtilityMain) right after the sandbox is engaged and
right before HangWatcher is started.
This will be used to evaluate whether hangs are missed during startup
because of this delay.
</summary>
<token key="ProcessType" variants="ProcessType"/>
</histogram>
</histograms>
</histogram-configuration>