diff --git a/base/threading/hang_watcher.cc b/base/threading/hang_watcher.cc index f55bcc8648563..af5b53a6a723f 100644 --- a/base/threading/hang_watcher.cc +++ b/base/threading/hang_watcher.cc @@ -164,6 +164,10 @@ BASE_FEATURE(kEnableHangWatcher, "EnableHangWatcher", FEATURE_ENABLED_BY_DEFAULT); +BASE_FEATURE(kEnableHangWatcherInZygoteChildren, + "EnableHangWatcherInZygoteChildren", + FEATURE_ENABLED_BY_DEFAULT); + // Browser process. constexpr base::FeatureParam<int> kIOThreadLogLevel{ &kEnableHangWatcher, "io_thread_log_level", @@ -318,7 +322,8 @@ WatchHangsInScope::~WatchHangsInScope() { } // static -void HangWatcher::InitializeOnMainThread(ProcessType process_type) { +void HangWatcher::InitializeOnMainThread(ProcessType process_type, + bool is_zygote_child) { DCHECK(!g_use_hang_watcher); DCHECK(g_io_thread_log_level == LoggingLevel::kNone); DCHECK(g_main_thread_log_level == LoggingLevel::kNone); @@ -326,6 +331,14 @@ void HangWatcher::InitializeOnMainThread(ProcessType process_type) { bool enable_hang_watcher = base::FeatureList::IsEnabled(kEnableHangWatcher); +#if BUILDFLAG(IS_LINUX) || BUILDFLAG(IS_CHROMEOS) + if (is_zygote_child) { + enable_hang_watcher = + enable_hang_watcher && + base::FeatureList::IsEnabled(kEnableHangWatcherInZygoteChildren); + } +#endif + // Do not start HangWatcher in the GPU process until the issue related to // invalid magic signature in the GPU WatchDog is fixed // (https://crbug.com/1297760). @@ -544,12 +557,14 @@ HangWatcher::~HangWatcher() { void HangWatcher::Start() { thread_.Start(); + thread_started_ = true; } void HangWatcher::Stop() { g_keep_monitoring.store(false, std::memory_order_relaxed); should_monitor_.Signal(); thread_.Join(); + thread_started_ = false; // In production HangWatcher is always leaked but during testing it's possibly // stopped and restarted using a new instance. This makes sure the next call diff --git a/base/threading/hang_watcher.h b/base/threading/hang_watcher.h index 470f627bc8fef..6887bff03a31d 100644 --- a/base/threading/hang_watcher.h +++ b/base/threading/hang_watcher.h @@ -150,7 +150,8 @@ class BASE_EXPORT HangWatcher : public DelegateSimpleThread::Delegate { // Initializes HangWatcher. Must be called once on the main thread during // startup while single-threaded. - static void InitializeOnMainThread(ProcessType process_type); + static void InitializeOnMainThread(ProcessType process_type, + bool is_zygote_child); // Returns the values that were set through InitializeOnMainThread() to their // default value. Used for testing since in prod initialization should happen @@ -244,6 +245,10 @@ class BASE_EXPORT HangWatcher : public DelegateSimpleThread::Delegate { // Begin executing the monitoring loop on the HangWatcher thread. void Start(); + // Returns true if Start() has been called and Stop() has not been called + // since. + bool IsStarted() const { return thread_started_; } + // Returns the value of the crash key with the time since last system power // resume. std::string GetTimeSinceLastSystemPowerResumeCrashKeyValue() const; @@ -377,6 +382,7 @@ class BASE_EXPORT HangWatcher : public DelegateSimpleThread::Delegate { GUARDED_BY_CONTEXT(hang_watcher_thread_checker_); base::DelegateSimpleThread thread_; + bool thread_started_; RepeatingClosure after_monitor_closure_for_testing_; RepeatingClosure on_hang_closure_for_testing_; diff --git a/base/threading/hang_watcher_unittest.cc b/base/threading/hang_watcher_unittest.cc index badffabf626f8..424d92fe7f9b4 100644 --- a/base/threading/hang_watcher_unittest.cc +++ b/base/threading/hang_watcher_unittest.cc @@ -11,6 +11,7 @@ #include "base/functional/callback.h" #include "base/functional/callback_helpers.h" #include "base/memory/raw_ptr.h" +#include "base/metrics/field_trial_params.h" #include "base/run_loop.h" #include "base/strings/string_number_conversions.h" #include "base/synchronization/lock.h" @@ -57,6 +58,51 @@ constexpr uint64_t kAllZeros = 0x0000000000000000u; constexpr uint64_t kOnesThenZeroes = 0xAAAAAAAAAAAAAAAAu; constexpr uint64_t kZeroesThenOnes = 0x5555555555555555u; +#if BUILDFLAG(IS_LINUX) || BUILDFLAG(IS_CHROMEOS) +class HangWatcherEnabledInZygoteChildTest + : public testing::TestWithParam<std::tuple<bool, bool>> { + public: + HangWatcherEnabledInZygoteChildTest() { + std::vector<base::test::FeatureRefAndParams> enabled_features = + kFeatureAndParams; + std::vector<test::FeatureRef> disabled_features; + if (std::get<0>(GetParam())) { + enabled_features.push_back(test::FeatureRefAndParams( + base::kEnableHangWatcherInZygoteChildren, {})); + } else { + disabled_features.push_back(base::kEnableHangWatcherInZygoteChildren); + } + feature_list_.InitWithFeaturesAndParameters(enabled_features, + disabled_features); + HangWatcher::InitializeOnMainThread( + HangWatcher::ProcessType::kUtilityProcess, + /*is_zygote_child=*/std::get<1>(GetParam())); + } + + void TearDown() override { HangWatcher::UnitializeOnMainThreadForTesting(); } + + HangWatcherEnabledInZygoteChildTest( + const HangWatcherEnabledInZygoteChildTest& other) = delete; + HangWatcherEnabledInZygoteChildTest& operator=( + const HangWatcherEnabledInZygoteChildTest& other) = delete; + + protected: + base::test::ScopedFeatureList feature_list_; +}; + +TEST_P(HangWatcherEnabledInZygoteChildTest, IsEnabled) { + // If the kEnableHangWatcherInZygoteChildren feature is disabled and + // InitializeOnMainThread is called with is_zygote_child==true, IsEnabled() + // should return false. It should return true in all other situations. + ASSERT_EQ(std::get<0>(GetParam()) || !std::get<1>(GetParam()), + HangWatcher::IsEnabled()); +} + +INSTANTIATE_TEST_SUITE_P(HangWatcherZygoteTest, + HangWatcherEnabledInZygoteChildTest, + testing::Combine(testing::Bool(), testing::Bool())); +#endif // BUILDFLAG(IS_LINUX) || BUILDFLAG(IS_CHROMEOS) + // Waits on provided WaitableEvent before executing and signals when done. class BlockingThread : public DelegateSimpleThread::Delegate { public: @@ -117,8 +163,8 @@ class HangWatcherTest : public testing::Test { HangWatcherTest() { feature_list_.InitWithFeaturesAndParameters(kFeatureAndParams, {}); - hang_watcher_.InitializeOnMainThread( - HangWatcher::ProcessType::kBrowserProcess); + HangWatcher::InitializeOnMainThread( + HangWatcher::ProcessType::kBrowserProcess, false); hang_watcher_.SetAfterMonitorClosureForTesting(base::BindRepeating( &WaitableEvent::Signal, base::Unretained(&monitor_event_))); @@ -134,7 +180,7 @@ class HangWatcherTest : public testing::Test { hang_watcher_.Start(); } - void TearDown() override { hang_watcher_.UnitializeOnMainThreadForTesting(); } + void TearDown() override { HangWatcher::UnitializeOnMainThreadForTesting(); } HangWatcherTest(const HangWatcherTest& other) = delete; HangWatcherTest& operator=(const HangWatcherTest& other) = delete; @@ -516,15 +562,15 @@ class HangWatcherSnapshotTest : public testing::Test { public: void SetUp() override { feature_list_.InitWithFeaturesAndParameters(kFeatureAndParams, {}); - hang_watcher_.InitializeOnMainThread( - HangWatcher::ProcessType::kBrowserProcess); + HangWatcher::InitializeOnMainThread( + HangWatcher::ProcessType::kBrowserProcess, false); // The monitoring loop behavior is not verified in this test so we want to // trigger monitoring manually. hang_watcher_.SetMonitoringPeriodForTesting(kVeryLongDelta); } - void TearDown() override { hang_watcher_.UnitializeOnMainThreadForTesting(); } + void TearDown() override { HangWatcher::UnitializeOnMainThreadForTesting(); } HangWatcherSnapshotTest() = default; HangWatcherSnapshotTest(const HangWatcherSnapshotTest& other) = delete; @@ -779,7 +825,7 @@ class HangWatcherPeriodicMonitoringTest : public testing::Test { public: HangWatcherPeriodicMonitoringTest() { hang_watcher_.InitializeOnMainThread( - HangWatcher::ProcessType::kBrowserProcess); + HangWatcher::ProcessType::kBrowserProcess, false); hang_watcher_.SetMonitoringPeriodForTesting(kMonitoringPeriod); hang_watcher_.SetOnHangClosureForTesting(base::BindRepeating( @@ -935,8 +981,8 @@ class WatchHangsInScopeBlockingTest : public testing::Test { public: WatchHangsInScopeBlockingTest() { feature_list_.InitWithFeaturesAndParameters(kFeatureAndParams, {}); - hang_watcher_.InitializeOnMainThread( - HangWatcher::ProcessType::kBrowserProcess); + HangWatcher::InitializeOnMainThread( + HangWatcher::ProcessType::kBrowserProcess, false); hang_watcher_.SetOnHangClosureForTesting(base::BindLambdaForTesting([&] { capture_started_.Signal(); @@ -964,7 +1010,7 @@ class WatchHangsInScopeBlockingTest : public testing::Test { HangWatcher::RegisterThread(base::HangWatcher::ThreadType::kMainThread); } - void TearDown() override { hang_watcher_.UnitializeOnMainThreadForTesting(); } + void TearDown() override { HangWatcher::UnitializeOnMainThreadForTesting(); } WatchHangsInScopeBlockingTest(const WatchHangsInScopeBlockingTest& other) = delete; diff --git a/base/threading/threading_features.h b/base/threading/threading_features.h index a682df497da4d..329a5c7fd8ea6 100644 --- a/base/threading/threading_features.h +++ b/base/threading/threading_features.h @@ -31,6 +31,7 @@ BASE_EXPORT BASE_DECLARE_FEATURE(kAboveNormalCompositingBrowserWin); #endif BASE_EXPORT BASE_DECLARE_FEATURE(kEnableHangWatcher); +BASE_EXPORT BASE_DECLARE_FEATURE(kEnableHangWatcherInZygoteChildren); } // namespace base diff --git a/chrome/app/chrome_main_delegate.cc b/chrome/app/chrome_main_delegate.cc index c4acb61c7dea0..cfd0b9f673745 100644 --- a/chrome/app/chrome_main_delegate.cc +++ b/chrome/app/chrome_main_delegate.cc @@ -17,6 +17,7 @@ #include "base/files/file_path.h" #include "base/files/file_util.h" #include "base/functional/bind.h" +#include "base/functional/overloaded.h" #include "base/i18n/rtl.h" #include "base/immediate_crash.h" #include "base/lazy_instance.h" @@ -674,7 +675,7 @@ absl::optional<int> ChromeMainDelegate::PostEarlyInitialization( const auto* invoked_in_browser = absl::get_if<InvokedInBrowserProcess>(&invoked_in); if (!invoked_in_browser) { - CommonEarlyInitialization(); + CommonEarlyInitialization(invoked_in); return absl::nullopt; } @@ -839,7 +840,7 @@ absl::optional<int> ChromeMainDelegate::PostEarlyInitialization( } #endif // BUILDFLAG(IS_CHROMEOS_LACROS) - CommonEarlyInitialization(); + CommonEarlyInitialization(invoked_in); // Initializes the resource bundle and determines the locale. std::string actual_locale = LoadLocalState( @@ -896,7 +897,7 @@ bool ChromeMainDelegate::ShouldInitializeMojo(InvokedIn invoked_in) { return ShouldCreateFeatureList(invoked_in); } -void ChromeMainDelegate::CommonEarlyInitialization() { +void ChromeMainDelegate::CommonEarlyInitialization(InvokedIn invoked_in) { const base::CommandLine* const command_line = base::CommandLine::ForCurrentProcess(); std::string process_type = @@ -946,7 +947,16 @@ void ChromeMainDelegate::CommonEarlyInitialization() { } else { hang_watcher_process_type = base::HangWatcher::ProcessType::kUnknownProcess; } - base::HangWatcher::InitializeOnMainThread(hang_watcher_process_type); + bool is_zygote_child = absl::visit( + base::Overloaded{[](const InvokedInBrowserProcess& invoked_in_browser) { + return false; + }, + [](const InvokedInChildProcess& invoked_in_child) { + return invoked_in_child.is_zygote_child; + }}, + invoked_in); + base::HangWatcher::InitializeOnMainThread( + hang_watcher_process_type, /*is_zygote_child=*/is_zygote_child); base::InitializeCpuReductionExperiment(); base::sequence_manager::internal::SequenceManagerImpl::InitializeFeatures(); diff --git a/chrome/app/chrome_main_delegate.h b/chrome/app/chrome_main_delegate.h index dad9f981d2e01..176b6248f18e8 100644 --- a/chrome/app/chrome_main_delegate.h +++ b/chrome/app/chrome_main_delegate.h @@ -78,7 +78,7 @@ class ChromeMainDelegate : public content::ContentMainDelegate { content::ContentUtilityClient* CreateContentUtilityClient() override; // Initialization that happens in all process types. - void CommonEarlyInitialization(); + void CommonEarlyInitialization(InvokedIn invoked_in); // Initializes |tracing_sampler_profiler_|. Deletes any existing // |tracing_sampler_profiler_| as well. diff --git a/content/app/content_main_runner_impl.cc b/content/app/content_main_runner_impl.cc index 6cb42330a794c..1c208507130ba 100644 --- a/content/app/content_main_runner_impl.cc +++ b/content/app/content_main_runner_impl.cc @@ -631,27 +631,46 @@ int NO_STACK_PROTECTOR RunZygote(ContentMainDelegate* delegate) { ContentClientInitializer::Set(process_type, delegate); - MainFunctionParams main_params(command_line); - main_params.zygote_child = true; - main_params.needs_startup_tracing_after_mojo_init = true; - - if (delegate->ShouldCreateFeatureList( - ContentMainDelegate::InvokedInChildProcess())) { + const ContentMainDelegate::InvokedInChildProcess invoked_in_child{ + .is_zygote_child = true}; + if (delegate->ShouldCreateFeatureList(invoked_in_child)) { InitializeFieldTrialAndFeatureList(); } - if (delegate->ShouldInitializeMojo( - ContentMainDelegate::InvokedInChildProcess())) { + if (delegate->ShouldInitializeMojo(invoked_in_child)) { InitializeMojoCore(); } - delegate->PostEarlyInitialization( - ContentMainDelegate::InvokedInChildProcess()); + delegate->PostEarlyInitialization(invoked_in_child); base::allocator::PartitionAllocSupport::Get() ->ReconfigureAfterFeatureListInit(process_type); - for (size_t i = 0; i < std::size(kMainFunctions); ++i) { - if (process_type == kMainFunctions[i].name) - return kMainFunctions[i].function(std::move(main_params)); + MainFunctionParams main_params(command_line); + main_params.zygote_child = true; + main_params.needs_startup_tracing_after_mojo_init = true; + + // The hang watcher needs to be created once the feature list is available + // but before the IO thread is started. + base::ScopedClosureRunner unregister_thread_closure; + if (base::HangWatcher::IsEnabled()) { + base::HangWatcher::CreateHangWatcherInstance(); + unregister_thread_closure = base::HangWatcher::RegisterThread( + base::HangWatcher::ThreadType::kMainThread); + + // If the process is unsandboxed the HangWatcher can start now. Otherwise, + // the sandbox can't be initialized with multiple threads, so the + // HangWatcher will be started after the sandbox is initialized. + if (sandbox::policy::IsUnsandboxedSandboxType( + sandbox::policy::SandboxTypeFromCommandLine(*command_line))) { + base::HangWatcher::GetInstance()->Start(); + } else { + main_params.hang_watcher_not_started_time = base::TimeTicks::Now(); + } + } + + for (auto& kMainFunction : kMainFunctions) { + if (process_type == kMainFunction.name) { + return kMainFunction.function(std::move(main_params)); + } } auto exit_code = delegate->RunProcess(process_type, std::move(main_params)); @@ -731,6 +750,9 @@ RunOtherNamedProcessTypeMain(const std::string& process_type, #endif // BUILDFLAG(IS_LINUX) || BUILDFLAG(IS_CHROMEOS) if (start_hang_watcher_now) { base::HangWatcher::GetInstance()->Start(); + } else { + main_function_params.hang_watcher_not_started_time = + base::TimeTicks::Now(); } } diff --git a/content/public/app/content_main_delegate.h b/content/public/app/content_main_delegate.h index d4bb0e9a017a8..c891c5649a6ae 100644 --- a/content/public/app/content_main_delegate.h +++ b/content/public/app/content_main_delegate.h @@ -39,7 +39,10 @@ class CONTENT_EXPORT ContentMainDelegate { // Indicates the delegate is being invoked in a child process. The // `kProcessType` switch will hold the precise child process type. - struct InvokedInChildProcess {}; + struct InvokedInChildProcess { + // True if the child process was forked from one of the browser's zygotes. + bool is_zygote_child = false; + }; // The context in which a delegate method is invoked, including the process // type and whether it is in a test harness. Can distinguish between diff --git a/content/public/common/main_function_params.h b/content/public/common/main_function_params.h index 67954301c1ede..a5d381a04360a 100644 --- a/content/public/common/main_function_params.h +++ b/content/public/common/main_function_params.h @@ -11,8 +11,10 @@ #include "base/functional/callback.h" #include "base/memory/raw_ptr.h" #include "base/memory/raw_ptr_exclusion.h" +#include "base/time/time.h" #include "build/build_config.h" #include "content/common/content_export.h" +#include "third_party/abseil-cpp/absl/types/optional.h" #if BUILDFLAG(IS_WIN) namespace sandbox { @@ -60,6 +62,10 @@ struct CONTENT_EXPORT MainFunctionParams { // tracing after initializing Mojo. bool needs_startup_tracing_after_mojo_init = false; + // If non-null, this is the time the HangWatcher would have started if not + // delayed until after sandbox initialization. + absl::optional<base::TimeTicks> hang_watcher_not_started_time; + // Used by BrowserTestBase. If set, BrowserMainLoop runs this task instead of // the main message loop. base::OnceClosure ui_task; diff --git a/content/renderer/renderer_main.cc b/content/renderer/renderer_main.cc index 855f2d8f6efe7..5ee97825c889e 100644 --- a/content/renderer/renderer_main.cc +++ b/content/renderer/renderer_main.cc @@ -22,6 +22,7 @@ #include "base/strings/string_number_conversions.h" #include "base/system/sys_info.h" #include "base/task/sequence_manager/sequence_manager.h" +#include "base/threading/hang_watcher.h" #include "base/threading/platform_thread.h" #include "base/timer/hi_res_timer_manager.h" #include "base/trace_event/trace_event.h" @@ -317,6 +318,20 @@ int RendererMain(MainFunctionParams parameters) { } } + // Start the HangWatcher now that the sandbox is engaged, if it hasn't + // already been started. + if (base::HangWatcher::IsEnabled() && + !base::HangWatcher::GetInstance()->IsStarted()) { + DCHECK(parameters.hang_watcher_not_started_time.has_value()); + base::TimeDelta uncovered_hang_watcher_time = + base::TimeTicks::Now() - + parameters.hang_watcher_not_started_time.value(); + base::UmaHistogramTimes( + "HangWatcher.RendererProcess.UncoveredStartupTime", + uncovered_hang_watcher_time); + base::HangWatcher::GetInstance()->Start(); + } + #if BUILDFLAG(MOJO_RANDOM_DELAYS_ENABLED) mojo::BeginRandomMojoDelays(); #endif diff --git a/content/utility/utility_main.cc b/content/utility/utility_main.cc index 80f244ebe7bf8..4469d678f74b0 100644 --- a/content/utility/utility_main.cc +++ b/content/utility/utility_main.cc @@ -7,10 +7,13 @@ #include "base/debug/leak_annotations.h" #include "base/functional/bind.h" #include "base/message_loop/message_pump_type.h" +#include "base/metrics/histogram_functions.h" #include "base/power_monitor/power_monitor.h" #include "base/run_loop.h" #include "base/task/single_thread_task_executor.h" +#include "base/threading/hang_watcher.h" #include "base/threading/platform_thread.h" +#include "base/time/time.h" #include "base/timer/hi_res_timer_manager.h" #include "build/build_config.h" #include "build/chromeos_buildflags.h" @@ -300,6 +303,20 @@ int UtilityMain(MainFunctionParams parameters) { sandbox::policy::Sandbox::Initialize( sandbox_type, std::move(pre_sandbox_hook), sandbox_options); } + + // Start the HangWatcher now that the sandbox is engaged, if it hasn't + // already been started. + if (base::HangWatcher::IsEnabled() && + !base::HangWatcher::GetInstance()->IsStarted()) { + DCHECK(parameters.hang_watcher_not_started_time.has_value()); + base::TimeDelta uncovered_hang_watcher_time = + base::TimeTicks::Now() - + parameters.hang_watcher_not_started_time.value(); + base::UmaHistogramTimes("HangWatcher.UtilityProcess.UncoveredStartupTime", + uncovered_hang_watcher_time); + base::HangWatcher::GetInstance()->Start(); + } + #elif BUILDFLAG(IS_WIN) g_utility_target_services = parameters.sandbox_info->target_services; diff --git a/tools/metrics/histograms/metadata/hang_watcher/histograms.xml b/tools/metrics/histograms/metadata/hang_watcher/histograms.xml index 88bfaa15d303d..9705b6ba52929 100644 --- a/tools/metrics/histograms/metadata/hang_watcher/histograms.xml +++ b/tools/metrics/histograms/metadata/hang_watcher/histograms.xml @@ -42,6 +42,7 @@ chromium-metrics-reviews@google.com. <variants name="ProcessType"> <variant name="BrowserProcess"/> <variant name="RendererProcess"/> + <variant name="UtilityProcess"/> </variants> <histogram name="HangWatcher.IsThreadHung.{ProcessAndThreadType}" @@ -85,6 +86,24 @@ chromium-metrics-reviews@google.com. <token key="ProcessType" variants="ProcessType"/> </histogram> +<histogram name="HangWatcher.{ProcessType}.UncoveredStartupTime" units="ms" + expires_after="2023-12-22"> + <owner>olivierli@chromium.org</owner> + <owner>catan-team@chromium.org</owner> + <summary> + Because HangWatcher start is delayed until the sandbox is initialized on + Linux and ChromeOS, this measures the delta between the time HangWatcher + would have started in an unsandboxed process and the time the HangWatcher + actually started. The metric is emitted in the process type's main function + (e.g. RendererMain, UtilityMain) right after the sandbox is engaged and + right before HangWatcher is started. + + This will be used to evaluate whether hangs are missed during startup + because of this delay. + </summary> + <token key="ProcessType" variants="ProcessType"/> +</histogram> + </histograms> </histogram-configuration>