
* Shorten code * Use const more * Inline temps * Use safe conversions for float -> int * TimeDelta::InMicrosecondsF() returns a double Bug: none Change-Id: I798ee7107bcbda863f95ee7921c4ddfd76768183 Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2359008 Commit-Queue: Cait Phillips <caitkp@chromium.org> Reviewed-by: Cait Phillips <caitkp@chromium.org> Auto-Submit: Peter Kasting <pkasting@chromium.org> Cr-Commit-Position: refs/heads/master@{#799616}
389 lines
15 KiB
C++
389 lines
15 KiB
C++
// Copyright 2016 The Chromium Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style license that can be
|
|
// found in the LICENSE file.
|
|
|
|
#include "components/ntp_snippets/user_classifier.h"
|
|
|
|
#include <algorithm>
|
|
#include <cfloat>
|
|
#include <string>
|
|
|
|
#include "base/logging.h"
|
|
#include "base/metrics/histogram_macros.h"
|
|
#include "base/numerics/ranges.h"
|
|
#include "base/stl_util.h"
|
|
#include "base/strings/string_number_conversions.h"
|
|
#include "base/time/clock.h"
|
|
#include "components/ntp_snippets/features.h"
|
|
#include "components/ntp_snippets/pref_names.h"
|
|
#include "components/ntp_snippets/time_serialization.h"
|
|
#include "components/prefs/pref_registry_simple.h"
|
|
#include "components/prefs/pref_service.h"
|
|
#include "components/variations/variations_associated_data.h"
|
|
|
|
namespace ntp_snippets {
|
|
|
|
namespace {
|
|
|
|
// The discount rate for computing the discounted-average metrics. Must be
|
|
// strictly larger than 0 and strictly smaller than 1!
|
|
const double kDiscountRatePerDay = 0.25;
|
|
const char kDiscountRatePerDayParam[] = "user_classifier_discount_rate_per_day";
|
|
|
|
// Never consider any larger interval than this (so that extreme situations such
|
|
// as losing your phone or going for a long offline vacation do not skew the
|
|
// average too much).
|
|
// When everriding via variation parameters, it is better to use smaller values
|
|
// than |kMaxHours| as this it the maximum value reported in the histograms.
|
|
const double kMaxHours = 7 * 24;
|
|
const char kMaxHoursParam[] = "user_classifier_max_hours";
|
|
|
|
// Ignore events within |kMinHours| hours since the last event (|kMinHours| is
|
|
// the length of the browsing session where subsequent events of the same type
|
|
// do not count again).
|
|
const double kMinHours = 0.5;
|
|
const char kMinHoursParam[] = "user_classifier_min_hours";
|
|
|
|
// Classification constants.
|
|
const double kActiveConsumerClicksAtLeastOncePerHours = 96;
|
|
const char kActiveConsumerClicksAtLeastOncePerHoursParam[] =
|
|
"user_classifier_active_consumer_clicks_at_least_once_per_hours";
|
|
|
|
// The previous value in production was 66, i.e. 2.75 days. The new value is a
|
|
// shift in the direction we want (having more active users).
|
|
const double kRareUserOpensNTPAtMostOncePerHours = 96;
|
|
const char kRareUserOpensNTPAtMostOncePerHoursParam[] =
|
|
"user_classifier_rare_user_opens_ntp_at_most_once_per_hours";
|
|
|
|
// Histograms for logging the estimated average hours to next event.
|
|
const char kHistogramAverageHoursToOpenNTP[] =
|
|
"NewTabPage.UserClassifier.AverageHoursToOpenNTP";
|
|
const char kHistogramAverageHoursToShowSuggestions[] =
|
|
"NewTabPage.UserClassifier.AverageHoursToShowSuggestions";
|
|
const char kHistogramAverageHoursToUseSuggestions[] =
|
|
"NewTabPage.UserClassifier.AverageHoursToUseSuggestions";
|
|
|
|
// The enum used for iteration.
|
|
const UserClassifier::Metric kMetrics[] = {
|
|
UserClassifier::Metric::NTP_OPENED,
|
|
UserClassifier::Metric::SUGGESTIONS_SHOWN,
|
|
UserClassifier::Metric::SUGGESTIONS_USED};
|
|
|
|
// The summary of the prefs.
|
|
const char* kMetricKeys[] = {
|
|
prefs::kUserClassifierAverageNTPOpenedPerHour,
|
|
prefs::kUserClassifierAverageSuggestionsShownPerHour,
|
|
prefs::kUserClassifierAverageSuggestionsUsedPerHour};
|
|
const char* kLastTimeKeys[] = {prefs::kUserClassifierLastTimeToOpenNTP,
|
|
prefs::kUserClassifierLastTimeToShowSuggestions,
|
|
prefs::kUserClassifierLastTimeToUseSuggestions};
|
|
|
|
// Default lengths of the intervals for new users for the metrics.
|
|
const double kInitialHoursBetweenEvents[] = {24, 48, 120};
|
|
const char* kInitialHoursBetweenEventsParams[] = {
|
|
"user_classifier_default_interval_ntp_opened",
|
|
"user_classifier_default_interval_suggestions_shown",
|
|
"user_classifier_default_interval_suggestions_used"};
|
|
|
|
static_assert(base::size(kMetrics) ==
|
|
static_cast<int>(UserClassifier::Metric::COUNT) &&
|
|
base::size(kMetricKeys) ==
|
|
static_cast<int>(UserClassifier::Metric::COUNT) &&
|
|
base::size(kLastTimeKeys) ==
|
|
static_cast<int>(UserClassifier::Metric::COUNT) &&
|
|
base::size(kInitialHoursBetweenEvents) ==
|
|
static_cast<int>(UserClassifier::Metric::COUNT) &&
|
|
base::size(kInitialHoursBetweenEventsParams) ==
|
|
static_cast<int>(UserClassifier::Metric::COUNT),
|
|
"Fill in info for all metrics.");
|
|
|
|
// Computes the discount rate.
|
|
double GetDiscountRatePerHour() {
|
|
double discount_rate_per_day = variations::GetVariationParamByFeatureAsDouble(
|
|
kArticleSuggestionsFeature, kDiscountRatePerDayParam,
|
|
kDiscountRatePerDay);
|
|
// Check for illegal values.
|
|
if (discount_rate_per_day <= 0 || discount_rate_per_day >= 1) {
|
|
DLOG(WARNING) << "Illegal value " << discount_rate_per_day
|
|
<< " for the parameter " << kDiscountRatePerDayParam
|
|
<< " (must be strictly between 0 and 1; the default "
|
|
<< kDiscountRatePerDay << " is used, instead).";
|
|
discount_rate_per_day = kDiscountRatePerDay;
|
|
}
|
|
// Compute discount_rate_per_hour such that
|
|
// discount_rate_per_day = 1 - e^{-discount_rate_per_hour * 24}.
|
|
return std::log(1.0 / (1.0 - discount_rate_per_day)) / 24.0;
|
|
}
|
|
|
|
// Returns the interval (in hours) assumed between events of |metric| for a new
// user: the per-metric variation parameter if set, otherwise the built-in
// default from |kInitialHoursBetweenEvents|.
double GetInitialHoursBetweenEvents(UserClassifier::Metric metric) {
  const int index = static_cast<int>(metric);
  const char* const param_name = kInitialHoursBetweenEventsParams[index];
  const double default_hours = kInitialHoursBetweenEvents[index];
  return variations::GetVariationParamByFeatureAsDouble(
      kArticleSuggestionsFeature, param_name, default_hours);
}
|
|
|
|
double GetMinHours() {
|
|
return variations::GetVariationParamByFeatureAsDouble(
|
|
kArticleSuggestionsFeature, kMinHoursParam, kMinHours);
|
|
}
|
|
|
|
double GetMaxHours() {
|
|
return variations::GetVariationParamByFeatureAsDouble(
|
|
kArticleSuggestionsFeature, kMaxHoursParam, kMaxHours);
|
|
}
|
|
|
|
// Applies exponential decay to |old_value| for |hours_since_last_time| hours
// at |discount_rate_per_hour| and returns the decayed value:
//   new_value = e^{-discount_rate_per_hour * hours_since_last_time} * old_value
double DiscountMetric(double old_value,
                      double hours_since_last_time,
                      double discount_rate_per_hour) {
  const double decay_factor =
      std::exp(-discount_rate_per_hour * hours_since_last_time);
  return decay_factor * old_value;
}
|
|
|
|
// Compute the number of hours between two events for the given metric value
|
|
// assuming the events were equally distributed.
|
|
double GetEstimateHoursBetweenEvents(double metric_value,
|
|
double discount_rate_per_hour,
|
|
double min_hours,
|
|
double max_hours) {
|
|
// The computation below is well-defined only for |metric_value| > 1 (log of
|
|
// negative value or division by zero). When |metric_value| -> 1, the estimate
|
|
// below -> infinity, so max_hours is a natural result, here.
|
|
if (metric_value <= 1) {
|
|
return max_hours;
|
|
}
|
|
|
|
// This is the estimate with the assumption that last event happened right
|
|
// now and the system is in the steady-state. Solve estimate_hours in the
|
|
// steady-state equation:
|
|
// metric_value = 1 + e^{-discount_rate * estimate_hours} * metric_value,
|
|
// i.e.
|
|
// -discount_rate * estimate_hours = log((metric_value - 1) / metric_value),
|
|
// discount_rate * estimate_hours = log(metric_value / (metric_value - 1)),
|
|
// estimate_hours = log(metric_value / (metric_value - 1)) / discount_rate.
|
|
double estimate_hours =
|
|
std::log(metric_value / (metric_value - 1)) / discount_rate_per_hour;
|
|
return base::ClampToRange(estimate_hours, min_hours, max_hours);
|
|
}
|
|
|
|
// The inverse of GetEstimateHoursBetweenEvents().
|
|
double GetMetricValueForEstimateHoursBetweenEvents(
|
|
double estimate_hours,
|
|
double discount_rate_per_hour,
|
|
double min_hours,
|
|
double max_hours) {
|
|
estimate_hours = base::ClampToRange(estimate_hours, min_hours, max_hours);
|
|
// Return |metric_value| such that GetEstimateHoursBetweenEvents for
|
|
// |metric_value| returns |estimate_hours|. Thus, solve |metric_value| in
|
|
// metric_value = 1 + e^{-discount_rate * estimate_hours} * metric_value,
|
|
// i.e.
|
|
// metric_value * (1 - e^{-discount_rate * estimate_hours}) = 1,
|
|
// metric_value = 1 / (1 - e^{-discount_rate * estimate_hours}).
|
|
return 1.0 / (1.0 - std::exp(-discount_rate_per_hour * estimate_hours));
|
|
}
|
|
|
|
} // namespace
|
|
|
|
// Reads all tunable parameters once (variation parameters with built-in
// fallbacks) and, when a pref service is available, stamps every metric that
// has no stored last-event time with the current time.
UserClassifier::UserClassifier(PrefService* pref_service, base::Clock* clock)
    : pref_service_(pref_service),
      clock_(clock),
      discount_rate_per_hour_(GetDiscountRatePerHour()),
      min_hours_(GetMinHours()),
      max_hours_(GetMaxHours()),
      active_consumer_clicks_at_least_once_per_hours_(
          variations::GetVariationParamByFeatureAsDouble(
              kArticleSuggestionsFeature,
              kActiveConsumerClicksAtLeastOncePerHoursParam,
              kActiveConsumerClicksAtLeastOncePerHours)),
      rare_user_opens_ntp_at_most_once_per_hours_(
          variations::GetVariationParamByFeatureAsDouble(
              kArticleSuggestionsFeature,
              kRareUserOpensNTPAtMostOncePerHoursParam,
              kRareUserOpensNTPAtMostOncePerHours)) {
  // Tests may pass a null pref service; there is nothing to initialize then.
  if (!pref_service_) {
    return;
  }

  // TODO(jkrcal): Store the current discount rate per hour into prefs. If it
  // differs from the previous value, rescale the metric values so that the
  // expectation does not change abruptly!

  // A metric without a stored last-event time has just started counting, so
  // its clock starts now.
  for (const Metric metric : kMetrics) {
    if (HasLastTime(metric)) {
      continue;
    }
    SetLastTimeToNow(metric);
  }
}
|
|
|
|
// Defaulted destructor: no custom teardown logic is defined here.
UserClassifier::~UserClassifier() = default;
|
|
|
|
// static
|
|
void UserClassifier::RegisterProfilePrefs(PrefRegistrySimple* registry) {
|
|
double discount_rate = GetDiscountRatePerHour();
|
|
double min_hours = GetMinHours();
|
|
double max_hours = GetMaxHours();
|
|
|
|
for (Metric metric : kMetrics) {
|
|
double default_metric_value = GetMetricValueForEstimateHoursBetweenEvents(
|
|
GetInitialHoursBetweenEvents(metric), discount_rate, min_hours,
|
|
max_hours);
|
|
registry->RegisterDoublePref(kMetricKeys[static_cast<int>(metric)],
|
|
default_metric_value);
|
|
registry->RegisterInt64Pref(kLastTimeKeys[static_cast<int>(metric)], 0);
|
|
}
|
|
}
|
|
|
|
// Records an occurrence of |metric|: updates the stored metric value and logs
// the resulting estimate of hours between events to the histogram that belongs
// to |metric|. |metric| must not be Metric::COUNT.
void UserClassifier::OnEvent(Metric metric) {
  DCHECK_NE(metric, Metric::COUNT);

  const double updated_metric_value = UpdateMetricOnEvent(metric);
  const double estimated_hours = GetEstimateHoursBetweenEvents(
      updated_metric_value, discount_rate_per_hour_, min_hours_, max_hours_);

  // The histogram maximum has to be a constant, hence |kMaxHours| rather than
  // the (possibly overridden) |max_hours_|.
  switch (metric) {
    case Metric::NTP_OPENED:
      UMA_HISTOGRAM_CUSTOM_COUNTS(kHistogramAverageHoursToOpenNTP,
                                  estimated_hours, 1, kMaxHours, 50);
      break;
    case Metric::SUGGESTIONS_SHOWN:
      UMA_HISTOGRAM_CUSTOM_COUNTS(kHistogramAverageHoursToShowSuggestions,
                                  estimated_hours, 1, kMaxHours, 50);
      break;
    case Metric::SUGGESTIONS_USED:
      UMA_HISTOGRAM_CUSTOM_COUNTS(kHistogramAverageHoursToUseSuggestions,
                                  estimated_hours, 1, kMaxHours, 50);
      break;
    case Metric::COUNT:
      NOTREACHED();
      break;
  }
}
|
|
|
|
// Returns the current estimate of hours between events of |metric|, based on
// the stored metric value discounted up to now. |metric| must not be
// Metric::COUNT.
double UserClassifier::GetEstimatedAvgTime(Metric metric) const {
  DCHECK_NE(metric, Metric::COUNT);
  const double current_metric_value = GetUpToDateMetricValue(metric);
  const double estimated_hours = GetEstimateHoursBetweenEvents(
      current_metric_value, discount_rate_per_hour_, min_hours_, max_hours_);
  return estimated_hours;
}
|
|
|
|
// Classifies the user from the current estimates. A user who opens the NTP at
// most once per |rare_user_opens_ntp_at_most_once_per_hours_| hours is rare;
// otherwise a user who uses suggestions at least once per
// |active_consumer_clicks_at_least_once_per_hours_| hours is an active
// suggestions consumer; everyone else is an active NTP user.
UserClassifier::UserClass UserClassifier::GetUserClass() const {
  // Tests may run without a pref service; fall back to the middle class.
  if (!pref_service_) {
    return UserClass::ACTIVE_NTP_USER;
  }

  const double hours_between_ntp_opens =
      GetEstimatedAvgTime(Metric::NTP_OPENED);
  if (hours_between_ntp_opens >= rare_user_opens_ntp_at_most_once_per_hours_) {
    return UserClass::RARE_NTP_USER;
  }

  const double hours_between_suggestion_uses =
      GetEstimatedAvgTime(Metric::SUGGESTIONS_USED);
  const bool is_active_consumer =
      hours_between_suggestion_uses <=
      active_consumer_clicks_at_least_once_per_hours_;
  return is_active_consumer ? UserClass::ACTIVE_SUGGESTIONS_CONSUMER
                            : UserClass::ACTIVE_NTP_USER;
}
|
|
|
|
std::string UserClassifier::GetUserClassDescriptionForDebugging() const {
|
|
switch (GetUserClass()) {
|
|
case UserClass::RARE_NTP_USER:
|
|
return "Rare user of the NTP";
|
|
case UserClass::ACTIVE_NTP_USER:
|
|
return "Active user of the NTP";
|
|
case UserClass::ACTIVE_SUGGESTIONS_CONSUMER:
|
|
return "Active consumer of NTP suggestions";
|
|
}
|
|
NOTREACHED();
|
|
return std::string();
|
|
}
|
|
|
|
void UserClassifier::ClearClassificationForDebugging() {
|
|
// The pref_service_ can be null in tests.
|
|
if (!pref_service_) {
|
|
return;
|
|
}
|
|
|
|
for (const Metric& metric : kMetrics) {
|
|
ClearMetricValue(metric);
|
|
SetLastTimeToNow(metric);
|
|
}
|
|
}
|
|
|
|
// Accounts for a new event of |metric| and returns the resulting metric value.
// Events that arrive less than |min_hours_| after the previous one are treated
// as part of the same browsing session: nothing is stored and the current
// discounted value is returned. Returns 0 when there is no pref service.
double UserClassifier::UpdateMetricOnEvent(Metric metric) {
  // The pref_service_ can be null in tests.
  if (!pref_service_) {
    return 0;
  }

  // Elapsed time is capped at |max_hours_|.
  const double elapsed_hours =
      std::min(max_hours_, GetHoursSinceLastTime(metric));

  const bool within_same_session = elapsed_hours < min_hours_;
  if (within_same_session) {
    return GetUpToDateMetricValue(metric);
  }

  SetLastTimeToNow(metric);

  // Discount the stored value for the elapsed time, then add 1 for the event
  // that has just happened.
  const double discounted_value = DiscountMetric(
      GetMetricValue(metric), elapsed_hours, discount_rate_per_hour_);
  const double updated_value = 1 + discounted_value;
  SetMetricValue(metric, updated_value);
  return updated_value;
}
|
|
|
|
// Returns the stored value of |metric| discounted for the time elapsed since
// its last event (capped at |max_hours_|). Nothing is written back to prefs.
// Returns 0 when there is no pref service.
double UserClassifier::GetUpToDateMetricValue(Metric metric) const {
  // The pref_service_ can be null in tests.
  if (!pref_service_) {
    return 0;
  }

  const double elapsed_hours =
      std::min(max_hours_, GetHoursSinceLastTime(metric));
  const double stored_value = GetMetricValue(metric);
  return DiscountMetric(stored_value, elapsed_hours, discount_rate_per_hour_);
}
|
|
|
|
// Returns the number of hours elapsed between the stored last-event time of
// |metric| and the current time reported by |clock_|. Returns 0 if no last
// time is stored. The result is never negative: if the clock reports a time
// before the stored timestamp, 0 is returned. A negative value would make
// DiscountMetric() inflate the metric instead of discounting it.
double UserClassifier::GetHoursSinceLastTime(Metric metric) const {
  if (!HasLastTime(metric)) {
    return 0;
  }

  const base::TimeDelta since_last_time =
      clock_->Now() - DeserializeTime(pref_service_->GetInt64(
                          kLastTimeKeys[static_cast<int>(metric)]));
  const double hours_since_last_time =
      since_last_time / base::TimeDelta::FromHours(1);
  return std::max(0.0, hours_since_last_time);
}
|
|
|
|
// Returns whether the last-event-time pref of |metric| has a pref path set.
// Requires a non-null |pref_service_|.
bool UserClassifier::HasLastTime(Metric metric) const {
  const char* const last_time_key = kLastTimeKeys[static_cast<int>(metric)];
  return pref_service_->HasPrefPath(last_time_key);
}
|
|
|
|
// Stores the current time of |clock_| (in serialized form) as the last event
// time of |metric|. Requires a non-null |pref_service_|.
void UserClassifier::SetLastTimeToNow(Metric metric) {
  const char* const last_time_key = kLastTimeKeys[static_cast<int>(metric)];
  pref_service_->SetInt64(last_time_key, SerializeTime(clock_->Now()));
}
|
|
|
|
// Reads the stored (not yet discounted) value of |metric| from prefs.
// Requires a non-null |pref_service_|.
double UserClassifier::GetMetricValue(Metric metric) const {
  const char* const metric_key = kMetricKeys[static_cast<int>(metric)];
  return pref_service_->GetDouble(metric_key);
}
|
|
|
|
// Writes |metric_value| to the pref that stores |metric|. Requires a non-null
// |pref_service_|.
void UserClassifier::SetMetricValue(Metric metric, double metric_value) {
  const char* const metric_key = kMetricKeys[static_cast<int>(metric)];
  pref_service_->SetDouble(metric_key, metric_value);
}
|
|
|
|
// Clears the pref that stores |metric|. Requires a non-null |pref_service_|.
void UserClassifier::ClearMetricValue(Metric metric) {
  const char* const metric_key = kMetricKeys[static_cast<int>(metric)];
  pref_service_->ClearPref(metric_key);
}
|
|
|
|
} // namespace ntp_snippets
|