diff --git a/net/BUILD.gn b/net/BUILD.gn index 8971de32d923b..92af51f7a84e5 100644 --- a/net/BUILD.gn +++ b/net/BUILD.gn @@ -675,6 +675,8 @@ component("net") { "http/http_vary_data.cc", "http/http_vary_data.h", "http/http_version.h", + "http/no_vary_search_cache.cc", + "http/no_vary_search_cache.h", "http/partial_data.cc", "http/partial_data.h", "http/proxy_client_socket.cc", @@ -2794,6 +2796,7 @@ target(_test_target_type, "net_unittests") { "http/http_vary_data_unittest.cc", "http/mock_allow_http_auth_preferences.cc", "http/mock_allow_http_auth_preferences.h", + "http/no_vary_search_cache_unittest.cc", "http/test_upload_data_stream_not_allow_http1.cc", "http/test_upload_data_stream_not_allow_http1.h", "http/transport_security_persister_unittest.cc", diff --git a/net/http/http_no_vary_search_data.cc b/net/http/http_no_vary_search_data.cc index 234546c7b48ae..180f154d062c4 100644 --- a/net/http/http_no_vary_search_data.cc +++ b/net/http/http_no_vary_search_data.cc @@ -110,7 +110,7 @@ HttpNoVarySearchData::ParseFromHeaders( std::optional<std::string> normalized_header = response_headers.GetNormalizedHeader("No-Vary-Search"); if (!normalized_header) { - // This means there is no No-Vary-Search header. Return nullopt. + // This means there is no No-Vary-Search header. return base::unexpected(ParseErrorEnum::kOk); } @@ -124,6 +124,11 @@ HttpNoVarySearchData::ParseFromHeaders( return ParseNoVarySearchDictionary(dict.value()); } +bool HttpNoVarySearchData::operator==(const HttpNoVarySearchData& rhs) const = + default; +std::strong_ordering HttpNoVarySearchData::operator<=>( + const HttpNoVarySearchData& rhs) const = default; + const base::flat_set<std::string>& HttpNoVarySearchData::no_vary_params() const { return no_vary_params_; diff --git a/net/http/http_no_vary_search_data.h b/net/http/http_no_vary_search_data.h index 96252d86b7a5b..6b13cfcd94d09 100644 --- a/net/http/http_no_vary_search_data.h +++ b/net/http/http_no_vary_search_data.h @@ -24,7 +24,7 @@ class HttpResponseHeaders; class NET_EXPORT_PRIVATE HttpNoVarySearchData { public: enum class ParseErrorEnum { - kOk, // Parsing is correct. Also returned if there is no header. + kOk, // There is no No-Vary-Search header. kDefaultValue, // Parsing is correct but led to default value - the header // could be removed. kNotDictionary, // Header value is not a dictionary. @@ -56,6 +56,11 @@ class NET_EXPORT_PRIVATE HttpNoVarySearchData { HttpNoVarySearchData::ParseErrorEnum> ParseFromHeaders(const HttpResponseHeaders& response_headers); + bool operator==(const HttpNoVarySearchData& rhs) const; + + // HttpNoVarySearchData objects can be used as a key in a map. + std::strong_ordering operator<=>(const HttpNoVarySearchData& rhs) const; + bool AreEquivalent(const GURL& a, const GURL& b) const; const base::flat_set<std::string>& no_vary_params() const; diff --git a/net/http/http_no_vary_search_data_unittest.cc b/net/http/http_no_vary_search_data_unittest.cc index 1721c9bf68c7f..b69deeb043ded 100644 --- a/net/http/http_no_vary_search_data_unittest.cc +++ b/net/http/http_no_vary_search_data_unittest.cc @@ -9,11 +9,15 @@ #include "net/http/http_no_vary_search_data.h" +#include <algorithm> +#include <array> #include <string> #include <string_view> +#include <vector> #include "base/containers/flat_map.h" #include "base/containers/flat_set.h" +#include "base/containers/to_vector.h" #include "base/memory/scoped_refptr.h" #include "base/strings/string_util.h" #include "base/test/gmock_expected_support.h" @@ -1178,6 +1182,42 @@ TEST(HttpNoVarySearchResponseHeadersParseHistogramTest, UnrecognizedKeys) { "Net.HttpNoVarySearch.HasUnrecognizedKeys", true, 1); } +TEST(HttpNoVarySearchDataTest, ComparisonOperators) { + constexpr auto kValues = std::to_array<std::string_view>( + {"params", "key-order", "params, key-order", R"(params=("a"))", + R"(params=("b"))", R"(params, except=("a"))", R"(params, except=("b"))", + R"(params, except=("a"), key-order)"}); + auto data_vector = base::ToVector(kValues, [](std::string_view value) { + auto headers = HttpResponseHeaders::Builder({1, 1}, "200 OK") + .AddHeader("No-Vary-Search", value) + .Build(); + auto result = HttpNoVarySearchData::ParseFromHeaders(*headers); + CHECK(result.has_value()); + return result.value(); + }); + // We don't actually care what the order is, just that it is consistent, so + // sort the vector. + std::ranges::sort(data_vector); + + // Compare everything to itself. + for (const auto& data : data_vector) { + EXPECT_EQ(data, data); + EXPECT_EQ(data <=> data, std::strong_ordering::equal); + } + // Compare everything to everything else. + for (size_t i = 0; i < data_vector.size() - 1; ++i) { + for (size_t j = i + 1; j < data_vector.size(); ++j) { + // Commutativity of !=. + EXPECT_NE(data_vector[i], data_vector[j]); + EXPECT_NE(data_vector[j], data_vector[i]); + + // Transitivity of <. + EXPECT_LT(data_vector[i], data_vector[j]); + EXPECT_GT(data_vector[j], data_vector[i]); + } + } +} + } // namespace } // namespace net diff --git a/net/http/no_vary_search_cache.cc b/net/http/no_vary_search_cache.cc new file mode 100644 index 0000000000000..ade980758e03e --- /dev/null +++ b/net/http/no_vary_search_cache.cc @@ -0,0 +1,512 @@ +// Copyright 2025 The Chromium Authors +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "net/http/no_vary_search_cache.h" + +#include <compare> +#include <iostream> +#include <tuple> +#include <type_traits> +#include <utility> + +#include "base/check.h" +#include "base/check_op.h" +#include "base/memory/weak_ptr.h" +#include "base/metrics/histogram_macros.h" +#include "base/notreached.h" +#include "base/time/time.h" +#include "net/http/http_no_vary_search_data.h" + +namespace net { + +namespace { + +// We need to use a separate enum for the +// HttpCache.NoVarySearch.HeaderParseResult than +// HttpNoVarySearchData::ParseErrorEnum, as that enum does not have a value for +// a successful parse. +// +// These values are persisted to logs. Entries should not be renumbered and +// numeric values should never be reused. +// +// LINT.IfChange(NoVarySearchHeaderParseResult) +enum class NoVarySearchHeaderParseResult { + kSuccess = 0, + kNoHeader = 1, + kDefaultValue = 2, + kNotDictionary = 3, + kNonBooleanKeyOrder = 4, + kParamsNotStringList = 5, + kExceptNotStringList = 6, + kExceptWithoutTrueParams = 7, + kMaxValue = kExceptWithoutTrueParams, +}; +// LINT.ThenChange(//tools/metrics/histograms/metadata/net/enums.xml:NoVarySearchHeaderParseResult) + +NoVarySearchHeaderParseResult MapParseErrorEnum( + HttpNoVarySearchData::ParseErrorEnum error) { + using enum HttpNoVarySearchData::ParseErrorEnum; + switch (error) { + case kOk: + return NoVarySearchHeaderParseResult::kNoHeader; + + case kDefaultValue: + return NoVarySearchHeaderParseResult::kDefaultValue; + + case kNotDictionary: + return NoVarySearchHeaderParseResult::kNotDictionary; + + case kUnknownDictionaryKey: + NOTREACHED(); // No longer used. + + case kNonBooleanKeyOrder: + return NoVarySearchHeaderParseResult::kNonBooleanKeyOrder; + + case kParamsNotStringList: + return NoVarySearchHeaderParseResult::kParamsNotStringList; + + case kExceptNotStringList: + return NoVarySearchHeaderParseResult::kExceptNotStringList; + + case kExceptWithoutTrueParams: + return NoVarySearchHeaderParseResult::kExceptWithoutTrueParams; + } + NOTREACHED(); +} + +void EmitNoVarySearchHeaderParseResultHistogram( + const base::expected<HttpNoVarySearchData, + HttpNoVarySearchData::ParseErrorEnum>& result) { + auto value = NoVarySearchHeaderParseResult::kSuccess; + if (!result.has_value()) { + value = MapParseErrorEnum(result.error()); + } + UMA_HISTOGRAM_ENUMERATION("HttpCache.NoVarySearch.HeaderParseResult", value); +} + +// Stripping the URL of its query and fragment (ref) needs to be done for every +// request, so we want to avoid allocating memory for a GURL in the case of a +// cache miss. The return value points at memory owned by `url`, so should not +// outlive it. +std::string_view ExtractBaseURLView(const GURL& url) { + CHECK(url.is_valid()); + CHECK(url.has_path()); + const std::string& spec = url.possibly_invalid_spec(); + const url::Parsed& parsed = url.parsed_for_possibly_invalid_spec(); + const url::Component& path = parsed.path; + CHECK(path.is_valid()); + const size_t path_end_offset = base::checked_cast<size_t>(path.end()); + + auto base_url = std::string_view(spec).substr(0, path_end_offset); + + if constexpr (DCHECK_IS_ON()) { + // This correctness check is very expensive, so is only done when DCHECKs + // are enabled. + GURL generated_base_url(base_url); + DCHECK(generated_base_url.is_valid()); + GURL::Replacements replacements; + replacements.ClearQuery(); + replacements.ClearRef(); + GURL replaced_url = url.ReplaceComponents(replacements); + DCHECK_EQ(generated_base_url, replaced_url); + } + + return base_url; +} + +bool URLIsAcceptable(const GURL& url) { + // HTTP(S) URLs always have a path starting with "/" after canonicalization. + return url.is_valid() && url.has_path() && !url.has_username() && + !url.has_password(); +} + +} // namespace + +// base::LinkedList only supports having an object in a single linked list at a +// time. However, we need QueryString objects to be in two separate lists: +// * the least-recently-used (LRU) list, which has the most recently added or +// used entry at its head and the next entry to be evicted at its tail. This +// list contains every QueryString in the cache. +// * the list of cached QueryString objects for a particular base URL and +// No-Vary-Search parameter. These lists have the most recently inserted +// entry for this {base URL, NVS} pair at their heads. +// +// In order to work-around the limitation of base::LinkedList, QueryString has +// two base classes. LruNode represents its role as a member of the LRU list, +// and QueryStringListNode represents its role as a member of the +// QueryStringList list. By calling base::LinkNode methods via the appropriate +// base class it can control which list it is manipulating. +class NoVarySearchCache::LruNode : public base::LinkNode<LruNode> { + public: + // Not copyable or movable. + LruNode(const LruNode&) = delete; + LruNode& operator=(const LruNode&) = delete; + + // Every instance of LruNode is actually a QueryString. + QueryString* ToQueryString(); + + private: + friend class QueryString; + + LruNode() = default; + ~LruNode() = default; +}; + +class NoVarySearchCache::QueryStringListNode + : public base::LinkNode<QueryStringListNode> { + public: + // Not copyable or movable. + QueryStringListNode(const QueryStringListNode&) = delete; + QueryStringListNode& operator=(const QueryStringListNode&) = delete; + + // Every instance of QueryStringListNode is actually a QueryString. + QueryString* ToQueryString(); + + private: + friend class QueryString; + + QueryStringListNode() = default; + ~QueryStringListNode() = default; +}; + +// QueryString is the entry type for the cache. Its main purpose is to hold the +// query string, ie. everything between the "?" and the "#" in the original URL. +// Together with the `base_url` that forms part of the Key, this can be used to +// reconstruct the original URL that was used to store the original request in +// the disk cache. +class NoVarySearchCache::QueryString final + : public NoVarySearchCache::LruNode, + public NoVarySearchCache::QueryStringListNode { + public: + // Creates a QueryString and adds it to `list` and `lru_list`. + static void CreateAndInsert(std::optional<std::string_view> query, + QueryStringList& query_string_list, + base::LinkedList<LruNode>& lru_list) { + DCHECK(!query || query->find('#') == std::string_view::npos) + << "Query contained a '#' character, meaning that the URL reassembly " + "will not work correctly because the '#' will be re-interpreted as " + "the start of a fragment. This should not happen. Query was '" + << query.value() << "'"; + // This use of bare new is needed because base::LinkedList does not have + // ownership semantics. + auto* query_string = new QueryString(query, query_string_list); + query_string->LruNode::InsertBefore(lru_list.head()); + query_string->QueryStringListNode::InsertBefore( + query_string_list.list.head()); + } + + // Not copyable or movable. + QueryString(const QueryString&) = delete; + QueryString& operator=(const QueryString&) = delete; + + // Removes this object from both lists and deletes it. + void RemoveAndDelete() { + LruNode::RemoveFromList(); + QueryStringListNode::RemoveFromList(); + delete this; + } + + // Moves this object to the head of `list`. + template <typename List> + void MoveToHead(List& linked_list) { + auto* head = linked_list.head(); + if (head != this) { + MoveBeforeNode(linked_list.head()->value()); + } + } + + const std::optional<std::string>& query() const { return query_; } + + QueryStringList& query_string_list_ref() { + return query_string_list_ref_.get(); + } + + base::Time insertion_time() const { return insertion_time_; } + + void UpdateInsertionTime() { insertion_time_ = base::Time::Now(); } + + // Return the original GURL that this entry was constructed from (not + // including any fragment). It's important to use this method to correctly + // reconstruct URLs that have an empty query (end in '?'). + GURL ReconstructOriginalURL(std::string_view base_url) { + if (query_.has_value()) { + return GURL(base::StrCat({base_url, "?", query_.value()})); + } else { + return GURL(base_url); + } + } + + EraseHandle CreateEraseHandle() { + return EraseHandle(weak_factory_.GetWeakPtr()); + } + + private: + QueryString(std::optional<std::string_view> query, + QueryStringList& query_string_list) + : query_(query), + query_string_list_ref_(query_string_list), + insertion_time_(base::Time::Now()) {} + + // Must only be called from RemoveAndDelete(). + ~QueryString() = default; + + // Moves this object in front of `node`. + template <typename NodeType> + void MoveBeforeNode(NodeType* node) { + static_assert(std::same_as<NodeType, LruNode> || + std::same_as<NodeType, QueryStringListNode>, + "This assert is just documentation"); + CHECK_NE(node, this); + NodeType::RemoveFromList(); + NodeType::InsertBefore(node); + } + + // No-Vary-Search treats "http://www.example.com/" and + // "http://www.example.com/?" as the same URL, but the disk cache key treats + // them as different URLs, so we need to be able to distinguish them to + // correctly reconstruct the original URL. `query_ == std::nullopt` means that + // there was no `?` in the original URL, and `query_ == ""` means there was. + const std::optional<std::string> query_; + + // `query_string_list_ref_` allows the keys for this entry to be located in + // the cache so that it can be erased efficiently. + const raw_ref<QueryStringList> query_string_list_ref_; + + // `insertion_time_` breaks ties when there are multiple possible matches. The + // most recent entry will be used as it is most likely to still exist in the + // disk cache. + base::Time insertion_time_; + + // EraseHandle uses weak pointers to QueryString objects to enable an entry to + // be deleted from the cache if it is found not to be readable from the disk + // cache. + base::WeakPtrFactory<QueryString> weak_factory_{this}; +}; + +// The two implementations of ToQueryString() are defined out-of-line so that +// the compiler has seen the definition of QueryString and so knows the correct +// offsets to apply to the `this` pointer. +NoVarySearchCache::QueryString* NoVarySearchCache::LruNode::ToQueryString() { + return static_cast<QueryString*>(this); +} + +NoVarySearchCache::QueryString* +NoVarySearchCache::QueryStringListNode::ToQueryString() { + return static_cast<QueryString*>(this); +} + +NoVarySearchCache::EraseHandle::EraseHandle( + base::WeakPtr<QueryString> query_string) + : query_string_(std::move(query_string)) {} + +NoVarySearchCache::EraseHandle::~EraseHandle() = default; + +NoVarySearchCache::EraseHandle::EraseHandle(EraseHandle&& rhs) = default; +NoVarySearchCache::EraseHandle& NoVarySearchCache::EraseHandle::operator=( + EraseHandle&& rhs) = default; + +bool NoVarySearchCache::EraseHandle::EqualsForTesting( + const EraseHandle& rhs) const { + return query_string_.get() == rhs.query_string_.get(); +} + +bool NoVarySearchCache::EraseHandle::IsGoneForTesting() const { + return !query_string_; +} + +NoVarySearchCache::NoVarySearchCache(size_t max_size) : max_size_(max_size) { + CHECK_GT(max_size_, 0u); +} + +NoVarySearchCache::~NoVarySearchCache() { + map_.clear(); + // Clearing the map should have freed all the QueryString objects. + CHECK(lru_.empty()); +} + +std::optional<NoVarySearchCache::LookupResult> NoVarySearchCache::Lookup( + const NetworkIsolationKey& nik, + const GURL& url) { + if (nik.IsTransient() || !URLIsAcceptable(url)) { + return std::nullopt; + } + std::string_view base_url = ExtractBaseURLView(url); + const auto it = map_.find(KeyReference(nik, base_url)); + if (it == map_.end()) { + return std::nullopt; + } + // We have a match, so we need to create a real URL now. + QueryString* best_match = nullptr; + GURL original_url; + for (auto& [nvs_data, query_strings] : it->second) { + auto result = FindQueryStringInList(query_strings, base_url, url, nvs_data); + if (result && (!best_match || best_match->insertion_time() < + result->match->insertion_time())) { + best_match = result->match; + original_url = result->original_url; + } + } + if (!best_match) { + return std::nullopt; + } + // Move to head of `lru_` list. + best_match->MoveToHead(lru_); + + return LookupResult(original_url, best_match->CreateEraseHandle()); +} + +void NoVarySearchCache::MaybeInsert(const NetworkIsolationKey& nik, + const GURL& url, + const HttpResponseHeaders& headers) { + if (nik.IsTransient() || !URLIsAcceptable(url)) { + return; + } + auto maybe_nvs_data = HttpNoVarySearchData::ParseFromHeaders(headers); + EmitNoVarySearchHeaderParseResultHistogram(maybe_nvs_data); + if (!maybe_nvs_data.has_value()) { + return; + } + const std::string_view base_url = ExtractBaseURLView(url); + + std::optional<std::string_view> query; + if (url.has_query()) { + query = url.query_piece(); + } + + // Using lower_bound() followed by emplace_hint() allows us to avoid + // constructing a Key object if there is already a matching key in the map, + // and do only a single logN lookup. + const KeyReference key(nik, base_url); + auto it = map_.lower_bound(key); + if (it == map_.end() || it->first != key) { + it = map_.emplace_hint(it, Key(nik, GURL(base_url)), DataMapType()); + } + DataMapType& data_map = it->second; + auto [data_it, inserted] = + data_map.emplace(std::move(*maybe_nvs_data), it->first); + const HttpNoVarySearchData& nvs_data = data_it->first; + QueryStringList& query_strings = data_it->second; + if (inserted) { + query_strings.nvs_data_ref = &nvs_data; + } else { + // There was already an entry for this `nvs_data`. We need to check if it + // has a match for the URL we're trying to insert. If it does, we should + // update or replace the existing QueryString. + if (auto result = + FindQueryStringInList(query_strings, base_url, url, nvs_data)) { + QueryString* match = result->match; + if (match->query() == query) { + // In the exact match case we can use the existing QueryString object. + match->UpdateInsertionTime(); + match->MoveToHead(query_strings.list); + match->MoveToHead(lru_); + return; + } + + // No-Vary-Search matches are transitive. Any future requests that might + // be a match for `match` are also a match for `url`. Since `url` is newer + // we will prefer it, and so `match` will never be used again and we can + // safely remove it from the cache. + --size_; + match->RemoveAndDelete(); + } + } + EvictIfFull(); + CHECK_LT(size_, max_size_); + ++size_; + QueryString::CreateAndInsert(query, query_strings, lru_); +} + +void NoVarySearchCache::Erase(EraseHandle handle) { + if (QueryString* query_string = handle.query_string_.get()) { + EraseQuery(query_string); + } +} + +// This is out-of-line to discourage inlining so the bots can detect if it is +// accidentally linked into the binary. +size_t NoVarySearchCache::GetSizeForTesting() const { + return size_; +} + +bool NoVarySearchCache::IsTopLevelMapEmptyForTesting() const { + return map_.empty(); +} + +bool NoVarySearchCache::Key::operator==(const Key& rhs) const = default; + +bool NoVarySearchCache::KeyComparator::operator()(const Key& lhs, + const Key& rhs) const { + return std::tie(lhs.nik, lhs.base_url) < std::tie(rhs.nik, rhs.base_url); +} + +bool NoVarySearchCache::KeyComparator::operator()( + const Key& lhs, + const KeyReference& rhs) const { + const auto lhs_ref = + KeyReference(lhs.nik, lhs.base_url.possibly_invalid_spec()); + return lhs_ref < rhs; +} + +bool NoVarySearchCache::KeyComparator::operator()(const KeyReference& lhs, + const Key& rhs) const { + const auto rhs_ref = + KeyReference(rhs.nik, rhs.base_url.possibly_invalid_spec()); + return lhs < rhs_ref; +} + +NoVarySearchCache::QueryStringList::QueryStringList(const Key& key) + : key_ref(key) {} + +NoVarySearchCache::QueryStringList::~QueryStringList() { + while (!list.empty()) { + list.head()->value()->ToQueryString()->RemoveAndDelete(); + } +} + +void NoVarySearchCache::EvictIfFull() { + CHECK_LE(size_, max_size_); + if (size_ == max_size_) { + EraseQuery(lru_.tail()->value()->ToQueryString()); + } +} + +void NoVarySearchCache::EraseQuery(QueryString* query_string) { + CHECK_GT(size_, 0u); + --size_; + const QueryStringList& query_strings = query_string->query_string_list_ref(); + query_string->RemoveAndDelete(); + if (query_strings.list.empty()) { + const HttpNoVarySearchData* nvs_data_ref = query_strings.nvs_data_ref.get(); + const Key& key_ref = query_strings.key_ref.get(); + const auto map_it = map_.find(key_ref); + CHECK(map_it != map_.end()); + size_t removed_count = map_it->second.erase(*nvs_data_ref); + CHECK_EQ(removed_count, 1u); + if (map_it->second.empty()) { + map_.erase(map_it); + } + } +} + +// static +std::optional<NoVarySearchCache::FindQueryStringResult> +NoVarySearchCache::FindQueryStringInList(QueryStringList& query_strings, + std::string_view base_url, + const GURL& url, + const HttpNoVarySearchData& nvs_data) { + for (auto* node = query_strings.list.head(); node != query_strings.list.end(); + node = node->next()) { + QueryString* query_string = node->value()->ToQueryString(); + // TODO(crbug.com/382394774): Stop allocating GURLs in a tight loop. + GURL node_url = query_string->ReconstructOriginalURL(base_url); + CHECK(node_url.is_valid()); + if (nvs_data.AreEquivalent(url, node_url)) { + return FindQueryStringResult(query_string, std::move(node_url)); + } + } + return std::nullopt; +} + +} // namespace net diff --git a/net/http/no_vary_search_cache.h b/net/http/no_vary_search_cache.h new file mode 100644 index 0000000000000..9d8245870c12e --- /dev/null +++ b/net/http/no_vary_search_cache.h @@ -0,0 +1,200 @@ +// Copyright 2025 The Chromium Authors +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef NET_HTTP_NO_VARY_SEARCH_CACHE_H_ +#define NET_HTTP_NO_VARY_SEARCH_CACHE_H_ + +#include <stddef.h> + +#include <compare> +#include <map> +#include <optional> +#include <string> +#include <string_view> +#include <tuple> +#include <utility> + +#include "base/containers/linked_list.h" +#include "base/memory/raw_ptr.h" +#include "base/memory/raw_ref.h" +#include "base/memory/stack_allocated.h" +#include "base/memory/weak_ptr.h" +#include "net/base/net_export.h" +#include "net/base/network_isolation_key.h" +#include "net/http/http_no_vary_search_data.h" +#include "url/gurl.h" + +namespace net { + +class HttpResponseHeaders; + +// An in-memory cache that permits looking up a {NIK, URL} pair and seeing if it +// matches a previous response according to the rules of the No-Vary-Search +// header (see +// https://httpwg.org/http-extensions/draft-ietf-httpbis-no-vary-search.html). +// See also the design doc at +// https://docs.google.com/document/d/1RS3q6qZ7-k9CvZsDYseGOXzcdQ9fGZ6YYnaW7fTPu7A/edit +// +// Owned by net::HttpCache. +// +// Ignoring eviction, the data structure is approximately equivalent to +// std::map<std::tuple<NetworkIsolationKey, GURL, HttpNoVarySearchData>, +// QueryString>. +class NET_EXPORT_PRIVATE NoVarySearchCache { + private: + // Declared here so that it can be mentioned in the definition of EraseHandle. + class QueryString; + + public: + // Opaque object that permits erasure of an item from the cache. + // See comments on the Lookup() and Erase() methods for usage. + class NET_EXPORT_PRIVATE EraseHandle { + public: + ~EraseHandle(); + + // Not copyable. + EraseHandle(const EraseHandle&) = delete; + EraseHandle& operator=(const EraseHandle&) = delete; + + // Movable. + EraseHandle(EraseHandle&& rhs); + EraseHandle& operator=(EraseHandle&& rhs); + + // For unit tests it is useful to be able to inspect this. + bool EqualsForTesting(const EraseHandle& rhs) const; + bool IsGoneForTesting() const; + + private: + friend class NoVarySearchCache; + friend class QueryString; + + explicit EraseHandle(base::WeakPtr<QueryString> query_string); + + base::WeakPtr<QueryString> query_string_; + }; + + struct LookupResult { + GURL original_url; + EraseHandle erase_handle; + }; + + // The cache will hold at most `max_size` entries. Each entry stores the query + // parameter from a previous response, which will typically be 100 to 200 + // bytes. + explicit NoVarySearchCache(size_t max_size); + + // Not copyable, assignable or movable. + NoVarySearchCache(const NoVarySearchCache&) = delete; + NoVarySearchCache& operator=(const NoVarySearchCache&) = delete; + + ~NoVarySearchCache(); + + // Finds an entry in the cache equivalent to `url`. If a result is returned, + // then `original_url` can be used to find a disk cache entry. `erase_handle` + // can be used to remove the entry from this cache if it was not in the disk + // cache. Not const because it updates the LRU linked list to mark the entry + // as recently used. + std::optional<LookupResult> Lookup(const NetworkIsolationKey& nik, + const GURL& url); + + // Inserts `url` into the cache if a non-default "No-Vary-Search" header was + // found in `headers`. On insertion, will remove any existing matching entry + // with the same No-Vary-Search header, as the older entry would never be + // returned by Lookup() anyway. May evict the oldest entry in the cache to + // avoid the size exceeding `max_size_`. + void MaybeInsert(const NetworkIsolationKey& nik, + const GURL& url, + const HttpResponseHeaders& headers); + + // Erases the entry referenced by `erase_handle` from the cache. Does nothing + // if the entry no longer exists. + void Erase(EraseHandle handle); + + // Returns the size (number of stored original query strings) of the cache. + size_t GetSizeForTesting() const; + + // Returns true if the top-level map is empty. This should be equivalent to + // GetSizeForTesting() == 0 in the absence of bugs. + bool IsTopLevelMapEmptyForTesting() const; + + private: + class LruNode; + class QueryStringListNode; + + struct Key { + NetworkIsolationKey nik; + GURL base_url; + + bool operator==(const Key& rhs) const; + }; + + // KeyReference allows us to do a map lookup without constructing a GURL and a + // Key. + using KeyReference = std::pair<const NetworkIsolationKey&, std::string_view>; + + struct KeyComparator { + using is_transparent = void; + + bool operator()(const Key& lhs, const Key& rhs) const; + bool operator()(const Key& lhs, const KeyReference& rhs) const; + bool operator()(const KeyReference& lhs, const Key& rhs) const; + }; + + friend bool operator==(const Key& lhs, const KeyReference& rhs) { + return lhs.nik == rhs.first && lhs.base_url == rhs.second; + } + + struct QueryStringList { + base::LinkedList<QueryStringListNode> list; + // nvs_data_ref can't be raw_ref because it needs to be lazily initialized + // after the QueryStringList has been added to the map. + raw_ptr<const HttpNoVarySearchData> nvs_data_ref; + raw_ref<const Key> key_ref; + + explicit QueryStringList(const Key& key); + // base::LinkedList<> does not do memory management, so make sure the + // constents of `list` are deleted on destruction. + ~QueryStringList(); + }; + + struct FindQueryStringResult { + STACK_ALLOCATED(); // `match` doesn't need to be raw_ptr. + + public: + QueryString* match; + GURL original_url; + }; + + // TODO(crbug.com/382394774): Investigate performance of different map types. + using DataMapType = std::map<HttpNoVarySearchData, QueryStringList>; + using OuterMapType = std::map<Key, DataMapType, KeyComparator>; + + // Erases an entry from the cache if it is full; + void EvictIfFull(); + + // Erases `query_string` from the cache. + void EraseQuery(QueryString* query_string); + + static std::optional<FindQueryStringResult> FindQueryStringInList( + QueryStringList& query_strings, + std::string_view base, + const GURL& url, + const HttpNoVarySearchData& nvs_data); + + // The main cache data structure. + OuterMapType map_; + + // lru_.tail() is the least-recently-used QueryString. + base::LinkedList<LruNode> lru_; + + // The number of QueryString objects in the cache. + size_t size_ = 0u; + + // QueryString objects will be evicted to avoid exceeding `max_size_`. + const size_t max_size_; +}; + +} // namespace net + +#endif // NET_HTTP_NO_VARY_SEARCH_CACHE_H_ diff --git a/net/http/no_vary_search_cache_unittest.cc b/net/http/no_vary_search_cache_unittest.cc new file mode 100644 index 0000000000000..bc5cdc8947df6 --- /dev/null +++ b/net/http/no_vary_search_cache_unittest.cc @@ -0,0 +1,517 @@ +// Copyright 2025 The Chromium Authors +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "net/http/no_vary_search_cache.h" + +#include <array> +#include <optional> +#include <string> +#include <string_view> +#include <utility> + +#include "base/threading/platform_thread.h" +#include "base/time/time.h" +#include "net/base/network_isolation_key.h" +#include "net/base/schemeful_site.h" +#include "net/http/http_response_headers.h" +#include "testing/gtest/include/gtest/gtest.h" +#include "url/gurl.h" + +namespace net { + +namespace { + +constexpr size_t kMaxSize = 5; + +GURL TestURL(std::string_view query = {}) { + GURL url("https://example.com/"); + if (query.empty()) { + return url; + } + + GURL::Replacements replacements; + replacements.SetQueryStr(query); + return url.ReplaceComponents(replacements); +} + +NetworkIsolationKey TestNIK() { + SchemefulSite site(TestURL()); + return NetworkIsolationKey(site, site); +} + +scoped_refptr<HttpResponseHeaders> TestHeaders( + std::string_view no_vary_search) { + return HttpResponseHeaders::Builder({1, 1}, "200 OK") + .AddHeader("No-Vary-Search", no_vary_search) + .Build(); +} + +class NoVarySearchCacheTest : public ::testing::Test { + protected: + NoVarySearchCache& cache() { return cache_; } + + // Inserts a URL with query `query` into cache with a No-Vary-Search header + // value of `no_vary_search`. + void Insert(std::string_view query, std::string_view no_vary_search) { + cache_.MaybeInsert(TestNIK(), TestURL(query), *TestHeaders(no_vary_search)); + } + + // Returns true if TestURL(query) exists in cache. + bool Exists(std::string_view query) { + return cache_.Lookup(TestNIK(), TestURL(query)).has_value(); + } + + private: + NoVarySearchCache cache_{kMaxSize}; +}; + +TEST_F(NoVarySearchCacheTest, NewlyConstructedCacheIsEmpty) { + EXPECT_EQ(cache().GetSizeForTesting(), 0u); +} + +TEST_F(NoVarySearchCacheTest, LookupOnEmptyCache) { + EXPECT_EQ(cache().Lookup(TestNIK(), TestURL()), std::nullopt); +} + +TEST_F(NoVarySearchCacheTest, InsertLookupErase) { + Insert("", "key-order"); + + auto result = cache().Lookup(TestNIK(), TestURL()); + ASSERT_TRUE(result); + EXPECT_EQ(result->original_url, TestURL()); + + EXPECT_EQ(cache().GetSizeForTesting(), 1u); + + cache().Erase(std::move(result->erase_handle)); + EXPECT_EQ(cache().GetSizeForTesting(), 0u); + EXPECT_TRUE(cache().IsTopLevelMapEmptyForTesting()); +} + +// An asan build will find leaks, but this test works on any build. +TEST_F(NoVarySearchCacheTest, QueryNotLeaked) { + std::optional<NoVarySearchCache::LookupResult> result; + { + NoVarySearchCache cache(kMaxSize); + + cache.MaybeInsert(TestNIK(), TestURL(), *TestHeaders("params")); + result = cache.Lookup(TestNIK(), TestURL()); + ASSERT_TRUE(result); + EXPECT_FALSE(result->erase_handle.IsGoneForTesting()); + } + EXPECT_TRUE(result->erase_handle.IsGoneForTesting()); +} + +TEST_F(NoVarySearchCacheTest, OldestItemIsEvicted) { + for (size_t i = 0; i < kMaxSize + 1; ++i) { + std::string query = "i=" + base::NumberToString(i); + Insert(query, "params, except=(\"i\")"); + EXPECT_TRUE(Exists(query)); + } + + EXPECT_EQ(cache().GetSizeForTesting(), kMaxSize); + + EXPECT_FALSE(Exists("i=0")); +} + +TEST_F(NoVarySearchCacheTest, RecentlyUsedItemIsNotEvicted) { + for (size_t i = 0; i < kMaxSize + 1; ++i) { + std::string query = "i=" + base::NumberToString(i); + Insert(query, "params, except=(\"i\")"); + EXPECT_TRUE(Exists(query)); + // Exists() calls Lookup(), which makes an entry "used". + EXPECT_TRUE(Exists("i=0")); + } + + EXPECT_EQ(cache().GetSizeForTesting(), kMaxSize); + + EXPECT_TRUE(Exists("i=0")); + EXPECT_FALSE(Exists("i=1")); +} + +TEST_F(NoVarySearchCacheTest, MostRecentlyUsedItemIsNotEvicted) { + static constexpr char kNoVarySearchValue[] = "params, except=(\"i\")"; + const auto query = [](int i) { return "i=" + base::NumberToString(i); }; + // Fill the cache. + for (size_t i = 0; i < kMaxSize; ++i) { + Insert(query(i), kNoVarySearchValue); + } + EXPECT_EQ(cache().GetSizeForTesting(), kMaxSize); + + // Make "i=3" be the most recently used item. + EXPECT_TRUE(Exists("i=3")); + + // Evict kMaxSize - 1 items. + for (size_t i = kMaxSize; i < kMaxSize * 2 - 1; ++i) { + Insert(query(i), kNoVarySearchValue); + EXPECT_TRUE(Exists(query(i))); + } + + EXPECT_EQ(cache().GetSizeForTesting(), kMaxSize); + + EXPECT_TRUE(Exists("i=3")); +} + +TEST_F(NoVarySearchCacheTest, LeastRecentlyUsedItemIsEvicted) { + static constexpr char kNoVarySearchValue[] = "params, except=(\"i\")"; + const auto query = [](int i) { return "i=" + base::NumberToString(i); }; + // Fill the cache. + for (size_t i = 0; i < kMaxSize; ++i) { + Insert(query(i), kNoVarySearchValue); + } + EXPECT_EQ(cache().GetSizeForTesting(), kMaxSize); + + // Make "i=kMaxSize-1" be the least recently used item. + for (size_t i = 0; i < kMaxSize - 1; ++i) { + EXPECT_TRUE(Exists(query(i))); + } + + // Evict one item. + Insert(query(kMaxSize), kNoVarySearchValue); + + // Verify it was the least-recently-used item. + EXPECT_FALSE(Exists(query(kMaxSize - 1))); +} + +TEST_F(NoVarySearchCacheTest, InsertUpdatesIdenticalItem) { + Insert("a=b", "params=(\"c\")"); + auto original_result = cache().Lookup(TestNIK(), TestURL("a=b")); + ASSERT_TRUE(original_result); + Insert("a=b", "params=(\"c\")"); + auto new_result = cache().Lookup(TestNIK(), TestURL("a=b")); + ASSERT_TRUE(new_result); + EXPECT_TRUE( + original_result->erase_handle.EqualsForTesting(new_result->erase_handle)); +} + +TEST_F(NoVarySearchCacheTest, InsertRemovesMatchingItem) { + Insert("a=b&c=1", "params=(\"c\")"); + auto original_result = cache().Lookup(TestNIK(), TestURL("a=b")); + ASSERT_TRUE(original_result); + EXPECT_EQ(original_result->original_url, TestURL("a=b&c=1")); + Insert("a=b&c=2", "params=(\"c\")"); + EXPECT_TRUE(original_result->erase_handle.IsGoneForTesting()); + EXPECT_EQ(cache().GetSizeForTesting(), 1u); + auto new_result = cache().Lookup(TestNIK(), TestURL("a=b")); + EXPECT_EQ(new_result->original_url, TestURL("a=b&c=2")); +} + +TEST_F(NoVarySearchCacheTest, MaybeInsertDoesNothingWithNoNoVarySearchHeader) { + auto headers = HttpResponseHeaders::Builder({1, 1}, "200 OK").Build(); + cache().MaybeInsert(TestNIK(), TestURL(), *headers); + EXPECT_EQ(cache().GetSizeForTesting(), 0u); + EXPECT_TRUE(cache().IsTopLevelMapEmptyForTesting()); +} + +TEST_F(NoVarySearchCacheTest, MaybeInsertDoesNothingForDefaultBehavior) { + // The following header values are all equivalent to default behavior. + static constexpr auto kDefaultCases = std::to_array<std::string_view>( + {"", " ", "params=?0", "params=()", "key-order=?0", "nonsense"}); + + for (auto no_vary_search : kDefaultCases) { + NoVarySearchCache cache(kMaxSize); + + Insert("a=b", no_vary_search); + EXPECT_EQ(cache.GetSizeForTesting(), 0u) << no_vary_search; + } +} + +TEST_F(NoVarySearchCacheTest, MatchesWithoutQueryString) { + auto url_with_query = GURL("https://example.com/foo?"); + cache().MaybeInsert(TestNIK(), url_with_query, *TestHeaders("key-order")); + auto result = cache().Lookup(TestNIK(), GURL("https://example.com/foo")); + ASSERT_TRUE(result); + EXPECT_EQ(result->original_url, url_with_query); +} + +TEST_F(NoVarySearchCacheTest, InsertInvalidURLIsIgnored) { + auto invalid_url = GURL("???"); + ASSERT_FALSE(invalid_url.is_valid()); + cache().MaybeInsert(TestNIK(), invalid_url, *TestHeaders("key-order")); + EXPECT_EQ(cache().GetSizeForTesting(), 0u); +} + +// There's no way to insert an invalid URL into the cache. There's also no way +// to add a query to a valid URL to make it invalid. So this test just verifies +// that we don't crash. +TEST_F(NoVarySearchCacheTest, LookupInvalidURLReturnsNullopt) { + GURL invalid_url = GURL("???"); + ASSERT_FALSE(invalid_url.is_valid()); + auto result = cache().Lookup(TestNIK(), invalid_url); + EXPECT_FALSE(result); +} + +bool Matches(std::string_view insert, + std::string_view lookup, + std::string_view no_vary_search) { + NoVarySearchCache cache(kMaxSize); + + cache.MaybeInsert(TestNIK(), TestURL(insert), *TestHeaders(no_vary_search)); + EXPECT_EQ(cache.GetSizeForTesting(), 1u); + + const auto exists = [&cache](std::string_view query) { + return cache.Lookup(TestNIK(), TestURL(query)).has_value(); + }; + + // It would be bad if the query didn't match itself. + EXPECT_TRUE(exists(insert)); + + return exists(lookup); +} + +bool InsertMatches(std::string_view insert1, + std::string_view insert2, + std::string_view no_vary_search) { + NoVarySearchCache cache(kMaxSize); + const auto insert = [&cache, no_vary_search](std::string_view query) { + cache.MaybeInsert(TestNIK(), TestURL(query), *TestHeaders(no_vary_search)); + }; + + insert(insert1); + EXPECT_EQ(cache.GetSizeForTesting(), 1u); + insert(insert2); + return cache.GetSizeForTesting() == 1u; +} + +TEST_F(NoVarySearchCacheTest, MatchCases) { + struct Case { + std::string_view description; + std::string_view query1; + std::string_view query2; + std::string_view no_vary_search; + }; + + static constexpr Case cases[] = { + {"Encoded & in key", "%26=a&b=c", "b=c", "params=(\"&\")"}, + {"Encoded & in value", "a=b%26&c=d", "c=d&a=b%26", "key-order"}, + {"Encoded =", "%3d=a", "%3D=a", "key-order"}, + {"Encoded and unencoded =", "a=%3d", "a==", "key-order"}, + {"Embedded null in key", "a%00b=c", "", "params=(\"a%00b\")"}, + {"Embedded null in value", "a=b%00c", "a=b%00c", "key-order"}, + {"Encoded space in key", "+=a", "%20=b", "params=(\" \")"}, + {"Encoded space in value", "a=b&c=+", "c=%20&a=b", "key-order"}, + {"Key is ?", "?=1", "", "params=(\"?\")"}, + {"Empty key", "=7&c=d", "c=d", "params=(\"\")"}, + {"Empty value", "a=&c=d", "c=d&a", "key-order"}, + {"Bad UTF8", "%fe=%ff", "\xfe=\xff", "key-order"}, + {"Two params removed", "a=b&c=d&e", "e", R"(params=("a" "c"))"}, + }; + + for (const auto& [description, query1, query2, no_vary_search] : cases) { + EXPECT_TRUE(Matches(query1, query2, no_vary_search)) + << "Testing forwards: " << description; + EXPECT_TRUE(Matches(query2, query1, no_vary_search)) + << "Testing backwards: " << description; + EXPECT_TRUE(InsertMatches(query1, query2, no_vary_search)) + << "Testing double insert: " << description; + } +} + +TEST_F(NoVarySearchCacheTest, NoMatchCases) { + struct Case { + std::string_view description; + std::string_view query1; + std::string_view query2; + }; + + static constexpr Case cases[] = { + {"Encoded &", "a&b", "a%26b"}, + {"Encoded =", "a=b", "a%3db"}, + }; + + static constexpr std::string_view kKeyOrder = "key-order"; + + for (const auto& [description, query1, query2] : cases) { + EXPECT_FALSE(Matches(query1, query2, kKeyOrder)) + << "Testing forwards: " << description; + EXPECT_FALSE(Matches(query2, query1, kKeyOrder)) + << "Testing backwards: " << description; + EXPECT_FALSE(InsertMatches(query1, query2, kKeyOrder)) + << "Testing double insert: " << description; + } +} + +// Different representations of No-Very-Search headers that should compare +// equal. +TEST_F(NoVarySearchCacheTest, NoVarySearchVariants) { + struct Case { + std::string_view description; + std::string_view variant1; + std::string_view variant2; + }; + + static constexpr Case cases[] = { + {"Extra space", R"(params=("a" "b"))", R"(params=( "a" "b" ))"}, + {"Bool or omitted params", "params", "params=?1"}, + {"Bool or omitted key-order", "key-order", "key-order=?1"}, + {"Absent or false key-order", "params, key-order=?0", "params"}, + {"Ignored entry", "params, ignored", "params"}, + {"Empty except", "params, except=()", "except=(), params"}, + {"Different order", R"(params, except=("a"), key-order)", + R"(key-order, except=("a"), params)"}, + }; + + static constexpr std::string_view kQuery = "a=b&b=7&c=d"; + + for (const auto& [description, variant1, variant2] : cases) { + NoVarySearchCache cache(kMaxSize); + + cache.MaybeInsert(TestNIK(), TestURL(kQuery), *TestHeaders(variant1)); + EXPECT_EQ(cache.GetSizeForTesting(), 1u); + cache.MaybeInsert(TestNIK(), TestURL(kQuery), *TestHeaders(variant2)); + EXPECT_EQ(cache.GetSizeForTesting(), 1u) + << "Failing: " << description << "; variant1='" << variant1 + << "'; variant2 = '" << variant2 << "'"; + } +} + +// Items with a transient NIK will not be stored in the disk cache, and so they +// shouldn't be stored in the NoVarySearchCache either. +TEST_F(NoVarySearchCacheTest, TransientNIK) { + const auto transient = NetworkIsolationKey::CreateTransientForTesting(); + + cache().MaybeInsert(transient, TestURL(), *TestHeaders("params")); + EXPECT_EQ(cache().GetSizeForTesting(), 0u); + EXPECT_EQ(cache().Lookup(transient, TestURL()), std::nullopt); +} + +TEST_F(NoVarySearchCacheTest, DifferentNIK) { + const NetworkIsolationKey nik1 = TestNIK(); + const NetworkIsolationKey nik2( + SchemefulSite(TestURL()), + SchemefulSite(GURL("https://thirdparty.example/"))); + + cache().MaybeInsert(nik1, TestURL(), *TestHeaders("params")); + cache().MaybeInsert(nik2, TestURL(), *TestHeaders("params")); + EXPECT_EQ(cache().GetSizeForTesting(), 2u); + const auto result1 = cache().Lookup(nik1, TestURL()); + const auto result2 = cache().Lookup(nik2, TestURL()); + ASSERT_TRUE(result1); + ASSERT_TRUE(result2); + EXPECT_FALSE(result1->erase_handle.EqualsForTesting(result2->erase_handle)); +} + +TEST_F(NoVarySearchCacheTest, DifferentURL) { + const GURL url1("https://example.com/a?a=b"); + const GURL url2("https://example.com/b?a=b"); + + cache().MaybeInsert(TestNIK(), url1, *TestHeaders("key-order")); + cache().MaybeInsert(TestNIK(), url2, *TestHeaders("key-order")); + EXPECT_EQ(cache().GetSizeForTesting(), 2u); + const auto result1 = cache().Lookup(TestNIK(), url1); + const auto result2 = cache().Lookup(TestNIK(), url2); + ASSERT_TRUE(result1); + ASSERT_TRUE(result2); + EXPECT_FALSE(result1->erase_handle.EqualsForTesting(result2->erase_handle)); +} + +void SpinUntilCurrentTimeChanges() { + const auto start = base::Time::Now(); + while (start == base::Time::Now()) { + base::PlatformThread::YieldCurrentThread(); + } +} + +TEST_F(NoVarySearchCacheTest, DifferentNoVarySearch) { + Insert("a=b&c=d", "params, except=(\"a\")"); + // Make sure that the two inserts reliably get a different `inserted` + // timestamp so that the ordering is deterministic. + SpinUntilCurrentTimeChanges(); + Insert("a=b", "key-order"); + + EXPECT_EQ(cache().GetSizeForTesting(), 2u); + const auto result = cache().Lookup(TestNIK(), TestURL("a=b")); + ASSERT_TRUE(result); + // If time goes backwards this test will flake. + EXPECT_EQ(result->original_url, TestURL("a=b")); +} + +// The winner of the lookup should depend only on insertion order and not on the +// order of iteration of the map. To ensure this works for any iteration order, +// we perform the same test in the opposite direction. +TEST_F(NoVarySearchCacheTest, DifferentNoVarySearchReverseOrder) { + Insert("a=b", "key-order"); + SpinUntilCurrentTimeChanges(); + Insert("a=b&c=d", "params, except=(\"a\")"); + + EXPECT_EQ(cache().GetSizeForTesting(), 2u); + const auto result = cache().Lookup(TestNIK(), TestURL("a=b")); + ASSERT_TRUE(result); + // If time goes backwards this test will flake. + EXPECT_EQ(result->original_url, TestURL("a=b&c=d")); +} + +TEST_F(NoVarySearchCacheTest, EraseInDifferentOrder) { + // Insert in order a, b, c. + Insert("a", "key-order"); + Insert("b", "key-order"); + Insert("c", "key-order"); + + // Look up in order b, c, a. + const auto lookup = [this](std::string_view query) { + return cache().Lookup(TestNIK(), TestURL(query)); + }; + + auto result_b = lookup("b"); + auto result_c = lookup("c"); + auto result_a = lookup("a"); + + ASSERT_TRUE(result_a); + ASSERT_TRUE(result_b); + ASSERT_TRUE(result_c); + + // Erase in order c, a, b. + cache().Erase(std::move(result_c->erase_handle)); + EXPECT_TRUE(Exists("a")); + EXPECT_TRUE(Exists("b")); + EXPECT_FALSE(Exists("c")); + + cache().Erase(std::move(result_a->erase_handle)); + EXPECT_FALSE(Exists("a")); + EXPECT_TRUE(Exists("b")); + + cache().Erase(std::move(result_b->erase_handle)); + EXPECT_FALSE(Exists("b")); + + EXPECT_EQ(cache().GetSizeForTesting(), 0u); + EXPECT_TRUE(cache().IsTopLevelMapEmptyForTesting()); +} + +// The URL "ref", also known as the "fragment", also known as the "hash", is +// ignored for matching and not stored in the cache. +TEST_F(NoVarySearchCacheTest, URLRefIsIgnored) { + cache().MaybeInsert(TestNIK(), GURL("https://example.com/?a=b#foo"), + *TestHeaders("key-order")); + cache().MaybeInsert(TestNIK(), GURL("https://example.com/?a=b#bar"), + *TestHeaders("key-order")); + EXPECT_EQ(cache().GetSizeForTesting(), 1u); + auto result = cache().Lookup(TestNIK(), GURL("https://example.com/?a=b#baz")); + EXPECT_TRUE(result); + EXPECT_EQ(result->original_url, GURL("https://example.com/?a=b")); +} + +TEST_F(NoVarySearchCacheTest, URLWithUsernameIsRejected) { + const GURL url_with_username("https://me@example.com/?a=b"); + cache().MaybeInsert(TestNIK(), url_with_username, *TestHeaders("key-order")); + EXPECT_EQ(cache().GetSizeForTesting(), 0u); + + // See if it matches against the URL without the username. + cache().MaybeInsert(TestNIK(), GURL("https://example.com/?a=b"), + *TestHeaders("key-order")); + EXPECT_FALSE(cache().Lookup(TestNIK(), url_with_username)); +} + +TEST_F(NoVarySearchCacheTest, URLWithPasswordIsRejected) { + const GURL url_with_password("https://:hunter123@example.com/?a=b"); + cache().MaybeInsert(TestNIK(), url_with_password, *TestHeaders("key-order")); + EXPECT_EQ(cache().GetSizeForTesting(), 0u); + + // See if it matches against the URL without the password. + cache().MaybeInsert(TestNIK(), GURL("https://example.com/?a=b"), + *TestHeaders("key-order")); + EXPECT_FALSE(cache().Lookup(TestNIK(), url_with_password)); +} + +} // namespace + +} // namespace net diff --git a/tools/metrics/histograms/metadata/net/enums.xml b/tools/metrics/histograms/metadata/net/enums.xml index f2144313c2166..94fec2c6e64bc 100644 --- a/tools/metrics/histograms/metadata/net/enums.xml +++ b/tools/metrics/histograms/metadata/net/enums.xml @@ -1874,6 +1874,42 @@ Called by update_net_trust_anchors.py.--> <int value="2" label="Metered"/> </enum> +<!-- LINT.IfChange(NoVarySearchHeaderParseResult) --> + +<enum name="NoVarySearchHeaderParseResult"> + <int value="0" label="Success"> + The No-Vary-Search header was successfully parsed. + </int> + <int value="1" label="No header"> + The No-Vary-Search header was not present in the response headers. + </int> + <int value="2" label="Default value"> + The No-Vary-Search header value was equivalent to the default (vary on all + query parameters), and so was ignored. + </int> + <int value="3" label="Not dictionary"> + The value of the No-Vary-Search header could not be parsed as a structured + headers dictionary. + </int> + <int value="4" label="Non-boolean key-order"> + The "key-order" parameter does not have a boolean value. + </int> + <int value="5" label="Params not string list"> + The "params" parameter is not an inner list, or contains a + non-string value. + </int> + <int value="6" label="Except not string list"> + The "except" parameter is not an inner list, or contains a + non-string value. + </int> + <int value="7" label="Except without true params"> + The "except" parameter was supplied, but the "params" + parameter did not have a boolean true value. + </int> +</enum> + +<!-- LINT.ThenChange(//net/http/http_no_vary_search_data.h:NoVarySearchHeaderParseResult) --> + <enum name="NsswitchService"> <int value="0" label="Unknown"/> <int value="1" label=""files""/> diff --git a/tools/metrics/histograms/metadata/net/histograms.xml b/tools/metrics/histograms/metadata/net/histograms.xml index b2e87a37e1887..c689a12d8e381 100644 --- a/tools/metrics/histograms/metadata/net/histograms.xml +++ b/tools/metrics/histograms/metadata/net/histograms.xml @@ -477,6 +477,17 @@ chromium-metrics-reviews@google.com. </summary> </histogram> +<histogram name="HttpCache.NoVarySearch.HeaderParseResult" + enum="NoVarySearchHeaderParseResult" expires_after="2025-12-16"> + <owner>ricea@chromium.org</owner> + <owner>net-dev@chromium.org</owner> + <summary> + The result of parsing the No-Vary-Search value in the HTTP response header. + This metric is emitted every time an HTTP response is written to the HTTP + disk cache. + </summary> +</histogram> + <histogram name="HttpCache.OpenDiskEntry" units="ms" expires_after="2025-07-13"> <owner>yhirano@chromium.org</owner> <owner>net-dev@chromium.org</owner>