0

Add new class to represent a schemeful etld+1

What:
Adds a new class which represents a scheme + etld+1 of an origin as
described by:
https://html.spec.whatwg.org/multipage/origin.html#obtain-a-site

This is very similar to the steps used by the NIK:
https://source.chromium.org/chromium/chromium/src/+/master:net/base/network_isolation_key.cc;l=29

How:
Takes a given origin and wraps the resulting origin of the above
steps in a new class to be used for equivalence checks and
serialization.

Why:
The ConversionMeasurement API uses the concept of a site when checking
if a conversion on a given origin is associated with an impression.
See: https://crrev.com/c/2194182

Change-Id: Id3dccb4b00bab7bc6848656dd25bdd2d1186aef6
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2458466
Reviewed-by: Daniel Cheng <dcheng@chromium.org>
Reviewed-by: Charlie Harrison <csharrison@chromium.org>
Reviewed-by: Matt Menke <mmenke@chromium.org>
Commit-Queue: John Delaney <johnidel@chromium.org>
Cr-Commit-Position: refs/heads/master@{#822340}
This commit is contained in:
John Delaney
2020-10-29 20:16:04 +00:00
committed by Commit Bot
parent 34f521b701
commit 513e53f79c
7 changed files with 430 additions and 4 deletions

@ -157,6 +157,8 @@ component("net") {
"base/rand_callback.h",
"base/registry_controlled_domains/registry_controlled_domain.cc",
"base/registry_controlled_domains/registry_controlled_domain.h",
"base/schemeful_site.cc",
"base/schemeful_site.h",
"base/sockaddr_storage.cc",
"base/sockaddr_storage.h",
"base/sys_addrinfo.h",
@ -4152,6 +4154,7 @@ test("net_unittests") {
"base/registry_controlled_domains/registry_controlled_domain_unittest.cc",
"base/scheme_host_port_matcher_rule_unittest.cc",
"base/scheme_host_port_matcher_unittest.cc",
"base/schemeful_site_unittest.cc",
"base/test_completion_callback_unittest.cc",
"base/test_proxy_delegate.cc",
"base/test_proxy_delegate.h",

125
net/base/schemeful_site.cc Normal file

@ -0,0 +1,125 @@
// Copyright 2020 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "net/base/schemeful_site.h"
#include "base/check.h"
#include "net/base/registry_controlled_domains/registry_controlled_domain.h"
#include "url/gurl.h"
#include "url/url_canon.h"
#include "url/url_constants.h"
namespace net {
namespace {
// Computes the "site" of `origin`, following the steps specified in
// https://html.spec.whatwg.org/multipage/origin.html#obtain-a-site
//
// Opaque origins are returned unchanged. For any other origin the result keeps
// the scheme, replaces the host with its registerable domain when one can be
// determined (otherwise the host is kept as-is), and replaces the port with
// the scheme's default port, or 0 when the scheme has no default port. Two
// origins that differ only by port or by subdomain therefore map to the same
// returned origin.
url::Origin SwitchToRegistrableDomainAndRemovePort(const url::Origin& origin) {
  const std::string& scheme = origin.scheme();

  // There is currently no reason for getting the schemeful site of a web
  // socket, so disallow passing in websocket origins.
  DCHECK_NE(scheme, url::kWsScheme);
  DCHECK_NE(scheme, url::kWssScheme);

  // 1. If origin is an opaque origin, then return origin.
  if (origin.opaque())
    return origin;

  std::string registerable_domain;

  // Non-normative step.
  // We only lookup the registerable domain for HTTP/HTTPS schemes, this is
  // non-normative. Other schemes for non-opaque origins like "file" do not
  // meaningfully have a registerable domain for their host, so they are
  // skipped.
  if (scheme == url::kHttpsScheme || scheme == url::kHttpScheme) {
    registerable_domain = GetDomainAndRegistry(
        origin, net::registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES);
  }

  // If origin's host's registrable domain is null, then return (origin's
  // scheme, origin's host).
  //
  // `GetDomainAndRegistry()` returns an empty string for IP literals and
  // effective TLDs.
  if (registerable_domain.empty())
    registerable_domain = origin.host();

  // Note: `registerable_domain` may legitimately still be empty here, because
  // some schemes (such as "file") permit origins with an empty host. It is
  // therefore not asserted to be non-empty.

  // The site must not depend on the port of the input origin, so every origin
  // of a given scheme is normalized to the same port value.
  int port = url::DefaultPortForScheme(scheme.c_str(), scheme.length());

  // Provide a default port of 0 for non-standard schemes.
  if (port == url::PORT_UNSPECIFIED)
    port = 0;

  return url::Origin::CreateFromNormalizedTuple(scheme, registerable_domain,
                                                port);
}
} // namespace
// Stores the site computed from `origin` (see
// SwitchToRegistrableDomainAndRemovePort() above).
SchemefulSite::SchemefulSite(const url::Origin& origin)
    : origin_(SwitchToRegistrableDomainAndRemovePort(origin)) {}

// Derives the origin of `url` and then builds the site exactly as the
// origin-based constructor does.
SchemefulSite::SchemefulSite(const GURL& url)
    : SchemefulSite(url::Origin::Create(url)) {}
// Copy and move operations are member-wise. They are declared in the header
// and defaulted here, out of line.
SchemefulSite::SchemefulSite(const SchemefulSite& other) = default;
SchemefulSite::SchemefulSite(SchemefulSite&& other) = default;
SchemefulSite& SchemefulSite::operator=(const SchemefulSite& other) = default;
SchemefulSite& SchemefulSite::operator=(SchemefulSite&& other) = default;
// static
// Rebuilds a site from the text produced by `Serialize()` by parsing it as a
// URL and running it through the GURL-based constructor.
SchemefulSite SchemefulSite::Deserialize(const std::string& value) {
  const GURL parsed_url(value);
  return SchemefulSite(parsed_url);
}
// Returns the serialization of the wrapped origin.
std::string SchemefulSite::Serialize() const {
  std::string serialized_origin = origin_.Serialize();
  return serialized_origin;
}
// Returns the debug string of the wrapped origin.
std::string SchemefulSite::GetDebugString() const {
  std::string debug_string = origin_.GetDebugString();
  return debug_string;
}
// Test-only accessor: exposes the wrapped `origin_` by const reference so that
// tests can inspect the computed site directly.
const url::Origin& SchemefulSite::GetInternalOriginForTesting() const {
return origin_;
}
// Two sites are equal exactly when their wrapped origins compare equal.
bool SchemefulSite::operator==(const SchemefulSite& other) const {
  const bool same_origin = (origin_ == other.origin_);
  return same_origin;
}
// Defined as the negation of operator== so the two can never disagree.
bool SchemefulSite::operator!=(const SchemefulSite& other) const {
  return !operator==(other);
}
// Orders sites by their wrapped origins. This makes SchemefulSite usable as a
// key in ordered STL containers such as std::set or std::map.
bool SchemefulSite::operator<(const SchemefulSite& other) const {
  const url::Origin& lhs = origin_;
  const url::Origin& rhs = other.origin_;
  return lhs < rhs;
}
// static
// Rebuilds a site from the text produced by `SerializeWithNonce()`. Returns
// nullopt when `url::Origin::Deserialize()` rejects `value`.
base::Optional<SchemefulSite> SchemefulSite::DeserializeWithNonce(
    const std::string& value) {
  base::Optional<url::Origin> origin = url::Origin::Deserialize(value);
  if (origin.has_value())
    return SchemefulSite(*origin);
  return base::nullopt;
}
// Serializes the wrapped origin including its nonce. Non-const because the
// underlying call may initialize the origin's nonce before serializing.
base::Optional<std::string> SchemefulSite::SerializeWithNonce() {
  base::Optional<std::string> serialized =
      origin_.SerializeWithNonceAndInitIfNeeded();
  return serialized;
}
} // namespace net

80
net/base/schemeful_site.h Normal file

@ -0,0 +1,80 @@
// Copyright 2020 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef NET_BASE_SCHEMEFUL_SITE_H_
#define NET_BASE_SCHEMEFUL_SITE_H_
#include <string>
#include "base/gtest_prod_util.h"
#include "base/optional.h"
#include "net/base/net_export.h"
#include "url/origin.h"
class GURL;
namespace net {
// Class which represents a scheme and etld+1 for an origin, as specified by
// https://html.spec.whatwg.org/multipage/origin.html#obtain-a-site.
//
// The site is stored internally as a `url::Origin`. Instances are copyable,
// movable, and comparable with ==, != and <.
class NET_EXPORT SchemefulSite {
public:
// Creates a site wrapping a default-constructed `url::Origin`.
SchemefulSite() = default;
// Creates the site that `origin` belongs to.
explicit SchemefulSite(const url::Origin& origin);
// Using the origin constructor is preferred as this is less efficient.
// Should only be used if the origin for a given GURL is not readily
// available.
explicit SchemefulSite(const GURL& url);
SchemefulSite(const SchemefulSite& other);
SchemefulSite(SchemefulSite&& other);
SchemefulSite& operator=(const SchemefulSite& other);
SchemefulSite& operator=(SchemefulSite&& other);
// Deserializes a string obtained from `Serialize()` to a `SchemefulSite`.
// Returns an opaque `SchemefulSite` if the value was invalid in any way.
static SchemefulSite Deserialize(const std::string& value);
// Returns a serialized version of `origin_`. If the underlying origin is
// invalid, returns an empty string. If serialization of opaque origins with
// their associated nonce is necessary, see `SerializeWithNonce()`.
std::string Serialize() const;
// Returns the debug string of the underlying `origin_`.
std::string GetDebugString() const;
// Returns true if the underlying `origin_` is opaque.
bool opaque() const { return origin_.opaque(); }
// Testing only function which allows tests to access the underlying `origin_`
// in order to verify behavior.
const url::Origin& GetInternalOriginForTesting() const;
// Comparisons are performed on the underlying `origin_`. `operator<` allows
// SchemefulSite to be used as a key in STL containers (for example, a
// std::set or std::map).
bool operator==(const SchemefulSite& other) const;
bool operator!=(const SchemefulSite& other) const;
bool operator<(const SchemefulSite& other) const;
private:
// Grants the OpaqueSerialization test access to the private nonce-based
// serialization helpers below.
FRIEND_TEST_ALL_PREFIXES(SchemefulSiteTest, OpaqueSerialization);
// Deserializes a string obtained from `SerializeWithNonce()` to a
// `SchemefulSite`. Returns nullopt if the value was invalid in any way.
static base::Optional<SchemefulSite> DeserializeWithNonce(
const std::string& value);
// Returns a serialized version of `origin_`. For an opaque `origin_`, this
// serializes with the nonce. See `url::Origin::SerializeWithNonce()` for
// usage information. Not const: the nonce of `origin_` is initialized first
// if needed.
base::Optional<std::string> SerializeWithNonce();
// Origin which stores the result of running the steps documented at
// https://html.spec.whatwg.org/multipage/origin.html#obtain-a-site.
url::Origin origin_;
};
} // namespace net
#endif // NET_BASE_SCHEMEFUL_SITE_H_

@ -0,0 +1,181 @@
// Copyright 2020 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "net/base/schemeful_site.h"
#include "testing/gtest/include/gtest/gtest.h"
#include "url/gurl.h"
#include "url/origin.h"
#include "url/url_util.h"
namespace net {
TEST(SchemefulSiteTest, DifferentOriginSameRegisterableDomain) {
  // Origins that differ only in their subdomains; every one of them is
  // expected to collapse to the same schemeful site.
  const url::Origin kSameSiteOrigins[] = {
      url::Origin::Create(GURL("http://a.foo.test")),
      url::Origin::Create(GURL("http://b.foo.test")),
      url::Origin::Create(GURL("http://foo.test")),
      url::Origin::Create(GURL("http://a.b.foo.test"))};

  // Check every ordered pair, including each origin against itself.
  for (const url::Origin& lhs : kSameSiteOrigins) {
    for (const url::Origin& rhs : kSameSiteOrigins)
      EXPECT_EQ(SchemefulSite(lhs), SchemefulSite(rhs));
  }
}
TEST(SchemefulSiteTest, Operators) {
  // Origins whose schemeful sites are pairwise distinct, listed so that their
  // sites are in ascending order.
  url::Origin kTestOrigins[] = {
      url::Origin::Create(GURL("data:text/html,<body>Hello World</body>")),
      url::Origin::Create(GURL("file://foo")),
      url::Origin::Create(GURL("http://a.bar.test")),
      url::Origin::Create(GURL("http://c.test")),
      url::Origin::Create(GURL("http://d.test")),
      url::Origin::Create(GURL("http://a.foo.test")),
      url::Origin::Create(GURL("https://a.bar.test")),
      url::Origin::Create(GURL("https://c.test")),
      url::Origin::Create(GURL("https://d.test")),
      url::Origin::Create(GURL("https://a.foo.test"))};
  const size_t origin_count = base::size(kTestOrigins);

  for (size_t i = 0; i < origin_count; ++i) {
    const SchemefulSite lower(kTestOrigins[i]);
    SCOPED_TRACE(lower.GetDebugString());

    // A site equals itself and is never less than itself.
    EXPECT_EQ(lower, lower);
    EXPECT_FALSE(lower < lower);

    // The same must hold for a copy of the site.
    const SchemefulSite lower_copy(lower);
    EXPECT_EQ(lower, lower_copy);
    EXPECT_FALSE(lower < lower_copy);

    // Every later entry must compare strictly greater and unequal.
    for (size_t j = i + 1; j < origin_count; ++j) {
      const SchemefulSite higher(kTestOrigins[j]);
      SCOPED_TRACE(higher.GetDebugString());

      EXPECT_TRUE(lower < higher);
      EXPECT_FALSE(higher < lower);
      EXPECT_FALSE(lower == higher);
      EXPECT_FALSE(higher == lower);
    }
  }
}
TEST(SchemefulSiteTest, SchemeUsed) {
  // Same host, different scheme: the sites must differ.
  const SchemefulSite https_site(
      url::Origin::Create(GURL("https://foo.test")));
  const SchemefulSite http_site(url::Origin::Create(GURL("http://foo.test")));
  EXPECT_NE(https_site, http_site);
}
TEST(SchemefulSiteTest, PortIgnored) {
  // Same scheme and host, different explicit ports. Both origins are
  // non-opaque, and the port must not influence the resulting site.
  const SchemefulSite site_port_80(
      url::Origin::Create(GURL("https://foo.test:80")));
  const SchemefulSite site_port_2395(
      url::Origin::Create(GURL("https://foo.test:2395")));
  EXPECT_EQ(site_port_80, site_port_2395);
}
TEST(SchemefulSiteTest, TopLevelDomainsNotModified) {
  // Each case pairs an input URL with the URL whose origin the resulting site
  // is expected to wrap.
  const struct {
    const char* input_url;
    const char* expected_site_url;
  } kCases[] = {
      // A known TLD on its own is kept as-is.
      {"https://com", "https://com"},
      // Unknown TLD's should not be modified (only the port is dropped).
      {"https://bar:1234", "https://bar"},
      // Check for two-part TLDs.
      {"http://a.co.uk", "http://a.co.uk"},
  };

  for (const auto& test_case : kCases) {
    SCOPED_TRACE(test_case.input_url);
    const url::Origin input_origin =
        url::Origin::Create(GURL(test_case.input_url));
    EXPECT_EQ(url::Origin::Create(GURL(test_case.expected_site_url)),
              SchemefulSite(input_origin).GetInternalOriginForTesting());
  }
}
TEST(SchemefulSiteTest, NonStandardScheme) {
  // Register "foo" for the duration of this test so that foo:// URLs produce
  // non-opaque origins.
  url::ScopedSchemeRegistryForTests scoped_registry;
  url::AddStandardScheme("foo", url::SCHEME_WITH_HOST);

  const GURL kFooUrl("foo://a.b.test");
  const url::Origin foo_origin = url::Origin::Create(kFooUrl);
  EXPECT_FALSE(foo_origin.opaque());

  // The registerable domain is only looked up for http/https, so the host of
  // a foo:// origin must be left untouched even though one exists for it.
  const SchemefulSite foo_site(foo_origin);
  EXPECT_EQ(url::Origin::Create(kFooUrl),
            foo_site.GetInternalOriginForTesting());
}
TEST(SchemefulSiteTest, IPBasedOriginsRemovePort) {
  // IP literals have no registerable domain, so the host must be kept
  // verbatim; only the port is expected to be dropped.
  const url::Origin ipv4_with_port =
      url::Origin::Create(GURL("http://127.0.0.1:1234"));
  const url::Origin ipv4_without_port =
      url::Origin::Create(GURL("http://127.0.0.1"));

  const SchemefulSite ipv4_site_from_port(ipv4_with_port);
  EXPECT_EQ(url::Origin::Create(GURL("http://127.0.0.1")),
            ipv4_site_from_port.GetInternalOriginForTesting());
  EXPECT_EQ(ipv4_site_from_port, SchemefulSite(ipv4_without_port));

  // IPv6 literal without an explicit port: the site wraps the same origin.
  const GURL kIpv6Url("https://[::1]");
  const SchemefulSite ipv6_site(url::Origin::Create(kIpv6Url));
  EXPECT_EQ(url::Origin::Create(kIpv6Url),
            ipv6_site.GetInternalOriginForTesting());
}
TEST(SchemefulSiteTest, OpaqueOrigins) {
  const GURL kDataUrl("data:text/html,<body>Hello World</body>");

  // Sites built from the very same opaque origin object must be equal.
  url::Origin first_opaque = url::Origin::Create(kDataUrl);
  EXPECT_EQ(SchemefulSite(first_opaque), SchemefulSite(first_opaque));

  // A second opaque origin created from the same URL is a different origin,
  // so its site must never equal the first one's.
  url::Origin second_opaque = url::Origin::Create(kDataUrl);
  EXPECT_NE(SchemefulSite(first_opaque), SchemefulSite(second_opaque));
}
TEST(SchemefulSiteTest, SerializationConsistent) {
  // Register "chrome" for the duration of this test so that chrome:// URLs
  // produce non-opaque origins.
  url::ScopedSchemeRegistryForTests scoped_registry;
  url::AddStandardScheme("chrome", url::SCHEME_WITH_HOST);

  // Non-opaque sites covering several schemes and subdomain depths. Each one
  // must survive a Serialize()/Deserialize() round trip unchanged.
  SchemefulSite kTestSites[] = {
      SchemefulSite(url::Origin::Create(GURL("http://a.foo.test"))),
      SchemefulSite(url::Origin::Create(GURL("https://b.foo.test"))),
      SchemefulSite(url::Origin::Create(GURL("http://b.foo.test"))),
      SchemefulSite(url::Origin::Create(GURL("http://a.b.foo.test"))),
      SchemefulSite(url::Origin::Create(GURL("chrome://a.b.test")))};

  for (const auto& site : kTestSites) {
    SCOPED_TRACE(site.GetDebugString());
    EXPECT_FALSE(site.GetInternalOriginForTesting().opaque());

    // `Deserialize()` returns a SchemefulSite by value, so there is no "no
    // value" state to test for. A failed deserialization yields an opaque
    // site instead, which is what is checked here.
    const SchemefulSite deserialized_site =
        SchemefulSite::Deserialize(site.Serialize());
    EXPECT_FALSE(deserialized_site.opaque());
    EXPECT_EQ(site, deserialized_site);
  }
}
TEST(SchemefulSiteTest, OpaqueSerialization) {
  // Sites that do not come from a scheme/host URL (per the test name these are
  // the opaque cases), built in three different ways. Each one must survive a
  // SerializeWithNonce()/DeserializeWithNonce() round trip.
  SchemefulSite kTestSites[] = {
      SchemefulSite(), SchemefulSite(url::Origin()),
      SchemefulSite(GURL("data:text/html,<body>Hello World</body>"))};

  for (auto& site : kTestSites) {
    // Both steps return an Optional. Use fatal assertions so that a nullopt
    // stops the test instead of being dereferenced below.
    const base::Optional<std::string> serialized = site.SerializeWithNonce();
    ASSERT_TRUE(serialized);

    base::Optional<SchemefulSite> deserialized_site =
        SchemefulSite::DeserializeWithNonce(*serialized);
    ASSERT_TRUE(deserialized_site);
    EXPECT_EQ(site, *deserialized_site);
  }
}
} // namespace net

@ -287,11 +287,20 @@ Origin::Origin(const Nonce& nonce, SchemeHostPort precursor)
DCHECK_EQ(0U, port());
}
// Serializes this origin together with its nonce, without forcing the nonce
// to be initialized first. The actual work is done by
// SerializeWithNonceImpl().
base::Optional<std::string> Origin::SerializeWithNonce() const {
return SerializeWithNonceImpl();
}
// Like SerializeWithNonce(), but first calls GetNonceForSerialization() so
// that the nonce is initialized before the origin is serialized.
base::Optional<std::string> Origin::SerializeWithNonceAndInitIfNeeded() {
// Called only for its side effect of initializing the nonce; the returned
// value is intentionally unused.
GetNonceForSerialization();
return SerializeWithNonceImpl();
}
// The pickle is saved in the following format, in order:
// string - tuple_.GetURL().spec().
// uint64_t (if opaque) - high bits of nonce if opaque. 0 if not initialized.
// uint64_t (if opaque) - low bits of nonce if opaque. 0 if not initialized.
base::Optional<std::string> Origin::SerializeWithNonce() const {
base::Optional<std::string> Origin::SerializeWithNonceImpl() const {
if (!opaque() && !tuple_.IsValid())
return base::nullopt;

@ -58,6 +58,7 @@ struct UrlOriginAdapter;
namespace net {
class NetworkIsolationKey;
class OpaqueNonTransientNetworkIsolationKeyTest;
class SchemefulSite;
} // namespace net
namespace url {
@ -299,6 +300,9 @@ class COMPONENT_EXPORT(URL) Origin {
private:
friend class blink::SecurityOrigin;
friend class net::NetworkIsolationKey;
// SchemefulSite needs access to the serialization/deserialization logic which
// includes the nonce.
friend class net::SchemefulSite;
friend class net::OpaqueNonTransientNetworkIsolationKeyTest;
friend class OriginTest;
friend struct mojo::UrlOriginAdapter;
@ -395,11 +399,17 @@ class COMPONENT_EXPORT(URL) Origin {
base::Optional<base::UnguessableToken> GetNonceForSerialization() const;
// Serializes this Origin, including its nonce if it is opaque. If an opaque
// origin's |tuple_| is invalid or the nonce isn't initialized, nullopt is
// returned. Use of this method should be limited as an opaque origin will
// never be matchable in future browser sessions.
// origin's |tuple_| is invalid nullopt is returned. If the nonce is not
// initialized, a nonce of 0 is used. Use of this method should be limited as
// an opaque origin will never be matchable in future browser sessions.
base::Optional<std::string> SerializeWithNonce() const;
// Like SerializeWithNonce(), but forces |nonce_| to be initialized prior to
// serializing.
base::Optional<std::string> SerializeWithNonceAndInitIfNeeded();
base::Optional<std::string> SerializeWithNonceImpl() const;
// Deserializes an origin from |ToValueWithNonce|. Returns nullopt if the
// value was invalid in any way.
static base::Optional<Origin> Deserialize(const std::string& value);

@ -109,6 +109,11 @@ class OriginTest : public ::testing::Test {
return origin.SerializeWithNonce();
}
// Fixture wrapper that forwards to Origin::SerializeWithNonceAndInitIfNeeded()
// so that test bodies can call it. Takes `origin` by non-const reference
// because the wrapped method is non-const.
base::Optional<std::string> SerializeWithNonceAndInitIfNeeded(
Origin& origin) {
return origin.SerializeWithNonceAndInitIfNeeded();
}
// Fixture wrapper that forwards to Origin::Deserialize() so that test bodies
// can call it.
base::Optional<Origin> Deserialize(const std::string& value) {
return Origin::Deserialize(value);
}
@ -933,6 +938,19 @@ TEST_F(OriginTest, SerializeTBDNonce) {
// Can't use DoEqualityComparisons here since empty nonces are never == unless
// they are the same object.
EXPECT_EQ(opaque.GetDebugString(), deserialized.value().GetDebugString());
// Now force initialization of the nonce prior to serialization.
for (const GURL& url : invalid_urls) {
SCOPED_TRACE(url.spec());
Origin origin = Origin::Create(url);
base::Optional<std::string> serialized =
SerializeWithNonceAndInitIfNeeded(origin);
base::Optional<Origin> deserialized = Deserialize(std::move(*serialized));
ASSERT_TRUE(deserialized.has_value());
// The nonce should have been initialized prior to Serialization().
EXPECT_EQ(origin, deserialized.value());
}
}
TEST_F(OriginTest, DeserializeValidNonce) {