0

url: add GURL(string) comments re canonicalization

Prior to this commit, the constructor's behavior (e.g. with respect to
percent-escaping) was not obvious from the *.h and *_unittest.cc files alone.

Bug: None
Change-Id: I0bac39e19826e89e3fb16fcd7719e1f457491ed6
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/5505210
Reviewed-by: Daniel Cheng <dcheng@chromium.org>
Commit-Queue: Nigel Tao <nigeltao@chromium.org>
Cr-Commit-Position: refs/heads/main@{#1295213}
This commit is contained in:
Nigel Tao
2024-05-02 00:59:20 +00:00
committed by Chromium LUCI CQ
parent 4758f19292
commit f513a54fd7
2 changed files with 26 additions and 1 deletion

@ -61,7 +61,14 @@ class COMPONENT_EXPORT(URL) GURL {
GURL(const GURL& other);
GURL(GURL&& other) noexcept;
// The strings to this constructor should be UTF-8 / UTF-16.
// The strings to this constructor should be UTF-8 / UTF-16. They will be
// parsed and canonicalized. For example, the host is lower cased.
//
// URL canonicalization is subtle: url_canon.h and its unit tests are not
// short. For example, GURL("http://example.com/aβc%2Etxt") produces a GURL
// object whose path() is "/a%CE%B2c.txt", where the "β" was percent-encoded
// as "%CE%B2" and the "%2E" was percent-unencoded as ".". Subtly, converting
// "%2E" to "." applies to a URL's path component but not to its query.
explicit GURL(std::string_view url_string);
explicit GURL(std::u16string_view url_string);

@ -87,6 +87,24 @@ TEST(GURLTest, Components) {
EXPECT_EQ("%40!$&%27()*+,%3B%3D%3A", url_special_pass.password());
EXPECT_EQ("google.com", url_special_pass.host());
EXPECT_EQ("12345", url_special_pass.port());
// Test path collapsing.
GURL url_path_collapse("http://example.com/a/./b/c/d/../../e");
EXPECT_EQ("/a/b/e", url_path_collapse.path());
// Test an IDNA (Internationalizing Domain Names in Applications) host.
GURL url_idna("http://Bücher.exAMple/");
EXPECT_EQ("xn--bcher-kva.example", url_idna.host());
// Test non-ASCII characters, outside of the host (IDNA).
GURL url_non_ascii("http://example.com/foo/aβc%2Etxt?q=r🙂s");
EXPECT_EQ("/foo/a%CE%B2c.txt", url_non_ascii.path());
EXPECT_EQ("q=r%F0%9F%99%82s", url_non_ascii.query());
// Test already percent-escaped strings.
GURL url_percent_escaped("http://example.com/a/./%2e/i%2E%2F%2fj?q=r%2Es");
EXPECT_EQ("/a/i.%2F%2fj", url_percent_escaped.path());
EXPECT_EQ("q=r%2Es", url_percent_escaped.query());
}
TEST(GURLTest, Empty) {