Proof-read comments in src/url/.
Review URL: https://codereview.chromium.org/1270443006 Cr-Commit-Position: refs/heads/master@{#343473}
This commit is contained in:
url
gurl.cc gurl.h gurl_unittest.cc origin.h url_canon.h url_canon_etc.cc url_canon_host.cc url_canon_internal.cc url_canon_internal.h url_canon_internal_file.h url_canon_ip.cc url_canon_ip.h url_canon_mailtourl.cc url_canon_path.cc url_canon_pathurl.cc url_canon_query.cc url_canon_relative.cc url_canon_stdurl.cc url_canon_unittest.cc url_parse_internal.h url_parse_unittest.cc url_test_utils.h url_util.cc url_util.h url_util_unittest.cc
17
url/gurl.cc
17
url/gurl.cc
@ -61,7 +61,7 @@ const std::string& EmptyStringForGURL() {
|
||||
|
||||
#endif // WIN32
|
||||
|
||||
} // namespace
|
||||
} // namespace
|
||||
|
||||
GURL::GURL() : is_valid_(false) {
|
||||
}
|
||||
@ -132,7 +132,7 @@ void GURL::InitializeFromCanonicalSpec() {
|
||||
#ifndef NDEBUG
|
||||
// For testing purposes, check that the parsed canonical URL is identical to
|
||||
// what we would have produced. Skip checking for invalid URLs have no meaning
|
||||
// and we can't always canonicalize then reproducabely.
|
||||
// and we can't always canonicalize then reproducibly.
|
||||
if (is_valid_) {
|
||||
url::Component scheme;
|
||||
// We can't do this check on the inner_url of a filesystem URL, as
|
||||
@ -311,7 +311,7 @@ GURL GURL::ReplaceComponents(
|
||||
|
||||
GURL GURL::GetOrigin() const {
|
||||
// This doesn't make sense for invalid or nonstandard URLs, so return
|
||||
// the empty URL
|
||||
// the empty URL.
|
||||
if (!is_valid_ || !IsStandard())
|
||||
return GURL();
|
||||
|
||||
@ -408,16 +408,17 @@ std::string GURL::ExtractFileName() const {
|
||||
}
|
||||
|
||||
std::string GURL::PathForRequest() const {
|
||||
DCHECK(parsed_.path.len > 0) << "Canonical path for requests should be non-empty";
|
||||
DCHECK(parsed_.path.len > 0)
|
||||
<< "Canonical path for requests should be non-empty";
|
||||
if (parsed_.ref.len >= 0) {
|
||||
// Clip off the reference when it exists. The reference starts after the #
|
||||
// sign, so we have to subtract one to also remove it.
|
||||
// Clip off the reference when it exists. The reference starts after the
|
||||
// #-sign, so we have to subtract one to also remove it.
|
||||
return std::string(spec_, parsed_.path.begin,
|
||||
parsed_.ref.begin - parsed_.path.begin - 1);
|
||||
}
|
||||
// Compute the actual path length, rather than depending on the spec's
|
||||
// terminator. If we're an inner_url, our spec continues on into our outer
|
||||
// url's path/query/ref.
|
||||
// terminator. If we're an inner_url, our spec continues on into our outer
|
||||
// URL's path/query/ref.
|
||||
int path_len = parsed_.path.len;
|
||||
if (parsed_.query.is_valid())
|
||||
path_len = parsed_.query.end() - parsed_.path.begin;
|
||||
|
19
url/gurl.h
19
url/gurl.h
@ -92,7 +92,7 @@ class URL_EXPORT GURL {
|
||||
|
||||
// Returns the potentially invalid spec for the URL. This spec MUST NOT be
|
||||
// modified or sent over the network. It is designed to be displayed in error
|
||||
// messages to the user, as the apperance of the spec may explain the error.
|
||||
// messages to the user, as the appearance of the spec may explain the error.
|
||||
// If the spec is valid, the valid spec will be returned.
|
||||
//
|
||||
// The returned string is guaranteed to be valid UTF-8.
|
||||
@ -125,9 +125,8 @@ class URL_EXPORT GURL {
|
||||
// pages.
|
||||
//
|
||||
// It may be impossible to resolve the URLs properly. If the input is not
|
||||
// "standard" (SchemeIsStandard() == false) and the input looks relative, we
|
||||
// can't resolve it. In these cases, the result will be an empty, invalid
|
||||
// GURL.
|
||||
// "standard" (IsStandard() == false) and the input looks relative, we can't
|
||||
// resolve it. In these cases, the result will be an empty, invalid GURL.
|
||||
//
|
||||
// The result may also be a nonempty, invalid URL if the input has some kind
|
||||
// of encoding error. In these cases, we will try to construct a "good" URL
|
||||
@ -283,8 +282,8 @@ class URL_EXPORT GURL {
|
||||
return ComponentString(parsed_.ref);
|
||||
}
|
||||
|
||||
// Existance querying. These functions will return true if the corresponding
|
||||
// URL component exists in this URL. Note that existance is different than
|
||||
// Existence querying. These functions will return true if the corresponding
|
||||
// URL component exists in this URL. Note that existence is different than
|
||||
// being nonempty. http://www.google.com/? has a query that just happens to
|
||||
// be empty, and has_query() will return true.
|
||||
bool has_scheme() const {
|
||||
@ -297,7 +296,7 @@ class URL_EXPORT GURL {
|
||||
return parsed_.password.len >= 0;
|
||||
}
|
||||
bool has_host() const {
|
||||
// Note that hosts are special, absense of host means length 0.
|
||||
// Note that hosts are special, absence of host means length 0.
|
||||
return parsed_.host.len > 0;
|
||||
}
|
||||
bool has_port() const {
|
||||
@ -347,7 +346,7 @@ class URL_EXPORT GURL {
|
||||
// object constructions are done.
|
||||
bool DomainIs(base::StringPiece lower_ascii_domain) const;
|
||||
|
||||
// Swaps the contents of this GURL object with the argument without doing
|
||||
// Swaps the contents of this GURL object with |other|, without doing
|
||||
// any memory allocations.
|
||||
void Swap(GURL* other);
|
||||
|
||||
@ -364,8 +363,8 @@ class URL_EXPORT GURL {
|
||||
|
||||
private:
|
||||
// Variant of the string parsing constructor that allows the caller to elect
|
||||
// retain trailing whitespace, if any, on the passed URL spec but only if the
|
||||
// scheme is one that allows trailing whitespace. The primary use-case is
|
||||
// retain trailing whitespace, if any, on the passed URL spec, but only if
|
||||
// the scheme is one that allows trailing whitespace. The primary use-case is
|
||||
// for data: URLs. In most cases, you want to use the single parameter
|
||||
// constructor above.
|
||||
enum RetainWhiteSpaceSelector { RETAIN_TRAILING_PATH_WHITEPACE };
|
||||
|
@ -45,14 +45,15 @@ TEST(GURLTest, Types) {
|
||||
EXPECT_EQ("something:///HOSTNAME.com/",
|
||||
TypesTestCase("something:///HOSTNAME.com/"));
|
||||
|
||||
// In the reverse, known schemes should always trigger standard URL handling.
|
||||
// Conversely, URLs with known schemes should always trigger standard URL
|
||||
// handling.
|
||||
EXPECT_EQ("http://hostname.com/", TypesTestCase("http:HOSTNAME.com"));
|
||||
EXPECT_EQ("http://hostname.com/", TypesTestCase("http:/HOSTNAME.com"));
|
||||
EXPECT_EQ("http://hostname.com/", TypesTestCase("http://HOSTNAME.com"));
|
||||
EXPECT_EQ("http://hostname.com/", TypesTestCase("http:///HOSTNAME.com"));
|
||||
|
||||
#ifdef WIN32
|
||||
// URLs that look like absolute Windows drive specs.
|
||||
// URLs that look like Windows absolute path specs.
|
||||
EXPECT_EQ("file:///C:/foo.txt", TypesTestCase("c:\\foo.txt"));
|
||||
EXPECT_EQ("file:///Z:/foo.txt", TypesTestCase("Z|foo.txt"));
|
||||
EXPECT_EQ("file://server/foo.txt", TypesTestCase("\\\\server\\foo.txt"));
|
||||
@ -60,7 +61,7 @@ TEST(GURLTest, Types) {
|
||||
#endif
|
||||
}
|
||||
|
||||
// Test the basic creation and querying of components in a GURL. We assume
|
||||
// Test the basic creation and querying of components in a GURL. We assume that
|
||||
// the parser is already tested and works, so we are mostly interested if the
|
||||
// object does the right thing with the results.
|
||||
TEST(GURLTest, Components) {
|
||||
@ -175,7 +176,7 @@ TEST(GURLTest, Assign) {
|
||||
EXPECT_EQ("", invalid2.ref());
|
||||
}
|
||||
|
||||
// This is a regression test for http://crbug.com/309975 .
|
||||
// This is a regression test for http://crbug.com/309975.
|
||||
TEST(GURLTest, SelfAssign) {
|
||||
GURL a("filesystem:http://example.com/temporary/");
|
||||
// This should not crash.
|
||||
@ -245,9 +246,9 @@ TEST(GURLTest, IsValid) {
|
||||
}
|
||||
|
||||
TEST(GURLTest, ExtraSlashesBeforeAuthority) {
|
||||
// According to RFC3986, the hier-part for URI with an authority must use only
|
||||
// two slashes, GURL intentionally just ignores slashes more than 2 and parses
|
||||
// the following part as an authority.
|
||||
// According to RFC3986, the hierarchical part for URI with an authority
|
||||
// must use only two slashes; GURL intentionally just ignores extra slashes
|
||||
// if there are more than 2, and parses the following part as an authority.
|
||||
GURL url("http:///host");
|
||||
EXPECT_EQ("host", url.host());
|
||||
EXPECT_EQ("/", url.path());
|
||||
@ -378,7 +379,7 @@ TEST(GURLTest, GetWithEmptyPath) {
|
||||
}
|
||||
|
||||
TEST(GURLTest, Replacements) {
|
||||
// The url canonicalizer replacement test will handle most of these case.
|
||||
// The URL canonicalizer replacement test will handle most of these cases.
|
||||
// The most important thing to do here is to check that the proper
|
||||
// canonicalizer gets called based on the scheme of the input.
|
||||
struct ReplaceCase {
|
||||
@ -395,7 +396,7 @@ TEST(GURLTest, Replacements) {
|
||||
} replace_cases[] = {
|
||||
{"http://www.google.com/foo/bar.html?foo#bar", NULL, NULL, NULL, NULL, NULL, "/", "", "", "http://www.google.com/"},
|
||||
{"http://www.google.com/foo/bar.html?foo#bar", "javascript", "", "", "", "", "window.open('foo');", "", "", "javascript:window.open('foo');"},
|
||||
{"file:///C:/foo/bar.txt", "http", NULL, NULL, "www.google.com", "99", "/foo","search", "ref", "http://www.google.com:99/foo?search#ref"},
|
||||
{"file:///C:/foo/bar.txt", "http", NULL, NULL, "www.google.com", "99", "/foo", "search", "ref", "http://www.google.com:99/foo?search#ref"},
|
||||
#ifdef WIN32
|
||||
{"http://www.google.com/foo/bar.html?foo#bar", "file", "", "", "", "", "c:\\", "", "", "file:///C:/"},
|
||||
#endif
|
||||
@ -435,7 +436,7 @@ TEST(GURLTest, ClearFragmentOnDataUrl) {
|
||||
|
||||
EXPECT_EQ("data: one ? two ", url_no_ref.spec());
|
||||
|
||||
// Importing a parsed url via this constructor overload will retain trailing
|
||||
// Importing a parsed URL via this constructor overload will retain trailing
|
||||
// whitespace.
|
||||
GURL import_url(url_no_ref.spec(),
|
||||
url_no_ref.parsed_for_possibly_invalid_spec(),
|
||||
|
@ -133,4 +133,4 @@ URL_EXPORT std::ostream& operator<<(std::ostream& out,
|
||||
|
||||
} // namespace url
|
||||
|
||||
#endif // URL_SCHEME_HOST_PORT_H_
|
||||
#endif // URL_ORIGIN_H_
|
||||
|
@ -285,7 +285,7 @@ URL_EXPORT bool CanonicalizeScheme(const base::char16* spec,
|
||||
// User info: username/password. If present, this will add the delimiters so
|
||||
// the output will be "<username>:<password>@" or "<username>@". Empty
|
||||
// username/password pairs, or empty passwords, will get converted to
|
||||
// nonexistant in the canonical version.
|
||||
// nonexistent in the canonical version.
|
||||
//
|
||||
// The components for the username and password refer to ranges in the
|
||||
// respective source strings. Usually, these will be the same string, which
|
||||
@ -317,13 +317,13 @@ struct CanonHostInfo {
|
||||
|
||||
// This field summarizes how the input was classified by the canonicalizer.
|
||||
enum Family {
|
||||
NEUTRAL, // - Doesn't resemble an IP address. As far as the IP
|
||||
NEUTRAL, // - Doesn't resemble an IP address. As far as the IP
|
||||
// canonicalizer is concerned, it should be treated as a
|
||||
// hostname.
|
||||
BROKEN, // - Almost an IP, but was not canonicalized. This could be an
|
||||
BROKEN, // - Almost an IP, but was not canonicalized. This could be an
|
||||
// IPv4 address where truncation occurred, or something
|
||||
// containing the special characters :[] which did not parse
|
||||
// as an IPv6 address. Never attempt to connect to this
|
||||
// as an IPv6 address. Never attempt to connect to this
|
||||
// address, because it might actually succeed!
|
||||
IPV4, // - Successfully canonicalized as an IPv4 address.
|
||||
IPV6, // - Successfully canonicalized as an IPv6 address.
|
||||
@ -331,7 +331,7 @@ struct CanonHostInfo {
|
||||
Family family;
|
||||
|
||||
// If |family| is IPV4, then this is the number of nonempty dot-separated
|
||||
// components in the input text, from 1 to 4. If |family| is not IPV4,
|
||||
// components in the input text, from 1 to 4. If |family| is not IPV4,
|
||||
// this value is undefined.
|
||||
int num_ipv4_components;
|
||||
|
||||
@ -355,7 +355,7 @@ struct CanonHostInfo {
|
||||
|
||||
// Host.
|
||||
//
|
||||
// The 8-bit version requires UTF-8 encoding. Use this version when you only
|
||||
// The 8-bit version requires UTF-8 encoding. Use this version when you only
|
||||
// need to know whether canonicalization succeeded.
|
||||
URL_EXPORT bool CanonicalizeHost(const char* spec,
|
||||
const Component& host,
|
||||
@ -368,7 +368,7 @@ URL_EXPORT bool CanonicalizeHost(const base::char16* spec,
|
||||
|
||||
// Extended version of CanonicalizeHost, which returns additional information.
|
||||
// Use this when you need to know whether the hostname was an IP address.
|
||||
// A successful return is indicated by host_info->family != BROKEN. See the
|
||||
// A successful return is indicated by host_info->family != BROKEN. See the
|
||||
// definition of CanonHostInfo above for details.
|
||||
URL_EXPORT void CanonicalizeHostVerbose(const char* spec,
|
||||
const Component& host,
|
||||
@ -554,7 +554,7 @@ URL_EXPORT bool CanonicalizePathURL(const base::char16* spec,
|
||||
CanonOutput* output,
|
||||
Parsed* new_parsed);
|
||||
|
||||
// Use for mailto URLs. This "canonicalizes" the url into a path and query
|
||||
// Use for mailto URLs. This "canonicalizes" the URL into a path and query
|
||||
// component. It does not attempt to merge "to" fields. It uses UTF-8 for
|
||||
// the query encoding if there is a query. This is because a mailto URL is
|
||||
// really intended for an external mail program, and the encoding of a page,
|
||||
@ -578,9 +578,9 @@ URL_EXPORT bool CanonicalizeMailtoURL(const base::char16* spec,
|
||||
// treated on the same code path as regular canonicalization (the same string
|
||||
// for each component).
|
||||
//
|
||||
// A Parsed structure usually goes along with this. Those
|
||||
// components identify offsets within these strings, so that they can all be
|
||||
// in the same string, or spread arbitrarily across different ones.
|
||||
// A Parsed structure usually goes along with this. Those components identify
|
||||
// offsets within these strings, so that they can all be in the same string,
|
||||
// or spread arbitrarily across different ones.
|
||||
//
|
||||
// This structure does not own any data. It is the caller's responsibility to
|
||||
// ensure that the data the pointers point to stays in scope and is not
|
||||
@ -725,7 +725,7 @@ class Replacements {
|
||||
}
|
||||
bool IsRefOverridden() const { return sources_.ref != NULL; }
|
||||
|
||||
// Getters for the itnernal data. See the variables below for how the
|
||||
// Getters for the internal data. See the variables below for how the
|
||||
// information is encoded.
|
||||
const URLComponentSource<CHAR>& sources() const { return sources_; }
|
||||
const Parsed& components() const { return components_; }
|
||||
@ -863,7 +863,7 @@ URL_EXPORT bool IsRelativeURL(const char* base,
|
||||
// The base URL should be canonical and have a host (may be empty for file
|
||||
// URLs) and a path. If it doesn't have these, we can't resolve relative
|
||||
// URLs off of it and will return the base as the output with an error flag.
|
||||
// Becausee it is canonical is should also be ASCII.
|
||||
// Because it is canonical it should also be ASCII.
|
||||
//
|
||||
// The query charset converter follows the same rules as CanonicalizeQuery.
|
||||
//
|
||||
|
@ -95,9 +95,9 @@ bool DoScheme(const CHAR* spec,
|
||||
// The output scheme starts from the current position.
|
||||
out_scheme->begin = output->length();
|
||||
|
||||
// Danger: it's important that this code does not strip any characters: it
|
||||
// only emits the canonical version (be it valid or escaped) of each of
|
||||
// the input characters. Stripping would put it out of sync with
|
||||
// Danger: it's important that this code does not strip any characters;
|
||||
// it only emits the canonical version (be it valid or escaped) for each
|
||||
// of the input characters. Stripping would put it out of sync with
|
||||
// FindAndCompareScheme, which could cause some security checks on
|
||||
// schemes to be incorrect.
|
||||
bool success = true;
|
||||
@ -218,7 +218,7 @@ bool DoPort(const CHAR* spec,
|
||||
char buf[buf_size];
|
||||
WritePortInt(buf, buf_size, port_num);
|
||||
|
||||
// Append the port number to the output, preceeded by a colon.
|
||||
// Append the port number to the output, preceded by a colon.
|
||||
output->push_back(':');
|
||||
out_port->begin = output->length();
|
||||
for (int i = 0; i < buf_size && buf[i]; i++)
|
||||
|
@ -34,7 +34,7 @@ namespace {
|
||||
// NOTE: I didn't actually test all the control characters. Some may be
|
||||
// disallowed in the input, but they are all accepted escaped except for 0.
|
||||
// I also didn't test if characters affecting HTML parsing are allowed
|
||||
// unescaped, eg. (") or (#), which would indicate the beginning of the path.
|
||||
// unescaped, e.g. (") or (#), which would indicate the beginning of the path.
|
||||
// Surprisingly, space is accepted in the input and always escaped.
|
||||
|
||||
// This table lists the canonical version of all characters we allow in the
|
||||
@ -316,11 +316,11 @@ void DoHost(const CHAR* spec,
|
||||
}
|
||||
|
||||
if (!success) {
|
||||
// Canonicalization failed. Set BROKEN to notify the caller.
|
||||
// Canonicalization failed. Set BROKEN to notify the caller.
|
||||
host_info->family = CanonHostInfo::BROKEN;
|
||||
} else {
|
||||
// After all the other canonicalization, check if we ended up with an IP
|
||||
// address. IP addresses are small, so writing into this temporary buffer
|
||||
// address. IP addresses are small, so writing into this temporary buffer
|
||||
// should not cause an allocation.
|
||||
RawCanonOutput<64> canon_ip;
|
||||
CanonicalizeIPAddress(output->data(),
|
||||
@ -328,7 +328,7 @@ void DoHost(const CHAR* spec,
|
||||
&canon_ip, host_info);
|
||||
|
||||
// If we got an IPv4/IPv6 address, copy the canonical form back to the
|
||||
// real buffer. Otherwise, it's a hostname or broken IP, in which case
|
||||
// real buffer. Otherwise, it's a hostname or broken IP, in which case
|
||||
// we just leave it in place.
|
||||
if (host_info->IsIPAddress()) {
|
||||
output->set_length(output_begin);
|
||||
|
@ -249,9 +249,9 @@ void AppendStringOfType(const base::char16* source, int length,
|
||||
|
||||
bool ReadUTFChar(const char* str, int* begin, int length,
|
||||
unsigned* code_point_out) {
|
||||
// This depends on ints and int32s being the same thing. If they're not, it
|
||||
// This depends on ints and int32s being the same thing. If they're not, it
|
||||
// will fail to compile.
|
||||
// TODO(mmenke): This should probably be fixed.
|
||||
// TODO(mmenke): This should probably be fixed.
|
||||
if (!base::ReadUnicodeCharacter(str, length, begin, code_point_out) ||
|
||||
!base::IsValidCharacter(*code_point_out)) {
|
||||
*code_point_out = kUnicodeReplacementCharacter;
|
||||
@ -262,9 +262,9 @@ bool ReadUTFChar(const char* str, int* begin, int length,
|
||||
|
||||
bool ReadUTFChar(const base::char16* str, int* begin, int length,
|
||||
unsigned* code_point_out) {
|
||||
// This depends on ints and int32s being the same thing. If they're not, it
|
||||
// This depends on ints and int32s being the same thing. If they're not, it
|
||||
// will fail to compile.
|
||||
// TODO(mmenke): This should probably be fixed.
|
||||
// TODO(mmenke): This should probably be fixed.
|
||||
if (!base::ReadUnicodeCharacter(str, length, begin, code_point_out) ||
|
||||
!base::IsValidCharacter(*code_point_out)) {
|
||||
*code_point_out = kUnicodeReplacementCharacter;
|
||||
|
@ -7,7 +7,7 @@
|
||||
|
||||
// This file is intended to be included in another C++ file where the character
|
||||
// types are defined. This allows us to write mostly generic code, but not have
|
||||
// templace bloat because everything is inlined when anybody calls any of our
|
||||
// template bloat because everything is inlined when anybody calls any of our
|
||||
// functions.
|
||||
|
||||
#include <stdlib.h>
|
||||
@ -41,7 +41,7 @@ enum SharedCharTypes {
|
||||
// Valid in an ASCII-representation of an octal digit.
|
||||
CHAR_OCT = 32,
|
||||
|
||||
// Characters that do not require escaping in encodeURIComponent. Characters
|
||||
// Characters that do not require escaping in encodeURIComponent. Characters
|
||||
// that do not have this flag will be escaped; see url_util.cc.
|
||||
CHAR_COMPONENT = 64,
|
||||
};
|
||||
@ -175,7 +175,7 @@ inline void DoAppendUTF8(unsigned char_value, Output* output) {
|
||||
output);
|
||||
Appender(static_cast<unsigned char>(0x80 | (char_value & 0x3f)),
|
||||
output);
|
||||
} else if (char_value <= 0x10FFFF) { // Max unicode code point.
|
||||
} else if (char_value <= 0x10FFFF) { // Max Unicode code point.
|
||||
// 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
|
||||
Appender(static_cast<unsigned char>(0xf0 | (char_value >> 18)),
|
||||
output);
|
||||
@ -199,7 +199,7 @@ inline void AppendCharToOutput(unsigned char ch, CanonOutput* output) {
|
||||
}
|
||||
|
||||
// Writes the given character to the output as UTF-8. This does NO checking
|
||||
// of the validity of the unicode characters; the caller should ensure that
|
||||
// of the validity of the Unicode characters; the caller should ensure that
|
||||
// the value it is appending is valid to append.
|
||||
inline void AppendUTF8Value(unsigned char_value, CanonOutput* output) {
|
||||
DoAppendUTF8<CanonOutput, AppendCharToOutput>(char_value, output);
|
||||
@ -207,7 +207,7 @@ inline void AppendUTF8Value(unsigned char_value, CanonOutput* output) {
|
||||
|
||||
// Writes the given character to the output as UTF-8, escaping ALL
|
||||
// characters (even when they are ASCII). This does NO checking of the
|
||||
// validity of the unicode characters; the caller should ensure that the value
|
||||
// validity of the Unicode characters; the caller should ensure that the value
|
||||
// it is appending is valid to append.
|
||||
inline void AppendUTF8EscapedValue(unsigned char_value, CanonOutput* output) {
|
||||
DoAppendUTF8<CanonOutput, AppendEscapedChar>(char_value, output);
|
||||
@ -260,7 +260,7 @@ inline void AppendUTF16Value(unsigned code_point,
|
||||
// that any following characters are.
|
||||
inline bool AppendUTF8EscapedChar(const base::char16* str, int* begin,
|
||||
int length, CanonOutput* output) {
|
||||
// UTF-16 input. Readchar16 will handle invalid characters for us and give
|
||||
// UTF-16 input. ReadUTFChar will handle invalid characters for us and give
|
||||
// us the kUnicodeReplacementCharacter, so we don't have to do special
|
||||
// checking after failure, just pass through the failure to the caller.
|
||||
unsigned char_value;
|
||||
|
@ -113,15 +113,15 @@ static bool DoCanonicalizeFileURL(const URLComponentSource<CHAR>& source,
|
||||
new_parsed->path.begin = output->length();
|
||||
output->push_back('/');
|
||||
|
||||
// Copies and normalizes the "c:" at the beginning, if present.
|
||||
// Copy and normalize the "c:" at the beginning, if present.
|
||||
int after_drive = FileDoDriveSpec(source.path, parsed.path.begin,
|
||||
parsed.path.end(), output);
|
||||
|
||||
// Copies the rest of the path
|
||||
// Copy the rest of the path.
|
||||
FileDoPath<CHAR, UCHAR>(source.path, after_drive, parsed.path.end(), output);
|
||||
new_parsed->path.len = output->length() - new_parsed->path.begin;
|
||||
|
||||
// Things following the path we can use the standard canonicalizers for.
|
||||
// For things following the path, we can use the standard canonicalizers.
|
||||
success &= URLCanonInternal<CHAR, UCHAR>::DoQuery(
|
||||
source.query, parsed.query, output, &new_parsed->query);
|
||||
success &= URLCanonInternal<CHAR, UCHAR>::DoRef(
|
||||
|
@ -119,7 +119,7 @@ CanonHostInfo::Family IPv4ComponentToNumber(const CHAR* spec,
|
||||
base_prefix_len++;
|
||||
|
||||
// Put the component, minus any base prefix, into a NULL-terminated buffer so
|
||||
// we can call the standard library. Because leading zeros have already been
|
||||
// we can call the standard library. Because leading zeros have already been
|
||||
// discarded, filling the entire buffer is guaranteed to trigger the 32-bit
|
||||
// overflow check.
|
||||
const int kMaxComponentLen = 16;
|
||||
@ -134,7 +134,7 @@ CanonHostInfo::Family IPv4ComponentToNumber(const CHAR* spec,
|
||||
if (!IsCharOfType(input, base))
|
||||
return CanonHostInfo::NEUTRAL;
|
||||
|
||||
// Fill the buffer, if there's space remaining. This check allows us to
|
||||
// Fill the buffer, if there's space remaining. This check allows us to
|
||||
// verify that all characters are numeric, even those that don't fit.
|
||||
if (dest_i < kMaxComponentLen)
|
||||
buf[dest_i++] = input;
|
||||
@ -150,7 +150,7 @@ CanonHostInfo::Family IPv4ComponentToNumber(const CHAR* spec,
|
||||
if (num > std::numeric_limits<uint32_t>::max())
|
||||
return CanonHostInfo::BROKEN;
|
||||
|
||||
// No overflow. Success!
|
||||
// No overflow. Success!
|
||||
*number = static_cast<uint32_t>(num);
|
||||
return CanonHostInfo::IPV4;
|
||||
}
|
||||
@ -171,7 +171,7 @@ CanonHostInfo::Family DoIPv4AddressToNumber(const CHAR* spec,
|
||||
uint32_t component_values[4];
|
||||
int existing_components = 0;
|
||||
|
||||
// Set to true if one or more components are BROKEN. BROKEN is only
|
||||
// Set to true if one or more components are BROKEN. BROKEN is only
|
||||
// returned if all components are IPV4 or BROKEN, so, for example,
|
||||
// 12345678912345.de returns NEUTRAL rather than broken.
|
||||
bool broken = false;
|
||||
@ -441,7 +441,7 @@ bool CheckIPv6ComponentsSize(const IPv6Parsed& parsed,
|
||||
return true;
|
||||
}
|
||||
|
||||
// Converts a hex comonent into a number. This cannot fail since the caller has
|
||||
// Converts a hex component into a number. This cannot fail since the caller has
|
||||
// already verified that each character in the string was a hex digit, and
|
||||
// that there were no more than 4 characters.
|
||||
template <typename CHAR>
|
||||
@ -578,7 +578,7 @@ bool DoCanonicalizeIPv6Address(const CHAR* spec,
|
||||
}
|
||||
}
|
||||
|
||||
// No invalid characters. Could still be IPv4 or a hostname.
|
||||
// No invalid characters. Could still be IPv4 or a hostname.
|
||||
host_info->family = CanonHostInfo::NEUTRAL;
|
||||
return false;
|
||||
}
|
||||
|
@ -30,14 +30,14 @@ URL_EXPORT void AppendIPv6Address(const unsigned char address[16],
|
||||
// Not all components may exist. If there are only 3 components, for example,
|
||||
// the last one will have a length of -1 or 0 to indicate it does not exist.
|
||||
//
|
||||
// Note that many platform's inet_addr will ignore everything after a space
|
||||
// in certain curcumstances if the stuff before the space looks like an IP
|
||||
// Note that many platforms' inet_addr will ignore everything after a space
|
||||
// in certain circumstances if the stuff before the space looks like an IP
|
||||
// address. IE6 is included in this. We do NOT handle this case. In many cases,
|
||||
// the browser's canonicalization will get run before this which converts
|
||||
// spaces to %20 (in the case of IE7) or rejects them (in the case of
|
||||
// Mozilla), so this code path never gets hit. Our host canonicalization will
|
||||
// notice these spaces and escape them, which will make IP address finding
|
||||
// fail. This seems like better behavior than stripping after a space.
|
||||
// spaces to %20 (in the case of IE7) or rejects them (in the case of Mozilla),
|
||||
// so this code path never gets hit. Our host canonicalization will notice
|
||||
// these spaces and escape them, which will make IP address finding fail. This
|
||||
// seems like better behavior than stripping after a space.
|
||||
URL_EXPORT bool FindIPv4Components(const char* spec,
|
||||
const Component& host,
|
||||
Component components[4]);
|
||||
|
@ -55,7 +55,7 @@ bool DoCanonicalizeMailtoURL(const URLComponentSource<CHAR>& source,
|
||||
new_parsed->path.reset();
|
||||
}
|
||||
|
||||
// Query -- always use the default utf8 charset converter.
|
||||
// Query -- always use the default UTF-8 charset converter.
|
||||
CanonicalizeQuery(source.query, parsed.query, NULL,
|
||||
output, &new_parsed->query);
|
||||
|
||||
|
@ -173,7 +173,7 @@ void BackUpToPreviousSlash(int path_begin_in_output,
|
||||
// copied to the output.
|
||||
//
|
||||
// We do not collapse multiple slashes in a row to a single slash. It seems
|
||||
// no web browsers do this, and we don't want incompababilities, even though
|
||||
// no web browsers do this, and we don't want incompatibilities, even though
|
||||
// it would be correct for most systems.
|
||||
template<typename CHAR, typename UCHAR>
|
||||
bool DoPartialPath(const CHAR* spec,
|
||||
@ -200,7 +200,7 @@ bool DoPartialPath(const CHAR* spec,
|
||||
// Needs special handling of some sort.
|
||||
int dotlen;
|
||||
if ((dotlen = IsDot(spec, i, end)) > 0) {
|
||||
// See if this dot was preceeded by a slash in the output. We
|
||||
// See if this dot was preceded by a slash in the output. We
|
||||
// assume that when canonicalizing paths, they will always
|
||||
// start with a slash and not a dot, so we don't have to
|
||||
// bounds check the output.
|
||||
@ -230,7 +230,7 @@ bool DoPartialPath(const CHAR* spec,
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
// This dot is not preceeded by a slash, it is just part of some
|
||||
// This dot is not preceded by a slash, it is just part of some
|
||||
// file name.
|
||||
output->push_back('.');
|
||||
i += dotlen - 1;
|
||||
|
@ -14,7 +14,7 @@ namespace url {
|
||||
namespace {
|
||||
|
||||
// Canonicalize the given |component| from |source| into |output| and
|
||||
// |new_component|. If |separator| is non-zero, it is pre-pended to |ouput|
|
||||
// |new_component|. If |separator| is non-zero, it is pre-pended to |output|
|
||||
// prior to the canonicalized component; i.e. for the '?' or '#' characters.
|
||||
template<typename CHAR, typename UCHAR>
|
||||
bool DoCanonicalizePathComponent(const CHAR* source,
|
||||
|
@ -80,7 +80,7 @@ void RunConverter(const char* spec,
|
||||
}
|
||||
|
||||
// Runs the converter with the given UTF-16 input. We don't have to do
|
||||
// anything, but this overriddden function allows us to use the same code
|
||||
// anything, but this overridden function allows us to use the same code
|
||||
// for both UTF-8 and UTF-16 input.
|
||||
void RunConverter(const base::char16* spec,
|
||||
const Component& query,
|
||||
|
@ -17,14 +17,14 @@ namespace url {
|
||||
namespace {
|
||||
|
||||
// Firefox does a case-sensitive compare (which is probably wrong--Mozilla bug
|
||||
// 379034), whereas IE is case-insensetive.
|
||||
// 379034), whereas IE is case-insensitive.
|
||||
//
|
||||
// We choose to be more permissive like IE. We don't need to worry about
|
||||
// unescaping or anything here: neither IE nor Firefox allows this. We also
|
||||
// don't have to worry about invalid scheme characters since we are comparing
|
||||
// against the canonical scheme of the base.
|
||||
//
|
||||
// The base URL should always be canonical, therefore is ASCII.
|
||||
// The base URL should always be canonical, therefore it should be ASCII.
|
||||
template<typename CHAR>
|
||||
bool AreSchemesEqual(const char* base,
|
||||
const Component& base_scheme,
|
||||
@ -82,7 +82,7 @@ bool DoIsRelativeURL(const char* base,
|
||||
|
||||
#ifdef WIN32
|
||||
// We special case paths like "C:\foo" so they can link directly to the
|
||||
// file on Windows (IE compatability). The security domain stuff should
|
||||
// file on Windows (IE compatibility). The security domain stuff should
|
||||
// prevent a link like this from actually being followed if it's on a
|
||||
// web page.
|
||||
//
|
||||
@ -91,22 +91,22 @@ bool DoIsRelativeURL(const char* base,
|
||||
// is a file and the answer will still be correct.
|
||||
//
|
||||
// We require strict backslashes when detecting UNC since two forward
|
||||
// shashes should be treated a a relative URL with a hostname.
|
||||
// slashes should be treated as a relative URL with a hostname.
|
||||
if (DoesBeginWindowsDriveSpec(url, begin, url_len) ||
|
||||
DoesBeginUNCPath(url, begin, url_len, true))
|
||||
return true;
|
||||
#endif // WIN32
|
||||
|
||||
// See if we've got a scheme, if not, we know this is a relative URL.
|
||||
// BUT: Just because we have a scheme, doesn't make it absolute.
|
||||
// BUT, just because we have a scheme, doesn't make it absolute.
|
||||
// "http:foo.html" is a relative URL with path "foo.html". If the scheme is
|
||||
// empty, we treat it as relative (":foo") like IE does.
|
||||
// empty, we treat it as relative (":foo"), like IE does.
|
||||
Component scheme;
|
||||
const bool scheme_is_empty =
|
||||
!ExtractScheme(url, url_len, &scheme) || scheme.len == 0;
|
||||
if (scheme_is_empty) {
|
||||
if (url[begin] == '#') {
|
||||
// |url| is a bare fragement (e.g. "#foo"). This can be resolved against
|
||||
// |url| is a bare fragment (e.g. "#foo"). This can be resolved against
|
||||
// any base. Fall-through.
|
||||
} else if (!is_base_hierarchical) {
|
||||
// Don't allow relative URLs if the base scheme doesn't support it.
|
||||
@ -145,7 +145,7 @@ bool DoIsRelativeURL(const char* base,
|
||||
int colon_offset = scheme.end();
|
||||
|
||||
// If it's a filesystem URL, the only valid way to make it relative is not to
|
||||
// supply a scheme. There's no equivalent to e.g. http:index.html.
|
||||
// supply a scheme. There's no equivalent to e.g. http:index.html.
|
||||
if (CompareSchemeComponent(url, scheme, kFileSystemScheme))
|
||||
return true;
|
||||
|
||||
@ -394,7 +394,7 @@ bool DoResolveRelativeHost(const char* base_url,
|
||||
query_converter, output, out_parsed);
|
||||
}
|
||||
|
||||
// Resolves a relative URL that happens to be an absolute file path. Examples
|
||||
// Resolves a relative URL that happens to be an absolute file path. Examples
|
||||
// include: "//hostname/path", "/c:/foo", and "//hostname/c:/foo".
|
||||
template<typename CHAR>
|
||||
bool DoResolveAbsoluteFile(const CHAR* relative_url,
|
||||
@ -460,7 +460,7 @@ bool DoResolveRelativeURL(const char* base_url,
|
||||
// how strict the UNC finder is).
|
||||
//
|
||||
// We also allow Windows absolute drive specs on any scheme (for example
|
||||
// "c:\foo") like IE does. There must be no preceeding slashes in this
|
||||
// "c:\foo") like IE does. There must be no preceding slashes in this
|
||||
// case (we reject anything like "/c:/foo") because that should be treated
|
||||
// as a path. For file URLs, we allow any number of slashes since that would
|
||||
// be setting the path.
|
||||
|
@ -169,7 +169,7 @@ bool ReplaceStandardURL(const char* base,
|
||||
}
|
||||
|
||||
// For 16-bit replacements, we turn all the replacements into UTF-8 so the
|
||||
// regular codepath can be used.
|
||||
// regular code path can be used.
|
||||
bool ReplaceStandardURL(const char* base,
|
||||
const Parsed& base_parsed,
|
||||
const Replacements<base::char16>& replacements,
|
||||
|
@ -38,7 +38,7 @@ struct DualComponentCase {
|
||||
bool expected_success;
|
||||
};
|
||||
|
||||
// Test cases for CanonicalizeIPAddress(). The inputs are identical to
|
||||
// Test cases for CanonicalizeIPAddress(). The inputs are identical to
|
||||
// DualComponentCase, but the output has extra CanonHostInfo fields.
|
||||
struct IPAddressCase {
|
||||
const char* input8;
|
||||
@ -127,7 +127,7 @@ TEST(URLCanonTest, DoAppendUTF8) {
|
||||
|
||||
#if defined(GTEST_HAS_DEATH_TEST)
|
||||
// TODO(mattm): Can't run this in debug mode for now, since the DCHECK will
|
||||
// cause the Chromium stacktrace dialog to appear and hang the test.
|
||||
// cause the Chromium stack trace dialog to appear and hang the test.
|
||||
// See http://crbug.com/49580.
|
||||
#if defined(NDEBUG) && !defined(DCHECK_ALWAYS_ON)
|
||||
#define MAYBE_DoAppendUTF8Invalid DoAppendUTF8Invalid
|
||||
@ -157,10 +157,10 @@ TEST(URLCanonTest, UTF) {
|
||||
} utf_cases[] = {
|
||||
// Valid canonical input should get passed through & escaped.
|
||||
{"\xe4\xbd\xa0\xe5\xa5\xbd", L"\x4f60\x597d", true, "%E4%BD%A0%E5%A5%BD"},
|
||||
// Test a characer that takes > 16 bits (U+10300 = old italic letter A)
|
||||
// Test a character that takes > 16 bits (U+10300 = old italic letter A)
|
||||
{"\xF0\x90\x8C\x80", L"\xd800\xdf00", true, "%F0%90%8C%80"},
|
||||
// Non-shortest-form UTF-8 are invalid. The bad char should be replaced
|
||||
// with the invalid character (EF BF DB in UTF-8).
|
||||
// Non-shortest-form UTF-8 characters are invalid. The bad character
|
||||
// should be replaced with the invalid character (EF BF BD in UTF-8).
|
||||
{"\xf0\x84\xbd\xa0\xe5\xa5\xbd", NULL, false, "%EF%BF%BD%E5%A5%BD"},
|
||||
// Invalid UTF-8 sequences should be marked as invalid (the first
|
||||
// sequence is truncated).
|
||||
@ -259,7 +259,7 @@ TEST(URLCanonTest, Scheme) {
|
||||
EXPECT_EQ(scheme_cases[i].expected_component.begin, out_comp.begin);
|
||||
EXPECT_EQ(scheme_cases[i].expected_component.len, out_comp.len);
|
||||
|
||||
// Now try the wide version
|
||||
// Now try the wide version.
|
||||
out_str.clear();
|
||||
StdStringCanonOutput output2(&out_str);
|
||||
|
||||
@ -275,7 +275,7 @@ TEST(URLCanonTest, Scheme) {
|
||||
EXPECT_EQ(scheme_cases[i].expected_component.len, out_comp.len);
|
||||
}
|
||||
|
||||
// Test the case where the scheme is declared nonexistant, it should be
|
||||
// Test the case where the scheme is declared nonexistent; it should be
|
||||
// converted into an empty scheme.
|
||||
Component out_comp;
|
||||
out_str.clear();
|
||||
@ -638,7 +638,7 @@ TEST(URLCanonTest, IPv4) {
|
||||
{"0.0.0xFFFF", L"0.0.0xFFFF", "0.0.255.255", Component(0, 11), CanonHostInfo::IPV4, 3, "0000FFFF"},
|
||||
{"0.0xFFFFFF", L"0.0xFFFFFF", "0.255.255.255", Component(0, 13), CanonHostInfo::IPV4, 2, "00FFFFFF"},
|
||||
{"0xFFFFFFFF", L"0xFFFFFFFF", "255.255.255.255", Component(0, 15), CanonHostInfo::IPV4, 1, "FFFFFFFF"},
|
||||
// Old trunctations tests. They're all "BROKEN" now.
|
||||
// Old truncation tests. They're all "BROKEN" now.
|
||||
{"276.256.0xf1a2.077777", L"276.256.0xf1a2.077777", "", Component(), CanonHostInfo::BROKEN, -1, ""},
|
||||
{"192.168.0.257", L"192.168.0.257", "", Component(), CanonHostInfo::BROKEN, -1, ""},
|
||||
{"192.168.0xa20001", L"192.168.0xa20001", "", Component(), CanonHostInfo::BROKEN, -1, ""},
|
||||
@ -754,16 +754,17 @@ TEST(URLCanonTest, IPv6) {
|
||||
|
||||
{"[2001:db8::1]", L"[2001:db8::1]", "[2001:db8::1]", Component(0,13), CanonHostInfo::IPV6, -1, "20010DB8000000000000000000000001"},
|
||||
|
||||
// Can only have one "::" contraction in an IPv6 string literal.
|
||||
// Can only have one "::" contraction in an IPv6 string literal.
|
||||
{"[2001::db8::1]", L"[2001::db8::1]", "", Component(), CanonHostInfo::BROKEN, -1, ""},
|
||||
// No more than 2 consecutive ':'s.
|
||||
// No more than 2 consecutive ':'s.
|
||||
{"[2001:db8:::1]", L"[2001:db8:::1]", "", Component(), CanonHostInfo::BROKEN, -1, ""},
|
||||
{"[:::]", L"[:::]", "", Component(), CanonHostInfo::BROKEN, -1, ""},
|
||||
// Non-IP addresses due to invalid characters.
|
||||
// Non-IP addresses due to invalid characters.
|
||||
{"[2001::.com]", L"[2001::.com]", "", Component(), CanonHostInfo::BROKEN, -1, ""},
|
||||
// If there are not enough components, the last one should fill them out.
|
||||
// If there are not enough components, the last one should fill them out.
|
||||
// ... omitted at this time ...
|
||||
// Too many components means not an IP address. Similarly with too few if using IPv4 compat or mapped addresses.
|
||||
// Too many components means not an IP address. Similarly, with too few
|
||||
// if using IPv4 compat or mapped addresses.
|
||||
{"[::192.168.0.0.1]", L"[::192.168.0.0.1]", "", Component(), CanonHostInfo::BROKEN, -1, ""},
|
||||
{"[::ffff:192.168.0.0.1]", L"[::ffff:192.168.0.0.1]", "", Component(), CanonHostInfo::BROKEN, -1, ""},
|
||||
{"[1:2:3:4:5:6:7:8:9]", L"[1:2:3:4:5:6:7:8:9]", "", Component(), CanonHostInfo::BROKEN, -1, ""},
|
||||
@ -887,7 +888,7 @@ TEST(URLCanonTest, UserInfo) {
|
||||
{"http://user:pass@/", "user:pass@", Component(0, 4), Component(5, 4), true},
|
||||
{"http://%2540:bar@domain.com/", "%2540:bar@", Component(0, 5), Component(6, 3), true },
|
||||
|
||||
// IE7 compatability: old versions allowed backslashes in usernames, but
|
||||
// IE7 compatibility: old versions allowed backslashes in usernames, but
|
||||
// IE7 does not. We disallow it as well.
|
||||
{"ftp://me\\mydomain:pass@foo.com/", "", Component(0, -1), Component(0, -1), true},
|
||||
};
|
||||
@ -943,7 +944,7 @@ TEST(URLCanonTest, Port) {
|
||||
// buffer. The parser unit tests will test scanning the number correctly.
|
||||
//
|
||||
// Note that the CanonicalizePort will always prepend a colon to the output
|
||||
// to separate it from the colon that it assumes preceeds it.
|
||||
// to separate it from the colon that it assumes precedes it.
|
||||
struct PortCase {
|
||||
const char* input;
|
||||
int default_port;
|
||||
@ -1329,7 +1330,7 @@ TEST(URLCanonTest, ReplaceStandardURL) {
|
||||
{"http://a:b@google.com:22/foo;bar?baz@cat", "https", "me", "pw", "host.com", "99", "/path", "query", "ref", "https://me:pw@host.com:99/path?query#ref"},
|
||||
// Replace nothing
|
||||
{"http://a:b@google.com:22/foo?baz@cat", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, "http://a:b@google.com:22/foo?baz@cat"},
|
||||
// Replace scheme with filesystem. The result is garbage, but you asked
|
||||
// Replace scheme with filesystem. The result is garbage, but you asked
|
||||
// for it.
|
||||
{"http://a:b@google.com:22/foo?baz@cat", "filesystem", NULL, NULL, NULL, NULL, NULL, NULL, NULL, "filesystem://a:b@google.com:22/foo?baz@cat"},
|
||||
};
|
||||
@ -1594,7 +1595,7 @@ TEST(URLCanonTest, CanonicalizeFileURL) {
|
||||
{"file:", "file:///", true, Component(), Component(7, 1)},
|
||||
{"file:UNChost/path", "file://unchost/path", true, Component(7, 7), Component(14, 5)},
|
||||
// CanonicalizeFileURL supports absolute Windows style paths for IE
|
||||
// compatability. Note that the caller must decide that this is a file
|
||||
// compatibility. Note that the caller must decide that this is a file
|
||||
// URL itself so it can call the file canonicalizer. This is usually
|
||||
// done automatically as part of relative URL resolving.
|
||||
{"c:\\foo\\bar", "file:///C:/foo/bar", true, Component(), Component(7, 11)},
|
||||
@ -1605,7 +1606,7 @@ TEST(URLCanonTest, CanonicalizeFileURL) {
|
||||
{"\\\\server\\file", "file://server/file", true, Component(7, 6), Component(13, 5)},
|
||||
{"/\\server/file", "file://server/file", true, Component(7, 6), Component(13, 5)},
|
||||
// We should preserve the number of slashes after the colon for IE
|
||||
// compatability, except when there is none, in which case we should
|
||||
// compatibility, except when there is none, in which case we should
|
||||
// add one.
|
||||
{"file:c:foo/bar.html", "file:///C:/foo/bar.html", true, Component(), Component(7, 16)},
|
||||
{"file:/\\/\\C:\\\\//foo\\bar.html", "file:///C:////foo/bar.html", true, Component(), Component(7, 19)},
|
||||
@ -1807,7 +1808,7 @@ TEST(URLCanonTest, CanonicalizeMailtoURL) {
|
||||
|
||||
TEST(URLCanonTest, _itoa_s) {
|
||||
// We fill the buffer with 0xff to ensure that it's getting properly
|
||||
// null-terminated. We also allocate one byte more than what we tell
|
||||
// null-terminated. We also allocate one byte more than what we tell
|
||||
// _itoa_s about, and ensure that the extra byte is untouched.
|
||||
char buf[6];
|
||||
memset(buf, 0xff, sizeof(buf));
|
||||
@ -1846,7 +1847,7 @@ TEST(URLCanonTest, _itoa_s) {
|
||||
|
||||
TEST(URLCanonTest, _itow_s) {
|
||||
// We fill the buffer with 0xff to ensure that it's getting properly
|
||||
// null-terminated. We also allocate one byte more than what we tell
|
||||
// null-terminated. We also allocate one byte more than what we tell
|
||||
// _itow_s about, and ensure that the extra byte is untouched.
|
||||
base::char16 buf[6];
|
||||
const char fill_mem = 0xff;
|
||||
@ -2022,7 +2023,7 @@ TEST(URLCanonTest, ResolveRelativeURL) {
|
||||
// which is what is required.
|
||||
{"file:///foo.txt", true, true, "//host:80/bar.txt", true, true, false, "file://host:80/bar.txt"},
|
||||
// Filesystem URL tests; filesystem URLs are only valid and relative if
|
||||
// they have no scheme, e.g. "./index.html". There's no valid equivalent
|
||||
// they have no scheme, e.g. "./index.html". There's no valid equivalent
|
||||
// to http:index.html.
|
||||
{"filesystem:http://host/t/path", true, false, "filesystem:http://host/t/path2", true, false, false, NULL},
|
||||
{"filesystem:http://host/t/path", true, false, "filesystem:https://host/t/path2", true, false, false, NULL},
|
||||
@ -2090,10 +2091,10 @@ TEST(URLCanonTest, ResolveRelativeURL) {
|
||||
}
|
||||
}
|
||||
|
||||
// It used to be when we did a replacement with a long buffer of UTF-16
|
||||
// characters, we would get invalid data in the URL. This is because the buffer
|
||||
// it used to hold the UTF-8 data was resized, while some pointers were still
|
||||
// kept to the old buffer that was removed.
|
||||
// It used to be the case that when we did a replacement with a long buffer of
|
||||
// UTF-16 characters, we would get invalid data in the URL. This is because the
|
||||
// buffer that it used to hold the UTF-8 data was resized, while some pointers
|
||||
// were still kept to the old buffer that was removed.
|
||||
TEST(URLCanonTest, ReplacementOverflow) {
|
||||
const char src[] = "file:///C:/foo/bar";
|
||||
int src_len = static_cast<int>(strlen(src));
|
||||
@ -2101,7 +2102,7 @@ TEST(URLCanonTest, ReplacementOverflow) {
|
||||
ParseFileURL(src, src_len, &parsed);
|
||||
|
||||
// Override two components, the path with something short, and the query with
|
||||
// sonething long enough to trigger the bug.
|
||||
// something long enough to trigger the bug.
|
||||
Replacements<base::char16> repl;
|
||||
base::string16 new_query;
|
||||
for (int i = 0; i < 4800; i++)
|
||||
|
@ -11,7 +11,7 @@
|
||||
|
||||
namespace url {
|
||||
|
||||
// We treat slashes and backslashes the same for IE compatability.
|
||||
// We treat slashes and backslashes the same for IE compatibility.
|
||||
inline bool IsURLSlash(base::char16 ch) {
|
||||
return ch == '/' || ch == '\\';
|
||||
}
|
||||
|
@ -90,13 +90,13 @@ struct FileSystemURLParseCase {
|
||||
bool ComponentMatches(const char* input,
|
||||
const char* reference,
|
||||
const Component& component) {
|
||||
// If the component is nonexistant (length == -1), it should begin at 0.
|
||||
// If the component is nonexistent (length == -1), it should begin at 0.
|
||||
EXPECT_TRUE(component.len >= 0 || component.len == -1);
|
||||
|
||||
// Begin should be valid.
|
||||
EXPECT_LE(0, component.begin);
|
||||
|
||||
// A NULL reference means the component should be nonexistant.
|
||||
// A NULL reference means the component should be nonexistent.
|
||||
if (!reference)
|
||||
return component.len == -1;
|
||||
if (component.len < 0)
|
||||
@ -345,7 +345,7 @@ static PathURLParseCase path_cases[] = {
|
||||
|
||||
TEST(URLParser, PathURL) {
|
||||
// Declared outside for loop to try to catch cases in init() where we forget
|
||||
// to reset something that is reset by the construtor.
|
||||
// to reset something that is reset by the constructor.
|
||||
Parsed parsed;
|
||||
for (size_t i = 0; i < arraysize(path_cases); i++) {
|
||||
const char* url = path_cases[i].input;
|
||||
@ -356,7 +356,7 @@ TEST(URLParser, PathURL) {
|
||||
EXPECT_TRUE(ComponentMatches(url, path_cases[i].path, parsed.GetContent()))
|
||||
<< i;
|
||||
|
||||
// The remaining components are never used for path urls.
|
||||
// The remaining components are never used for path URLs.
|
||||
ExpectInvalidComponent(parsed.username);
|
||||
ExpectInvalidComponent(parsed.password);
|
||||
ExpectInvalidComponent(parsed.host);
|
||||
@ -537,7 +537,7 @@ static bool NthParameterIs(const char* url,
|
||||
Component key, value;
|
||||
if (!ExtractQueryKeyValue(url, &query, &key, &value)) {
|
||||
if (parameter >= i && !expected_key)
|
||||
return true; // Expected nonexistant key, got one.
|
||||
return true; // Expected nonexistent key, got one.
|
||||
return false; // Not enough keys.
|
||||
}
|
||||
|
||||
@ -613,7 +613,7 @@ static MailtoURLParseCase mailto_cases[] = {
|
||||
|
||||
TEST(URLParser, MailtoUrl) {
|
||||
// Declared outside for loop to try to catch cases in init() where we forget
|
||||
// to reset something that is reset by the construtor.
|
||||
// to reset something that is reset by the constructor.
|
||||
Parsed parsed;
|
||||
for (size_t i = 0; i < arraysize(mailto_cases); ++i) {
|
||||
const char* url = mailto_cases[i].input;
|
||||
@ -625,7 +625,7 @@ TEST(URLParser, MailtoUrl) {
|
||||
EXPECT_TRUE(ComponentMatches(url, mailto_cases[i].query, parsed.query));
|
||||
EXPECT_EQ(PORT_UNSPECIFIED, port);
|
||||
|
||||
// The remaining components are never used for mailto urls.
|
||||
// The remaining components are never used for mailto URLs.
|
||||
ExpectInvalidComponent(parsed.username);
|
||||
ExpectInvalidComponent(parsed.password);
|
||||
ExpectInvalidComponent(parsed.port);
|
||||
@ -645,7 +645,7 @@ static FileSystemURLParseCase filesystem_cases[] = {
|
||||
|
||||
TEST(URLParser, FileSystemURL) {
|
||||
// Declared outside for loop to try to catch cases in init() where we forget
|
||||
// to reset something that is reset by the construtor.
|
||||
// to reset something that is reset by the constructor.
|
||||
Parsed parsed;
|
||||
for (size_t i = 0; i < arraysize(filesystem_cases); i++) {
|
||||
const FileSystemURLParseCase* parsecase = &filesystem_cases[i];
|
||||
@ -667,7 +667,7 @@ TEST(URLParser, FileSystemURL) {
|
||||
int port = ParsePort(url, parsed.inner_parsed()->port);
|
||||
EXPECT_EQ(parsecase->inner_port, port);
|
||||
|
||||
// The remaining components are never used for filesystem urls.
|
||||
// The remaining components are never used for filesystem URLs.
|
||||
ExpectInvalidComponent(parsed.inner_parsed()->query);
|
||||
ExpectInvalidComponent(parsed.inner_parsed()->ref);
|
||||
}
|
||||
@ -676,7 +676,7 @@ TEST(URLParser, FileSystemURL) {
|
||||
EXPECT_TRUE(ComponentMatches(url, parsecase->query, parsed.query));
|
||||
EXPECT_TRUE(ComponentMatches(url, parsecase->ref, parsed.ref));
|
||||
|
||||
// The remaining components are never used for filesystem urls.
|
||||
// The remaining components are never used for filesystem URLs.
|
||||
ExpectInvalidComponent(parsed.username);
|
||||
ExpectInvalidComponent(parsed.password);
|
||||
ExpectInvalidComponent(parsed.host);
|
||||
|
@ -19,7 +19,7 @@ namespace url {
|
||||
namespace test_utils {
|
||||
|
||||
// Converts a UTF-16 string from native wchar_t format to char16, by
|
||||
// truncating the high 32 bits. This is not meant to handle true UTF-32
|
||||
// truncating the high 32 bits. This is not meant to handle true UTF-32
|
||||
// encoded strings.
|
||||
inline base::string16 WStringToUTF16(const wchar_t* src) {
|
||||
base::string16 str;
|
||||
@ -30,7 +30,7 @@ inline base::string16 WStringToUTF16(const wchar_t* src) {
|
||||
return str;
|
||||
}
|
||||
|
||||
// Converts a string from UTF-8 to UTF-16
|
||||
// Converts a string from UTF-8 to UTF-16.
|
||||
inline base::string16 ConvertUTF8ToUTF16(const std::string& src) {
|
||||
int length = static_cast<int>(src.length());
|
||||
EXPECT_LT(length, 1024);
|
||||
@ -39,7 +39,7 @@ inline base::string16 ConvertUTF8ToUTF16(const std::string& src) {
|
||||
return base::string16(output.data(), output.length());
|
||||
}
|
||||
|
||||
// Converts a string from UTF-16 to UTF-8
|
||||
// Converts a string from UTF-16 to UTF-8.
|
||||
inline std::string ConvertUTF16ToUTF8(const base::string16& src) {
|
||||
std::string str;
|
||||
StdStringCanonOutput output(&str);
|
||||
|
@ -22,7 +22,7 @@ const int kNumStandardURLSchemes = 8;
|
||||
const char* kStandardURLSchemes[kNumStandardURLSchemes] = {
|
||||
kHttpScheme,
|
||||
kHttpsScheme,
|
||||
kFileScheme, // Yes, file urls can have a hostname!
|
||||
kFileScheme, // Yes, file URLs can have a hostname!
|
||||
kFtpScheme,
|
||||
kGopherScheme,
|
||||
kWsScheme, // WebSocket.
|
||||
@ -132,7 +132,7 @@ bool DoCanonicalize(const CHAR* in_spec,
|
||||
Parsed parsed_input;
|
||||
#ifdef WIN32
|
||||
// For Windows, we allow things that look like absolute Windows paths to be
|
||||
// fixed up magically to file URLs. This is done for IE compatability. For
|
||||
// fixed up magically to file URLs. This is done for IE compatibility. For
|
||||
// example, this will change "c:/foo" into a file URL rather than treating
|
||||
// it as a URL with the protocol "c". It also works for UNC ("\\foo\bar.txt").
|
||||
// There is similar logic in url_canon_relative.cc for
|
||||
@ -175,13 +175,14 @@ bool DoCanonicalize(const CHAR* in_spec,
|
||||
charset_converter, output, output_parsed);
|
||||
|
||||
} else if (DoCompareSchemeComponent(spec, scheme, url::kMailToScheme)) {
|
||||
// Mailto are treated like a standard url with only a scheme, path, query
|
||||
// Mailto URLs are treated like standard URLs, with only a scheme, path,
|
||||
// and query.
|
||||
ParseMailtoURL(spec, spec_len, &parsed_input);
|
||||
success = CanonicalizeMailtoURL(spec, spec_len, parsed_input, output,
|
||||
output_parsed);
|
||||
|
||||
} else {
|
||||
// "Weird" URLs like data: and javascript:
|
||||
// "Weird" URLs like data: and javascript:.
|
||||
ParsePathURL(spec, spec_len, trim_path_end, &parsed_input);
|
||||
success = CanonicalizePathURL(spec, spec_len, parsed_input, output,
|
||||
output_parsed);
|
||||
@ -271,7 +272,7 @@ bool DoReplaceComponents(const char* spec,
|
||||
CanonOutput* output,
|
||||
Parsed* out_parsed) {
|
||||
// If the scheme is overridden, just do a simple string substitution and
|
||||
// reparse the whole thing. There are lots of edge cases that we really don't
|
||||
// re-parse the whole thing. There are lots of edge cases that we really don't
|
||||
// want to deal with. Like what happens if I replace "http://e:8080/foo"
|
||||
// with a file. Does it become "file:///E:/8080/foo" where the port number
|
||||
// becomes part of the path? Parsing that string as a file URL says "yes"
|
||||
@ -318,7 +319,7 @@ bool DoReplaceComponents(const char* spec,
|
||||
// getting replaced here. If ReplaceComponents didn't re-check everything,
|
||||
// we wouldn't know if something *not* getting replaced is a problem.
|
||||
// If the scheme-specific replacers are made more intelligent so they don't
|
||||
// re-check everything, we should instead recanonicalize the whole thing
|
||||
// re-check everything, we should instead re-canonicalize the whole thing
|
||||
// after this call to check validity (this assumes replacing the scheme is
|
||||
// much much less common than other types of replacements, like clearing the
|
||||
// ref).
|
||||
@ -371,7 +372,7 @@ void AddStandardScheme(const char* new_scheme) {
|
||||
//
|
||||
// This normally means you're trying to set up a new standard scheme too late
|
||||
// in your application's init process. Locate where your app does this
|
||||
// initialization and calls LockStandardScheme, and add your new standard
|
||||
// initialization and calls LockStandardSchemes, and add your new standard
|
||||
// scheme there.
|
||||
DCHECK(!standard_schemes_locked) <<
|
||||
"Trying to add a standard scheme after the list has been locked.";
|
||||
@ -380,7 +381,7 @@ void AddStandardScheme(const char* new_scheme) {
|
||||
if (scheme_len == 0)
|
||||
return;
|
||||
|
||||
// Dulicate the scheme into a new buffer and add it to the list of standard
|
||||
// Duplicate the scheme into a new buffer and add it to the list of standard
|
||||
// schemes. This pointer will be leaked on shutdown.
|
||||
char* dup_scheme = new char[scheme_len + 1];
|
||||
ANNOTATE_LEAKING_OBJECT_PTR(dup_scheme);
|
||||
|
@ -144,7 +144,7 @@ URL_EXPORT bool ResolveRelative(const char* base_spec,
|
||||
CanonOutput* output,
|
||||
Parsed* output_parsed);
|
||||
|
||||
// Replaces components in the given VALID input url. The new canonical URL info
|
||||
// Replaces components in the given VALID input URL. The new canonical URL info
|
||||
// is written to output and out_parsed.
|
||||
//
|
||||
// Returns true if the resulting URL is valid.
|
||||
@ -171,7 +171,7 @@ URL_EXPORT void DecodeURLEscapeSequences(const char* input,
|
||||
int length,
|
||||
CanonOutputW* output);
|
||||
|
||||
// Escapes the given string as defined by the JS method encodeURIComponent. See
|
||||
// Escapes the given string as defined by the JS method encodeURIComponent. See
|
||||
// https://developer.mozilla.org/en/JavaScript/Reference/Global_Objects/encodeURIComponent
|
||||
URL_EXPORT void EncodeURIComponent(const char* input,
|
||||
int length,
|
||||
|
@ -44,7 +44,7 @@ TEST(URLUtilTest, FindAndCompareScheme) {
|
||||
EXPECT_FALSE(FindAndCompareScheme("", 0, "", &found_scheme));
|
||||
EXPECT_TRUE(found_scheme == Component());
|
||||
|
||||
// When there is a whitespace char in scheme, it should canonicalize the url
|
||||
// When there is a whitespace char in scheme, it should canonicalize the URL
|
||||
// before comparison.
|
||||
const char whtspc_str[] = " \r\n\tjav\ra\nscri\tpt:alert(1)";
|
||||
EXPECT_TRUE(FindAndCompareScheme(whtspc_str,
|
||||
@ -305,8 +305,8 @@ TEST(URLUtilTest, TestResolveRelativeWithNonStandardBase) {
|
||||
}
|
||||
|
||||
TEST(URLUtilTest, TestNoRefComponent) {
|
||||
// The hash-mark must be ignored when mailto: scheme is
|
||||
// parsed, even if the url has a base and relative part.
|
||||
// The hash-mark must be ignored when mailto: scheme is parsed,
|
||||
// even if the URL has a base and relative part.
|
||||
const char* base = "mailto://to/";
|
||||
const char* rel = "any#body";
|
||||
|
||||
|
Reference in New Issue
Block a user