0

Proof-read comments in src/url/.

Review URL: https://codereview.chromium.org/1270443006

Cr-Commit-Position: refs/heads/master@{#343473}
This commit is contained in:
qyearsley
2015-08-14 13:17:15 -07:00
committed by Commit bot
parent 1c92d5b76e
commit 2bc727de36
25 changed files with 148 additions and 145 deletions

@@ -132,7 +132,7 @@ void GURL::InitializeFromCanonicalSpec() {
#ifndef NDEBUG
// For testing purposes, check that the parsed canonical URL is identical to
// what we would have produced. Skip invalid URLs, as they have no meaning
// and we can't always canonicalize then reproducabely.
// and we can't always canonicalize them reproducibly.
if (is_valid_) {
url::Component scheme;
// We can't do this check on the inner_url of a filesystem URL, as
@@ -311,7 +311,7 @@ GURL GURL::ReplaceComponents(
GURL GURL::GetOrigin() const {
// This doesn't make sense for invalid or nonstandard URLs, so return
// the empty URL
// the empty URL.
if (!is_valid_ || !IsStandard())
return GURL();
@@ -408,16 +408,17 @@ std::string GURL::ExtractFileName() const {
}
std::string GURL::PathForRequest() const {
DCHECK(parsed_.path.len > 0) << "Canonical path for requests should be non-empty";
DCHECK(parsed_.path.len > 0)
<< "Canonical path for requests should be non-empty";
if (parsed_.ref.len >= 0) {
// Clip off the reference when it exists. The reference starts after the #
// sign, so we have to subtract one to also remove it.
// Clip off the reference when it exists. The reference starts after the
// #-sign, so we have to subtract one to also remove it.
return std::string(spec_, parsed_.path.begin,
parsed_.ref.begin - parsed_.path.begin - 1);
}
// Compute the actual path length, rather than depending on the spec's
// terminator. If we're an inner_url, our spec continues on into our outer
// url's path/query/ref.
// URL's path/query/ref.
int path_len = parsed_.path.len;
if (parsed_.query.is_valid())
path_len = parsed_.query.end() - parsed_.path.begin;

@@ -92,7 +92,7 @@ class URL_EXPORT GURL {
// Returns the potentially invalid spec for the URL. This spec MUST NOT be
// modified or sent over the network. It is designed to be displayed in error
// messages to the user, as the apperance of the spec may explain the error.
// messages to the user, as the appearance of the spec may explain the error.
// If the spec is valid, the valid spec will be returned.
//
// The returned string is guaranteed to be valid UTF-8.
@@ -125,9 +125,8 @@ class URL_EXPORT GURL {
// pages.
//
// It may be impossible to resolve the URLs properly. If the input is not
// "standard" (SchemeIsStandard() == false) and the input looks relative, we
// can't resolve it. In these cases, the result will be an empty, invalid
// GURL.
// "standard" (IsStandard() == false) and the input looks relative, we can't
// resolve it. In these cases, the result will be an empty, invalid GURL.
//
// The result may also be a nonempty, invalid URL if the input has some kind
// of encoding error. In these cases, we will try to construct a "good" URL
@@ -283,8 +282,8 @@ class URL_EXPORT GURL {
return ComponentString(parsed_.ref);
}
// Existance querying. These functions will return true if the corresponding
// URL component exists in this URL. Note that existance is different than
// Existence querying. These functions will return true if the corresponding
// URL component exists in this URL. Note that existence is different than
// being nonempty. http://www.google.com/? has a query that just happens to
// be empty, and has_query() will return true.
bool has_scheme() const {
@@ -297,7 +296,7 @@ class URL_EXPORT GURL {
return parsed_.password.len >= 0;
}
bool has_host() const {
// Note that hosts are special, absense of host means length 0.
// Note that hosts are special, absence of host means length 0.
return parsed_.host.len > 0;
}
bool has_port() const {
@@ -347,7 +346,7 @@ class URL_EXPORT GURL {
// object constructions are done.
bool DomainIs(base::StringPiece lower_ascii_domain) const;
// Swaps the contents of this GURL object with the argument without doing
// Swaps the contents of this GURL object with |other|, without doing
// any memory allocations.
void Swap(GURL* other);
@@ -364,8 +363,8 @@ class URL_EXPORT GURL {
private:
// Variant of the string parsing constructor that allows the caller to elect
// retain trailing whitespace, if any, on the passed URL spec but only if the
// scheme is one that allows trailing whitespace. The primary use-case is
// to retain trailing whitespace, if any, on the passed URL spec, but only if
// the scheme is one that allows trailing whitespace. The primary use-case is
// for data: URLs. In most cases, you want to use the single parameter
// constructor above.
enum RetainWhiteSpaceSelector { RETAIN_TRAILING_PATH_WHITEPACE };

@@ -45,14 +45,15 @@ TEST(GURLTest, Types) {
EXPECT_EQ("something:///HOSTNAME.com/",
TypesTestCase("something:///HOSTNAME.com/"));
// In the reverse, known schemes should always trigger standard URL handling.
// Conversely, URLs with known schemes should always trigger standard URL
// handling.
EXPECT_EQ("http://hostname.com/", TypesTestCase("http:HOSTNAME.com"));
EXPECT_EQ("http://hostname.com/", TypesTestCase("http:/HOSTNAME.com"));
EXPECT_EQ("http://hostname.com/", TypesTestCase("http://HOSTNAME.com"));
EXPECT_EQ("http://hostname.com/", TypesTestCase("http:///HOSTNAME.com"));
#ifdef WIN32
// URLs that look like absolute Windows drive specs.
// URLs that look like Windows absolute path specs.
EXPECT_EQ("file:///C:/foo.txt", TypesTestCase("c:\\foo.txt"));
EXPECT_EQ("file:///Z:/foo.txt", TypesTestCase("Z|foo.txt"));
EXPECT_EQ("file://server/foo.txt", TypesTestCase("\\\\server\\foo.txt"));
@@ -60,7 +61,7 @@ TEST(GURLTest, Types) {
#endif
}
// Test the basic creation and querying of components in a GURL. We assume
// Test the basic creation and querying of components in a GURL. We assume that
// the parser is already tested and works, so we are mostly interested if the
// object does the right thing with the results.
TEST(GURLTest, Components) {
@@ -175,7 +176,7 @@ TEST(GURLTest, Assign) {
EXPECT_EQ("", invalid2.ref());
}
// This is a regression test for http://crbug.com/309975 .
// This is a regression test for http://crbug.com/309975.
TEST(GURLTest, SelfAssign) {
GURL a("filesystem:http://example.com/temporary/");
// This should not crash.
@@ -245,9 +246,9 @@ TEST(GURLTest, IsValid) {
}
TEST(GURLTest, ExtraSlashesBeforeAuthority) {
// According to RFC3986, the hier-part for URI with an authority must use only
// two slashes, GURL intentionally just ignores slashes more than 2 and parses
// the following part as an authority.
// According to RFC3986, the hierarchical part for a URI with an authority
// must use only two slashes; GURL intentionally just ignores extra slashes
// if there are more than 2, and parses the following part as an authority.
GURL url("http:///host");
EXPECT_EQ("host", url.host());
EXPECT_EQ("/", url.path());
@@ -378,7 +379,7 @@ TEST(GURLTest, GetWithEmptyPath) {
}
TEST(GURLTest, Replacements) {
// The url canonicalizer replacement test will handle most of these case.
// The URL canonicalizer replacement test will handle most of these cases.
// The most important thing to do here is to check that the proper
// canonicalizer gets called based on the scheme of the input.
struct ReplaceCase {
@@ -395,7 +396,7 @@ TEST(GURLTest, Replacements) {
} replace_cases[] = {
{"http://www.google.com/foo/bar.html?foo#bar", NULL, NULL, NULL, NULL, NULL, "/", "", "", "http://www.google.com/"},
{"http://www.google.com/foo/bar.html?foo#bar", "javascript", "", "", "", "", "window.open('foo');", "", "", "javascript:window.open('foo');"},
{"file:///C:/foo/bar.txt", "http", NULL, NULL, "www.google.com", "99", "/foo","search", "ref", "http://www.google.com:99/foo?search#ref"},
{"file:///C:/foo/bar.txt", "http", NULL, NULL, "www.google.com", "99", "/foo", "search", "ref", "http://www.google.com:99/foo?search#ref"},
#ifdef WIN32
{"http://www.google.com/foo/bar.html?foo#bar", "file", "", "", "", "", "c:\\", "", "", "file:///C:/"},
#endif
@@ -435,7 +436,7 @@ TEST(GURLTest, ClearFragmentOnDataUrl) {
EXPECT_EQ("data: one ? two ", url_no_ref.spec());
// Importing a parsed url via this constructor overload will retain trailing
// Importing a parsed URL via this constructor overload will retain trailing
// whitespace.
GURL import_url(url_no_ref.spec(),
url_no_ref.parsed_for_possibly_invalid_spec(),

@@ -133,4 +133,4 @@ URL_EXPORT std::ostream& operator<<(std::ostream& out,
} // namespace url
#endif // URL_SCHEME_HOST_PORT_H_
#endif // URL_ORIGIN_H_

@@ -285,7 +285,7 @@ URL_EXPORT bool CanonicalizeScheme(const base::char16* spec,
// User info: username/password. If present, this will add the delimiters so
// the output will be "<username>:<password>@" or "<username>@". Empty
// username/password pairs, or empty passwords, will get converted to
// nonexistant in the canonical version.
// nonexistent in the canonical version.
//
// The components for the username and password refer to ranges in the
// respective source strings. Usually, these will be the same string, which
@@ -554,7 +554,7 @@ URL_EXPORT bool CanonicalizePathURL(const base::char16* spec,
CanonOutput* output,
Parsed* new_parsed);
// Use for mailto URLs. This "canonicalizes" the url into a path and query
// Use for mailto URLs. This "canonicalizes" the URL into a path and query
// component. It does not attempt to merge "to" fields. It uses UTF-8 for
// the query encoding if there is a query. This is because a mailto URL is
// really intended for an external mail program, and the encoding of a page,
@@ -578,9 +578,9 @@ URL_EXPORT bool CanonicalizeMailtoURL(const base::char16* spec,
// treated on the same code path as regular canonicalization (the same string
// for each component).
//
// A Parsed structure usually goes along with this. Those
// components identify offsets within these strings, so that they can all be
// in the same string, or spread arbitrarily across different ones.
// A Parsed structure usually goes along with this. Those components identify
// offsets within these strings, so that they can all be in the same string,
// or spread arbitrarily across different ones.
//
// This structure does not own any data. It is the caller's responsibility to
// ensure that the data the pointers point to stays in scope and is not
@@ -725,7 +725,7 @@ class Replacements {
}
bool IsRefOverridden() const { return sources_.ref != NULL; }
// Getters for the itnernal data. See the variables below for how the
// Getters for the internal data. See the variables below for how the
// information is encoded.
const URLComponentSource<CHAR>& sources() const { return sources_; }
const Parsed& components() const { return components_; }
@@ -863,7 +863,7 @@ URL_EXPORT bool IsRelativeURL(const char* base,
// The base URL should be canonical and have a host (may be empty for file
// URLs) and a path. If it doesn't have these, we can't resolve relative
// URLs off of it and will return the base as the output with an error flag.
// Becausee it is canonical is should also be ASCII.
// Because it is canonical it should also be ASCII.
//
// The query charset converter follows the same rules as CanonicalizeQuery.
//

@@ -95,9 +95,9 @@ bool DoScheme(const CHAR* spec,
// The output scheme starts from the current position.
out_scheme->begin = output->length();
// Danger: it's important that this code does not strip any characters: it
// only emits the canonical version (be it valid or escaped) of each of
// the input characters. Stripping would put it out of sync with
// Danger: it's important that this code does not strip any characters;
// it only emits the canonical version (be it valid or escaped) for each
// of the input characters. Stripping would put it out of sync with
// FindAndCompareScheme, which could cause some security checks on
// schemes to be incorrect.
bool success = true;
@@ -218,7 +218,7 @@ bool DoPort(const CHAR* spec,
char buf[buf_size];
WritePortInt(buf, buf_size, port_num);
// Append the port number to the output, preceeded by a colon.
// Append the port number to the output, preceded by a colon.
output->push_back(':');
out_port->begin = output->length();
for (int i = 0; i < buf_size && buf[i]; i++)

@@ -34,7 +34,7 @@ namespace {
// NOTE: I didn't actually test all the control characters. Some may be
// disallowed in the input, but they are all accepted escaped except for 0.
// I also didn't test if characters affecting HTML parsing are allowed
// unescaped, eg. (") or (#), which would indicate the beginning of the path.
// unescaped, e.g. (") or (#), which would indicate the beginning of the path.
// Surprisingly, space is accepted in the input and always escaped.
// This table lists the canonical version of all characters we allow in the

@@ -7,7 +7,7 @@
// This file is intended to be included in another C++ file where the character
// types are defined. This allows us to write mostly generic code, but not have
// templace bloat because everything is inlined when anybody calls any of our
// template bloat because everything is inlined when anybody calls any of our
// functions.
#include <stdlib.h>
@@ -175,7 +175,7 @@ inline void DoAppendUTF8(unsigned char_value, Output* output) {
output);
Appender(static_cast<unsigned char>(0x80 | (char_value & 0x3f)),
output);
} else if (char_value <= 0x10FFFF) { // Max unicode code point.
} else if (char_value <= 0x10FFFF) { // Max Unicode code point.
// 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
Appender(static_cast<unsigned char>(0xf0 | (char_value >> 18)),
output);
@@ -199,7 +199,7 @@ inline void AppendCharToOutput(unsigned char ch, CanonOutput* output) {
}
// Writes the given character to the output as UTF-8. This does NO checking
// of the validity of the unicode characters; the caller should ensure that
// of the validity of the Unicode characters; the caller should ensure that
// the value it is appending is valid to append.
inline void AppendUTF8Value(unsigned char_value, CanonOutput* output) {
DoAppendUTF8<CanonOutput, AppendCharToOutput>(char_value, output);
@@ -207,7 +207,7 @@ inline void AppendUTF8Value(unsigned char_value, CanonOutput* output) {
// Writes the given character to the output as UTF-8, escaping ALL
// characters (even when they are ASCII). This does NO checking of the
// validity of the unicode characters; the caller should ensure that the value
// validity of the Unicode characters; the caller should ensure that the value
// it is appending is valid to append.
inline void AppendUTF8EscapedValue(unsigned char_value, CanonOutput* output) {
DoAppendUTF8<CanonOutput, AppendEscapedChar>(char_value, output);
@@ -260,7 +260,7 @@ inline void AppendUTF16Value(unsigned code_point,
// that any following characters are.
inline bool AppendUTF8EscapedChar(const base::char16* str, int* begin,
int length, CanonOutput* output) {
// UTF-16 input. Readchar16 will handle invalid characters for us and give
// UTF-16 input. ReadUTFChar will handle invalid characters for us and give
// us the kUnicodeReplacementCharacter, so we don't have to do special
// checking after failure, just pass through the failure to the caller.
unsigned char_value;

@@ -113,15 +113,15 @@ static bool DoCanonicalizeFileURL(const URLComponentSource<CHAR>& source,
new_parsed->path.begin = output->length();
output->push_back('/');
// Copies and normalizes the "c:" at the beginning, if present.
// Copy and normalize the "c:" at the beginning, if present.
int after_drive = FileDoDriveSpec(source.path, parsed.path.begin,
parsed.path.end(), output);
// Copies the rest of the path
// Copy the rest of the path.
FileDoPath<CHAR, UCHAR>(source.path, after_drive, parsed.path.end(), output);
new_parsed->path.len = output->length() - new_parsed->path.begin;
// Things following the path we can use the standard canonicalizers for.
// For things following the path, we can use the standard canonicalizers.
success &= URLCanonInternal<CHAR, UCHAR>::DoQuery(
source.query, parsed.query, output, &new_parsed->query);
success &= URLCanonInternal<CHAR, UCHAR>::DoRef(

@@ -441,7 +441,7 @@ bool CheckIPv6ComponentsSize(const IPv6Parsed& parsed,
return true;
}
// Converts a hex comonent into a number. This cannot fail since the caller has
// Converts a hex component into a number. This cannot fail since the caller has
// already verified that each character in the string was a hex digit, and
// that there were no more than 4 characters.
template <typename CHAR>

@@ -30,14 +30,14 @@ URL_EXPORT void AppendIPv6Address(const unsigned char address[16],
// Not all components may exist. If there are only 3 components, for example,
// the last one will have a length of -1 or 0 to indicate it does not exist.
//
// Note that many platform's inet_addr will ignore everything after a space
// in certain curcumstances if the stuff before the space looks like an IP
// Note that many platforms' inet_addr will ignore everything after a space
// in certain circumstances if the stuff before the space looks like an IP
// address. IE6 is included in this. We do NOT handle this case. In many cases,
// the browser's canonicalization will get run before this which converts
// spaces to %20 (in the case of IE7) or rejects them (in the case of
// Mozilla), so this code path never gets hit. Our host canonicalization will
// notice these spaces and escape them, which will make IP address finding
// fail. This seems like better behavior than stripping after a space.
// spaces to %20 (in the case of IE7) or rejects them (in the case of Mozilla),
// so this code path never gets hit. Our host canonicalization will notice
// these spaces and escape them, which will make IP address finding fail. This
// seems like better behavior than stripping after a space.
URL_EXPORT bool FindIPv4Components(const char* spec,
const Component& host,
Component components[4]);

@@ -55,7 +55,7 @@ bool DoCanonicalizeMailtoURL(const URLComponentSource<CHAR>& source,
new_parsed->path.reset();
}
// Query -- always use the default utf8 charset converter.
// Query -- always use the default UTF-8 charset converter.
CanonicalizeQuery(source.query, parsed.query, NULL,
output, &new_parsed->query);

@@ -173,7 +173,7 @@ void BackUpToPreviousSlash(int path_begin_in_output,
// copied to the output.
//
// We do not collapse multiple slashes in a row to a single slash. It seems
// no web browsers do this, and we don't want incompababilities, even though
// no web browsers do this, and we don't want incompatibilities, even though
// it would be correct for most systems.
template<typename CHAR, typename UCHAR>
bool DoPartialPath(const CHAR* spec,
@@ -200,7 +200,7 @@ bool DoPartialPath(const CHAR* spec,
// Needs special handling of some sort.
int dotlen;
if ((dotlen = IsDot(spec, i, end)) > 0) {
// See if this dot was preceeded by a slash in the output. We
// See if this dot was preceded by a slash in the output. We
// assume that when canonicalizing paths, they will always
// start with a slash and not a dot, so we don't have to
// bounds check the output.
@@ -230,7 +230,7 @@ bool DoPartialPath(const CHAR* spec,
break;
}
} else {
// This dot is not preceeded by a slash, it is just part of some
// This dot is not preceded by a slash, it is just part of some
// file name.
output->push_back('.');
i += dotlen - 1;

@@ -14,7 +14,7 @@ namespace url {
namespace {
// Canonicalize the given |component| from |source| into |output| and
// |new_component|. If |separator| is non-zero, it is pre-pended to |ouput|
// |new_component|. If |separator| is non-zero, it is pre-pended to |output|
// prior to the canonicalized component; i.e. for the '?' or '#' characters.
template<typename CHAR, typename UCHAR>
bool DoCanonicalizePathComponent(const CHAR* source,

@@ -80,7 +80,7 @@ void RunConverter(const char* spec,
}
// Runs the converter with the given UTF-16 input. We don't have to do
// anything, but this overriddden function allows us to use the same code
// anything, but this overridden function allows us to use the same code
// for both UTF-8 and UTF-16 input.
void RunConverter(const base::char16* spec,
const Component& query,

@@ -17,14 +17,14 @@ namespace url {
namespace {
// Firefox does a case-sensitive compare (which is probably wrong--Mozilla bug
// 379034), whereas IE is case-insensetive.
// 379034), whereas IE is case-insensitive.
//
// We choose to be more permissive like IE. We don't need to worry about
// unescaping or anything here: neither IE nor Firefox allows this. We also
// don't have to worry about invalid scheme characters since we are comparing
// against the canonical scheme of the base.
//
// The base URL should always be canonical, therefore is ASCII.
// The base URL should always be canonical, therefore it should be ASCII.
template<typename CHAR>
bool AreSchemesEqual(const char* base,
const Component& base_scheme,
@@ -82,7 +82,7 @@ bool DoIsRelativeURL(const char* base,
#ifdef WIN32
// We special case paths like "C:\foo" so they can link directly to the
// file on Windows (IE compatability). The security domain stuff should
// file on Windows (IE compatibility). The security domain stuff should
// prevent a link like this from actually being followed if it's on a
// web page.
//
@@ -91,22 +91,22 @@ bool DoIsRelativeURL(const char* base,
// is a file and the answer will still be correct.
//
// We require strict backslashes when detecting UNC since two forward
// shashes should be treated a a relative URL with a hostname.
// slashes should be treated as a relative URL with a hostname.
if (DoesBeginWindowsDriveSpec(url, begin, url_len) ||
DoesBeginUNCPath(url, begin, url_len, true))
return true;
#endif // WIN32
// See if we've got a scheme, if not, we know this is a relative URL.
// BUT: Just because we have a scheme, doesn't make it absolute.
// BUT, just because we have a scheme, doesn't make it absolute.
// "http:foo.html" is a relative URL with path "foo.html". If the scheme is
// empty, we treat it as relative (":foo") like IE does.
// empty, we treat it as relative (":foo"), like IE does.
Component scheme;
const bool scheme_is_empty =
!ExtractScheme(url, url_len, &scheme) || scheme.len == 0;
if (scheme_is_empty) {
if (url[begin] == '#') {
// |url| is a bare fragement (e.g. "#foo"). This can be resolved against
// |url| is a bare fragment (e.g. "#foo"). This can be resolved against
// any base. Fall-through.
} else if (!is_base_hierarchical) {
// Don't allow relative URLs if the base scheme doesn't support it.
@@ -460,7 +460,7 @@ bool DoResolveRelativeURL(const char* base_url,
// how strict the UNC finder is).
//
// We also allow Windows absolute drive specs on any scheme (for example
// "c:\foo") like IE does. There must be no preceeding slashes in this
// "c:\foo") like IE does. There must be no preceding slashes in this
// case (we reject anything like "/c:/foo") because that should be treated
// as a path. For file URLs, we allow any number of slashes since that would
// be setting the path.

@@ -169,7 +169,7 @@ bool ReplaceStandardURL(const char* base,
}
// For 16-bit replacements, we turn all the replacements into UTF-8 so the
// regular codepath can be used.
// regular code path can be used.
bool ReplaceStandardURL(const char* base,
const Parsed& base_parsed,
const Replacements<base::char16>& replacements,

@@ -127,7 +127,7 @@ TEST(URLCanonTest, DoAppendUTF8) {
#if defined(GTEST_HAS_DEATH_TEST)
// TODO(mattm): Can't run this in debug mode for now, since the DCHECK will
// cause the Chromium stacktrace dialog to appear and hang the test.
// cause the Chromium stack trace dialog to appear and hang the test.
// See http://crbug.com/49580.
#if defined(NDEBUG) && !defined(DCHECK_ALWAYS_ON)
#define MAYBE_DoAppendUTF8Invalid DoAppendUTF8Invalid
@@ -157,10 +157,10 @@ TEST(URLCanonTest, UTF) {
} utf_cases[] = {
// Valid canonical input should get passed through & escaped.
{"\xe4\xbd\xa0\xe5\xa5\xbd", L"\x4f60\x597d", true, "%E4%BD%A0%E5%A5%BD"},
// Test a characer that takes > 16 bits (U+10300 = old italic letter A)
// Test a character that takes > 16 bits (U+10300 = old italic letter A)
{"\xF0\x90\x8C\x80", L"\xd800\xdf00", true, "%F0%90%8C%80"},
// Non-shortest-form UTF-8 are invalid. The bad char should be replaced
// with the invalid character (EF BF DB in UTF-8).
// Non-shortest-form UTF-8 characters are invalid. The bad character
// should be replaced with the invalid character (EF BF BD in UTF-8).
{"\xf0\x84\xbd\xa0\xe5\xa5\xbd", NULL, false, "%EF%BF%BD%E5%A5%BD"},
// Invalid UTF-8 sequences should be marked as invalid (the first
// sequence is truncated).
@@ -259,7 +259,7 @@ TEST(URLCanonTest, Scheme) {
EXPECT_EQ(scheme_cases[i].expected_component.begin, out_comp.begin);
EXPECT_EQ(scheme_cases[i].expected_component.len, out_comp.len);
// Now try the wide version
// Now try the wide version.
out_str.clear();
StdStringCanonOutput output2(&out_str);
@@ -275,7 +275,7 @@ TEST(URLCanonTest, Scheme) {
EXPECT_EQ(scheme_cases[i].expected_component.len, out_comp.len);
}
// Test the case where the scheme is declared nonexistant, it should be
// Test the case where the scheme is declared nonexistent; it should be
// converted into an empty scheme.
Component out_comp;
out_str.clear();
@@ -763,7 +763,8 @@ TEST(URLCanonTest, IPv6) {
{"[2001::.com]", L"[2001::.com]", "", Component(), CanonHostInfo::BROKEN, -1, ""},
// If there are not enough components, the last one should fill them out.
// ... omitted at this time ...
// Too many components means not an IP address. Similarly with too few if using IPv4 compat or mapped addresses.
// Too many components means not an IP address. Similarly, with too few
// if using IPv4 compat or mapped addresses.
{"[::192.168.0.0.1]", L"[::192.168.0.0.1]", "", Component(), CanonHostInfo::BROKEN, -1, ""},
{"[::ffff:192.168.0.0.1]", L"[::ffff:192.168.0.0.1]", "", Component(), CanonHostInfo::BROKEN, -1, ""},
{"[1:2:3:4:5:6:7:8:9]", L"[1:2:3:4:5:6:7:8:9]", "", Component(), CanonHostInfo::BROKEN, -1, ""},
@@ -887,7 +888,7 @@ TEST(URLCanonTest, UserInfo) {
{"http://user:pass@/", "user:pass@", Component(0, 4), Component(5, 4), true},
{"http://%2540:bar@domain.com/", "%2540:bar@", Component(0, 5), Component(6, 3), true },
// IE7 compatability: old versions allowed backslashes in usernames, but
// IE7 compatibility: old versions allowed backslashes in usernames, but
// IE7 does not. We disallow it as well.
{"ftp://me\\mydomain:pass@foo.com/", "", Component(0, -1), Component(0, -1), true},
};
@@ -943,7 +944,7 @@ TEST(URLCanonTest, Port) {
// buffer. The parser unit tests will test scanning the number correctly.
//
// Note that the CanonicalizePort will always prepend a colon to the output
// to separate it from the colon that it assumes preceeds it.
// to separate it from the colon that it assumes precedes it.
struct PortCase {
const char* input;
int default_port;
@@ -1594,7 +1595,7 @@ TEST(URLCanonTest, CanonicalizeFileURL) {
{"file:", "file:///", true, Component(), Component(7, 1)},
{"file:UNChost/path", "file://unchost/path", true, Component(7, 7), Component(14, 5)},
// CanonicalizeFileURL supports absolute Windows style paths for IE
// compatability. Note that the caller must decide that this is a file
// compatibility. Note that the caller must decide that this is a file
// URL itself so it can call the file canonicalizer. This is usually
// done automatically as part of relative URL resolving.
{"c:\\foo\\bar", "file:///C:/foo/bar", true, Component(), Component(7, 11)},
@@ -1605,7 +1606,7 @@ TEST(URLCanonTest, CanonicalizeFileURL) {
{"\\\\server\\file", "file://server/file", true, Component(7, 6), Component(13, 5)},
{"/\\server/file", "file://server/file", true, Component(7, 6), Component(13, 5)},
// We should preserve the number of slashes after the colon for IE
// compatability, except when there is none, in which case we should
// compatibility, except when there is none, in which case we should
// add one.
{"file:c:foo/bar.html", "file:///C:/foo/bar.html", true, Component(), Component(7, 16)},
{"file:/\\/\\C:\\\\//foo\\bar.html", "file:///C:////foo/bar.html", true, Component(), Component(7, 19)},
@@ -2090,10 +2091,10 @@ TEST(URLCanonTest, ResolveRelativeURL) {
}
}
// It used to be when we did a replacement with a long buffer of UTF-16
// characters, we would get invalid data in the URL. This is because the buffer
// it used to hold the UTF-8 data was resized, while some pointers were still
// kept to the old buffer that was removed.
// It used to be the case that when we did a replacement with a long buffer of
// UTF-16 characters, we would get invalid data in the URL. This is because the
// buffer that it used to hold the UTF-8 data was resized, while some pointers
// were still kept to the old buffer that was removed.
TEST(URLCanonTest, ReplacementOverflow) {
const char src[] = "file:///C:/foo/bar";
int src_len = static_cast<int>(strlen(src));
@@ -2101,7 +2102,7 @@ TEST(URLCanonTest, ReplacementOverflow) {
ParseFileURL(src, src_len, &parsed);
// Override two components, the path with something short, and the query with
// sonething long enough to trigger the bug.
// something long enough to trigger the bug.
Replacements<base::char16> repl;
base::string16 new_query;
for (int i = 0; i < 4800; i++)

@@ -11,7 +11,7 @@
namespace url {
// We treat slashes and backslashes the same for IE compatability.
// We treat slashes and backslashes the same for IE compatibility.
inline bool IsURLSlash(base::char16 ch) {
return ch == '/' || ch == '\\';
}

@@ -90,13 +90,13 @@ struct FileSystemURLParseCase {
bool ComponentMatches(const char* input,
const char* reference,
const Component& component) {
// If the component is nonexistant (length == -1), it should begin at 0.
// If the component is nonexistent (length == -1), it should begin at 0.
EXPECT_TRUE(component.len >= 0 || component.len == -1);
// Begin should be valid.
EXPECT_LE(0, component.begin);
// A NULL reference means the component should be nonexistant.
// A NULL reference means the component should be nonexistent.
if (!reference)
return component.len == -1;
if (component.len < 0)
@@ -345,7 +345,7 @@ static PathURLParseCase path_cases[] = {
TEST(URLParser, PathURL) {
// Declared outside for loop to try to catch cases in init() where we forget
// to reset something that is reset by the construtor.
// to reset something that is reset by the constructor.
Parsed parsed;
for (size_t i = 0; i < arraysize(path_cases); i++) {
const char* url = path_cases[i].input;
@@ -356,7 +356,7 @@ TEST(URLParser, PathURL) {
EXPECT_TRUE(ComponentMatches(url, path_cases[i].path, parsed.GetContent()))
<< i;
// The remaining components are never used for path urls.
// The remaining components are never used for path URLs.
ExpectInvalidComponent(parsed.username);
ExpectInvalidComponent(parsed.password);
ExpectInvalidComponent(parsed.host);
@@ -537,7 +537,7 @@ static bool NthParameterIs(const char* url,
Component key, value;
if (!ExtractQueryKeyValue(url, &query, &key, &value)) {
if (parameter >= i && !expected_key)
return true; // Expected nonexistant key, got one.
return true; // Expected nonexistent key, got one.
return false; // Not enough keys.
}
@@ -613,7 +613,7 @@ static MailtoURLParseCase mailto_cases[] = {
TEST(URLParser, MailtoUrl) {
// Declared outside for loop to try to catch cases in init() where we forget
// to reset something that is reset by the construtor.
// to reset something that is reset by the constructor.
Parsed parsed;
for (size_t i = 0; i < arraysize(mailto_cases); ++i) {
const char* url = mailto_cases[i].input;
@@ -625,7 +625,7 @@ TEST(URLParser, MailtoUrl) {
EXPECT_TRUE(ComponentMatches(url, mailto_cases[i].query, parsed.query));
EXPECT_EQ(PORT_UNSPECIFIED, port);
// The remaining components are never used for mailto urls.
// The remaining components are never used for mailto URLs.
ExpectInvalidComponent(parsed.username);
ExpectInvalidComponent(parsed.password);
ExpectInvalidComponent(parsed.port);
@@ -645,7 +645,7 @@ static FileSystemURLParseCase filesystem_cases[] = {
TEST(URLParser, FileSystemURL) {
// Declared outside for loop to try to catch cases in init() where we forget
// to reset something that is reset by the construtor.
// to reset something that is reset by the constructor.
Parsed parsed;
for (size_t i = 0; i < arraysize(filesystem_cases); i++) {
const FileSystemURLParseCase* parsecase = &filesystem_cases[i];
@@ -667,7 +667,7 @@ TEST(URLParser, FileSystemURL) {
int port = ParsePort(url, parsed.inner_parsed()->port);
EXPECT_EQ(parsecase->inner_port, port);
// The remaining components are never used for filesystem urls.
// The remaining components are never used for filesystem URLs.
ExpectInvalidComponent(parsed.inner_parsed()->query);
ExpectInvalidComponent(parsed.inner_parsed()->ref);
}
@@ -676,7 +676,7 @@ TEST(URLParser, FileSystemURL) {
EXPECT_TRUE(ComponentMatches(url, parsecase->query, parsed.query));
EXPECT_TRUE(ComponentMatches(url, parsecase->ref, parsed.ref));
// The remaining components are never used for filesystem urls.
// The remaining components are never used for filesystem URLs.
ExpectInvalidComponent(parsed.username);
ExpectInvalidComponent(parsed.password);
ExpectInvalidComponent(parsed.host);

@@ -30,7 +30,7 @@ inline base::string16 WStringToUTF16(const wchar_t* src) {
return str;
}
// Converts a string from UTF-8 to UTF-16
// Converts a string from UTF-8 to UTF-16.
inline base::string16 ConvertUTF8ToUTF16(const std::string& src) {
int length = static_cast<int>(src.length());
EXPECT_LT(length, 1024);
@@ -39,7 +39,7 @@ inline base::string16 ConvertUTF8ToUTF16(const std::string& src) {
return base::string16(output.data(), output.length());
}
// Converts a string from UTF-16 to UTF-8
// Converts a string from UTF-16 to UTF-8.
inline std::string ConvertUTF16ToUTF8(const base::string16& src) {
std::string str;
StdStringCanonOutput output(&str);

@@ -22,7 +22,7 @@ const int kNumStandardURLSchemes = 8;
const char* kStandardURLSchemes[kNumStandardURLSchemes] = {
kHttpScheme,
kHttpsScheme,
kFileScheme, // Yes, file urls can have a hostname!
kFileScheme, // Yes, file URLs can have a hostname!
kFtpScheme,
kGopherScheme,
kWsScheme, // WebSocket.
@@ -132,7 +132,7 @@ bool DoCanonicalize(const CHAR* in_spec,
Parsed parsed_input;
#ifdef WIN32
// For Windows, we allow things that look like absolute Windows paths to be
// fixed up magically to file URLs. This is done for IE compatability. For
// fixed up magically to file URLs. This is done for IE compatibility. For
// example, this will change "c:/foo" into a file URL rather than treating
// it as a URL with the protocol "c". It also works for UNC ("\\foo\bar.txt").
// There is similar logic in url_canon_relative.cc for
@@ -175,13 +175,14 @@ bool DoCanonicalize(const CHAR* in_spec,
charset_converter, output, output_parsed);
} else if (DoCompareSchemeComponent(spec, scheme, url::kMailToScheme)) {
// Mailto are treated like a standard url with only a scheme, path, query
// Mailto URLs are treated like standard URLs, with only a scheme, path,
// and query.
ParseMailtoURL(spec, spec_len, &parsed_input);
success = CanonicalizeMailtoURL(spec, spec_len, parsed_input, output,
output_parsed);
} else {
// "Weird" URLs like data: and javascript:
// "Weird" URLs like data: and javascript:.
ParsePathURL(spec, spec_len, trim_path_end, &parsed_input);
success = CanonicalizePathURL(spec, spec_len, parsed_input, output,
output_parsed);
@@ -271,7 +272,7 @@ bool DoReplaceComponents(const char* spec,
CanonOutput* output,
Parsed* out_parsed) {
// If the scheme is overridden, just do a simple string substitution and
// reparse the whole thing. There are lots of edge cases that we really don't
// re-parse the whole thing. There are lots of edge cases that we really don't
// want to deal with. Like what happens if I replace "http://e:8080/foo"
// with a file. Does it become "file:///E:/8080/foo" where the port number
// becomes part of the path? Parsing that string as a file URL says "yes"
@@ -318,7 +319,7 @@ bool DoReplaceComponents(const char* spec,
// getting replaced here. If ReplaceComponents didn't re-check everything,
// we wouldn't know if something *not* getting replaced is a problem.
// If the scheme-specific replacers are made more intelligent so they don't
// re-check everything, we should instead recanonicalize the whole thing
// re-check everything, we should instead re-canonicalize the whole thing
// after this call to check validity (this assumes replacing the scheme is
// much much less common than other types of replacements, like clearing the
// ref).
@@ -371,7 +372,7 @@ void AddStandardScheme(const char* new_scheme) {
//
// This normally means you're trying to set up a new standard scheme too late
// in your application's init process. Locate where your app does this
// initialization and calls LockStandardScheme, and add your new standard
// initialization and calls LockStandardSchemes, and add your new standard
// scheme there.
DCHECK(!standard_schemes_locked) <<
"Trying to add a standard scheme after the list has been locked.";
@@ -380,7 +381,7 @@ void AddStandardScheme(const char* new_scheme) {
if (scheme_len == 0)
return;
// Dulicate the scheme into a new buffer and add it to the list of standard
// Duplicate the scheme into a new buffer and add it to the list of standard
// schemes. This pointer will be leaked on shutdown.
char* dup_scheme = new char[scheme_len + 1];
ANNOTATE_LEAKING_OBJECT_PTR(dup_scheme);

@@ -144,7 +144,7 @@ URL_EXPORT bool ResolveRelative(const char* base_spec,
CanonOutput* output,
Parsed* output_parsed);
// Replaces components in the given VALID input url. The new canonical URL info
// Replaces components in the given VALID input URL. The new canonical URL info
// is written to output and out_parsed.
//
// Returns true if the resulting URL is valid.

@@ -44,7 +44,7 @@ TEST(URLUtilTest, FindAndCompareScheme) {
EXPECT_FALSE(FindAndCompareScheme("", 0, "", &found_scheme));
EXPECT_TRUE(found_scheme == Component());
// When there is a whitespace char in scheme, it should canonicalize the url
// When there is a whitespace char in scheme, it should canonicalize the URL
// before comparison.
const char whtspc_str[] = " \r\n\tjav\ra\nscri\tpt:alert(1)";
EXPECT_TRUE(FindAndCompareScheme(whtspc_str,
@@ -305,8 +305,8 @@ TEST(URLUtilTest, TestResolveRelativeWithNonStandardBase) {
}
TEST(URLUtilTest, TestNoRefComponent) {
// The hash-mark must be ignored when mailto: scheme is
// parsed, even if the url has a base and relative part.
// The hash-mark must be ignored when mailto: scheme is parsed,
// even if the URL has a base and relative part.
const char* base = "mailto://to/";
const char* rel = "any#body";