0

I looked at the full dump from the previous crashes and it looks like a testing issue. Hopefully it will be magically unbroken.

git-svn-id: svn://svn.chromium.org/chrome/trunk/src@151 0039d316-1c4b-4281-b951-d872f2087c98
This commit is contained in:
brettw@google.com
2008-07-30 23:50:22 +00:00
parent 96354fc5a0
commit f20dac42c3
7 changed files with 88 additions and 29 deletions

@ -446,7 +446,8 @@ wstring URLFixerUpper::FixupRelativeFile(const wstring& base_dir,
// escaped things. We need to go through 8-bit since the escaped values
// only represent 8-bit values.
std::wstring unescaped = UTF8ToWide(UnescapeURLComponent(
WideToUTF8(trimmed), UnescapeRule::SPACES | UnescapeRule::PERCENTS));
WideToUTF8(trimmed),
UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS));
if (!ValidPathForFile(unescaped, &full_path))
is_file = false;
}

@ -99,7 +99,8 @@ bool DataURL::Parse(const GURL& url, std::string* mime_type,
// could be part of the payload, so don't strip it.
if (base64_encoded) {
temp_data = UnescapeURLComponent(temp_data,
UnescapeRule::SPACES | UnescapeRule::PERCENTS);
UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS |
UnescapeRule::CONTROL_CHARS);
}
// Strip whitespace.
@ -112,7 +113,8 @@ bool DataURL::Parse(const GURL& url, std::string* mime_type,
if (!base64_encoded) {
temp_data = UnescapeURLComponent(temp_data,
UnescapeRule::SPACES | UnescapeRule::PERCENTS);
UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS |
UnescapeRule::CONTROL_CHARS);
}
if (base64_encoded)

@ -81,7 +81,6 @@ class Charmap {
uint32 map_[8];
};
// Given text to escape and a Charmap defining which values to escape,
// return an escaped string. If use_plus is true, spaces are converted
// to +, otherwise, if spaces are in the charmap, they are converted to
@ -105,6 +104,32 @@ const std::string Escape(const std::string& text, const Charmap& charmap,
return escaped;
}
// Lookup table, indexed by 7-bit character value (0..127), that contains
// nonzero when the corresponding character is unescapable for normal URLs.
// The characters marked zero are the ones that may change the parsing of a
// URL, so we don't want to unescape them sometimes. In many cases we won't
// want to unescape spaces, but that is controlled by parameters to Unescape*.
//
// The basic rule is that we can't unescape anything that would change parsing
// like # or ?. We also can't unescape &, =, or + since that could be part of a
// query and that could change the server's parsing of the query.
//
// Entries that are zero in this table: all control characters (0x00-0x1F),
// space, '#', '%', '&', '+', '=', '?', and DEL (0x7F). Every other 7-bit
// character is nonzero. The table has no entries for values >= 0x80, so
// callers must range-check before indexing.
const char kUrlUnescape[128] = {
// NULL, control chars...
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
// ' ' ! " # $ % & ' ( ) * + , - . /
0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1,
// 0 1 2 3 4 5 6 7 8 9 : ; < = > ?
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0,
// @ A B C D E F G H I J K L M N O
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
// P Q R S T U V W X Y Z [ \ ] ^ _
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
// ` a b c d e f g h i j k l m n o
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
// p q r s t u v w x y z { | } ~ <DEL>
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0
};
std::string UnescapeURLImpl(const std::string& escaped_text,
UnescapeRule::Type rules) {
// The output of the unescaping is always smaller than the input, so we can
@ -121,21 +146,34 @@ std::string UnescapeURLImpl(const std::string& escaped_text,
if (IsHex(most_sig_digit) && IsHex(least_sig_digit)) {
unsigned char value = HexToInt(most_sig_digit) * 16 +
HexToInt(least_sig_digit);
if (((rules & UnescapeRule::PERCENTS) || value != '%') &&
((rules & UnescapeRule::SPACES) || value != ' ')) {
if (value >= 0x80 || // Unescape all high-bit characters.
// For 7-bit characters, the lookup table tells us all valid chars.
(kUrlUnescape[value] ||
// ...and we allow some additional unescaping when flags are set.
(value == ' ' && (rules & UnescapeRule::SPACES)) ||
// Allow any of the prohibited but non-control characters when
// we're doing "special" chars.
(value > ' ' && (rules & UnescapeRule::URL_SPECIAL_CHARS)) ||
// Additionally allow control characters if requested.
(value < ' ' && (rules & UnescapeRule::CONTROL_CHARS)))) {
// Use the unescaped version of the character.
result.push_back(value);
i += 2;
} else {
// Keep escaped. Append a percent and we'll get the following two
// digits on the next loops through.
result.push_back('%');
}
} else {
// Invalid escape sequence, just pass the percent through and continue
// right after it.
result.push_back('%');
}
} else if ((rules & UnescapeRule::REPLACE_PLUS_WITH_SPACE) &&
escaped_text[i] == '+') {
result.push_back(' ');
} else {
// Normal case for unescaped characters.
result.push_back(escaped_text[i]);
}
}

@ -77,17 +77,20 @@ class UnescapeRule {
// by other applications.
SPACES = 1,
// Unescapes "%25" to "%". This must not be used when the resulting string
// will need to be interpreted as a URL again, since we won't know what
// should be escaped and what shouldn't. For example, "%2520" would be
// converted to "%20" which would have different meaning than the original.
// This flag is used when generating final output like filenames for URLs
// where we won't be interpreting as a URL and want to do as much unescaping
// as possible.
PERCENTS = 2,
// Unescapes various characters that will change the meaning of URLs,
// including '%', '+', '&', '/', '#'. If we unescaped these characters, the
// resulting URL won't be the same as the source one. This flag is used when
// generating final output like filenames for URLs where we won't be
// interpreting as a URL and want to do as much unescaping as possible.
URL_SPECIAL_CHARS = 2,
// Unescapes control characters such as %01. This INCLUDES NULLs! This is
// used for rare cases such as data: URL decoding where the result is binary
// data. You should not use this for normal URLs!
CONTROL_CHARS = 4,
// URL queries use "+" for space. This flag controls that replacement.
REPLACE_PLUS_WITH_SPACE = 4,
REPLACE_PLUS_WITH_SPACE = 8,
};
};

@ -121,11 +121,18 @@ TEST(Escape, UnescapeURLComponent) {
{"Invalid %escape %2", UnescapeRule::NORMAL, "Invalid %escape %2"},
{"Some%20random text %25%3bOK", UnescapeRule::NORMAL, "Some%20random text %25;OK"},
{"Some%20random text %25%3bOK", UnescapeRule::SPACES, "Some random text %25;OK"},
{"Some%20random text %25%3bOK", UnescapeRule::PERCENTS, "Some%20random text %;OK"},
{"Some%20random text %25%3bOK", UnescapeRule::SPACES | UnescapeRule::PERCENTS, "Some random text %;OK"},
{"%01%02%03%04%05%06%07%08%09", UnescapeRule::NORMAL, "\x01\x02\x03\x04\x05\x06\x07\x08\x09"},
{"Some%20random text %25%3bOK", UnescapeRule::URL_SPECIAL_CHARS, "Some%20random text %;OK"},
{"Some%20random text %25%3bOK", UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS, "Some random text %;OK"},
{"%A0%B1%C2%D3%E4%F5", UnescapeRule::NORMAL, "\xA0\xB1\xC2\xD3\xE4\xF5"},
{"%Aa%Bb%Cc%Dd%Ee%Ff", UnescapeRule::NORMAL, "\xAa\xBb\xCc\xDd\xEe\xFf"}
{"%Aa%Bb%Cc%Dd%Ee%Ff", UnescapeRule::NORMAL, "\xAa\xBb\xCc\xDd\xEe\xFf"},
// Certain URL-sensitive characters should not be unescaped unless asked.
{"Hello%20%13%10world %23# %3F? %3D= %26& %25% %2B+", UnescapeRule::SPACES, "Hello %13%10world %23# %3F? %3D= %26& %25% %2B+"},
{"Hello%20%13%10world %23# %3F? %3D= %26& %25% %2B+", UnescapeRule::URL_SPECIAL_CHARS, "Hello%20%13%10world ## ?? == && %% ++"},
// Control characters.
{"%01%02%03%04%05%06%07%08%09 %25", UnescapeRule::URL_SPECIAL_CHARS, "%01%02%03%04%05%06%07%08%09 %"},
{"%01%02%03%04%05%06%07%08%09 %25", UnescapeRule::CONTROL_CHARS, "\x01\x02\x03\x04\x05\x06\x07\x08\x09 %25"},
{"Hello%20%13%10%02", UnescapeRule::SPACES, "Hello %13%10%02"},
{"Hello%20%13%10%02", UnescapeRule::CONTROL_CHARS, "Hello%20\x13\x10\x02"},
};
for (int i = 0; i < arraysize(unescape_cases); i++) {
@ -134,17 +141,23 @@ TEST(Escape, UnescapeURLComponent) {
UnescapeURLComponent(str, unescape_cases[i].rules));
}
// test the NULL character escaping (which wouldn't work above since those
// are just char pointers)
// Test the NULL character unescaping (which wouldn't work above since those
// are just char pointers).
std::string input("Null");
input.push_back(0); // Also have a NULL in the input.
input.append("%00%39Test");
// When we're unescaping NULLs
std::string expected("Null");
expected.push_back(0);
expected.push_back(0);
expected.append("9Test");
EXPECT_EQ(expected, UnescapeURLComponent(input, UnescapeRule::CONTROL_CHARS));
// When we're not unescaping NULLs.
expected = "Null";
expected.push_back(0);
expected.append("%009Test");
EXPECT_EQ(expected, UnescapeURLComponent(input, UnescapeRule::NORMAL));
}
@ -178,9 +191,9 @@ TEST(Escape, UnescapeAndDecodeURLComponent) {
"Some random text %25;OK",
L"Some random text %25;OK"},
{"UTF8", "%01%02%03%04%05%06%07%08%09",
"\x01\x02\x03\x04\x05\x06\x07\x08\x09",
"\x01\x02\x03\x04\x05\x06\x07\x08\x09",
L"\x01\x02\x03\x04\x05\x06\x07\x08\x09"},
"%01%02%03%04%05%06%07%08%09",
"%01%02%03%04%05%06%07%08%09",
L"%01%02%03%04%05%06%07%08%09"},
{"UTF8", "%E4%BD%A0+%E5%A5%BD",
"\xE4\xBD\xA0+\xE5\xA5\xBD",
"\xE4\xBD\xA0 \xE5\xA5\xBD",

@ -715,7 +715,7 @@ bool FileURLToFilePath(const GURL& url, std::wstring* file_path) {
// GURL stores strings as percent-encoded UTF-8, this will undo if possible.
path = UnescapeURLComponent(path,
UnescapeRule::SPACES | UnescapeRule::PERCENTS);
UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS);
if (!IsStringUTF8(path.c_str())) {
// Not UTF-8, assume encoding is native codepage and we're done. We know we
@ -937,9 +937,11 @@ std::wstring GetSuggestedFilename(const GURL& url,
TrimString(filename, L".", &filename);
}
if (filename.empty()) {
if (url.is_valid())
if (url.is_valid()) {
filename = UnescapeAndDecodeUTF8URLComponent(
url.ExtractFileName(), UnescapeRule::SPACES | UnescapeRule::PERCENTS);
url.ExtractFileName(),
UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS);
}
}
// Trim '.' once more.

@ -61,7 +61,7 @@ static bool UnescapeAndValidatePath(const URLRequest* request,
// we need to identify the encoding and convert to that encoding.
static const std::string kInvalidChars("\x00\x0d\x0a", 3);
*unescaped_path = UnescapeURLComponent(request->url().path(),
UnescapeRule::SPACES | UnescapeRule::PERCENTS);
UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS);
if (unescaped_path->find_first_of(kInvalidChars) != std::string::npos) {
SetLastError(ERROR_INTERNET_INVALID_URL);
// GURL path should not contain '%00' which is NULL(0x00) when unescaped.
@ -415,7 +415,7 @@ void URLRequestFtpJob::OnStartDirectoryTraversal() {
// Unescape the URL path and pass the raw 8bit directly to the browser.
string html = net_util::GetDirectoryListingHeader(
UnescapeURLComponent(request_->url().path(),
UnescapeRule::SPACES | UnescapeRule::PERCENTS));
UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS));
// If this isn't top level directory (i.e. the path isn't "/",) add a link to
// the parent directory.