I looked at the full dump from the previous crashes and it looks like a testing issue. Hopefully it will be magically unbroken.

git-svn-id: svn://svn.chromium.org/chrome/trunk/src@151 0039d316-1c4b-4281-b951-d872f2087c98

This commit is contained in:

brettw@google.com

2008-07-30 23:50:22 +00:00

parent 96354fc5a0

commit f20dac42c3

7 changed files with 88 additions and 29 deletions

chrome/browser

url_fixer_upper.cc

net

base

data_url.cc escape.cc escape.h escape_unittest.cc net_util.cc

url_request

url_request_ftp_job.cc

									
										3

chrome/browser/url_fixer_upper.cc
									
					@ -446,7 +446,8 @@ wstring URLFixerUpper::FixupRelativeFile(const wstring& base_dir,

					    // escaped things. We need to go through 8-bit since the escaped values

					    // escaped things. We need to go through 8-bit since the escaped values

					    // only represent 8-bit values.

					    // only represent 8-bit values.

					    std::wstring unescaped = UTF8ToWide(UnescapeURLComponent(

					    std::wstring unescaped = UTF8ToWide(UnescapeURLComponent(

					        WideToUTF8(trimmed), UnescapeRule::SPACES | UnescapeRule::PERCENTS));

					        WideToUTF8(trimmed),

					        UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS));

					    if (!ValidPathForFile(unescaped, &full_path))

					    if (!ValidPathForFile(unescaped, &full_path))

					      is_file = false;

					      is_file = false;

					  }

					  }

									
										6

net/base/data_url.cc
									
					@ -99,7 +99,8 @@ bool DataURL::Parse(const GURL& url, std::string* mime_type,

					  // could be part of the payload, so don't strip it.

					  // could be part of the payload, so don't strip it.

					  if (base64_encoded) {

					  if (base64_encoded) {

					    temp_data = UnescapeURLComponent(temp_data,

					    temp_data = UnescapeURLComponent(temp_data,

					        UnescapeRule::SPACES | UnescapeRule::PERCENTS);

					        UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS |

					        UnescapeRule::CONTROL_CHARS);

					  }

					  }

					  // Strip whitespace.

					  // Strip whitespace.

					@ -112,7 +113,8 @@ bool DataURL::Parse(const GURL& url, std::string* mime_type,

					  if (!base64_encoded) {

					  if (!base64_encoded) {

					    temp_data = UnescapeURLComponent(temp_data,

					    temp_data = UnescapeURLComponent(temp_data,

					        UnescapeRule::SPACES | UnescapeRule::PERCENTS);

					        UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS |

					        UnescapeRule::CONTROL_CHARS);

					  }

					  }

					  if (base64_encoded)

					  if (base64_encoded)

									
										44

net/base/escape.cc
									
					@ -81,7 +81,6 @@ class Charmap {

					  uint32 map_[8];

					  uint32 map_[8];

					};

					};

					// Given text to escape and a Charmap defining which values to escape,

					// Given text to escape and a Charmap defining which values to escape,

					// return an escaped string.  If use_plus is true, spaces are converted

					// return an escaped string.  If use_plus is true, spaces are converted

					// to +, otherwise, if spaces are in the charmap, they are converted to

					// to +, otherwise, if spaces are in the charmap, they are converted to

					@ -105,6 +104,32 @@ const std::string Escape(const std::string& text, const Charmap& charmap,

					  return escaped;

					  return escaped;

					}

					}

					// Lookup table indexed by 7-bit character value. An entry is nonzero when the

					// corresponding character may always be unescaped in normal URLs. The entries

					// that are zero are the characters that may change the parsing of a URL, so we

					// don't want to unescape them by default. In many cases we won't want to

					// unescape spaces either, but that is controlled by parameters to Unescape*.

					//

					// The basic rule is that we can't unescape anything that would change parsing,

					// like # or ?. We also can't unescape &, =, or + since that could be part of a

					// query and that could change the server's parsing of the query.

					const char kUrlUnescape[128] = {

					//   NULL, control chars...

					     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

					     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

					//  ' ' !  "  #  $  %  &  '  (  )  *  +  ,  -  .  /

					     0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1,

					//   0  1  2  3  4  5  6  7  8  9  :  ;  <  =  >  ?

					     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0,

					//   @  A  B  C  D  E  F  G  H  I  J  K  L  M  N  O

					     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,

					//   P  Q  R  S  T  U  V  W  X  Y  Z  [  \  ]  ^  _

					     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,

					//   `  a  b  c  d  e  f  g  h  i  j  k  l  m  n  o

					     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,

					//   p  q  r  s  t  u  v  w  x  y  z  {  |  }  ~  <DEL>

					     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0

					};

					std::string UnescapeURLImpl(const std::string& escaped_text,

					std::string UnescapeURLImpl(const std::string& escaped_text,

					                            UnescapeRule::Type rules) {

					                            UnescapeRule::Type rules) {

					  // The output of the unescaping is always smaller than the input, so we can

					  // The output of the unescaping is always smaller than the input, so we can

					@ -121,21 +146,34 @@ std::string UnescapeURLImpl(const std::string& escaped_text,

					      if (IsHex(most_sig_digit) && IsHex(least_sig_digit)) {

					      if (IsHex(most_sig_digit) && IsHex(least_sig_digit)) {

					        unsigned char value = HexToInt(most_sig_digit) * 16 +

					        unsigned char value = HexToInt(most_sig_digit) * 16 +

					            HexToInt(least_sig_digit);

					            HexToInt(least_sig_digit);

					        if (((rules & UnescapeRule::PERCENTS) || value != '%') &&

					        if (value >= 0x80 ||  // Unescape all high-bit characters.

					            ((rules & UnescapeRule::SPACES) || value != ' ')) {

					            // For 7-bit characters, the lookup table tells us all valid chars.

					            (kUrlUnescape[value] ||

					             // ...and we allow some additional unescaping when flags are set.

					             (value == ' ' && (rules & UnescapeRule::SPACES)) ||

					             // Allow any of the prohibited but non-control characters when

					             // we're doing "special" chars.

					             (value > ' ' && (rules & UnescapeRule::URL_SPECIAL_CHARS)) ||

					             // Additionally allow control characters if requested.

					             (value < ' ' && (rules & UnescapeRule::CONTROL_CHARS)))) {

					          // Use the unescaped version of the character.

					          // Use the unescaped version of the character.

					          result.push_back(value);

					          result.push_back(value);

					          i += 2;

					          i += 2;

					        } else {

					        } else {

					          // Keep escaped. Append a percent and we'll get the following two

					          // digits on the next loops through.

					          result.push_back('%');

					          result.push_back('%');

					        }

					        }

					      } else {

					      } else {

					        // Invalid escape sequence, just pass the percent through and continue

					        // right after it.

					        result.push_back('%');

					        result.push_back('%');

					      }

					      }

					    } else if ((rules & UnescapeRule::REPLACE_PLUS_WITH_SPACE) &&

					    } else if ((rules & UnescapeRule::REPLACE_PLUS_WITH_SPACE) &&

					               escaped_text[i] == '+') {

					               escaped_text[i] == '+') {

					      result.push_back(' ');

					      result.push_back(' ');

					    } else {

					    } else {

					      // Normal case for unescaped characters.

					      result.push_back(escaped_text[i]);

					      result.push_back(escaped_text[i]);

					    }

					    }

					  }

					  }

									
										21

net/base/escape.h
									
					@ -77,17 +77,20 @@ class UnescapeRule {

					    // by other applications.

					    // by other applications.

					    SPACES = 1,

					    SPACES = 1,

					    // Unescapes "%25" to "%". This must not be used when the resulting string

					    // Unescapes various characters that will change the meaning of URLs,

					    // will need to be interpreted as a URL again, since we won't know what

					    // including '%', '+', '&', '/', '#'. If we unescaped these characters, the

					    // should be escaped and what shouldn't. For example, "%2520" would be

					    // resulting URL won't be the same as the source one. This flag is used when

					    // converted to "%20" which would have different meaning than the origina.

					    // generating final output like filenames for URLs where we won't be

					    // This flag is used when generating final output like filenames for URLs

					    // interpreting as a URL and want to do as much unescaping as possible.

					    // where we won't be interpreting as a URL and want to do as much unescaping

					    URL_SPECIAL_CHARS = 2,

					    // as possible.

					    PERCENTS = 2,

					    // Unescapes control characters such as %01. This INCLUDES NULLs! This is

					    // used for rare cases such as data: URL decoding where the result is binary

					    // data. You should not use this for normal URLs!

					    CONTROL_CHARS = 4,

					    // URL queries use "+" for space. This flag controls that replacement.

					    // URL queries use "+" for space. This flag controls that replacement.

					    REPLACE_PLUS_WITH_SPACE = 4,

					    REPLACE_PLUS_WITH_SPACE = 8,

					  };

					  };

					};

					};

									
										31

net/base/escape_unittest.cc
									
					@ -121,11 +121,18 @@ TEST(Escape, UnescapeURLComponent) {

					    {"Invalid %escape %2", UnescapeRule::NORMAL, "Invalid %escape %2"},

					    {"Invalid %escape %2", UnescapeRule::NORMAL, "Invalid %escape %2"},

					    {"Some%20random text %25%3bOK", UnescapeRule::NORMAL, "Some%20random text %25;OK"},

					    {"Some%20random text %25%3bOK", UnescapeRule::NORMAL, "Some%20random text %25;OK"},

					    {"Some%20random text %25%3bOK", UnescapeRule::SPACES, "Some random text %25;OK"},

					    {"Some%20random text %25%3bOK", UnescapeRule::SPACES, "Some random text %25;OK"},

					    {"Some%20random text %25%3bOK", UnescapeRule::PERCENTS, "Some%20random text %;OK"},

					    {"Some%20random text %25%3bOK", UnescapeRule::URL_SPECIAL_CHARS, "Some%20random text %;OK"},

					    {"Some%20random text %25%3bOK", UnescapeRule::SPACES | UnescapeRule::PERCENTS, "Some random text %;OK"},

					    {"Some%20random text %25%3bOK", UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS, "Some random text %;OK"},

					    {"%01%02%03%04%05%06%07%08%09", UnescapeRule::NORMAL, "\x01\x02\x03\x04\x05\x06\x07\x08\x09"},

					    {"%A0%B1%C2%D3%E4%F5", UnescapeRule::NORMAL, "\xA0\xB1\xC2\xD3\xE4\xF5"},

					    {"%A0%B1%C2%D3%E4%F5", UnescapeRule::NORMAL, "\xA0\xB1\xC2\xD3\xE4\xF5"},

					    {"%Aa%Bb%Cc%Dd%Ee%Ff", UnescapeRule::NORMAL, "\xAa\xBb\xCc\xDd\xEe\xFf"}

					    {"%Aa%Bb%Cc%Dd%Ee%Ff", UnescapeRule::NORMAL, "\xAa\xBb\xCc\xDd\xEe\xFf"},

					    // Certain URL-sensitive characters should not be unescaped unless asked.

					    {"Hello%20%13%10world %23# %3F? %3D= %26& %25% %2B+", UnescapeRule::SPACES, "Hello %13%10world %23# %3F? %3D= %26& %25% %2B+"},

					    {"Hello%20%13%10world %23# %3F? %3D= %26& %25% %2B+", UnescapeRule::URL_SPECIAL_CHARS, "Hello%20%13%10world ## ?? == && %% ++"},

					    // Control characters.

					    {"%01%02%03%04%05%06%07%08%09 %25", UnescapeRule::URL_SPECIAL_CHARS, "%01%02%03%04%05%06%07%08%09 %"},

					    {"%01%02%03%04%05%06%07%08%09 %25", UnescapeRule::CONTROL_CHARS, "\x01\x02\x03\x04\x05\x06\x07\x08\x09 %25"},

					    {"Hello%20%13%10%02", UnescapeRule::SPACES, "Hello %13%10%02"},

					    {"Hello%20%13%10%02", UnescapeRule::CONTROL_CHARS, "Hello%20\x13\x10\x02"},

					  };

					  };

					  for (int i = 0; i < arraysize(unescape_cases); i++) {

					  for (int i = 0; i < arraysize(unescape_cases); i++) {

					@ -134,17 +141,23 @@ TEST(Escape, UnescapeURLComponent) {

					              UnescapeURLComponent(str, unescape_cases[i].rules));

					              UnescapeURLComponent(str, unescape_cases[i].rules));

					  }

					  }

					  // test the NULL character escaping (which wouldn't work above since those

					  // Test the NULL character unescaping (which wouldn't work above since those

					  // are just char pointers)

					  // are just char pointers).

					  std::string input("Null");

					  std::string input("Null");

					  input.push_back(0);  // Also have a NULL in the input.

					  input.push_back(0);  // Also have a NULL in the input.

					  input.append("%00%39Test");

					  input.append("%00%39Test");

					  // When we're unescaping NULLs

					  std::string expected("Null");

					  std::string expected("Null");

					  expected.push_back(0);

					  expected.push_back(0);

					  expected.push_back(0);

					  expected.push_back(0);

					  expected.append("9Test");

					  expected.append("9Test");

					  EXPECT_EQ(expected, UnescapeURLComponent(input, UnescapeRule::CONTROL_CHARS));

					  // When we're not unescaping NULLs.

					  expected = "Null";

					  expected.push_back(0);

					  expected.append("%009Test");

					  EXPECT_EQ(expected, UnescapeURLComponent(input, UnescapeRule::NORMAL));

					  EXPECT_EQ(expected, UnescapeURLComponent(input, UnescapeRule::NORMAL));

					}

					}

					@ -178,9 +191,9 @@ TEST(Escape, UnescapeAndDecodeURLComponent) {

					             "Some random text %25;OK",

					             "Some random text %25;OK",

					             L"Some random text %25;OK"},

					             L"Some random text %25;OK"},

					    {"UTF8", "%01%02%03%04%05%06%07%08%09",

					    {"UTF8", "%01%02%03%04%05%06%07%08%09",

					             "\x01\x02\x03\x04\x05\x06\x07\x08\x09",

					             "%01%02%03%04%05%06%07%08%09",

					             "\x01\x02\x03\x04\x05\x06\x07\x08\x09",

					             "%01%02%03%04%05%06%07%08%09",

					             L"\x01\x02\x03\x04\x05\x06\x07\x08\x09"},

					             L"%01%02%03%04%05%06%07%08%09"},

					    {"UTF8", "%E4%BD%A0+%E5%A5%BD",

					    {"UTF8", "%E4%BD%A0+%E5%A5%BD",

					             "\xE4\xBD\xA0+\xE5\xA5\xBD",

					             "\xE4\xBD\xA0+\xE5\xA5\xBD",

					             "\xE4\xBD\xA0 \xE5\xA5\xBD",

					             "\xE4\xBD\xA0 \xE5\xA5\xBD",

									
										8

net/base/net_util.cc
									
					@ -715,7 +715,7 @@ bool FileURLToFilePath(const GURL& url, std::wstring* file_path) {

					  // GURL stores strings as percent-encoded UTF-8, this will undo if possible.

					  // GURL stores strings as percent-encoded UTF-8, this will undo if possible.

					  path = UnescapeURLComponent(path,

					  path = UnescapeURLComponent(path,

					                              UnescapeRule::SPACES | UnescapeRule::PERCENTS);

					      UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS);

					  if (!IsStringUTF8(path.c_str())) {

					  if (!IsStringUTF8(path.c_str())) {

					    // Not UTF-8, assume encoding is native codepage and we're done. We know we

					    // Not UTF-8, assume encoding is native codepage and we're done. We know we

					@ -937,9 +937,11 @@ std::wstring GetSuggestedFilename(const GURL& url,

					    TrimString(filename, L".", &filename);

					    TrimString(filename, L".", &filename);

					  }

					  }

					  if (filename.empty()) {

					  if (filename.empty()) {

					    if (url.is_valid())

					    if (url.is_valid()) {

					      filename = UnescapeAndDecodeUTF8URLComponent(

					      filename = UnescapeAndDecodeUTF8URLComponent(

					          url.ExtractFileName(), UnescapeRule::SPACES | UnescapeRule::PERCENTS);

					          url.ExtractFileName(),

					          UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS);

					    }

					  }

					  }

					  // Trim '.' once more.

					  // Trim '.' once more.

									
										4

net/url_request/url_request_ftp_job.cc
									
					@ -61,7 +61,7 @@ static bool UnescapeAndValidatePath(const URLRequest* request,

					  // we need to identify the encoding and convert to that encoding.

					  // we need to identify the encoding and convert to that encoding.

					  static const std::string kInvalidChars("\x00\x0d\x0a", 3);

					  static const std::string kInvalidChars("\x00\x0d\x0a", 3);

					  *unescaped_path = UnescapeURLComponent(request->url().path(),

					  *unescaped_path = UnescapeURLComponent(request->url().path(),

					      UnescapeRule::SPACES | UnescapeRule::PERCENTS);

					      UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS);

					  if (unescaped_path->find_first_of(kInvalidChars) != std::string::npos) {

					  if (unescaped_path->find_first_of(kInvalidChars) != std::string::npos) {

					    SetLastError(ERROR_INTERNET_INVALID_URL);

					    SetLastError(ERROR_INTERNET_INVALID_URL);

					    // GURL path should not contain '%00' which is NULL(0x00) when unescaped.

					    // GURL path should not contain '%00' which is NULL(0x00) when unescaped.

					@ -415,7 +415,7 @@ void URLRequestFtpJob::OnStartDirectoryTraversal() {

					  // Unescape the URL path and pass the raw 8bit directly to the browser.

					  // Unescape the URL path and pass the raw 8bit directly to the browser.

					  string html = net_util::GetDirectoryListingHeader(

					  string html = net_util::GetDirectoryListingHeader(

					      UnescapeURLComponent(request_->url().path(),

					      UnescapeURLComponent(request_->url().path(),

					                           UnescapeRule::SPACES | UnescapeRule::PERCENTS));

					          UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS));

					  // If this isn't top level directory (i.e. the path isn't "/",) add a link to

					  // If this isn't top level directory (i.e. the path isn't "/",) add a link to

					  // the parent directory.

					  // the parent directory.

I looked at the full dump from the previous crashes and it looks like a testing issue. Hopefully it will be magically unbroken.

3 chrome/browser/url_fixer_upper.cc

6 net/base/data_url.cc

44 net/base/escape.cc

21 net/base/escape.h

31 net/base/escape_unittest.cc

8 net/base/net_util.cc

4 net/url_request/url_request_ftp_job.cc

3

chrome/browser/url_fixer_upper.cc

6

net/base/data_url.cc

44

net/base/escape.cc

21

net/base/escape.h

31

net/base/escape_unittest.cc

8

net/base/net_util.cc

4

net/url_request/url_request_ftp_job.cc