Improve text elision in TruncateString().

* When doing word-breaking in the middle of the first word in a string with
  leading whitespace, we would incorrectly truncate the entire string.
* When doing word-breaking in whitespace after a word, we would incorrectly omit
  that word from the truncated string.

This also adds more tests and organizes them a bit more readably.

BUG=none
TEST=Select up to but not including the 'f' in the string "aaaaaaaaa bbbbbbbbb ccccccccc ddddddddd eeeeeeeee f".  Right-click the selection and look at the "Search Google for..." string.  It should include 'e's.

Review URL: https://codereview.chromium.org/1340423006

Cr-Commit-Position: refs/heads/master@{#350310}

This commit is contained in:

pkasting

2015-09-22 22:59:31 -07:00

committed by

Commit bot

parent d7478de1ae

commit fc33692651

2 changed files with 82 additions and 65 deletions

ui/gfx

text_elider.cc text_elider_unittest.cc

									
										60

ui/gfx/text_elider.cc
									
				@ -752,45 +752,43 @@ int ElideRectangleText(const base::string16& input,

				base::string16 TruncateString(const base::string16& string,

				                              size_t length,

				                              BreakType break_type) {

				  DCHECK(break_type == CHARACTER_BREAK || break_type == WORD_BREAK);

				  const bool word_break = break_type == WORD_BREAK;

				  DCHECK(word_break || (break_type == CHARACTER_BREAK));

				  if (string.size() <= length)

				    // String fits, return it.

				    return string;

				    return string;  // No need to elide.

				  if (length == 0)

				    // No room for the elide string, return an empty string.

				    return base::string16();

				  size_t max = length - 1;

				    return base::string16();  // No room for anything, even an ellipsis.

				  // Added to the end of strings that are too big.

				  static const base::char16 kElideString[] = { 0x2026, 0 };

				  if (max == 0)

				    // Just enough room for the elide string.

				    return kElideString;

				  if (length == 1)

				    return kElideString;  // Only room for an ellipsis.

				  int32_t index = static_cast<int32_t>(max);

				  if (break_type == WORD_BREAK) {

				    // Use a line iterator to find the first boundary.

				  int32_t index = static_cast<int32_t>(length - 1);

				  if (word_break) {

				    // Use a word iterator to find the first boundary.

				    UErrorCode status = U_ZERO_ERROR;

				    scoped_ptr<icu::BreakIterator> bi(

				        icu::RuleBasedBreakIterator::createLineInstance(

				        icu::RuleBasedBreakIterator::createWordInstance(

				            icu::Locale::getDefault(), status));

				    if (U_FAILURE(status))

				      return string.substr(0, max) + kElideString;

				      return string.substr(0, length - 1) + kElideString;

				    bi->setText(string.c_str());

				    index = bi->preceding(index);

				    index = bi->preceding(static_cast<int32_t>(length));

				    if (index == icu::BreakIterator::DONE || index == 0) {

				      // We either found no valid line break at all, or one right at the

				      // We either found no valid word break at all, or one right at the

				      // beginning of the string. Go back to the end; we'll have to break in the

				      // middle of a word.

				      index = static_cast<int32_t>(max);

				      index = static_cast<int32_t>(length - 1);

				    }

				  }

				  // Use a character iterator to find the previous non-whitespace character.

				  // By this point, |index| should point at the character that's a candidate for

				  // replacing with an ellipsis.  Use a character iterator to check previous

				  // characters and stop as soon as we find a previous non-whitespace character.

				  icu::StringCharacterIterator char_iterator(string.c_str());

				  char_iterator.setIndex(index);

				  while (char_iterator.hasPrevious()) {

				@ -798,21 +796,23 @@ base::string16 TruncateString(const base::string16& string,

				    if (!(u_isspace(char_iterator.current()) ||

				          u_charType(char_iterator.current()) == U_CONTROL_CHAR ||

				          u_charType(char_iterator.current()) == U_NON_SPACING_MARK)) {

				      // Not a whitespace character. Advance the iterator so that we

				      // include the current character in the truncated string.

				      // Not a whitespace character.  Truncate to everything up to and including

				      // this character, and append an ellipsis.

				      char_iterator.next();

				      break;

				      return string.substr(0, char_iterator.getIndex()) + kElideString;

				    }

				  }

				  if (char_iterator.hasPrevious()) {

				    // Found a valid break point.

				    index = char_iterator.getIndex();

				  } else {

				    // String has leading whitespace, return the elide string.

				    return kElideString;

				  }

				  return string.substr(0, index) + kElideString;

				  // Couldn't find a previous non-whitespace character.  If we were originally

				  // word-breaking, and index != length - 1, then the string is |index|

				  // whitespace characters followed by a word we're trying to break in the midst

				  // of, and we can fit at least one character of the word in the elided string.

				  // Do that rather than just returning an ellipsis.

				  if (word_break && (index != static_cast<int32_t>(length - 1)))

				    return string.substr(0, length - 1) + kElideString;

				  // Trying to break after only whitespace, elide all of it.

				  return kElideString;

				}

				}  // namespace gfx

									
										87

ui/gfx/text_elider_unittest.cc
									
				@ -1071,53 +1071,70 @@ TEST(TextEliderTest, MAYBE_ElideRectangleWide32) {

				#define MAYBE_TruncateString TruncateString

				#endif

				TEST(TextEliderTest, MAYBE_TruncateString) {

				  base::string16 string = ASCIIToUTF16("foooooey    bxxxar baz");

				  base::string16 str = ASCIIToUTF16("fooooey    bxxxar baz  ");

				  // Tests that apply to both break behaviors:

				  // Test breaking at character 0.

				  EXPECT_EQ(base::string16(), TruncateString(str, 0, WORD_BREAK));

				  EXPECT_EQ(base::string16(), TruncateString(str, 0, CHARACTER_BREAK));

				  // Make sure it doesn't modify the string if length > string length.

				  EXPECT_EQ(string, TruncateString(string, 100, WORD_BREAK));

				  EXPECT_EQ(string, TruncateString(string, 100, CHARACTER_BREAK));

				  // Test breaking at character 1.

				  EXPECT_EQ(L"\x2026", UTF16ToWide(TruncateString(str, 1, WORD_BREAK)));

				  EXPECT_EQ(L"\x2026", UTF16ToWide(TruncateString(str, 1, CHARACTER_BREAK)));

				  // Test no characters.

				  EXPECT_EQ(L"", UTF16ToWide(TruncateString(string, 0, WORD_BREAK)));

				  EXPECT_EQ(L"", UTF16ToWide(TruncateString(string, 0, CHARACTER_BREAK)));

				  // Test breaking in the middle of the first word.

				  EXPECT_EQ(L"f\x2026", UTF16ToWide(TruncateString(str, 2, WORD_BREAK)));

				  EXPECT_EQ(L"f\x2026", UTF16ToWide(TruncateString(str, 2, CHARACTER_BREAK)));

				  // Test 1 character.

				  EXPECT_EQ(L"\x2026", UTF16ToWide(TruncateString(string, 1, WORD_BREAK)));

				  EXPECT_EQ(L"\x2026", UTF16ToWide(TruncateString(string, 1, CHARACTER_BREAK)));

				  // Test breaking in between words.

				  EXPECT_EQ(L"fooooey\x2026", UTF16ToWide(TruncateString(str, 9, WORD_BREAK)));

				  EXPECT_EQ(L"fooooey\x2026",

				            UTF16ToWide(TruncateString(str, 9, CHARACTER_BREAK)));

				  // Test completely truncates string if break is on initial whitespace.

				  EXPECT_EQ(L"\x2026",

				            UTF16ToWide(TruncateString(ASCIIToUTF16("   "), 2, WORD_BREAK)));

				  EXPECT_EQ(L"\x2026",

				            UTF16ToWide(TruncateString(ASCIIToUTF16("   "), 2,

				                                       CHARACTER_BREAK)));

				  // Test breaking at the start of a later word.

				  EXPECT_EQ(L"fooooey\x2026", UTF16ToWide(TruncateString(str, 11, WORD_BREAK)));

				  EXPECT_EQ(L"fooooey\x2026",

				            UTF16ToWide(TruncateString(str, 11, CHARACTER_BREAK)));

				  // Break-only-at-word-boundaries tests:

				  // Test breaking in the middle of a word.

				  EXPECT_EQ(L"fooooey\x2026", UTF16ToWide(TruncateString(str, 12, WORD_BREAK)));

				  EXPECT_EQ(L"fooooey\x2026",

				            UTF16ToWide(TruncateString(str, 12, CHARACTER_BREAK)));

				  EXPECT_EQ(L"fooooey\x2026", UTF16ToWide(TruncateString(str, 14, WORD_BREAK)));

				  EXPECT_EQ(L"fooooey    bx\x2026",

				            UTF16ToWide(TruncateString(str, 14, CHARACTER_BREAK)));

				  // Test adds ... at right spot when there is enough room to break at a

				  // word boundary.

				  EXPECT_EQ(L"foooooey\x2026", UTF16ToWide(TruncateString(string, 14,

				                                                          WORD_BREAK)));

				  // Test breaking in whitespace at the end of the string.

				  EXPECT_EQ(L"fooooey    bxxxar baz\x2026",

				            UTF16ToWide(TruncateString(str, 22, WORD_BREAK)));

				  EXPECT_EQ(L"fooooey    bxxxar baz\x2026",

				            UTF16ToWide(TruncateString(str, 22, CHARACTER_BREAK)));

				  // Test adds ... at right spot when there is not enough space in first word.

				  EXPECT_EQ(L"f\x2026", UTF16ToWide(TruncateString(string, 2, WORD_BREAK)));

				  // Test breaking at the end of the string.

				  EXPECT_EQ(str, TruncateString(str, str.length(), WORD_BREAK));

				  EXPECT_EQ(str, TruncateString(str, str.length(), CHARACTER_BREAK));

				  // Test adds ... at right spot when there is not enough room to break at a

				  // word boundary.

				  EXPECT_EQ(L"foooooey\x2026", UTF16ToWide(TruncateString(string, 11,

				                                                          WORD_BREAK)));

				  // Test breaking past the end of the string.

				  EXPECT_EQ(str, TruncateString(str, str.length() + 10, WORD_BREAK));

				  EXPECT_EQ(str, TruncateString(str, str.length() + 10, CHARACTER_BREAK));

				  // Break-anywhere tests:

				  // Test adds ... at right spot within a word.

				  EXPECT_EQ(L"f\x2026", UTF16ToWide(TruncateString(string, 2,

				                                                   CHARACTER_BREAK)));

				  // Tests of strings with leading whitespace:

				  base::string16 str2 = ASCIIToUTF16("   foo");

				  // Test removes trailing whitespace if break falls between words.

				  EXPECT_EQ(L"foooooey\x2026", UTF16ToWide(TruncateString(string, 12,

				                                                          CHARACTER_BREAK)));

				  // Test breaking in leading whitespace.

				  EXPECT_EQ(L"\x2026", UTF16ToWide(TruncateString(str2, 2, WORD_BREAK)));

				  EXPECT_EQ(L"\x2026", UTF16ToWide(TruncateString(str2, 2, CHARACTER_BREAK)));

				  // Test breaking at the beginning of the first word, with leading whitespace.

				  EXPECT_EQ(L"\x2026", UTF16ToWide(TruncateString(str2, 3, WORD_BREAK)));

				  EXPECT_EQ(L"\x2026", UTF16ToWide(TruncateString(str2, 3, CHARACTER_BREAK)));

				  // Test breaking in the middle of the first word, with leading whitespace.

				  EXPECT_EQ(L"\x2026", UTF16ToWide(TruncateString(str2, 4, WORD_BREAK)));

				  EXPECT_EQ(L"\x2026", UTF16ToWide(TruncateString(str2, 4, CHARACTER_BREAK)));

				  EXPECT_EQ(L"   f\x2026", UTF16ToWide(TruncateString(str2, 5, WORD_BREAK)));

				  EXPECT_EQ(L"   f\x2026",

				            UTF16ToWide(TruncateString(str2, 5, CHARACTER_BREAK)));

				}

				}  // namespace gfx

Improve text elision in TruncateString().

60 ui/gfx/text_elider.cc

87 ui/gfx/text_elider_unittest.cc

60

ui/gfx/text_elider.cc

87

ui/gfx/text_elider_unittest.cc