0

Improve text elision in TruncateString().

* When doing word-breaking in the middle of the first word in a string with
  leading whitespace, we would incorrectly truncate the entire string.
* When doing word-breaking in whitespace after a word, we would incorrectly omit
  that word from the truncated string.

This also adds more tests and organizes them a bit more readably.

BUG=none
TEST=Select up to but not including the 'f' in the string "aaaaaaaaa bbbbbbbbb ccccccccc ddddddddd eeeeeeeee f".  Right-click the selection and look at the "Search Google for..." string.  It should include 'e's.

Review URL: https://codereview.chromium.org/1340423006

Cr-Commit-Position: refs/heads/master@{#350310}
This commit is contained in:
pkasting
2015-09-22 22:59:31 -07:00
committed by Commit bot
parent d7478de1ae
commit fc33692651
2 changed files with 82 additions and 65 deletions

@ -752,45 +752,43 @@ int ElideRectangleText(const base::string16& input,
base::string16 TruncateString(const base::string16& string,
size_t length,
BreakType break_type) {
DCHECK(break_type == CHARACTER_BREAK || break_type == WORD_BREAK);
const bool word_break = break_type == WORD_BREAK;
DCHECK(word_break || (break_type == CHARACTER_BREAK));
if (string.size() <= length)
// String fits, return it.
return string;
return string; // No need to elide.
if (length == 0)
// No room for the elide string, return an empty string.
return base::string16();
size_t max = length - 1;
return base::string16(); // No room for anything, even an ellipsis.
// Added to the end of strings that are too big.
static const base::char16 kElideString[] = { 0x2026, 0 };
if (max == 0)
// Just enough room for the elide string.
return kElideString;
if (length == 1)
return kElideString; // Only room for an ellipsis.
int32_t index = static_cast<int32_t>(max);
if (break_type == WORD_BREAK) {
// Use a line iterator to find the first boundary.
int32_t index = static_cast<int32_t>(length - 1);
if (word_break) {
// Use a word iterator to find the first boundary.
UErrorCode status = U_ZERO_ERROR;
scoped_ptr<icu::BreakIterator> bi(
icu::RuleBasedBreakIterator::createLineInstance(
icu::RuleBasedBreakIterator::createWordInstance(
icu::Locale::getDefault(), status));
if (U_FAILURE(status))
return string.substr(0, max) + kElideString;
return string.substr(0, length - 1) + kElideString;
bi->setText(string.c_str());
index = bi->preceding(index);
index = bi->preceding(static_cast<int32_t>(length));
if (index == icu::BreakIterator::DONE || index == 0) {
// We either found no valid line break at all, or one right at the
// We either found no valid word break at all, or one right at the
// beginning of the string. Go back to the end; we'll have to break in the
// middle of a word.
index = static_cast<int32_t>(max);
index = static_cast<int32_t>(length - 1);
}
}
// Use a character iterator to find the previous non-whitespace character.
// By this point, |index| should point at the character that's a candidate for
// replacing with an ellipsis. Use a character iterator to check previous
// characters and stop as soon as we find a previous non-whitespace character.
icu::StringCharacterIterator char_iterator(string.c_str());
char_iterator.setIndex(index);
while (char_iterator.hasPrevious()) {
@ -798,21 +796,23 @@ base::string16 TruncateString(const base::string16& string,
if (!(u_isspace(char_iterator.current()) ||
u_charType(char_iterator.current()) == U_CONTROL_CHAR ||
u_charType(char_iterator.current()) == U_NON_SPACING_MARK)) {
// Not a whitespace character. Advance the iterator so that we
// include the current character in the truncated string.
// Not a whitespace character. Truncate to everything up to and including
// this character, and append an ellipsis.
char_iterator.next();
break;
return string.substr(0, char_iterator.getIndex()) + kElideString;
}
}
if (char_iterator.hasPrevious()) {
// Found a valid break point.
index = char_iterator.getIndex();
} else {
// String has leading whitespace, return the elide string.
return kElideString;
}
return string.substr(0, index) + kElideString;
// Couldn't find a previous non-whitespace character. If we were originally
// word-breaking, and index != length - 1, then the string is |index|
// whitespace characters followed by a word we're trying to break in the midst
// of, and we can fit at least one character of the word in the elided string.
// Do that rather than just returning an ellipsis.
if (word_break && (index != static_cast<int32_t>(length - 1)))
return string.substr(0, length - 1) + kElideString;
// Trying to break after only whitespace, elide all of it.
return kElideString;
}
} // namespace gfx

@ -1071,53 +1071,70 @@ TEST(TextEliderTest, MAYBE_ElideRectangleWide32) {
#define MAYBE_TruncateString TruncateString
#endif
TEST(TextEliderTest, MAYBE_TruncateString) {
base::string16 string = ASCIIToUTF16("foooooey bxxxar baz");
base::string16 str = ASCIIToUTF16("fooooey bxxxar baz ");
// Tests that apply to both break behaviors:
// Test breaking at character 0.
EXPECT_EQ(base::string16(), TruncateString(str, 0, WORD_BREAK));
EXPECT_EQ(base::string16(), TruncateString(str, 0, CHARACTER_BREAK));
// Make sure it doesn't modify the string if length > string length.
EXPECT_EQ(string, TruncateString(string, 100, WORD_BREAK));
EXPECT_EQ(string, TruncateString(string, 100, CHARACTER_BREAK));
// Test breaking at character 1.
EXPECT_EQ(L"\x2026", UTF16ToWide(TruncateString(str, 1, WORD_BREAK)));
EXPECT_EQ(L"\x2026", UTF16ToWide(TruncateString(str, 1, CHARACTER_BREAK)));
// Test no characters.
EXPECT_EQ(L"", UTF16ToWide(TruncateString(string, 0, WORD_BREAK)));
EXPECT_EQ(L"", UTF16ToWide(TruncateString(string, 0, CHARACTER_BREAK)));
// Test breaking in the middle of the first word.
EXPECT_EQ(L"f\x2026", UTF16ToWide(TruncateString(str, 2, WORD_BREAK)));
EXPECT_EQ(L"f\x2026", UTF16ToWide(TruncateString(str, 2, CHARACTER_BREAK)));
// Test 1 character.
EXPECT_EQ(L"\x2026", UTF16ToWide(TruncateString(string, 1, WORD_BREAK)));
EXPECT_EQ(L"\x2026", UTF16ToWide(TruncateString(string, 1, CHARACTER_BREAK)));
// Test breaking in between words.
EXPECT_EQ(L"fooooey\x2026", UTF16ToWide(TruncateString(str, 9, WORD_BREAK)));
EXPECT_EQ(L"fooooey\x2026",
UTF16ToWide(TruncateString(str, 9, CHARACTER_BREAK)));
// Test completely truncates string if break is on initial whitespace.
EXPECT_EQ(L"\x2026",
UTF16ToWide(TruncateString(ASCIIToUTF16(" "), 2, WORD_BREAK)));
EXPECT_EQ(L"\x2026",
UTF16ToWide(TruncateString(ASCIIToUTF16(" "), 2,
CHARACTER_BREAK)));
// Test breaking at the start of a later word.
EXPECT_EQ(L"fooooey\x2026", UTF16ToWide(TruncateString(str, 11, WORD_BREAK)));
EXPECT_EQ(L"fooooey\x2026",
UTF16ToWide(TruncateString(str, 11, CHARACTER_BREAK)));
// Break-only-at-word-boundaries tests:
// Test breaking in the middle of a word.
EXPECT_EQ(L"fooooey\x2026", UTF16ToWide(TruncateString(str, 12, WORD_BREAK)));
EXPECT_EQ(L"fooooey\x2026",
UTF16ToWide(TruncateString(str, 12, CHARACTER_BREAK)));
EXPECT_EQ(L"fooooey\x2026", UTF16ToWide(TruncateString(str, 14, WORD_BREAK)));
EXPECT_EQ(L"fooooey bx\x2026",
UTF16ToWide(TruncateString(str, 14, CHARACTER_BREAK)));
// Test adds ... at right spot when there is enough room to break at a
// word boundary.
EXPECT_EQ(L"foooooey\x2026", UTF16ToWide(TruncateString(string, 14,
WORD_BREAK)));
// Test breaking in whitespace at the end of the string.
EXPECT_EQ(L"fooooey bxxxar baz\x2026",
UTF16ToWide(TruncateString(str, 22, WORD_BREAK)));
EXPECT_EQ(L"fooooey bxxxar baz\x2026",
UTF16ToWide(TruncateString(str, 22, CHARACTER_BREAK)));
// Test adds ... at right spot when there is not enough space in first word.
EXPECT_EQ(L"f\x2026", UTF16ToWide(TruncateString(string, 2, WORD_BREAK)));
// Test breaking at the end of the string.
EXPECT_EQ(str, TruncateString(str, str.length(), WORD_BREAK));
EXPECT_EQ(str, TruncateString(str, str.length(), CHARACTER_BREAK));
// Test adds ... at right spot when there is not enough room to break at a
// word boundary.
EXPECT_EQ(L"foooooey\x2026", UTF16ToWide(TruncateString(string, 11,
WORD_BREAK)));
// Test breaking past the end of the string.
EXPECT_EQ(str, TruncateString(str, str.length() + 10, WORD_BREAK));
EXPECT_EQ(str, TruncateString(str, str.length() + 10, CHARACTER_BREAK));
// Break-anywhere tests:
// Test adds ... at right spot within a word.
EXPECT_EQ(L"f\x2026", UTF16ToWide(TruncateString(string, 2,
CHARACTER_BREAK)));
// Tests of strings with leading whitespace:
base::string16 str2 = ASCIIToUTF16(" foo");
// Test removes trailing whitespace if break falls between words.
EXPECT_EQ(L"foooooey\x2026", UTF16ToWide(TruncateString(string, 12,
CHARACTER_BREAK)));
// Test breaking in leading whitespace.
EXPECT_EQ(L"\x2026", UTF16ToWide(TruncateString(str2, 2, WORD_BREAK)));
EXPECT_EQ(L"\x2026", UTF16ToWide(TruncateString(str2, 2, CHARACTER_BREAK)));
// Test breaking at the beginning of the first word, with leading whitespace.
EXPECT_EQ(L"\x2026", UTF16ToWide(TruncateString(str2, 3, WORD_BREAK)));
EXPECT_EQ(L"\x2026", UTF16ToWide(TruncateString(str2, 3, CHARACTER_BREAK)));
// Test breaking in the middle of the first word, with leading whitespace.
EXPECT_EQ(L"\x2026", UTF16ToWide(TruncateString(str2, 4, WORD_BREAK)));
EXPECT_EQ(L"\x2026", UTF16ToWide(TruncateString(str2, 4, CHARACTER_BREAK)));
EXPECT_EQ(L" f\x2026", UTF16ToWide(TruncateString(str2, 5, WORD_BREAK)));
EXPECT_EQ(L" f\x2026",
UTF16ToWide(TruncateString(str2, 5, CHARACTER_BREAK)));
}
} // namespace gfx