Improve text elision in TruncateString().
* When doing word-breaking in the middle of the first word in a string with leading whitespace, we would incorrectly truncate the entire string. * When doing word-breaking in whitespace after a word, we would incorrectly omit that word from the truncated string. This also adds more tests and organizes them a bit more readably. BUG=none TEST=Select up to but not including the 'f' in the string "aaaaaaaaa bbbbbbbbb ccccccccc ddddddddd eeeeeeeee f". Right-click the selection and look at the "Search Google for..." string. It should include 'e's. Review URL: https://codereview.chromium.org/1340423006 Cr-Commit-Position: refs/heads/master@{#350310}
This commit is contained in:
@ -752,45 +752,43 @@ int ElideRectangleText(const base::string16& input,
|
||||
base::string16 TruncateString(const base::string16& string,
|
||||
size_t length,
|
||||
BreakType break_type) {
|
||||
DCHECK(break_type == CHARACTER_BREAK || break_type == WORD_BREAK);
|
||||
const bool word_break = break_type == WORD_BREAK;
|
||||
DCHECK(word_break || (break_type == CHARACTER_BREAK));
|
||||
|
||||
if (string.size() <= length)
|
||||
// String fits, return it.
|
||||
return string;
|
||||
return string; // No need to elide.
|
||||
|
||||
if (length == 0)
|
||||
// No room for the elide string, return an empty string.
|
||||
return base::string16();
|
||||
|
||||
size_t max = length - 1;
|
||||
return base::string16(); // No room for anything, even an ellipsis.
|
||||
|
||||
// Added to the end of strings that are too big.
|
||||
static const base::char16 kElideString[] = { 0x2026, 0 };
|
||||
|
||||
if (max == 0)
|
||||
// Just enough room for the elide string.
|
||||
return kElideString;
|
||||
if (length == 1)
|
||||
return kElideString; // Only room for an ellipsis.
|
||||
|
||||
int32_t index = static_cast<int32_t>(max);
|
||||
if (break_type == WORD_BREAK) {
|
||||
// Use a line iterator to find the first boundary.
|
||||
int32_t index = static_cast<int32_t>(length - 1);
|
||||
if (word_break) {
|
||||
// Use a word iterator to find the first boundary.
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
scoped_ptr<icu::BreakIterator> bi(
|
||||
icu::RuleBasedBreakIterator::createLineInstance(
|
||||
icu::RuleBasedBreakIterator::createWordInstance(
|
||||
icu::Locale::getDefault(), status));
|
||||
if (U_FAILURE(status))
|
||||
return string.substr(0, max) + kElideString;
|
||||
return string.substr(0, length - 1) + kElideString;
|
||||
bi->setText(string.c_str());
|
||||
index = bi->preceding(index);
|
||||
index = bi->preceding(static_cast<int32_t>(length));
|
||||
if (index == icu::BreakIterator::DONE || index == 0) {
|
||||
// We either found no valid line break at all, or one right at the
|
||||
// We either found no valid word break at all, or one right at the
|
||||
// beginning of the string. Go back to the end; we'll have to break in the
|
||||
// middle of a word.
|
||||
index = static_cast<int32_t>(max);
|
||||
index = static_cast<int32_t>(length - 1);
|
||||
}
|
||||
}
|
||||
|
||||
// Use a character iterator to find the previous non-whitespace character.
|
||||
// By this point, |index| should point at the character that's a candidate for
|
||||
// replacing with an ellipsis. Use a character iterator to check previous
|
||||
// characters and stop as soon as we find a previous non-whitespace character.
|
||||
icu::StringCharacterIterator char_iterator(string.c_str());
|
||||
char_iterator.setIndex(index);
|
||||
while (char_iterator.hasPrevious()) {
|
||||
@ -798,21 +796,23 @@ base::string16 TruncateString(const base::string16& string,
|
||||
if (!(u_isspace(char_iterator.current()) ||
|
||||
u_charType(char_iterator.current()) == U_CONTROL_CHAR ||
|
||||
u_charType(char_iterator.current()) == U_NON_SPACING_MARK)) {
|
||||
// Not a whitespace character. Advance the iterator so that we
|
||||
// include the current character in the truncated string.
|
||||
// Not a whitespace character. Truncate to everything up to and including
|
||||
// this character, and append an ellipsis.
|
||||
char_iterator.next();
|
||||
break;
|
||||
return string.substr(0, char_iterator.getIndex()) + kElideString;
|
||||
}
|
||||
}
|
||||
if (char_iterator.hasPrevious()) {
|
||||
// Found a valid break point.
|
||||
index = char_iterator.getIndex();
|
||||
} else {
|
||||
// String has leading whitespace, return the elide string.
|
||||
return kElideString;
|
||||
}
|
||||
|
||||
return string.substr(0, index) + kElideString;
|
||||
// Couldn't find a previous non-whitespace character. If we were originally
|
||||
// word-breaking, and index != length - 1, then the string is |index|
|
||||
// whitespace characters followed by a word we're trying to break in the midst
|
||||
// of, and we can fit at least one character of the word in the elided string.
|
||||
// Do that rather than just returning an ellipsis.
|
||||
if (word_break && (index != static_cast<int32_t>(length - 1)))
|
||||
return string.substr(0, length - 1) + kElideString;
|
||||
|
||||
// Trying to break after only whitespace, elide all of it.
|
||||
return kElideString;
|
||||
}
|
||||
|
||||
} // namespace gfx
|
||||
|
@ -1071,53 +1071,70 @@ TEST(TextEliderTest, MAYBE_ElideRectangleWide32) {
|
||||
#define MAYBE_TruncateString TruncateString
|
||||
#endif
|
||||
TEST(TextEliderTest, MAYBE_TruncateString) {
|
||||
base::string16 string = ASCIIToUTF16("foooooey bxxxar baz");
|
||||
base::string16 str = ASCIIToUTF16("fooooey bxxxar baz ");
|
||||
|
||||
// Tests that apply to both break behaviors:
|
||||
// Test breaking at character 0.
|
||||
EXPECT_EQ(base::string16(), TruncateString(str, 0, WORD_BREAK));
|
||||
EXPECT_EQ(base::string16(), TruncateString(str, 0, CHARACTER_BREAK));
|
||||
|
||||
// Make sure it doesn't modify the string if length > string length.
|
||||
EXPECT_EQ(string, TruncateString(string, 100, WORD_BREAK));
|
||||
EXPECT_EQ(string, TruncateString(string, 100, CHARACTER_BREAK));
|
||||
// Test breaking at character 1.
|
||||
EXPECT_EQ(L"\x2026", UTF16ToWide(TruncateString(str, 1, WORD_BREAK)));
|
||||
EXPECT_EQ(L"\x2026", UTF16ToWide(TruncateString(str, 1, CHARACTER_BREAK)));
|
||||
|
||||
// Test no characters.
|
||||
EXPECT_EQ(L"", UTF16ToWide(TruncateString(string, 0, WORD_BREAK)));
|
||||
EXPECT_EQ(L"", UTF16ToWide(TruncateString(string, 0, CHARACTER_BREAK)));
|
||||
// Test breaking in the middle of the first word.
|
||||
EXPECT_EQ(L"f\x2026", UTF16ToWide(TruncateString(str, 2, WORD_BREAK)));
|
||||
EXPECT_EQ(L"f\x2026", UTF16ToWide(TruncateString(str, 2, CHARACTER_BREAK)));
|
||||
|
||||
// Test 1 character.
|
||||
EXPECT_EQ(L"\x2026", UTF16ToWide(TruncateString(string, 1, WORD_BREAK)));
|
||||
EXPECT_EQ(L"\x2026", UTF16ToWide(TruncateString(string, 1, CHARACTER_BREAK)));
|
||||
// Test breaking in between words.
|
||||
EXPECT_EQ(L"fooooey\x2026", UTF16ToWide(TruncateString(str, 9, WORD_BREAK)));
|
||||
EXPECT_EQ(L"fooooey\x2026",
|
||||
UTF16ToWide(TruncateString(str, 9, CHARACTER_BREAK)));
|
||||
|
||||
// Test completely truncates string if break is on initial whitespace.
|
||||
EXPECT_EQ(L"\x2026",
|
||||
UTF16ToWide(TruncateString(ASCIIToUTF16(" "), 2, WORD_BREAK)));
|
||||
EXPECT_EQ(L"\x2026",
|
||||
UTF16ToWide(TruncateString(ASCIIToUTF16(" "), 2,
|
||||
CHARACTER_BREAK)));
|
||||
// Test breaking at the start of a later word.
|
||||
EXPECT_EQ(L"fooooey\x2026", UTF16ToWide(TruncateString(str, 11, WORD_BREAK)));
|
||||
EXPECT_EQ(L"fooooey\x2026",
|
||||
UTF16ToWide(TruncateString(str, 11, CHARACTER_BREAK)));
|
||||
|
||||
// Break-only-at-word-boundaries tests:
|
||||
// Test breaking in the middle of a word.
|
||||
EXPECT_EQ(L"fooooey\x2026", UTF16ToWide(TruncateString(str, 12, WORD_BREAK)));
|
||||
EXPECT_EQ(L"fooooey\x2026",
|
||||
UTF16ToWide(TruncateString(str, 12, CHARACTER_BREAK)));
|
||||
EXPECT_EQ(L"fooooey\x2026", UTF16ToWide(TruncateString(str, 14, WORD_BREAK)));
|
||||
EXPECT_EQ(L"fooooey bx\x2026",
|
||||
UTF16ToWide(TruncateString(str, 14, CHARACTER_BREAK)));
|
||||
|
||||
// Test adds ... at right spot when there is enough room to break at a
|
||||
// word boundary.
|
||||
EXPECT_EQ(L"foooooey\x2026", UTF16ToWide(TruncateString(string, 14,
|
||||
WORD_BREAK)));
|
||||
// Test breaking in whitespace at the end of the string.
|
||||
EXPECT_EQ(L"fooooey bxxxar baz\x2026",
|
||||
UTF16ToWide(TruncateString(str, 22, WORD_BREAK)));
|
||||
EXPECT_EQ(L"fooooey bxxxar baz\x2026",
|
||||
UTF16ToWide(TruncateString(str, 22, CHARACTER_BREAK)));
|
||||
|
||||
// Test adds ... at right spot when there is not enough space in first word.
|
||||
EXPECT_EQ(L"f\x2026", UTF16ToWide(TruncateString(string, 2, WORD_BREAK)));
|
||||
// Test breaking at the end of the string.
|
||||
EXPECT_EQ(str, TruncateString(str, str.length(), WORD_BREAK));
|
||||
EXPECT_EQ(str, TruncateString(str, str.length(), CHARACTER_BREAK));
|
||||
|
||||
// Test adds ... at right spot when there is not enough room to break at a
|
||||
// word boundary.
|
||||
EXPECT_EQ(L"foooooey\x2026", UTF16ToWide(TruncateString(string, 11,
|
||||
WORD_BREAK)));
|
||||
// Test breaking past the end of the string.
|
||||
EXPECT_EQ(str, TruncateString(str, str.length() + 10, WORD_BREAK));
|
||||
EXPECT_EQ(str, TruncateString(str, str.length() + 10, CHARACTER_BREAK));
|
||||
|
||||
// Break-anywhere tests:
|
||||
|
||||
// Test adds ... at right spot within a word.
|
||||
EXPECT_EQ(L"f\x2026", UTF16ToWide(TruncateString(string, 2,
|
||||
CHARACTER_BREAK)));
|
||||
// Tests of strings with leading whitespace:
|
||||
base::string16 str2 = ASCIIToUTF16(" foo");
|
||||
|
||||
// Test removes trailing whitespace if break falls between words.
|
||||
EXPECT_EQ(L"foooooey\x2026", UTF16ToWide(TruncateString(string, 12,
|
||||
CHARACTER_BREAK)));
|
||||
// Test breaking in leading whitespace.
|
||||
EXPECT_EQ(L"\x2026", UTF16ToWide(TruncateString(str2, 2, WORD_BREAK)));
|
||||
EXPECT_EQ(L"\x2026", UTF16ToWide(TruncateString(str2, 2, CHARACTER_BREAK)));
|
||||
|
||||
// Test breaking at the beginning of the first word, with leading whitespace.
|
||||
EXPECT_EQ(L"\x2026", UTF16ToWide(TruncateString(str2, 3, WORD_BREAK)));
|
||||
EXPECT_EQ(L"\x2026", UTF16ToWide(TruncateString(str2, 3, CHARACTER_BREAK)));
|
||||
|
||||
// Test breaking in the middle of the first word, with leading whitespace.
|
||||
EXPECT_EQ(L"\x2026", UTF16ToWide(TruncateString(str2, 4, WORD_BREAK)));
|
||||
EXPECT_EQ(L"\x2026", UTF16ToWide(TruncateString(str2, 4, CHARACTER_BREAK)));
|
||||
EXPECT_EQ(L" f\x2026", UTF16ToWide(TruncateString(str2, 5, WORD_BREAK)));
|
||||
EXPECT_EQ(L" f\x2026",
|
||||
UTF16ToWide(TruncateString(str2, 5, CHARACTER_BREAK)));
|
||||
}
|
||||
|
||||
} // namespace gfx
|
||||
|
Reference in New Issue
Block a user