0

Revert "Add logic to break PDFiumPage text runs by page objects"

This reverts commit a09403d61c.

Reason for revert: PDFiumPageTextTest.GetTextRunInfo fails on MSan, see crbug.com/996629

Original change's description:
> Add logic to break PDFiumPage text runs by page objects
> 
> This CL introduces a logic to break the text runs whenever a link is
> found while generating the text runs in PDFiumPage::GetTextRunInfo().
> 
> A new method PDFiumPage::CalculatePageObjectTextRunBreaks() is
> introduced which pre-populates a set of text breakpoints according to
> the start and end char indices of links in the page.
> PDFiumPage::GetTextRunInfo() uses that pre-populated set to determine
> the text run boundary.
> 
> This CL includes a unit test to verify the text tuns in a pdf page which
> has a link in between a text block.
> 
> Bug: 981448
> Change-Id: I2365dc84fa1e1206a58cd8fa4e918d660ce8bc00
> Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/1714773
> Reviewed-by: Lei Zhang <thestig@chromium.org>
> Reviewed-by: Kevin Babbitt <kbabbitt@microsoft.com>
> Commit-Queue: Pratish Kumar <prkum@microsoft.com>
> Cr-Commit-Position: refs/heads/master@{#689356}

TBR=thestig@chromium.org,kbabbitt@microsoft.com,manojb@microsoft.com,mohitb@microsoft.com,virens@microsoft.com,ankk@microsoft.com,prkum@microsoft.com

Change-Id: I31ac68a47651ea51fa0dc1b4cc94e1aad9ee63c9
No-Presubmit: true
No-Tree-Checks: true
No-Try: true
Bug: 981448, 996629
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/1763764
Reviewed-by: Marc Treib <treib@chromium.org>
Commit-Queue: Marc Treib <treib@chromium.org>
Cr-Commit-Position: refs/heads/master@{#689387}
This commit is contained in:
Marc Treib
2019-08-22 08:31:33 +00:00
committed by Commit Bot
parent eb55afdd25
commit 8780c1e265
6 changed files with 14 additions and 90 deletions

@ -203,27 +203,6 @@ FPDF_TEXTPAGE PDFiumPage::GetTextPage() {
return text_page();
}
void PDFiumPage::CalculatePageObjectTextRunBreaks() {
if (calculated_page_object_text_run_breaks_)
return;
int chars_count = FPDFText_CountChars(GetTextPage());
if (chars_count == 0)
return;
calculated_page_object_text_run_breaks_ = true;
CalculateLinks();
for (const auto& link : links_) {
if (link.start_char_index >= 0 && link.start_char_index < chars_count) {
page_object_text_run_breaks_.insert(link.start_char_index);
int next_text_run_break_index = link.start_char_index + link.char_count;
// Don't insert a break if the link is at the end of the page text.
if (next_text_run_break_index < chars_count) {
page_object_text_run_breaks_.insert(next_text_run_break_index);
}
}
}
}
void PDFiumPage::GetTextRunInfo(int start_char_index,
uint32_t* out_len,
double* out_font_size,
@ -262,21 +241,9 @@ void PDFiumPage::GetTextRunInfo(int start_char_index,
float estimated_font_size =
std::max(start_char_rect.width(), start_char_rect.height());
CalculatePageObjectTextRunBreaks();
const auto breakpoint_iter =
std::lower_bound(page_object_text_run_breaks_.begin(),
page_object_text_run_breaks_.end(), char_index);
int breakpoint_index = breakpoint_iter != page_object_text_run_breaks_.end()
? *breakpoint_iter
: -1;
// Continue adding characters until heuristics indicate we should end the text
// run.
while (char_index < chars_count) {
// Split a text run when it encounters a page object like links or images.
if (char_index == breakpoint_index)
break;
unsigned int character = FPDFText_GetUnicode(text_page, char_index);
pp::FloatRect char_rect =
GetFloatCharRectInPixels(page, text_page, char_index);

@ -157,9 +157,6 @@ class PDFiumPage {
// Returns link type and fills target associated with a URI action. Returns
// NONSELECTABLE_AREA if detection failed.
Area GetURITarget(FPDF_ACTION uri_action, LinkTarget* target) const;
// Calculates the set of character indices on which text runs need to be
// broken for page objects such as links and images.
void CalculatePageObjectTextRunBreaks();
class ScopedUnloadPreventer {
public:
@ -205,10 +202,6 @@ class PDFiumPage {
std::vector<Link> links_;
bool calculated_images_ = false;
std::vector<Image> images_;
bool calculated_page_object_text_run_breaks_ = false;
// The set of character indices on which text runs need to be broken for page
// objects.
std::set<int> page_object_text_run_breaks_;
bool available_;
PDFEngine::PageFeatures page_features_;

@ -21,6 +21,10 @@ namespace chrome_pdf {
namespace {
bool IsValidLinkForTesting(const std::string& url) {
return !url.empty();
}
TEST(PDFiumPageTest, ToPDFiumRotation) {
EXPECT_EQ(ToPDFiumRotation(PageOrientation::kOriginal), 0);
EXPECT_EQ(ToPDFiumRotation(PageOrientation::kClockwise90), 1);
@ -41,8 +45,13 @@ TEST(PDFiumPageDeathTest, ToPDFiumRotation) {
class PDFiumPageLinkTest : public PDFiumTestBase {
public:
PDFiumPageLinkTest() = default;
~PDFiumPageLinkTest() override = default;
PDFiumPageLinkTest() {
PDFiumPage::SetIsValidLinkFunctionForTesting(&IsValidLinkForTesting);
}
~PDFiumPageLinkTest() override {
PDFiumPage::SetIsValidLinkFunctionForTesting(nullptr);
}
const std::vector<PDFiumPage::Link>& GetLinks(PDFiumEngine* engine,
int page_index) {
@ -68,7 +77,7 @@ TEST_F(PDFiumPageLinkTest, TestLinkGeneration) {
#endif
const std::vector<PDFiumPage::Link>& links = GetLinks(engine.get(), 0);
ASSERT_EQ(3u, links.size());
ASSERT_EQ(2u, links.size());
const PDFiumPage::Link& link = links[0];
EXPECT_EQ("http://yahoo.com", link.url);
@ -91,13 +100,6 @@ TEST_F(PDFiumPageLinkTest, TestLinkGeneration) {
} else {
CompareRect({131, 121, 138, 20}, second_link.bounding_rects[0]);
}
const PDFiumPage::Link& third_link = links[2];
EXPECT_EQ("http://google.com", third_link.url);
EXPECT_EQ(92, third_link.start_char_index);
EXPECT_EQ(17, third_link.char_count);
ASSERT_EQ(1u, third_link.bounding_rects.size());
CompareRect({82, 67, 161, 21}, third_link.bounding_rects[0]);
}
using PDFiumPageImageTest = PDFiumTestBase;
@ -118,30 +120,4 @@ TEST_F(PDFiumPageImageTest, TestCalculateImages) {
CompareRect({380, 678, 1, 1}, page->images_[2].bounding_rect);
}
using PDFiumPageTextTest = PDFiumTestBase;
TEST_F(PDFiumPageTextTest, GetTextRunInfo) {
TestClient client;
std::unique_ptr<PDFiumEngine> engine =
InitializeEngine(&client, FILE_PATH_LITERAL("weblinks.pdf"));
ASSERT_TRUE(engine);
int current_char_index = 0;
uint32_t text_run_length;
double text_run_font_size;
pp::FloatRect bounds;
// The links span from [7, 22], [52, 66] and [92, 108] with 16, 15 and 17
// text run lengths respectively. There are text runs preceding and
// succeeding them.
static constexpr uint32_t kExpectedTextRunLengths[] = {7, 16, 20, 9, 15,
20, 5, 17, 0};
for (uint32_t expected_length : kExpectedTextRunLengths) {
engine->GetTextRunInfo(0, current_char_index, &text_run_length,
&text_run_font_size, &bounds);
EXPECT_EQ(expected_length, text_run_length);
current_char_index += text_run_length;
}
}
} // namespace chrome_pdf

@ -21,10 +21,6 @@ std::unique_ptr<DocumentLoader> CreateTestDocumentLoader(
return std::make_unique<TestDocumentLoader>(client, g_test_pdf_name);
}
bool IsValidLinkForTesting(const std::string& url) {
return !url.empty();
}
} // namespace
PDFiumTestBase::PDFiumTestBase() = default;
@ -33,12 +29,10 @@ PDFiumTestBase::~PDFiumTestBase() = default;
void PDFiumTestBase::SetUp() {
InitializePDFium();
PDFiumPage::SetIsValidLinkFunctionForTesting(&IsValidLinkForTesting);
}
void PDFiumTestBase::TearDown() {
PDFiumEngine::SetCreateDocumentLoaderFunctionForTesting(nullptr);
PDFiumPage::SetIsValidLinkFunctionForTesting(nullptr);
g_test_pdf_name = nullptr;
FPDF_DestroyLibrary();
}

@ -46,9 +46,6 @@ BT
0 50 Td
/F2 16 Tf
(Goodbye, http://bing.com! nice meeting you) Tj
0 40 Td
/F2 16 Tf
(Bye, http://google.com) Tj
ET
endstream
endobj

@ -37,7 +37,7 @@ endobj
>>
endobj
6 0 obj <<
/Length 184
/Length 138
>>
stream
BT
@ -47,9 +47,6 @@ BT
0 50 Td
/F2 16 Tf
(Goodbye, http://bing.com! nice meeting you) Tj
0 40 Td
/F2 16 Tf
(Bye, http://google.com) Tj
ET
endstream
endobj
@ -67,5 +64,5 @@ trailer <<
/Size 7
>>
startxref
689
642
%%EOF