Revert "Add logic to break PDFiumPage text runs by page objects"
This reverts commit a09403d61c
.
Reason for revert: PDFiumPageTextTest.GetTextRunInfo fails on MSan, see crbug.com/996629
Original change's description:
> Add logic to break PDFiumPage text runs by page objects
>
> This CL introduces a logic to break the text runs whenever a link is
> found while generating the text runs in PDFiumPage::GetTextRunInfo().
>
> A new method PDFiumPage::CalculatePageObjectTextRunBreaks() is
> introduced which pre-populates a set of text breakpoints according to
> the start and end char indices of links in the page.
> PDFiumPage::GetTextRunInfo() uses that pre-populated set to determine
> the text run boundary.
>
> This CL includes a unit test to verify the text tuns in a pdf page which
> has a link in between a text block.
>
> Bug: 981448
> Change-Id: I2365dc84fa1e1206a58cd8fa4e918d660ce8bc00
> Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/1714773
> Reviewed-by: Lei Zhang <thestig@chromium.org>
> Reviewed-by: Kevin Babbitt <kbabbitt@microsoft.com>
> Commit-Queue: Pratish Kumar <prkum@microsoft.com>
> Cr-Commit-Position: refs/heads/master@{#689356}
TBR=thestig@chromium.org,kbabbitt@microsoft.com,manojb@microsoft.com,mohitb@microsoft.com,virens@microsoft.com,ankk@microsoft.com,prkum@microsoft.com
Change-Id: I31ac68a47651ea51fa0dc1b4cc94e1aad9ee63c9
No-Presubmit: true
No-Tree-Checks: true
No-Try: true
Bug: 981448, 996629
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/1763764
Reviewed-by: Marc Treib <treib@chromium.org>
Commit-Queue: Marc Treib <treib@chromium.org>
Cr-Commit-Position: refs/heads/master@{#689387}
This commit is contained in:
@ -203,27 +203,6 @@ FPDF_TEXTPAGE PDFiumPage::GetTextPage() {
|
||||
return text_page();
|
||||
}
|
||||
|
||||
void PDFiumPage::CalculatePageObjectTextRunBreaks() {
|
||||
if (calculated_page_object_text_run_breaks_)
|
||||
return;
|
||||
int chars_count = FPDFText_CountChars(GetTextPage());
|
||||
if (chars_count == 0)
|
||||
return;
|
||||
|
||||
calculated_page_object_text_run_breaks_ = true;
|
||||
CalculateLinks();
|
||||
for (const auto& link : links_) {
|
||||
if (link.start_char_index >= 0 && link.start_char_index < chars_count) {
|
||||
page_object_text_run_breaks_.insert(link.start_char_index);
|
||||
int next_text_run_break_index = link.start_char_index + link.char_count;
|
||||
// Don't insert a break if the link is at the end of the page text.
|
||||
if (next_text_run_break_index < chars_count) {
|
||||
page_object_text_run_breaks_.insert(next_text_run_break_index);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void PDFiumPage::GetTextRunInfo(int start_char_index,
|
||||
uint32_t* out_len,
|
||||
double* out_font_size,
|
||||
@ -262,21 +241,9 @@ void PDFiumPage::GetTextRunInfo(int start_char_index,
|
||||
float estimated_font_size =
|
||||
std::max(start_char_rect.width(), start_char_rect.height());
|
||||
|
||||
CalculatePageObjectTextRunBreaks();
|
||||
const auto breakpoint_iter =
|
||||
std::lower_bound(page_object_text_run_breaks_.begin(),
|
||||
page_object_text_run_breaks_.end(), char_index);
|
||||
int breakpoint_index = breakpoint_iter != page_object_text_run_breaks_.end()
|
||||
? *breakpoint_iter
|
||||
: -1;
|
||||
|
||||
// Continue adding characters until heuristics indicate we should end the text
|
||||
// run.
|
||||
while (char_index < chars_count) {
|
||||
// Split a text run when it encounters a page object like links or images.
|
||||
if (char_index == breakpoint_index)
|
||||
break;
|
||||
|
||||
unsigned int character = FPDFText_GetUnicode(text_page, char_index);
|
||||
pp::FloatRect char_rect =
|
||||
GetFloatCharRectInPixels(page, text_page, char_index);
|
||||
|
@ -157,9 +157,6 @@ class PDFiumPage {
|
||||
// Returns link type and fills target associated with a URI action. Returns
|
||||
// NONSELECTABLE_AREA if detection failed.
|
||||
Area GetURITarget(FPDF_ACTION uri_action, LinkTarget* target) const;
|
||||
// Calculates the set of character indices on which text runs need to be
|
||||
// broken for page objects such as links and images.
|
||||
void CalculatePageObjectTextRunBreaks();
|
||||
|
||||
class ScopedUnloadPreventer {
|
||||
public:
|
||||
@ -205,10 +202,6 @@ class PDFiumPage {
|
||||
std::vector<Link> links_;
|
||||
bool calculated_images_ = false;
|
||||
std::vector<Image> images_;
|
||||
bool calculated_page_object_text_run_breaks_ = false;
|
||||
// The set of character indices on which text runs need to be broken for page
|
||||
// objects.
|
||||
std::set<int> page_object_text_run_breaks_;
|
||||
bool available_;
|
||||
PDFEngine::PageFeatures page_features_;
|
||||
|
||||
|
@ -21,6 +21,10 @@ namespace chrome_pdf {
|
||||
|
||||
namespace {
|
||||
|
||||
bool IsValidLinkForTesting(const std::string& url) {
|
||||
return !url.empty();
|
||||
}
|
||||
|
||||
TEST(PDFiumPageTest, ToPDFiumRotation) {
|
||||
EXPECT_EQ(ToPDFiumRotation(PageOrientation::kOriginal), 0);
|
||||
EXPECT_EQ(ToPDFiumRotation(PageOrientation::kClockwise90), 1);
|
||||
@ -41,8 +45,13 @@ TEST(PDFiumPageDeathTest, ToPDFiumRotation) {
|
||||
|
||||
class PDFiumPageLinkTest : public PDFiumTestBase {
|
||||
public:
|
||||
PDFiumPageLinkTest() = default;
|
||||
~PDFiumPageLinkTest() override = default;
|
||||
PDFiumPageLinkTest() {
|
||||
PDFiumPage::SetIsValidLinkFunctionForTesting(&IsValidLinkForTesting);
|
||||
}
|
||||
|
||||
~PDFiumPageLinkTest() override {
|
||||
PDFiumPage::SetIsValidLinkFunctionForTesting(nullptr);
|
||||
}
|
||||
|
||||
const std::vector<PDFiumPage::Link>& GetLinks(PDFiumEngine* engine,
|
||||
int page_index) {
|
||||
@ -68,7 +77,7 @@ TEST_F(PDFiumPageLinkTest, TestLinkGeneration) {
|
||||
#endif
|
||||
|
||||
const std::vector<PDFiumPage::Link>& links = GetLinks(engine.get(), 0);
|
||||
ASSERT_EQ(3u, links.size());
|
||||
ASSERT_EQ(2u, links.size());
|
||||
|
||||
const PDFiumPage::Link& link = links[0];
|
||||
EXPECT_EQ("http://yahoo.com", link.url);
|
||||
@ -91,13 +100,6 @@ TEST_F(PDFiumPageLinkTest, TestLinkGeneration) {
|
||||
} else {
|
||||
CompareRect({131, 121, 138, 20}, second_link.bounding_rects[0]);
|
||||
}
|
||||
|
||||
const PDFiumPage::Link& third_link = links[2];
|
||||
EXPECT_EQ("http://google.com", third_link.url);
|
||||
EXPECT_EQ(92, third_link.start_char_index);
|
||||
EXPECT_EQ(17, third_link.char_count);
|
||||
ASSERT_EQ(1u, third_link.bounding_rects.size());
|
||||
CompareRect({82, 67, 161, 21}, third_link.bounding_rects[0]);
|
||||
}
|
||||
|
||||
using PDFiumPageImageTest = PDFiumTestBase;
|
||||
@ -118,30 +120,4 @@ TEST_F(PDFiumPageImageTest, TestCalculateImages) {
|
||||
CompareRect({380, 678, 1, 1}, page->images_[2].bounding_rect);
|
||||
}
|
||||
|
||||
using PDFiumPageTextTest = PDFiumTestBase;
|
||||
|
||||
TEST_F(PDFiumPageTextTest, GetTextRunInfo) {
|
||||
TestClient client;
|
||||
std::unique_ptr<PDFiumEngine> engine =
|
||||
InitializeEngine(&client, FILE_PATH_LITERAL("weblinks.pdf"));
|
||||
ASSERT_TRUE(engine);
|
||||
|
||||
int current_char_index = 0;
|
||||
uint32_t text_run_length;
|
||||
double text_run_font_size;
|
||||
pp::FloatRect bounds;
|
||||
|
||||
// The links span from [7, 22], [52, 66] and [92, 108] with 16, 15 and 17
|
||||
// text run lengths respectively. There are text runs preceding and
|
||||
// succeeding them.
|
||||
static constexpr uint32_t kExpectedTextRunLengths[] = {7, 16, 20, 9, 15,
|
||||
20, 5, 17, 0};
|
||||
for (uint32_t expected_length : kExpectedTextRunLengths) {
|
||||
engine->GetTextRunInfo(0, current_char_index, &text_run_length,
|
||||
&text_run_font_size, &bounds);
|
||||
EXPECT_EQ(expected_length, text_run_length);
|
||||
current_char_index += text_run_length;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace chrome_pdf
|
||||
|
@ -21,10 +21,6 @@ std::unique_ptr<DocumentLoader> CreateTestDocumentLoader(
|
||||
return std::make_unique<TestDocumentLoader>(client, g_test_pdf_name);
|
||||
}
|
||||
|
||||
bool IsValidLinkForTesting(const std::string& url) {
|
||||
return !url.empty();
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
PDFiumTestBase::PDFiumTestBase() = default;
|
||||
@ -33,12 +29,10 @@ PDFiumTestBase::~PDFiumTestBase() = default;
|
||||
|
||||
void PDFiumTestBase::SetUp() {
|
||||
InitializePDFium();
|
||||
PDFiumPage::SetIsValidLinkFunctionForTesting(&IsValidLinkForTesting);
|
||||
}
|
||||
|
||||
void PDFiumTestBase::TearDown() {
|
||||
PDFiumEngine::SetCreateDocumentLoaderFunctionForTesting(nullptr);
|
||||
PDFiumPage::SetIsValidLinkFunctionForTesting(nullptr);
|
||||
g_test_pdf_name = nullptr;
|
||||
FPDF_DestroyLibrary();
|
||||
}
|
||||
|
@ -46,9 +46,6 @@ BT
|
||||
0 50 Td
|
||||
/F2 16 Tf
|
||||
(Goodbye, http://bing.com! nice meeting you) Tj
|
||||
0 40 Td
|
||||
/F2 16 Tf
|
||||
(Bye, http://google.com) Tj
|
||||
ET
|
||||
endstream
|
||||
endobj
|
||||
|
@ -37,7 +37,7 @@ endobj
|
||||
>>
|
||||
endobj
|
||||
6 0 obj <<
|
||||
/Length 184
|
||||
/Length 138
|
||||
>>
|
||||
stream
|
||||
BT
|
||||
@ -47,9 +47,6 @@ BT
|
||||
0 50 Td
|
||||
/F2 16 Tf
|
||||
(Goodbye, http://bing.com! nice meeting you) Tj
|
||||
0 40 Td
|
||||
/F2 16 Tf
|
||||
(Bye, http://google.com) Tj
|
||||
ET
|
||||
endstream
|
||||
endobj
|
||||
@ -67,5 +64,5 @@ trailer <<
|
||||
/Size 7
|
||||
>>
|
||||
startxref
|
||||
689
|
||||
642
|
||||
%%EOF
|
||||
|
Reference in New Issue
Block a user