Populate Highlights in PDFiumPage
This CL introduces a method PDFiumPage::PopulateHighlights which reads highlight annotations from the PDF document and stores relevant accessibility info in a vector within PDFiumPage. Also included is a test pdf file containing simple highlights and a unit test which validates the new method. Bug: 1008775 Change-Id: I1c04f2cc7dc2885aa3a5f49ec66fcccc5a2c1311 Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/1862872 Commit-Queue: Kalpak Tapas <katapas@microsoft.com> Reviewed-by: Lei Zhang <thestig@chromium.org> Reviewed-by: Kevin Babbitt <kbabbitt@microsoft.com> Cr-Commit-Position: refs/heads/master@{#718088}
This commit is contained in:
@ -1009,6 +1009,43 @@ void PDFiumPage::PopulateImageAltTextForStructElement(
|
||||
}
|
||||
}
|
||||
|
||||
void PDFiumPage::PopulateHighlights() {
|
||||
if (calculated_highlights_)
|
||||
return;
|
||||
|
||||
FPDF_PAGE page = GetPage();
|
||||
if (!page)
|
||||
return;
|
||||
|
||||
calculated_highlights_ = true;
|
||||
// Populate highlights from within the pdf page into data structures ready
|
||||
// to be passed to mimehandler. Currently scoped to highlights only.
|
||||
int annotation_count = FPDFPage_GetAnnotCount(page);
|
||||
for (int i = 0; i < annotation_count; ++i) {
|
||||
ScopedFPDFAnnotation annot(FPDFPage_GetAnnot(page, i));
|
||||
DCHECK(annot);
|
||||
FPDF_ANNOTATION_SUBTYPE subtype = FPDFAnnot_GetSubtype(annot.get());
|
||||
if (subtype != FPDF_ANNOT_HIGHLIGHT)
|
||||
continue;
|
||||
|
||||
FS_RECTF rect;
|
||||
if (!FPDFAnnot_GetRect(annot.get(), &rect))
|
||||
continue;
|
||||
|
||||
Highlight highlight;
|
||||
// We use the bounding box of the highlight as the bounding rect.
|
||||
highlight.bounding_rect =
|
||||
PageToScreen(pp::Point(), 1.0, rect.left, rect.top, rect.right,
|
||||
rect.bottom, PageOrientation::kOriginal);
|
||||
GetUnderlyingTextRangeForRect(
|
||||
pp::FloatRect(rect.left, rect.bottom, std::abs(rect.right - rect.left),
|
||||
std::abs(rect.bottom - rect.top)),
|
||||
&highlight.start_char_index, &highlight.char_count);
|
||||
|
||||
highlights_.push_back(std::move(highlight));
|
||||
}
|
||||
}
|
||||
|
||||
bool PDFiumPage::GetUnderlyingTextRangeForRect(const pp::FloatRect& rect,
|
||||
int* start_index,
|
||||
int* char_len) {
|
||||
@ -1153,6 +1190,12 @@ PDFiumPage::Image::Image(const Image& that) = default;
|
||||
|
||||
PDFiumPage::Image::~Image() = default;
|
||||
|
||||
// Special member functions of PDFiumPage::Highlight. They are declared in the
// struct body and defined here with the compiler-generated behavior.
PDFiumPage::Highlight::Highlight() = default;

PDFiumPage::Highlight::Highlight(const Highlight& other) = default;

PDFiumPage::Highlight::~Highlight() = default;
|
||||
|
||||
int ToPDFiumRotation(PageOrientation orientation) {
|
||||
// Could static_cast<int>(orientation), but using an exhaustive switch will
|
||||
// trigger an error if we ever change the definition of PageOrientation.
|
||||
|
@ -162,6 +162,7 @@ class PDFiumPage {
|
||||
FRIEND_TEST_ALL_PREFIXES(PDFiumPageLinkTest, TestAnnotLinkGeneration);
|
||||
FRIEND_TEST_ALL_PREFIXES(PDFiumPageImageTest, TestImageAltText);
|
||||
FRIEND_TEST_ALL_PREFIXES(PDFiumPageLinkTest, TestLinkGeneration);
|
||||
FRIEND_TEST_ALL_PREFIXES(PDFiumPageHighlightTest, TestPopulateHighlights);
|
||||
|
||||
// Returns a link index if the given character index is over a link, or -1
|
||||
// otherwise.
|
||||
@ -174,6 +175,8 @@ class PDFiumPage {
|
||||
void PopulateAnnotationLinks();
|
||||
// Calculate the locations of images on the page.
|
||||
void CalculateImages();
|
||||
// Populates |highlights_| from the highlight annotations on the page.
|
||||
void PopulateHighlights();
|
||||
// Returns link type and fills target associated with a link. Returns
|
||||
// NONSELECTABLE_AREA if link detection failed.
|
||||
Area GetLinkTarget(FPDF_LINK link, LinkTarget* target);
|
||||
@ -248,6 +251,19 @@ class PDFiumPage {
|
||||
std::string alt_text;
|
||||
};
|
||||
|
||||
// Represents a highlight within the page.
|
||||
struct Highlight {
|
||||
Highlight();
|
||||
Highlight(const Highlight& other);
|
||||
~Highlight();
|
||||
|
||||
// Start index of underlying text range. -1 indicates invalid value.
|
||||
int32_t start_char_index = -1;
|
||||
// Number of characters encompassed by this highlight.
|
||||
int32_t char_count = 0;
|
||||
pp::Rect bounding_rect;
|
||||
};
|
||||
|
||||
PDFiumEngine* engine_;
|
||||
ScopedFPDFPage page_;
|
||||
ScopedFPDFTextPage text_page_;
|
||||
@ -258,6 +274,8 @@ class PDFiumPage {
|
||||
std::vector<Link> links_;
|
||||
bool calculated_images_ = false;
|
||||
std::vector<Image> images_;
|
||||
bool calculated_highlights_ = false;
|
||||
std::vector<Highlight> highlights_;
|
||||
bool calculated_page_object_text_run_breaks_ = false;
|
||||
// The set of character indices on which text runs need to be broken for page
|
||||
// objects.
|
||||
|
@ -291,4 +291,39 @@ TEST_F(PDFiumPageTextTest, GetTextRunInfo) {
|
||||
ASSERT_FALSE(text_run_info_result.has_value());
|
||||
}
|
||||
|
||||
using PDFiumPageHighlightTest = PDFiumTestBase;
|
||||
|
||||
// Loads highlights.pdf, runs PopulateHighlights() on its only page, and checks
// the text range and bounding rect recorded for each highlight, in order.
TEST_F(PDFiumPageHighlightTest, TestPopulateHighlights) {
  struct ExpectedHighlight {
    int32_t start_char_index;
    int32_t char_count;
    pp::Rect bounding_rect;
  };

  static const ExpectedHighlight kExpected[] = {
      {0, 5, {5, 196, 49, 26}},
      {12, 7, {110, 196, 77, 26}},
      {20, 1, {192, 196, 13, 26}}};

  TestClient test_client;
  std::unique_ptr<PDFiumEngine> pdf_engine =
      InitializeEngine(&test_client, FILE_PATH_LITERAL("highlights.pdf"));
  ASSERT_TRUE(pdf_engine);
  ASSERT_EQ(1, pdf_engine->GetNumberOfPages());

  PDFiumPage* first_page = GetPDFiumPageForTest(pdf_engine.get(), 0);
  ASSERT_TRUE(first_page);
  first_page->PopulateHighlights();

  const auto& actual_highlights = first_page->highlights_;
  ASSERT_EQ(base::size(kExpected), actual_highlights.size());

  // Entries are compared position by position against the expectation table.
  for (size_t i = 0; i < actual_highlights.size(); ++i) {
    const ExpectedHighlight& expected = kExpected[i];
    const auto& actual = actual_highlights[i];
    ASSERT_EQ(expected.start_char_index, actual.start_char_index);
    ASSERT_EQ(expected.char_count, actual.char_count);
    CompareRect(expected.bounding_rect, actual.bounding_rect);
  }
}
|
||||
|
||||
} // namespace chrome_pdf
|
||||
|
73
pdf/test/data/highlights.in
Normal file
73
pdf/test/data/highlights.in
Normal file
@ -0,0 +1,73 @@
|
||||
{{header}}
|
||||
{{object 1 0}} <<
|
||||
/Type /Catalog
|
||||
/Pages 2 0 R
|
||||
>>
|
||||
endobj
|
||||
{{object 2 0}} <<
|
||||
/Type /Pages
|
||||
/MediaBox [0 0 400 200]
|
||||
/Count 1
|
||||
/Kids [3 0 R]
|
||||
>>
|
||||
endobj
|
||||
{{object 3 0}} <<
|
||||
/Type /Page
|
||||
/Parent 2 0 R
|
||||
/Resources <<
|
||||
/Font <<
|
||||
/F1 4 0 R
|
||||
>>
|
||||
>>
|
||||
/Contents 5 0 R
|
||||
/Annots [6 0 R 7 0 R 8 0 R]
|
||||
>>
|
||||
endobj
|
||||
{{object 4 0}} <<
|
||||
/Type /Font
|
||||
/Subtype /Type1
|
||||
/BaseFont /Helvetica
|
||||
>>
|
||||
endobj
|
||||
{{object 5 0}} <<
|
||||
{{streamlen}}
|
||||
>>
|
||||
stream
|
||||
BT
|
||||
0 40 Td
|
||||
/F1 16 Tf
|
||||
(Hello, nice meeting you) Tj
|
||||
ET
|
||||
endstream
|
||||
endobj
|
||||
{{object 6 0}} <<
|
||||
/Type /Annot
|
||||
/Subtype /Highlight
|
||||
/QuadPoints [0 55 36 59 0 36 36 36]
|
||||
/Rect [0 36 36 55]
|
||||
/C [0.15 0 0.9 0]
|
||||
/P 3 0 R
|
||||
>>
|
||||
endobj
|
||||
{{object 7 0}} <<
|
||||
/Type /Annot
|
||||
/Subtype /Highlight
|
||||
/QuadPoints [79 55 136 55 79 36 136 36]
|
||||
/Rect [79 36 136 55]
|
||||
/C [0.15 0 0.9 0]
|
||||
/P 3 0 R
|
||||
>>
|
||||
endobj
|
||||
{{object 8 0}} <<
|
||||
/Type /Annot
|
||||
/Subtype /Highlight
|
||||
/QuadPoints [140 55 149 55 140 36 149 36]
|
||||
/Rect [140 36 149 55]
|
||||
/C [0.15 0 0.9 0]
|
||||
/P 3 0 R
|
||||
>>
|
||||
endobj
|
||||
{{xref}}
|
||||
{{trailer}}
|
||||
{{startxref}}
|
||||
%%EOF
|
88
pdf/test/data/highlights.pdf
Normal file
88
pdf/test/data/highlights.pdf
Normal file
@ -0,0 +1,88 @@
|
||||
%PDF-1.7
|
||||
%<25><><EFBFBD><EFBFBD>
|
||||
1 0 obj <<
|
||||
/Type /Catalog
|
||||
/Pages 2 0 R
|
||||
>>
|
||||
endobj
|
||||
2 0 obj <<
|
||||
/Type /Pages
|
||||
/MediaBox [0 0 400 200]
|
||||
/Count 1
|
||||
/Kids [3 0 R]
|
||||
>>
|
||||
endobj
|
||||
3 0 obj <<
|
||||
/Type /Page
|
||||
/Parent 2 0 R
|
||||
/Resources <<
|
||||
/Font <<
|
||||
/F1 4 0 R
|
||||
>>
|
||||
>>
|
||||
/Contents 5 0 R
|
||||
/Annots [6 0 R 7 0 R 8 0 R]
|
||||
>>
|
||||
endobj
|
||||
4 0 obj <<
|
||||
/Type /Font
|
||||
/Subtype /Type1
|
||||
/BaseFont /Helvetica
|
||||
>>
|
||||
endobj
|
||||
5 0 obj <<
|
||||
/Length 53
|
||||
>>
|
||||
stream
|
||||
BT
|
||||
0 40 Td
|
||||
/F1 16 Tf
|
||||
(Hello, nice meeting you) Tj
|
||||
ET
|
||||
endstream
|
||||
endobj
|
||||
6 0 obj <<
|
||||
/Type /Annot
|
||||
/Subtype /Highlight
|
||||
/QuadPoints [0 55 36 59 0 36 36 36]
|
||||
/Rect [0 36 36 55]
|
||||
/C [0.15 0 0.9 0]
|
||||
/P 3 0 R
|
||||
>>
|
||||
endobj
|
||||
7 0 obj <<
|
||||
/Type /Annot
|
||||
/Subtype /Highlight
|
||||
/QuadPoints [79 55 136 55 79 36 136 36]
|
||||
/Rect [79 36 136 55]
|
||||
/C [0.15 0 0.9 0]
|
||||
/P 3 0 R
|
||||
>>
|
||||
endobj
|
||||
8 0 obj <<
|
||||
/Type /Annot
|
||||
/Subtype /Highlight
|
||||
/QuadPoints [140 55 149 55 140 36 149 36]
|
||||
/Rect [140 36 149 55]
|
||||
/C [0.15 0 0.9 0]
|
||||
/P 3 0 R
|
||||
>>
|
||||
endobj
|
||||
xref
|
||||
0 9
|
||||
0000000000 65535 f
|
||||
0000000015 00000 n
|
||||
0000000068 00000 n
|
||||
0000000157 00000 n
|
||||
0000000313 00000 n
|
||||
0000000389 00000 n
|
||||
0000000493 00000 n
|
||||
0000000641 00000 n
|
||||
0000000795 00000 n
|
||||
trailer <<
|
||||
/Root 1 0 R
|
||||
/Size 9
|
||||
>>
|
||||
startxref
|
||||
952
|
||||
%%EOF
|
Reference in New Issue
Block a user