Populate Highlights in PDFiumPage
This CL introduces a method PDFiumPage::PopulateHighlights which reads highlight annotations from the PDF document and stores relevant accessibility info in a vector within PDFiumPage. Also included is a test pdf file containing simple highlights and a unit test which validates the new method. Bug: 1008775 Change-Id: I1c04f2cc7dc2885aa3a5f49ec66fcccc5a2c1311 Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/1862872 Commit-Queue: Kalpak Tapas <katapas@microsoft.com> Reviewed-by: Lei Zhang <thestig@chromium.org> Reviewed-by: Kevin Babbitt <kbabbitt@microsoft.com> Cr-Commit-Position: refs/heads/master@{#718088}
This commit is contained in:
@@ -1009,6 +1009,43 @@ void PDFiumPage::PopulateImageAltTextForStructElement(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void PDFiumPage::PopulateHighlights() {
|
||||||
|
if (calculated_highlights_)
|
||||||
|
return;
|
||||||
|
|
||||||
|
FPDF_PAGE page = GetPage();
|
||||||
|
if (!page)
|
||||||
|
return;
|
||||||
|
|
||||||
|
calculated_highlights_ = true;
|
||||||
|
// Populate highlights from within the pdf page into data structures ready
|
||||||
|
// to be passed to mimehandler. Currently scoped to highlights only.
|
||||||
|
int annotation_count = FPDFPage_GetAnnotCount(page);
|
||||||
|
for (int i = 0; i < annotation_count; ++i) {
|
||||||
|
ScopedFPDFAnnotation annot(FPDFPage_GetAnnot(page, i));
|
||||||
|
DCHECK(annot);
|
||||||
|
FPDF_ANNOTATION_SUBTYPE subtype = FPDFAnnot_GetSubtype(annot.get());
|
||||||
|
if (subtype != FPDF_ANNOT_HIGHLIGHT)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
FS_RECTF rect;
|
||||||
|
if (!FPDFAnnot_GetRect(annot.get(), &rect))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
Highlight highlight;
|
||||||
|
// We use the bounding box of the highlight as the bounding rect.
|
||||||
|
highlight.bounding_rect =
|
||||||
|
PageToScreen(pp::Point(), 1.0, rect.left, rect.top, rect.right,
|
||||||
|
rect.bottom, PageOrientation::kOriginal);
|
||||||
|
GetUnderlyingTextRangeForRect(
|
||||||
|
pp::FloatRect(rect.left, rect.bottom, std::abs(rect.right - rect.left),
|
||||||
|
std::abs(rect.bottom - rect.top)),
|
||||||
|
&highlight.start_char_index, &highlight.char_count);
|
||||||
|
|
||||||
|
highlights_.push_back(std::move(highlight));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
bool PDFiumPage::GetUnderlyingTextRangeForRect(const pp::FloatRect& rect,
|
bool PDFiumPage::GetUnderlyingTextRangeForRect(const pp::FloatRect& rect,
|
||||||
int* start_index,
|
int* start_index,
|
||||||
int* char_len) {
|
int* char_len) {
|
||||||
@@ -1153,6 +1190,12 @@ PDFiumPage::Image::Image(const Image& that) = default;
|
|||||||
|
|
||||||
PDFiumPage::Image::~Image() = default;
|
PDFiumPage::Image::~Image() = default;
|
||||||
|
|
||||||
|
// Special member functions of PDFiumPage::Highlight. They are declared in the
// struct and defaulted here, out of line.
PDFiumPage::Highlight::Highlight() = default;

PDFiumPage::Highlight::Highlight(const Highlight& that) = default;

PDFiumPage::Highlight::~Highlight() = default;
|
||||||
|
|
||||||
int ToPDFiumRotation(PageOrientation orientation) {
|
int ToPDFiumRotation(PageOrientation orientation) {
|
||||||
// Could static_cast<int>(orientation), but using an exhaustive switch will
|
// Could static_cast<int>(orientation), but using an exhaustive switch will
|
||||||
// trigger an error if we ever change the definition of PageOrientation.
|
// trigger an error if we ever change the definition of PageOrientation.
|
||||||
|
@@ -162,6 +162,7 @@ class PDFiumPage {
|
|||||||
FRIEND_TEST_ALL_PREFIXES(PDFiumPageLinkTest, TestAnnotLinkGeneration);
|
FRIEND_TEST_ALL_PREFIXES(PDFiumPageLinkTest, TestAnnotLinkGeneration);
|
||||||
FRIEND_TEST_ALL_PREFIXES(PDFiumPageImageTest, TestImageAltText);
|
FRIEND_TEST_ALL_PREFIXES(PDFiumPageImageTest, TestImageAltText);
|
||||||
FRIEND_TEST_ALL_PREFIXES(PDFiumPageLinkTest, TestLinkGeneration);
|
FRIEND_TEST_ALL_PREFIXES(PDFiumPageLinkTest, TestLinkGeneration);
|
||||||
|
FRIEND_TEST_ALL_PREFIXES(PDFiumPageHighlightTest, TestPopulateHighlights);
|
||||||
|
|
||||||
// Returns a link index if the given character index is over a link, or -1
|
// Returns a link index if the given character index is over a link, or -1
|
||||||
// otherwise.
|
// otherwise.
|
||||||
@@ -174,6 +175,8 @@ class PDFiumPage {
|
|||||||
void PopulateAnnotationLinks();
|
void PopulateAnnotationLinks();
|
||||||
// Calculate the locations of images on the page.
|
// Calculate the locations of images on the page.
|
||||||
void CalculateImages();
|
void CalculateImages();
|
||||||
|
// Populate highlights_ with the highlight annotations found on the page.
|
||||||
|
void PopulateHighlights();
|
||||||
// Returns link type and fills target associated with a link. Returns
|
// Returns link type and fills target associated with a link. Returns
|
||||||
// NONSELECTABLE_AREA if link detection failed.
|
// NONSELECTABLE_AREA if link detection failed.
|
||||||
Area GetLinkTarget(FPDF_LINK link, LinkTarget* target);
|
Area GetLinkTarget(FPDF_LINK link, LinkTarget* target);
|
||||||
@@ -248,6 +251,19 @@ class PDFiumPage {
|
|||||||
std::string alt_text;
|
std::string alt_text;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Represents a highlight within the page.
|
||||||
|
struct Highlight {
|
||||||
|
Highlight();
|
||||||
|
Highlight(const Highlight& other);
|
||||||
|
~Highlight();
|
||||||
|
|
||||||
|
// Start index of underlying text range. -1 indicates invalid value.
|
||||||
|
int32_t start_char_index = -1;
|
||||||
|
// Number of characters encompassed by this highlight.
|
||||||
|
int32_t char_count = 0;
|
||||||
|
pp::Rect bounding_rect;
|
||||||
|
};
|
||||||
|
|
||||||
PDFiumEngine* engine_;
|
PDFiumEngine* engine_;
|
||||||
ScopedFPDFPage page_;
|
ScopedFPDFPage page_;
|
||||||
ScopedFPDFTextPage text_page_;
|
ScopedFPDFTextPage text_page_;
|
||||||
@@ -258,6 +274,8 @@ class PDFiumPage {
|
|||||||
std::vector<Link> links_;
|
std::vector<Link> links_;
|
||||||
bool calculated_images_ = false;
|
bool calculated_images_ = false;
|
||||||
std::vector<Image> images_;
|
std::vector<Image> images_;
|
||||||
|
bool calculated_highlights_ = false;
|
||||||
|
std::vector<Highlight> highlights_;
|
||||||
bool calculated_page_object_text_run_breaks_ = false;
|
bool calculated_page_object_text_run_breaks_ = false;
|
||||||
// The set of character indices on which text runs need to be broken for page
|
// The set of character indices on which text runs need to be broken for page
|
||||||
// objects.
|
// objects.
|
||||||
|
@@ -291,4 +291,39 @@ TEST_F(PDFiumPageTextTest, GetTextRunInfo) {
|
|||||||
ASSERT_FALSE(text_run_info_result.has_value());
|
ASSERT_FALSE(text_run_info_result.has_value());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
using PDFiumPageHighlightTest = PDFiumTestBase;
|
||||||
|
|
||||||
|
TEST_F(PDFiumPageHighlightTest, TestPopulateHighlights) {
|
||||||
|
struct ExpectedHighlight {
|
||||||
|
int32_t start_char_index;
|
||||||
|
int32_t char_count;
|
||||||
|
pp::Rect bounding_rect;
|
||||||
|
};
|
||||||
|
|
||||||
|
static const ExpectedHighlight kExpectedHighlights[] = {
|
||||||
|
{0, 5, {5, 196, 49, 26}},
|
||||||
|
{12, 7, {110, 196, 77, 26}},
|
||||||
|
{20, 1, {192, 196, 13, 26}}};
|
||||||
|
|
||||||
|
TestClient client;
|
||||||
|
std::unique_ptr<PDFiumEngine> engine =
|
||||||
|
InitializeEngine(&client, FILE_PATH_LITERAL("highlights.pdf"));
|
||||||
|
ASSERT_TRUE(engine);
|
||||||
|
ASSERT_EQ(1, engine->GetNumberOfPages());
|
||||||
|
|
||||||
|
PDFiumPage* page = GetPDFiumPageForTest(engine.get(), 0);
|
||||||
|
ASSERT_TRUE(page);
|
||||||
|
page->PopulateHighlights();
|
||||||
|
ASSERT_EQ(base::size(kExpectedHighlights), page->highlights_.size());
|
||||||
|
|
||||||
|
for (size_t i = 0; i < page->highlights_.size(); ++i) {
|
||||||
|
ASSERT_EQ(kExpectedHighlights[i].start_char_index,
|
||||||
|
page->highlights_[i].start_char_index);
|
||||||
|
ASSERT_EQ(kExpectedHighlights[i].char_count,
|
||||||
|
page->highlights_[i].char_count);
|
||||||
|
CompareRect(kExpectedHighlights[i].bounding_rect,
|
||||||
|
page->highlights_[i].bounding_rect);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace chrome_pdf
|
} // namespace chrome_pdf
|
||||||
|
73
pdf/test/data/highlights.in
Normal file
73
pdf/test/data/highlights.in
Normal file
@@ -0,0 +1,73 @@
|
|||||||
|
{{header}}
|
||||||
|
{{object 1 0}} <<
|
||||||
|
/Type /Catalog
|
||||||
|
/Pages 2 0 R
|
||||||
|
>>
|
||||||
|
endobj
|
||||||
|
{{object 2 0}} <<
|
||||||
|
/Type /Pages
|
||||||
|
/MediaBox [0 0 400 200]
|
||||||
|
/Count 1
|
||||||
|
/Kids [3 0 R]
|
||||||
|
>>
|
||||||
|
endobj
|
||||||
|
{{object 3 0}} <<
|
||||||
|
/Type /Page
|
||||||
|
/Parent 2 0 R
|
||||||
|
/Resources <<
|
||||||
|
/Font <<
|
||||||
|
/F1 4 0 R
|
||||||
|
>>
|
||||||
|
>>
|
||||||
|
/Contents 5 0 R
|
||||||
|
/Annots [6 0 R 7 0 R 8 0 R]
|
||||||
|
>>
|
||||||
|
endobj
|
||||||
|
{{object 4 0}} <<
|
||||||
|
/Type /Font
|
||||||
|
/Subtype /Type1
|
||||||
|
/BaseFont /Helvetica
|
||||||
|
>>
|
||||||
|
endobj
|
||||||
|
{{object 5 0}} <<
|
||||||
|
{{streamlen}}
|
||||||
|
>>
|
||||||
|
stream
|
||||||
|
BT
|
||||||
|
0 40 Td
|
||||||
|
/F1 16 Tf
|
||||||
|
(Hello, nice meeting you) Tj
|
||||||
|
ET
|
||||||
|
endstream
|
||||||
|
endobj
|
||||||
|
{{object 6 0}} <<
|
||||||
|
/Type /Annot
|
||||||
|
/Subtype /Highlight
|
||||||
|
/QuadPoints [0 55 36 59 0 36 36 36]
|
||||||
|
/Rect [0 36 36 55]
|
||||||
|
/C [0.15 0 0.9 0]
|
||||||
|
/P 3 0 R
|
||||||
|
>>
|
||||||
|
endobj
|
||||||
|
{{object 7 0}} <<
|
||||||
|
/Type /Annot
|
||||||
|
/Subtype /Highlight
|
||||||
|
/QuadPoints [79 55 136 55 79 36 136 36]
|
||||||
|
/Rect [79 36 136 55]
|
||||||
|
/C [0.15 0 0.9 0]
|
||||||
|
/P 3 0 R
|
||||||
|
>>
|
||||||
|
endobj
|
||||||
|
{{object 8 0}} <<
|
||||||
|
/Type /Annot
|
||||||
|
/Subtype /Highlight
|
||||||
|
/QuadPoints [140 55 149 55 140 36 149 36]
|
||||||
|
/Rect [140 36 149 55]
|
||||||
|
/C [0.15 0 0.9 0]
|
||||||
|
/P 3 0 R
|
||||||
|
>>
|
||||||
|
endobj
|
||||||
|
{{xref}}
|
||||||
|
{{trailer}}
|
||||||
|
{{startxref}}
|
||||||
|
%%EOF
|
88
pdf/test/data/highlights.pdf
Normal file
88
pdf/test/data/highlights.pdf
Normal file
@@ -0,0 +1,88 @@
|
|||||||
|
%PDF-1.7
|
||||||
|
%<25><><EFBFBD><EFBFBD>
|
||||||
|
1 0 obj <<
|
||||||
|
/Type /Catalog
|
||||||
|
/Pages 2 0 R
|
||||||
|
>>
|
||||||
|
endobj
|
||||||
|
2 0 obj <<
|
||||||
|
/Type /Pages
|
||||||
|
/MediaBox [0 0 400 200]
|
||||||
|
/Count 1
|
||||||
|
/Kids [3 0 R]
|
||||||
|
>>
|
||||||
|
endobj
|
||||||
|
3 0 obj <<
|
||||||
|
/Type /Page
|
||||||
|
/Parent 2 0 R
|
||||||
|
/Resources <<
|
||||||
|
/Font <<
|
||||||
|
/F1 4 0 R
|
||||||
|
>>
|
||||||
|
>>
|
||||||
|
/Contents 5 0 R
|
||||||
|
/Annots [6 0 R 7 0 R 8 0 R]
|
||||||
|
>>
|
||||||
|
endobj
|
||||||
|
4 0 obj <<
|
||||||
|
/Type /Font
|
||||||
|
/Subtype /Type1
|
||||||
|
/BaseFont /Helvetica
|
||||||
|
>>
|
||||||
|
endobj
|
||||||
|
5 0 obj <<
|
||||||
|
/Length 53
|
||||||
|
>>
|
||||||
|
stream
|
||||||
|
BT
|
||||||
|
0 40 Td
|
||||||
|
/F1 16 Tf
|
||||||
|
(Hello, nice meeting you) Tj
|
||||||
|
ET
|
||||||
|
endstream
|
||||||
|
endobj
|
||||||
|
6 0 obj <<
|
||||||
|
/Type /Annot
|
||||||
|
/Subtype /Highlight
|
||||||
|
/QuadPoints [0 55 36 59 0 36 36 36]
|
||||||
|
/Rect [0 36 36 55]
|
||||||
|
/C [0.15 0 0.9 0]
|
||||||
|
/P 3 0 R
|
||||||
|
>>
|
||||||
|
endobj
|
||||||
|
7 0 obj <<
|
||||||
|
/Type /Annot
|
||||||
|
/Subtype /Highlight
|
||||||
|
/QuadPoints [79 55 136 55 79 36 136 36]
|
||||||
|
/Rect [79 36 136 55]
|
||||||
|
/C [0.15 0 0.9 0]
|
||||||
|
/P 3 0 R
|
||||||
|
>>
|
||||||
|
endobj
|
||||||
|
8 0 obj <<
|
||||||
|
/Type /Annot
|
||||||
|
/Subtype /Highlight
|
||||||
|
/QuadPoints [140 55 149 55 140 36 149 36]
|
||||||
|
/Rect [140 36 149 55]
|
||||||
|
/C [0.15 0 0.9 0]
|
||||||
|
/P 3 0 R
|
||||||
|
>>
|
||||||
|
endobj
|
||||||
|
xref
|
||||||
|
0 9
|
||||||
|
0000000000 65535 f
|
||||||
|
0000000015 00000 n
|
||||||
|
0000000068 00000 n
|
||||||
|
0000000157 00000 n
|
||||||
|
0000000313 00000 n
|
||||||
|
0000000389 00000 n
|
||||||
|
0000000493 00000 n
|
||||||
|
0000000641 00000 n
|
||||||
|
0000000795 00000 n
|
||||||
|
trailer <<
|
||||||
|
/Root 1 0 R
|
||||||
|
/Size 9
|
||||||
|
>>
|
||||||
|
startxref
|
||||||
|
952
|
||||||
|
%%EOF
|
Reference in New Issue
Block a user