Populate Text Fields in PDFiumPage
This CL introduces a method PDFiumPage::PopulateTextFields() which reads text form fields from the PDF document and stores relevant information in a vector within PDFiumPage. The CL also includes a new test file with sample text fields and a unit test to validate the new method.

Bug: 1030242
Change-Id: I98a13e237e443f1703ac7b699cc4952cf21c5e10
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2024637
Commit-Queue: Mansi Awasthi <maawas@microsoft.com>
Reviewed-by: Lei Zhang <thestig@chromium.org>
Reviewed-by: Kevin Babbitt <kbabbitt@microsoft.com>
Cr-Commit-Position: refs/heads/master@{#745022}
This commit is contained in:

committed by
Commit Bot

parent
7d09cf61d7
commit
d6afb93c52
@ -183,6 +183,24 @@ bool FloatEquals(float f1, float f2) {
|
||||
kEpsilonScale * fmaxf(fmaxf(fabsf(f1), fabsf(f2)), kEpsilonScale);
|
||||
}
|
||||
|
||||
using GetFormFieldPropertyFunction =
|
||||
base::RepeatingCallback<unsigned long(unsigned short* buffer,
|
||||
unsigned long buflen)>;
|
||||
|
||||
// Helper method to fetch string properties of form fields.
|
||||
std::string GetFormFieldProperty(GetFormFieldPropertyFunction function) {
|
||||
base::string16 data;
|
||||
size_t buffer_size = function.Run(nullptr, 0);
|
||||
if (buffer_size > 0) {
|
||||
PDFiumAPIStringBufferSizeInBytesAdapter<base::string16> api_string_adapter(
|
||||
&data, buffer_size, true);
|
||||
api_string_adapter.Close(function.Run(
|
||||
reinterpret_cast<unsigned short*>(api_string_adapter.GetData()),
|
||||
buffer_size));
|
||||
}
|
||||
return base::UTF16ToUTF8(data);
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
PDFiumPage::LinkTarget::LinkTarget() : page(-1) {}
|
||||
@ -266,7 +284,7 @@ void PDFiumPage::CalculatePageObjectTextRunBreaks() {
|
||||
}
|
||||
}
|
||||
|
||||
PopulateHighlights();
|
||||
PopulateAnnotations();
|
||||
for (const auto& highlight : highlights_) {
|
||||
if (highlight.start_char_index >= 0 &&
|
||||
highlight.start_char_index < chars_count) {
|
||||
@ -568,7 +586,7 @@ PDFiumPage::GetHighlightInfo() {
|
||||
if (!available_)
|
||||
return highlight_info;
|
||||
|
||||
PopulateHighlights();
|
||||
PopulateAnnotations();
|
||||
|
||||
highlight_info.reserve(highlights_.size());
|
||||
for (const Highlight& highlight : highlights_) {
|
||||
@ -1041,57 +1059,98 @@ void PDFiumPage::PopulateImageAltTextForStructElement(
|
||||
}
|
||||
}
|
||||
|
||||
void PDFiumPage::PopulateHighlights() {
|
||||
if (calculated_highlights_)
|
||||
void PDFiumPage::PopulateAnnotations() {
|
||||
if (calculated_annotations_)
|
||||
return;
|
||||
|
||||
FPDF_PAGE page = GetPage();
|
||||
if (!page)
|
||||
return;
|
||||
|
||||
calculated_highlights_ = true;
|
||||
// Populate highlights from within the pdf page into data structures ready
|
||||
// to be passed to mimehandler. Currently scoped to highlights only.
|
||||
int annotation_count = FPDFPage_GetAnnotCount(page);
|
||||
for (int i = 0; i < annotation_count; ++i) {
|
||||
ScopedFPDFAnnotation annot(FPDFPage_GetAnnot(page, i));
|
||||
DCHECK(annot);
|
||||
FPDF_ANNOTATION_SUBTYPE subtype = FPDFAnnot_GetSubtype(annot.get());
|
||||
if (subtype != FPDF_ANNOT_HIGHLIGHT)
|
||||
continue;
|
||||
|
||||
FS_RECTF rect;
|
||||
if (!FPDFAnnot_GetRect(annot.get(), &rect))
|
||||
continue;
|
||||
|
||||
Highlight highlight;
|
||||
// We use the bounding box of the highlight as the bounding rect.
|
||||
highlight.bounding_rect =
|
||||
PageToScreen(pp::Point(), 1.0, rect.left, rect.top, rect.right,
|
||||
rect.bottom, PageOrientation::kOriginal);
|
||||
GetUnderlyingTextRangeForRect(
|
||||
pp::FloatRect(rect.left, rect.bottom, std::abs(rect.right - rect.left),
|
||||
std::abs(rect.bottom - rect.top)),
|
||||
&highlight.start_char_index, &highlight.char_count);
|
||||
|
||||
// Retrieve the color of the highlight.
|
||||
unsigned int color_r;
|
||||
unsigned int color_g;
|
||||
unsigned int color_b;
|
||||
unsigned int color_a;
|
||||
FPDF_PAGEOBJECT page_object = FPDFAnnot_GetObject(annot.get(), 0);
|
||||
if (FPDFPageObj_GetFillColor(page_object, &color_r, &color_g, &color_b,
|
||||
&color_a)) {
|
||||
highlight.color = MakeARGB(color_a, color_r, color_g, color_b);
|
||||
} else {
|
||||
// Set the same default color as in pdfium. See calls to
|
||||
// GetColorStringWithDefault() in CPVT_GenerateAP::Generate*AP() in
|
||||
// pdfium.
|
||||
highlight.color = MakeARGB(255, 255, 255, 0);
|
||||
switch (subtype) {
|
||||
case FPDF_ANNOT_HIGHLIGHT: {
|
||||
PopulateHighlight(annot.get());
|
||||
break;
|
||||
}
|
||||
case FPDF_ANNOT_WIDGET: {
|
||||
// TODO(crbug.com/1030242): Populate other types of form fields too.
|
||||
if (FPDFAnnot_GetFormFieldType(engine_->form(), annot.get()) ==
|
||||
FPDF_FORMFIELD_TEXTFIELD) {
|
||||
PopulateTextField(annot.get());
|
||||
}
|
||||
break;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
highlights_.push_back(std::move(highlight));
|
||||
}
|
||||
calculated_annotations_ = true;
|
||||
}
|
||||
|
||||
// Builds a Highlight from |annot| and appends it to |highlights_|. |annot| must
// be a highlight annotation. Nothing is appended when the annotation's rect
// cannot be retrieved.
void PDFiumPage::PopulateHighlight(FPDF_ANNOTATION annot) {
  DCHECK(annot);
  DCHECK_EQ(FPDFAnnot_GetSubtype(annot), FPDF_ANNOT_HIGHLIGHT);

  FS_RECTF annot_rect;
  if (!FPDFAnnot_GetRect(annot, &annot_rect))
    return;

  Highlight entry;

  // The bounding box of the highlight serves as its bounding rect.
  entry.bounding_rect = PageToScreen(
      pp::Point(), 1.0, annot_rect.left, annot_rect.top, annot_rect.right,
      annot_rect.bottom, PageOrientation::kOriginal);

  // Find the run of text lying underneath the highlight.
  const auto area_width = std::abs(annot_rect.right - annot_rect.left);
  const auto area_height = std::abs(annot_rect.bottom - annot_rect.top);
  const pp::FloatRect text_area(annot_rect.left, annot_rect.bottom, area_width,
                                area_height);
  GetUnderlyingTextRangeForRect(text_area, &entry.start_char_index,
                                &entry.char_count);

  // Retrieve the color of the highlight from the annotation's first object.
  unsigned int red;
  unsigned int green;
  unsigned int blue;
  unsigned int alpha;
  FPDF_PAGEOBJECT first_object = FPDFAnnot_GetObject(annot, 0);
  const bool has_fill_color =
      FPDFPageObj_GetFillColor(first_object, &red, &green, &blue, &alpha);

  // When no fill color is available, fall back to the same default color as
  // pdfium. See calls to GetColorStringWithDefault() in
  // CPVT_GenerateAP::Generate*AP() in pdfium.
  entry.color = has_fill_color ? MakeARGB(alpha, red, green, blue)
                               : MakeARGB(255, 255, 255, 0);

  highlights_.push_back(std::move(entry));
}
|
||||
|
||||
// Builds a TextField from |annot| and appends it to |text_fields_|. |annot|
// must be a text form field. Nothing is appended when the annotation's rect
// cannot be retrieved.
void PDFiumPage::PopulateTextField(FPDF_ANNOTATION annot) {
  DCHECK(annot);
  FPDF_FORMHANDLE form = engine_->form();
  DCHECK_EQ(FPDFAnnot_GetFormFieldType(form, annot), FPDF_FORMFIELD_TEXTFIELD);

  FS_RECTF annot_rect;
  if (!FPDFAnnot_GetRect(annot, &annot_rect))
    return;

  TextField field;

  // The bounding box of the text field serves as its bounding rect.
  field.bounding_rect = PageToScreen(
      pp::Point(), 1.0, annot_rect.left, annot_rect.top, annot_rect.right,
      annot_rect.bottom, PageOrientation::kOriginal);

  // String properties are read through GetFormFieldProperty(), which handles
  // buffer sizing and the conversion to UTF-8.
  field.name = GetFormFieldProperty(
      base::BindRepeating(FPDFAnnot_GetFormFieldName, form, annot));
  field.value = GetFormFieldProperty(
      base::BindRepeating(FPDFAnnot_GetFormFieldValue, form, annot));
  field.flags = FPDFAnnot_GetFormFieldFlags(form, annot);

  text_fields_.push_back(std::move(field));
}
|
||||
|
||||
bool PDFiumPage::GetUnderlyingTextRangeForRect(const pp::FloatRect& rect,
|
||||
@ -1244,6 +1303,12 @@ PDFiumPage::Highlight::Highlight(const Highlight& that) = default;
|
||||
|
||||
PDFiumPage::Highlight::~Highlight() = default;
|
||||
|
||||
// Default constructor, copy constructor and destructor of TextField. They are
// declared in the struct and defaulted here, out of line.
PDFiumPage::TextField::TextField() = default;
|
||||
|
||||
PDFiumPage::TextField::TextField(const TextField& that) = default;
|
||||
|
||||
PDFiumPage::TextField::~TextField() = default;
|
||||
|
||||
int ToPDFiumRotation(PageOrientation orientation) {
|
||||
// Could static_cast<int>(orientation), but using an exhaustive switch will
|
||||
// trigger an error if we ever change the definition of PageOrientation.
|
||||
|
@ -170,6 +170,7 @@ class PDFiumPage {
|
||||
FRIEND_TEST_ALL_PREFIXES(PDFiumPageImageTest, TestImageAltText);
|
||||
FRIEND_TEST_ALL_PREFIXES(PDFiumPageLinkTest, TestLinkGeneration);
|
||||
FRIEND_TEST_ALL_PREFIXES(PDFiumPageHighlightTest, TestPopulateHighlights);
|
||||
FRIEND_TEST_ALL_PREFIXES(PDFiumPageTextFieldTest, TestPopulateTextFields);
|
||||
|
||||
// Returns a link index if the given character index is over a link, or -1
|
||||
// otherwise.
|
||||
@ -182,8 +183,12 @@ class PDFiumPage {
|
||||
void PopulateAnnotationLinks();
|
||||
// Calculate the locations of images on the page.
|
||||
void CalculateImages();
|
||||
// Populate highlights on the page.
|
||||
void PopulateHighlights();
|
||||
// Populate annotations like highlight and text field on the page.
|
||||
void PopulateAnnotations();
|
||||
// Populate |highlights_| with |annot|.
|
||||
void PopulateHighlight(FPDF_ANNOTATION annot);
|
||||
// Populate |text_fields_| with |annot|.
|
||||
void PopulateTextField(FPDF_ANNOTATION annot);
|
||||
// Returns link type and fills target associated with a link. Returns
|
||||
// NONSELECTABLE_AREA if link detection failed.
|
||||
Area GetLinkTarget(FPDF_LINK link, LinkTarget* target);
|
||||
@ -275,6 +280,20 @@ class PDFiumPage {
|
||||
uint32_t color;
|
||||
};
|
||||
|
||||
// Represents a text field within the page.
|
||||
struct TextField {
|
||||
TextField();
|
||||
TextField(const TextField& other);
|
||||
~TextField();
|
||||
|
||||
// Represents the name of form field as defined in the field dictionary.
|
||||
std::string name;
|
||||
std::string value;
|
||||
pp::Rect bounding_rect;
|
||||
// Represents the flags of form field as defined in the field dictionary.
|
||||
int flags;
|
||||
};
|
||||
|
||||
PDFiumEngine* engine_;
|
||||
ScopedFPDFPage page_;
|
||||
ScopedFPDFTextPage text_page_;
|
||||
@ -285,8 +304,9 @@ class PDFiumPage {
|
||||
std::vector<Link> links_;
|
||||
bool calculated_images_ = false;
|
||||
std::vector<Image> images_;
|
||||
bool calculated_highlights_ = false;
|
||||
bool calculated_annotations_ = false;
|
||||
std::vector<Highlight> highlights_;
|
||||
std::vector<TextField> text_fields_;
|
||||
bool calculated_page_object_text_run_breaks_ = false;
|
||||
// The set of character indices on which text runs need to be broken for page
|
||||
// objects.
|
||||
|
@ -372,7 +372,7 @@ TEST_F(PDFiumPageHighlightTest, TestPopulateHighlights) {
|
||||
|
||||
PDFiumPage* page = GetPDFiumPageForTest(engine.get(), 0);
|
||||
ASSERT_TRUE(page);
|
||||
page->PopulateHighlights();
|
||||
page->PopulateAnnotations();
|
||||
ASSERT_EQ(base::size(kExpectedHighlights), page->highlights_.size());
|
||||
|
||||
for (size_t i = 0; i < page->highlights_.size(); ++i) {
|
||||
@ -386,4 +386,41 @@ TEST_F(PDFiumPageHighlightTest, TestPopulateHighlights) {
|
||||
}
|
||||
}
|
||||
|
||||
using PDFiumPageTextFieldTest = PDFiumTestBase;
|
||||
|
||||
// Loads form_text_fields.pdf and checks that PopulateAnnotations() records the
// name, value, bounding rect and flags of every text field on its single page.
TEST_F(PDFiumPageTextFieldTest, TestPopulateTextFields) {
  struct ExpectedTextField {
    const char* name;
    const char* value;
    pp::Rect bounding_rect;
    int flags;
  };

  static const ExpectedTextField kExpectedTextFields[] = {
      {"Text Box", "Text", {138, 230, 135, 41}, 0},
      {"ReadOnly", "Elephant", {138, 163, 135, 41}, 1},
      {"Required", "Required Field", {138, 303, 135, 34}, 2},
      {"Password", "", {138, 356, 135, 35}, 8192}};

  TestClient test_client;
  std::unique_ptr<PDFiumEngine> pdf_engine = InitializeEngine(
      &test_client, FILE_PATH_LITERAL("form_text_fields.pdf"));
  ASSERT_TRUE(pdf_engine);
  ASSERT_EQ(1, pdf_engine->GetNumberOfPages());

  PDFiumPage* first_page = GetPDFiumPageForTest(pdf_engine.get(), 0);
  ASSERT_TRUE(first_page);
  first_page->PopulateAnnotations();

  // The counts must match before the entries are compared pairwise.
  size_t text_fields_count = first_page->text_fields_.size();
  ASSERT_EQ(base::size(kExpectedTextFields), text_fields_count);

  for (size_t index = 0; index < text_fields_count; ++index) {
    const ExpectedTextField& expected = kExpectedTextFields[index];
    const auto& actual = first_page->text_fields_[index];
    EXPECT_EQ(expected.name, actual.name);
    EXPECT_EQ(expected.value, actual.value);
    CompareRect(expected.bounding_rect, actual.bounding_rect);
    EXPECT_EQ(expected.flags, actual.flags);
  }
}
|
||||
|
||||
} // namespace chrome_pdf
|
||||
|
105
pdf/test/data/form_text_fields.in
Normal file
105
pdf/test/data/form_text_fields.in
Normal file
@ -0,0 +1,105 @@
|
||||
{{header}}
|
||||
{{object 1 0}} <<
|
||||
/Type /Catalog
|
||||
/Pages 2 0 R
|
||||
/AcroForm <<
|
||||
/Fields [ 7 0 R 8 0 R 9 0 R 10 0 R ]
|
||||
/DR 4 0 R
|
||||
>>
|
||||
>>
|
||||
endobj
|
||||
{{object 2 0}} <<
|
||||
/Count 1
|
||||
/Kids [ 3 0 R ]
|
||||
/Type /Pages
|
||||
>>
|
||||
endobj
|
||||
{{object 3 0}} <<
|
||||
/Type /Page
|
||||
/Parent 2 0 R
|
||||
/Resources 4 0 R
|
||||
/MediaBox [ 0 0 300 300 ]
|
||||
/Contents 6 0 R
|
||||
/Annots [ 7 0 R 8 0 R 9 0 R 10 0 R ]
|
||||
>>
|
||||
endobj
|
||||
{{object 4 0}} <<
|
||||
/Font <<
|
||||
/F1 5 0 R
|
||||
>>
|
||||
>>
|
||||
endobj
|
||||
{{object 5 0}} <<
|
||||
/Type /Font
|
||||
/Subtype /Type1
|
||||
/BaseFont /Helvetica
|
||||
>>
|
||||
endobj
|
||||
{{object 6 0}} <<
|
||||
{{streamlen}}
|
||||
>>
|
||||
stream
|
||||
BT
|
||||
/F1 12 Tf
|
||||
100 200 Td
|
||||
(Test Form) Tj
|
||||
/F1 12 Tf
|
||||
-80 -40 Td
|
||||
(Read Only:) Tj
|
||||
/F1 12 Tf
|
||||
0 -50 Td
|
||||
(Sample Text) Tj
|
||||
/F1 12 Tf
|
||||
200 -55 Td
|
||||
(*required field) Tj
|
||||
/F1 12 Tf
|
||||
-200 -35 Td
|
||||
(Password:) Tj
|
||||
ET
|
||||
endstream
|
||||
endobj
|
||||
{{object 7 0}} <<
|
||||
/Type /Annot
|
||||
/Subtype /Widget
|
||||
/FT /Tx
|
||||
/T (Text Box)
|
||||
/V (Text)
|
||||
/DA (0 0 0 rg /F1 12 Tf)
|
||||
/Rect [ 100 100 200 130 ]
|
||||
>>
|
||||
endobj
|
||||
{{object 8 0}} <<
|
||||
/Type /Annot
|
||||
/Subtype /Widget
|
||||
/FT /Tx
|
||||
/Ff 1
|
||||
/T (ReadOnly)
|
||||
/V (Elephant)
|
||||
/DA (0 0 0 rg /F1 12 Tf)
|
||||
/Rect [ 100 150 200 180 ]
|
||||
>>
|
||||
endobj
|
||||
{{object 9 0}} <<
|
||||
/Type /Annot
|
||||
/Subtype /Widget
|
||||
/FT /Tx
|
||||
/Ff 2
|
||||
/T (Required)
|
||||
/V (Required Field)
|
||||
/DA (0 0 0 rg /F1 12 Tf)
|
||||
/Rect [ 100 50 200 75 ]
|
||||
>>
|
||||
{{object 10 0}} <<
|
||||
/Type /Annot
|
||||
/Subtype /Widget
|
||||
/FT /Tx
|
||||
/Ff 8192
|
||||
/T (Password)
|
||||
/DA (0 0 0 rg /F1 12 Tf)
|
||||
/Rect [ 100 10 200 35 ]
|
||||
>>
|
||||
endobj
|
||||
{{xref}}
|
||||
{{trailer}}
|
||||
{{startxref}}
|
||||
%%EOF
|
122
pdf/test/data/form_text_fields.pdf
Normal file
122
pdf/test/data/form_text_fields.pdf
Normal file
@ -0,0 +1,122 @@
|
||||
%PDF-1.7
|
||||
%<25><><EFBFBD><EFBFBD>
|
||||
1 0 obj <<
|
||||
/Type /Catalog
|
||||
/Pages 2 0 R
|
||||
/AcroForm <<
|
||||
/Fields [ 7 0 R 8 0 R 9 0 R 10 0 R ]
|
||||
/DR 4 0 R
|
||||
>>
|
||||
>>
|
||||
endobj
|
||||
2 0 obj <<
|
||||
/Count 1
|
||||
/Kids [ 3 0 R ]
|
||||
/Type /Pages
|
||||
>>
|
||||
endobj
|
||||
3 0 obj <<
|
||||
/Type /Page
|
||||
/Parent 2 0 R
|
||||
/Resources 4 0 R
|
||||
/MediaBox [ 0 0 300 300 ]
|
||||
/Contents 6 0 R
|
||||
/Annots [ 7 0 R 8 0 R 9 0 R 10 0 R ]
|
||||
>>
|
||||
endobj
|
||||
4 0 obj <<
|
||||
/Font <<
|
||||
/F1 5 0 R
|
||||
>>
|
||||
>>
|
||||
endobj
|
||||
5 0 obj <<
|
||||
/Type /Font
|
||||
/Subtype /Type1
|
||||
/BaseFont /Helvetica
|
||||
>>
|
||||
endobj
|
||||
6 0 obj <<
|
||||
/Length 194
|
||||
>>
|
||||
stream
|
||||
BT
|
||||
/F1 12 Tf
|
||||
100 200 Td
|
||||
(Test Form) Tj
|
||||
/F1 12 Tf
|
||||
-80 -40 Td
|
||||
(Read Only:) Tj
|
||||
/F1 12 Tf
|
||||
0 -50 Td
|
||||
(Sample Text) Tj
|
||||
/F1 12 Tf
|
||||
200 -55 Td
|
||||
(*required field) Tj
|
||||
/F1 12 Tf
|
||||
-200 -35 Td
|
||||
(Password:) Tj
|
||||
ET
|
||||
endstream
|
||||
endobj
|
||||
7 0 obj <<
|
||||
/Type /Annot
|
||||
/Subtype /Widget
|
||||
/FT /Tx
|
||||
/T (Text Box)
|
||||
/V (Text)
|
||||
/DA (0 0 0 rg /F1 12 Tf)
|
||||
/Rect [ 100 100 200 130 ]
|
||||
>>
|
||||
endobj
|
||||
8 0 obj <<
|
||||
/Type /Annot
|
||||
/Subtype /Widget
|
||||
/FT /Tx
|
||||
/Ff 1
|
||||
/T (ReadOnly)
|
||||
/V (Elephant)
|
||||
/DA (0 0 0 rg /F1 12 Tf)
|
||||
/Rect [ 100 150 200 180 ]
|
||||
>>
|
||||
endobj
|
||||
9 0 obj <<
|
||||
/Type /Annot
|
||||
/Subtype /Widget
|
||||
/FT /Tx
|
||||
/Ff 2
|
||||
/T (Required)
|
||||
/V (Required Field)
|
||||
/DA (0 0 0 rg /F1 12 Tf)
|
||||
/Rect [ 100 50 200 75 ]
|
||||
>>
|
||||
10 0 obj <<
|
||||
/Type /Annot
|
||||
/Subtype /Widget
|
||||
/FT /Tx
|
||||
/Ff 8192
|
||||
/T (Password)
|
||||
/DA (0 0 0 rg /F1 12 Tf)
|
||||
/Rect [ 100 10 200 35 ]
|
||||
>>
|
||||
endobj
|
||||
xref
|
||||
0 11
|
||||
0000000000 65535 f
|
||||
0000000015 00000 n
|
||||
0000000143 00000 n
|
||||
0000000208 00000 n
|
||||
0000000363 00000 n
|
||||
0000000414 00000 n
|
||||
0000000490 00000 n
|
||||
0000000736 00000 n
|
||||
0000000884 00000 n
|
||||
0000001044 00000 n
|
||||
0000001201 00000 n
|
||||
trailer <<
|
||||
/Root 1 0 R
|
||||
/Size 11
|
||||
>>
|
||||
startxref
|
||||
1347
|
||||
%%EOF
|
Reference in New Issue
Block a user