0

Add back-end support for PDF attachments in PDFEngine.

1. Create a new struct DocumentAttachmentInfo, which contains basic
   information about a PDF attachment, including the attachment's name,
   size, creation date and last modified date.

2. Implement a method GetDocumentAttachmentInfoList() in PDFEngine,
   which returns a list of DocumentAttachmentInfo structure(s)
   associated with a PDF document.

3. Add 2 unit tests for the new method. To test on PDFs with
   attachments, this CL uploads a test PDF embedded_attachments.pdf,
   which contains 3 attachments: 1.txt, attached.pdf and 附錄.txt.
   These 3 attachments can cover the tests on different file types and
   names, and tests on attachments with no creation dates or last
   modified dates.

Bug: 177188
Change-Id: If6eaa61f64407af9c848fe2b3b814aa83fc23495
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2202510
Reviewed-by: Lei Zhang <thestig@chromium.org>
Reviewed-by: Daniel Hosseinian <dhoss@chromium.org>
Commit-Queue: Hui Yingst <nigi@chromium.org>
Cr-Commit-Position: refs/heads/master@{#773244}
This commit is contained in:
Hui Yingst
2020-05-29 17:35:38 +00:00
committed by Commit Bot
parent 7ef84f9815
commit c647be6cd2
8 changed files with 160 additions and 0 deletions

@ -49,6 +49,8 @@ if (enable_pdf) {
"accessibility.cc",
"accessibility.h",
"chunk_stream.h",
"document_attachment_info.cc",
"document_attachment_info.h",
"document_layout.cc",
"document_layout.h",
"document_loader.h",

@ -0,0 +1,16 @@
// Copyright 2020 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "pdf/document_attachment_info.h"
namespace chrome_pdf {
// The constructor, copy constructor and destructor are declared in
// document_attachment_info.h and defined here, out of line, as defaulted.
DocumentAttachmentInfo::DocumentAttachmentInfo() = default;
DocumentAttachmentInfo::DocumentAttachmentInfo(
const DocumentAttachmentInfo& other) = default;
DocumentAttachmentInfo::~DocumentAttachmentInfo() = default;
} // namespace chrome_pdf

@ -0,0 +1,36 @@
// Copyright 2020 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef PDF_DOCUMENT_ATTACHMENT_INFO_H_
#define PDF_DOCUMENT_ATTACHMENT_INFO_H_
#include "base/strings/string16.h"
namespace chrome_pdf {
// Basic information about a single attachment of a PDF document: its name,
// size, creation date and last modified date.
// NOTE(review): |size_bytes| uses uint32_t, but the only include visible in
// this header is base/strings/string16.h — confirm <stdint.h> is not needed
// explicitly here.
struct DocumentAttachmentInfo {
// Defined out of line in document_attachment_info.cc.
DocumentAttachmentInfo();
DocumentAttachmentInfo(const DocumentAttachmentInfo& other);
~DocumentAttachmentInfo();
// The attachment's name.
base::string16 name;
// The attachment's size in bytes. Defaults to 0.
uint32_t size_bytes = 0;
// The creation date of the attachment. It stores the arbitrary string saved
// in field "CreationDate"; the string is kept as-is rather than parsed into a
// time value.
base::string16 creation_date;
// Last modified date of the attachment. It stores the arbitrary string saved
// in field "ModDate"; the string is kept as-is rather than parsed into a time
// value.
base::string16 modified_date;
};
} // namespace chrome_pdf
#endif // PDF_DOCUMENT_ATTACHMENT_INFO_H_

@ -56,6 +56,7 @@ class VarDictionary;
namespace chrome_pdf {
struct DocumentAttachmentInfo;
struct DocumentMetadata;
// Do one time initialization of the SDK.
@ -371,6 +372,9 @@ class PDFEngine {
// Checks the permissions associated with this document.
virtual bool HasPermission(DocumentPermission permission) const = 0;
virtual void SelectAll() = 0;
// Gets the list of DocumentAttachmentInfo from the document, one entry per
// attachment. The list is returned by const reference, so callers get a
// read-only view rather than a copy.
virtual const std::vector<DocumentAttachmentInfo>&
GetDocumentAttachmentInfoList() const = 0;
// Gets metadata about the document.
virtual const DocumentMetadata& GetDocumentMetadata() const = 0;
// Gets the number of pages in the document.

@ -351,6 +351,19 @@ void SetLinkUnderCursor(pp::Instance* instance,
pp::PDF::SetLinkUnderCursor(instance, link_under_cursor.c_str());
}
// Returns the string value stored under key |field| of |attachment|, as
// produced by FPDFAttachment_GetStringValue() through the PDFium wide-string
// buffer adapter.
base::string16 GetAttachmentAttribute(FPDF_ATTACHMENT attachment,
                                      FPDF_BYTESTRING field) {
  // Bind the attachment and key up front; the adapter supplies the remaining
  // buffer arguments when it invokes the callback.
  auto read_string_value =
      base::BindRepeating(&FPDFAttachment_GetStringValue, attachment, field);
  return CallPDFiumWideStringBufferApi(read_string_value,
                                       /*check_expected_size=*/true);
}
// Returns the name of |attachment|, as produced by FPDFAttachment_GetName()
// through the PDFium wide-string buffer adapter.
base::string16 GetAttachmentName(FPDF_ATTACHMENT attachment) {
  // Bind the attachment up front; the adapter supplies the remaining buffer
  // arguments when it invokes the callback.
  auto read_name = base::BindRepeating(&FPDFAttachment_GetName, attachment);
  return CallPDFiumWideStringBufferApi(read_name,
                                       /*check_expected_size=*/true);
}
} // namespace
void InitializeSDK(bool enable_v8) {
@ -732,6 +745,8 @@ void PDFiumEngine::FinishLoadingDocument() {
if (need_update)
LoadPageInfo();
LoadDocumentAttachmentInfoList();
LoadDocumentMetadata();
if (called_do_document_action_)
@ -2106,6 +2121,12 @@ void PDFiumEngine::SelectAll() {
}
}
// Returns the attachment list stored in |doc_attachment_info_list_|, which
// LoadDocumentAttachmentInfoList() fills in. No PDFium calls are made here.
const std::vector<DocumentAttachmentInfo>&
PDFiumEngine::GetDocumentAttachmentInfoList() const {
// The list is only populated while finishing the document load, so callers
// are expected to wait until |document_loaded_| is set.
DCHECK(document_loaded_);
return doc_attachment_info_list_;
}
const DocumentMetadata& PDFiumEngine::GetDocumentMetadata() const {
DCHECK(document_loaded_);
return doc_metadata_;
@ -3753,6 +3774,28 @@ void PDFiumEngine::GetSelection(uint32_t* selection_start_page_index,
}
}
void PDFiumEngine::LoadDocumentAttachmentInfoList() {
DCHECK(document_loaded_);
int attachment_count = FPDFDoc_GetAttachmentCount(doc());
if (attachment_count <= 0)
return;
doc_attachment_info_list_.resize(attachment_count);
for (int i = 0; i < attachment_count; ++i) {
FPDF_ATTACHMENT attachment = FPDFDoc_GetAttachment(doc(), i);
DCHECK(attachment);
doc_attachment_info_list_[i].name = GetAttachmentName(attachment);
doc_attachment_info_list_[i].size_bytes =
FPDFAttachment_GetFile(attachment, /*buffer=*/nullptr, /*buflen=*/0);
doc_attachment_info_list_[i].creation_date =
GetAttachmentAttribute(attachment, "CreationDate");
doc_attachment_info_list_[i].modified_date =
GetAttachmentAttribute(attachment, "ModDate");
}
}
void PDFiumEngine::LoadDocumentMetadata() {
DCHECK(document_loaded_);

@ -17,6 +17,7 @@
#include "base/optional.h"
#include "base/time/time.h"
#include "base/timer/timer.h"
#include "pdf/document_attachment_info.h"
#include "pdf/document_layout.h"
#include "pdf/document_loader.h"
#include "pdf/document_metadata.h"
@ -113,6 +114,8 @@ class PDFiumEngine : public PDFEngine,
std::string GetLinkAtPosition(const pp::Point& point) override;
bool HasPermission(DocumentPermission permission) const override;
void SelectAll() override;
const std::vector<DocumentAttachmentInfo>& GetDocumentAttachmentInfoList()
const override;
const DocumentMetadata& GetDocumentMetadata() const override;
int GetNumberOfPages() override;
pp::VarArray GetBookmarks() override;
@ -586,6 +589,10 @@ class PDFiumEngine : public PDFEngine,
void OnFocusedAnnotationUpdated(FPDF_ANNOTATION annot, int page_index);
// Reads the attachments' information from the PDF document and stores it in
// |doc_attachment_info_list_|. To be called after the document is loaded.
void LoadDocumentAttachmentInfoList();
// Fetches and populates the fields of |doc_metadata_|. To be called after the
// document is loaded.
void LoadDocumentMetadata();
@ -796,6 +803,9 @@ class PDFiumEngine : public PDFEngine,
// Shadow matrix for generating the page shadow bitmap.
std::unique_ptr<draw_utils::ShadowMatrix> page_shadow_;
// Information about each of the document's attachments, filled in by
// LoadDocumentAttachmentInfoList().
std::vector<DocumentAttachmentInfo> doc_attachment_info_list_;
// Stores parsed document metadata.
DocumentMetadata doc_metadata_;

@ -4,8 +4,10 @@
#include "pdf/pdfium/pdfium_engine.h"
#include "base/strings/utf_string_conversions.h"
#include "base/test/scoped_feature_list.h"
#include "base/test/task_environment.h"
#include "pdf/document_attachment_info.h"
#include "pdf/document_layout.h"
#include "pdf/document_metadata.h"
#include "pdf/pdf_features.h"
@ -183,6 +185,53 @@ TEST_F(PDFiumEngineTest, ApplyDocumentLayoutAvoidsInfiniteLoop) {
CompareSize({343, 1463}, engine->ApplyDocumentLayout(options));
}
// Loads embedded_attachments.pdf and checks the name, size and date strings
// reported for each of its three attachments.
TEST_F(PDFiumEngineTest, GetDocumentAttachmentInfo) {
  NiceMock<MockTestClient> client;
  std::unique_ptr<PDFiumEngine> engine =
      InitializeEngine(&client, FILE_PATH_LITERAL("embedded_attachments.pdf"));
  ASSERT_TRUE(engine);

  const std::vector<DocumentAttachmentInfo>& info_list =
      engine->GetDocumentAttachmentInfoList();
  ASSERT_EQ(3u, info_list.size());

  // First attachment: a text file with both dates present.
  const DocumentAttachmentInfo& text_file = info_list[0];
  EXPECT_EQ("1.txt", base::UTF16ToUTF8(text_file.name));
  EXPECT_EQ(4u, text_file.size_bytes);
  EXPECT_EQ("D:20170712214438-07'00'",
            base::UTF16ToUTF8(text_file.creation_date));
  EXPECT_EQ("D:20160115091400", base::UTF16ToUTF8(text_file.modified_date));

  // Second attachment: an embedded PDF with both dates present.
  const DocumentAttachmentInfo& pdf_file = info_list[1];
  EXPECT_EQ("attached.pdf", base::UTF16ToUTF8(pdf_file.name));
  EXPECT_EQ(5869u, pdf_file.size_bytes);
  EXPECT_EQ("D:20170712214443-07'00'",
            base::UTF16ToUTF8(pdf_file.creation_date));
  EXPECT_EQ("D:20170712214410", base::UTF16ToUTF8(pdf_file.modified_date));

  // Third attachment: non-ASCII name, and neither a creation date nor a last
  // modified date.
  const DocumentAttachmentInfo& undated_file = info_list[2];
  EXPECT_EQ("附錄.txt", base::UTF16ToUTF8(undated_file.name));
  EXPECT_EQ(5u, undated_file.size_bytes);
  EXPECT_THAT(undated_file.creation_date, IsEmpty());
  EXPECT_THAT(undated_file.modified_date, IsEmpty());
}
// Loads hello_world2.pdf and expects the attachment list to have no entries.
TEST_F(PDFiumEngineTest, NoDocumentAttachmentInfo) {
  NiceMock<MockTestClient> client;
  std::unique_ptr<PDFiumEngine> engine =
      InitializeEngine(&client, FILE_PATH_LITERAL("hello_world2.pdf"));
  ASSERT_TRUE(engine);

  const std::vector<DocumentAttachmentInfo>& info_list =
      engine->GetDocumentAttachmentInfoList();
  EXPECT_EQ(0u, info_list.size());
}
TEST_F(PDFiumEngineTest, GetDocumentMetadata) {
NiceMock<MockTestClient> client;
std::unique_ptr<PDFiumEngine> engine =

Binary file not shown.