Add GetDocumentMetadata() to the interface of PDFEngine
The function returns a struct, DocumentMetadata, which contains fields with document metadata, including the entries from the /Info dictionary of a PDF. Implement the function in PDFiumEngine. Add a test and check in a test PDF. Remove PDFEngine::GetMetadata() as it only had one caller and its functionality would be redundant with the new GetDocumentMetadata(). Simultaneously, privatize the old GetDocumentMetadata(), which only fetched unparsed field entries one by one, and rename it to GetMetadataByField(). Bug: 93619 Change-Id: I36ce50fa2bf2e4a5eba94efa54cfd3e3bbf72f72 Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2096103 Commit-Queue: Daniel Hosseinian <dhoss@chromium.org> Reviewed-by: Lei Zhang <thestig@chromium.org> Cr-Commit-Position: refs/heads/master@{#753385}
This commit is contained in:

committed by
Commit Bot

parent
682b6cbf39
commit
3881c97ab1
@ -54,6 +54,8 @@ if (enable_pdf) {
|
||||
"document_loader.h",
|
||||
"document_loader_impl.cc",
|
||||
"document_loader_impl.h",
|
||||
"document_metadata.cc",
|
||||
"document_metadata.h",
|
||||
"draw_utils/coordinates.cc",
|
||||
"draw_utils/coordinates.h",
|
||||
"draw_utils/shadow.cc",
|
||||
|
13
pdf/document_metadata.cc
Normal file
13
pdf/document_metadata.cc
Normal file
@ -0,0 +1,13 @@
|
||||
// Copyright 2020 The Chromium Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
#include "pdf/document_metadata.h"
|
||||
|
||||
namespace chrome_pdf {
|
||||
|
||||
// Out-of-line definitions of the DocumentMetadata constructor and destructor
// declared in pdf/document_metadata.h. Both use the compiler-generated
// behavior.
DocumentMetadata::DocumentMetadata() = default;
|
||||
|
||||
DocumentMetadata::~DocumentMetadata() = default;
|
||||
|
||||
} // namespace chrome_pdf
|
43
pdf/document_metadata.h
Normal file
43
pdf/document_metadata.h
Normal file
@ -0,0 +1,43 @@
|
||||
// Copyright 2020 The Chromium Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
#ifndef PDF_DOCUMENT_METADATA_H_
|
||||
#define PDF_DOCUMENT_METADATA_H_
|
||||
|
||||
#include <string>
|
||||
|
||||
namespace chrome_pdf {
|
||||
|
||||
// Document properties, including those specified in the document information
|
||||
// dictionary (see section 14.3.3 "Document Information Dictionary" of the ISO
|
||||
// 32000-1 standard), as well as other properties about the file.
|
||||
// TODO(crbug.com/93619): Finish adding information dictionary fields like
|
||||
// |keywords|, |creation_date|, and |mod_date|. Also add fields like |version|,
|
||||
// |size_bytes|, |is_encrypted|, and |is_linearized|.
|
||||
// Plain collection of string fields describing a document. All fields are
// default-constructed (empty) until a producer assigns them.
struct DocumentMetadata {
|
||||
// Constructor and destructor are declared here and defined out of line.
DocumentMetadata();
|
||||
// Not copyable: copy construction and copy assignment are both deleted.
DocumentMetadata(const DocumentMetadata&) = delete;
|
||||
DocumentMetadata& operator=(const DocumentMetadata&) = delete;
|
||||
~DocumentMetadata();
|
||||
|
||||
// The document's title.
|
||||
std::string title;
|
||||
|
||||
// The name of the document's creator.
|
||||
std::string author;
|
||||
|
||||
// The document's subject.
|
||||
std::string subject;
|
||||
|
||||
// The name of the application that created the original document.
|
||||
std::string creator;
|
||||
|
||||
// If the document's format was not originally PDF, the name of the
|
||||
// application that converted the document to PDF.
|
||||
std::string producer;
|
||||
};
|
||||
|
||||
} // namespace chrome_pdf
|
||||
|
||||
#endif // PDF_DOCUMENT_METADATA_H_
|
@ -25,6 +25,7 @@
|
||||
#include "net/base/filename_util.h"
|
||||
#include "pdf/accessibility.h"
|
||||
#include "pdf/document_layout.h"
|
||||
#include "pdf/document_metadata.h"
|
||||
#include "pdf/pdf.h"
|
||||
#include "pdf/pdf_features.h"
|
||||
#include "ppapi/c/dev/ppb_cursor_control_dev.h"
|
||||
@ -1679,7 +1680,7 @@ void OutOfProcessInstance::DocumentLoadComplete(
|
||||
|
||||
pp::VarDictionary metadata_message;
|
||||
metadata_message.Set(pp::Var(kType), pp::Var(kJSMetadataType));
|
||||
std::string title = engine_->GetMetadata("Title");
|
||||
const std::string& title = engine_->GetDocumentMetadata().title;
|
||||
if (!base::TrimWhitespace(base::UTF8ToUTF16(title), base::TRIM_ALL).empty()) {
|
||||
metadata_message.Set(pp::Var(kJSTitle), pp::Var(title));
|
||||
HistogramEnumeration("PDF.DocumentFeature", HAS_TITLE, FEATURES_COUNT);
|
||||
|
@ -56,6 +56,8 @@ class VarDictionary;
|
||||
|
||||
namespace chrome_pdf {
|
||||
|
||||
struct DocumentMetadata;
|
||||
|
||||
// Do one time initialization of the SDK.
|
||||
// If |enable_v8| is false, then the PDFEngine will not be able to run
|
||||
// JavaScript.
|
||||
@ -363,6 +365,8 @@ class PDFEngine {
|
||||
// Checks the permissions associated with this document.
|
||||
virtual bool HasPermission(DocumentPermission permission) const = 0;
|
||||
virtual void SelectAll() = 0;
|
||||
// Gets metadata about the document.
|
||||
virtual const DocumentMetadata& GetDocumentMetadata() const = 0;
|
||||
// Gets the number of pages in the document.
|
||||
virtual int GetNumberOfPages() = 0;
|
||||
// Gets the named destination by name.
|
||||
@ -434,7 +438,6 @@ class PDFEngine {
|
||||
// document at page |index|.
|
||||
virtual void AppendPage(PDFEngine* engine, int index) = 0;
|
||||
|
||||
virtual std::string GetMetadata(const std::string& key) = 0;
|
||||
virtual std::vector<uint8_t> GetSaveData() = 0;
|
||||
|
||||
virtual void SetCaretPosition(const pp::Point& position) = 0;
|
||||
|
@ -220,19 +220,6 @@ bool FindMultipleClickBoundary(bool is_double_click, base::char16 cur) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Returns the metadata text stored under |key| in |doc|, converted to UTF-8.
// Returns an empty string when FPDF_GetMetaText() reports a size of 0 for
// |key|.
std::string GetDocumentMetadata(FPDF_DOCUMENT doc, const std::string& key) {
|
||||
// First call passes no buffer; only the returned size is used.
// NOTE(review): presumably the size is in bytes and covers the terminator;
// confirm against the FPDF_GetMetaText() documentation.
size_t size = FPDF_GetMetaText(doc, key.c_str(), nullptr, 0);
|
||||
if (size == 0)
|
||||
return std::string();
|
||||
|
||||
base::string16 value;
|
||||
// The adapter hands out a buffer backed by |value| for the second call.
PDFiumAPIStringBufferSizeInBytesAdapter<base::string16> string_adapter(
|
||||
&value, size, false);
|
||||
// Second call writes the text into the adapter's buffer; its return value is
// passed to Close().
string_adapter.Close(
|
||||
FPDF_GetMetaText(doc, key.c_str(), string_adapter.GetData(), size));
|
||||
return base::UTF16ToUTF8(value);
|
||||
}
|
||||
|
||||
gin::IsolateHolder* g_isolate_holder = nullptr;
|
||||
|
||||
bool IsV8Initialized() {
|
||||
@ -617,10 +604,6 @@ void PDFiumEngine::AppendPage(PDFEngine* engine, int index) {
|
||||
client_->Invalidate(GetPageScreenRect(index));
|
||||
}
|
||||
|
||||
// Returns the result of the file-local GetDocumentMetadata() helper for |key|
// on this engine's document, doc().
std::string PDFiumEngine::GetMetadata(const std::string& key) {
|
||||
return GetDocumentMetadata(doc(), key);
|
||||
}
|
||||
|
||||
std::vector<uint8_t> PDFiumEngine::GetSaveData() {
|
||||
PDFiumMemBufferFileWrite output_file_write;
|
||||
if (!FPDF_SaveAsCopy(doc(), &output_file_write, 0))
|
||||
@ -731,6 +714,8 @@ void PDFiumEngine::FinishLoadingDocument() {
|
||||
if (need_update)
|
||||
LoadPageInfo();
|
||||
|
||||
LoadDocumentMetadata();
|
||||
|
||||
if (called_do_document_action_)
|
||||
return;
|
||||
called_do_document_action_ = true;
|
||||
@ -2083,6 +2068,11 @@ void PDFiumEngine::SelectAll() {
|
||||
}
|
||||
}
|
||||
|
||||
// Returns a reference to the engine's stored |doc_metadata_|. The returned
// reference aliases the member; no copy is made.
const DocumentMetadata& PDFiumEngine::GetDocumentMetadata() const {
|
||||
// |doc_metadata_| is filled in by LoadDocumentMetadata(), which itself
// DCHECKs |document_loaded_|, so callers are expected to wait for loading.
DCHECK(document_loaded_);
|
||||
return doc_metadata_;
|
||||
}
|
||||
|
||||
// Returns the number of entries in |pages_|, converted to int.
int PDFiumEngine::GetNumberOfPages() {
|
||||
return pages_.size();
|
||||
}
|
||||
@ -3702,6 +3692,33 @@ void PDFiumEngine::GetSelection(uint32_t* selection_start_page_index,
|
||||
}
|
||||
}
|
||||
|
||||
// Populates |doc_metadata_| by querying GetMetadataByField() once per field.
// DCHECKs that |document_loaded_| is set. Every call overwrites all five
// fields.
void PDFiumEngine::LoadDocumentMetadata() {
|
||||
DCHECK(document_loaded_);
|
||||
|
||||
// Document information dictionary entries
|
||||
// Each string literal below is the field name handed to
// GetMetadataByField(); a field that yields no text is stored as an empty
// string.
doc_metadata_.title = GetMetadataByField("Title");
|
||||
doc_metadata_.author = GetMetadataByField("Author");
|
||||
doc_metadata_.subject = GetMetadataByField("Subject");
|
||||
doc_metadata_.creator = GetMetadataByField("Creator");
|
||||
doc_metadata_.producer = GetMetadataByField("Producer");
|
||||
}
|
||||
|
||||
// Returns the text FPDF_GetMetaText() reports for |field| on doc(), converted
// to UTF-8. Returns an empty string when the reported size is 0. DCHECKs that
// doc() is non-null.
std::string PDFiumEngine::GetMetadataByField(FPDF_BYTESTRING field) const {
|
||||
DCHECK(doc());
|
||||
|
||||
// First call passes no buffer; only the returned size is used.
// NOTE(review): presumably the size is in bytes and covers the terminator;
// confirm against the FPDF_GetMetaText() documentation.
size_t size =
|
||||
FPDF_GetMetaText(doc(), field, /*buffer=*/nullptr, /*buflen=*/0);
|
||||
if (size == 0)
|
||||
return std::string();
|
||||
|
||||
base::string16 value;
|
||||
// The adapter hands out a buffer backed by |value| for the second call.
PDFiumAPIStringBufferSizeInBytesAdapter<base::string16> string_adapter(
|
||||
&value, size, /*check_expected_size=*/false);
|
||||
// Second call writes the text into the adapter's buffer; its return value is
// passed to Close().
string_adapter.Close(
|
||||
FPDF_GetMetaText(doc(), field, string_adapter.GetData(), size));
|
||||
return base::UTF16ToUTF8(value);
|
||||
}
|
||||
|
||||
#if defined(PDF_ENABLE_XFA)
|
||||
void PDFiumEngine::UpdatePageCount() {
|
||||
InvalidateAllPages();
|
||||
|
@ -19,6 +19,7 @@
|
||||
#include "base/timer/timer.h"
|
||||
#include "pdf/document_layout.h"
|
||||
#include "pdf/document_loader.h"
|
||||
#include "pdf/document_metadata.h"
|
||||
#include "pdf/pdf_engine.h"
|
||||
#include "pdf/pdfium/pdfium_form_filler.h"
|
||||
#include "pdf/pdfium/pdfium_page.h"
|
||||
@ -102,6 +103,7 @@ class PDFiumEngine : public PDFEngine,
|
||||
std::string GetLinkAtPosition(const pp::Point& point) override;
|
||||
bool HasPermission(DocumentPermission permission) const override;
|
||||
void SelectAll() override;
|
||||
const DocumentMetadata& GetDocumentMetadata() const override;
|
||||
int GetNumberOfPages() override;
|
||||
pp::VarArray GetBookmarks() override;
|
||||
base::Optional<PDFEngine::NamedDestination> GetNamedDestination(
|
||||
@ -130,7 +132,6 @@ class PDFiumEngine : public PDFEngine,
|
||||
bool GetPageSizeAndUniformity(pp::Size* size) override;
|
||||
void AppendBlankPages(size_t num_pages) override;
|
||||
void AppendPage(PDFEngine* engine, int index) override;
|
||||
std::string GetMetadata(const std::string& key) override;
|
||||
std::vector<uint8_t> GetSaveData() override;
|
||||
void SetCaretPosition(const pp::Point& position) override;
|
||||
void MoveRangeSelectionExtent(const pp::Point& extent) override;
|
||||
@ -571,6 +572,14 @@ class PDFiumEngine : public PDFEngine,
|
||||
// already in view.
|
||||
void ScrollIntoView(const pp::Rect& rect);
|
||||
|
||||
// Fetches and populates the fields of |doc_metadata_|. To be called after the
|
||||
// document is loaded.
|
||||
void LoadDocumentMetadata();
|
||||
|
||||
// Retrieves the unparsed value of |field| in the document information
|
||||
// dictionary.
|
||||
std::string GetMetadataByField(FPDF_BYTESTRING field) const;
|
||||
|
||||
PDFEngine::Client* const client_;
|
||||
|
||||
// The current document layout.
|
||||
@ -741,6 +750,9 @@ class PDFiumEngine : public PDFEngine,
|
||||
// Shadow matrix for generating the page shadow bitmap.
|
||||
std::unique_ptr<draw_utils::ShadowMatrix> page_shadow_;
|
||||
|
||||
// Stores parsed document metadata.
|
||||
DocumentMetadata doc_metadata_;
|
||||
|
||||
// While true, an attempt is made to open and parse the document after each
|
||||
// part is downloaded. Otherwise, the document is opened and parsed only
|
||||
// once downloading has finished.
|
||||
|
@ -5,6 +5,7 @@
|
||||
#include "pdf/pdfium/pdfium_engine.h"
|
||||
|
||||
#include "pdf/document_layout.h"
|
||||
#include "pdf/document_metadata.h"
|
||||
#include "pdf/pdfium/pdfium_page.h"
|
||||
#include "pdf/pdfium/pdfium_test_base.h"
|
||||
#include "pdf/test/test_client.h"
|
||||
@ -17,6 +18,7 @@ namespace chrome_pdf {
|
||||
namespace {
|
||||
|
||||
using ::testing::InSequence;
|
||||
using ::testing::IsEmpty;
|
||||
using ::testing::NiceMock;
|
||||
using ::testing::Return;
|
||||
|
||||
@ -177,5 +179,35 @@ TEST_F(PDFiumEngineTest, ApplyDocumentLayoutAvoidsInfiniteLoop) {
|
||||
CompareSize({343, 1463}, engine->ApplyDocumentLayout(options));
|
||||
}
|
||||
|
||||
// Loads document_info.pdf and checks that the five DocumentMetadata fields
// match the Title, Author, Subject, Creator and Producer entries of that
// file's /Info dictionary.
TEST_F(PDFiumEngineTest, GetDocumentMetadata) {
|
||||
NiceMock<MockTestClient> client;
|
||||
std::unique_ptr<PDFiumEngine> engine =
|
||||
InitializeEngine(&client, FILE_PATH_LITERAL("document_info.pdf"));
|
||||
// Stop here if the engine could not be created; the checks below
// dereference it.
ASSERT_TRUE(engine);
|
||||
|
||||
const DocumentMetadata& doc_metadata = engine->GetDocumentMetadata();
|
||||
|
||||
// The test file also carries Keywords, CreationDate and ModDate entries;
// those are not checked here.
EXPECT_EQ("Sample PDF Document Info", doc_metadata.title);
|
||||
EXPECT_EQ("Chromium Authors", doc_metadata.author);
|
||||
EXPECT_EQ("Testing", doc_metadata.subject);
|
||||
EXPECT_EQ("Your Preferred Text Editor", doc_metadata.creator);
|
||||
EXPECT_EQ("fixup_pdf_template.py", doc_metadata.producer);
|
||||
}
|
||||
|
||||
// Loads hello_world2.pdf and checks that every DocumentMetadata field is an
// empty string.
// NOTE(review): presumably hello_world2.pdf carries none of these /Info
// entries; confirm against the test file.
TEST_F(PDFiumEngineTest, GetEmptyDocumentMetadata) {
|
||||
NiceMock<MockTestClient> client;
|
||||
std::unique_ptr<PDFiumEngine> engine =
|
||||
InitializeEngine(&client, FILE_PATH_LITERAL("hello_world2.pdf"));
|
||||
// Stop here if the engine could not be created; the checks below
// dereference it.
ASSERT_TRUE(engine);
|
||||
|
||||
const DocumentMetadata& doc_metadata = engine->GetDocumentMetadata();
|
||||
|
||||
EXPECT_THAT(doc_metadata.title, IsEmpty());
|
||||
EXPECT_THAT(doc_metadata.author, IsEmpty());
|
||||
EXPECT_THAT(doc_metadata.subject, IsEmpty());
|
||||
EXPECT_THAT(doc_metadata.creator, IsEmpty());
|
||||
EXPECT_THAT(doc_metadata.producer, IsEmpty());
|
||||
}
|
||||
|
||||
} // namespace
|
||||
} // namespace chrome_pdf
|
||||
|
37
pdf/test/data/document_info.in
Normal file
37
pdf/test/data/document_info.in
Normal file
@ -0,0 +1,37 @@
|
||||
{{header}}
|
||||
{{object 1 0}} <<
|
||||
/Type /Catalog
|
||||
/Pages 2 0 R
|
||||
>>
|
||||
endobj
|
||||
{{object 2 0}} <<
|
||||
/Type /Pages
|
||||
/MediaBox [ 0 0 200 200 ]
|
||||
/Count 1
|
||||
/Kids [ 3 0 R ]
|
||||
>>
|
||||
endobj
|
||||
{{object 3 0}} <<
|
||||
/Type /Page
|
||||
/Parent 2 0 R
|
||||
>>
|
||||
endobj
|
||||
{{object 4 0}} <<
|
||||
/Author (Chromium Authors)
|
||||
/CreationDate (D:20200205153912+00'00')
|
||||
/Creator (Your Preferred Text Editor)
|
||||
/Keywords (testing,chromium,pdfium,document,info)
|
||||
/ModDate (D:20200206094234+00'00')
|
||||
/Producer (fixup_pdf_template.py)
|
||||
/Subject (Testing)
|
||||
/Title (Sample PDF Document Info)
|
||||
>>
|
||||
endobj
|
||||
{{xref}}
|
||||
trailer <<
|
||||
/Info 4 0 R
|
||||
/Root 1 0 R
|
||||
{{trailersize}}
|
||||
>>
|
||||
{{startxref}}
|
||||
%%EOF
|
45
pdf/test/data/document_info.pdf
Normal file
45
pdf/test/data/document_info.pdf
Normal file
@ -0,0 +1,45 @@
|
||||
%PDF-1.7
|
||||
%<25><><EFBFBD><EFBFBD>
|
||||
1 0 obj <<
|
||||
/Type /Catalog
|
||||
/Pages 2 0 R
|
||||
>>
|
||||
endobj
|
||||
2 0 obj <<
|
||||
/Type /Pages
|
||||
/MediaBox [ 0 0 200 200 ]
|
||||
/Count 1
|
||||
/Kids [ 3 0 R ]
|
||||
>>
|
||||
endobj
|
||||
3 0 obj <<
|
||||
/Type /Page
|
||||
/Parent 2 0 R
|
||||
>>
|
||||
endobj
|
||||
4 0 obj <<
|
||||
/Author (Chromium Authors)
|
||||
/CreationDate (D:20200205153912+00'00')
|
||||
/Creator (Your Preferred Text Editor)
|
||||
/Keywords (testing,chromium,pdfium,document,info)
|
||||
/ModDate (D:20200206094234+00'00')
|
||||
/Producer (fixup_pdf_template.py)
|
||||
/Subject (Testing)
|
||||
/Title (Sample PDF Document Info)
|
||||
>>
|
||||
endobj
|
||||
xref
|
||||
0 5
|
||||
0000000000 65535 f
|
||||
0000000015 00000 n
|
||||
0000000068 00000 n
|
||||
0000000161 00000 n
|
||||
0000000212 00000 n
|
||||
trailer <<
|
||||
/Info 4 0 R
|
||||
/Root 1 0 R
|
||||
/Size 5
|
||||
>>
|
||||
startxref
|
||||
526
|
||||
%%EOF
|
Reference in New Issue
Block a user