Tagged PDFs: Stores the tag's type in the accessibility data in Chrome's PDF Viewer
The "structure tree" inside a PDF file provides information that assistive software can use to expose the organization and semantics of the file's elements, e.g. to indicate where the headings are, or to present some textual information as a table. PDFs with a "structure tree" are called "tagged". This patch starts the journey of supporting tagged PDFs by exposing the tag's type, i.e. the accessibility role, of every text span to assistive software. This allows, e.g., headings and list bullets to be supported. Design doc at: https://docs.google.com/document/d/1ScD93clMA7AtViWINnaQgRTAK3vbpMshQZQXmWkfCyQ/edit?usp=sharing AX-Relnotes: n/a. Change-Id: Id56a8d6e21196c383b0d38ceac24923bf5fadd86 Bug: 40707542 Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/5980169 Reviewed-by: Ramin Halavati <rhalavati@chromium.org> Commit-Queue: Nektarios Paisios <nektar@chromium.org> Reviewed-by: Kyungjun Lee <kyungjunlee@google.com> Reviewed-by: Lei Zhang <thestig@chromium.org> Cr-Commit-Position: refs/heads/main@{#1405381}
This commit is contained in:

committed by
Chromium LUCI CQ

parent
fda80b6257
commit
a3551e7a1a
@ -12,6 +12,7 @@
|
||||
#include <vector>
|
||||
|
||||
#include "base/check_is_test.h"
|
||||
#include "base/feature_list.h"
|
||||
#include "base/functional/bind.h"
|
||||
#include "base/functional/callback.h"
|
||||
#include "base/location.h"
|
||||
@ -248,6 +249,7 @@ gfx::Transform MakeTransformForImage(const gfx::RectF image_screen_size,
|
||||
bool PdfOcrInRenderer() {
|
||||
return !base::FeatureList::IsEnabled(chrome_pdf::features::kPdfSearchify);
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
PdfAccessibilityTree::PdfAccessibilityTree(
|
||||
@ -470,6 +472,7 @@ void PdfAccessibilityTree::DoSetAccessibilityDocInfo(
|
||||
|
||||
ClearAccessibilityNodes();
|
||||
page_count_ = doc_info.page_count;
|
||||
is_tagged_ = doc_info.is_tagged;
|
||||
|
||||
doc_node_ =
|
||||
CreateNode(ax::mojom::Role::kPdfRoot, ax::mojom::Restriction::kReadOnly,
|
||||
@ -674,9 +677,9 @@ void PdfAccessibilityTree::AddPageContent(
|
||||
auto obj = GetPluginContainerAXObject();
|
||||
CHECK(obj);
|
||||
PdfAccessibilityTreeBuilder tree_builder(
|
||||
GetWeakPtr(), text_runs, chars, page_objects, page_info, page_index,
|
||||
doc_node_.get(), &(*obj), &nodes_, &node_id_to_page_char_index_,
|
||||
&node_id_to_annotation_info_
|
||||
/*mark_headings_using_heuristic=*/!is_tagged_, text_runs, chars,
|
||||
page_objects, page_info, page_index, doc_node_.get(), &(*obj), &nodes_,
|
||||
&node_id_to_page_char_index_, &node_id_to_annotation_info_
|
||||
#if BUILDFLAG(ENABLE_SCREEN_AI_SERVICE)
|
||||
,
|
||||
ocr_helper_.get(), had_accessible_text_
|
||||
|
@ -271,6 +271,7 @@ class PdfAccessibilityTree : public ui::AXTreeSource<const ui::AXNode*,
|
||||
uint32_t selection_end_page_index_ = 0;
|
||||
uint32_t selection_end_char_index_ = 0;
|
||||
uint32_t page_count_ = 0;
|
||||
bool is_tagged_ = false;
|
||||
std::unique_ptr<ui::AXNodeData> doc_node_;
|
||||
// The banner node will have an appropriate ARIA landmark for easy navigation
|
||||
// for screen reader users. It will contain the status node below.
|
||||
|
@ -4,6 +4,8 @@
|
||||
|
||||
#include "components/pdf/renderer/pdf_accessibility_tree.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <iterator>
|
||||
#include <map>
|
||||
#include <memory>
|
||||
|
||||
@ -68,11 +70,11 @@ namespace pdf {
|
||||
namespace {
|
||||
|
||||
const chrome_pdf::AccessibilityTextRunInfo kFirstTextRun = {
|
||||
15, gfx::RectF(26.0f, 189.0f, 84.0f, 13.0f),
|
||||
15, "P", gfx::RectF(26.0f, 189.0f, 84.0f, 13.0f),
|
||||
chrome_pdf::AccessibilityTextDirection::kNone,
|
||||
chrome_pdf::AccessibilityTextStyleInfo()};
|
||||
const chrome_pdf::AccessibilityTextRunInfo kSecondTextRun = {
|
||||
15, gfx::RectF(28.0f, 117.0f, 152.0f, 19.0f),
|
||||
15, "P", gfx::RectF(28.0f, 117.0f, 152.0f, 19.0f),
|
||||
chrome_pdf::AccessibilityTextDirection::kNone,
|
||||
chrome_pdf::AccessibilityTextStyleInfo()};
|
||||
const chrome_pdf::AccessibilityCharInfo kDummyCharsData[] = {
|
||||
@ -83,19 +85,19 @@ const chrome_pdf::AccessibilityCharInfo kDummyCharsData[] = {
|
||||
{'w', 16}, {'o', 12}, {'r', 8}, {'l', 4}, {'d', 12}, {'!', 2},
|
||||
};
|
||||
const chrome_pdf::AccessibilityTextRunInfo kFirstRunMultiLine = {
|
||||
7, gfx::RectF(26.0f, 189.0f, 84.0f, 13.0f),
|
||||
7, "P", gfx::RectF(26.0f, 189.0f, 84.0f, 13.0f),
|
||||
chrome_pdf::AccessibilityTextDirection::kNone,
|
||||
chrome_pdf::AccessibilityTextStyleInfo()};
|
||||
const chrome_pdf::AccessibilityTextRunInfo kSecondRunMultiLine = {
|
||||
8, gfx::RectF(26.0f, 189.0f, 84.0f, 13.0f),
|
||||
8, "P", gfx::RectF(26.0f, 189.0f, 84.0f, 13.0f),
|
||||
chrome_pdf::AccessibilityTextDirection::kNone,
|
||||
chrome_pdf::AccessibilityTextStyleInfo()};
|
||||
const chrome_pdf::AccessibilityTextRunInfo kThirdRunMultiLine = {
|
||||
9, gfx::RectF(26.0f, 189.0f, 84.0f, 13.0f),
|
||||
9, "P", gfx::RectF(26.0f, 189.0f, 84.0f, 13.0f),
|
||||
chrome_pdf::AccessibilityTextDirection::kNone,
|
||||
chrome_pdf::AccessibilityTextStyleInfo()};
|
||||
const chrome_pdf::AccessibilityTextRunInfo kFourthRunMultiLine = {
|
||||
6, gfx::RectF(26.0f, 189.0f, 84.0f, 13.0f),
|
||||
6, "P", gfx::RectF(26.0f, 189.0f, 84.0f, 13.0f),
|
||||
chrome_pdf::AccessibilityTextDirection::kNone,
|
||||
chrome_pdf::AccessibilityTextStyleInfo()};
|
||||
|
||||
@ -362,6 +364,7 @@ class PdfAccessibilityTreeTest : public content::RenderViewTest {
|
||||
viewport_info_.selection_start_char_index = 0u;
|
||||
viewport_info_.selection_end_page_index = 0u;
|
||||
viewport_info_.selection_end_char_index = 0u;
|
||||
doc_info_.is_tagged = false;
|
||||
doc_info_.text_accessible = true;
|
||||
doc_info_.text_copyable = true;
|
||||
doc_info_.page_count = 1u;
|
||||
@ -602,6 +605,108 @@ TEST_F(PdfAccessibilityTreeTest, TestPdfAccessibilityTreeCreation) {
|
||||
image_node->GetStringAttribute(ax::mojom::StringAttribute::kName));
|
||||
}
|
||||
|
||||
// With the `kPdfTags` feature disabled, the first text run is given a larger
// font than the other three. The test expects that run to become a level-2
// heading carrying the "h2" HTML tag, and the remaining runs to be paragraphs.
TEST_F(PdfAccessibilityTreeTest, HeadingsDetectedByHeuristic) {
  base::test::ScopedFeatureList scoped_features;
  scoped_features.InitAndDisableFeature(chrome_pdf::features::kPdfTags);

  CreatePdfAccessibilityTree();

  // Only the first of the four runs uses the large font.
  text_runs_ = {kFirstTextRun, kSecondTextRun, kFirstTextRun, kSecondTextRun};
  for (size_t run_index = 0; run_index < text_runs_.size(); ++run_index) {
    text_runs_[run_index].style.font_size = run_index == 0 ? 16.0f : 8.0f;
  }

  // The character data consists of the dummy characters, twice in a row.
  chars_.clear();
  for (int repetition = 0; repetition < 2; ++repetition) {
    chars_.insert(chars_.end(), std::begin(kDummyCharsData),
                  std::end(kDummyCharsData));
  }

  page_info_.text_run_count = text_runs_.size();
  page_info_.char_count = chars_.size();

  pdf_accessibility_tree_->SetAccessibilityDocInfo(doc_info_);
  pdf_accessibility_tree_->SetAccessibilityViewportInfo(viewport_info_);
  pdf_accessibility_tree_->SetAccessibilityPageInfo(page_info_, text_runs_,
                                                    chars_, page_objects_);
  WaitForThreadTasks();
  // Wait for `PdfAccessibilityTree::UnserializeNodes()`, a delayed task.
  WaitForThreadDelayedTasks();

  const ui::AXNode* root_node = pdf_accessibility_tree_->GetRoot();
  CheckRootAndStatusNodes(root_node, doc_info_.page_count,
                          /*is_pdf_ocr_test=*/false, /*is_ocr_completed=*/false,
                          /*create_empty_ocr_results=*/false);

  // The page node is the root's second child and holds one node per run.
  ASSERT_GT(root_node->GetChildCount(), 1u);
  const ui::AXNode* page_node = root_node->GetChildAtIndex(1u);
  ASSERT_NE(nullptr, page_node);
  ASSERT_EQ(4u, page_node->GetChildCount());

  const ui::AXNode* heading_node = page_node->GetChildAtIndex(0u);
  ASSERT_NE(nullptr, heading_node);
  EXPECT_EQ(ax::mojom::Role::kHeading, heading_node->GetRole());
  EXPECT_EQ(2, heading_node->GetIntAttribute(
                   ax::mojom::IntAttribute::kHierarchicalLevel));
  EXPECT_EQ("h2", heading_node->GetStringAttribute(
                      ax::mojom::StringAttribute::kHtmlTag));

  // Every remaining child must be a plain paragraph.
  for (size_t child_index = 1u; child_index < 4u; ++child_index) {
    const ui::AXNode* paragraph_node = page_node->GetChildAtIndex(child_index);
    ASSERT_NE(nullptr, paragraph_node);
    EXPECT_EQ(ax::mojom::Role::kParagraph, paragraph_node->GetRole());
  }
}
|
||||
|
||||
// With the `kPdfTags` feature enabled and the document marked as tagged, two
// text runs tagged "H1" and "H2" are expected to become headings of level 1
// and level 2 respectively.
TEST_F(PdfAccessibilityTreeTest, HeadingsDetectedFromTags) {
  base::test::ScopedFeatureList scoped_features;
  scoped_features.InitAndEnableFeature(chrome_pdf::features::kPdfTags);
  doc_info_.is_tagged = true;

  CreatePdfAccessibilityTree();

  text_runs_ = {kFirstTextRun, kSecondTextRun};
  text_runs_[0].tag_type = "H1";
  text_runs_[1].tag_type = "H2";
  chars_ = {std::begin(kDummyCharsData), std::end(kDummyCharsData)};

  page_info_.text_run_count = text_runs_.size();
  page_info_.char_count = chars_.size();

  pdf_accessibility_tree_->SetAccessibilityDocInfo(doc_info_);
  pdf_accessibility_tree_->SetAccessibilityViewportInfo(viewport_info_);
  pdf_accessibility_tree_->SetAccessibilityPageInfo(page_info_, text_runs_,
                                                    chars_, page_objects_);
  WaitForThreadTasks();
  // Wait for `PdfAccessibilityTree::UnserializeNodes()`, a delayed task.
  WaitForThreadDelayedTasks();

  const ui::AXNode* root_node = pdf_accessibility_tree_->GetRoot();
  CheckRootAndStatusNodes(root_node, doc_info_.page_count,
                          /*is_pdf_ocr_test=*/false, /*is_ocr_completed=*/false,
                          /*create_empty_ocr_results=*/false);

  // The page node is the root's second child and holds one node per run.
  ASSERT_GT(root_node->GetChildCount(), 1u);
  const ui::AXNode* page_node = root_node->GetChildAtIndex(1u);
  ASSERT_NE(nullptr, page_node);
  ASSERT_EQ(2u, page_node->GetChildCount());

  // Child N of the page must be a heading whose level matches its tag.
  static constexpr int kExpectedLevels[] = {1, 2};
  size_t child_index = 0u;
  for (const int expected_level : kExpectedLevels) {
    const ui::AXNode* heading_node = page_node->GetChildAtIndex(child_index++);
    ASSERT_NE(nullptr, heading_node);
    EXPECT_EQ(ax::mojom::Role::kHeading, heading_node->GetRole());
    EXPECT_EQ(expected_level,
              heading_node->GetIntAttribute(
                  ax::mojom::IntAttribute::kHierarchicalLevel));
  }
}
|
||||
|
||||
TEST_F(PdfAccessibilityTreeTest, TestOverlappingAnnots) {
|
||||
text_runs_.emplace_back(kFirstRunMultiLine);
|
||||
text_runs_.emplace_back(kSecondRunMultiLine);
|
||||
|
@ -4,9 +4,11 @@
|
||||
|
||||
#include "components/pdf/renderer/pdf_accessibility_tree_builder.h"
|
||||
|
||||
#include <optional>
|
||||
#include <queue>
|
||||
#include <string>
|
||||
|
||||
#include "base/containers/fixed_flat_map.h"
|
||||
#include "base/i18n/break_iterator.h"
|
||||
#include "base/strings/utf_string_conversion_utils.h"
|
||||
#include "components/pdf/renderer/pdf_ocr_helper.h"
|
||||
@ -15,6 +17,8 @@
|
||||
#include "pdf/pdf_features.h"
|
||||
#include "services/strings/grit/services_strings.h"
|
||||
#include "third_party/blink/public/web/web_ax_object.h"
|
||||
#include "ui/accessibility/accessibility_features.h"
|
||||
#include "ui/accessibility/ax_enums.mojom-shared.h"
|
||||
#include "ui/accessibility/ax_node_data.h"
|
||||
#include "ui/base/l10n/l10n_util.h"
|
||||
#include "ui/gfx/geometry/rect_f.h"
|
||||
@ -297,12 +301,84 @@ size_t NormalizeTextRunIndex(uint32_t object_end_text_run_index,
|
||||
current_text_run_index ? current_text_run_index - 1 : 0);
|
||||
}
|
||||
|
||||
// Please keep the below map as close as possible to the list defined in the PDF
|
||||
// Specification, ISO 32000-1:2008, table 333.
|
||||
ax::mojom::Role StructureElementTypeToAccessibilityRole(
|
||||
const std::string& element_type) {
|
||||
static constexpr auto kStructureElementTypeToAccessibilityRoleMap =
|
||||
base::MakeFixedFlatMap<std::string_view, ax::mojom::Role>(
|
||||
{{"Document", ax::mojom::Role::kDocument},
|
||||
{"Part", ax::mojom::Role::kDocPart},
|
||||
{"Art", ax::mojom::Role::kArticle},
|
||||
{"Sect", ax::mojom::Role::kSection},
|
||||
{"Div", ax::mojom::Role::kGenericContainer},
|
||||
{"BlockQuote", ax::mojom::Role::kBlockquote},
|
||||
{"Caption", ax::mojom::Role::kCaption},
|
||||
{"TOC", ax::mojom::Role::kDocToc},
|
||||
{"TOCI", ax::mojom::Role::kListItem},
|
||||
{"Index", ax::mojom::Role::kDocIndex},
|
||||
{"P", ax::mojom::Role::kParagraph},
|
||||
{"H", ax::mojom::Role::kHeading},
|
||||
{"H1", ax::mojom::Role::kHeading},
|
||||
{"H2", ax::mojom::Role::kHeading},
|
||||
{"H3", ax::mojom::Role::kHeading},
|
||||
{"H4", ax::mojom::Role::kHeading},
|
||||
{"H5", ax::mojom::Role::kHeading},
|
||||
{"H6", ax::mojom::Role::kHeading},
|
||||
{"L", ax::mojom::Role::kList},
|
||||
{"LI", ax::mojom::Role::kListItem},
|
||||
{"Lbl", ax::mojom::Role::kListMarker},
|
||||
{"LBody", ax::mojom::Role::kNone}, // Presentational.
|
||||
{"Table", ax::mojom::Role::kTable},
|
||||
{"TR", ax::mojom::Role::kRow},
|
||||
{"TH", ax::mojom::Role::kRowHeader},
|
||||
{"THead", ax::mojom::Role::kRowGroup},
|
||||
{"TBody", ax::mojom::Role::kRowGroup},
|
||||
{"TFoot", ax::mojom::Role::kRowGroup},
|
||||
{"TD", ax::mojom::Role::kCell},
|
||||
{"Span", ax::mojom::Role::kStaticText},
|
||||
{"Link", ax::mojom::Role::kLink},
|
||||
{"Figure", ax::mojom::Role::kFigure},
|
||||
{"Formula", ax::mojom::Role::kMath},
|
||||
{"Form", ax::mojom::Role::kForm}});
|
||||
|
||||
if (auto iter =
|
||||
kStructureElementTypeToAccessibilityRoleMap.find(element_type);
|
||||
iter != kStructureElementTypeToAccessibilityRoleMap.end()) {
|
||||
return iter->second;
|
||||
}
|
||||
// Return something that could at least make some sense, other than
|
||||
// `kUnknown`.
|
||||
return ax::mojom::Role::kParagraph;
|
||||
}
|
||||
|
||||
std::optional<uint32_t> StructureElementTypeToHeadingLevel(
|
||||
const std::string& element_type) {
|
||||
if (StructureElementTypeToAccessibilityRole(element_type) ==
|
||||
ax::mojom::Role::kHeading) {
|
||||
if (element_type == "H" || element_type == "H1") {
|
||||
return 1;
|
||||
} else if (element_type == "H2") {
|
||||
return 2;
|
||||
} else if (element_type == "H3") {
|
||||
return 3;
|
||||
} else if (element_type == "H4") {
|
||||
return 4;
|
||||
} else if (element_type == "H5") {
|
||||
return 5;
|
||||
} else if (element_type == "H6") {
|
||||
return 6;
|
||||
}
|
||||
}
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
namespace pdf {
|
||||
|
||||
PdfAccessibilityTreeBuilder::PdfAccessibilityTreeBuilder(
|
||||
base::WeakPtr<PdfAccessibilityTree> pdf_accessibility_tree,
|
||||
bool mark_headings_using_heuristic,
|
||||
const std::vector<chrome_pdf::AccessibilityTextRunInfo>& text_runs,
|
||||
const std::vector<chrome_pdf::AccessibilityCharInfo>& chars,
|
||||
const chrome_pdf::AccessibilityPageObjects& page_objects,
|
||||
@ -321,7 +397,7 @@ PdfAccessibilityTreeBuilder::PdfAccessibilityTreeBuilder(
|
||||
bool has_accessible_text
|
||||
#endif // BUILDFLAG(ENABLE_SCREEN_AI_SERVICE)
|
||||
)
|
||||
: pdf_accessibility_tree_(std::move(pdf_accessibility_tree)),
|
||||
: mark_headings_using_heuristic_(mark_headings_using_heuristic),
|
||||
text_runs_(text_runs),
|
||||
chars_(chars),
|
||||
links_(page_objects.links),
|
||||
@ -370,7 +446,7 @@ void PdfAccessibilityTreeBuilder::BuildPageTree() {
|
||||
&heading_font_size_threshold_,
|
||||
&paragraph_spacing_threshold_);
|
||||
|
||||
ui::AXNodeData* para_node = nullptr;
|
||||
ui::AXNodeData* block_node = nullptr;
|
||||
ui::AXNodeData* static_text_node = nullptr;
|
||||
ui::AXNodeData* previous_on_line_node = nullptr;
|
||||
std::string static_text;
|
||||
@ -389,16 +465,16 @@ void PdfAccessibilityTreeBuilder::BuildPageTree() {
|
||||
bool ocr_block_start = text_run.is_searchified && !ocr_block;
|
||||
bool ocr_block_end = !text_run.is_searchified && ocr_block;
|
||||
if (ocr_block_start || ocr_block_end) {
|
||||
// If already inside a paragraph, end it.
|
||||
// If already inside a block, end it.
|
||||
// The searchifier adds the text at the exact position that it is seen in
|
||||
// the image and does not deal with paragraphs or other structures.
|
||||
// The function that creates the text runs only considers text positions
|
||||
// and separates the blocks based on that. Therefore there can be cases
|
||||
// that OCR text will be added in the middle of a paragraph.
|
||||
// that OCR text will be added in the middle of a block.
|
||||
// TODO(crbug.com/360803943): Add browser tests to verify.
|
||||
if (para_node) {
|
||||
if (block_node) {
|
||||
BuildStaticNode(&static_text_node, &static_text);
|
||||
para_node = nullptr;
|
||||
block_node = nullptr;
|
||||
}
|
||||
CHECK(ocr_block_start || text_run_index);
|
||||
gfx::PointF position =
|
||||
@ -411,21 +487,21 @@ void PdfAccessibilityTreeBuilder::BuildPageTree() {
|
||||
has_ocr_text = true;
|
||||
}
|
||||
|
||||
// If we don't have a paragraph, create one.
|
||||
if (!para_node) {
|
||||
para_node =
|
||||
CreateParagraphNode((*text_runs_)[text_run_index].style.font_size);
|
||||
page_node_->child_ids.push_back(para_node->id);
|
||||
// If we don't have a block level node, create one.
|
||||
if (!block_node) {
|
||||
block_node =
|
||||
CreateBlockLevelNode(text_run.tag_type, text_run.style.font_size);
|
||||
page_node_->child_ids.push_back(block_node->id);
|
||||
}
|
||||
|
||||
// If the `text_run_index` is less than or equal to the link's
|
||||
// `text_run_index`, then push the link node in the paragraph.
|
||||
// `text_run_index`, then push the link node in the block.
|
||||
if (IsObjectWithRangeInTextRun(*links_, current_link_index_,
|
||||
text_run_index)) {
|
||||
BuildStaticNode(&static_text_node, &static_text);
|
||||
const chrome_pdf::AccessibilityLinkInfo& link =
|
||||
(*links_)[current_link_index_++];
|
||||
AddLinkToParaNode(link, para_node, &previous_on_line_node,
|
||||
AddLinkToParaNode(link, block_node, &previous_on_line_node,
|
||||
&text_run_index);
|
||||
|
||||
if (link.text_range.count == 0) {
|
||||
@ -435,27 +511,27 @@ void PdfAccessibilityTreeBuilder::BuildPageTree() {
|
||||
} else if (IsObjectInTextRun(*images_, current_image_index_,
|
||||
text_run_index)) {
|
||||
BuildStaticNode(&static_text_node, &static_text);
|
||||
AddImageToParaNode((*images_)[current_image_index_++], para_node,
|
||||
AddImageToParaNode((*images_)[current_image_index_++], block_node,
|
||||
&text_run_index);
|
||||
continue;
|
||||
} else if (IsObjectWithRangeInTextRun(
|
||||
*highlights_, current_highlight_index_, text_run_index)) {
|
||||
BuildStaticNode(&static_text_node, &static_text);
|
||||
AddHighlightToParaNode((*highlights_)[current_highlight_index_++],
|
||||
para_node, &previous_on_line_node,
|
||||
block_node, &previous_on_line_node,
|
||||
&text_run_index);
|
||||
} else if (IsObjectInTextRun(*text_fields_, current_text_field_index_,
|
||||
text_run_index) &&
|
||||
pdf_forms_enabled) {
|
||||
BuildStaticNode(&static_text_node, &static_text);
|
||||
AddTextFieldToParaNode((*text_fields_)[current_text_field_index_++],
|
||||
para_node, &text_run_index);
|
||||
block_node, &text_run_index);
|
||||
continue;
|
||||
} else if (IsObjectInTextRun(*buttons_, current_button_index_,
|
||||
text_run_index) &&
|
||||
pdf_forms_enabled) {
|
||||
BuildStaticNode(&static_text_node, &static_text);
|
||||
AddButtonToParaNode((*buttons_)[current_button_index_++], para_node,
|
||||
AddButtonToParaNode((*buttons_)[current_button_index_++], block_node,
|
||||
&text_run_index);
|
||||
continue;
|
||||
} else if (IsObjectInTextRun(*choice_fields_, current_choice_field_index_,
|
||||
@ -463,17 +539,17 @@ void PdfAccessibilityTreeBuilder::BuildPageTree() {
|
||||
pdf_forms_enabled) {
|
||||
BuildStaticNode(&static_text_node, &static_text);
|
||||
AddChoiceFieldToParaNode((*choice_fields_)[current_choice_field_index_++],
|
||||
para_node, &text_run_index);
|
||||
block_node, &text_run_index);
|
||||
continue;
|
||||
} else {
|
||||
chrome_pdf::PageCharacterIndex page_char_index = {
|
||||
page_index_, text_run_start_indices_[text_run_index]};
|
||||
|
||||
// This node is for the text inside the paragraph, it includes
|
||||
// the text of all of the text runs.
|
||||
// This node is for the text inside the block, it includes the text of all
|
||||
// of the text runs.
|
||||
if (!static_text_node) {
|
||||
static_text_node = CreateStaticTextNode(page_char_index);
|
||||
para_node->child_ids.push_back(static_text_node->id);
|
||||
block_node->child_ids.push_back(static_text_node->id);
|
||||
}
|
||||
|
||||
// Add this text run to the current static text node.
|
||||
@ -484,7 +560,7 @@ void PdfAccessibilityTreeBuilder::BuildPageTree() {
|
||||
static_text += inline_text_box_node->GetStringAttribute(
|
||||
ax::mojom::StringAttribute::kName);
|
||||
|
||||
para_node->relative_bounds.bounds.Union(
|
||||
block_node->relative_bounds.bounds.Union(
|
||||
inline_text_box_node->relative_bounds.bounds);
|
||||
static_text_node->relative_bounds.bounds.Union(
|
||||
inline_text_box_node->relative_bounds.bounds);
|
||||
@ -517,7 +593,7 @@ void PdfAccessibilityTreeBuilder::BuildPageTree() {
|
||||
if (BreakParagraph(*text_runs_, text_run_index,
|
||||
paragraph_spacing_threshold_)) {
|
||||
BuildStaticNode(&static_text_node, &static_text);
|
||||
para_node = nullptr;
|
||||
block_node = nullptr;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -530,7 +606,7 @@ void PdfAccessibilityTreeBuilder::BuildPageTree() {
|
||||
->id);
|
||||
}
|
||||
|
||||
AddRemainingAnnotations(para_node, has_ocr_text);
|
||||
AddRemainingAnnotations(block_node, has_ocr_text);
|
||||
}
|
||||
|
||||
void PdfAccessibilityTreeBuilder::AddWordStartsAndEnds(
|
||||
@ -575,23 +651,31 @@ ui::AXNodeData* PdfAccessibilityTreeBuilder::CreateAndAppendNode(
|
||||
return node_ptr;
|
||||
}
|
||||
|
||||
ui::AXNodeData* PdfAccessibilityTreeBuilder::CreateParagraphNode(
|
||||
ui::AXNodeData* PdfAccessibilityTreeBuilder::CreateBlockLevelNode(
|
||||
const std::string& text_run_type,
|
||||
float font_size) {
|
||||
ui::AXNodeData* para_node = CreateAndAppendNode(
|
||||
ax::mojom::Role::kParagraph, ax::mojom::Restriction::kReadOnly);
|
||||
para_node->AddBoolAttribute(ax::mojom::BoolAttribute::kIsLineBreakingObject,
|
||||
true);
|
||||
|
||||
// If font size exceeds the `heading_font_size_threshold_`, then classify
|
||||
// it as a Heading.
|
||||
if (heading_font_size_threshold_ > 0 &&
|
||||
font_size > heading_font_size_threshold_) {
|
||||
para_node->role = ax::mojom::Role::kHeading;
|
||||
para_node->AddIntAttribute(ax::mojom::IntAttribute::kHierarchicalLevel, 2);
|
||||
para_node->AddStringAttribute(ax::mojom::StringAttribute::kHtmlTag, "h2");
|
||||
ui::AXNodeData* block_node = CreateAndAppendNode(
|
||||
StructureElementTypeToAccessibilityRole(text_run_type),
|
||||
ax::mojom::Restriction::kReadOnly);
|
||||
block_node->AddBoolAttribute(ax::mojom::BoolAttribute::kIsLineBreakingObject,
|
||||
true);
|
||||
if (std::optional<uint32_t> level =
|
||||
StructureElementTypeToHeadingLevel(text_run_type);
|
||||
level) {
|
||||
block_node->AddIntAttribute(ax::mojom::IntAttribute::kHierarchicalLevel,
|
||||
*level);
|
||||
// TODO(crbug.com/40707542): Set the HTML tag to "h*" by creating a helper
|
||||
// in `AXEnumUtils`.
|
||||
}
|
||||
|
||||
return para_node;
|
||||
if (mark_headings_using_heuristic_ && heading_font_size_threshold_ > 0 &&
|
||||
font_size > heading_font_size_threshold_) {
|
||||
block_node->role = ax::mojom::Role::kHeading;
|
||||
block_node->AddIntAttribute(ax::mojom::IntAttribute::kHierarchicalLevel, 2);
|
||||
block_node->AddStringAttribute(ax::mojom::StringAttribute::kHtmlTag, "h2");
|
||||
}
|
||||
|
||||
return block_node;
|
||||
}
|
||||
|
||||
ui::AXNodeData* PdfAccessibilityTreeBuilder::CreateStaticTextNode() {
|
||||
|
@ -30,7 +30,7 @@ namespace pdf {
|
||||
class PdfAccessibilityTreeBuilder {
|
||||
public:
|
||||
PdfAccessibilityTreeBuilder(
|
||||
base::WeakPtr<PdfAccessibilityTree> pdf_accessibility_tree,
|
||||
bool mark_headings_using_heuristic,
|
||||
const std::vector<chrome_pdf::AccessibilityTextRunInfo>& text_runs,
|
||||
const std::vector<chrome_pdf::AccessibilityCharInfo>& chars,
|
||||
const chrome_pdf::AccessibilityPageObjects& page_objects,
|
||||
@ -61,7 +61,8 @@ class PdfAccessibilityTreeBuilder {
|
||||
void AddWordStartsAndEnds(ui::AXNodeData* inline_text_box);
|
||||
ui::AXNodeData* CreateAndAppendNode(ax::mojom::Role role,
|
||||
ax::mojom::Restriction restriction);
|
||||
ui::AXNodeData* CreateParagraphNode(float font_size);
|
||||
ui::AXNodeData* CreateBlockLevelNode(const std::string& text_run_type,
|
||||
float font_size);
|
||||
ui::AXNodeData* CreateStaticTextNode();
|
||||
ui::AXNodeData* CreateStaticTextNode(
|
||||
const chrome_pdf::PageCharacterIndex& page_char_index);
|
||||
@ -128,7 +129,7 @@ class PdfAccessibilityTreeBuilder {
|
||||
size_t* text_run_index);
|
||||
void AddRemainingAnnotations(ui::AXNodeData* para_node, bool ocr_applied);
|
||||
|
||||
base::WeakPtr<PdfAccessibilityTree> pdf_accessibility_tree_;
|
||||
const bool mark_headings_using_heuristic_;
|
||||
std::vector<uint32_t> text_run_start_indices_;
|
||||
const raw_ref<const std::vector<chrome_pdf::AccessibilityTextRunInfo>>
|
||||
text_runs_;
|
||||
|
@ -13,11 +13,11 @@
|
||||
namespace pdf {
|
||||
|
||||
const chrome_pdf::AccessibilityTextRunInfo kFirstTextRun = {
|
||||
15, gfx::RectF(26.0f, 189.0f, 84.0f, 13.0f),
|
||||
15, "Span", gfx::RectF(26.0f, 189.0f, 84.0f, 13.0f),
|
||||
chrome_pdf::AccessibilityTextDirection::kNone,
|
||||
chrome_pdf::AccessibilityTextStyleInfo()};
|
||||
const chrome_pdf::AccessibilityTextRunInfo kSecondTextRun = {
|
||||
15, gfx::RectF(28.0f, 117.0f, 152.0f, 19.0f),
|
||||
15, "Span", gfx::RectF(28.0f, 117.0f, 152.0f, 19.0f),
|
||||
chrome_pdf::AccessibilityTextDirection::kNone,
|
||||
chrome_pdf::AccessibilityTextStyleInfo()};
|
||||
const chrome_pdf::AccessibilityCharInfo kDummyCharsData[] = {
|
||||
|
@ -54,6 +54,8 @@ void GetAccessibilityInfo(PDFiumEngine* engine,
|
||||
chars[i].unicode_character = page->GetCharUnicode(i);
|
||||
}
|
||||
|
||||
// TODO(crbug.com/40707542): Move the entire logic present in the following
|
||||
// while loop to `PDFiumPage` class.
|
||||
uint32_t char_index = 0;
|
||||
while (char_index < char_count) {
|
||||
std::optional<AccessibilityTextRunInfo> text_run_info_result =
|
||||
@ -106,6 +108,7 @@ void GetAccessibilityInfo(PDFiumEngine* engine,
|
||||
char_index += text_run_info.len;
|
||||
}
|
||||
|
||||
page->PopulateTextRunTypeAndImageAltText(text_runs);
|
||||
page_info.text_run_count = text_runs.size();
|
||||
page_objects.links = page->GetLinkInfo(text_runs);
|
||||
page_objects.images = page->GetImageInfo(page_info.text_run_count);
|
||||
|
@ -7,7 +7,7 @@
|
||||
namespace chrome_pdf {
|
||||
|
||||
bool AccessibilityDocInfo::operator==(const AccessibilityDocInfo& other) const {
|
||||
return page_count == other.page_count &&
|
||||
return page_count == other.page_count && is_tagged == other.is_tagged &&
|
||||
text_accessible == other.text_accessible &&
|
||||
text_copyable == other.text_copyable;
|
||||
}
|
||||
@ -45,10 +45,12 @@ AccessibilityTextRunInfo::AccessibilityTextRunInfo() = default;
|
||||
|
||||
AccessibilityTextRunInfo::AccessibilityTextRunInfo(
|
||||
uint32_t len,
|
||||
const std::string& tag_type,
|
||||
const gfx::RectF& bounds,
|
||||
AccessibilityTextDirection direction,
|
||||
const AccessibilityTextStyleInfo& style)
|
||||
: AccessibilityTextRunInfo(len,
|
||||
tag_type,
|
||||
bounds,
|
||||
direction,
|
||||
style,
|
||||
@ -56,11 +58,13 @@ AccessibilityTextRunInfo::AccessibilityTextRunInfo(
|
||||
|
||||
AccessibilityTextRunInfo::AccessibilityTextRunInfo(
|
||||
uint32_t len,
|
||||
const std::string& tag_type,
|
||||
const gfx::RectF& bounds,
|
||||
AccessibilityTextDirection direction,
|
||||
const AccessibilityTextStyleInfo& style,
|
||||
bool is_searchified)
|
||||
: len(len),
|
||||
tag_type(tag_type),
|
||||
bounds(bounds),
|
||||
direction(direction),
|
||||
style(style),
|
||||
|
@ -22,6 +22,7 @@ struct AccessibilityDocInfo {
|
||||
bool operator!=(const AccessibilityDocInfo& other) const;
|
||||
|
||||
uint32_t page_count = 0;
|
||||
bool is_tagged = false;
|
||||
bool text_accessible = false;
|
||||
bool text_copyable = false;
|
||||
};
|
||||
@ -85,10 +86,12 @@ enum class AccessibilityTextDirection {
|
||||
struct AccessibilityTextRunInfo {
|
||||
AccessibilityTextRunInfo();
|
||||
AccessibilityTextRunInfo(uint32_t len,
|
||||
const std::string& tag_type,
|
||||
const gfx::RectF& bounds,
|
||||
AccessibilityTextDirection direction,
|
||||
const AccessibilityTextStyleInfo& style);
|
||||
AccessibilityTextRunInfo(uint32_t len,
|
||||
const std::string& tag_type,
|
||||
const gfx::RectF& bounds,
|
||||
AccessibilityTextDirection direction,
|
||||
const AccessibilityTextStyleInfo& style,
|
||||
@ -97,6 +100,9 @@ struct AccessibilityTextRunInfo {
|
||||
~AccessibilityTextRunInfo();
|
||||
|
||||
uint32_t len = 0;
|
||||
// One of various types defined in a PDF tag, such as "Span", "P", "H1", "LI",
|
||||
// etc.
|
||||
std::string tag_type;
|
||||
gfx::RectF bounds;
|
||||
AccessibilityTextDirection direction = AccessibilityTextDirection::kNone;
|
||||
AccessibilityTextStyleInfo style;
|
||||
|
@ -47,6 +47,12 @@ BASE_FEATURE(kPdfSearchifySave,
|
||||
"PdfSearchifySave",
|
||||
base::FEATURE_DISABLED_BY_DEFAULT);
|
||||
|
||||
// Enables accessibility tags in PDFs to be parsed and integrated into the
|
||||
// accessibility tree by Chrome's PDF Viewer. Accessibility tags provide
|
||||
// structure and semantics to the text found in a PDF, e.g. they could mark a
|
||||
// specific piece of text as a heading, or a block of text as a paragraph.
|
||||
BASE_FEATURE(kPdfTags, "PdfTags", base::FEATURE_DISABLED_BY_DEFAULT);
|
||||
|
||||
BASE_FEATURE(kPdfUseShowSaveFilePicker,
|
||||
"PdfUseShowSaveFilePicker",
|
||||
base::FEATURE_DISABLED_BY_DEFAULT);
|
||||
|
@ -24,6 +24,7 @@ BASE_DECLARE_FEATURE(kPdfPartialLoading);
|
||||
BASE_DECLARE_FEATURE(kPdfPortfolio);
|
||||
BASE_DECLARE_FEATURE(kPdfSearchify);
|
||||
BASE_DECLARE_FEATURE(kPdfSearchifySave);
|
||||
BASE_DECLARE_FEATURE(kPdfTags);
|
||||
BASE_DECLARE_FEATURE(kPdfUseShowSaveFilePicker);
|
||||
BASE_DECLARE_FEATURE(kPdfUseSkiaRenderer);
|
||||
BASE_DECLARE_FEATURE(kPdfXfaSupport);
|
||||
|
@ -2777,6 +2777,9 @@ gfx::Point PdfViewWebPlugin::FrameToPdfCoordinates(
|
||||
AccessibilityDocInfo PdfViewWebPlugin::GetAccessibilityDocInfo() const {
|
||||
AccessibilityDocInfo doc_info;
|
||||
doc_info.page_count = engine_->GetNumberOfPages();
|
||||
if (base::FeatureList::IsEnabled(chrome_pdf::features::kPdfTags)) {
|
||||
doc_info.is_tagged = engine_->IsTagged();
|
||||
}
|
||||
doc_info.text_accessible =
|
||||
engine_->HasPermission(DocumentPermission::kCopyAccessible);
|
||||
doc_info.text_copyable = engine_->HasPermission(DocumentPermission::kCopy);
|
||||
|
@ -4,10 +4,14 @@
|
||||
|
||||
#include "pdf/accessibility.h"
|
||||
|
||||
#include <array>
|
||||
#include <string>
|
||||
|
||||
#include "base/compiler_specific.h"
|
||||
#include "base/test/scoped_feature_list.h"
|
||||
#include "base/types/zip.h"
|
||||
#include "pdf/accessibility_structs.h"
|
||||
#include "pdf/pdf_features.h"
|
||||
#include "pdf/pdfium/pdfium_engine.h"
|
||||
#include "pdf/pdfium/pdfium_test_base.h"
|
||||
#include "pdf/test/test_client.h"
|
||||
@ -123,6 +127,55 @@ TEST_P(AccessibilityTest, GetAccessibilityPage) {
|
||||
});
|
||||
}
|
||||
|
||||
// Loads "tags.pdf" with the `kPdfTags` feature enabled and checks the page
// info, the length and tag type of every text run, and every character that
// `GetAccessibilityInfo()` reports for page 0.
TEST_P(AccessibilityTest, GetAccessibilityPageWithTags) {
  base::test::ScopedFeatureList scoped_features;
  scoped_features.InitAndEnableFeature(features::kPdfTags);

  // Expected length and tag type of one text run.
  struct ExpectedRun {
    uint32_t len;
    std::string tag_type;
  };
  static constexpr std::array<ExpectedRun, 5> kExpectedTextRuns = {
      ExpectedRun{/*"Article\r\n"*/ 9, "Art"},
      ExpectedRun{/*"BlockQuote\r\n"*/ 12, "BlockQuote"},
      ExpectedRun{/*"Paragraph\r\n"*/ 11, "P"},
      ExpectedRun{/*"Heading1\r\n"*/ 10, "H1"},
      ExpectedRun{/*"Heading2"*/ 8, "H2"},
  };

  static constexpr char kExpectedChars[] =
      "Article\r\nBlockQuote\r\nParagraph\r\nHeading1\r\nHeading2";

  TestClient client;
  std::unique_ptr<PDFiumEngine> engine =
      InitializeEngine(&client, FILE_PATH_LITERAL("tags.pdf"));
  ASSERT_TRUE(engine);
  ASSERT_EQ(1, engine->GetNumberOfPages());

  AccessibilityPageInfo page_info;
  AccessibilityPageObjects page_objects;
  std::vector<AccessibilityTextRunInfo> text_runs;
  std::vector<AccessibilityCharInfo> chars;
  GetAccessibilityInfo(engine.get(), 0, page_info, text_runs, chars,
                       page_objects);

  EXPECT_EQ(0u, page_info.page_index);
  EXPECT_EQ(gfx::Rect(5, 3, 816, 1056), page_info.bounds);
  EXPECT_EQ(text_runs.size(), page_info.text_run_count);
  EXPECT_EQ(chars.size(), page_info.char_count);

  ASSERT_EQ(kExpectedTextRuns.size(), text_runs.size());
  for (const auto [expected_run, actual_run] :
       base::zip(kExpectedTextRuns, text_runs)) {
    EXPECT_EQ(expected_run.len, actual_run.len);
    EXPECT_EQ(expected_run.tag_type, actual_run.tag_type);
  }

  // The character array's size includes its trailing NUL; leave it out.
  const size_t expected_char_count = std::size(kExpectedChars) - 1;
  ASSERT_EQ(expected_char_count, chars.size());
  for (const auto [expected_char, actual_char] :
       base::zip(kExpectedChars, chars)) {
    EXPECT_EQ(static_cast<uint32_t>(expected_char),
              actual_char.unicode_character);
  }
}
|
||||
|
||||
TEST_P(AccessibilityTest, GetAccessibilityImageInfo) {
|
||||
static const auto kExpectedImageInfo = std::to_array<AccessibilityImageInfo>({
|
||||
{"Image 1", 0, {380, 78, 67, 68}, {}},
|
||||
|
@ -2233,6 +2233,10 @@ void PDFiumEngine::SetReadOnly(bool read_only) {
|
||||
ClearTextSelection();
|
||||
}
|
||||
|
||||
bool PDFiumEngine::IsTagged() const {
|
||||
return FPDFCatalog_IsTagged(doc());
|
||||
}
|
||||
|
||||
void PDFiumEngine::SetDocumentLayout(DocumentLayout::PageSpread page_spread) {
|
||||
SaveSelection();
|
||||
desired_layout_options_.set_page_spread(page_spread);
|
||||
|
@ -219,6 +219,7 @@ class PDFiumEngine : public DocumentLoader::Client, public IFSDK_PAUSE {
|
||||
void RotateCounterclockwise();
|
||||
bool IsReadOnly() const;
|
||||
void SetReadOnly(bool read_only);
|
||||
bool IsTagged() const;
|
||||
void SetDocumentLayout(DocumentLayout::PageSpread page_spread);
|
||||
void DisplayAnnotations(bool display);
|
||||
|
||||
|
@ -874,7 +874,6 @@ std::vector<AccessibilityImageInfo> PDFiumPage::GetImageInfo(
|
||||
return image_info;
|
||||
|
||||
CalculateImages();
|
||||
|
||||
image_info.reserve(images_.size());
|
||||
for (const Image& image : images_) {
|
||||
AccessibilityImageInfo cur_info;
|
||||
@ -996,6 +995,49 @@ std::vector<AccessibilityTextFieldInfo> PDFiumPage::GetTextFieldInfo(
|
||||
return text_field_info;
|
||||
}
|
||||
|
||||
void PDFiumPage::PopulateTextRunTypeAndImageAltText(
|
||||
std::vector<AccessibilityTextRunInfo>& text_runs) {
|
||||
CalculateImages();
|
||||
ScopedFPDFStructTree struct_tree(FPDF_StructTree_GetForPage(GetPage()));
|
||||
if (!struct_tree) {
|
||||
return;
|
||||
}
|
||||
|
||||
// TODO(crbug.com/40707542): Consolidate `Accessibility"TextRunInfo` building
|
||||
// logic into this class and remove the following block.
|
||||
MarkedContentIdToTextRunInfoMap marked_content_id_text_run_info_map;
|
||||
if (base::FeatureList::IsEnabled(chrome_pdf::features::kPdfTags)) {
|
||||
FPDF_TEXTPAGE text_page = GetTextPage();
|
||||
uint32_t char_index = 0;
|
||||
for (auto& text_run : text_runs) {
|
||||
FPDF_PAGEOBJECT text_object =
|
||||
FPDFText_GetTextObject(text_page, char_index);
|
||||
int marked_content_id = FPDFPageObj_GetMarkedContentID(text_object);
|
||||
if (marked_content_id == -1) {
|
||||
continue;
|
||||
}
|
||||
auto [iter, _] = marked_content_id_text_run_info_map.emplace(
|
||||
marked_content_id, std::vector<raw_ptr<AccessibilityTextRunInfo>>());
|
||||
iter->second.push_back(&text_run);
|
||||
char_index += text_run.len;
|
||||
}
|
||||
}
|
||||
|
||||
if (marked_content_id_text_run_info_map.empty() &&
|
||||
marked_content_id_image_map_.empty()) {
|
||||
return;
|
||||
}
|
||||
|
||||
std::set<FPDF_STRUCTELEMENT> visited_elements;
|
||||
int tree_children_count = FPDF_StructTree_CountChildren(struct_tree.get());
|
||||
for (int i = 0; i < tree_children_count; ++i) {
|
||||
FPDF_STRUCTELEMENT current_element =
|
||||
FPDF_StructTree_GetChildAtIndex(struct_tree.get(), i);
|
||||
PopulateTextRunTypeAndImageAltTextForStructElement(
|
||||
current_element, visited_elements, marked_content_id_text_run_info_map);
|
||||
}
|
||||
}
|
||||
|
||||
PDFiumPage::Area PDFiumPage::GetLinkTargetAtIndex(int link_index,
|
||||
LinkTarget* target) {
|
||||
if (!available_ || link_index < 0)
|
||||
@ -1385,8 +1427,6 @@ void PDFiumPage::CalculateImages() {
|
||||
calculated_images_ = true;
|
||||
FPDF_PAGE page = GetPage();
|
||||
int page_object_count = FPDFPage_CountObjects(page);
|
||||
MarkedContentIdToImageMap marked_content_id_image_map;
|
||||
bool is_tagged = FPDFCatalog_IsTagged(engine_->doc());
|
||||
for (int i = 0; i < page_object_count; ++i) {
|
||||
FPDF_PAGEOBJECT page_object = FPDFPage_GetObject(page, i);
|
||||
if (FPDFPageObj_GetType(page_object) != FPDF_PAGEOBJ_IMAGE)
|
||||
@ -1403,7 +1443,7 @@ void PDFiumPage::CalculateImages() {
|
||||
image.bounding_rect = PageToScreen(gfx::Point(), 1.0, left, top, right,
|
||||
bottom, PageOrientation::kOriginal);
|
||||
|
||||
if (is_tagged) {
|
||||
if (engine_->IsTagged()) {
|
||||
// Collect all marked content IDs for image objects so that they can
|
||||
// later be used to retrieve alt text from struct tree for the page.
|
||||
FPDF_IMAGEOBJ_METADATA image_metadata;
|
||||
@ -1412,64 +1452,65 @@ void PDFiumPage::CalculateImages() {
|
||||
if (marked_content_id >= 0) {
|
||||
// If `marked_content_id` is already present, ignore the one being
|
||||
// inserted.
|
||||
marked_content_id_image_map.insert(
|
||||
marked_content_id_image_map_.insert(
|
||||
{marked_content_id, images_.size()});
|
||||
}
|
||||
}
|
||||
}
|
||||
images_.push_back(image);
|
||||
}
|
||||
|
||||
if (!marked_content_id_image_map.empty())
|
||||
PopulateImageAltText(marked_content_id_image_map);
|
||||
}
|
||||
|
||||
void PDFiumPage::PopulateImageAltText(
|
||||
const MarkedContentIdToImageMap& marked_content_id_image_map) {
|
||||
ScopedFPDFStructTree struct_tree(FPDF_StructTree_GetForPage(GetPage()));
|
||||
if (!struct_tree)
|
||||
return;
|
||||
|
||||
std::set<FPDF_STRUCTELEMENT> visited_elements;
|
||||
int tree_children_count = FPDF_StructTree_CountChildren(struct_tree.get());
|
||||
for (int i = 0; i < tree_children_count; ++i) {
|
||||
FPDF_STRUCTELEMENT current_element =
|
||||
FPDF_StructTree_GetChildAtIndex(struct_tree.get(), i);
|
||||
PopulateImageAltTextForStructElement(marked_content_id_image_map,
|
||||
current_element, &visited_elements);
|
||||
}
|
||||
}
|
||||
|
||||
void PDFiumPage::PopulateImageAltTextForStructElement(
|
||||
const MarkedContentIdToImageMap& marked_content_id_image_map,
|
||||
void PDFiumPage::PopulateTextRunTypeAndImageAltTextForStructElement(
|
||||
FPDF_STRUCTELEMENT current_element,
|
||||
std::set<FPDF_STRUCTELEMENT>* visited_elements) {
|
||||
if (!current_element)
|
||||
std::set<FPDF_STRUCTELEMENT>& visited_elements,
|
||||
MarkedContentIdToTextRunInfoMap& marked_content_id_text_run_info_map) {
|
||||
if (!current_element) {
|
||||
return;
|
||||
}
|
||||
|
||||
bool inserted = visited_elements->insert(current_element).second;
|
||||
if (!inserted)
|
||||
bool inserted = visited_elements.insert(current_element).second;
|
||||
if (!inserted) {
|
||||
return;
|
||||
}
|
||||
|
||||
int marked_content_id =
|
||||
FPDF_StructElement_GetMarkedContentID(current_element);
|
||||
int marked_content_id = -1;
|
||||
if (FPDF_StructElement_GetMarkedContentIdCount(current_element)) {
|
||||
marked_content_id =
|
||||
FPDF_StructElement_GetMarkedContentIdAtIndex(current_element, 0);
|
||||
}
|
||||
if (marked_content_id >= 0) {
|
||||
auto it = marked_content_id_image_map.find(marked_content_id);
|
||||
if (it != marked_content_id_image_map.end() &&
|
||||
images_[it->second].alt_text.empty()) {
|
||||
images_[it->second].alt_text =
|
||||
if (base::FeatureList::IsEnabled(chrome_pdf::features::kPdfTags)) {
|
||||
auto text_runs_iter =
|
||||
marked_content_id_text_run_info_map.find(marked_content_id);
|
||||
if (text_runs_iter != marked_content_id_text_run_info_map.end()) {
|
||||
std::vector<raw_ptr<AccessibilityTextRunInfo>>& text_runs =
|
||||
text_runs_iter->second;
|
||||
for (raw_ptr<AccessibilityTextRunInfo>& text_run : text_runs) {
|
||||
text_run->tag_type = base::UTF16ToUTF8(CallPDFiumWideStringBufferApi(
|
||||
base::BindRepeating(&FPDF_StructElement_GetType, current_element),
|
||||
/*check_expected_size=*/true));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
auto image_iter = marked_content_id_image_map_.find(marked_content_id);
|
||||
if (image_iter != marked_content_id_image_map_.end() &&
|
||||
images_[image_iter->second].alt_text.empty()) {
|
||||
images_[image_iter->second].alt_text =
|
||||
base::UTF16ToUTF8(CallPDFiumWideStringBufferApi(
|
||||
base::BindRepeating(&FPDF_StructElement_GetAltText,
|
||||
current_element),
|
||||
/*check_expected_size=*/true));
|
||||
}
|
||||
}
|
||||
|
||||
int children_count = FPDF_StructElement_CountChildren(current_element);
|
||||
for (int i = 0; i < children_count; ++i) {
|
||||
FPDF_STRUCTELEMENT child =
|
||||
FPDF_StructElement_GetChildAtIndex(current_element, i);
|
||||
PopulateImageAltTextForStructElement(marked_content_id_image_map, child,
|
||||
visited_elements);
|
||||
PopulateTextRunTypeAndImageAltTextForStructElement(
|
||||
child, visited_elements, marked_content_id_text_run_info_map);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -143,6 +143,13 @@ class PDFiumPage {
|
||||
std::vector<AccessibilityTextFieldInfo> GetTextFieldInfo(
|
||||
uint32_t text_run_count);
|
||||
|
||||
// Traverses the entire struct tree of the page recursively and extracts the
|
||||
// text run type or the alt text from struct tree elements corresponding to
|
||||
// the marked content IDs associated with `text_runs` or present in
|
||||
// `marked_content_id_image_map_` respectively.
|
||||
void PopulateTextRunTypeAndImageAltText(
|
||||
std::vector<AccessibilityTextRunInfo>& text_runs);
|
||||
|
||||
enum Area {
|
||||
NONSELECTABLE_AREA,
|
||||
TEXT_AREA, // Area contains regular, selectable text not
|
||||
@ -274,7 +281,7 @@ class PDFiumPage {
|
||||
FRIEND_TEST_ALL_PREFIXES(PDFiumPageImageForOcrTest, HighResolutionImage);
|
||||
FRIEND_TEST_ALL_PREFIXES(PDFiumPageImageForOcrTest, RotatedPage);
|
||||
FRIEND_TEST_ALL_PREFIXES(PDFiumPageImageForOcrTest, NonImage);
|
||||
FRIEND_TEST_ALL_PREFIXES(PDFiumPageImageTest, CalculateImages);
|
||||
FRIEND_TEST_ALL_PREFIXES(PDFiumPageImageTest, PopulateImageAltText);
|
||||
FRIEND_TEST_ALL_PREFIXES(PDFiumPageImageTest, ImageAltText);
|
||||
FRIEND_TEST_ALL_PREFIXES(PDFiumPageLinkTest, AnnotLinkGeneration);
|
||||
FRIEND_TEST_ALL_PREFIXES(PDFiumPageLinkTest, GetLinkTarget);
|
||||
@ -429,23 +436,27 @@ class PDFiumPage {
|
||||
// broken for page objects such as links and images.
|
||||
void CalculatePageObjectTextRunBreaks();
|
||||
|
||||
// Key : Marked content id for the text element as specified in the struct
|
||||
// tree.
|
||||
// Value: A list of pointers to the associated text runs.
|
||||
using MarkedContentIdToTextRunInfoMap =
|
||||
std::map<int, std::vector<raw_ptr<AccessibilityTextRunInfo>>>;
|
||||
|
||||
// Key : Marked content id for the image element as specified in the
|
||||
// struct tree.
|
||||
// Value : Index of image in the `images_` vector.
|
||||
// struct tree.
|
||||
// Value : Index of the image in the `images_` vector.
|
||||
using MarkedContentIdToImageMap = std::map<int, size_t>;
|
||||
// Traverses the entire struct tree of the page recursively and extracts the
|
||||
// alt text from struct tree elements corresponding to the marked content IDs
|
||||
// present in `marked_content_id_image_map`.
|
||||
void PopulateImageAltText(
|
||||
const MarkedContentIdToImageMap& marked_content_id_image_map);
|
||||
|
||||
// Traverses a struct element and its sub-tree recursively and extracts the
|
||||
// alt text from struct elements corresponding to the marked content IDs
|
||||
// present in `marked_content_id_image_map`. Uses `visited_elements` to guard
|
||||
// against malformed struct trees.
|
||||
void PopulateImageAltTextForStructElement(
|
||||
const MarkedContentIdToImageMap& marked_content_id_image_map,
|
||||
// text run type or the alt text from struct elements corresponding to the
|
||||
// marked content IDs present in `marked_content_id_text_run_info_map` or
|
||||
// `marked_content_id_image_map_` respectively. Uses `visited_elements` to
|
||||
// guard against malformed struct trees.
|
||||
void PopulateTextRunTypeAndImageAltTextForStructElement(
|
||||
FPDF_STRUCTELEMENT current_element,
|
||||
std::set<FPDF_STRUCTELEMENT>* visited_elements);
|
||||
std::set<FPDF_STRUCTELEMENT>& visited_elements,
|
||||
MarkedContentIdToTextRunInfoMap& marked_content_id_text_run_info_map);
|
||||
|
||||
bool PopulateFormFieldProperties(FPDF_ANNOTATION annot,
|
||||
FormField* form_field);
|
||||
|
||||
@ -470,6 +481,7 @@ class PDFiumPage {
|
||||
bool calculated_links_ = false;
|
||||
std::vector<Link> links_;
|
||||
bool calculated_images_ = false;
|
||||
MarkedContentIdToImageMap marked_content_id_image_map_;
|
||||
std::vector<Image> images_;
|
||||
bool calculated_annotations_ = false;
|
||||
std::vector<Highlight> highlights_;
|
||||
|
@ -64,6 +64,7 @@ TEST(PDFiumPageHelperTest, ScopedUnloadPreventer) {
|
||||
void CompareTextRuns(const AccessibilityTextRunInfo& expected_text_run,
|
||||
const AccessibilityTextRunInfo& actual_text_run) {
|
||||
EXPECT_EQ(expected_text_run.len, actual_text_run.len);
|
||||
EXPECT_EQ(expected_text_run.tag_type, actual_text_run.tag_type);
|
||||
EXPECT_RECTF_EQ(expected_text_run.bounds, actual_text_run.bounds);
|
||||
EXPECT_EQ(expected_text_run.direction, actual_text_run.direction);
|
||||
|
||||
@ -508,7 +509,7 @@ INSTANTIATE_TEST_SUITE_P(All, PDFiumPageLinkTest, testing::Bool());
|
||||
|
||||
using PDFiumPageImageTest = PDFiumTestBase;
|
||||
|
||||
TEST_P(PDFiumPageImageTest, CalculateImages) {
|
||||
TEST_P(PDFiumPageImageTest, PopulateImageAltText) {
|
||||
TestClient client;
|
||||
std::unique_ptr<PDFiumEngine> engine =
|
||||
InitializeEngine(&client, FILE_PATH_LITERAL("image_alt_text.pdf"));
|
||||
@ -516,7 +517,8 @@ TEST_P(PDFiumPageImageTest, CalculateImages) {
|
||||
ASSERT_EQ(1, engine->GetNumberOfPages());
|
||||
|
||||
PDFiumPage& page = GetPDFiumPageForTest(*engine, 0);
|
||||
page.CalculateImages();
|
||||
std::vector<AccessibilityTextRunInfo> text_runs;
|
||||
page.PopulateTextRunTypeAndImageAltText(text_runs);
|
||||
ASSERT_EQ(3u, page.images_.size());
|
||||
EXPECT_EQ(gfx::Rect(380, 78, 67, 68), page.images_[0].bounding_rect);
|
||||
EXPECT_EQ("Image 1", page.images_[0].alt_text);
|
||||
@ -534,7 +536,8 @@ TEST_P(PDFiumPageImageTest, ImageAltText) {
|
||||
ASSERT_EQ(1, engine->GetNumberOfPages());
|
||||
|
||||
PDFiumPage& page = GetPDFiumPageForTest(*engine, 0);
|
||||
page.CalculateImages();
|
||||
std::vector<AccessibilityTextRunInfo> text_runs;
|
||||
page.PopulateTextRunTypeAndImageAltText(text_runs);
|
||||
ASSERT_EQ(3u, page.images_.size());
|
||||
EXPECT_EQ(gfx::Rect(380, 78, 67, 68), page.images_[0].bounding_rect);
|
||||
EXPECT_EQ("Image 1", page.images_[0].alt_text);
|
||||
@ -577,7 +580,8 @@ TEST_P(PDFiumPageImageForOcrTest, LowResolutionImage) {
|
||||
ASSERT_EQ(1, engine->GetNumberOfPages());
|
||||
|
||||
PDFiumPage& page = GetPDFiumPageForTest(*engine, 0);
|
||||
page.CalculateImages();
|
||||
std::vector<AccessibilityTextRunInfo> text_runs;
|
||||
page.PopulateTextRunTypeAndImageAltText(text_runs);
|
||||
ASSERT_EQ(3u, page.images_.size());
|
||||
|
||||
ASSERT_FALSE(page.images_[0].alt_text.empty());
|
||||
@ -605,7 +609,8 @@ TEST_P(PDFiumPageImageForOcrTest, HighResolutionImage) {
|
||||
ASSERT_EQ(1, engine->GetNumberOfPages());
|
||||
|
||||
PDFiumPage& page = GetPDFiumPageForTest(*engine, 0);
|
||||
page.CalculateImages();
|
||||
std::vector<AccessibilityTextRunInfo> text_runs;
|
||||
page.PopulateTextRunTypeAndImageAltText(text_runs);
|
||||
ASSERT_EQ(1u, page.images_.size());
|
||||
|
||||
SkBitmap image_bitmap = engine->GetImageForOcr(
|
||||
@ -625,7 +630,8 @@ TEST_P(PDFiumPageImageForOcrTest, RotatedPage) {
|
||||
ASSERT_EQ(1, engine->GetNumberOfPages());
|
||||
|
||||
PDFiumPage& page = GetPDFiumPageForTest(*engine, 0);
|
||||
page.CalculateImages();
|
||||
std::vector<AccessibilityTextRunInfo> text_runs;
|
||||
page.PopulateTextRunTypeAndImageAltText(text_runs);
|
||||
ASSERT_EQ(1u, page.images_.size());
|
||||
|
||||
// This page is rotated, therefore the extracted image size is 25x100 while
|
||||
@ -644,7 +650,8 @@ TEST_P(PDFiumPageImageForOcrTest, NonImage) {
|
||||
ASSERT_EQ(1, engine->GetNumberOfPages());
|
||||
|
||||
PDFiumPage& page = GetPDFiumPageForTest(*engine, 0);
|
||||
page.CalculateImages();
|
||||
std::vector<AccessibilityTextRunInfo> text_runs;
|
||||
page.PopulateTextRunTypeAndImageAltText(text_runs);
|
||||
ASSERT_EQ(3u, page.images_.size());
|
||||
ASSERT_EQ(1, page.images_[0].page_object_index);
|
||||
|
||||
@ -765,21 +772,21 @@ TEST_P(PDFiumPageTextTest, GetTextRunInfo) {
|
||||
// text run lengths respectively. There are text runs preceding and
|
||||
// succeeding them.
|
||||
auto expected_text_runs = std::to_array<AccessibilityTextRunInfo>({
|
||||
{7, gfx::RectF(26.666666f, 189.333333f, 38.666672f, 13.333344f),
|
||||
{7, "", gfx::RectF(26.666666f, 189.333333f, 38.666672f, 13.333344f),
|
||||
AccessibilityTextDirection::kLeftToRight, expected_style_1},
|
||||
{16, gfx::RectF(70.666664f, 189.333333f, 108.0f, 14.666672f),
|
||||
{16, "", gfx::RectF(70.666664f, 189.333333f, 108.0f, 14.666672f),
|
||||
AccessibilityTextDirection::kLeftToRight, expected_style_1},
|
||||
{20, gfx::RectF(181.333333f, 189.333333f, 117.333333f, 14.666672f),
|
||||
{20, "", gfx::RectF(181.333333f, 189.333333f, 117.333333f, 14.666672f),
|
||||
AccessibilityTextDirection::kLeftToRight, expected_style_1},
|
||||
{9, gfx::RectF(28.0f, 117.33334f, 89.333328f, 20.0f),
|
||||
{9, "", gfx::RectF(28.0f, 117.33334f, 89.333328f, 20.0f),
|
||||
AccessibilityTextDirection::kLeftToRight, expected_style_2},
|
||||
{15, gfx::RectF(126.66666f, 117.33334f, 137.33334f, 20.0f),
|
||||
{15, "", gfx::RectF(126.66666f, 117.33334f, 137.33334f, 20.0f),
|
||||
AccessibilityTextDirection::kLeftToRight, expected_style_2},
|
||||
{20, gfx::RectF(266.66666f, 118.66666f, 169.33334f, 18.666664f),
|
||||
{20, "", gfx::RectF(266.66666f, 118.66666f, 169.33334f, 18.666664f),
|
||||
AccessibilityTextDirection::kLeftToRight, expected_style_2},
|
||||
{5, gfx::RectF(28.0f, 65.333336f, 40.0f, 18.666664f),
|
||||
{5, "", gfx::RectF(28.0f, 65.333336f, 40.0f, 18.666664f),
|
||||
AccessibilityTextDirection::kLeftToRight, expected_style_2},
|
||||
{17, gfx::RectF(77.333336f, 64.0f, 160.0f, 20.0f),
|
||||
{17, "", gfx::RectF(77.333336f, 64.0f, 160.0f, 20.0f),
|
||||
AccessibilityTextDirection::kLeftToRight, expected_style_2},
|
||||
});
|
||||
|
||||
@ -826,18 +833,17 @@ TEST_P(PDFiumPageTextTest, HighlightTextRunInfo) {
|
||||
"Helvetica", 0, AccessibilityTextRenderMode::kFill,
|
||||
16, 0xff000000, 0xff000000,
|
||||
false, false};
|
||||
auto expected_text_runs = std::to_array<AccessibilityTextRunInfo>({
|
||||
{5, gfx::RectF(1.3333334f, 198.66667f, 46.666668f, 14.666672f),
|
||||
AccessibilityTextDirection::kLeftToRight, kExpectedStyle},
|
||||
{7, gfx::RectF(50.666668f, 198.66667f, 47.999996f, 17.333328f),
|
||||
AccessibilityTextDirection::kLeftToRight, kExpectedStyle},
|
||||
{7, gfx::RectF(106.66666f, 198.66667f, 73.333336f, 18.666672f),
|
||||
AccessibilityTextDirection::kLeftToRight, kExpectedStyle},
|
||||
{2, gfx::RectF(181.33333f, 202.66667f, 16.0f, 14.66667f),
|
||||
AccessibilityTextDirection::kNone, kExpectedStyle},
|
||||
{2, gfx::RectF(198.66667f, 202.66667f, 21.333328f, 10.666672f),
|
||||
AccessibilityTextDirection::kLeftToRight, kExpectedStyle},
|
||||
});
|
||||
auto expected_text_runs = std::to_array<AccessibilityTextRunInfo>(
|
||||
{{5, "", gfx::RectF(1.3333334f, 198.66667f, 46.666668f, 14.666672f),
|
||||
AccessibilityTextDirection::kLeftToRight, kExpectedStyle},
|
||||
{7, "", gfx::RectF(50.666668f, 198.66667f, 47.999996f, 17.333328f),
|
||||
AccessibilityTextDirection::kLeftToRight, kExpectedStyle},
|
||||
{7, "", gfx::RectF(106.66666f, 198.66667f, 73.333336f, 18.666672f),
|
||||
AccessibilityTextDirection::kLeftToRight, kExpectedStyle},
|
||||
{2, "", gfx::RectF(181.33333f, 202.66667f, 16.0f, 14.66667f),
|
||||
AccessibilityTextDirection::kNone, kExpectedStyle},
|
||||
{2, "", gfx::RectF(198.66667f, 202.66667f, 21.333328f, 10.666672f),
|
||||
AccessibilityTextDirection::kLeftToRight, kExpectedStyle}});
|
||||
|
||||
if (UsingTestFonts()) {
|
||||
expected_text_runs[2].bounds =
|
||||
|
164
pdf/test/data/tags.in
Normal file
164
pdf/test/data/tags.in
Normal file
@ -0,0 +1,164 @@
|
||||
{{header}}
|
||||
{{object 1 0}} <<
|
||||
/Type /Catalog
|
||||
/Pages 2 0 R
|
||||
/StructTreeRoot 7 0 R
|
||||
/Lang (en-US)
|
||||
/MarkInfo <<
|
||||
/Type /MarkInfo
|
||||
/Marked true
|
||||
>>
|
||||
>>
|
||||
endobj
|
||||
{{object 2 0}} <<
|
||||
/Type /Pages
|
||||
/Count 1
|
||||
/Kids [3 0 R]
|
||||
>>
|
||||
endobj
|
||||
{{object 3 0}} <<
|
||||
/Type /Page
|
||||
/Contents 6 0 R
|
||||
/Parent 2 0 R
|
||||
/MediaBox [0 0 612 792]
|
||||
/Resources <<
|
||||
/Font <<
|
||||
/F1 4 0 R
|
||||
/F2 5 0 R
|
||||
>>
|
||||
>>
|
||||
/StructParents 0
|
||||
>>
|
||||
endobj
|
||||
{{object 4 0}} <<
|
||||
/Type /Font
|
||||
/Subtype /Type1
|
||||
/BaseFont /Times-Roman
|
||||
>>
|
||||
endobj
|
||||
{{object 5 0}} <<
|
||||
/Type /Font
|
||||
/Subtype /Type1
|
||||
/BaseFont /Helvetica
|
||||
>>
|
||||
endobj
|
||||
{{object 6 0}} <<
|
||||
{{streamlen}}
|
||||
>>
|
||||
stream
|
||||
/Art <</MCID 0 >>BDC
|
||||
BT
|
||||
/F1 12 Tf
|
||||
20 50 Td
|
||||
(Article) Tj
|
||||
ET
|
||||
EMC
|
||||
/BlockQuote <</MCID 1 >>BDC
|
||||
BT
|
||||
/F1 12 Tf
|
||||
20 150 Td
|
||||
(BlockQuote) Tj
|
||||
ET
|
||||
EMC
|
||||
/P <</MCID 2 >>BDC
|
||||
BT
|
||||
/F1 12 Tf
|
||||
20 250 Td
|
||||
(Paragraph) Tj
|
||||
ET
|
||||
EMC
|
||||
/H1 <</MCID 3 >>BDC
|
||||
BT
|
||||
/F2 16 Tf
|
||||
20 350 Td
|
||||
(Heading1) Tj
|
||||
ET
|
||||
EMC
|
||||
/H2 <</MCID 4 >>BDC
|
||||
BT
|
||||
/F2 14 Tf
|
||||
20 550 Td
|
||||
(Heading2) Tj
|
||||
ET
|
||||
EMC
|
||||
endstream
|
||||
endobj
|
||||
{{object 7 0}} <<
|
||||
/Type /StructTreeRoot
|
||||
/K 8 0 R
|
||||
/ParentTree 9 0 R
|
||||
/ParentTreeNextKey 1
|
||||
>>
|
||||
endobj
|
||||
{{object 8 0}} <<
|
||||
/Type /StructElem
|
||||
/S /Document
|
||||
/Lang (en-US)
|
||||
/P 7 0 R
|
||||
/K [10 0 R 11 0 R 12 0 R 13 0 R 14 0 R]
|
||||
>>
|
||||
endobj
|
||||
{{object 9 0}} <<
|
||||
/Type /ParentTree
|
||||
/Nums [0 [10 0 R 11 0 R 12 0 R 13 0 R 14 0 R]]
|
||||
>>
|
||||
endobj
|
||||
{{object 10 0}} <<
|
||||
/Type /StructElem
|
||||
/S /Art
|
||||
/P 8 0 R
|
||||
/K <<
|
||||
/Type /MCR
|
||||
/Pg 3 0 R
|
||||
/MCID 0
|
||||
>>
|
||||
>>
|
||||
endobj
|
||||
{{object 11 0}} <<
|
||||
/Type /StructElem
|
||||
/S /BlockQuote
|
||||
/P 8 0 R
|
||||
/K <<
|
||||
/Type /MCR
|
||||
/Pg 3 0 R
|
||||
/MCID 1
|
||||
>>
|
||||
>>
|
||||
endobj
|
||||
{{object 12 0}} <<
|
||||
/Type /StructElem
|
||||
/S /P
|
||||
/P 8 0 R
|
||||
/K <<
|
||||
/Type /MCR
|
||||
/Pg 3 0 R
|
||||
/MCID 2
|
||||
>>
|
||||
>>
|
||||
endobj
|
||||
{{object 13 0}} <<
|
||||
/Type /StructElem
|
||||
/S /H1
|
||||
/P 8 0 R
|
||||
/K <<
|
||||
/Type /MCR
|
||||
/Pg 3 0 R
|
||||
/MCID 3
|
||||
>>
|
||||
>>
|
||||
endobj
|
||||
{{object 14 0}} <<
|
||||
/Type /StructElem
|
||||
/S /H2
|
||||
/P 8 0 R
|
||||
/K <<
|
||||
/Type /MCR
|
||||
/Pg 3 0 R
|
||||
/MCID 4
|
||||
>>
|
||||
>>
|
||||
endobj
|
||||
{{xref}}
|
||||
{{trailer}}
|
||||
{{startxref}}
|
||||
%%EOF
|
185
pdf/test/data/tags.pdf
Normal file
185
pdf/test/data/tags.pdf
Normal file
@ -0,0 +1,185 @@
|
||||
%PDF-1.7
|
||||
%<25><><EFBFBD><EFBFBD>
|
||||
1 0 obj <<
|
||||
/Type /Catalog
|
||||
/Pages 2 0 R
|
||||
/StructTreeRoot 7 0 R
|
||||
/Lang (en-US)
|
||||
/MarkInfo <<
|
||||
/Type /MarkInfo
|
||||
/Marked true
|
||||
>>
|
||||
>>
|
||||
endobj
|
||||
2 0 obj <<
|
||||
/Type /Pages
|
||||
/Count 1
|
||||
/Kids [3 0 R]
|
||||
>>
|
||||
endobj
|
||||
3 0 obj <<
|
||||
/Type /Page
|
||||
/Contents 6 0 R
|
||||
/Parent 2 0 R
|
||||
/MediaBox [0 0 612 792]
|
||||
/Resources <<
|
||||
/Font <<
|
||||
/F1 4 0 R
|
||||
/F2 5 0 R
|
||||
>>
|
||||
>>
|
||||
/StructParents 0
|
||||
>>
|
||||
endobj
|
||||
4 0 obj <<
|
||||
/Type /Font
|
||||
/Subtype /Type1
|
||||
/BaseFont /Times-Roman
|
||||
>>
|
||||
endobj
|
||||
5 0 obj <<
|
||||
/Type /Font
|
||||
/Subtype /Type1
|
||||
/BaseFont /Helvetica
|
||||
>>
|
||||
endobj
|
||||
6 0 obj <<
|
||||
/Length 329
|
||||
>>
|
||||
stream
|
||||
/Art <</MCID 0 >>BDC
|
||||
BT
|
||||
/F1 12 Tf
|
||||
20 50 Td
|
||||
(Article) Tj
|
||||
ET
|
||||
EMC
|
||||
/BlockQuote <</MCID 1 >>BDC
|
||||
BT
|
||||
/F1 12 Tf
|
||||
20 150 Td
|
||||
(BlockQuote) Tj
|
||||
ET
|
||||
EMC
|
||||
/P <</MCID 2 >>BDC
|
||||
BT
|
||||
/F1 12 Tf
|
||||
20 250 Td
|
||||
(Paragraph) Tj
|
||||
ET
|
||||
EMC
|
||||
/H1 <</MCID 3 >>BDC
|
||||
BT
|
||||
/F2 16 Tf
|
||||
20 350 Td
|
||||
(Heading1) Tj
|
||||
ET
|
||||
EMC
|
||||
/H2 <</MCID 4 >>BDC
|
||||
BT
|
||||
/F2 14 Tf
|
||||
20 550 Td
|
||||
(Heading2) Tj
|
||||
ET
|
||||
EMC
|
||||
endstream
|
||||
endobj
|
||||
7 0 obj <<
|
||||
/Type /StructTreeRoot
|
||||
/K 8 0 R
|
||||
/ParentTree 9 0 R
|
||||
/ParentTreeNextKey 1
|
||||
>>
|
||||
endobj
|
||||
8 0 obj <<
|
||||
/Type /StructElem
|
||||
/S /Document
|
||||
/Lang (en-US)
|
||||
/P 7 0 R
|
||||
/K [10 0 R 11 0 R 12 0 R 13 0 R 14 0 R]
|
||||
>>
|
||||
endobj
|
||||
9 0 obj <<
|
||||
/Type /ParentTree
|
||||
/Nums [0 [10 0 R 11 0 R 12 0 R 13 0 R 14 0 R]]
|
||||
>>
|
||||
endobj
|
||||
10 0 obj <<
|
||||
/Type /StructElem
|
||||
/S /Art
|
||||
/P 8 0 R
|
||||
/K <<
|
||||
/Type /MCR
|
||||
/Pg 3 0 R
|
||||
/MCID 0
|
||||
>>
|
||||
>>
|
||||
endobj
|
||||
11 0 obj <<
|
||||
/Type /StructElem
|
||||
/S /BlockQuote
|
||||
/P 8 0 R
|
||||
/K <<
|
||||
/Type /MCR
|
||||
/Pg 3 0 R
|
||||
/MCID 1
|
||||
>>
|
||||
>>
|
||||
endobj
|
||||
12 0 obj <<
|
||||
/Type /StructElem
|
||||
/S /P
|
||||
/P 8 0 R
|
||||
/K <<
|
||||
/Type /MCR
|
||||
/Pg 3 0 R
|
||||
/MCID 2
|
||||
>>
|
||||
>>
|
||||
endobj
|
||||
13 0 obj <<
|
||||
/Type /StructElem
|
||||
/S /H1
|
||||
/P 8 0 R
|
||||
/K <<
|
||||
/Type /MCR
|
||||
/Pg 3 0 R
|
||||
/MCID 3
|
||||
>>
|
||||
>>
|
||||
endobj
|
||||
14 0 obj <<
|
||||
/Type /StructElem
|
||||
/S /H2
|
||||
/P 8 0 R
|
||||
/K <<
|
||||
/Type /MCR
|
||||
/Pg 3 0 R
|
||||
/MCID 4
|
||||
>>
|
||||
>>
|
||||
endobj
|
||||
xref
|
||||
0 15
|
||||
0000000000 65535 f
|
||||
0000000015 00000 n
|
||||
0000000165 00000 n
|
||||
0000000228 00000 n
|
||||
0000000415 00000 n
|
||||
0000000493 00000 n
|
||||
0000000569 00000 n
|
||||
0000000950 00000 n
|
||||
0000001049 00000 n
|
||||
0000001174 00000 n
|
||||
0000001264 00000 n
|
||||
0000001381 00000 n
|
||||
0000001505 00000 n
|
||||
0000001620 00000 n
|
||||
0000001736 00000 n
|
||||
trailer <<
|
||||
/Root 1 0 R
|
||||
/Size 15
|
||||
>>
|
||||
startxref
|
||||
1852
|
||||
%%EOF
|
Reference in New Issue
Block a user