0

[unseasoned-pdf] Consolidate AccessibilityLinkInfo in pdf/

Use a single AccessibilityLinkInfo struct in pdf/ to fetch
accessibility data. The consolidation removes the need for two structs
and removes the overhead of conversion and copy from one struct to
another.

Bug: 1175023
Change-Id: I0ac87fda5766094f3c8477c9e1302234e6a498ef
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2690223
Commit-Queue: Ankit Kumar 🌪️ <ankk@microsoft.com>
Reviewed-by: Daniel Hosseinian <dhoss@chromium.org>
Cr-Commit-Position: refs/heads/master@{#853462}
This commit is contained in:
Ankit Kumar 🌪️
2021-02-12 10:41:33 +00:00
committed by Chromium LUCI CQ
parent 27cffc43dd
commit ab90ffc3d2
8 changed files with 40 additions and 68 deletions

@ -23,30 +23,6 @@ bool CompareTextRuns(const T& a, const T& b) {
return a.text_range.index < b.text_range.index;
}
// Produces the accessibility link records for page |page_index|.
//
// The raw per-link data is fetched from |engine|; each link's character range
// is then translated into the range of |text_runs| that encloses it. Every
// record remembers the position it had in the engine's list
// (|index_in_page|), and the returned vector is ordered by
// |text_range.index|.
std::vector<AccessibilityLinkInfo> GetAccessibilityLinkInfo(
    PDFEngine* engine,
    int32_t page_index,
    const std::vector<AccessibilityTextRunInfo>& text_runs) {
  std::vector<PDFEngine::AccessibilityLinkInfo> raw_links =
      engine->GetLinkInfo(page_index);

  std::vector<AccessibilityLinkInfo> converted_links;
  converted_links.reserve(raw_links.size());

  size_t position = 0;
  for (auto& raw : raw_links) {
    AccessibilityLinkInfo converted;
    // The engine-side vector is a local temporary, so its URL strings can be
    // moved out instead of copied.
    converted.url = std::move(raw.url);
    converted.index_in_page = position++;
    converted.bounds = raw.bounds;
    converted.text_range = GetEnclosingTextRunRangeForCharRange(
        text_runs, raw.start_char_index, raw.char_count);
    converted_links.push_back(std::move(converted));
  }

  std::sort(converted_links.begin(), converted_links.end(),
            CompareTextRuns<AccessibilityLinkInfo>);
  return converted_links;
}
std::vector<AccessibilityImageInfo> GetAccessibilityImageInfo(
PDFEngine* engine,
int32_t page_index,
@ -209,7 +185,7 @@ bool GetAccessibilityInfo(PDFEngine* engine,
}
page_info.text_run_count = text_runs.size();
page_objects.links = GetAccessibilityLinkInfo(engine, page_index, text_runs);
page_objects.links = engine->GetLinkInfo(page_index, text_runs);
page_objects.images =
GetAccessibilityImageInfo(engine, page_index, page_info.text_run_count);
page_objects.highlights =

@ -1,4 +1,4 @@
// Copyright 2021 The Chromium Authors. All rights reserved.
// Copyright 2020 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
@ -14,8 +14,6 @@
namespace chrome_pdf {
namespace {
bool IsCharWithinTextRun(const AccessibilityTextRunInfo& text_run,
uint32_t text_run_start_char_index,
uint32_t char_index) {
@ -23,20 +21,19 @@ bool IsCharWithinTextRun(const AccessibilityTextRunInfo& text_run,
char_index - text_run_start_char_index < text_run.len;
}
} // namespace
// If a valid text run range is not found for the char range then return the
// fallback value.
AccessibilityTextRunRangeInfo GetEnclosingTextRunRangeForCharRange(
const std::vector<AccessibilityTextRunInfo>& text_runs,
int start_char_index,
int char_count) {
// Initialize with fallback value.
AccessibilityTextRunRangeInfo text_range = {
static_cast<uint32_t>(text_runs.size()), 0};
AccessibilityTextRunRangeInfo text_range = {text_runs.size(), 0};
if (start_char_index < 0 || char_count <= 0)
return text_range;
const base::CheckedNumeric<uint32_t> checked_end_char_index =
base::MakeCheckedNum(char_count) - 1 + start_char_index;
base::CheckedNumeric<uint32_t> checked_end_char_index = char_count - 1;
checked_end_char_index += start_char_index;
if (!checked_end_char_index.IsValid())
return text_range;
uint32_t end_char_index = checked_end_char_index.ValueOrDie();

@ -1,4 +1,4 @@
// Copyright 2021 The Chromium Authors. All rights reserved.
// Copyright 2020 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
@ -12,9 +12,6 @@ namespace chrome_pdf {
struct AccessibilityTextRunInfo;
struct AccessibilityTextRunRangeInfo;
// Find the text run range encompassing the char range denoted by
// |start_char_index| and |char_count|. If a valid text run range is not found
// for the char range then return the fallback value.
AccessibilityTextRunRangeInfo GetEnclosingTextRunRangeForCharRange(
const std::vector<AccessibilityTextRunInfo>& text_runs,
int start_char_index,

@ -60,6 +60,7 @@ namespace chrome_pdf {
class InputEvent;
class Thumbnail;
class UrlLoader;
struct AccessibilityLinkInfo;
struct AccessibilityTextRunInfo;
struct DocumentAttachmentInfo;
struct DocumentMetadata;
@ -282,17 +283,6 @@ class PDFEngine {
const base::Location& from_here = base::Location::Current()) = 0;
};
// Engine-side description of a single link on a page: where it points, which
// characters it covers and where it is drawn.
struct AccessibilityLinkInfo {
  AccessibilityLinkInfo();
  AccessibilityLinkInfo(const AccessibilityLinkInfo& that);
  ~AccessibilityLinkInfo();

  // Target URL of the link.
  std::string url;
  // Index of the first character covered by the link. Initialized in-class so
  // that a default-constructed instance never carries an indeterminate value.
  int start_char_index = 0;
  // Number of characters covered by the link, starting at
  // |start_char_index|. Initialized in-class for the same reason.
  int char_count = 0;
  // Bounding box of the link.
  gfx::RectF bounds;
};
struct AccessibilityImageInfo {
AccessibilityImageInfo();
AccessibilityImageInfo(const AccessibilityImageInfo& that);
@ -440,7 +430,9 @@ class PDFEngine {
int start_char_index) = 0;
// For all the links on page |page_index|, get their urls, underlying text
// ranges and bounding boxes.
virtual std::vector<AccessibilityLinkInfo> GetLinkInfo(int page_index) = 0;
virtual std::vector<AccessibilityLinkInfo> GetLinkInfo(
int page_index,
const std::vector<AccessibilityTextRunInfo>& text_runs) = 0;
// For all the images in page |page_index|, get their alt texts and bounding
// boxes.
virtual std::vector<AccessibilityImageInfo> GetImageInfo(int page_index) = 0;

@ -515,13 +515,6 @@ void ShutdownSDK() {
#endif // defined(PDF_ENABLE_V8)
}
// Out-of-line definitions of the special members of
// PDFEngine::AccessibilityLinkInfo (default constructor, copy constructor and
// destructor). All three use the compiler-generated behavior.
PDFEngine::AccessibilityLinkInfo::AccessibilityLinkInfo() = default;
PDFEngine::AccessibilityLinkInfo::AccessibilityLinkInfo(
const AccessibilityLinkInfo& that) = default;
PDFEngine::AccessibilityLinkInfo::~AccessibilityLinkInfo() = default;
PDFEngine::AccessibilityImageInfo::AccessibilityImageInfo() = default;
PDFEngine::AccessibilityImageInfo::AccessibilityImageInfo(
@ -2606,10 +2599,11 @@ base::Optional<AccessibilityTextRunInfo> PDFiumEngine::GetTextRunInfo(
return info;
}
// PDFiumEngine::GetLinkInfo: DCHECKs that |page_index| is in bounds and then
// forwards the request to the GetLinkInfo() of the corresponding entry in
// |pages_|.
// NOTE(review): this span appears to interleave the pre-change and
// post-change lines of the diff (two signatures, two return statements); the
// variant taking |text_runs| passes it through to the page unchanged.
std::vector<PDFEngine::AccessibilityLinkInfo> PDFiumEngine::GetLinkInfo(
int page_index) {
std::vector<AccessibilityLinkInfo> PDFiumEngine::GetLinkInfo(
int page_index,
const std::vector<AccessibilityTextRunInfo>& text_runs) {
DCHECK(PageIndexInBounds(page_index));
return pages_[page_index]->GetLinkInfo();
return pages_[page_index]->GetLinkInfo(text_runs);
}
std::vector<PDFEngine::AccessibilityImageInfo> PDFiumEngine::GetImageInfo(

@ -141,7 +141,9 @@ class PDFiumEngine : public PDFEngine,
base::Optional<AccessibilityTextRunInfo> GetTextRunInfo(
int page_index,
int start_char_index) override;
std::vector<AccessibilityLinkInfo> GetLinkInfo(int page_index) override;
std::vector<AccessibilityLinkInfo> GetLinkInfo(
int page_index,
const std::vector<AccessibilityTextRunInfo>& text_runs) override;
std::vector<AccessibilityImageInfo> GetImageInfo(int page_index) override;
std::vector<AccessibilityHighlightInfo> GetHighlightInfo(
int page_index) override;

@ -21,6 +21,7 @@
#include "base/strings/string_number_conversions.h"
#include "base/strings/string_util.h"
#include "base/strings/utf_string_conversions.h"
#include "pdf/accessibility_helper.h"
#include "pdf/accessibility_structs.h"
#include "pdf/pdfium/pdfium_api_string_buffer_adapter.h"
#include "pdf/pdfium/pdfium_engine.h"
@ -256,6 +257,11 @@ bool IsRadioButtonOrCheckBox(int button_type) {
button_type == FPDF_FORMFIELD_RADIOBUTTON;
}
// Ordering predicate for std::sort over objects that expose a
// |text_range.index| member: returns true when |a|'s index is strictly less
// than |b|'s.
template <typename T>
bool CompareTextRuns(const T& a, const T& b) {
  const auto& lhs_range = a.text_range;
  const auto& rhs_range = b.text_range;
  return lhs_range.index < rhs_range.index;
}
} // namespace
PDFiumPage::LinkTarget::LinkTarget() : page(-1) {}
@ -611,19 +617,22 @@ gfx::RectF PDFiumPage::GetCharBounds(int char_index) {
return GetFloatCharRectInPixels(page, text_page, char_index);
}
std::vector<PDFEngine::AccessibilityLinkInfo> PDFiumPage::GetLinkInfo() {
std::vector<PDFEngine::AccessibilityLinkInfo> link_info;
std::vector<AccessibilityLinkInfo> PDFiumPage::GetLinkInfo(
const std::vector<AccessibilityTextRunInfo>& text_runs) {
std::vector<AccessibilityLinkInfo> link_info;
if (!available_)
return link_info;
CalculateLinks();
link_info.reserve(links_.size());
for (const Link& link : links_) {
PDFEngine::AccessibilityLinkInfo cur_info;
for (size_t i = 0; i < links_.size(); ++i) {
const Link& link = links_[i];
AccessibilityLinkInfo cur_info;
cur_info.url = link.target.url;
cur_info.start_char_index = link.start_char_index;
cur_info.char_count = link.char_count;
cur_info.index_in_page = i;
cur_info.text_range = GetEnclosingTextRunRangeForCharRange(
text_runs, link.start_char_index, link.char_count);
gfx::Rect link_rect;
for (const auto& rect : link.bounding_rects)
@ -633,6 +642,9 @@ std::vector<PDFEngine::AccessibilityLinkInfo> PDFiumPage::GetLinkInfo() {
link_info.push_back(std::move(cur_info));
}
std::sort(link_info.begin(), link_info.end(),
CompareTextRuns<AccessibilityLinkInfo>);
return link_info;
}

@ -34,6 +34,7 @@ namespace chrome_pdf {
class PDFiumEngine;
class Thumbnail;
struct AccessibilityLinkInfo;
struct AccessibilityTextRunInfo;
struct AccessibilityTextStyleInfo;
@ -64,7 +65,8 @@ class PDFiumPage {
gfx::RectF GetCharBounds(int char_index);
// For all the links on the page, get their urls, underlying text ranges and
// bounding boxes.
std::vector<PDFEngine::AccessibilityLinkInfo> GetLinkInfo();
std::vector<AccessibilityLinkInfo> GetLinkInfo(
const std::vector<AccessibilityTextRunInfo>& text_runs);
// For all the images on the page, get their alt texts and bounding boxes.
std::vector<PDFEngine::AccessibilityImageInfo> GetImageInfo();
// For all the highlights on the page, get their underlying text ranges and