Add a unit test for PDF a11y info extraction.
Move most of OutOfProcessInstance::SendNextAccessibilityPage() out into its own function, then unit test it.

Change-Id: I54590ac004f11d918ccb45a3e071d9cf19a927ed
Reviewed-on: https://chromium-review.googlesource.com/c/1407265
Commit-Queue: Lei Zhang <thestig@chromium.org>
Reviewed-by: Dominic Mazzoni <dmazzoni@chromium.org>
Cr-Commit-Position: refs/heads/master@{#625117}
This commit is contained in:
@ -44,6 +44,8 @@ if (enable_pdf) {
|
||||
]
|
||||
|
||||
sources = [
|
||||
"accessibility.cc",
|
||||
"accessibility.h",
|
||||
"chunk_stream.h",
|
||||
"document_loader.h",
|
||||
"document_loader_impl.cc",
|
||||
@ -174,6 +176,7 @@ if (enable_pdf) {
|
||||
"//third_party/pdfium",
|
||||
]
|
||||
sources += [
|
||||
"pdfium/accessibility_unittest.cc",
|
||||
"pdfium/findtext_unittest.cc",
|
||||
"pdfium/pdfium_engine_exports_unittest.cc",
|
||||
"pdfium/pdfium_print_unittest.cc",
|
||||
|
76
pdf/accessibility.cc
Normal file
76
pdf/accessibility.cc
Normal file
@ -0,0 +1,76 @@
|
||||
// Copyright 2019 The Chromium Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
#include "pdf/accessibility.h"
|
||||
|
||||
#include "pdf/pdf_engine.h"
|
||||
#include "ppapi/c/private/ppb_pdf.h"
|
||||
|
||||
namespace chrome_pdf {
|
||||
|
||||
// Collects the accessibility data for the page at 0-indexed |page_index| from
// |engine|: the page description goes into |page_info|, one entry per text run
// into |text_runs|, and one entry per character into |chars|.
//
// Returns false without touching any out parameter when |page_index| is out of
// range. Otherwise returns true; |text_runs| and |chars| are reset and then
// filled with this page's data only.
bool GetAccessibilityInfo(
    PDFEngine* engine,
    int32_t page_index,
    PP_PrivateAccessibilityPageInfo* page_info,
    std::vector<PP_PrivateAccessibilityTextRunInfo>* text_runs,
    std::vector<PP_PrivateAccessibilityCharInfo>* chars) {
  int page_count = engine->GetNumberOfPages();
  if (page_index < 0 || page_index >= page_count)
    return false;

  int char_count = engine->GetCharCount(page_index);

  // Treat a char count of -1 (error) as 0 (an empty page), since
  // other pages might have valid content.
  if (char_count < 0)
    char_count = 0;

  page_info->page_index = page_index;
  page_info->bounds = engine->GetPageBoundsRect(page_index);
  page_info->char_count = char_count;

  // Start from value-initialized entries so that no stale data from a reused
  // vector survives if the text run walk below has to stop early.
  chars->clear();
  chars->resize(page_info->char_count);
  for (uint32_t i = 0; i < page_info->char_count; ++i) {
    (*chars)[i].unicode_character = engine->GetCharUnicode(page_index, i);
  }

  // |text_run_count| below is derived from the vector size, so the vector must
  // only hold runs that belong to this page.
  text_runs->clear();

  int char_index = 0;
  while (char_index < char_count) {
    // Value-initialize so |len| is 0 rather than garbage if the engine does
    // not fill it in.
    PP_PrivateAccessibilityTextRunInfo text_run_info = {};
    pp::FloatRect bounds;
    engine->GetTextRunInfo(page_index, char_index, &text_run_info.len,
                           &text_run_info.font_size, &bounds);
    DCHECK_LE(char_index + text_run_info.len,
              static_cast<uint32_t>(char_count));

    // A zero-length run would never advance |char_index| (infinite loop), and
    // |len - 1| below would wrap around as an unsigned value. Stop instead.
    if (text_run_info.len == 0)
      break;

    // The DCHECK above is compiled out of release builds; never let a run
    // extend past the end of |chars|.
    const uint32_t remaining = static_cast<uint32_t>(char_count - char_index);
    if (text_run_info.len > remaining)
      text_run_info.len = remaining;

    text_run_info.direction = PP_PRIVATEDIRECTION_LTR;
    text_run_info.bounds = bounds;
    text_runs->push_back(text_run_info);

    // We need to provide enough information to draw a bounding box
    // around any arbitrary text range, but the bounding boxes of characters
    // we get from PDFium don't necessarily "line up". Walk through the
    // characters in each text run and let the width of each character be
    // the difference between the x coordinate of one character and the
    // x coordinate of the next. The rest of the bounds of each character
    // can be computed from the bounds of the text run.
    pp::FloatRect char_bounds = engine->GetCharBounds(page_index, char_index);
    for (uint32_t i = 0; i < text_run_info.len - 1; i++) {
      DCHECK_LT(char_index + i + 1, static_cast<uint32_t>(char_count));
      pp::FloatRect next_char_bounds =
          engine->GetCharBounds(page_index, char_index + i + 1);
      (*chars)[char_index + i].char_width =
          next_char_bounds.x() - char_bounds.x();
      char_bounds = next_char_bounds;
    }
    // The last character of a run has no successor, so use its own width.
    (*chars)[char_index + text_run_info.len - 1].char_width =
        char_bounds.width();

    char_index += text_run_info.len;
  }

  page_info->text_run_count = text_runs->size();
  return true;
}
|
||||
|
||||
} // namespace chrome_pdf
|
32
pdf/accessibility.h
Normal file
32
pdf/accessibility.h
Normal file
@ -0,0 +1,32 @@
|
||||
// Copyright 2019 The Chromium Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
#ifndef PDF_ACCESSIBILITY_H_
|
||||
#define PDF_ACCESSIBILITY_H_
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include <vector>
|
||||
|
||||
struct PP_PrivateAccessibilityCharInfo;
|
||||
struct PP_PrivateAccessibilityPageInfo;
|
||||
struct PP_PrivateAccessibilityTextRunInfo;
|
||||
|
||||
namespace chrome_pdf {
|
||||
|
||||
class PDFEngine;
|
||||
|
||||
// Retrieve |page_info|, |text_runs|, and |chars| from |engine| for the page at
// 0-indexed |page_index|. Returns true on success with all out parameters
// filled, or false if |page_index| is out of range, in which case all out
// parameters are left untouched.
|
||||
bool GetAccessibilityInfo(
|
||||
PDFEngine* engine,
|
||||
int32_t page_index,
|
||||
PP_PrivateAccessibilityPageInfo* page_info,
|
||||
std::vector<PP_PrivateAccessibilityTextRunInfo>* text_runs,
|
||||
std::vector<PP_PrivateAccessibilityCharInfo>* chars);
|
||||
|
||||
} // namespace chrome_pdf
|
||||
|
||||
#endif // PDF_ACCESSIBILITY_H_
|
@ -21,6 +21,7 @@
|
||||
#include "base/values.h"
|
||||
#include "chrome/common/content_restriction.h"
|
||||
#include "net/base/escape.h"
|
||||
#include "pdf/accessibility.h"
|
||||
#include "pdf/pdf.h"
|
||||
#include "pdf/pdf_features.h"
|
||||
#include "ppapi/c/dev/ppb_cursor_control_dev.h"
|
||||
@ -936,61 +937,14 @@ void OutOfProcessInstance::LoadAccessibility() {
|
||||
}
|
||||
|
||||
void OutOfProcessInstance::SendNextAccessibilityPage(int32_t page_index) {
|
||||
int page_count = engine_->GetNumberOfPages();
|
||||
if (page_index < 0 || page_index >= page_count)
|
||||
return;
|
||||
|
||||
int char_count = engine_->GetCharCount(page_index);
|
||||
|
||||
// Treat a char count of -1 (error) as 0 (an empty page), since
|
||||
// other pages might have valid content.
|
||||
if (char_count < 0)
|
||||
char_count = 0;
|
||||
|
||||
PP_PrivateAccessibilityPageInfo page_info;
|
||||
page_info.page_index = page_index;
|
||||
page_info.bounds = engine_->GetPageBoundsRect(page_index);
|
||||
page_info.char_count = char_count;
|
||||
|
||||
std::vector<PP_PrivateAccessibilityCharInfo> chars(page_info.char_count);
|
||||
for (uint32_t i = 0; i < page_info.char_count; ++i) {
|
||||
chars[i].unicode_character = engine_->GetCharUnicode(page_index, i);
|
||||
}
|
||||
|
||||
std::vector<PP_PrivateAccessibilityTextRunInfo> text_runs;
|
||||
int char_index = 0;
|
||||
while (char_index < char_count) {
|
||||
PP_PrivateAccessibilityTextRunInfo text_run_info;
|
||||
pp::FloatRect bounds;
|
||||
engine_->GetTextRunInfo(page_index, char_index, &text_run_info.len,
|
||||
&text_run_info.font_size, &bounds);
|
||||
DCHECK_LE(char_index + text_run_info.len,
|
||||
static_cast<uint32_t>(char_count));
|
||||
text_run_info.direction = PP_PRIVATEDIRECTION_LTR;
|
||||
text_run_info.bounds = bounds;
|
||||
text_runs.push_back(text_run_info);
|
||||
|
||||
// We need to provide enough information to draw a bounding box
|
||||
// around any arbitrary text range, but the bounding boxes of characters
|
||||
// we get from PDFium don't necessarily "line up". Walk through the
|
||||
// characters in each text run and let the width of each character be
|
||||
// the difference between the x coordinate of one character and the
|
||||
// x coordinate of the next. The rest of the bounds of each character
|
||||
// can be computed from the bounds of the text run.
|
||||
pp::FloatRect char_bounds = engine_->GetCharBounds(page_index, char_index);
|
||||
for (uint32_t i = 0; i < text_run_info.len - 1; i++) {
|
||||
DCHECK_LT(char_index + i + 1, static_cast<uint32_t>(char_count));
|
||||
pp::FloatRect next_char_bounds =
|
||||
engine_->GetCharBounds(page_index, char_index + i + 1);
|
||||
chars[char_index + i].char_width = next_char_bounds.x() - char_bounds.x();
|
||||
char_bounds = next_char_bounds;
|
||||
}
|
||||
chars[char_index + text_run_info.len - 1].char_width = char_bounds.width();
|
||||
|
||||
char_index += text_run_info.len;
|
||||
std::vector<PP_PrivateAccessibilityCharInfo> chars;
|
||||
if (!GetAccessibilityInfo(engine_.get(), page_index, &page_info, &text_runs,
|
||||
&chars)) {
|
||||
return;
|
||||
}
|
||||
|
||||
page_info.text_run_count = text_runs.size();
|
||||
pp::PDF::SetAccessibilityPageInfo(GetPluginInstance(), &page_info,
|
||||
text_runs.data(), chars.data());
|
||||
|
||||
|
119
pdf/pdfium/accessibility_unittest.cc
Normal file
119
pdf/pdfium/accessibility_unittest.cc
Normal file
@ -0,0 +1,119 @@
|
||||
// Copyright 2019 The Chromium Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
#include "pdf/accessibility.h"
|
||||
|
||||
#include "build/build_config.h"
|
||||
#include "pdf/pdfium/pdfium_engine.h"
|
||||
#include "pdf/pdfium/pdfium_test_base.h"
|
||||
#include "pdf/test/test_client.h"
|
||||
#include "ppapi/c/private/ppb_pdf.h"
|
||||
#include "testing/gmock/include/gmock/gmock.h"
|
||||
|
||||
#if defined(OS_CHROMEOS)
|
||||
#include "base/system/sys_info.h"
|
||||
#endif
|
||||
|
||||
namespace chrome_pdf {
|
||||
|
||||
using AccessibilityTest = PDFiumTestBase;
|
||||
|
||||
// These helpers are only used by the test below, so keep them in an unnamed
// namespace to give them internal linkage and avoid clashing with same-named
// helpers in other test files linked into the same binary.
namespace {

// Returns the expected width of the bounds of text run |i|. On Chrome OS the
// first text run measures differently; everywhere else |expected| is used
// as-is.
float GetExpectedBoundsWidth(bool is_chromeos, size_t i, float expected) {
  return (is_chromeos && i == 0) ? 85.333336f : expected;
}

// Returns the expected width of character |i|. On Chrome OS the characters at
// indices 25 and 26 measure differently; everywhere else |expected| is used
// as-is.
double GetExpectedCharWidth(bool is_chromeos, size_t i, double expected) {
  if (is_chromeos) {
    if (i == 25)
      return 13.333343;
    if (i == 26)
      return 6.666656;
  }
  return expected;
}

}  // namespace
|
||||
|
||||
// NOTE: This test is sensitive to font metrics from the underlying platform.
|
||||
// If changes to fonts on the system or to font code like FreeType cause this
|
||||
// test to fail, please feel free to rebase the test expectations here, or
|
||||
// update the GetExpected... functions above. If that becomes too much of a
|
||||
// burden, consider changing the checks to just make sure the font metrics look
|
||||
// sane.
|
||||
// Loads hello_world2.pdf, extracts the accessibility info for its first page
// and compares the page info, text runs and per-character data against the
// expectations recorded below.
TEST_F(AccessibilityTest, GetAccessibilityPage) {
  static constexpr size_t kExpectedTextRunCount = 2;
  struct {
    uint32_t len;
    double font_size;
    float bounds_x;
    float bounds_y;
    float bounds_w;
    float bounds_h;
  } static constexpr kExpectedTextRuns[] = {
      {15, 12, 26.666666f, 189.333328f, 84.000008f, 13.333344f},
      {15, 16, 28.000000f, 117.333334f, 152.000000f, 19.999992f},
  };
  static_assert(base::size(kExpectedTextRuns) == kExpectedTextRunCount,
                "Bad test expectation count");

  // One entry per character: {unicode_character, char_width}.
  static constexpr size_t kExpectedCharCount = 30;
  static constexpr PP_PrivateAccessibilityCharInfo kExpectedChars[] = {
      {'H', 12}, {'e', 6.6666}, {'l', 5.3333}, {'l', 4},       {'o', 8},
      {',', 4},  {' ', 4},      {'w', 12},     {'o', 6.6666},  {'r', 6.6666},
      {'l', 4},  {'d', 9.3333}, {'!', 4},      {'\r', 0},      {'\n', 0},
      {'G', 16}, {'o', 12},     {'o', 12},     {'d', 12},      {'b', 10.6666},
      {'y', 12}, {'e', 12},     {',', 4},      {' ', 6.6666},  {'w', 16},
      {'o', 12}, {'r', 8},      {'l', 4},      {'d', 12},      {'!', 2.6666},
  };
  static_assert(base::size(kExpectedChars) == kExpectedCharCount,
                "Bad test expectation count");

  TestClient client;
  std::unique_ptr<PDFiumEngine> engine =
      InitializeEngine(&client, FILE_PATH_LITERAL("hello_world2.pdf"));
  ASSERT_TRUE(engine);

  ASSERT_EQ(2, engine->GetNumberOfPages());
  // Value-initialized so the expectations below never read indeterminate
  // memory.
  PP_PrivateAccessibilityPageInfo page_info = {};
  std::vector<PP_PrivateAccessibilityTextRunInfo> text_runs;
  std::vector<PP_PrivateAccessibilityCharInfo> chars;
  ASSERT_TRUE(
      GetAccessibilityInfo(engine.get(), 0, &page_info, &text_runs, &chars));
  EXPECT_EQ(0u, page_info.page_index);
  EXPECT_EQ(5, page_info.bounds.point.x);
  EXPECT_EQ(3, page_info.bounds.point.y);
  EXPECT_EQ(266, page_info.bounds.size.width);
  EXPECT_EQ(266, page_info.bounds.size.height);
  // The counts in |page_info| must agree with the vectors that were filled.
  EXPECT_EQ(text_runs.size(), page_info.text_run_count);
  EXPECT_EQ(chars.size(), page_info.char_count);

  // Some expectations differ when actually running on Chrome OS; see the
  // GetExpected...() helpers.
#if defined(OS_CHROMEOS)
  const bool is_chromeos = base::SysInfo::IsRunningOnChromeOS();
#else
  const bool is_chromeos = false;
#endif

  ASSERT_EQ(kExpectedTextRunCount, text_runs.size());
  for (size_t i = 0; i < kExpectedTextRunCount; ++i) {
    const auto& expected = kExpectedTextRuns[i];
    EXPECT_EQ(expected.len, text_runs[i].len) << i;
    EXPECT_DOUBLE_EQ(expected.font_size, text_runs[i].font_size) << i;
    EXPECT_FLOAT_EQ(expected.bounds_x, text_runs[i].bounds.point.x) << i;
    EXPECT_FLOAT_EQ(expected.bounds_y, text_runs[i].bounds.point.y) << i;
    float expected_bounds_w =
        GetExpectedBoundsWidth(is_chromeos, i, expected.bounds_w);
    EXPECT_FLOAT_EQ(expected_bounds_w, text_runs[i].bounds.size.width) << i;
    EXPECT_FLOAT_EQ(expected.bounds_h, text_runs[i].bounds.size.height) << i;
    // Report the run index on failure, like every other per-run expectation.
    EXPECT_EQ(PP_PRIVATEDIRECTION_LTR, text_runs[i].direction) << i;
  }

  ASSERT_EQ(kExpectedCharCount, chars.size());
  for (size_t i = 0; i < kExpectedCharCount; ++i) {
    const auto& expected = kExpectedChars[i];
    EXPECT_EQ(expected.unicode_character, chars[i].unicode_character) << i;
    double expected_char_width =
        GetExpectedCharWidth(is_chromeos, i, expected.char_width);
    // Widths are recorded to four decimal places, hence the tolerance.
    EXPECT_NEAR(expected_char_width, chars[i].char_width, 0.001) << i;
  }
}
|
||||
|
||||
} // namespace chrome_pdf
|
Reference in New Issue
Block a user