0

Add a unit test for PDF a11y info extraction.

Move most of OutOfProcessInstance::SendNextAccessibilityPage() out into
its own function. Then unit test it.

Change-Id: I54590ac004f11d918ccb45a3e071d9cf19a927ed
Reviewed-on: https://chromium-review.googlesource.com/c/1407265
Commit-Queue: Lei Zhang <thestig@chromium.org>
Reviewed-by: Dominic Mazzoni <dmazzoni@chromium.org>
Cr-Commit-Position: refs/heads/master@{#625117}
This commit is contained in:
Lei Zhang
2019-01-23 07:03:23 +00:00
committed by Commit Bot
parent f51a186379
commit a294e76933
5 changed files with 235 additions and 51 deletions

@ -44,6 +44,8 @@ if (enable_pdf) {
]
sources = [
"accessibility.cc",
"accessibility.h",
"chunk_stream.h",
"document_loader.h",
"document_loader_impl.cc",
@ -174,6 +176,7 @@ if (enable_pdf) {
"//third_party/pdfium",
]
sources += [
"pdfium/accessibility_unittest.cc",
"pdfium/findtext_unittest.cc",
"pdfium/pdfium_engine_exports_unittest.cc",
"pdfium/pdfium_print_unittest.cc",

76
pdf/accessibility.cc Normal file

@ -0,0 +1,76 @@
// Copyright 2019 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "pdf/accessibility.h"
#include "pdf/pdf_engine.h"
#include "ppapi/c/private/ppb_pdf.h"
namespace chrome_pdf {
// Collects the accessibility data for the page at 0-indexed |page_index| from
// |engine|.
//
// Returns false, leaving every out parameter untouched, when |page_index| is
// outside [0, engine->GetNumberOfPages()). Otherwise returns true after:
//   - filling |page_info| with the page index, page bounds, character count
//     and text run count;
//   - replacing the contents of |chars| with one entry per character on the
//     page (unicode value and width);
//   - replacing the contents of |text_runs| with one entry per text run.
bool GetAccessibilityInfo(
    PDFEngine* engine,
    int32_t page_index,
    PP_PrivateAccessibilityPageInfo* page_info,
    std::vector<PP_PrivateAccessibilityTextRunInfo>* text_runs,
    std::vector<PP_PrivateAccessibilityCharInfo>* chars) {
  const int page_count = engine->GetNumberOfPages();
  if (page_index < 0 || page_index >= page_count)
    return false;

  int char_count = engine->GetCharCount(page_index);

  // Treat a char count of -1 (error) as 0 (an empty page), since
  // other pages might have valid content.
  if (char_count < 0)
    char_count = 0;

  page_info->page_index = page_index;
  page_info->bounds = engine->GetPageBoundsRect(page_index);
  page_info->char_count = char_count;

  // Start both output vectors from a clean slate so that leftovers from a
  // previous call can neither leak stale widths into |chars| nor inflate
  // |page_info->text_run_count| below.
  text_runs->clear();
  chars->clear();
  chars->resize(page_info->char_count);
  for (uint32_t i = 0; i < page_info->char_count; ++i)
    (*chars)[i].unicode_character = engine->GetCharUnicode(page_index, i);

  int char_index = 0;
  while (char_index < char_count) {
    PP_PrivateAccessibilityTextRunInfo text_run_info;
    pp::FloatRect bounds;
    engine->GetTextRunInfo(page_index, char_index, &text_run_info.len,
                           &text_run_info.font_size, &bounds);
    DCHECK_LE(char_index + text_run_info.len,
              static_cast<uint32_t>(char_count));

    // |len| is unsigned: a zero-length run would make |len - 1| wrap around to
    // UINT32_MAX and would never advance |char_index|. Stop instead of
    // looping forever / indexing far outside |chars|.
    if (text_run_info.len == 0)
      break;

    text_run_info.direction = PP_PRIVATEDIRECTION_LTR;
    text_run_info.bounds = bounds;
    text_runs->push_back(text_run_info);

    // We need to provide enough information to draw a bounding box
    // around any arbitrary text range, but the bounding boxes of characters
    // we get from PDFium don't necessarily "line up". Walk through the
    // characters in each text run and let the width of each character be
    // the difference between the x coordinate of one character and the
    // x coordinate of the next. The rest of the bounds of each character
    // can be computed from the bounds of the text run.
    const uint32_t last_char_in_run = char_index + text_run_info.len - 1;
    pp::FloatRect char_bounds = engine->GetCharBounds(page_index, char_index);
    for (uint32_t i = char_index; i < last_char_in_run; ++i) {
      DCHECK_LT(i + 1, static_cast<uint32_t>(char_count));
      pp::FloatRect next_char_bounds =
          engine->GetCharBounds(page_index, i + 1);
      (*chars)[i].char_width = next_char_bounds.x() - char_bounds.x();
      char_bounds = next_char_bounds;
    }

    // The last character of the run has no successor to measure against, so
    // it keeps the width PDFium reported for it.
    (*chars)[last_char_in_run].char_width = char_bounds.width();

    char_index += text_run_info.len;
  }

  page_info->text_run_count = text_runs->size();
  return true;
}
} // namespace chrome_pdf

32
pdf/accessibility.h Normal file

@ -0,0 +1,32 @@
// Copyright 2019 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef PDF_ACCESSIBILITY_H_
#define PDF_ACCESSIBILITY_H_
#include <stdint.h>
#include <vector>
struct PP_PrivateAccessibilityCharInfo;
struct PP_PrivateAccessibilityPageInfo;
struct PP_PrivateAccessibilityTextRunInfo;
namespace chrome_pdf {
class PDFEngine;
// Retrieve |page_info|, |text_runs|, and |chars| from |engine| for the page at
// 0-indexed |page_index|. Returns true on success with all out parameters
// filled, or false on failure with all out parameters untouched.
//
// The call fails when |page_index| is negative or is not smaller than the
// number of pages reported by |engine|.
//
// On success:
// - |page_info| holds the page index, the page bounds, the character count
//   and the text run count for the page.
// - |chars| is sized to the page's character count, with one entry per
//   character.
// - |text_runs| receives one entry per text run on the page. Callers are
//   expected to pass in an empty vector.
//
// All pointer arguments are dereferenced without a null check.
bool GetAccessibilityInfo(
PDFEngine* engine,
int32_t page_index,
PP_PrivateAccessibilityPageInfo* page_info,
std::vector<PP_PrivateAccessibilityTextRunInfo>* text_runs,
std::vector<PP_PrivateAccessibilityCharInfo>* chars);
} // namespace chrome_pdf
#endif // PDF_ACCESSIBILITY_H_

@ -21,6 +21,7 @@
#include "base/values.h"
#include "chrome/common/content_restriction.h"
#include "net/base/escape.h"
#include "pdf/accessibility.h"
#include "pdf/pdf.h"
#include "pdf/pdf_features.h"
#include "ppapi/c/dev/ppb_cursor_control_dev.h"
@ -936,61 +937,14 @@ void OutOfProcessInstance::LoadAccessibility() {
}
void OutOfProcessInstance::SendNextAccessibilityPage(int32_t page_index) {
int page_count = engine_->GetNumberOfPages();
if (page_index < 0 || page_index >= page_count)
return;
int char_count = engine_->GetCharCount(page_index);
// Treat a char count of -1 (error) as 0 (an empty page), since
// other pages might have valid content.
if (char_count < 0)
char_count = 0;
PP_PrivateAccessibilityPageInfo page_info;
page_info.page_index = page_index;
page_info.bounds = engine_->GetPageBoundsRect(page_index);
page_info.char_count = char_count;
std::vector<PP_PrivateAccessibilityCharInfo> chars(page_info.char_count);
for (uint32_t i = 0; i < page_info.char_count; ++i) {
chars[i].unicode_character = engine_->GetCharUnicode(page_index, i);
}
std::vector<PP_PrivateAccessibilityTextRunInfo> text_runs;
int char_index = 0;
while (char_index < char_count) {
PP_PrivateAccessibilityTextRunInfo text_run_info;
pp::FloatRect bounds;
engine_->GetTextRunInfo(page_index, char_index, &text_run_info.len,
&text_run_info.font_size, &bounds);
DCHECK_LE(char_index + text_run_info.len,
static_cast<uint32_t>(char_count));
text_run_info.direction = PP_PRIVATEDIRECTION_LTR;
text_run_info.bounds = bounds;
text_runs.push_back(text_run_info);
// We need to provide enough information to draw a bounding box
// around any arbitrary text range, but the bounding boxes of characters
// we get from PDFium don't necessarily "line up". Walk through the
// characters in each text run and let the width of each character be
// the difference between the x coordinate of one character and the
// x coordinate of the next. The rest of the bounds of each character
// can be computed from the bounds of the text run.
pp::FloatRect char_bounds = engine_->GetCharBounds(page_index, char_index);
for (uint32_t i = 0; i < text_run_info.len - 1; i++) {
DCHECK_LT(char_index + i + 1, static_cast<uint32_t>(char_count));
pp::FloatRect next_char_bounds =
engine_->GetCharBounds(page_index, char_index + i + 1);
chars[char_index + i].char_width = next_char_bounds.x() - char_bounds.x();
char_bounds = next_char_bounds;
}
chars[char_index + text_run_info.len - 1].char_width = char_bounds.width();
char_index += text_run_info.len;
std::vector<PP_PrivateAccessibilityCharInfo> chars;
if (!GetAccessibilityInfo(engine_.get(), page_index, &page_info, &text_runs,
&chars)) {
return;
}
page_info.text_run_count = text_runs.size();
pp::PDF::SetAccessibilityPageInfo(GetPluginInstance(), &page_info,
text_runs.data(), chars.data());

@ -0,0 +1,119 @@
// Copyright 2019 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "pdf/accessibility.h"
#include "build/build_config.h"
#include "pdf/pdfium/pdfium_engine.h"
#include "pdf/pdfium/pdfium_test_base.h"
#include "pdf/test/test_client.h"
#include "ppapi/c/private/ppb_pdf.h"
#include "testing/gmock/include/gmock/gmock.h"
#if defined(OS_CHROMEOS)
#include "base/system/sys_info.h"
#endif
namespace chrome_pdf {
using AccessibilityTest = PDFiumTestBase;
// Returns the expected bounds width for text run |i|. Every run uses
// |expected| except run 0 when |is_chromeos| is set, which uses a
// ChromeOS-specific value instead.
float GetExpectedBoundsWidth(bool is_chromeos, size_t i, float expected) {
  if (!is_chromeos || i != 0)
    return expected;
  return 85.333336f;
}
// Returns the expected width for character |i|. Every character uses
// |expected| except characters 25 and 26 when |is_chromeos| is set, which
// use ChromeOS-specific values instead.
double GetExpectedCharWidth(bool is_chromeos, size_t i, double expected) {
  if (!is_chromeos)
    return expected;

  switch (i) {
    case 25:
      return 13.333343;
    case 26:
      return 6.666656;
    default:
      return expected;
  }
}
// Loads hello_world2.pdf, extracts the accessibility info for page 0 with
// GetAccessibilityInfo(), and compares the page info, text runs and characters
// against the golden values below.
//
// NOTE: This test is sensitive to font metrics from the underlying platform.
// If changes to fonts on the system or to font code like FreeType cause this
// test to fail, please feel free to rebase the test expectations here, or
// update the GetExpected... functions above. If that becomes too much of a
// burden, consider changing the checks to just make sure the font metrics look
// sane.
TEST_F(AccessibilityTest, GetAccessibilityPage) {
// Golden values for the text runs on page 0: run length in characters, font
// size, and the run's bounding box (x, y, width, height).
static constexpr size_t kExpectedTextRunCount = 2;
struct {
uint32_t len;
double font_size;
float bounds_x;
float bounds_y;
float bounds_w;
float bounds_h;
} static constexpr kExpectedTextRuns[] = {
{15, 12, 26.666666f, 189.333328f, 84.000008f, 13.333344f},
{15, 16, 28.000000f, 117.333334f, 152.000000f, 19.999992f},
};
static_assert(base::size(kExpectedTextRuns) == kExpectedTextRunCount,
"Bad test expectation count");
// Golden values for the characters on page 0: {unicode character, width}.
// Read in order, the characters spell "Hello, world!\r\nGoodbye, world!".
static constexpr size_t kExpectedCharCount = 30;
static constexpr PP_PrivateAccessibilityCharInfo kExpectedChars[] = {
{'H', 12}, {'e', 6.6666}, {'l', 5.3333}, {'l', 4}, {'o', 8},
{',', 4}, {' ', 4}, {'w', 12}, {'o', 6.6666}, {'r', 6.6666},
{'l', 4}, {'d', 9.3333}, {'!', 4}, {'\r', 0}, {'\n', 0},
{'G', 16}, {'o', 12}, {'o', 12}, {'d', 12}, {'b', 10.6666},
{'y', 12}, {'e', 12}, {',', 4}, {' ', 6.6666}, {'w', 16},
{'o', 12}, {'r', 8}, {'l', 4}, {'d', 12}, {'!', 2.6666},
};
static_assert(base::size(kExpectedChars) == kExpectedCharCount,
"Bad test expectation count");
// Load the test document; it is expected to contain exactly two pages.
TestClient client;
std::unique_ptr<PDFiumEngine> engine =
InitializeEngine(&client, FILE_PATH_LITERAL("hello_world2.pdf"));
ASSERT_TRUE(engine);
ASSERT_EQ(2, engine->GetNumberOfPages());
// Extract the accessibility info for the first page (index 0).
PP_PrivateAccessibilityPageInfo page_info;
std::vector<PP_PrivateAccessibilityTextRunInfo> text_runs;
std::vector<PP_PrivateAccessibilityCharInfo> chars;
ASSERT_TRUE(
GetAccessibilityInfo(engine.get(), 0, &page_info, &text_runs, &chars));
// Page-level info: index, bounds, and counts that must agree with the sizes
// of the returned vectors.
EXPECT_EQ(0u, page_info.page_index);
EXPECT_EQ(5, page_info.bounds.point.x);
EXPECT_EQ(3, page_info.bounds.point.y);
EXPECT_EQ(266, page_info.bounds.size.width);
EXPECT_EQ(266, page_info.bounds.size.height);
EXPECT_EQ(text_runs.size(), page_info.text_run_count);
EXPECT_EQ(chars.size(), page_info.char_count);
// The ChromeOS-specific expectations only apply when the test is built for
// ChromeOS and base::SysInfo::IsRunningOnChromeOS() reports true at runtime.
#if defined(OS_CHROMEOS)
bool is_chromeos = base::SysInfo::IsRunningOnChromeOS();
#else
bool is_chromeos = false;
#endif
// Check every text run. ASSERT on the count first so the indexing below stays
// in bounds; "<< i" tags each failure with the run index.
ASSERT_EQ(kExpectedTextRunCount, text_runs.size());
for (size_t i = 0; i < kExpectedTextRunCount; ++i) {
const auto& expected = kExpectedTextRuns[i];
EXPECT_EQ(expected.len, text_runs[i].len) << i;
EXPECT_DOUBLE_EQ(expected.font_size, text_runs[i].font_size) << i;
EXPECT_FLOAT_EQ(expected.bounds_x, text_runs[i].bounds.point.x) << i;
EXPECT_FLOAT_EQ(expected.bounds_y, text_runs[i].bounds.point.y) << i;
float expected_bounds_w =
GetExpectedBoundsWidth(is_chromeos, i, expected.bounds_w);
EXPECT_FLOAT_EQ(expected_bounds_w, text_runs[i].bounds.size.width) << i;
EXPECT_FLOAT_EQ(expected.bounds_h, text_runs[i].bounds.size.height) << i;
EXPECT_EQ(PP_PRIVATEDIRECTION_LTR, text_runs[i].direction);
}
// Check every character. Widths are compared with a 0.001 tolerance;
// presumably because the golden widths above are written with limited
// precision.
ASSERT_EQ(kExpectedCharCount, chars.size());
for (size_t i = 0; i < kExpectedCharCount; ++i) {
const auto& expected = kExpectedChars[i];
EXPECT_EQ(expected.unicode_character, chars[i].unicode_character) << i;
double expected_char_width =
GetExpectedCharWidth(is_chromeos, i, expected.char_width);
EXPECT_NEAR(expected_char_width, chars[i].char_width, 0.001) << i;
}
}
} // namespace chrome_pdf