[PDF] Use AccessibilityTextRunInfo to calculate selection rectangles
PDFiumPage::GetTextRunInfo() already has good heuristics to calculate the set of characters that form a text run. Use it in PDFiumRange::GetScreenRects() to create a rectangle for every text run. Then add more heuristics to merge the rectangles when they have sufficient overlap in the horizontal direction. As a result, the text selection, which is based on GetScreenRects() calculations, will appear more continuous and less jagged. Update test expectations to match the new code's behavior. Keep the original FPDFText_CountRects()-based implementation around, behind an emergency kill switch, in case the new code has serious problems. Bug: 40448046 Change-Id: I17b9dd744671c6721faaf9060622cff788371f1f Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/6237116 Reviewed-by: Andy Phan <andyphan@chromium.org> Reviewed-by: Alan Screen <awscreen@chromium.org> Commit-Queue: Lei Zhang <thestig@chromium.org> Cr-Commit-Position: refs/heads/main@{#1417508}


@ -366,14 +366,14 @@ IN_PROC_BROWSER_TEST_P(PDFExtensionInteractiveUITest,
|
||||
|
||||
gfx::SelectionBound start_bound = touch_selection_controller->start();
|
||||
EXPECT_EQ(gfx::SelectionBound::LEFT, start_bound.type());
|
||||
EXPECT_POINTF_NEAR(gfx::PointF(454.0f, 161.0f), start_bound.edge_start(),
|
||||
EXPECT_POINTF_NEAR(gfx::PointF(454.0f, 157.0f), start_bound.edge_start(),
|
||||
1.0f);
|
||||
EXPECT_POINTF_NEAR(gfx::PointF(454.0f, 171.0f), start_bound.edge_end(), 1.0f);
|
||||
EXPECT_POINTF_NEAR(gfx::PointF(454.0f, 174.0f), start_bound.edge_end(), 1.0f);
|
||||
|
||||
gfx::SelectionBound end_bound = touch_selection_controller->end();
|
||||
EXPECT_EQ(gfx::SelectionBound::RIGHT, end_bound.type());
|
||||
EXPECT_POINTF_NEAR(gfx::PointF(492.0f, 161.0f), end_bound.edge_start(), 1.0f);
|
||||
EXPECT_POINTF_NEAR(gfx::PointF(492.0f, 171.0f), end_bound.edge_end(), 1.0f);
|
||||
EXPECT_POINTF_NEAR(gfx::PointF(494.0f, 157.0f), end_bound.edge_start(), 1.0f);
|
||||
EXPECT_POINTF_NEAR(gfx::PointF(494.0f, 174.0f), end_bound.edge_end(), 1.0f);
|
||||
}
|
||||
#endif // defined(TOOLKIT_VIEWS) && defined(USE_AURA)
|
||||
|
||||
|
@ -7,7 +7,10 @@
|
||||
#include <string>
|
||||
|
||||
#include "base/check_op.h"
|
||||
#include "base/containers/span.h"
|
||||
#include "base/feature_list.h"
|
||||
#include "base/strings/string_util.h"
|
||||
#include "pdf/accessibility_structs.h"
|
||||
#include "pdf/pdfium/pdfium_api_string_buffer_adapter.h"
|
||||
#include "third_party/pdfium/public/fpdf_searchex.h"
|
||||
#include "ui/gfx/geometry/point.h"
|
||||
@ -18,6 +21,12 @@ namespace chrome_pdf {
|
||||
|
||||
namespace {
|
||||
|
||||
// Enables AccessibilityTextRunInfo-based screen rects.
|
||||
// TODO(crbug.com/40448046): Remove this kill switch after a safe rollout.
|
||||
BASE_FEATURE(kPdfAccessibilityTextRunInfoScreenRects,
|
||||
"PdfAccessibilityTextRunInfoScreenRects",
|
||||
base::FEATURE_ENABLED_BY_DEFAULT);
|
||||
|
||||
void AdjustForBackwardsRange(int& index, int& count) {
|
||||
if (count < 0) {
|
||||
count *= -1;
|
||||
@ -25,6 +34,83 @@ void AdjustForBackwardsRange(int& index, int& count) {
|
||||
}
|
||||
}
|
||||
|
||||
// Struct with only the text run info needed for PDFiumRange::GetScreenRects().
|
||||
struct ScreenRectTextRunInfo {
|
||||
gfx::Rect screen_rect;
|
||||
size_t char_count;
|
||||
};
|
||||
|
||||
// Returns a ratio between [0, 1].
|
||||
float GetVerticalOverlap(const gfx::Rect& rect1, const gfx::Rect& rect2) {
|
||||
CHECK(!rect1.IsEmpty());
|
||||
CHECK(!rect2.IsEmpty());
|
||||
|
||||
gfx::Rect union_rect = rect1;
|
||||
union_rect.Union(rect2);
|
||||
|
||||
if (union_rect.height() == rect1.height() ||
|
||||
union_rect.height() == rect2.height()) {
|
||||
return 1.0f;
|
||||
}
|
||||
|
||||
gfx::Rect intersect_rect = rect1;
|
||||
intersect_rect.Intersect(rect2);
|
||||
return static_cast<float>(intersect_rect.height()) / union_rect.height();
|
||||
}
|
||||
|
||||
// Returns true if there is sufficient horizontal and vertical overlap.
|
||||
bool ShouldMergeHorizontalRects(const ScreenRectTextRunInfo& text_run1,
|
||||
const ScreenRectTextRunInfo& text_run2) {
|
||||
static constexpr float kVerticalOverlapThreshold = 0.8f;
|
||||
const gfx::Rect& rect1 = text_run1.screen_rect;
|
||||
const gfx::Rect& rect2 = text_run2.screen_rect;
|
||||
if (GetVerticalOverlap(rect1, rect2) < kVerticalOverlapThreshold) {
|
||||
return false;
|
||||
}
|
||||
|
||||
static constexpr float kHorizontalWidthFactor = 1.0f;
|
||||
const float average_width1 =
|
||||
kHorizontalWidthFactor * rect1.width() / text_run1.char_count;
|
||||
const float average_width2 =
|
||||
kHorizontalWidthFactor * rect2.width() / text_run2.char_count;
|
||||
const float rect1_left = rect1.x() - average_width1;
|
||||
const float rect1_right = rect1.right() + average_width1;
|
||||
const float rect2_left = rect2.x() - average_width2;
|
||||
const float rect2_right = rect2.right() + average_width2;
|
||||
return rect1_left < rect2_right && rect1_right > rect2_left;
|
||||
}
|
||||
|
||||
// Since PDFiumPage::GetTextRunInfo() can end a text run for a variety of
|
||||
// reasons, post-process the collected text run data and merge rectangles.
|
||||
std::vector<gfx::Rect> MergeAdjacentRects(
|
||||
base::span<ScreenRectTextRunInfo> text_runs) {
|
||||
std::vector<gfx::Rect> results;
|
||||
const ScreenRectTextRunInfo* previous_text_run = nullptr;
|
||||
gfx::Rect current_screen_rect;
|
||||
for (const auto& text_run : text_runs) {
|
||||
if (previous_text_run) {
|
||||
// TODO(crbug.com/40448046): Improve vertical text handling.
|
||||
// For now, treat all text as horizontal, as that is the majority of the
|
||||
// text. Also, PDFiumPage::GetTextPage() has bugs in its heuristics where
|
||||
// it mistakenly reports horizontal text as vertical.
|
||||
if (ShouldMergeHorizontalRects(*previous_text_run, text_run)) {
|
||||
current_screen_rect.Union(text_run.screen_rect);
|
||||
} else {
|
||||
results.push_back(current_screen_rect);
|
||||
current_screen_rect = text_run.screen_rect;
|
||||
}
|
||||
} else {
|
||||
current_screen_rect = text_run.screen_rect;
|
||||
}
|
||||
previous_text_run = &text_run;
|
||||
}
|
||||
|
||||
if (!current_screen_rect.IsEmpty()) {
|
||||
results.push_back(current_screen_rect);
|
||||
}
|
||||
return results;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
bool IsIgnorableCharacter(char16_t c) {
|
||||
@ -109,20 +195,77 @@ const std::vector<gfx::Rect>& PDFiumRange::GetScreenRects(
|
||||
DCHECK_LT(char_index, FPDFText_CountChars(text_page))
|
||||
<< " start: " << char_index_ << " count: " << char_count_;
|
||||
|
||||
int count = FPDFText_CountRects(text_page, char_index, char_count);
|
||||
for (int i = 0; i < count; ++i) {
|
||||
double left;
|
||||
double top;
|
||||
double right;
|
||||
double bottom;
|
||||
FPDFText_GetRect(text_page, i, &left, &top, &right, &bottom);
|
||||
gfx::Rect rect =
|
||||
page_->PageToScreen(point, zoom, left, top, right, bottom, orientation);
|
||||
if (rect.IsEmpty())
|
||||
continue;
|
||||
cached_screen_rects_.push_back(rect);
|
||||
if (!base::FeatureList::IsEnabled(kPdfAccessibilityTextRunInfoScreenRects)) {
|
||||
int count = FPDFText_CountRects(text_page, char_index, char_count);
|
||||
for (int i = 0; i < count; ++i) {
|
||||
double left;
|
||||
double top;
|
||||
double right;
|
||||
double bottom;
|
||||
FPDFText_GetRect(text_page, i, &left, &top, &right, &bottom);
|
||||
gfx::Rect rect = page_->PageToScreen(point, zoom, left, top, right,
|
||||
bottom, orientation);
|
||||
if (rect.IsEmpty()) {
|
||||
continue;
|
||||
}
|
||||
cached_screen_rects_.push_back(rect);
|
||||
}
|
||||
|
||||
return cached_screen_rects_;
|
||||
}
|
||||
|
||||
std::vector<ScreenRectTextRunInfo> text_runs;
|
||||
const int end_char_index = char_index + char_count;
|
||||
bool reached_end = false;
|
||||
while (!reached_end) {
|
||||
// Should not fail because `text_page` is non-null and `char_index` is
|
||||
// always in range.
|
||||
std::optional<AccessibilityTextRunInfo> text_run_info =
|
||||
page_->GetTextRunInfo(char_index);
|
||||
CHECK(text_run_info.has_value());
|
||||
|
||||
// Figure out how many characters to process in the for-loop below, and
|
||||
// determine if this while-loop iteration reached the end of the range.
|
||||
int next_char_index = char_index + text_run_info.value().len;
|
||||
reached_end = next_char_index >= end_char_index;
|
||||
if (reached_end) {
|
||||
next_char_index = end_char_index;
|
||||
}
|
||||
|
||||
// Do not use the bounds from `text_run_info`, as those are in the wrong
|
||||
// coordinate system. Calculate it here instead.
|
||||
gfx::Rect text_run_rect;
|
||||
for (int i = char_index; i < next_char_index; ++i) {
|
||||
// Use the loose rectangle, which gives the selection a bit more padding.
|
||||
// In comparison, the rectangle from FPDFText_GetCharBox() surrounds the
|
||||
// glyph too tightly.
|
||||
//
|
||||
// Should not fail because `text_page` is non-null, `i` is always in
|
||||
// range, and the out-parameter is non-null.
|
||||
FS_RECTF rect;
|
||||
bool got_rect = FPDFText_GetLooseCharBox(text_page, i, &rect);
|
||||
CHECK(got_rect);
|
||||
|
||||
// Check for empty `rect` and skip. PDFiumPage::PageToScreen() may round
|
||||
// an empty `rect` to a 1x1 `screen_rect`, which is hard to distinguish
|
||||
// from an actual 1x1 `rect`.
|
||||
if (rect.left == rect.right || rect.top == rect.bottom) {
|
||||
continue;
|
||||
}
|
||||
|
||||
gfx::Rect screen_rect =
|
||||
page_->PageToScreen(point, zoom, rect.left, rect.top, rect.right,
|
||||
rect.bottom, orientation);
|
||||
text_run_rect.Union(screen_rect);
|
||||
}
|
||||
if (!text_run_rect.IsEmpty()) {
|
||||
text_runs.emplace_back(text_run_rect, next_char_index - char_index);
|
||||
}
|
||||
|
||||
char_index = next_char_index;
|
||||
}
|
||||
|
||||
cached_screen_rects_ = MergeAdjacentRects(text_runs);
|
||||
return cached_screen_rects_;
|
||||
}
|
||||
|
||||
|
Before ![]() (image error) Size: 502 B After ![]() (image error) Size: 511 B ![]() ![]() |
Before ![]() (image error) Size: 479 B After ![]() (image error) Size: 481 B ![]() ![]() |
Before ![]() (image error) Size: 479 B After ![]() (image error) Size: 481 B ![]() ![]() |
Before ![]() (image error) Size: 480 B After ![]() (image error) Size: 481 B ![]() ![]() |
Before ![]() (image error) Size: 480 B After ![]() (image error) Size: 481 B ![]() ![]() |
Before ![]() (image error) Size: 500 B After ![]() (image error) Size: 490 B ![]() ![]() |
Before ![]() (image error) Size: 479 B After ![]() (image error) Size: 479 B ![]() ![]() |
Before ![]() (image error) Size: 479 B After ![]() (image error) Size: 480 B ![]() ![]() |
Before ![]() (image error) Size: 173 B After ![]() (image error) Size: 126 B ![]() ![]() |
Before ![]() (image error) Size: 173 B After ![]() (image error) Size: 128 B ![]() ![]() |
Before ![]() (image error) Size: 173 B After ![]() (image error) Size: 128 B ![]() ![]() |
Before ![]() (image error) Size: 142 B After ![]() (image error) Size: 142 B ![]() ![]() |
Before ![]() (image error) Size: 140 B After ![]() (image error) Size: 138 B ![]() ![]() |
Before ![]() (image error) Size: 140 B After ![]() (image error) Size: 138 B ![]() ![]() |
Before ![]() (image error) Size: 140 B After ![]() (image error) Size: 138 B ![]() ![]() |
Before ![]() (image error) Size: 137 B After ![]() (image error) Size: 138 B ![]() ![]() |
Before ![]() (image error) Size: 137 B After ![]() (image error) Size: 138 B ![]() ![]() |
Before ![]() (image error) Size: 137 B After ![]() (image error) Size: 138 B ![]() ![]() |