Update ChromeScreenAI proto and set min version for the library.
Update chrome_screen_ai.proto to match the latest version, restrict Chromium to library versions that include the new proto (127.19+), and update the usages of the changed fields. AX-Relnotes: n/a Bug: 378472917 Change-Id: I2b25025e8bbaada506a35e7ea8f7688f234149af Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/6309094 Reviewed-by: Kyungjun Lee <kyungjunlee@google.com> Reviewed-by: Chu-Hsuan Yang <chuhsuan@chromium.org> Reviewed-by: Antonio Sartori <antoniosartori@chromium.org> Reviewed-by: Lei Zhang <thestig@chromium.org> Commit-Queue: Antonio Sartori <antoniosartori@chromium.org> Auto-Submit: Ramin Halavati <rhalavati@chromium.org> Cr-Commit-Position: refs/heads/main@{#1427576}
This commit is contained in:

committed by
Chromium LUCI CQ

parent
8d3a92a725
commit
02286a45e6
chrome/browser/screen_ai
pdf/pdfium
services/screen_ai
@ -19,8 +19,8 @@
|
||||
#include "ui/accessibility/accessibility_features.h"
|
||||
|
||||
namespace {
|
||||
// See crbug.com/393349281 and crbug.com/359853518
|
||||
const char kMinExpectedVersion[] = "127.15";
|
||||
// Minimum library version that includes the updated chrome_screen_ai.proto.
|
||||
const char kMinExpectedVersion[] = "127.19";
|
||||
const int kScreenAICleanUpDelayInDays = 30;
|
||||
|
||||
bool IsDeviceCompatible() {
|
||||
|
@ -242,8 +242,6 @@ TEST_F(PDFiumEngineExportsTest, Searchify) {
|
||||
CHECK(!bitmap.empty());
|
||||
auto annotation = screen_ai::mojom::VisualAnnotation::New();
|
||||
auto line_box = screen_ai::mojom::LineBox::New();
|
||||
line_box->baseline_box = gfx::Rect(0, 0, 100, 100);
|
||||
line_box->baseline_box_angle = 0;
|
||||
line_box->bounding_box = gfx::Rect(0, 0, 100, 100);
|
||||
line_box->bounding_box_angle = 0;
|
||||
auto word_box = screen_ai::mojom::WordBox::New();
|
||||
@ -306,8 +304,6 @@ TEST_F(PDFiumEngineExportsTest, SearchifyBigImage) {
|
||||
|
||||
constexpr gfx::Rect kRect1(0, 30, 10, 5);
|
||||
auto line_box1 = screen_ai::mojom::LineBox::New();
|
||||
line_box1->baseline_box = kRect1;
|
||||
line_box1->baseline_box_angle = 0;
|
||||
line_box1->bounding_box = kRect1;
|
||||
line_box1->bounding_box_angle = 0;
|
||||
auto word_box1 = screen_ai::mojom::WordBox::New();
|
||||
@ -319,8 +315,6 @@ TEST_F(PDFiumEngineExportsTest, SearchifyBigImage) {
|
||||
|
||||
constexpr gfx::Rect kRect2(200, 210, 67, 57);
|
||||
auto line_box2 = screen_ai::mojom::LineBox::New();
|
||||
line_box2->baseline_box = kRect2;
|
||||
line_box2->baseline_box_angle = 0;
|
||||
line_box2->bounding_box = kRect2;
|
||||
line_box2->bounding_box_angle = 0;
|
||||
auto word_box2 = screen_ai::mojom::WordBox::New();
|
||||
@ -425,8 +419,6 @@ TEST_F(PDFiumEngineExportsTest, PdfProgressiveSearchifierText) {
|
||||
bitmap.allocN32Pixels(100, 100);
|
||||
auto annotation = screen_ai::mojom::VisualAnnotation::New();
|
||||
auto line_box = screen_ai::mojom::LineBox::New();
|
||||
line_box->baseline_box = gfx::Rect(0, 0, 100, 100);
|
||||
line_box->baseline_box_angle = 0;
|
||||
line_box->bounding_box = gfx::Rect(0, 0, 100, 100);
|
||||
line_box->bounding_box_angle = 0;
|
||||
auto word_box = screen_ai::mojom::WordBox::New();
|
||||
|
@ -91,8 +91,6 @@ VisualAnnotationPtr CreateEmptyAnnotation() {
|
||||
VisualAnnotationPtr CreateSampleAnnotation(int call_number) {
|
||||
auto annotation = CreateEmptyAnnotation();
|
||||
auto line_box = screen_ai::mojom::LineBox::New();
|
||||
line_box->baseline_box = gfx::Rect(0, 0, 100, 100);
|
||||
line_box->baseline_box_angle = 0;
|
||||
line_box->bounding_box = gfx::Rect(0, 0, 100, 100);
|
||||
line_box->bounding_box_angle = 0;
|
||||
auto word_box = screen_ai::mojom::WordBox::New();
|
||||
|
@ -245,7 +245,7 @@ std::vector<screen_ai::mojom::WordBox> GetWordsAndSpaces(
|
||||
GetSpaceRect(current_word->bounding_box, words[i + 1]->bounding_box);
|
||||
if (!space_rect.IsEmpty()) {
|
||||
words_and_spaces.push_back(screen_ai::mojom::WordBox(
|
||||
/*word=*/" ", /*dictionary_word=*/false, current_word->language,
|
||||
/*word=*/" ", current_word->language,
|
||||
/*has_space_after=*/false, space_rect,
|
||||
current_word->bounding_box_angle, current_word->direction,
|
||||
/*confidence=*/1));
|
||||
@ -344,8 +344,10 @@ bool AddTextOnImage(FPDF_DOCUMENT document,
|
||||
|
||||
bool added_text = false;
|
||||
for (const auto& line : annotation->lines) {
|
||||
// TODO(crbug.com/398694513): Try to get baseline information from font
|
||||
// information.
|
||||
SearchifyBoundingBoxOrigin baseline_origin =
|
||||
ConvertToPdfOrigin(line->baseline_box, line->baseline_box_angle,
|
||||
ConvertToPdfOrigin(line->bounding_box, line->bounding_box_angle,
|
||||
image_pixel_size.height());
|
||||
|
||||
std::vector<screen_ai::mojom::WordBox> words_and_spaces =
|
||||
|
@ -1,4 +1,4 @@
|
||||
// Copyright 2022 The Chromium Authors
|
||||
// Copyright 2022 The Chromium Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
@ -6,10 +6,13 @@
|
||||
// https://source.chromium.org/chromium/chromium/src/+/main:services/screen_ai/proto/chrome_screen_ai.proto
|
||||
// http://google3/chrome/accessibility/machine_intelligence/chrome_screen_ai//chrome_screen_ai.proto
|
||||
|
||||
syntax = "proto3";
|
||||
edition = "2023";
|
||||
|
||||
package chrome_screen_ai;
|
||||
|
||||
// Give all fields implicit presence by default (proto3 behavior).
|
||||
option features.field_presence = IMPLICIT;
|
||||
|
||||
// Defines a rectangle.
|
||||
message Rect {
|
||||
int32 x = 1;
|
||||
@ -49,8 +52,8 @@ message LineBox {
|
||||
// ID of the text block that this line belongs to.
|
||||
int32 block_id = 5;
|
||||
|
||||
// Index within the block that this line belongs to.
|
||||
int32 order_within_block = 6;
|
||||
reserved 6; // order_within_block, deprecated in Chrome M135.
|
||||
reserved order_within_block;
|
||||
|
||||
// The direction of the script contained in the line.
|
||||
Direction direction = 7;
|
||||
@ -58,12 +61,15 @@ message LineBox {
|
||||
// Content type for this line.
|
||||
ContentType content_type = 8;
|
||||
|
||||
// Line bounding box relative to the original image with bottom edge
|
||||
// representing estimated baseline of text.
|
||||
Rect baseline_box = 9;
|
||||
reserved 9; // baseline_box, deprecated in Chrome M135.
|
||||
reserved baseline_box;
|
||||
|
||||
// Confidence as computed by the OCR engine. The value is in range [0, 1].
|
||||
float confidence = 10;
|
||||
|
||||
// ID of the paragraph that this line belongs to. Paragraphs are ordered from
|
||||
// zero in each block.
|
||||
int32 paragraph_id = 11;
|
||||
}
|
||||
|
||||
// Word with associated bounding box.
|
||||
@ -79,8 +85,8 @@ message WordBox {
|
||||
// Word string in UTF8 format.
|
||||
string utf8_string = 3;
|
||||
|
||||
// True if the word passes the internal beamsearch dictionary check.
|
||||
bool dictionary_word = 4;
|
||||
reserved 4; // dictionary_word, deprecated in Chrome M135.
|
||||
reserved dictionary_word;
|
||||
|
||||
// Language guess for the word. The format is the ISO 639-1 two-letter
|
||||
// language code if that is defined (e.g. "en"), or else the ISO 639-2
|
||||
@ -113,8 +119,8 @@ message WordBox {
|
||||
// Content type for this word.
|
||||
ContentType content_type = 13;
|
||||
|
||||
// Detected orientation of the text.
|
||||
Orientation orientation = 14;
|
||||
reserved 14; // orientation, deprecated in Chrome M135.
|
||||
reserved orientation;
|
||||
|
||||
// Confidence as computed by the OCR engine.
|
||||
float confidence = 15;
|
||||
@ -167,8 +173,8 @@ enum ContentType {
|
||||
}
|
||||
|
||||
message VisualAnnotation {
|
||||
// `ui_component` deprecated in Chrome M128.
|
||||
reserved 1;
|
||||
reserved 1; // ui_component, deprecated in Chrome M128.
|
||||
reserved ui_component;
|
||||
|
||||
repeated LineBox lines = 2;
|
||||
}
|
||||
|
@ -481,23 +481,6 @@ ui::AXTreeUpdate VisualAnnotationToAXTreeUpdate(
|
||||
}
|
||||
}
|
||||
|
||||
// Each unique `chrome_screen_ai::LineBox::block_id` signifies a different
|
||||
// block of text, and so it creates a new static text node in the
|
||||
// accessibility tree. Each block has a sorted set of line boxes, everyone of
|
||||
// which is turned into one or more inline text box nodes in the accessibility
|
||||
// tree. Line boxes are sorted using their
|
||||
// `chrome_screen_ai::LineBox::order_within_block` member and are identified
|
||||
// by their index in the container of line boxes. Use std::map to sort both
|
||||
// text blocks and the line boxes that belong to each one, both operations
|
||||
// having an O(n * log(n)) complexity.
|
||||
// TODO(accessibility): Determine reading order based on visual positioning of
|
||||
// text blocks, not on the order of their block IDs.
|
||||
std::map<int32_t, std::map<int32_t, int>> blocks_to_lines_map;
|
||||
for (int i = 0; i < visual_annotation.lines_size(); ++i) {
|
||||
const chrome_screen_ai::LineBox& line = visual_annotation.lines(i);
|
||||
blocks_to_lines_map[line.block_id()].emplace(
|
||||
std::make_pair(line.order_within_block(), i));
|
||||
}
|
||||
|
||||
// Need four more nodes that convey the disclaimer messages. There are two
|
||||
// messages, one before the content and one after. Each message is wrapped
|
||||
@ -505,8 +488,25 @@ ui::AXTreeUpdate VisualAnnotationToAXTreeUpdate(
|
||||
// reader user and thus not missed.
|
||||
formatting_context_count += 4;
|
||||
|
||||
// There are the same number of paragraphs as blocks.
|
||||
size_t paragraph_count = blocks_to_lines_map.size();
|
||||
// Block and paragraph ids are sorted ascendingly in the suggested reading
|
||||
// order. Verify that, and find the number of paragraphs across all blocks.
|
||||
size_t paragraph_count = 0;
|
||||
{
|
||||
int last_block_id = -1;
|
||||
int last_paragraph_id = -1;
|
||||
for (const auto& line : visual_annotation.lines()) {
|
||||
if (line.paragraph_id() != last_paragraph_id ||
|
||||
line.block_id() != last_block_id) {
|
||||
CHECK_GE(line.block_id(), last_block_id);
|
||||
if (line.block_id() == last_block_id) {
|
||||
CHECK_GE(line.paragraph_id(), last_paragraph_id);
|
||||
}
|
||||
paragraph_count++;
|
||||
last_paragraph_id = line.paragraph_id();
|
||||
last_block_id = line.block_id();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<ui::AXNodeData> nodes(1 + // Root Node
|
||||
visual_annotation.lines().size() +
|
||||
@ -541,30 +541,33 @@ ui::AXTreeUpdate VisualAnnotationToAXTreeUpdate(
|
||||
begin_node.relative_bounds.bounds = begin_node_wrapper.relative_bounds.bounds;
|
||||
begin_node_wrapper.child_ids.push_back(begin_node.id);
|
||||
|
||||
for (const auto& block_to_lines_pair : blocks_to_lines_map) {
|
||||
ui::AXNodeData* paragraph_node = nullptr;
|
||||
int block_id = -1;
|
||||
int paragraph_id = -1;
|
||||
for (const auto& line : visual_annotation.lines()) {
|
||||
// TODO(crbug.com/347622611): Create separate paragraphs based on the
|
||||
// blocks' spacing (e.g. by utilizing heuristics found in
|
||||
// PdfAccessibilityTree). Blocks as returned by the OCR engine are still
|
||||
// too small.
|
||||
ui::AXNodeData& paragraph_node = nodes[index++];
|
||||
paragraph_node.role = ax::mojom::Role::kParagraph;
|
||||
paragraph_node.id = GetNextNegativeNodeID();
|
||||
page_node.child_ids.push_back(paragraph_node.id);
|
||||
|
||||
for (const auto& line_sequence_number_to_index_pair :
|
||||
block_to_lines_pair.second) {
|
||||
const chrome_screen_ai::LineBox& line_box =
|
||||
visual_annotation.lines(line_sequence_number_to_index_pair.second);
|
||||
// Every line with a textual accessibility role should turn into one or
|
||||
// more inline text boxes, each one representing a formatting context.
|
||||
// If the line is not of a textual role, only one node is initialized
|
||||
// having a more specific role such as `ax::mojom::Role::kImage`.
|
||||
index += SerializeLineBox(line_box, index, paragraph_node, nodes);
|
||||
|
||||
// Accumulate bounds of all lines for the paragraph.
|
||||
auto& bounding_box = line_box.bounding_box();
|
||||
paragraph_node.relative_bounds.bounds.Union(ToGfxRect(bounding_box));
|
||||
if (block_id != line.block_id() || paragraph_id != line.paragraph_id()) {
|
||||
block_id = line.block_id();
|
||||
paragraph_id = line.paragraph_id();
|
||||
paragraph_node = &nodes[index++];
|
||||
paragraph_node->role = ax::mojom::Role::kParagraph;
|
||||
paragraph_node->id = GetNextNegativeNodeID();
|
||||
page_node.child_ids.push_back(paragraph_node->id);
|
||||
}
|
||||
|
||||
// Every line with a textual accessibility role should turn into one or
|
||||
// more inline text boxes, each one representing a formatting context.
|
||||
// If the line is not of a textual role, only one node is initialized
|
||||
// having a more specific role such as `ax::mojom::Role::kImage`.
|
||||
index += SerializeLineBox(line, index, *paragraph_node, nodes);
|
||||
|
||||
// Accumulate bounds of all lines for the paragraph.
|
||||
auto& bounding_box = line.bounding_box();
|
||||
CHECK(paragraph_node);
|
||||
paragraph_node->relative_bounds.bounds.Union(ToGfxRect(bounding_box));
|
||||
}
|
||||
|
||||
// Add a disclaimer node informing the user of the end of extracted text,
|
||||
@ -606,24 +609,14 @@ mojom::VisualAnnotationPtr ConvertProtoToVisualAnnotation(
|
||||
line_box->text_line = line.utf8_string();
|
||||
line_box->block_id = line.block_id();
|
||||
line_box->language = line.language();
|
||||
line_box->order_within_block = line.order_within_block();
|
||||
line_box->paragraph_id = line.paragraph_id();
|
||||
line_box->bounding_box = ProtoToMojo(line.bounding_box());
|
||||
line_box->bounding_box_angle = line.bounding_box().angle();
|
||||
line_box->confidence = line.confidence();
|
||||
|
||||
// `baseline_box` is not available in ChromeScreenAI library prior to
|
||||
// version 122.1.
|
||||
// If it is not provided by the OCR, the library assigns bounding box's
|
||||
// value to it and it's done the same here.
|
||||
auto baseline_box =
|
||||
line.has_baseline_box() ? line.baseline_box() : line.bounding_box();
|
||||
line_box->baseline_box = ProtoToMojo(baseline_box);
|
||||
line_box->baseline_box_angle = baseline_box.angle();
|
||||
|
||||
for (const auto& word : line.words()) {
|
||||
auto word_box = screen_ai::mojom::WordBox::New();
|
||||
word_box->word = word.utf8_string();
|
||||
word_box->dictionary_word = word.dictionary_word();
|
||||
word_box->language = word.language();
|
||||
word_box->bounding_box = ProtoToMojo(word.bounding_box());
|
||||
word_box->bounding_box_angle = word.bounding_box().angle();
|
||||
|
@ -12,28 +12,18 @@
|
||||
|
||||
namespace {
|
||||
|
||||
void InitLineBox(chrome_screen_ai::LineBox* line_box,
|
||||
int32_t x,
|
||||
int32_t y,
|
||||
int32_t width,
|
||||
int32_t height,
|
||||
const char* text,
|
||||
const char* language,
|
||||
chrome_screen_ai::Direction direction,
|
||||
int32_t block_id,
|
||||
int32_t order_within_block,
|
||||
float angle) {
|
||||
chrome_screen_ai::Rect* rect = line_box->mutable_bounding_box();
|
||||
void InitSymbolBox(chrome_screen_ai::SymbolBox* symbol_box,
|
||||
int32_t x,
|
||||
int32_t y,
|
||||
int32_t width,
|
||||
int32_t height,
|
||||
const char* text) {
|
||||
chrome_screen_ai::Rect* rect = symbol_box->mutable_bounding_box();
|
||||
rect->set_x(x);
|
||||
rect->set_y(y);
|
||||
rect->set_width(width);
|
||||
rect->set_height(height);
|
||||
rect->set_angle(angle);
|
||||
line_box->set_utf8_string(text);
|
||||
line_box->set_language(language);
|
||||
line_box->set_direction(direction);
|
||||
line_box->set_block_id(block_id);
|
||||
line_box->set_order_within_block(order_within_block);
|
||||
symbol_box->set_utf8_string(text);
|
||||
}
|
||||
|
||||
void InitWordBox(chrome_screen_ai::WordBox* word_box,
|
||||
@ -63,18 +53,37 @@ void InitWordBox(chrome_screen_ai::WordBox* word_box,
|
||||
word_box->set_foreground_rgb_value(foreground_rgb_value);
|
||||
}
|
||||
|
||||
void InitSymbolBox(chrome_screen_ai::SymbolBox* symbol_box,
|
||||
int32_t x,
|
||||
int32_t y,
|
||||
int32_t width,
|
||||
int32_t height,
|
||||
const char* text) {
|
||||
chrome_screen_ai::Rect* rect = symbol_box->mutable_bounding_box();
|
||||
void InitLineBox(chrome_screen_ai::LineBox* line_box,
|
||||
int32_t x,
|
||||
int32_t y,
|
||||
int32_t width,
|
||||
int32_t height,
|
||||
const char* text,
|
||||
const char* language,
|
||||
chrome_screen_ai::Direction direction,
|
||||
int32_t block_id,
|
||||
int32_t paragraph_id,
|
||||
float angle,
|
||||
bool add_word = false) {
|
||||
chrome_screen_ai::Rect* rect = line_box->mutable_bounding_box();
|
||||
rect->set_x(x);
|
||||
rect->set_y(y);
|
||||
rect->set_width(width);
|
||||
rect->set_height(height);
|
||||
symbol_box->set_utf8_string(text);
|
||||
rect->set_angle(angle);
|
||||
line_box->set_utf8_string(text);
|
||||
line_box->set_language(language);
|
||||
line_box->set_direction(direction);
|
||||
line_box->set_block_id(block_id);
|
||||
line_box->set_paragraph_id(paragraph_id);
|
||||
if (add_word) {
|
||||
InitWordBox(line_box->add_words(), x, y, width, height, text, language,
|
||||
direction,
|
||||
/*has_space_after=*/false,
|
||||
/*background_rgb_value=*/0,
|
||||
/*foreground_rgb_value=*/0,
|
||||
/*angle=*/0);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace
|
||||
@ -127,8 +136,8 @@ TEST_F(ScreenAIVisualAnnotatorProtoConvertorTest,
|
||||
/*text=*/"Hello world",
|
||||
/*language=*/"en",
|
||||
/*direction=*/chrome_screen_ai::DIRECTION_LEFT_TO_RIGHT,
|
||||
/*block_id=*/1,
|
||||
/*order_within_block=*/1,
|
||||
/*block_id=*/0,
|
||||
/*paragraph_id=*/0,
|
||||
/*angle=*/0);
|
||||
}
|
||||
|
||||
@ -198,8 +207,8 @@ TEST_F(ScreenAIVisualAnnotatorProtoConvertorTest,
|
||||
/*text=*/"Bonjour world",
|
||||
/*language=*/"en",
|
||||
/*direction=*/chrome_screen_ai::DIRECTION_LEFT_TO_RIGHT,
|
||||
/*block_id=*/1,
|
||||
/*order_within_block=*/1,
|
||||
/*block_id=*/0,
|
||||
/*paragraph_id=*/0,
|
||||
/*angle=*/0);
|
||||
}
|
||||
|
||||
@ -269,16 +278,9 @@ TEST_F(ScreenAIVisualAnnotatorProtoConvertorTest,
|
||||
/*text=*/"Hello world",
|
||||
/*language=*/"en",
|
||||
/*direction=*/chrome_screen_ai::DIRECTION_LEFT_TO_RIGHT,
|
||||
/*block_id=*/1,
|
||||
/*order_within_block=*/1,
|
||||
/*block_id=*/0,
|
||||
/*paragraph_id=*/0,
|
||||
/*angle=*/1.5);
|
||||
|
||||
chrome_screen_ai::Rect* line_baseline_box = line_0->mutable_baseline_box();
|
||||
line_baseline_box->set_x(110);
|
||||
line_baseline_box->set_y(110);
|
||||
line_baseline_box->set_width(510);
|
||||
line_baseline_box->set_height(30);
|
||||
line_baseline_box->set_angle(11.5);
|
||||
}
|
||||
|
||||
{
|
||||
@ -291,19 +293,13 @@ TEST_F(ScreenAIVisualAnnotatorProtoConvertorTest,
|
||||
EXPECT_EQ(line->bounding_box.width(), 500);
|
||||
EXPECT_EQ(line->bounding_box.height(), 20);
|
||||
EXPECT_EQ(line->bounding_box_angle, 1.5);
|
||||
EXPECT_EQ(line->baseline_box.x(), 110);
|
||||
EXPECT_EQ(line->baseline_box.y(), 110);
|
||||
EXPECT_EQ(line->baseline_box.width(), 510);
|
||||
EXPECT_EQ(line->baseline_box.height(), 30);
|
||||
EXPECT_EQ(line->baseline_box_angle, 11.5);
|
||||
EXPECT_EQ(line->text_line, "Hello world");
|
||||
EXPECT_EQ(line->block_id, 1);
|
||||
EXPECT_EQ(line->order_within_block, 1);
|
||||
EXPECT_EQ(line->block_id, 0);
|
||||
EXPECT_EQ(line->paragraph_id, 0);
|
||||
EXPECT_EQ(line->words.size(), static_cast<unsigned long>(2));
|
||||
|
||||
mojom::WordBoxPtr& word_0 = line->words[0];
|
||||
EXPECT_EQ(word_0->word, "Hello");
|
||||
EXPECT_EQ(word_0->dictionary_word, false);
|
||||
EXPECT_EQ(word_0->language, "en");
|
||||
EXPECT_EQ(word_0->has_space_after, true);
|
||||
EXPECT_EQ(word_0->bounding_box.x(), 100);
|
||||
@ -315,7 +311,6 @@ TEST_F(ScreenAIVisualAnnotatorProtoConvertorTest,
|
||||
|
||||
mojom::WordBoxPtr& word_1 = line->words[1];
|
||||
EXPECT_EQ(word_1->word, "world");
|
||||
EXPECT_EQ(word_1->dictionary_word, false);
|
||||
EXPECT_EQ(word_1->language, "en");
|
||||
EXPECT_EQ(word_1->has_space_after, false);
|
||||
EXPECT_EQ(word_1->bounding_box.x(), 350);
|
||||
@ -415,8 +410,8 @@ TEST_F(ScreenAIVisualAnnotatorProtoConvertorTest,
|
||||
/*text=*/"روز بخیر",
|
||||
/*language=*/"fa",
|
||||
/*direction=*/chrome_screen_ai::DIRECTION_RIGHT_TO_LEFT,
|
||||
/*block_id=*/1,
|
||||
/*order_within_block=*/1,
|
||||
/*block_id=*/0,
|
||||
/*paragraph_id=*/0,
|
||||
/*angle=*/0);
|
||||
}
|
||||
|
||||
@ -531,8 +526,8 @@ TEST_F(
|
||||
/*text=*/"Day One",
|
||||
/*language=*/"en",
|
||||
/*direction=*/chrome_screen_ai::DIRECTION_LEFT_TO_RIGHT,
|
||||
/*block_id=*/1,
|
||||
/*order_within_block=*/1,
|
||||
/*block_id=*/0,
|
||||
/*paragraph_id=*/0,
|
||||
/*angle=*/0);
|
||||
}
|
||||
|
||||
@ -647,8 +642,8 @@ TEST_F(
|
||||
/*text=*/"Day One",
|
||||
/*language=*/"en",
|
||||
/*direction=*/chrome_screen_ai::DIRECTION_LEFT_TO_RIGHT,
|
||||
/*block_id=*/1,
|
||||
/*order_within_block=*/1,
|
||||
/*block_id=*/0,
|
||||
/*paragraph_id=*/0,
|
||||
/*angle=*/90);
|
||||
}
|
||||
|
||||
@ -674,4 +669,89 @@ TEST_F(
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(ScreenAIVisualAnnotatorProtoConvertorTest,
|
||||
VisualAnnotationToAXTreeUpdate_OcrResults_Paragraphs) {
|
||||
chrome_screen_ai::VisualAnnotation annotation;
|
||||
gfx::Rect snapshot_bounds(800, 900);
|
||||
|
||||
screen_ai::ResetNodeIDForTesting();
|
||||
|
||||
{
|
||||
// Describes one OCR line used as test input: the ids of the block and
// paragraph the line belongs to, and the line's text.
struct LineInfo {
  int block_id;
  int paragraph_id;
  const char* text;
};
|
||||
|
||||
// Expected paragraphs: (Jan, Feb, Mar), (Apr, May), (Jun)
|
||||
LineInfo lines[] = {{0, 0, "Jan"}, {0, 0, "Feb"}, {0, 0, "Mar"},
|
||||
{0, 1, "Apr"}, {0, 1, "May"}, {2, 0, "Jun"}};
|
||||
|
||||
int y = 100;
|
||||
for (auto& line : lines) {
|
||||
InitLineBox(annotation.add_lines(),
|
||||
/*x=*/100,
|
||||
/*y=*/y,
|
||||
/*width=*/100,
|
||||
/*height=*/20,
|
||||
/*text=*/line.text,
|
||||
/*language=*/"en",
|
||||
/*direction=*/chrome_screen_ai::DIRECTION_LEFT_TO_RIGHT,
|
||||
/*block_id=*/line.block_id,
|
||||
/*paragraph_id=*/line.paragraph_id,
|
||||
/*angle=*/0,
|
||||
/*add_word=*/true);
|
||||
y += 20;
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
const ui::AXTreeUpdate update =
|
||||
VisualAnnotationToAXTreeUpdate(annotation, snapshot_bounds);
|
||||
|
||||
const std::string expected_update(
|
||||
"AXTreeUpdate: root id -2\n"
|
||||
"id=-2 region class_name=ocred_page child_ids=-3,-5,-12,-17,-20 (0, "
|
||||
"0)-(800, 900) is_page_breaking_object=true\n"
|
||||
" id=-3 banner child_ids=-4 (0, 0)-(1, 1)\n"
|
||||
" id=-4 staticText name=Start of extracted text (0, 0)-(1, 1)\n"
|
||||
" id=-5 paragraph child_ids=-6,-8,-10 (100, 100)-(100, 60)\n"
|
||||
" id=-6 staticText name=Jan child_ids=-7 (100, 100)-(100, 20) "
|
||||
"text_direction=ltr language=en\n"
|
||||
" id=-7 inlineTextBox name=Jan (100, 100)-(100, 20) "
|
||||
"background_color=&0 color=&0 text_direction=ltr word_starts=0 "
|
||||
"word_ends=2\n"
|
||||
" id=-8 staticText name=Feb child_ids=-9 (100, 120)-(100, 20) "
|
||||
"text_direction=ltr language=en\n"
|
||||
" id=-9 inlineTextBox name=Feb (100, 120)-(100, 20) "
|
||||
"background_color=&0 color=&0 text_direction=ltr word_starts=0 "
|
||||
"word_ends=2\n"
|
||||
" id=-10 staticText name=Mar child_ids=-11 (100, 140)-(100, 20) "
|
||||
"text_direction=ltr language=en\n"
|
||||
" id=-11 inlineTextBox name=Mar (100, 140)-(100, 20) "
|
||||
"background_color=&0 color=&0 text_direction=ltr word_starts=0 "
|
||||
"word_ends=2\n"
|
||||
" id=-12 paragraph child_ids=-13,-15 (100, 160)-(100, 40)\n"
|
||||
" id=-13 staticText name=Apr child_ids=-14 (100, 160)-(100, 20) "
|
||||
"text_direction=ltr language=en\n"
|
||||
" id=-14 inlineTextBox name=Apr (100, 160)-(100, 20) "
|
||||
"background_color=&0 color=&0 text_direction=ltr word_starts=0 "
|
||||
"word_ends=2\n"
|
||||
" id=-15 staticText name=May child_ids=-16 (100, 180)-(100, 20) "
|
||||
"text_direction=ltr language=en\n"
|
||||
" id=-16 inlineTextBox name=May (100, 180)-(100, 20) "
|
||||
"background_color=&0 color=&0 text_direction=ltr word_starts=0 "
|
||||
"word_ends=2\n"
|
||||
" id=-17 paragraph child_ids=-18 (100, 200)-(100, 20)\n"
|
||||
" id=-18 staticText name=Jun child_ids=-19 (100, 200)-(100, 20) "
|
||||
"text_direction=ltr language=en\n"
|
||||
" id=-19 inlineTextBox name=Jun (100, 200)-(100, 20) "
|
||||
"background_color=&0 color=&0 text_direction=ltr word_starts=0 "
|
||||
"word_ends=2\n"
|
||||
" id=-20 contentInfo child_ids=-21 (800, 900)-(1, 1)\n"
|
||||
" id=-21 staticText name=End of extracted text (800, 900)-(1, 1)\n");
|
||||
EXPECT_EQ(expected_update, update.ToString());
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace screen_ai
|
||||
|
@ -57,8 +57,8 @@ struct LineBox {
|
||||
// ID of the text block that this line belongs to.
|
||||
int32 block_id;
|
||||
|
||||
// Index within the block that this line belongs to.
|
||||
int32 order_within_block;
|
||||
// ID of the paragraph that this line belongs to.
|
||||
int32 paragraph_id;
|
||||
|
||||
// Line bounding box relative to the original image.
|
||||
gfx.mojom.Rect bounding_box;
|
||||
@ -67,14 +67,6 @@ struct LineBox {
|
||||
// top-left corner.
|
||||
float bounding_box_angle;
|
||||
|
||||
// Line bounding box relative to the original image with bottom edge
|
||||
// representing estimated baseline of text.
|
||||
gfx.mojom.Rect baseline_box;
|
||||
|
||||
// Rotation angle (in degrees, clockwise) of the line baseline box about its
|
||||
// top-left corner.
|
||||
float baseline_box_angle;
|
||||
|
||||
// Confidence as computed by the OCR engine. The value is in range [0, 1].
|
||||
float confidence;
|
||||
};
|
||||
@ -84,9 +76,6 @@ struct WordBox {
|
||||
// A single word in UTF8 format.
|
||||
string word;
|
||||
|
||||
// True if the word passes the internal beamsearch dictionary check.
|
||||
bool dictionary_word;
|
||||
|
||||
// Language guess for the word. The format is the ISO 639-1 two-letter
|
||||
// language code if that is defined (e.g. "en"), or else the ISO 639-2
|
||||
// three-letter code if that is defined, or else a Google-specific code.
|
||||
|
Reference in New Issue
Block a user