0

Add main content extractor client type.

Similar to OCR client type, add main content extractor client type.
This addition makes it easier to distinguish between usage, failure, and
bugs for different clients of this service and centralize metrics
collection.

AX-Relnotes: n/a
Bug: 359853518
Change-Id: I29403795f80a77c8899f26ad09d63036101653b5
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/6368519
Reviewed-by: Alex Gough <ajgo@chromium.org>
Reviewed-by: CJ Huang <chenjih@google.com>
Commit-Queue: Ramin Halavati <rhalavati@chromium.org>
Reviewed-by: Mark Schillaci <mschillaci@google.com>
Cr-Commit-Position: refs/heads/main@{#1435250}
This commit is contained in:
Ramin Halavati
2025-03-19 22:36:43 -07:00
committed by Chromium LUCI CQ
parent e39b508fb1
commit d0d6231bb7
9 changed files with 48 additions and 5 deletions
chrome
browser
renderer
chromeos/components/mahi
content/renderer/accessibility/annotations
services/screen_ai

@ -42,6 +42,8 @@ void AXTreeFixingScreenAIService::IdentifyMainNode(
screen_ai::ScreenAIServiceRouterFactory::GetForBrowserContext(profile_)
->BindMainContentExtractor(std::move(receiver));
screen_ai_service_.reset_on_disconnect();
screen_ai_service_->SetClientType(
screen_ai::mojom::MceClientType::kMainNode);
}
// Identify the main node using ScreenAI.

@ -11,8 +11,8 @@ See `services/screen_ai/README.md` for more.
Depending on your use case restrictions, choose one of the following
approaches.
1. If you are adding a new client for OCR, add a new enum value to
`screen_ai::mojom::OcrClientType`, otherwise choose an appropriate one from
it for the next steps.
`screen_ai::mojom::OcrClientType`, otherwise choose an appropriate one for it
in the next steps.
1. Using `OpticalCharacterRecognizer:CreateWithStatusCallback`, create an OCR
object, and wait until the callback is called. This will trigger download
and startup of the service (if needed) and reports the result.\
@ -31,17 +31,20 @@ approaches.
in a callback.\
Once you know the service is ready, trigger connection to it in your process
by connecting to `screen_ai:mojom:ScreenAIAnnotator` interface.\
Before calling any of the `PerformOCR` functions, call `SetClient` once to
set the client type.\
Before calling any of the `PerformOCR` functions, call `SetClientType` once
to set the client type.\
For an example see `components/pdf/renderer/pdf_ocr_helper.cc`.
## How to use Main Content Extraction
If you are adding a new client for MCE, add a new enum value to
`screen_ai::mojom::MceClientType`.
In the browser process call
`screen_ai:ScreenAIServiceRouterFactory:GetForBrowserContext:GetServiceStateAsync`
to trigger library download and service initialization and receive the result in
a callback.\
Once you know the service is ready, trigger connection to it in your process by
connecting to `screen_ai:mojom:Screen2xMainContentExtractor` interface.\
Call `SetClientType` once to set the client type.\
For an example see `chrome/renderer/accessibility/ax_tree_distiller.cc`.
## Caution

@ -107,6 +107,8 @@ class MainContentExtractionTest : public InProcessBrowserTest {
ScreenAIServiceRouterFactory::GetForBrowserContext(browser()->profile())
->BindMainContentExtractor(
main_content_extractor_.BindNewPipeAndPassReceiver());
main_content_extractor_->SetClientType(
screen_ai::mojom::MceClientType::kTest);
}
ui::AXTreeUpdate DistillPage(const std::string& relative_url) {

@ -228,6 +228,8 @@ void AXTreeDistiller::DistillViaScreen2x(
main_content_extractor_.set_disconnect_handler(
base::BindOnce(&AXTreeDistiller::OnMainContentExtractorDisconnected,
weak_ptr_factory_.GetWeakPtr()));
main_content_extractor_->SetClientType(
screen_ai::mojom::MceClientType::kReadingMode);
}
base::TimeTicks screen2x_start_time = base::TimeTicks::Now();

@ -128,6 +128,8 @@ void AXTreeExtractor::OnScreen2xReady(
screen2x_main_content_extractor_.reset_on_disconnect();
screen2x_main_content_extractor_.reset_on_idle_timeout(
kScreenAIIdleDisconnectDelay);
screen2x_main_content_extractor_->SetClientType(
screen_ai::mojom::MceClientType::kMahi);
}
void AXTreeExtractor::ExtractContent(

@ -56,6 +56,8 @@ class MockMainNodeAnnotationService
// Mock override with an empty body: |snapshot| is ignored and |callback| is
// never run.
void IdentifyMainNode(const ui::AXTreeUpdate& snapshot,
IdentifyMainNodeCallback callback) override {}
// Mock override with an empty body: the reported client type is discarded.
void SetClientType(screen_ai::mojom::MceClientType client) override {}
// Tests should not modify entries in these lists.
std::vector<ui::AXNodeID> content_nodes_;
ui::AXNodeID main_ = ui::kInvalidAXNodeID;

@ -36,6 +36,18 @@ enum OcrClientType {
kScreenshotTextDetection,
};
// Clients of the Main Content Extraction (MCE) service. Each client reports
// its own value through `Screen2xMainContentExtractor.SetClientType`. When
// adding a new MCE client, add a new value here.
enum MceClientType {
// To be used only for testing.
kTest,
// Used in the reading mode.
kReadingMode,
// Used for main node annotation.
kMainNode,
// Used for Mahi feature on ChromeOS.
kMahi,
};
// A wrapper struct mirroring parts of the chrome_screen_ai.proto.
struct VisualAnnotation {
array<LineBox> lines;
@ -146,6 +158,9 @@ interface Screen2xMainContentExtractor {
// to identify an accurate main node for the entire page.
IdentifyMainNode(ax.mojom.AXTreeUpdate ax_tree) =>
(ax.mojom.AXTreeID tree_id, int32 node_id);
// Sets the MCE client type, which is used for metrics. Call this once per
// connection before requesting main content extraction; the service CHECKs
// that a client type has been recorded for the calling receiver.
SetClientType(MceClientType client_type);
};
// Provides an interface to the OCR functionality of the Screen AI service.

@ -429,6 +429,11 @@ void ScreenAIService::SetClientType(mojom::OcrClientType client_type) {
ocr_client_types_[screen_ai_annotators_.current_receiver()] = client_type;
}
// mojom::Screen2xMainContentExtractor:
// Stores |client_type| in |mce_client_types_|, keyed by the id that the MCE
// receiver set reports as its current receiver. An existing entry for that id
// is overwritten.
void ScreenAIService::SetClientType(mojom::MceClientType client_type) {
  const auto receiver_id =
      screen2x_main_content_extractors_.current_receiver();
  mce_client_types_[receiver_id] = client_type;
}
void ScreenAIService::PerformOcrAndReturnAnnotation(
const SkBitmap& image,
PerformOcrAndReturnAnnotationCallback callback) {
@ -458,7 +463,6 @@ void ScreenAIService::PerformOcrAndReturnAXTreeUpdate(
void ScreenAIService::ExtractMainContent(const ui::AXTreeUpdate& snapshot,
ExtractMainContentCallback callback) {
main_content_extraction_last_used_ = base::TimeTicks::Now();
ui::AXTree tree;
std::optional<std::vector<int32_t>> content_node_ids;
bool success = ExtractMainContentInternal(snapshot, tree, content_node_ids);
@ -502,6 +506,11 @@ bool ScreenAIService::ExtractMainContentInternal(
const ui::AXTreeUpdate& snapshot,
ui::AXTree& tree,
std::optional<std::vector<int32_t>>& content_node_ids) {
// TODO(crbug.com/chrome/359853518): Add latency, usage, and success metrics
// separated on client type and add client type as crash key.
CHECK(base::Contains(mce_client_types_,
screen2x_main_content_extractors_.current_receiver()));
main_content_extraction_last_used_ = base::TimeTicks::Now();
// Early return if input is empty.
if (snapshot.nodes.empty()) {
return false;

@ -61,6 +61,9 @@ class ScreenAIService : public mojom::ScreenAIServiceFactory,
// mojom::ScreenAIAnnotator:
void SetClientType(mojom::OcrClientType client) override;
// mojom::Screen2xMainContentExtractor:
void SetClientType(mojom::MceClientType client) override;
// mojom::ScreenAIAnnotator:
void PerformOcrAndReturnAXTreeUpdate(
const SkBitmap& image,
@ -147,6 +150,9 @@ class ScreenAIService : public mojom::ScreenAIServiceFactory,
// Client type for each OCR receiver.
std::map<mojo::ReceiverId, mojom::OcrClientType> ocr_client_types_;
// Client type for each MCE receiver.
std::map<mojo::ReceiverId, mojom::MceClientType> mce_client_types_;
// Browser side shutdown handler.
mojo::Remote<mojom::ScreenAIServiceShutdownHandler>
screen_ai_shutdown_handler_;