0

Add pdf document load observer

Adds an observer for the PDF document load-complete event.
Also changes the consumers (contextual cueing, page_content_annotations) to wait for that event before querying the page count.

Change-Id: Iecb4571aeb61c52dde6fe1bc770e5bdb372a1525
Bug: 395086836
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/6250270
Reviewed-by: Lei Zhang <thestig@chromium.org>
Commit-Queue: Raj T <rajendrant@chromium.org>
Reviewed-by: Will Harris <wfh@chromium.org>
Reviewed-by: Duncan Mercer <mercerd@google.com>
Cr-Commit-Position: refs/heads/main@{#1420001}
This commit is contained in:
rajendrant
2025-02-13 10:46:28 -08:00
committed by Chromium LUCI CQ
parent 1d0a99e1a2
commit 456b83c54e
10 changed files with 160 additions and 10 deletions

@@ -116,10 +116,31 @@ ContextualCueingPageData::DidMatchCueingConditions(
#if BUILDFLAG(ENABLE_PDF) #if BUILDFLAG(ENABLE_PDF)
void ContextualCueingPageData::RequestPdfPageCount() { void ContextualCueingPageData::RequestPdfPageCount() {
CHECK(page().GetContentsMimeType() == pdf::kPDFMimeType); CHECK_EQ(pdf::kPDFMimeType, page().GetContentsMimeType());
pdf::PDFDocumentHelper* pdf_helper =
pdf::PDFDocumentHelper::MaybeGetForWebContents( auto* pdf_helper = pdf::PDFDocumentHelper::MaybeGetForWebContents(
content::WebContents::FromRenderFrameHost(&page().GetMainDocument())); content::WebContents::FromRenderFrameHost(&page().GetMainDocument()));
if (!pdf_helper) {
return;
}
if (!pdf_helper->IsDocumentLoadComplete()) {
// Wait for the PDF to load.
pdf_load_obseration_.Observe(pdf_helper);
return;
}
// Fetch zero PDF bytes to just receive the total page count.
pdf_helper->GetPdfBytes(
/*size_limit=*/0,
base::BindOnce(&ContextualCueingPageData::OnPdfPageCountReceived,
weak_factory_.GetWeakPtr()));
}
void ContextualCueingPageData::OnDocumentLoadComplete() {
CHECK_EQ(pdf::kPDFMimeType, page().GetContentsMimeType());
pdf_load_obseration_.Reset();
auto* pdf_helper = pdf::PDFDocumentHelper::MaybeGetForWebContents(
content::WebContents::FromRenderFrameHost(&page().GetMainDocument()));
if (pdf_helper) { if (pdf_helper) {
// Fetch zero PDF bytes to just receive the total page count. // Fetch zero PDF bytes to just receive the total page count.
pdf_helper->GetPdfBytes( pdf_helper->GetPdfBytes(

@@ -5,17 +5,25 @@
#ifndef CHROME_BROWSER_CONTEXTUAL_CUEING_CONTEXTUAL_CUEING_PAGE_DATA_H_ #ifndef CHROME_BROWSER_CONTEXTUAL_CUEING_CONTEXTUAL_CUEING_PAGE_DATA_H_
#define CHROME_BROWSER_CONTEXTUAL_CUEING_CONTEXTUAL_CUEING_PAGE_DATA_H_ #define CHROME_BROWSER_CONTEXTUAL_CUEING_CONTEXTUAL_CUEING_PAGE_DATA_H_
#include "base/scoped_observation.h"
#include "components/optimization_guide/proto/contextual_cueing_metadata.pb.h" #include "components/optimization_guide/proto/contextual_cueing_metadata.pb.h"
#include "components/pdf/common/constants.h" #include "components/pdf/common/constants.h"
#include "content/public/browser/page_user_data.h" #include "content/public/browser/page_user_data.h"
#include "pdf/buildflags.h" #include "pdf/buildflags.h"
#include "pdf/mojom/pdf.mojom.h" #include "pdf/mojom/pdf.mojom.h"
#if BUILDFLAG(ENABLE_PDF)
#include "components/pdf/browser/pdf_document_helper.h"
#endif // BUILDFLAG(ENABLE_PDF)
namespace contextual_cueing { namespace contextual_cueing {
// Decider for contextual cueing that is scoped to `Page`. // Decider for contextual cueing that is scoped to `Page`.
class ContextualCueingPageData class ContextualCueingPageData :
: public content::PageUserData<ContextualCueingPageData> { #if BUILDFLAG(ENABLE_PDF)
public pdf::PDFDocumentHelper::Observer,
#endif // BUILDFLAG(ENABLE_PDF)
public content::PageUserData<ContextualCueingPageData> {
public: public:
using CueingDecisionCallback = base::OnceCallback<void(const std::string&)>; using CueingDecisionCallback = base::OnceCallback<void(const std::string&)>;
@@ -48,11 +56,16 @@ class ContextualCueingPageData
const optimization_guide::proto::GlicCueingConfiguration& config); const optimization_guide::proto::GlicCueingConfiguration& config);
#if BUILDFLAG(ENABLE_PDF) #if BUILDFLAG(ENABLE_PDF)
// Requests the page count if this is a PDF page.
void RequestPdfPageCount(); void RequestPdfPageCount();
// Invoked when page count is received.
void OnPdfPageCountReceived(pdf::mojom::PdfListener::GetPdfBytesStatus status, void OnPdfPageCountReceived(pdf::mojom::PdfListener::GetPdfBytesStatus status,
const std::vector<uint8_t>& bytes, const std::vector<uint8_t>& bytes,
uint32_t page_count); uint32_t page_count);
// pdf::PDFDocumentHelper::Observer:
void OnDocumentLoadComplete() override;
#endif // BUILDFLAG(ENABLE_PDF) #endif // BUILDFLAG(ENABLE_PDF)
const optimization_guide::proto::GlicContextualCueingMetadata metadata_; const optimization_guide::proto::GlicContextualCueingMetadata metadata_;
@@ -64,6 +77,12 @@ class ContextualCueingPageData
CueingDecisionCallback cueing_decision_callback_; CueingDecisionCallback cueing_decision_callback_;
#if BUILDFLAG(ENABLE_PDF)
base::ScopedObservation<pdf::PDFDocumentHelper,
pdf::PDFDocumentHelper::Observer>
pdf_load_obseration_{this};
#endif // BUILDFLAG(ENABLE_PDF)
base::WeakPtrFactory<ContextualCueingPageData> weak_factory_{this}; base::WeakPtrFactory<ContextualCueingPageData> weak_factory_{this};
PAGE_USER_DATA_KEY_DECL(); PAGE_USER_DATA_KEY_DECL();

@@ -228,8 +228,30 @@ void AnnotatedPageContentRequest::OnInnerTextReceived(
#if BUILDFLAG(ENABLE_PDF) #if BUILDFLAG(ENABLE_PDF)
void AnnotatedPageContentRequest::RequestPdfPageCount() { void AnnotatedPageContentRequest::RequestPdfPageCount() {
CHECK(web_contents_->GetContentsMimeType() == pdf::kPDFMimeType); CHECK_EQ(pdf::kPDFMimeType, web_contents_->GetContentsMimeType());
pdf::PDFDocumentHelper* pdf_helper = auto* pdf_helper =
pdf::PDFDocumentHelper::MaybeGetForWebContents(web_contents_);
if (!pdf_helper) {
return;
}
if (!pdf_helper->IsDocumentLoadComplete()) {
// Wait for the PDF to load.
pdf_load_obseration_.Observe(pdf_helper);
return;
}
// Fetch zero PDF bytes to just receive the total page count.
pdf_helper->GetPdfBytes(
/*size_limit=*/0,
base::BindOnce(
&RecordPdfPageCountMetrics,
web_contents_->GetPrimaryMainFrame()->GetPageUkmSourceId()));
}
void AnnotatedPageContentRequest::OnDocumentLoadComplete() {
CHECK_EQ(pdf::kPDFMimeType, web_contents_->GetContentsMimeType());
pdf_load_obseration_.Reset();
auto* pdf_helper =
pdf::PDFDocumentHelper::MaybeGetForWebContents(web_contents_); pdf::PDFDocumentHelper::MaybeGetForWebContents(web_contents_);
if (pdf_helper) { if (pdf_helper) {
// Fetch zero PDF bytes to just receive the total page count. // Fetch zero PDF bytes to just receive the total page count.

@@ -1,20 +1,30 @@
// Copyright 2025 The Chromium Authors // Copyright 2025 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be // Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. // found in the LICENSE file.
#ifndef CHROME_BROWSER_PAGE_CONTENT_ANNOTATIONS_PAGE_CONTENT_ANNOTATIONS_ANNOTATE_PAGE_CONTENT_REQUEST_H_ #ifndef CHROME_BROWSER_PAGE_CONTENT_ANNOTATIONS_PAGE_CONTENT_ANNOTATIONS_ANNOTATE_PAGE_CONTENT_REQUEST_H_
#define CHROME_BROWSER_PAGE_CONTENT_ANNOTATIONS_PAGE_CONTENT_ANNOTATIONS_ANNOTATE_PAGE_CONTENT_REQUEST_H_ #define CHROME_BROWSER_PAGE_CONTENT_ANNOTATIONS_PAGE_CONTENT_ANNOTATIONS_ANNOTATE_PAGE_CONTENT_REQUEST_H_
#include "base/scoped_observation.h"
#include "chrome/browser/content_extraction/inner_text.h" #include "chrome/browser/content_extraction/inner_text.h"
#include "components/optimization_guide/proto/features/common_quality_data.pb.h" #include "components/optimization_guide/proto/features/common_quality_data.pb.h"
#include "content/public/browser/web_contents.h" #include "content/public/browser/web_contents.h"
#include "pdf/buildflags.h" #include "pdf/buildflags.h"
#include "third_party/blink/public/mojom/content_extraction/ai_page_content.mojom.h" #include "third_party/blink/public/mojom/content_extraction/ai_page_content.mojom.h"
#if BUILDFLAG(ENABLE_PDF)
#include "components/pdf/browser/pdf_document_helper.h"
#endif // BUILDFLAG(ENABLE_PDF)
namespace page_content_annotations { namespace page_content_annotations {
// Class for deciding when a page is ready for getting page content, and // Class for deciding when a page is ready for getting page content, and
// extracts page content. // extracts page content.
class AnnotatedPageContentRequest { class AnnotatedPageContentRequest
#if BUILDFLAG(ENABLE_PDF)
: public pdf::PDFDocumentHelper::Observer
#endif // BUILDFLAG(ENABLE_PDF)
{
public: public:
static std::unique_ptr<AnnotatedPageContentRequest> MaybeCreate( static std::unique_ptr<AnnotatedPageContentRequest> MaybeCreate(
content::WebContents* web_contents); content::WebContents* web_contents);
@@ -25,7 +35,11 @@ class AnnotatedPageContentRequest {
AnnotatedPageContentRequest(const AnnotatedPageContentRequest&) = delete; AnnotatedPageContentRequest(const AnnotatedPageContentRequest&) = delete;
AnnotatedPageContentRequest& operator=(const AnnotatedPageContentRequest&) = AnnotatedPageContentRequest& operator=(const AnnotatedPageContentRequest&) =
delete; delete;
#if BUILDFLAG(ENABLE_PDF)
~AnnotatedPageContentRequest() override;
#else
~AnnotatedPageContentRequest(); ~AnnotatedPageContentRequest();
#endif // BUILDFLAG(ENABLE_PDF)
void PrimaryPageChanged(); void PrimaryPageChanged();
@@ -53,6 +67,9 @@ class AnnotatedPageContentRequest {
#if BUILDFLAG(ENABLE_PDF) #if BUILDFLAG(ENABLE_PDF)
void RequestPdfPageCount(); void RequestPdfPageCount();
// pdf::PDFDocumentHelper::Observer:
void OnDocumentLoadComplete() override;
#endif // BUILDFLAG(ENABLE_PDF) #endif // BUILDFLAG(ENABLE_PDF)
const raw_ptr<content::WebContents> web_contents_; const raw_ptr<content::WebContents> web_contents_;
@@ -66,6 +83,12 @@ class AnnotatedPageContentRequest {
bool waiting_for_load_ = false; bool waiting_for_load_ = false;
bool waiting_for_fcp_ = false; bool waiting_for_fcp_ = false;
#if BUILDFLAG(ENABLE_PDF)
base::ScopedObservation<pdf::PDFDocumentHelper,
pdf::PDFDocumentHelper::Observer>
pdf_load_obseration_{this};
#endif // BUILDFLAG(ENABLE_PDF)
base::WeakPtrFactory<AnnotatedPageContentRequest> weak_factory_{this}; base::WeakPtrFactory<AnnotatedPageContentRequest> weak_factory_{this};
}; };

@@ -262,6 +262,14 @@ void PDFDocumentHelper::GetMostVisiblePageIndex(
remote_pdf_client_->GetMostVisiblePageIndex(std::move(callback)); remote_pdf_client_->GetMostVisiblePageIndex(std::move(callback));
} }
// Registers `observer` in `observers_`, the list that is notified from
// `PDFDocumentHelper::OnDocumentLoadComplete()`.
void PDFDocumentHelper::AddObserver(Observer* observer) {
observers_.AddObserver(observer);
}
// Unregisters `observer` from `observers_`; the counterpart of
// `AddObserver()`.
void PDFDocumentHelper::RemoveObserver(Observer* observer) {
observers_.RemoveObserver(observer);
}
void PDFDocumentHelper::OnSelectionEvent(ui::SelectionEventType event) { void PDFDocumentHelper::OnSelectionEvent(ui::SelectionEventType event) {
// Should be handled by `TouchSelectionControllerClientAura`. // Should be handled by `TouchSelectionControllerClientAura`.
NOTREACHED(); NOTREACHED();
@@ -381,6 +389,14 @@ void PDFDocumentHelper::InitTouchSelectionClientManager() {
touch_selection_controller_client_manager_->AddObserver(this); touch_selection_controller_client_manager_->AddObserver(this);
} }
// pdf::mojom::PdfHost override. Latches the load-complete state and notifies
// the registered observers. Only the first call has any effect; repeated
// calls are ignored so that consumers are notified at most once.
void PDFDocumentHelper::OnDocumentLoadComplete() {
  if (is_document_load_complete_) {
    // Load completion was already reported to the observers.
    return;
  }

  is_document_load_complete_ = true;
  observers_.Notify(&Observer::OnDocumentLoadComplete);
}
void PDFDocumentHelper::SaveUrlAs(const GURL& url, void PDFDocumentHelper::SaveUrlAs(const GURL& url,
network::mojom::ReferrerPolicy policy) { network::mojom::ReferrerPolicy policy) {
client_->OnSaveURL(&GetWebContents()); client_->OnSaveURL(&GetWebContents());

@@ -8,6 +8,7 @@
#include <memory> #include <memory>
#include "base/memory/raw_ptr.h" #include "base/memory/raw_ptr.h"
#include "base/observer_list.h"
#include "content/public/browser/document_user_data.h" #include "content/public/browser/document_user_data.h"
#include "content/public/browser/render_frame_host_receiver_set.h" #include "content/public/browser/render_frame_host_receiver_set.h"
#include "content/public/browser/render_widget_host_observer.h" #include "content/public/browser/render_widget_host_observer.h"
@@ -38,6 +39,15 @@ class PDFDocumentHelper
public ui::TouchSelectionMenuClient, public ui::TouchSelectionMenuClient,
public content::TouchSelectionControllerClientManager::Observer { public content::TouchSelectionControllerClientManager::Observer {
public: public:
// Interface for consumers that need to know when the PDF document has finished
// loading. Register with `AddObserver()` and unregister with
// `RemoveObserver()`.
class Observer : public base::CheckedObserver {
public:
// Invoked when the document load completes successfully. It is NOT invoked
// when the PDF has already finished loading (callers should check
// `IsDocumentLoadComplete()` before deciding to wait), and it is NOT invoked
// when the load fails. Useful for waiting until document metadata is
// available before calling `GetPdfBytes()` or `GetPageText()`.
virtual void OnDocumentLoadComplete() {}
};
PDFDocumentHelper(const PDFDocumentHelper&) = delete; PDFDocumentHelper(const PDFDocumentHelper&) = delete;
PDFDocumentHelper& operator=(const PDFDocumentHelper&) = delete; PDFDocumentHelper& operator=(const PDFDocumentHelper&) = delete;
@@ -81,6 +91,7 @@ class PDFDocumentHelper
// pdf::mojom::PdfHost: // pdf::mojom::PdfHost:
void SetListener(mojo::PendingRemote<mojom::PdfListener> listener) override; void SetListener(mojo::PendingRemote<mojom::PdfListener> listener) override;
void OnDocumentLoadComplete() override;
void SaveUrlAs(const GURL& url, void SaveUrlAs(const GURL& url,
network::mojom::ReferrerPolicy policy) override; network::mojom::ReferrerPolicy policy) override;
void UpdateContentRestrictions(int32_t content_restrictions) override; void UpdateContentRestrictions(int32_t content_restrictions) override;
@@ -93,6 +104,10 @@ class PDFDocumentHelper
void OnSearchifyStarted() override; void OnSearchifyStarted() override;
#endif #endif
// Returns whether the document has finished loading. Once this is true, calls
// that depend on document metadata, such as `GetPdfBytes()` and
// `GetPageText()`, can be made.
bool IsDocumentLoadComplete() const { return is_document_load_complete_; }
void GetPdfBytes(uint32_t size_limit, void GetPdfBytes(uint32_t size_limit,
pdf::mojom::PdfListener::GetPdfBytesCallback callback); pdf::mojom::PdfListener::GetPdfBytesCallback callback);
@@ -101,6 +116,9 @@ class PDFDocumentHelper
void GetMostVisiblePageIndex( void GetMostVisiblePageIndex(
pdf::mojom::PdfListener::GetMostVisiblePageIndexCallback callback); pdf::mojom::PdfListener::GetMostVisiblePageIndexCallback callback);
void AddObserver(Observer* observer);
void RemoveObserver(Observer* observer);
private: private:
friend class content::DocumentUserData<PDFDocumentHelper>; friend class content::DocumentUserData<PDFDocumentHelper>;
@@ -130,8 +148,12 @@ class PDFDocumentHelper
int32_t selection_right_height_ = 0; int32_t selection_right_height_ = 0;
bool has_selection_ = false; bool has_selection_ = false;
bool is_document_load_complete_ = false;
mojo::Remote<mojom::PdfListener> remote_pdf_client_; mojo::Remote<mojom::PdfListener> remote_pdf_client_;
base::ObserverList<Observer> observers_;
DOCUMENT_USER_DATA_KEY_DECL(); DOCUMENT_USER_DATA_KEY_DECL();
}; };

@@ -55,6 +55,11 @@ class FakePdfListener : public pdf::mojom::PdfListener {
(override)); (override));
}; };
// gMock implementation of `PDFDocumentHelper::Observer`, used by the tests to
// set expectations on `OnDocumentLoadComplete()` notifications.
class FakePdfLoadObserver : public PDFDocumentHelper::Observer {
public:
MOCK_METHOD(void, OnDocumentLoadComplete, (), (override));
};
class TestPDFDocumentHelperClient : public PDFDocumentHelperClient { class TestPDFDocumentHelperClient : public PDFDocumentHelperClient {
public: public:
TestPDFDocumentHelperClient() = default; TestPDFDocumentHelperClient() = default;
@@ -266,6 +271,19 @@ IN_PROC_BROWSER_TEST_P(PDFDocumentHelperTest, DefaultImplementation) {
EXPECT_TRUE(pdf_document_helper()->GetSelectedText().empty()); EXPECT_TRUE(pdf_document_helper()->GetSelectedText().empty());
} }
// Verifies that `OnDocumentLoadComplete()` flips `IsDocumentLoadComplete()` to
// true and notifies a registered observer exactly once, even when the load
// complete signal is delivered more than once.
IN_PROC_BROWSER_TEST_P(PDFDocumentHelperTest, DocumentLoadComplete) {
  NiceMock<FakePdfLoadObserver> listener;
  EXPECT_FALSE(pdf_document_helper()->IsDocumentLoadComplete());
  pdf_document_helper()->AddObserver(&listener);

  // Set the only expectation before the mock is exercised. gMock does not
  // support alternating between EXPECT_CALL() and calls into the mock, so the
  // "no second notification" requirement is expressed as an exact call count.
  EXPECT_CALL(listener, OnDocumentLoadComplete).Times(1);

  pdf_document_helper()->OnDocumentLoadComplete();
  EXPECT_TRUE(pdf_document_helper()->IsDocumentLoadComplete());

  // A subsequent load complete must not trigger another listener call; if it
  // did, the Times(1) expectation above would be exceeded and the test fails.
  pdf_document_helper()->OnDocumentLoadComplete();
  EXPECT_TRUE(pdf_document_helper()->IsDocumentLoadComplete());

  // `listener` is a local, so unregister it before it is destroyed to keep the
  // helper's observer list from holding a pointer to a dead object.
  pdf_document_helper()->RemoveObserver(&listener);
}
// TODO(crbug.com/40268279): Stop testing both modes after OOPIF PDF viewer // TODO(crbug.com/40268279): Stop testing both modes after OOPIF PDF viewer
// launches. // launches.
INSTANTIATE_FEATURE_OVERRIDE_TEST_SUITE(PDFDocumentHelperTest); INSTANTIATE_FEATURE_OVERRIDE_TEST_SUITE(PDFDocumentHelperTest);

@@ -48,6 +48,10 @@ interface PdfListener {
interface PdfHost { interface PdfHost {
SetListener(pending_remote<PdfListener> client); SetListener(pending_remote<PdfListener> client);
// Invoked when document load is completed successfully. Will not be invoked
// if the load fails.
OnDocumentLoadComplete();
// Updates the content restrictions, i.e. to disable print/copy. // Updates the content restrictions, i.e. to disable print/copy.
UpdateContentRestrictions(int32 restrictions); UpdateContentRestrictions(int32 restrictions);

@@ -1309,6 +1309,7 @@ void PdfViewWebPlugin::DocumentLoadComplete() {
return; return;
DidStopLoading(); DidStopLoading();
pdf_host_->OnDocumentLoadComplete();
pdf_host_->UpdateContentRestrictions(GetContentRestrictions()); pdf_host_->UpdateContentRestrictions(GetContentRestrictions());
} }

@@ -161,8 +161,9 @@ MATCHER(SearchStringResultEq, "") {
} }
MATCHER_P(IsExpectedImeKeyEvent, expected_text, "") { MATCHER_P(IsExpectedImeKeyEvent, expected_text, "") {
if (arg.GetType() != blink::WebInputEvent::Type::kChar) if (arg.GetType() != blink::WebInputEvent::Type::kChar) {
return false; return false;
}
const auto& event = static_cast<const blink::WebKeyboardEvent&>(arg); const auto& event = static_cast<const blink::WebKeyboardEvent&>(arg);
return event.GetModifiers() == blink::WebInputEvent::kNoModifiers && return event.GetModifiers() == blink::WebInputEvent::kNoModifiers &&
@@ -365,6 +366,7 @@ class FakePdfHost : public pdf::mojom::PdfHost {
SetListener, SetListener,
(mojo::PendingRemote<pdf::mojom::PdfListener>), (mojo::PendingRemote<pdf::mojom::PdfListener>),
(override)); (override));
MOCK_METHOD(void, OnDocumentLoadComplete, (), (override));
MOCK_METHOD(void, UpdateContentRestrictions, (int32_t), (override)); MOCK_METHOD(void, UpdateContentRestrictions, (int32_t), (override));
MOCK_METHOD(void, MOCK_METHOD(void,
SaveUrlAs, SaveUrlAs,
@@ -739,6 +741,7 @@ TEST_F(PdfViewWebPluginFullFrameTest, DocumentLoadComplete) {
kContentRestrictionPaste | kContentRestrictionPaste |
kContentRestrictionCut | kContentRestrictionCut |
kContentRestrictionCopy)); kContentRestrictionCopy));
EXPECT_CALL(pdf_host_, OnDocumentLoadComplete);
plugin_->DocumentLoadComplete(); plugin_->DocumentLoadComplete();
EXPECT_EQ(PdfViewWebPlugin::DocumentLoadState::kComplete, EXPECT_EQ(PdfViewWebPlugin::DocumentLoadState::kComplete,
@@ -751,6 +754,7 @@ TEST_F(PdfViewWebPluginTest, DocumentLoadFailed) {
EXPECT_CALL(*client_ptr_, RecordComputedAction("PDF.LoadFailure")); EXPECT_CALL(*client_ptr_, RecordComputedAction("PDF.LoadFailure"));
EXPECT_CALL(*client_ptr_, DidStopLoading).Times(0); EXPECT_CALL(*client_ptr_, DidStopLoading).Times(0);
EXPECT_CALL(pdf_host_, OnDocumentLoadComplete).Times(0);
plugin_->DocumentLoadFailed(); plugin_->DocumentLoadFailed();
EXPECT_EQ(PdfViewWebPlugin::DocumentLoadState::kFailed, EXPECT_EQ(PdfViewWebPlugin::DocumentLoadState::kFailed,