0

Add pdf document load observer

Adds an observer for the PDF document load-complete event.
Also changes the consumers (contextual cueing, page_content_annotations) to wait for that event before querying the page count.

Change-Id: Iecb4571aeb61c52dde6fe1bc770e5bdb372a1525
Bug: 395086836
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/6250270
Reviewed-by: Lei Zhang <thestig@chromium.org>
Commit-Queue: Raj T <rajendrant@chromium.org>
Reviewed-by: Will Harris <wfh@chromium.org>
Reviewed-by: Duncan Mercer <mercerd@google.com>
Cr-Commit-Position: refs/heads/main@{#1420001}
This commit is contained in:
rajendrant
2025-02-13 10:46:28 -08:00
committed by Chromium LUCI CQ
parent 1d0a99e1a2
commit 456b83c54e
10 changed files with 160 additions and 10 deletions

@ -116,10 +116,31 @@ ContextualCueingPageData::DidMatchCueingConditions(
#if BUILDFLAG(ENABLE_PDF)
// Requests the total page count of the PDF shown in this page.
//
// Does nothing if no `pdf::PDFDocumentHelper` exists for the WebContents. If
// the helper reports that the document has not finished loading, this starts
// observing the helper and returns without issuing a request. Otherwise it
// asks the helper for the PDF bytes with a size limit of zero; the result is
// delivered to `OnPdfPageCountReceived()` through a weak pointer.
//
// The page must be a PDF page: the MIME type is CHECKed on entry.
void ContextualCueingPageData::RequestPdfPageCount() {
CHECK(page().GetContentsMimeType() == pdf::kPDFMimeType);
pdf::PDFDocumentHelper* pdf_helper =
pdf::PDFDocumentHelper::MaybeGetForWebContents(
content::WebContents::FromRenderFrameHost(&page().GetMainDocument()));
CHECK_EQ(pdf::kPDFMimeType, page().GetContentsMimeType());
auto* pdf_helper = pdf::PDFDocumentHelper::MaybeGetForWebContents(
content::WebContents::FromRenderFrameHost(&page().GetMainDocument()));
if (!pdf_helper) {
return;
}
if (!pdf_helper->IsDocumentLoadComplete()) {
// Wait for the PDF to load.
// NOTE(review): `pdf_load_obseration_` looks like a typo for
// `pdf_load_observation_`; renaming needs a matching header change.
// NOTE(review): presumably `Observe()` must not be called while an
// observation is already active - confirm this method cannot run twice
// before the document finishes loading.
pdf_load_obseration_.Observe(pdf_helper);
return;
}
// Fetch zero PDF bytes to just receive the total page count.
pdf_helper->GetPdfBytes(
/*size_limit=*/0,
base::BindOnce(&ContextualCueingPageData::OnPdfPageCountReceived,
weak_factory_.GetWeakPtr()));
}
void ContextualCueingPageData::OnDocumentLoadComplete() {
CHECK_EQ(pdf::kPDFMimeType, page().GetContentsMimeType());
pdf_load_obseration_.Reset();
auto* pdf_helper = pdf::PDFDocumentHelper::MaybeGetForWebContents(
content::WebContents::FromRenderFrameHost(&page().GetMainDocument()));
if (pdf_helper) {
// Fetch zero PDF bytes to just receive the total page count.
pdf_helper->GetPdfBytes(

@ -5,17 +5,25 @@
#ifndef CHROME_BROWSER_CONTEXTUAL_CUEING_CONTEXTUAL_CUEING_PAGE_DATA_H_
#define CHROME_BROWSER_CONTEXTUAL_CUEING_CONTEXTUAL_CUEING_PAGE_DATA_H_
#include "base/scoped_observation.h"
#include "components/optimization_guide/proto/contextual_cueing_metadata.pb.h"
#include "components/pdf/common/constants.h"
#include "content/public/browser/page_user_data.h"
#include "pdf/buildflags.h"
#include "pdf/mojom/pdf.mojom.h"
#if BUILDFLAG(ENABLE_PDF)
#include "components/pdf/browser/pdf_document_helper.h"
#endif // BUILDFLAG(ENABLE_PDF)
namespace contextual_cueing {
// Decider for contextual cueing that is scoped to `Page`.
class ContextualCueingPageData
: public content::PageUserData<ContextualCueingPageData> {
class ContextualCueingPageData :
#if BUILDFLAG(ENABLE_PDF)
public pdf::PDFDocumentHelper::Observer,
#endif // BUILDFLAG(ENABLE_PDF)
public content::PageUserData<ContextualCueingPageData> {
public:
using CueingDecisionCallback = base::OnceCallback<void(const std::string&)>;
@ -48,11 +56,16 @@ class ContextualCueingPageData
const optimization_guide::proto::GlicCueingConfiguration& config);
#if BUILDFLAG(ENABLE_PDF)
// Requests for page count if this is a PDF page.
void RequestPdfPageCount();
// Invoked when page count is received.
void OnPdfPageCountReceived(pdf::mojom::PdfListener::GetPdfBytesStatus status,
const std::vector<uint8_t>& bytes,
uint32_t page_count);
// pdf::PDFDocumentHelper::Observer:
void OnDocumentLoadComplete() override;
#endif // BUILDFLAG(ENABLE_PDF)
const optimization_guide::proto::GlicContextualCueingMetadata metadata_;
@ -64,6 +77,12 @@ class ContextualCueingPageData
CueingDecisionCallback cueing_decision_callback_;
#if BUILDFLAG(ENABLE_PDF)
base::ScopedObservation<pdf::PDFDocumentHelper,
pdf::PDFDocumentHelper::Observer>
pdf_load_obseration_{this};
#endif // BUILDFLAG(ENABLE_PDF)
base::WeakPtrFactory<ContextualCueingPageData> weak_factory_{this};
PAGE_USER_DATA_KEY_DECL();

@ -228,8 +228,30 @@ void AnnotatedPageContentRequest::OnInnerTextReceived(
#if BUILDFLAG(ENABLE_PDF)
// Requests the total page count of the PDF shown in `web_contents_` so that
// it can be recorded by `RecordPdfPageCountMetrics`.
//
// Does nothing if no `pdf::PDFDocumentHelper` exists for the WebContents. If
// the helper reports that the document has not finished loading, this starts
// observing the helper and returns without issuing a request. Otherwise it
// asks the helper for the PDF bytes with a size limit of zero, binding the
// primary main frame's UKM source id into the callback.
//
// The contents must be a PDF: the MIME type is CHECKed on entry.
void AnnotatedPageContentRequest::RequestPdfPageCount() {
CHECK(web_contents_->GetContentsMimeType() == pdf::kPDFMimeType);
pdf::PDFDocumentHelper* pdf_helper =
CHECK_EQ(pdf::kPDFMimeType, web_contents_->GetContentsMimeType());
auto* pdf_helper =
pdf::PDFDocumentHelper::MaybeGetForWebContents(web_contents_);
if (!pdf_helper) {
return;
}
if (!pdf_helper->IsDocumentLoadComplete()) {
// Wait for the PDF to load.
// NOTE(review): `pdf_load_obseration_` looks like a typo for
// `pdf_load_observation_`; renaming needs a matching header change.
// NOTE(review): presumably `Observe()` must not be called while an
// observation is already active - confirm this method cannot run twice
// before the document finishes loading.
pdf_load_obseration_.Observe(pdf_helper);
return;
}
// Fetch zero PDF bytes to just receive the total page count.
pdf_helper->GetPdfBytes(
/*size_limit=*/0,
base::BindOnce(
&RecordPdfPageCountMetrics,
web_contents_->GetPrimaryMainFrame()->GetPageUkmSourceId()));
}
void AnnotatedPageContentRequest::OnDocumentLoadComplete() {
CHECK_EQ(pdf::kPDFMimeType, web_contents_->GetContentsMimeType());
pdf_load_obseration_.Reset();
auto* pdf_helper =
pdf::PDFDocumentHelper::MaybeGetForWebContents(web_contents_);
if (pdf_helper) {
// Fetch zero PDF bytes to just receive the total page count.

@ -1,20 +1,30 @@
// Copyright 2025 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef CHROME_BROWSER_PAGE_CONTENT_ANNOTATIONS_PAGE_CONTENT_ANNOTATIONS_ANNOTATE_PAGE_CONTENT_REQUEST_H_
#define CHROME_BROWSER_PAGE_CONTENT_ANNOTATIONS_PAGE_CONTENT_ANNOTATIONS_ANNOTATE_PAGE_CONTENT_REQUEST_H_
#include "base/scoped_observation.h"
#include "chrome/browser/content_extraction/inner_text.h"
#include "components/optimization_guide/proto/features/common_quality_data.pb.h"
#include "content/public/browser/web_contents.h"
#include "pdf/buildflags.h"
#include "third_party/blink/public/mojom/content_extraction/ai_page_content.mojom.h"
#if BUILDFLAG(ENABLE_PDF)
#include "components/pdf/browser/pdf_document_helper.h"
#endif // BUILDFLAG(ENABLE_PDF)
namespace page_content_annotations {
// Class for deciding when a page is ready for getting page content, and
// extracts page content.
class AnnotatedPageContentRequest {
class AnnotatedPageContentRequest
#if BUILDFLAG(ENABLE_PDF)
: public pdf::PDFDocumentHelper::Observer
#endif // BUILDFLAG(ENABLE_PDF)
{
public:
static std::unique_ptr<AnnotatedPageContentRequest> MaybeCreate(
content::WebContents* web_contents);
@ -25,7 +35,11 @@ class AnnotatedPageContentRequest {
AnnotatedPageContentRequest(const AnnotatedPageContentRequest&) = delete;
AnnotatedPageContentRequest& operator=(const AnnotatedPageContentRequest&) =
delete;
#if BUILDFLAG(ENABLE_PDF)
~AnnotatedPageContentRequest() override;
#else
~AnnotatedPageContentRequest();
#endif // BUILDFLAG(ENABLE_PDF)
void PrimaryPageChanged();
@ -53,6 +67,9 @@ class AnnotatedPageContentRequest {
#if BUILDFLAG(ENABLE_PDF)
void RequestPdfPageCount();
// pdf::PDFDocumentHelper::Observer:
void OnDocumentLoadComplete() override;
#endif // BUILDFLAG(ENABLE_PDF)
const raw_ptr<content::WebContents> web_contents_;
@ -66,6 +83,12 @@ class AnnotatedPageContentRequest {
bool waiting_for_load_ = false;
bool waiting_for_fcp_ = false;
#if BUILDFLAG(ENABLE_PDF)
base::ScopedObservation<pdf::PDFDocumentHelper,
pdf::PDFDocumentHelper::Observer>
pdf_load_obseration_{this};
#endif // BUILDFLAG(ENABLE_PDF)
base::WeakPtrFactory<AnnotatedPageContentRequest> weak_factory_{this};
};

@ -262,6 +262,14 @@ void PDFDocumentHelper::GetMostVisiblePageIndex(
remote_pdf_client_->GetMostVisiblePageIndex(std::move(callback));
}
// Adds `observer` to the list notified by `OnDocumentLoadComplete()`.
// An observer added after the document has already finished loading is not
// notified retroactively; callers should check `IsDocumentLoadComplete()`
// first.
void PDFDocumentHelper::AddObserver(Observer* observer) {
observers_.AddObserver(observer);
}
// Removes `observer` from the list so it no longer receives load-complete
// notifications from this helper.
void PDFDocumentHelper::RemoveObserver(Observer* observer) {
observers_.RemoveObserver(observer);
}
void PDFDocumentHelper::OnSelectionEvent(ui::SelectionEventType event) {
// Should be handled by `TouchSelectionControllerClientAura`.
NOTREACHED();
@ -381,6 +389,14 @@ void PDFDocumentHelper::InitTouchSelectionClientManager() {
touch_selection_controller_client_manager_->AddObserver(this);
}
// pdf::mojom::PdfHost implementation. Records that the document has finished
// loading and tells the registered observers about it.
void PDFDocumentHelper::OnDocumentLoadComplete() {
  // Consumers are only told about the first load-complete message; repeated
  // messages are ignored.
  if (is_document_load_complete_) {
    return;
  }

  is_document_load_complete_ = true;
  observers_.Notify(&Observer::OnDocumentLoadComplete);
}
void PDFDocumentHelper::SaveUrlAs(const GURL& url,
network::mojom::ReferrerPolicy policy) {
client_->OnSaveURL(&GetWebContents());

@ -8,6 +8,7 @@
#include <memory>
#include "base/memory/raw_ptr.h"
#include "base/observer_list.h"
#include "content/public/browser/document_user_data.h"
#include "content/public/browser/render_frame_host_receiver_set.h"
#include "content/public/browser/render_widget_host_observer.h"
@ -38,6 +39,15 @@ class PDFDocumentHelper
public ui::TouchSelectionMenuClient,
public content::TouchSelectionControllerClientManager::Observer {
public:
// Interface for objects that want to be notified about events on this PDF
// document. Register with `AddObserver()` and unregister with
// `RemoveObserver()`.
class Observer : public base::CheckedObserver {
public:
// Invoked when the document load completes successfully. Not invoked if
// the PDF had already finished loading when the observer was added, and not
// invoked when the load fails. Use this to wait for the document metadata
// to be available before calling `GetPdfBytes` or `GetPageText`.
// The default implementation does nothing.
virtual void OnDocumentLoadComplete() {}
};
PDFDocumentHelper(const PDFDocumentHelper&) = delete;
PDFDocumentHelper& operator=(const PDFDocumentHelper&) = delete;
@ -81,6 +91,7 @@ class PDFDocumentHelper
// pdf::mojom::PdfHost:
void SetListener(mojo::PendingRemote<mojom::PdfListener> listener) override;
void OnDocumentLoadComplete() override;
void SaveUrlAs(const GURL& url,
network::mojom::ReferrerPolicy policy) override;
void UpdateContentRestrictions(int32_t content_restrictions) override;
@ -93,6 +104,10 @@ class PDFDocumentHelper
void OnSearchifyStarted() override;
#endif
// Returns whether the document has finished loading. Once it has, calls that
// read document data, such as `GetPdfBytes` and `GetPageText`, can be made.
bool IsDocumentLoadComplete() const { return is_document_load_complete_; }
void GetPdfBytes(uint32_t size_limit,
pdf::mojom::PdfListener::GetPdfBytesCallback callback);
@ -101,6 +116,9 @@ class PDFDocumentHelper
void GetMostVisiblePageIndex(
pdf::mojom::PdfListener::GetMostVisiblePageIndexCallback callback);
void AddObserver(Observer* observer);
void RemoveObserver(Observer* observer);
private:
friend class content::DocumentUserData<PDFDocumentHelper>;
@ -130,8 +148,12 @@ class PDFDocumentHelper
int32_t selection_right_height_ = 0;
bool has_selection_ = false;
bool is_document_load_complete_ = false;
mojo::Remote<mojom::PdfListener> remote_pdf_client_;
base::ObserverList<Observer> observers_;
DOCUMENT_USER_DATA_KEY_DECL();
};

@ -55,6 +55,11 @@ class FakePdfListener : public pdf::mojom::PdfListener {
(override));
};
// gMock implementation of `PDFDocumentHelper::Observer`, used to verify when
// and how often the load-complete notification is delivered.
class FakePdfLoadObserver : public PDFDocumentHelper::Observer {
public:
MOCK_METHOD(void, OnDocumentLoadComplete, (), (override));
};
class TestPDFDocumentHelperClient : public PDFDocumentHelperClient {
public:
TestPDFDocumentHelperClient() = default;
@ -266,6 +271,19 @@ IN_PROC_BROWSER_TEST_P(PDFDocumentHelperTest, DefaultImplementation) {
EXPECT_TRUE(pdf_document_helper()->GetSelectedText().empty());
}
// Verifies that observers are notified exactly once, on the first
// load-complete message, and that `IsDocumentLoadComplete()` reflects it.
IN_PROC_BROWSER_TEST_P(PDFDocumentHelperTest, DocumentLoadComplete) {
  NiceMock<FakePdfLoadObserver> listener;
  // gMock requires expectations to be set before the mock is handed to the
  // code under test or invoked. A single up-front expectation covers both
  // load-complete messages below: the first must notify, and a notification
  // on the second would exceed `Times(1)` and fail the test.
  EXPECT_CALL(listener, OnDocumentLoadComplete).Times(1);

  EXPECT_FALSE(pdf_document_helper()->IsDocumentLoadComplete());
  pdf_document_helper()->AddObserver(&listener);

  pdf_document_helper()->OnDocumentLoadComplete();
  EXPECT_TRUE(pdf_document_helper()->IsDocumentLoadComplete());

  // Subsequent load complete should not trigger listener calls.
  pdf_document_helper()->OnDocumentLoadComplete();
  EXPECT_TRUE(pdf_document_helper()->IsDocumentLoadComplete());

  // Unregister before the stack-allocated observer is destroyed so the helper
  // never holds a stale entry.
  pdf_document_helper()->RemoveObserver(&listener);
}
// TODO(crbug.com/40268279): Stop testing both modes after OOPIF PDF viewer
// launches.
INSTANTIATE_FEATURE_OVERRIDE_TEST_SUITE(PDFDocumentHelperTest);

@ -48,6 +48,10 @@ interface PdfListener {
interface PdfHost {
SetListener(pending_remote<PdfListener> client);
// Invoked when document load is completed successfully. Will not be invoked
// if the load fails.
OnDocumentLoadComplete();
// Updates the content restrictions, i.e. to disable print/copy.
UpdateContentRestrictions(int32 restrictions);

@ -1309,6 +1309,7 @@ void PdfViewWebPlugin::DocumentLoadComplete() {
return;
DidStopLoading();
pdf_host_->OnDocumentLoadComplete();
pdf_host_->UpdateContentRestrictions(GetContentRestrictions());
}

@ -161,8 +161,9 @@ MATCHER(SearchStringResultEq, "") {
}
MATCHER_P(IsExpectedImeKeyEvent, expected_text, "") {
if (arg.GetType() != blink::WebInputEvent::Type::kChar)
if (arg.GetType() != blink::WebInputEvent::Type::kChar) {
return false;
}
const auto& event = static_cast<const blink::WebKeyboardEvent&>(arg);
return event.GetModifiers() == blink::WebInputEvent::kNoModifiers &&
@ -365,6 +366,7 @@ class FakePdfHost : public pdf::mojom::PdfHost {
SetListener,
(mojo::PendingRemote<pdf::mojom::PdfListener>),
(override));
MOCK_METHOD(void, OnDocumentLoadComplete, (), (override));
MOCK_METHOD(void, UpdateContentRestrictions, (int32_t), (override));
MOCK_METHOD(void,
SaveUrlAs,
@ -739,6 +741,7 @@ TEST_F(PdfViewWebPluginFullFrameTest, DocumentLoadComplete) {
kContentRestrictionPaste |
kContentRestrictionCut |
kContentRestrictionCopy));
EXPECT_CALL(pdf_host_, OnDocumentLoadComplete);
plugin_->DocumentLoadComplete();
EXPECT_EQ(PdfViewWebPlugin::DocumentLoadState::kComplete,
@ -751,6 +754,7 @@ TEST_F(PdfViewWebPluginTest, DocumentLoadFailed) {
EXPECT_CALL(*client_ptr_, RecordComputedAction("PDF.LoadFailure"));
EXPECT_CALL(*client_ptr_, DidStopLoading).Times(0);
EXPECT_CALL(pdf_host_, OnDocumentLoadComplete).Times(0);
plugin_->DocumentLoadFailed();
EXPECT_EQ(PdfViewWebPlugin::DocumentLoadState::kFailed,