0

Save extracted text for searchified PDFs.

If a PDF is searchified, the extracted text is written to the file when
the PDF is saved.
This is done behind the `chrome_pdf::features::kPdfSearchifySave` flag,
which is disabled by default.
In a follow-up CL (http://crrev.com/c/6110470), the user will be given the
option to save either the original PDF or the PDF with extracted text.

Bug: 382610226
Change-Id: I73e1028ce60211e8bed165ffaff7f6630c9103c4
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/6049156
Reviewed-by: Andy Phan <andyphan@chromium.org>
Reviewed-by: Lei Zhang <thestig@chromium.org>
Reviewed-by: Alan Screen <awscreen@chromium.org>
Commit-Queue: Ramin Halavati <rhalavati@chromium.org>
Cr-Commit-Position: refs/heads/main@{#1407724}
This commit is contained in:
Ramin Halavati
2025-01-16 21:33:54 -08:00
committed by Chromium LUCI CQ
parent 3a5cb33935
commit 82ee6dc25c
6 changed files with 43 additions and 2 deletions
chrome/browser/resources/pdf
pdf
tools/metrics/histograms/metadata/pdf

@ -88,6 +88,7 @@ export enum SaveRequestType {
ANNOTATION, ANNOTATION,
ORIGINAL, ORIGINAL,
EDITED, EDITED,
SEARCHIFIED, // Saves the PDF with extracted text.
} }
export interface Point { export interface Point {

@ -261,7 +261,11 @@ export enum UserAction {
OPEN_INK2_BOTTOM_TOOLBAR_FIRST = 97, OPEN_INK2_BOTTOM_TOOLBAR_FIRST = 97,
OPEN_INK2_BOTTOM_TOOLBAR = 98, OPEN_INK2_BOTTOM_TOOLBAR = 98,
NUMBER_OF_ACTIONS = 99, // Recorded when the user triggers a save of the searchified document.
SAVE_SEARCHIFIED_FIRST = 99,
SAVE_SEARCHIFIED = 100,
NUMBER_OF_ACTIONS = 101,
} }
function createFirstMap(): Map<UserAction, UserAction> { function createFirstMap(): Map<UserAction, UserAction> {

@ -1155,6 +1155,8 @@ export class PdfViewerElement extends PdfViewerBaseElement {
saveMode = SaveRequestType.ANNOTATION; saveMode = SaveRequestType.ANNOTATION;
} else if (this.hasEdits_) { } else if (this.hasEdits_) {
saveMode = SaveRequestType.EDITED; saveMode = SaveRequestType.EDITED;
} else if (this.hasSearchifyText_) {
saveMode = SaveRequestType.SEARCHIFIED;
} else { } else {
saveMode = SaveRequestType.ORIGINAL; saveMode = SaveRequestType.ORIGINAL;
} }
@ -1292,6 +1294,11 @@ export class PdfViewerElement extends PdfViewerBaseElement {
private async save_(requestType: SaveRequestType) { private async save_(requestType: SaveRequestType) {
this.recordSaveMetrics_(requestType); this.recordSaveMetrics_(requestType);
// TODO(crbug.com/382610226): Update for `SaveRequestType.SEARCHIFIED` to
// allow users to select saving original PDF or text extracted one.
// To do so, the save type should be asked first, and then content would be
// fetched based on the selected type.
// If we have entered annotation mode we must require the local // If we have entered annotation mode we must require the local
// contents to ensure annotations are saved, unless the user specifically // contents to ensure annotations are saved, unless the user specifically
// requested the original document. Otherwise we would save the cached // requested the original document. Otherwise we would save the cached
@ -1436,6 +1443,12 @@ export class PdfViewerElement extends PdfViewerBaseElement {
case SaveRequestType.EDITED: case SaveRequestType.EDITED:
record(UserAction.SAVE_EDITED); record(UserAction.SAVE_EDITED);
break; break;
case SaveRequestType.SEARCHIFIED:
// TODO(crbug.com/382610226): Update metric after the code is updated to
// give users the option to save searchified or original PDF, and add
// test.
record(UserAction.SAVE_SEARCHIFIED);
break;
} }
} }

@ -1499,6 +1499,9 @@ void PdfViewWebPlugin::OnHasSearchifyText() {
message.Set("type", "setHasSearchifyText"); message.Set("type", "setHasSearchifyText");
client_->PostMessage(std::move(message)); client_->PostMessage(std::move(message));
pdf_accessibility_data_handler_->OnHasSearchifyText(); pdf_accessibility_data_handler_->OnHasSearchifyText();
if (chrome_pdf::features::IsPdfSearchifySaveEnabled()) {
SetPluginCanSave(true);
}
} }
#endif // BUILDFLAG(ENABLE_SCREEN_AI_SERVICE) #endif // BUILDFLAG(ENABLE_SCREEN_AI_SERVICE)
@ -1778,6 +1781,18 @@ void PdfViewWebPlugin::HandleSaveMessage(const base::Value::Dict& message) {
case SaveRequestType::kEdited: case SaveRequestType::kEdited:
SaveToBuffer(request_type, token); SaveToBuffer(request_type, token);
return; return;
case SaveRequestType::kSearchified:
#if BUILDFLAG(ENABLE_SCREEN_AI_SERVICE)
CHECK(chrome_pdf::features::IsPdfSearchifySaveEnabled());
// TODO(crbug.com/382610226): If engine has searchified text, ensure all
// pages are searchified and then save.
SaveToBuffer(request_type, token);
return;
#else
// PDF Searchify is not expected to be triggered when ScreenAI service is
// not enabled.
NOTREACHED();
#endif
} }
NOTREACHED(); NOTREACHED();
} }
@ -1947,7 +1962,8 @@ void PdfViewWebPlugin::HandleViewportMessage(const base::Value::Dict& message) {
void PdfViewWebPlugin::SaveToBuffer(SaveRequestType request_type, void PdfViewWebPlugin::SaveToBuffer(SaveRequestType request_type,
const std::string& token) { const std::string& token) {
CHECK(request_type == SaveRequestType::kAnnotation || CHECK(request_type == SaveRequestType::kAnnotation ||
request_type == SaveRequestType::kEdited); request_type == SaveRequestType::kEdited ||
request_type == SaveRequestType::kSearchified);
engine_->KillFormFocus(); engine_->KillFormFocus();
@ -1966,6 +1982,10 @@ void PdfViewWebPlugin::SaveToBuffer(SaveRequestType request_type,
use_save_data |= !!ink_module_; use_save_data |= !!ink_module_;
#endif // BUILDFLAG(ENABLE_PDF_INK2) #endif // BUILDFLAG(ENABLE_PDF_INK2)
#if BUILDFLAG(ENABLE_SCREEN_AI_SERVICE)
use_save_data |= (request_type == SaveRequestType::kSearchified);
#endif
if (use_save_data) { if (use_save_data) {
base::Value::BlobStorage data = engine_->GetSaveData(); base::Value::BlobStorage data = engine_->GetSaveData();
if (IsSaveDataSizeValid(data.size())) { if (IsSaveDataSizeValid(data.size())) {

@ -113,6 +113,7 @@ class PdfViewWebPlugin final : public PDFiumEngineClient,
kAnnotation = 0, kAnnotation = 0,
kOriginal = 1, kOriginal = 1,
kEdited = 2, kEdited = 2,
kSearchified = 3,
}; };
// Provides services from the plugin's container. // Provides services from the plugin's container.

@ -126,6 +126,8 @@ chromium-metrics-reviews@google.com.
<int value="96" label="OpenInk2SidePanel"/> <int value="96" label="OpenInk2SidePanel"/>
<int value="97" label="OpenInk2BottomToolbarFirst"/> <int value="97" label="OpenInk2BottomToolbarFirst"/>
<int value="98" label="OpenInk2BottomToolbar"/> <int value="98" label="OpenInk2BottomToolbar"/>
<int value="99" label="SaveSearchifiedFirst"/>
<int value="100" label="SaveSearchified"/>
</enum> </enum>
<enum name="ChromePDFViewerLoadStatus"> <enum name="ChromePDFViewerLoadStatus">