0

Save extracted text for searchified PDFs.

If a PDF is searchified, the extracted text is written to the file when
the PDF is saved.
This is done behind the `chrome_pdf::features::kPdfSearchifySave` flag,
which is disabled by default.
In a follow-up CL (http://crrev.com/c/6110470), the user will be given the
option to save either the original PDF or the PDF with extracted text.

Bug: 382610226
Change-Id: I73e1028ce60211e8bed165ffaff7f6630c9103c4
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/6049156
Reviewed-by: Andy Phan <andyphan@chromium.org>
Reviewed-by: Lei Zhang <thestig@chromium.org>
Reviewed-by: Alan Screen <awscreen@chromium.org>
Commit-Queue: Ramin Halavati <rhalavati@chromium.org>
Cr-Commit-Position: refs/heads/main@{#1407724}
This commit is contained in:
Ramin Halavati
2025-01-16 21:33:54 -08:00
committed by Chromium LUCI CQ
parent 3a5cb33935
commit 82ee6dc25c
6 changed files with 43 additions and 2 deletions
chrome/browser/resources/pdf
pdf
tools/metrics/histograms/metadata/pdf

@ -88,6 +88,7 @@ export enum SaveRequestType {
ANNOTATION,
ORIGINAL,
EDITED,
SEARCHIFIED, // Saves the PDF with extracted text.
}
export interface Point {

@ -261,7 +261,11 @@ export enum UserAction {
OPEN_INK2_BOTTOM_TOOLBAR_FIRST = 97,
OPEN_INK2_BOTTOM_TOOLBAR = 98,
NUMBER_OF_ACTIONS = 99,
// Recorded when the user triggers a save of the searchified document.
SAVE_SEARCHIFIED_FIRST = 99,
SAVE_SEARCHIFIED = 100,
NUMBER_OF_ACTIONS = 101,
}
function createFirstMap(): Map<UserAction, UserAction> {

@ -1155,6 +1155,8 @@ export class PdfViewerElement extends PdfViewerBaseElement {
saveMode = SaveRequestType.ANNOTATION;
} else if (this.hasEdits_) {
saveMode = SaveRequestType.EDITED;
} else if (this.hasSearchifyText_) {
saveMode = SaveRequestType.SEARCHIFIED;
} else {
saveMode = SaveRequestType.ORIGINAL;
}
@ -1292,6 +1294,11 @@ export class PdfViewerElement extends PdfViewerBaseElement {
private async save_(requestType: SaveRequestType) {
this.recordSaveMetrics_(requestType);
// TODO(crbug.com/382610226): Update for `SaveRequestType.SEARCHIFIED` to
// let users choose between saving the original PDF and the text-extracted
// one. To do so, the save type should be requested first, and the content
// then fetched based on the selected type.
// If we have entered annotation mode we must require the local
// contents to ensure annotations are saved, unless the user specifically
// requested the original document. Otherwise we would save the cached
@ -1436,6 +1443,12 @@ export class PdfViewerElement extends PdfViewerBaseElement {
case SaveRequestType.EDITED:
record(UserAction.SAVE_EDITED);
break;
case SaveRequestType.SEARCHIFIED:
// TODO(crbug.com/382610226): Update metric after the code is updated to
// give users the option to save searchified or original PDF, and add
// test.
record(UserAction.SAVE_SEARCHIFIED);
break;
}
}

@ -1499,6 +1499,9 @@ void PdfViewWebPlugin::OnHasSearchifyText() {
message.Set("type", "setHasSearchifyText");
client_->PostMessage(std::move(message));
pdf_accessibility_data_handler_->OnHasSearchifyText();
if (chrome_pdf::features::IsPdfSearchifySaveEnabled()) {
SetPluginCanSave(true);
}
}
#endif // BUILDFLAG(ENABLE_SCREEN_AI_SERVICE)
@ -1778,6 +1781,18 @@ void PdfViewWebPlugin::HandleSaveMessage(const base::Value::Dict& message) {
case SaveRequestType::kEdited:
SaveToBuffer(request_type, token);
return;
case SaveRequestType::kSearchified:
#if BUILDFLAG(ENABLE_SCREEN_AI_SERVICE)
CHECK(chrome_pdf::features::IsPdfSearchifySaveEnabled());
// TODO(crbug.com/382610226): If engine has searchified text, ensure all
// pages are searchified and then save.
SaveToBuffer(request_type, token);
return;
#else
// PDF Searchify is not expected to be triggered when ScreenAI service is
// not enabled.
NOTREACHED();
#endif
}
NOTREACHED();
}
@ -1947,7 +1962,8 @@ void PdfViewWebPlugin::HandleViewportMessage(const base::Value::Dict& message) {
void PdfViewWebPlugin::SaveToBuffer(SaveRequestType request_type,
const std::string& token) {
CHECK(request_type == SaveRequestType::kAnnotation ||
request_type == SaveRequestType::kEdited);
request_type == SaveRequestType::kEdited ||
request_type == SaveRequestType::kSearchified);
engine_->KillFormFocus();
@ -1966,6 +1982,10 @@ void PdfViewWebPlugin::SaveToBuffer(SaveRequestType request_type,
use_save_data |= !!ink_module_;
#endif // BUILDFLAG(ENABLE_PDF_INK2)
#if BUILDFLAG(ENABLE_SCREEN_AI_SERVICE)
use_save_data |= (request_type == SaveRequestType::kSearchified);
#endif
if (use_save_data) {
base::Value::BlobStorage data = engine_->GetSaveData();
if (IsSaveDataSizeValid(data.size())) {

@ -113,6 +113,7 @@ class PdfViewWebPlugin final : public PDFiumEngineClient,
kAnnotation = 0,
kOriginal = 1,
kEdited = 2,
kSearchified = 3,
};
// Provides services from the plugin's container.

@ -126,6 +126,8 @@ chromium-metrics-reviews@google.com.
<int value="96" label="OpenInk2SidePanel"/>
<int value="97" label="OpenInk2BottomToolbarFirst"/>
<int value="98" label="OpenInk2BottomToolbar"/>
<int value="99" label="SaveSearchifiedFirst"/>
<int value="100" label="SaveSearchified"/>
</enum>
<enum name="ChromePDFViewerLoadStatus">