Save extracted text for searchified PDFs.
If a PDF is searchified, the extracted text is written to the file when the PDF is saved. This is done behind the `chrome_pdf::features::kPdfSearchifySave` flag, which is disabled by default. In a follow-up CL (http://crrev.com/c/6110470), the user will be given the option to save either the original PDF or the PDF with extracted text. Bug: 382610226 Change-Id: I73e1028ce60211e8bed165ffaff7f6630c9103c4 Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/6049156 Reviewed-by: Andy Phan <andyphan@chromium.org> Reviewed-by: Lei Zhang <thestig@chromium.org> Reviewed-by: Alan Screen <awscreen@chromium.org> Commit-Queue: Ramin Halavati <rhalavati@chromium.org> Cr-Commit-Position: refs/heads/main@{#1407724}
This commit is contained in:

committed by
Chromium LUCI CQ

parent
3a5cb33935
commit
82ee6dc25c
chrome/browser/resources/pdf
pdf
tools/metrics/histograms/metadata/pdf
@ -88,6 +88,7 @@ export enum SaveRequestType {
|
||||
ANNOTATION,
|
||||
ORIGINAL,
|
||||
EDITED,
|
||||
SEARCHIFIED, // Saves the PDF with extracted text.
|
||||
}
|
||||
|
||||
export interface Point {
|
||||
|
@ -261,7 +261,11 @@ export enum UserAction {
|
||||
OPEN_INK2_BOTTOM_TOOLBAR_FIRST = 97,
|
||||
OPEN_INK2_BOTTOM_TOOLBAR = 98,
|
||||
|
||||
NUMBER_OF_ACTIONS = 99,
|
||||
// Recorded when the user triggers a save of the searchified document.
|
||||
SAVE_SEARCHIFIED_FIRST = 99,
|
||||
SAVE_SEARCHIFIED = 100,
|
||||
|
||||
NUMBER_OF_ACTIONS = 101,
|
||||
}
|
||||
|
||||
function createFirstMap(): Map<UserAction, UserAction> {
|
||||
|
@ -1155,6 +1155,8 @@ export class PdfViewerElement extends PdfViewerBaseElement {
|
||||
saveMode = SaveRequestType.ANNOTATION;
|
||||
} else if (this.hasEdits_) {
|
||||
saveMode = SaveRequestType.EDITED;
|
||||
} else if (this.hasSearchifyText_) {
|
||||
saveMode = SaveRequestType.SEARCHIFIED;
|
||||
} else {
|
||||
saveMode = SaveRequestType.ORIGINAL;
|
||||
}
|
||||
@ -1292,6 +1294,11 @@ export class PdfViewerElement extends PdfViewerBaseElement {
|
||||
private async save_(requestType: SaveRequestType) {
|
||||
this.recordSaveMetrics_(requestType);
|
||||
|
||||
// TODO(crbug.com/382610226): Update for `SaveRequestType.SEARCHIFIED` to
|
||||
// allow users to choose between saving the original PDF and the text-extracted one.
|
||||
// To do so, the save type should be asked first, and then content would be
|
||||
// fetched based on the selected type.
|
||||
|
||||
// If we have entered annotation mode we must require the local
|
||||
// contents to ensure annotations are saved, unless the user specifically
|
||||
// requested the original document. Otherwise we would save the cached
|
||||
@ -1436,6 +1443,12 @@ export class PdfViewerElement extends PdfViewerBaseElement {
|
||||
case SaveRequestType.EDITED:
|
||||
record(UserAction.SAVE_EDITED);
|
||||
break;
|
||||
case SaveRequestType.SEARCHIFIED:
|
||||
// TODO(crbug.com/382610226): Update metric after the code is updated to
|
||||
// give users the option to save searchified or original PDF, and add
|
||||
// test.
|
||||
record(UserAction.SAVE_SEARCHIFIED);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1499,6 +1499,9 @@ void PdfViewWebPlugin::OnHasSearchifyText() {
|
||||
message.Set("type", "setHasSearchifyText");
|
||||
client_->PostMessage(std::move(message));
|
||||
pdf_accessibility_data_handler_->OnHasSearchifyText();
|
||||
if (chrome_pdf::features::IsPdfSearchifySaveEnabled()) {
|
||||
SetPluginCanSave(true);
|
||||
}
|
||||
}
|
||||
#endif // BUILDFLAG(ENABLE_SCREEN_AI_SERVICE)
|
||||
|
||||
@ -1778,6 +1781,18 @@ void PdfViewWebPlugin::HandleSaveMessage(const base::Value::Dict& message) {
|
||||
case SaveRequestType::kEdited:
|
||||
SaveToBuffer(request_type, token);
|
||||
return;
|
||||
case SaveRequestType::kSearchified:
|
||||
#if BUILDFLAG(ENABLE_SCREEN_AI_SERVICE)
|
||||
CHECK(chrome_pdf::features::IsPdfSearchifySaveEnabled());
|
||||
// TODO(crbug.com/382610226): If engine has searchified text, ensure all
|
||||
// pages are searchified and then save.
|
||||
SaveToBuffer(request_type, token);
|
||||
return;
|
||||
#else
|
||||
// PDF Searchify is not expected to be triggered when ScreenAI service is
|
||||
// not enabled.
|
||||
NOTREACHED();
|
||||
#endif
|
||||
}
|
||||
NOTREACHED();
|
||||
}
|
||||
@ -1947,7 +1962,8 @@ void PdfViewWebPlugin::HandleViewportMessage(const base::Value::Dict& message) {
|
||||
void PdfViewWebPlugin::SaveToBuffer(SaveRequestType request_type,
|
||||
const std::string& token) {
|
||||
CHECK(request_type == SaveRequestType::kAnnotation ||
|
||||
request_type == SaveRequestType::kEdited);
|
||||
request_type == SaveRequestType::kEdited ||
|
||||
request_type == SaveRequestType::kSearchified);
|
||||
|
||||
engine_->KillFormFocus();
|
||||
|
||||
@ -1966,6 +1982,10 @@ void PdfViewWebPlugin::SaveToBuffer(SaveRequestType request_type,
|
||||
use_save_data |= !!ink_module_;
|
||||
#endif // BUILDFLAG(ENABLE_PDF_INK2)
|
||||
|
||||
#if BUILDFLAG(ENABLE_SCREEN_AI_SERVICE)
|
||||
use_save_data |= (request_type == SaveRequestType::kSearchified);
|
||||
#endif
|
||||
|
||||
if (use_save_data) {
|
||||
base::Value::BlobStorage data = engine_->GetSaveData();
|
||||
if (IsSaveDataSizeValid(data.size())) {
|
||||
|
@ -113,6 +113,7 @@ class PdfViewWebPlugin final : public PDFiumEngineClient,
|
||||
kAnnotation = 0,
|
||||
kOriginal = 1,
|
||||
kEdited = 2,
|
||||
kSearchified = 3,
|
||||
};
|
||||
|
||||
// Provides services from the plugin's container.
|
||||
|
@ -126,6 +126,8 @@ chromium-metrics-reviews@google.com.
|
||||
<int value="96" label="OpenInk2SidePanel"/>
|
||||
<int value="97" label="OpenInk2BottomToolbarFirst"/>
|
||||
<int value="98" label="OpenInk2BottomToolbar"/>
|
||||
<int value="99" label="SaveSearchifiedFirst"/>
|
||||
<int value="100" label="SaveSearchified"/>
|
||||
</enum>
|
||||
|
||||
<enum name="ChromePDFViewerLoadStatus">
|
||||
|
Reference in New Issue
Block a user