0

PDF: Land initial version of PdfSearchify

PdfSearchify takes a PDF and outputs a searchable PDF by performing OCR
and adding an invisible text layer on each image in the PDF. Each
execution should take place in an isolated process, and each process
should be terminated upon completion of the work.

Bug: 41487613
Test: Create PDFs with CL:5307294
Change-Id: I69d5d4db0b9405e085f6b260567743c0d59f519d
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/5246898
Reviewed-by: Ramin Halavati <rhalavati@chromium.org>
Reviewed-by: Sean Li <seannli@google.com>
Reviewed-by: Lei Zhang <thestig@chromium.org>
Commit-Queue: Chu-Hsuan Yang <chuhsuan@chromium.org>
Cr-Commit-Position: refs/heads/main@{#1289713}
This commit is contained in:
Chu-Hsuan Yang
2024-04-19 01:47:45 +00:00
committed by Chromium LUCI CQ
parent cda93f408c
commit ec94716da1
16 changed files with 661 additions and 53 deletions

@ -6,6 +6,7 @@ import("//build/buildflag_header.gni")
import("//build/config/features.gni")
import("//pdf/features.gni")
import("//ppapi/buildflags/buildflags.gni")
import("//services/screen_ai/buildflags/features.gni")
import("//testing/libfuzzer/fuzzer_test.gni")
import("//testing/test.gni")
import("//third_party/pdfium/pdfium.gni")
@ -47,6 +48,7 @@ if (enable_pdf) {
":internal",
"//base",
"//build:chromeos_buildflags",
"//services/screen_ai/buildflags",
"//ui/gfx/geometry",
]
}
@ -115,6 +117,8 @@ if (enable_pdf) {
"pdfium/pdfium_form_filler.h",
"pdfium/pdfium_mem_buffer_file_write.cc",
"pdfium/pdfium_mem_buffer_file_write.h",
"pdfium/pdfium_ocr.cc",
"pdfium/pdfium_ocr.h",
"pdfium/pdfium_page.cc",
"pdfium/pdfium_page.h",
"pdfium/pdfium_permissions.cc",
@ -157,6 +161,7 @@ if (enable_pdf) {
"//gin",
"//pdf/loader",
"//printing",
"//services/screen_ai/buildflags",
"//third_party/blink/public:blink_headers",
"//third_party/blink/public/common:headers",
"//third_party/icu",
@ -184,6 +189,17 @@ if (enable_pdf) {
"flatten_pdf_result.h",
]
}
# The searchify sources use the Screen AI mojom types, so they (and the mojom
# dependency) are only added when the Screen AI service is enabled.
if (enable_screen_ai_service) {
sources += [
"pdfium/pdfium_searchify.cc",
"pdfium/pdfium_searchify.h",
"pdfium/pdfium_searchify_font.cc",
"pdfium/pdfium_searchify_font.h",
]
public_deps += [ "//services/screen_ai/public/mojom" ]
}
}
source_set("assert_enums") {

@ -6,6 +6,9 @@ include_rules = [
"+net",
"+printing",
"+services/network/public/mojom/referrer_policy.mojom-shared.h",
"+services/screen_ai/buildflags",
"+services/screen_ai/public/mojom/screen_ai_service.mojom.h",
"+services/screen_ai/public/mojom/screen_ai_service.mojom-forward.h",
"+third_party/blink/public",
"+third_party/skia/include/core",
"+ui/base",

@ -15,9 +15,16 @@
#include "pdf/pdf_engine.h"
#include "pdf/pdf_features.h"
#include "pdf/pdf_init.h"
#include "services/screen_ai/buildflags/buildflags.h"
#include "ui/gfx/geometry/rect.h"
#include "ui/gfx/geometry/size_f.h"
#if BUILDFLAG(ENABLE_SCREEN_AI_SERVICE)
#include "base/functional/callback_forward.h"
#include "services/screen_ai/public/mojom/screen_ai_service.mojom.h"
#include "third_party/skia/include/core/SkBitmap.h"
#endif // BUILDFLAG(ENABLE_SCREEN_AI_SERVICE)
namespace chrome_pdf {
namespace {
@ -173,4 +180,15 @@ std::vector<uint8_t> ConvertPdfDocumentToNupPdf(
input_buffer, pages_per_sheet, page_size, printable_area);
}
#if BUILDFLAG(ENABLE_SCREEN_AI_SERVICE)
// Public entry point for searchify: forwards `pdf_buffer` and
// `perform_ocr_callback` to the engine's Searchify() implementation and returns
// its result unchanged.
std::vector<uint8_t> Searchify(
    base::span<const uint8_t> pdf_buffer,
    base::RepeatingCallback<screen_ai::mojom::VisualAnnotationPtr(
        const SkBitmap& bitmap)> perform_ocr_callback) {
  // The scoped initializer (V8 disabled) stays alive until after the engine
  // call below has returned.
  ScopedSdkInitializer scoped_sdk_initializer(/*enable_v8=*/false);
  return PDFEngineExports::Get()->Searchify(pdf_buffer,
                                            std::move(perform_ocr_callback));
}
#endif // BUILDFLAG(ENABLE_SCREEN_AI_SERVICE)
} // namespace chrome_pdf

@ -12,6 +12,7 @@
#include "base/values.h"
#include "build/build_config.h"
#include "pdf/document_metadata.h"
#include "services/screen_ai/buildflags/buildflags.h"
#if BUILDFLAG(IS_CHROMEOS)
#include "pdf/flatten_pdf_result.h"
@ -21,6 +22,12 @@
#include <windows.h>
#endif
#if BUILDFLAG(ENABLE_SCREEN_AI_SERVICE)
#include "base/functional/callback_forward.h"
#include "services/screen_ai/public/mojom/screen_ai_service.mojom.h"
#include "third_party/skia/include/core/SkBitmap.h"
#endif // BUILDFLAG(ENABLE_SCREEN_AI_SERVICE)
namespace gfx {
class Rect;
class Size;
@ -212,6 +219,25 @@ std::vector<uint8_t> ConvertPdfDocumentToNupPdf(
const gfx::Size& page_size,
const gfx::Rect& printable_area);
#if BUILDFLAG(ENABLE_SCREEN_AI_SERVICE)
// Converts an inaccessible PDF to a searchable PDF.
// `pdf_buffer` is the buffer of the inaccessible PDF.
// `perform_ocr_callback` is the callback that takes an image and outputs
// the OCR result. It may be called multiple times.
//
// The conversion is done by performing OCR on each image in the PDF and adding
// a layer of invisible text to the PDF to make text on images accessible. Each
// execution should take place in an isolated process, and each process should
// be terminated upon completion of the conversion.
//
// Returns the searchified PDF. An empty vector is returned on failure.
std::vector<uint8_t> Searchify(
base::span<const uint8_t> pdf_buffer,
base::RepeatingCallback<screen_ai::mojom::VisualAnnotationPtr(
const SkBitmap& bitmap)> perform_ocr_callback);
#endif // BUILDFLAG(ENABLE_SCREEN_AI_SERVICE)
} // namespace chrome_pdf
#endif // PDF_PDF_H_

@ -19,6 +19,7 @@
#include "build/build_config.h"
#include "pdf/document_layout.h"
#include "printing/mojom/print.mojom-forward.h"
#include "services/screen_ai/buildflags/buildflags.h"
#include "third_party/skia/include/core/SkColor.h"
#include "ui/base/cursor/mojom/cursor_type.mojom-forward.h"
#include "ui/base/window_open_disposition.h"
@ -35,6 +36,10 @@
#include "pdf/flatten_pdf_result.h"
#endif
#if BUILDFLAG(ENABLE_SCREEN_AI_SERVICE)
#include "services/screen_ai/public/mojom/screen_ai_service.mojom-forward.h"
#endif
class SkBitmap;
namespace blink {
@ -582,6 +587,15 @@ class PDFEngineExports {
virtual std::optional<gfx::SizeF> GetPDFPageSizeByIndex(
base::span<const uint8_t> pdf_buffer,
int page_index) = 0;
#if BUILDFLAG(ENABLE_SCREEN_AI_SERVICE)
// Converts an inaccessible PDF to a searchable PDF. See `Searchify` in pdf.h
// for more details.
// `pdf_buffer` holds the input PDF. `perform_ocr_callback` takes an image and
// returns its OCR result; it may be run multiple times. Returns the
// searchified PDF, or an empty vector on failure.
virtual std::vector<uint8_t> Searchify(
base::span<const uint8_t> pdf_buffer,
base::RepeatingCallback<screen_ai::mojom::VisualAnnotationPtr(
const SkBitmap& bitmap)> perform_ocr_callback) = 0;
#endif // BUILDFLAG(ENABLE_SCREEN_AI_SERVICE)
};
} // namespace chrome_pdf

@ -23,6 +23,7 @@
#include "pdf/pdfium/pdfium_unsupported_features.h"
#include "printing/nup_parameters.h"
#include "printing/units.h"
#include "services/screen_ai/buildflags/buildflags.h"
#include "third_party/pdfium/public/cpp/fpdf_scopers.h"
#include "third_party/pdfium/public/fpdf_attachment.h"
#include "third_party/pdfium/public/fpdf_catalog.h"
@ -35,6 +36,13 @@
#include "ui/gfx/geometry/size_f.h"
#include "ui/gfx/geometry/vector2d.h"
#if BUILDFLAG(ENABLE_SCREEN_AI_SERVICE)
#include "base/functional/callback.h"
#include "pdf/pdfium/pdfium_searchify.h"
#include "services/screen_ai/public/mojom/screen_ai_service.mojom.h"
#include "third_party/skia/include/core/SkBitmap.h"
#endif
using printing::ConvertUnitFloat;
using printing::kPointsPerInch;
@ -554,4 +562,13 @@ std::optional<gfx::SizeF> PDFiumEngineExports::GetPDFPageSizeByIndex(
return gfx::SizeF(size.width, size.height);
}
#if BUILDFLAG(ENABLE_SCREEN_AI_SERVICE)
// PDFium implementation of PDFEngineExports::Searchify(). Simply forwards both
// arguments to PDFiumSearchify() and returns its result.
std::vector<uint8_t> PDFiumEngineExports::Searchify(
base::span<const uint8_t> pdf_buffer,
base::RepeatingCallback<screen_ai::mojom::VisualAnnotationPtr(
const SkBitmap& bitmap)> perform_ocr_callback) {
return PDFiumSearchify(pdf_buffer, std::move(perform_ocr_callback));
}
#endif // BUILDFLAG(ENABLE_SCREEN_AI_SERVICE)
} // namespace chrome_pdf

@ -11,6 +11,13 @@
#include "build/build_config.h"
#include "pdf/document_metadata.h"
#include "pdf/pdf_engine.h"
#include "services/screen_ai/buildflags/buildflags.h"
#if BUILDFLAG(ENABLE_SCREEN_AI_SERVICE)
#include "base/functional/callback_forward.h"
#include "services/screen_ai/public/mojom/screen_ai_service.mojom.h"
#include "third_party/skia/include/core/SkBitmap.h"
#endif
namespace chrome_pdf {
@ -61,6 +68,12 @@ class PDFiumEngineExports : public PDFEngineExports {
std::optional<gfx::SizeF> GetPDFPageSizeByIndex(
base::span<const uint8_t> pdf_buffer,
int page_index) override;
#if BUILDFLAG(ENABLE_SCREEN_AI_SERVICE)
// Only available when the Screen AI service is enabled. The implementation
// forwards to PDFiumSearchify().
std::vector<uint8_t> Searchify(
base::span<const uint8_t> pdf_buffer,
base::RepeatingCallback<screen_ai::mojom::VisualAnnotationPtr(
const SkBitmap& bitmap)> perform_ocr_callback) override;
#endif // BUILDFLAG(ENABLE_SCREEN_AI_SERVICE)
};
} // namespace chrome_pdf

@ -3,11 +3,18 @@
// found in the LICENSE file.
#include <optional>
#include <utility>
#include "base/files/file_util.h"
#include "base/functional/bind.h"
#include "base/functional/callback.h"
#include "base/path_service.h"
#include "base/test/mock_callback.h"
#include "pdf/pdf.h"
#include "services/screen_ai/public/mojom/screen_ai_service.mojom.h"
#include "testing/gmock/include/gmock/gmock.h"
#include "testing/gtest/include/gtest/gtest.h"
#include "third_party/skia/include/core/SkBitmap.h"
#include "ui/gfx/geometry/rect.h"
#include "ui/gfx/geometry/size.h"
#include "ui/gfx/geometry/size_f.h"
@ -145,4 +152,36 @@ TEST_F(PDFiumEngineExportsTest, ConvertPdfDocumentToNupPdf) {
}
}
// Runs Searchify() on image_alt_text.pdf with a fake OCR callback and checks
// that a non-empty PDF comes back. The callback is expected to run three times
// (presumably once per image in the test file).
TEST_F(PDFiumEngineExportsTest, Searchify) {
  const base::FilePath input_path =
      pdf_data_dir().Append(FILE_PATH_LITERAL("image_alt_text.pdf"));
  std::optional<std::vector<uint8_t>> pdf_buffer =
      base::ReadFileToBytes(input_path);
  ASSERT_TRUE(pdf_buffer.has_value());

  // Fake OCR: every image yields a single line that contains the single word
  // "foo". All boxes cover the same unrotated 100x100 region.
  auto fake_ocr = [](const SkBitmap& bitmap) {
    auto word = screen_ai::mojom::WordBox::New();
    word->word = "foo";
    word->bounding_box = gfx::Rect(0, 0, 100, 100);
    word->bounding_box_angle = 0;

    auto line = screen_ai::mojom::LineBox::New();
    line->baseline_box = gfx::Rect(0, 0, 100, 100);
    line->baseline_box_angle = 0;
    line->bounding_box = gfx::Rect(0, 0, 100, 100);
    line->bounding_box_angle = 0;
    line->words.push_back(std::move(word));

    auto result = screen_ai::mojom::VisualAnnotation::New();
    result->lines.push_back(std::move(line));
    return result;
  };

  base::MockCallback<base::RepeatingCallback<
      screen_ai::mojom::VisualAnnotationPtr(const SkBitmap&)>>
      perform_ocr_callback;
  EXPECT_CALL(perform_ocr_callback, Run).Times(3).WillRepeatedly(fake_ocr);

  std::vector<uint8_t> searchified =
      Searchify(*pdf_buffer, perform_ocr_callback.Get());
  ASSERT_GT(searchified.size(), 0U);
}
} // namespace chrome_pdf

85
pdf/pdfium/pdfium_ocr.cc Normal file

@ -0,0 +1,85 @@
// Copyright 2024 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "pdf/pdfium/pdfium_ocr.h"
#include <stddef.h>
#include "base/check.h"
#include "base/check_op.h"
#include "base/logging.h"
#include "third_party/pdfium/public/cpp/fpdf_scopers.h"
#include "third_party/pdfium/public/fpdf_edit.h"
#include "third_party/pdfium/public/fpdfview.h"
#include "third_party/skia/include/core/SkAlphaType.h"
#include "third_party/skia/include/core/SkBitmap.h"
#include "third_party/skia/include/core/SkColorType.h"
#include "third_party/skia/include/core/SkImageInfo.h"
#include "third_party/skia/include/core/SkPixmap.h"
namespace chrome_pdf {
// Returns the image at `page_object_index` on `page` of `doc` as an SkBitmap
// for OCR. Returns an empty bitmap if the page object is not an image or if
// any step of the extraction fails.
//
// The image object's matrix is temporarily replaced with a no-scale matrix so
// the bitmap is rendered at the image's own pixel size. The original matrix is
// restored right after rendering, whether or not rendering succeeded, so
// `page` is never left with a modified image object (callers may save the
// document afterwards).
SkBitmap GetImageForOcr(FPDF_DOCUMENT doc,
                        FPDF_PAGE page,
                        int page_object_index) {
  SkBitmap bitmap;

  FPDF_PAGEOBJECT page_object = FPDFPage_GetObject(page, page_object_index);
  if (FPDFPageObj_GetType(page_object) != FPDF_PAGEOBJ_IMAGE) {
    return bitmap;
  }

  // OCR needs the image with the highest available quality. To get it, the
  // image transform matrix is reset to no-scale, the bitmap is extracted,
  // and then the original matrix is restored.
  FS_MATRIX original_matrix;
  if (!FPDFPageObj_GetMatrix(page_object, &original_matrix)) {
    DLOG(ERROR) << "Failed to get original matrix";
    return bitmap;
  }

  // Get the actual image size.
  unsigned int width = 0;
  unsigned int height = 0;
  if (!FPDFImageObj_GetImagePixelSize(page_object, &width, &height)) {
    DLOG(ERROR) << "Failed to get image size";
    return bitmap;
  }

  // Resize the matrix to actual size.
  FS_MATRIX new_matrix = {static_cast<float>(width), 0, 0,
                          static_cast<float>(height), 0, 0};
  if (!FPDFPageObj_SetMatrix(page_object, &new_matrix)) {
    DLOG(ERROR) << "Failed to set new matrix on image";
    return bitmap;
  }

  ScopedFPDFBitmap raw_bitmap(
      FPDFImageObj_GetRenderedBitmap(doc, page, page_object));

  // Restore the original matrix before checking the render result, so a
  // rendering failure cannot leave the temporary matrix on the image object.
  CHECK(FPDFPageObj_SetMatrix(page_object, &original_matrix));

  if (!raw_bitmap) {
    DLOG(ERROR) << "Failed to get rendered bitmap";
    return bitmap;
  }

  CHECK_EQ(FPDFBitmap_GetFormat(raw_bitmap.get()), FPDFBitmap_BGRA);
  SkImageInfo info =
      SkImageInfo::Make(FPDFBitmap_GetWidth(raw_bitmap.get()),
                        FPDFBitmap_GetHeight(raw_bitmap.get()),
                        kBGRA_8888_SkColorType, kOpaque_SkAlphaType);
  const size_t row_bytes = FPDFBitmap_GetStride(raw_bitmap.get());
  SkPixmap pixels(info, FPDFBitmap_GetBuffer(raw_bitmap.get()), row_bytes);
  if (!bitmap.tryAllocPixels(info, row_bytes)) {
    DLOG(ERROR) << "Failed to allocate pixel memory";
    return bitmap;
  }

  // Copy the pixels so the result does not reference `raw_bitmap`'s buffer,
  // which is released when `raw_bitmap` goes out of scope.
  bitmap.writePixels(pixels);
  return bitmap;
}
} // namespace chrome_pdf

19
pdf/pdfium/pdfium_ocr.h Normal file

@ -0,0 +1,19 @@
// Copyright 2024 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef PDF_PDFIUM_PDFIUM_OCR_H_
#define PDF_PDFIUM_PDFIUM_OCR_H_
#include "third_party/pdfium/public/fpdfview.h"
#include "third_party/skia/include/core/SkBitmap.h"
namespace chrome_pdf {
// Returns the image page object at `page_object_index` on `page` of `doc` as a
// bitmap for OCR, rendered at the image's own pixel size rather than at its
// on-page size. Returns an empty bitmap if the object is not an image or if
// the bitmap cannot be extracted.
SkBitmap GetImageForOcr(FPDF_DOCUMENT doc,
FPDF_PAGE page,
int page_object_index);
} // namespace chrome_pdf
#endif // PDF_PDFIUM_PDFIUM_OCR_H_

@ -24,6 +24,7 @@
#include "pdf/accessibility_structs.h"
#include "pdf/pdfium/pdfium_api_string_buffer_adapter.h"
#include "pdf/pdfium/pdfium_engine.h"
#include "pdf/pdfium/pdfium_ocr.h"
#include "pdf/pdfium/pdfium_unsupported_features.h"
#include "pdf/ui/thumbnail.h"
#include "printing/units.h"
@ -32,8 +33,7 @@
#include "third_party/pdfium/public/fpdf_catalog.h"
#include "third_party/pdfium/public/fpdf_edit.h"
#include "third_party/pdfium/public/fpdfview.h"
#include "third_party/skia/include/core/SkImageInfo.h"
#include "third_party/skia/include/core/SkPixmap.h"
#include "third_party/skia/include/core/SkBitmap.h"
#include "ui/accessibility/accessibility_features.h"
#include "ui/gfx/geometry/point.h"
#include "ui/gfx/geometry/point_f.h"
@ -778,58 +778,9 @@ std::vector<AccessibilityImageInfo> PDFiumPage::GetImageInfo(
}
SkBitmap PDFiumPage::GetImageForOcr(int page_object_index) {
SkBitmap bitmap;
FPDF_PAGE page = GetPage();
FPDF_PAGEOBJECT page_object = FPDFPage_GetObject(page, page_object_index);
if (FPDFPageObj_GetType(page_object) != FPDF_PAGEOBJ_IMAGE) {
return bitmap;
}
// OCR needs the image with the highest available quality. To get it, the
// image transform matrix is reset to no-scale, the bitmap is extracted,
// and then the original matrix is restored.
FS_MATRIX original_matrix;
if (!FPDFPageObj_GetMatrix(page_object, &original_matrix)) {
return bitmap;
}
// Get the actual image size.
unsigned int width;
unsigned int height;
if (!FPDFImageObj_GetImagePixelSize(page_object, &width, &height)) {
return bitmap;
}
// Resize the matrix to actual size.
FS_MATRIX new_matrix = {static_cast<float>(width), 0, 0,
static_cast<float>(height), 0, 0};
if (!FPDFPageObj_SetMatrix(page_object, &new_matrix)) {
return bitmap;
}
ScopedFPDFBitmap raw_bitmap(
FPDFImageObj_GetRenderedBitmap(engine_->doc(), page, page_object));
// Restore the original matrix.
CHECK(FPDFPageObj_SetMatrix(page_object, &original_matrix));
if (!raw_bitmap) {
return SkBitmap();
}
CHECK_EQ(FPDFBitmap_GetFormat(raw_bitmap.get()), FPDFBitmap_BGRA);
SkImageInfo info =
SkImageInfo::Make(FPDFBitmap_GetWidth(raw_bitmap.get()),
FPDFBitmap_GetHeight(raw_bitmap.get()),
kBGRA_8888_SkColorType, kOpaque_SkAlphaType);
const size_t row_bytes = FPDFBitmap_GetStride(raw_bitmap.get());
SkPixmap pixels(info, FPDFBitmap_GetBuffer(raw_bitmap.get()), row_bytes);
if (!bitmap.tryAllocPixels(info, row_bytes)) {
return bitmap;
}
bitmap.writePixels(pixels);
SkBitmap bitmap =
::chrome_pdf::GetImageForOcr(engine_->doc(), page, page_object_index);
SkBitmapOperations::RotationAmount rotation;
switch (FPDFPage_GetRotation(page)) {
@ -846,6 +797,11 @@ SkBitmap PDFiumPage::GetImageForOcr(int page_object_index) {
break;
}
// TODO(crbug/40068467): Currently, `::chrome_pdf::GetImageForOcr` returns the
// full image stored in the PDF without applying the transformation matrix. To
// ensure the image sent to OCR matches how users view it on the browser,
// rotate the bitmap by the page's rotation. We may also need to consider the
// transformation of the image.
return SkBitmapOperations::Rotate(bitmap, rotation);
}

@ -21,6 +21,7 @@
#include "third_party/pdfium/public/fpdf_doc.h"
#include "third_party/pdfium/public/fpdf_formfill.h"
#include "third_party/pdfium/public/fpdf_text.h"
#include "third_party/pdfium/public/fpdfview.h"
#include "third_party/skia/include/core/SkBitmap.h"
#include "ui/gfx/geometry/point_f.h"
#include "ui/gfx/geometry/rect.h"

@ -0,0 +1,259 @@
// Copyright 2024 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "pdf/pdfium/pdfium_searchify.h"

#include <algorithm>
#include <cmath>
#include <numbers>
#include <vector>

#include "base/check.h"
#include "base/check_op.h"
#include "base/functional/callback.h"
#include "base/logging.h"
#include "base/strings/utf_string_conversions.h"
#include "pdf/pdfium/pdfium_mem_buffer_file_write.h"
#include "pdf/pdfium/pdfium_ocr.h"
#include "pdf/pdfium/pdfium_searchify_font.h"
#include "services/screen_ai/public/mojom/screen_ai_service.mojom.h"
#include "third_party/pdfium/public/cpp/fpdf_scopers.h"
#include "third_party/pdfium/public/fpdf_edit.h"
#include "third_party/pdfium/public/fpdf_save.h"
#include "third_party/pdfium/public/fpdfview.h"
#include "third_party/skia/include/core/SkBitmap.h"
#include "third_party/skia/include/core/SkImageInfo.h"
#include "third_party/skia/include/core/SkPixmap.h"
namespace chrome_pdf {
namespace {
// Converts the UTF-8 `string` to UTF-16 and returns one charcode per UTF-16
// code unit, in order.
std::vector<uint32_t> Utf8ToCharcodes(const std::string& string) {
  const std::u16string code_units = base::UTF8ToUTF16(string);
  // The range constructor widens each char16_t code unit to uint32_t.
  return std::vector<uint32_t>(code_units.begin(), code_units.end());
}
// Origin and rotation of a bounding box, as produced by ConvertToPdfOrigin()
// and ProjectToBaseline().
struct BoundingBoxOrigin {
// Horizontal position of the origin.
double x;
// Vertical position of the origin.
double y;
// Rotation in radians. ConvertToPdfOrigin() stores the negated OCR angle here.
double theta;
};
// OCR reports boxes relative to a top-left origin while PDF uses a bottom-left
// origin. Given a box's position (`x`, `y`), its `height`, and its `angle` in
// degrees, returns the box origin in a bottom-left coordinate system that is
// `coordinate_system_height` tall. The returned rotation is in radians and has
// the opposite sign of `angle`. `width` is currently unused.
BoundingBoxOrigin ConvertToPdfOrigin(int x,
                                     int y,
                                     int width,
                                     int height,
                                     double angle,
                                     double coordinate_system_height) {
  const double theta = angle * std::numbers::pi / 180;

  BoundingBoxOrigin pdf_origin;
  pdf_origin.x = x - (sin(theta) * height);
  pdf_origin.y = coordinate_system_height - (y + cos(theta) * height);
  pdf_origin.theta = -theta;
  return pdf_origin;
}
// Projects `origin` onto the line that passes through `baseline_origin` in the
// direction `baseline_origin.theta`. The result lies on that line and carries
// the baseline's rotation.
BoundingBoxOrigin ProjectToBaseline(const BoundingBoxOrigin& origin,
                                    const BoundingBoxOrigin& baseline_origin) {
  const double cos_theta = cos(baseline_origin.theta);
  const double sin_theta = sin(baseline_origin.theta);

  // Signed distance from `baseline_origin` to the projection of `origin`,
  // measured along the baseline direction.
  const double length = (origin.x - baseline_origin.x) * cos_theta +
                        (origin.y - baseline_origin.y) * sin_theta;

  BoundingBoxOrigin projected;
  projected.x = baseline_origin.x + length * cos_theta;
  projected.y = baseline_origin.y + length * sin_theta;
  projected.theta = baseline_origin.theta;
  return projected;
}
// Adds one invisible text object to `page` for every word in `annotation`,
// positioned over the corresponding text in `image`. `font` is used for all
// text objects, which are created in `document`.
//
// OCR boxes are expressed in the image's pixel space with a top-left origin.
// Each text object goes through this chain of transforms:
//   1. Scale to the word's bounding box size (pixel space).
//   2. Rotate and translate to the word's origin, projected onto the line's
//      baseline (pixel space, bottom-left origin).
//   3. Scale from the image's pixel size to its rendered size.
//   4. Apply the image's matrix with its scaling factored out.
//
// Returns without adding anything if the image's geometry cannot be read or is
// degenerate. Individual words that cannot be converted are skipped.
void AddTextOnImage(FPDF_DOCUMENT document,
                    FPDF_PAGE page,
                    FPDF_FONT font,
                    FPDF_PAGEOBJECT image,
                    screen_ai::mojom::VisualAnnotationPtr annotation) {
  FS_QUADPOINTSF quadpoints;
  if (!FPDFPageObj_GetRotatedBounds(image, &quadpoints)) {
    DLOG(ERROR) << "Failed to get image rendered dimensions";
    return;
  }
  // Side lengths of the image's rotated bounds, i.e. its size as rendered on
  // the page.
  const double image_rendered_width =
      std::hypot(static_cast<double>(quadpoints.x1) - quadpoints.x2,
                 static_cast<double>(quadpoints.y1) - quadpoints.y2);
  const double image_rendered_height =
      std::hypot(static_cast<double>(quadpoints.x2) - quadpoints.x3,
                 static_cast<double>(quadpoints.y2) - quadpoints.y3);

  unsigned int image_pixel_width = 0;
  unsigned int image_pixel_height = 0;
  if (!FPDFImageObj_GetImagePixelSize(image, &image_pixel_width,
                                      &image_pixel_height)) {
    DLOG(ERROR) << "Failed to get image dimensions";
    return;
  }

  // All four values are used as divisors below. The negated comparisons also
  // reject NaN.
  if (!(image_rendered_width > 0) || !(image_rendered_height > 0) ||
      image_pixel_width == 0 || image_pixel_height == 0) {
    DLOG(ERROR) << "Got degenerate image dimensions";
    return;
  }

  FS_MATRIX image_matrix;
  if (!FPDFPageObj_GetMatrix(image, &image_matrix)) {
    DLOG(ERROR) << "Failed to get image matrix";
    return;
  }

  for (const auto& line : annotation->lines) {
    // OCR coordinates are in image pixels, so the flip from a top-left to a
    // bottom-left origin must use the image's pixel height. The scaling to the
    // rendered size happens later.
    const BoundingBoxOrigin baseline_origin = ConvertToPdfOrigin(
        line->baseline_box.x(), line->baseline_box.y(),
        line->baseline_box.width(), line->baseline_box.height(),
        line->baseline_box_angle, image_pixel_height);

    for (const auto& word : line->words) {
      const double width = word->bounding_box.width();
      const double height = word->bounding_box.height();
      if (width == 0 || height == 0) {
        continue;
      }

      std::string word_string = word->word;
      // TODO(crbug.com/41487613): A more accurate width would be the distance
      // from current word's origin to next word's origin.
      if (word->has_space_after) {
        word_string.push_back(' ');
      }
      if (word_string.empty()) {
        DLOG(ERROR) << "Got empty word";
        continue;
      }

      ScopedFPDFPageObject text(
          FPDFPageObj_CreateTextObj(document, font, height));
      CHECK(text);

      std::vector<uint32_t> charcodes = Utf8ToCharcodes(word_string);
      if (!FPDFText_SetCharcodes(text.get(), charcodes.data(),
                                 charcodes.size())) {
        DLOG(ERROR) << "Failed to set charcodes";
        continue;
      }

      // Make text invisible
      if (!FPDFTextObj_SetTextRenderMode(text.get(),
                                         FPDF_TEXTRENDERMODE_INVISIBLE)) {
        DLOG(ERROR) << "Failed to make text invisible";
        continue;
      }

      float left;
      float bottom;
      float right;
      float top;
      if (!FPDFPageObj_GetBounds(text.get(), &left, &bottom, &right, &top)) {
        DLOG(ERROR) << "Failed to get the bounding box of original text object";
        continue;
      }
      const double original_text_object_width = right - left;
      const double original_text_object_height = top - bottom;
      CHECK_GT(original_text_object_width, 0);
      CHECK_GT(original_text_object_height, 0);

      // Stretch the text object so it exactly fills the word's bounding box.
      const double width_scale = width / original_text_object_width;
      const double height_scale = height / original_text_object_height;
      FPDFPageObj_Transform(text.get(), width_scale, 0, 0, height_scale, 0, 0);

      // Move text object to the corresponding text position on the full image.
      BoundingBoxOrigin origin = ConvertToPdfOrigin(
          word->bounding_box.x(), word->bounding_box.y(), width, height,
          word->bounding_box_angle, image_pixel_height);
      origin = ProjectToBaseline(origin, baseline_origin);
      const double cos_theta = std::cos(origin.theta);
      const double sin_theta = std::sin(origin.theta);
      double a = cos_theta;
      double b = sin_theta;
      const double c = -sin_theta;
      const double d = cos_theta;
      double e = origin.x;
      double f = origin.y;
      if (word->direction ==
          screen_ai::mojom::Direction::DIRECTION_RIGHT_TO_LEFT) {
        // Mirror the text along its own direction and start it from the far
        // end of the word's box.
        a = -a;
        b = -b;
        e += cos_theta * width;
        f += sin_theta * width;
      }
      FPDFPageObj_Transform(text.get(), a, b, c, d, e, f);

      // Scale from full image size to rendered image size on the PDF.
      FPDFPageObj_Transform(text.get(),
                            image_rendered_width / image_pixel_width, 0, 0,
                            image_rendered_height / image_pixel_height, 0, 0);

      // Apply the image's transformation matrix on the PDF page without the
      // scaling matrix.
      FPDFPageObj_Transform(text.get(), image_matrix.a / image_rendered_width,
                            image_matrix.b / image_rendered_width,
                            image_matrix.c / image_rendered_height,
                            image_matrix.d / image_rendered_height,
                            image_matrix.e, image_matrix.f);

      // `text` is released because it is handed over to `page` here.
      FPDFPage_InsertObject(page, text.release());
    }
  }
}
} // namespace
// Loads the PDF in `pdf_buffer`, runs `perform_ocr_callback` on every image
// page object that yields a bitmap, adds the recognized text as invisible text
// objects on top of each image, and returns the saved result.
//
// Returns an empty vector if the document cannot be loaded, has no pages, or
// cannot be saved. Pages that fail to load and images that fail OCR are
// skipped.
std::vector<uint8_t> PDFiumSearchify(
    base::span<const uint8_t> pdf_buffer,
    base::RepeatingCallback<screen_ai::mojom::VisualAnnotationPtr(
        const SkBitmap& bitmap)> perform_ocr_callback) {
  ScopedFPDFDocument document(
      FPDF_LoadMemDocument64(pdf_buffer.data(), pdf_buffer.size(), nullptr));
  if (!document) {
    DLOG(ERROR) << "Failed to load document";
    return {};
  }
  FPDF_DOCUMENT doc = document.get();

  const int num_pages = FPDF_GetPageCount(doc);
  if (num_pages == 0) {
    DLOG(ERROR) << "Got zero page count";
    return {};
  }

  // One font is loaded up front and shared by every text object added below.
  std::vector<uint8_t> cid_to_gid_map = CreateCidToGidMap();
  ScopedFPDFFont font(FPDFText_LoadCidType2Font(
      doc, kPdfTtf, kPdfTtfSize, kToUnicodeCMap, cid_to_gid_map.data(),
      cid_to_gid_map.size()));
  CHECK(font);

  for (int page_number = 0; page_number < num_pages; ++page_number) {
    ScopedFPDFPage page(FPDF_LoadPage(doc, page_number));
    if (!page) {
      DLOG(ERROR) << "Failed to load page";
      continue;
    }

    // The count is read once, so objects inserted by AddTextOnImage() during
    // the loop do not extend the iteration.
    const int num_objects = FPDFPage_CountObjects(page.get());
    for (int object_number = 0; object_number < num_objects; ++object_number) {
      SkBitmap bitmap = GetImageForOcr(doc, page.get(), object_number);
      // The object is not an image or failed to get the bitmap from the image.
      if (bitmap.empty()) {
        continue;
      }

      FPDF_PAGEOBJECT image = FPDFPage_GetObject(page.get(), object_number);
      if (!image) {
        DLOG(ERROR) << "Failed to get image object";
        continue;
      }

      screen_ai::mojom::VisualAnnotationPtr annotation =
          perform_ocr_callback.Run(bitmap);
      if (!annotation) {
        DLOG(ERROR) << "Failed to get OCR annotation on the image";
        continue;
      }

      AddTextOnImage(doc, page.get(), font.get(), image,
                     std::move(annotation));
    }

    // A failure here is only logged; the remaining pages are still processed.
    if (!FPDFPage_GenerateContent(page.get())) {
      DLOG(ERROR) << "Failed to generate content";
    }
  }

  PDFiumMemBufferFileWrite output_file_write;
  if (!FPDF_SaveAsCopy(doc, &output_file_write, 0)) {
    DLOG(ERROR) << "Failed to save the document";
    return {};
  }
  return output_file_write.TakeBuffer();
}
} // namespace chrome_pdf

@ -0,0 +1,24 @@
// Copyright 2024 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef PDF_PDFIUM_PDFIUM_SEARCHIFY_H_
#define PDF_PDFIUM_PDFIUM_SEARCHIFY_H_
#include <vector>
#include "base/containers/span.h"
#include "base/functional/callback_forward.h"
#include "services/screen_ai/public/mojom/screen_ai_service.mojom-forward.h"
#include "third_party/skia/include/core/SkBitmap.h"
namespace chrome_pdf {
// Returns a searchable copy of the PDF in `pdf_buffer`. `perform_ocr_callback`
// is run on the bitmap of each image in the PDF, and the text it returns is
// added to the PDF as invisible text objects positioned over that image.
// Returns an empty vector if the PDF cannot be loaded, has no pages, or cannot
// be saved.
std::vector<uint8_t> PDFiumSearchify(
base::span<const uint8_t> pdf_buffer,
base::RepeatingCallback<screen_ai::mojom::VisualAnnotationPtr(
const SkBitmap& bitmap)> perform_ocr_callback);
} // namespace chrome_pdf
#endif // PDF_PDFIUM_PDFIUM_SEARCHIFY_H_

@ -0,0 +1,96 @@
// Copyright 2024 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "pdf/pdfium/pdfium_searchify_font.h"
#include <cstdint>
#include <vector>
namespace chrome_pdf {
// Raw bytes of the TrueType font used for the searchify text layer. The table
// directory at the start lists ten tables: OS/2, cmap, glyf, head, hhea, hmtx,
// loca, maxp, name and post. PDFiumSearchify() passes this data, together with
// `kPdfTtfSize`, to FPDFText_LoadCidType2Font().
const uint8_t kPdfTtf[] = {
0x0, 0x1, 0x0, 0x0, 0x0, 0xa, 0x0, 0x80, 0x0, 0x3, 0x0, 0x20,
0x4f, 0x53, 0x2f, 0x32, 0x56, 0xde, 0xc8, 0x94, 0x0, 0x0, 0x1, 0x28,
0x0, 0x0, 0x0, 0x60, 0x63, 0x6d, 0x61, 0x70, 0x0, 0xa, 0x0, 0x34,
0x0, 0x0, 0x1, 0x90, 0x0, 0x0, 0x0, 0x1e, 0x67, 0x6c, 0x79, 0x66,
0x15, 0x22, 0x41, 0x24, 0x0, 0x0, 0x1, 0xb8, 0x0, 0x0, 0x0, 0x18,
0x68, 0x65, 0x61, 0x64, 0xb, 0x78, 0xf1, 0x65, 0x0, 0x0, 0x0, 0xac,
0x0, 0x0, 0x0, 0x36, 0x68, 0x68, 0x65, 0x61, 0xc, 0x2, 0x4, 0x2,
0x0, 0x0, 0x0, 0xe4, 0x0, 0x0, 0x0, 0x24, 0x68, 0x6d, 0x74, 0x78,
0x4, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1, 0x88, 0x0, 0x0, 0x0, 0x8,
0x6c, 0x6f, 0x63, 0x61, 0x0, 0xc, 0x0, 0x0, 0x0, 0x0, 0x1, 0xb0,
0x0, 0x0, 0x0, 0x6, 0x6d, 0x61, 0x78, 0x70, 0x0, 0x4, 0x0, 0x5,
0x0, 0x0, 0x1, 0x8, 0x0, 0x0, 0x0, 0x20, 0x6e, 0x61, 0x6d, 0x65,
0xf2, 0xeb, 0x16, 0xda, 0x0, 0x0, 0x1, 0xd0, 0x0, 0x0, 0x0, 0x4b,
0x70, 0x6f, 0x73, 0x74, 0x0, 0x1, 0x0, 0x1, 0x0, 0x0, 0x2, 0x1c,
0x0, 0x0, 0x0, 0x20, 0x0, 0x1, 0x0, 0x0, 0x0, 0x1, 0x0, 0x0,
0xb0, 0x94, 0x71, 0x10, 0x5f, 0xf, 0x3c, 0xf5, 0x4, 0x7, 0x8, 0x0,
0x0, 0x0, 0x0, 0x0, 0xcf, 0x9a, 0xfc, 0x6e, 0x0, 0x0, 0x0, 0x0,
0xd4, 0xc3, 0xa7, 0xf2, 0x0, 0x0, 0x0, 0x0, 0x4, 0x0, 0x8, 0x0,
0x0, 0x0, 0x0, 0x10, 0x0, 0x2, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
0x0, 0x1, 0x0, 0x0, 0x8, 0x0, 0xff, 0xff, 0x0, 0x0, 0x4, 0x0,
0x0, 0x0, 0x0, 0x0, 0x4, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x2,
0x0, 0x1, 0x0, 0x0, 0x0, 0x2, 0x0, 0x4, 0x0, 0x1, 0x0, 0x0,
0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x3, 0x0, 0x0,
0x1, 0x90, 0x0, 0x5, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0, 0x5, 0x0, 0x1, 0x0, 0x1, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x47, 0x4f, 0x4f, 0x47, 0x0, 0x40,
0x0, 0x0, 0x0, 0x0, 0x0, 0x1, 0xff, 0xff, 0x0, 0x0, 0x0, 0x1,
0x0, 0x1, 0x80, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
0x4, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x2, 0x0, 0x1, 0x0, 0x0,
0x0, 0x0, 0x0, 0x14, 0x0, 0x3, 0x0, 0x0, 0x0, 0x0, 0x0, 0x14,
0x0, 0x6, 0x0, 0xa, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0, 0x0, 0xc, 0x0, 0x0, 0x0, 0x1, 0x0, 0x0,
0x0, 0x0, 0x4, 0x0, 0x8, 0x0, 0x0, 0x3, 0x0, 0x0, 0x31, 0x21,
0x11, 0x21, 0x4, 0x0, 0xfc, 0x0, 0x8, 0x0, 0x0, 0x0, 0x0, 0x3,
0x0, 0x2a, 0x0, 0x0, 0x0, 0x3, 0x0, 0x0, 0x0, 0x5, 0x0, 0x16,
0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x5, 0x0, 0xb,
0x0, 0x16, 0x0, 0x3, 0x0, 0x1, 0x4, 0x9, 0x0, 0x5, 0x0, 0x16,
0x0, 0x0, 0x0, 0x56, 0x0, 0x65, 0x0, 0x72, 0x0, 0x73, 0x0, 0x69,
0x0, 0x6f, 0x0, 0x6e, 0x0, 0x20, 0x0, 0x31, 0x0, 0x2e, 0x0, 0x30,
0x56, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x20, 0x31, 0x2e, 0x30, 0x0,
0x0, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
// Size of `kPdfTtf` in bytes. Exported separately because the header declares
// `kPdfTtf` as an array of unknown bound, so callers cannot use sizeof on it.
const uint32_t kPdfTtfSize = sizeof(kPdfTtf);
// ToUnicode CMap passed to FPDFText_LoadCidType2Font() along with `kPdfTtf`.
// It declares a single two-byte codespace, <0000>-<FFFF>, and a single bfrange
// that maps that whole range starting at <0000>, i.e. every code maps to the
// Unicode value equal to the code itself.
const char kToUnicodeCMap[] = R"(
/CIDInit /ProcSet findresource begin
12 dict begin
begincmap
/CIDSystemInfo <<
/Registry (Adobe)
/Ordering (Identity)
/Supplement 0
>> def
/CMapName /Adobe-Identity-H def
/CMapType 2 def
1 begincodespacerange
<0000> <FFFF>
endcodespacerange
1 beginbfrange
<0000> <FFFF> <0000>
endbfrange
endcmap
CMapName currentdict /CMap defineresource pop
end
end
)";
// Builds the CIDToGIDMap data passed to FPDFText_LoadCidType2Font().
//
// The map has one two-byte entry for each of the 65536 possible CIDs. Every
// entry is the byte pair {0x00, 0x01}, which in the PDF CIDToGIDMap format
// (high-order byte first) is glyph index 1, so all CIDs map to the same glyph.
// The result is 2 * 65536 bytes long.
std::vector<uint8_t> CreateCidToGidMap() {
  constexpr int kCidCount = 1 << 16;
  constexpr int kBytesPerEntry = 2;

  std::vector<uint8_t> cid_to_gid_map;
  // Reserve once up front to avoid repeated reallocations while filling.
  cid_to_gid_map.reserve(kCidCount * kBytesPerEntry);
  for (int cid = 0; cid < kCidCount; ++cid) {
    cid_to_gid_map.push_back(0);  // High-order byte of the glyph index.
    cid_to_gid_map.push_back(1);  // Low-order byte of the glyph index.
  }
  return cid_to_gid_map;
}
} // namespace chrome_pdf

@ -0,0 +1,22 @@
// Copyright 2024 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef PDF_PDFIUM_PDFIUM_SEARCHIFY_FONT_H_
#define PDF_PDFIUM_PDFIUM_SEARCHIFY_FONT_H_
#include <cstdint>
#include <vector>
namespace chrome_pdf {
// Raw TrueType font data used for the text that searchify adds to a PDF.
extern const uint8_t kPdfTtf[];
// Size of `kPdfTtf` in bytes.
extern const uint32_t kPdfTtfSize;
// ToUnicode CMap used with `kPdfTtf` when loading it as a CID font.
extern const char kToUnicodeCMap[];
// Returns the CIDToGIDMap data used with `kPdfTtf` when loading it as a CID
// font.
std::vector<uint8_t> CreateCidToGidMap();
} // namespace chrome_pdf
#endif // PDF_PDFIUM_PDFIUM_SEARCHIFY_FONT_H_