PDF: Land initial version of PdfSearchify
PdfSearchify takes a PDF and outputs a searchable PDF by performing OCR and adding an invisible text layer on each image in the PDF. Each execution should take place in an isolated process, and each process should be terminated upon completion of the work. Bug: 41487613 Test: Create PDFs with CL:5307294 Change-Id: I69d5d4db0b9405e085f6b260567743c0d59f519d Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/5246898 Reviewed-by: Ramin Halavati <rhalavati@chromium.org> Reviewed-by: Sean Li <seannli@google.com> Reviewed-by: Lei Zhang <thestig@chromium.org> Commit-Queue: Chu-Hsuan Yang <chuhsuan@chromium.org> Cr-Commit-Position: refs/heads/main@{#1289713}
This commit is contained in:

committed by
Chromium LUCI CQ

parent
cda93f408c
commit
ec94716da1
16
pdf/BUILD.gn
16
pdf/BUILD.gn
@ -6,6 +6,7 @@ import("//build/buildflag_header.gni")
|
||||
import("//build/config/features.gni")
|
||||
import("//pdf/features.gni")
|
||||
import("//ppapi/buildflags/buildflags.gni")
|
||||
import("//services/screen_ai/buildflags/features.gni")
|
||||
import("//testing/libfuzzer/fuzzer_test.gni")
|
||||
import("//testing/test.gni")
|
||||
import("//third_party/pdfium/pdfium.gni")
|
||||
@ -47,6 +48,7 @@ if (enable_pdf) {
|
||||
":internal",
|
||||
"//base",
|
||||
"//build:chromeos_buildflags",
|
||||
"//services/screen_ai/buildflags",
|
||||
"//ui/gfx/geometry",
|
||||
]
|
||||
}
|
||||
@ -115,6 +117,8 @@ if (enable_pdf) {
|
||||
"pdfium/pdfium_form_filler.h",
|
||||
"pdfium/pdfium_mem_buffer_file_write.cc",
|
||||
"pdfium/pdfium_mem_buffer_file_write.h",
|
||||
"pdfium/pdfium_ocr.cc",
|
||||
"pdfium/pdfium_ocr.h",
|
||||
"pdfium/pdfium_page.cc",
|
||||
"pdfium/pdfium_page.h",
|
||||
"pdfium/pdfium_permissions.cc",
|
||||
@ -157,6 +161,7 @@ if (enable_pdf) {
|
||||
"//gin",
|
||||
"//pdf/loader",
|
||||
"//printing",
|
||||
"//services/screen_ai/buildflags",
|
||||
"//third_party/blink/public:blink_headers",
|
||||
"//third_party/blink/public/common:headers",
|
||||
"//third_party/icu",
|
||||
@ -184,6 +189,17 @@ if (enable_pdf) {
|
||||
"flatten_pdf_result.h",
|
||||
]
|
||||
}
|
||||
|
||||
if (enable_screen_ai_service) {
|
||||
sources += [
|
||||
"pdfium/pdfium_searchify.cc",
|
||||
"pdfium/pdfium_searchify.h",
|
||||
"pdfium/pdfium_searchify_font.cc",
|
||||
"pdfium/pdfium_searchify_font.h",
|
||||
]
|
||||
|
||||
public_deps += [ "//services/screen_ai/public/mojom" ]
|
||||
}
|
||||
}
|
||||
|
||||
source_set("assert_enums") {
|
||||
|
3
pdf/DEPS
3
pdf/DEPS
@ -6,6 +6,9 @@ include_rules = [
|
||||
"+net",
|
||||
"+printing",
|
||||
"+services/network/public/mojom/referrer_policy.mojom-shared.h",
|
||||
"+services/screen_ai/buildflags",
|
||||
"+services/screen_ai/public/mojom/screen_ai_service.mojom.h",
|
||||
"+services/screen_ai/public/mojom/screen_ai_service.mojom-forward.h",
|
||||
"+third_party/blink/public",
|
||||
"+third_party/skia/include/core",
|
||||
"+ui/base",
|
||||
|
18
pdf/pdf.cc
18
pdf/pdf.cc
@ -15,9 +15,16 @@
|
||||
#include "pdf/pdf_engine.h"
|
||||
#include "pdf/pdf_features.h"
|
||||
#include "pdf/pdf_init.h"
|
||||
#include "services/screen_ai/buildflags/buildflags.h"
|
||||
#include "ui/gfx/geometry/rect.h"
|
||||
#include "ui/gfx/geometry/size_f.h"
|
||||
|
||||
#if BUILDFLAG(ENABLE_SCREEN_AI_SERVICE)
|
||||
#include "base/functional/callback_forward.h"
|
||||
#include "services/screen_ai/public/mojom/screen_ai_service.mojom.h"
|
||||
#include "third_party/skia/include/core/SkBitmap.h"
|
||||
#endif // BUILDFLAG(ENABLE_SCREEN_AI_SERVICE)
|
||||
|
||||
namespace chrome_pdf {
|
||||
|
||||
namespace {
|
||||
@ -173,4 +180,15 @@ std::vector<uint8_t> ConvertPdfDocumentToNupPdf(
|
||||
input_buffer, pages_per_sheet, page_size, printable_area);
|
||||
}
|
||||
|
||||
#if BUILDFLAG(ENABLE_SCREEN_AI_SERVICE)
|
||||
// Converts `pdf_buffer` into a searchable PDF by delegating to the engine
// exports. `perform_ocr_callback` is handed over to the engine. The call runs
// inside a ScopedSdkInitializer created with V8 disabled.
std::vector<uint8_t> Searchify(
    base::span<const uint8_t> pdf_buffer,
    base::RepeatingCallback<screen_ai::mojom::VisualAnnotationPtr(
        const SkBitmap& bitmap)> perform_ocr_callback) {
  // The scoped initializer stays in scope for the whole conversion.
  ScopedSdkInitializer sdk_scope(/*enable_v8=*/false);
  return PDFEngineExports::Get()->Searchify(pdf_buffer,
                                            std::move(perform_ocr_callback));
}
|
||||
#endif // BUILDFLAG(ENABLE_SCREEN_AI_SERVICE)
|
||||
|
||||
} // namespace chrome_pdf
|
||||
|
26
pdf/pdf.h
26
pdf/pdf.h
@ -12,6 +12,7 @@
|
||||
#include "base/values.h"
|
||||
#include "build/build_config.h"
|
||||
#include "pdf/document_metadata.h"
|
||||
#include "services/screen_ai/buildflags/buildflags.h"
|
||||
|
||||
#if BUILDFLAG(IS_CHROMEOS)
|
||||
#include "pdf/flatten_pdf_result.h"
|
||||
@ -21,6 +22,12 @@
|
||||
#include <windows.h>
|
||||
#endif
|
||||
|
||||
#if BUILDFLAG(ENABLE_SCREEN_AI_SERVICE)
|
||||
#include "base/functional/callback_forward.h"
|
||||
#include "services/screen_ai/public/mojom/screen_ai_service.mojom.h"
|
||||
#include "third_party/skia/include/core/SkBitmap.h"
|
||||
#endif // BUILDFLAG(ENABLE_SCREEN_AI_SERVICE)
|
||||
|
||||
namespace gfx {
|
||||
class Rect;
|
||||
class Size;
|
||||
@ -212,6 +219,25 @@ std::vector<uint8_t> ConvertPdfDocumentToNupPdf(
|
||||
const gfx::Size& page_size,
|
||||
const gfx::Rect& printable_area);
|
||||
|
||||
#if BUILDFLAG(ENABLE_SCREEN_AI_SERVICE)
|
||||
// Converts an inaccessible PDF to a searchable PDF.
|
||||
// `pdf_buffer` is the buffer of the inaccessible PDF.
|
||||
// The searchified PDF is returned directly as the function's return value
|
||||
// once the conversion is done; there is no separate completion callback.
|
||||
// `perform_ocr_callback` is the callback that takes an image and outputs
|
||||
// the OCR result. It may be called multiple times.
|
||||
//
|
||||
// The conversion is done by performing OCR on each image in the PDF and adding
|
||||
// a layer of invisible text to the PDF to make text on images accessible. Each
|
||||
// execution should take place in an isolated process, and each process should
|
||||
// be terminated upon completion of the conversion. An empty vector is returned
|
||||
// on failure.
|
||||
std::vector<uint8_t> Searchify(
|
||||
base::span<const uint8_t> pdf_buffer,
|
||||
base::RepeatingCallback<screen_ai::mojom::VisualAnnotationPtr(
|
||||
const SkBitmap& bitmap)> perform_ocr_callback);
|
||||
#endif // BUILDFLAG(ENABLE_SCREEN_AI_SERVICE)
|
||||
|
||||
} // namespace chrome_pdf
|
||||
|
||||
#endif // PDF_PDF_H_
|
||||
|
@ -19,6 +19,7 @@
|
||||
#include "build/build_config.h"
|
||||
#include "pdf/document_layout.h"
|
||||
#include "printing/mojom/print.mojom-forward.h"
|
||||
#include "services/screen_ai/buildflags/buildflags.h"
|
||||
#include "third_party/skia/include/core/SkColor.h"
|
||||
#include "ui/base/cursor/mojom/cursor_type.mojom-forward.h"
|
||||
#include "ui/base/window_open_disposition.h"
|
||||
@ -35,6 +36,10 @@
|
||||
#include "pdf/flatten_pdf_result.h"
|
||||
#endif
|
||||
|
||||
#if BUILDFLAG(ENABLE_SCREEN_AI_SERVICE)
|
||||
#include "services/screen_ai/public/mojom/screen_ai_service.mojom-forward.h"
|
||||
#endif
|
||||
|
||||
class SkBitmap;
|
||||
|
||||
namespace blink {
|
||||
@ -582,6 +587,15 @@ class PDFEngineExports {
|
||||
virtual std::optional<gfx::SizeF> GetPDFPageSizeByIndex(
|
||||
base::span<const uint8_t> pdf_buffer,
|
||||
int page_index) = 0;
|
||||
|
||||
#if BUILDFLAG(ENABLE_SCREEN_AI_SERVICE)
|
||||
// Converts an inaccessible PDF to a searchable PDF. See `Searchify` in pdf.h
|
||||
// for more details.
|
||||
virtual std::vector<uint8_t> Searchify(
|
||||
base::span<const uint8_t> pdf_buffer,
|
||||
base::RepeatingCallback<screen_ai::mojom::VisualAnnotationPtr(
|
||||
const SkBitmap& bitmap)> perform_ocr_callback) = 0;
|
||||
#endif // BUILDFLAG(ENABLE_SCREEN_AI_SERVICE)
|
||||
};
|
||||
|
||||
} // namespace chrome_pdf
|
||||
|
@ -23,6 +23,7 @@
|
||||
#include "pdf/pdfium/pdfium_unsupported_features.h"
|
||||
#include "printing/nup_parameters.h"
|
||||
#include "printing/units.h"
|
||||
#include "services/screen_ai/buildflags/buildflags.h"
|
||||
#include "third_party/pdfium/public/cpp/fpdf_scopers.h"
|
||||
#include "third_party/pdfium/public/fpdf_attachment.h"
|
||||
#include "third_party/pdfium/public/fpdf_catalog.h"
|
||||
@ -35,6 +36,13 @@
|
||||
#include "ui/gfx/geometry/size_f.h"
|
||||
#include "ui/gfx/geometry/vector2d.h"
|
||||
|
||||
#if BUILDFLAG(ENABLE_SCREEN_AI_SERVICE)
|
||||
#include "base/functional/callback.h"
|
||||
#include "pdf/pdfium/pdfium_searchify.h"
|
||||
#include "services/screen_ai/public/mojom/screen_ai_service.mojom.h"
|
||||
#include "third_party/skia/include/core/SkBitmap.h"
|
||||
#endif
|
||||
|
||||
using printing::ConvertUnitFloat;
|
||||
using printing::kPointsPerInch;
|
||||
|
||||
@ -554,4 +562,13 @@ std::optional<gfx::SizeF> PDFiumEngineExports::GetPDFPageSizeByIndex(
|
||||
return gfx::SizeF(size.width, size.height);
|
||||
}
|
||||
|
||||
#if BUILDFLAG(ENABLE_SCREEN_AI_SERVICE)
|
||||
// PDFium-backed implementation of PDFEngineExports::Searchify(): forwards the
// input buffer and the OCR callback to PDFiumSearchify() and returns its
// result unchanged.
std::vector<uint8_t> PDFiumEngineExports::Searchify(
    base::span<const uint8_t> pdf_buffer,
    base::RepeatingCallback<screen_ai::mojom::VisualAnnotationPtr(
        const SkBitmap& bitmap)> perform_ocr_callback) {
  std::vector<uint8_t> searchified_pdf =
      PDFiumSearchify(pdf_buffer, std::move(perform_ocr_callback));
  return searchified_pdf;
}
|
||||
#endif // BUILDFLAG(ENABLE_SCREEN_AI_SERVICE)
|
||||
|
||||
} // namespace chrome_pdf
|
||||
|
@ -11,6 +11,13 @@
|
||||
#include "build/build_config.h"
|
||||
#include "pdf/document_metadata.h"
|
||||
#include "pdf/pdf_engine.h"
|
||||
#include "services/screen_ai/buildflags/buildflags.h"
|
||||
|
||||
#if BUILDFLAG(ENABLE_SCREEN_AI_SERVICE)
|
||||
#include "base/functional/callback_forward.h"
|
||||
#include "services/screen_ai/public/mojom/screen_ai_service.mojom.h"
|
||||
#include "third_party/skia/include/core/SkBitmap.h"
|
||||
#endif
|
||||
|
||||
namespace chrome_pdf {
|
||||
|
||||
@ -61,6 +68,12 @@ class PDFiumEngineExports : public PDFEngineExports {
|
||||
std::optional<gfx::SizeF> GetPDFPageSizeByIndex(
|
||||
base::span<const uint8_t> pdf_buffer,
|
||||
int page_index) override;
|
||||
#if BUILDFLAG(ENABLE_SCREEN_AI_SERVICE)
|
||||
std::vector<uint8_t> Searchify(
|
||||
base::span<const uint8_t> pdf_buffer,
|
||||
base::RepeatingCallback<screen_ai::mojom::VisualAnnotationPtr(
|
||||
const SkBitmap& bitmap)> perform_ocr_callback) override;
|
||||
#endif // BUILDFLAG(ENABLE_SCREEN_AI_SERVICE)
|
||||
};
|
||||
|
||||
} // namespace chrome_pdf
|
||||
|
@ -3,11 +3,18 @@
|
||||
// found in the LICENSE file.
|
||||
|
||||
#include <optional>
|
||||
#include <utility>
|
||||
|
||||
#include "base/files/file_util.h"
|
||||
#include "base/functional/bind.h"
|
||||
#include "base/functional/callback.h"
|
||||
#include "base/path_service.h"
|
||||
#include "base/test/mock_callback.h"
|
||||
#include "pdf/pdf.h"
|
||||
#include "services/screen_ai/public/mojom/screen_ai_service.mojom.h"
|
||||
#include "testing/gmock/include/gmock/gmock.h"
|
||||
#include "testing/gtest/include/gtest/gtest.h"
|
||||
#include "third_party/skia/include/core/SkBitmap.h"
|
||||
#include "ui/gfx/geometry/rect.h"
|
||||
#include "ui/gfx/geometry/size.h"
|
||||
#include "ui/gfx/geometry/size_f.h"
|
||||
@ -145,4 +152,36 @@ TEST_F(PDFiumEngineExportsTest, ConvertPdfDocumentToNupPdf) {
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(PDFiumEngineExportsTest, Searchify) {
|
||||
base::FilePath pdf_path =
|
||||
pdf_data_dir().Append(FILE_PATH_LITERAL("image_alt_text.pdf"));
|
||||
std::optional<std::vector<uint8_t>> pdf_buffer =
|
||||
base::ReadFileToBytes(pdf_path);
|
||||
ASSERT_TRUE(pdf_buffer.has_value());
|
||||
|
||||
base::MockCallback<base::RepeatingCallback<
|
||||
screen_ai::mojom::VisualAnnotationPtr(const SkBitmap&)>>
|
||||
perform_ocr_callback;
|
||||
EXPECT_CALL(perform_ocr_callback, Run)
|
||||
.Times(3)
|
||||
.WillRepeatedly([](const SkBitmap& bitmap) {
|
||||
auto annotation = screen_ai::mojom::VisualAnnotation::New();
|
||||
auto line_box = screen_ai::mojom::LineBox::New();
|
||||
line_box->baseline_box = gfx::Rect(0, 0, 100, 100);
|
||||
line_box->baseline_box_angle = 0;
|
||||
line_box->bounding_box = gfx::Rect(0, 0, 100, 100);
|
||||
line_box->bounding_box_angle = 0;
|
||||
auto word_box = screen_ai::mojom::WordBox::New();
|
||||
word_box->word = "foo";
|
||||
word_box->bounding_box = gfx::Rect(0, 0, 100, 100);
|
||||
word_box->bounding_box_angle = 0;
|
||||
line_box->words.push_back(std::move(word_box));
|
||||
annotation->lines.push_back(std::move(line_box));
|
||||
return annotation;
|
||||
});
|
||||
std::vector<uint8_t> output_pdf_buffer =
|
||||
Searchify(*pdf_buffer, perform_ocr_callback.Get());
|
||||
ASSERT_GT(output_pdf_buffer.size(), 0U);
|
||||
}
|
||||
|
||||
} // namespace chrome_pdf
|
||||
|
85
pdf/pdfium/pdfium_ocr.cc
Normal file
85
pdf/pdfium/pdfium_ocr.cc
Normal file
@ -0,0 +1,85 @@
|
||||
// Copyright 2024 The Chromium Authors
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
#include "pdf/pdfium/pdfium_ocr.h"
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#include "base/check.h"
|
||||
#include "base/check_op.h"
|
||||
#include "base/logging.h"
|
||||
#include "third_party/pdfium/public/cpp/fpdf_scopers.h"
|
||||
#include "third_party/pdfium/public/fpdf_edit.h"
|
||||
#include "third_party/pdfium/public/fpdfview.h"
|
||||
#include "third_party/skia/include/core/SkAlphaType.h"
|
||||
#include "third_party/skia/include/core/SkBitmap.h"
|
||||
#include "third_party/skia/include/core/SkColorType.h"
|
||||
#include "third_party/skia/include/core/SkImageInfo.h"
|
||||
#include "third_party/skia/include/core/SkPixmap.h"
|
||||
|
||||
namespace chrome_pdf {
|
||||
|
||||
// Extracts the page object at `page_object_index` on `page` as a bitmap for
// OCR, rendered at the image's own pixel size.
//
// Returns an empty SkBitmap when the page object is not an image or when any
// step of the extraction fails. The image object's transform matrix is
// modified only temporarily; it is put back as soon as rendering has been
// attempted, whether or not rendering succeeded.
SkBitmap GetImageForOcr(FPDF_DOCUMENT doc,
                        FPDF_PAGE page,
                        int page_object_index) {
  FPDF_PAGEOBJECT page_object = FPDFPage_GetObject(page, page_object_index);
  if (FPDFPageObj_GetType(page_object) != FPDF_PAGEOBJ_IMAGE) {
    return SkBitmap();
  }

  // OCR needs the image with the highest available quality. To get it, the
  // image transform matrix is reset to no-scale, the bitmap is extracted,
  // and then the original matrix is restored.
  FS_MATRIX original_matrix;
  if (!FPDFPageObj_GetMatrix(page_object, &original_matrix)) {
    DLOG(ERROR) << "Failed to get original matrix";
    return SkBitmap();
  }

  // Get the actual image size.
  unsigned int width = 0;
  unsigned int height = 0;
  if (!FPDFImageObj_GetImagePixelSize(page_object, &width, &height)) {
    DLOG(ERROR) << "Failed to get image size";
    return SkBitmap();
  }

  // Resize the matrix to actual size.
  FS_MATRIX new_matrix = {static_cast<float>(width),  0, 0,
                          static_cast<float>(height), 0, 0};
  if (!FPDFPageObj_SetMatrix(page_object, &new_matrix)) {
    DLOG(ERROR) << "Failed to set new matrix on image";
    return SkBitmap();
  }

  ScopedFPDFBitmap raw_bitmap(
      FPDFImageObj_GetRenderedBitmap(doc, page, page_object));

  // Restore the original matrix before looking at the render result, so that
  // a rendering failure cannot leave the image with the temporary matrix.
  CHECK(FPDFPageObj_SetMatrix(page_object, &original_matrix));

  if (!raw_bitmap) {
    DLOG(ERROR) << "Failed to get rendered bitmap";
    return SkBitmap();
  }

  CHECK_EQ(FPDFBitmap_GetFormat(raw_bitmap.get()), FPDFBitmap_BGRA);
  SkImageInfo info =
      SkImageInfo::Make(FPDFBitmap_GetWidth(raw_bitmap.get()),
                        FPDFBitmap_GetHeight(raw_bitmap.get()),
                        kBGRA_8888_SkColorType, kOpaque_SkAlphaType);
  const size_t row_bytes = FPDFBitmap_GetStride(raw_bitmap.get());
  // `pixels` only wraps PDFium's buffer; the data is copied into `bitmap`
  // below because `raw_bitmap` is released when this function returns.
  SkPixmap pixels(info, FPDFBitmap_GetBuffer(raw_bitmap.get()), row_bytes);

  SkBitmap bitmap;
  if (!bitmap.tryAllocPixels(info, row_bytes)) {
    DLOG(ERROR) << "Failed to allocate pixel memory";
    return SkBitmap();
  }
  if (!bitmap.writePixels(pixels)) {
    // Do not hand an allocated-but-unfilled bitmap to OCR.
    DLOG(ERROR) << "Failed to copy pixels";
    return SkBitmap();
  }

  return bitmap;
}
|
||||
|
||||
} // namespace chrome_pdf
|
19
pdf/pdfium/pdfium_ocr.h
Normal file
19
pdf/pdfium/pdfium_ocr.h
Normal file
@ -0,0 +1,19 @@
|
||||
// Copyright 2024 The Chromium Authors
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
#ifndef PDF_PDFIUM_PDFIUM_OCR_H_
|
||||
#define PDF_PDFIUM_PDFIUM_OCR_H_
|
||||
|
||||
#include "third_party/pdfium/public/fpdfview.h"
|
||||
#include "third_party/skia/include/core/SkBitmap.h"
|
||||
|
||||
namespace chrome_pdf {
|
||||
|
||||
SkBitmap GetImageForOcr(FPDF_DOCUMENT doc,
|
||||
FPDF_PAGE page,
|
||||
int page_object_index);
|
||||
|
||||
} // namespace chrome_pdf
|
||||
|
||||
#endif // PDF_PDFIUM_PDFIUM_OCR_H_
|
@ -24,6 +24,7 @@
|
||||
#include "pdf/accessibility_structs.h"
|
||||
#include "pdf/pdfium/pdfium_api_string_buffer_adapter.h"
|
||||
#include "pdf/pdfium/pdfium_engine.h"
|
||||
#include "pdf/pdfium/pdfium_ocr.h"
|
||||
#include "pdf/pdfium/pdfium_unsupported_features.h"
|
||||
#include "pdf/ui/thumbnail.h"
|
||||
#include "printing/units.h"
|
||||
@ -32,8 +33,7 @@
|
||||
#include "third_party/pdfium/public/fpdf_catalog.h"
|
||||
#include "third_party/pdfium/public/fpdf_edit.h"
|
||||
#include "third_party/pdfium/public/fpdfview.h"
|
||||
#include "third_party/skia/include/core/SkImageInfo.h"
|
||||
#include "third_party/skia/include/core/SkPixmap.h"
|
||||
#include "third_party/skia/include/core/SkBitmap.h"
|
||||
#include "ui/accessibility/accessibility_features.h"
|
||||
#include "ui/gfx/geometry/point.h"
|
||||
#include "ui/gfx/geometry/point_f.h"
|
||||
@ -778,58 +778,9 @@ std::vector<AccessibilityImageInfo> PDFiumPage::GetImageInfo(
|
||||
}
|
||||
|
||||
SkBitmap PDFiumPage::GetImageForOcr(int page_object_index) {
|
||||
SkBitmap bitmap;
|
||||
|
||||
FPDF_PAGE page = GetPage();
|
||||
FPDF_PAGEOBJECT page_object = FPDFPage_GetObject(page, page_object_index);
|
||||
|
||||
if (FPDFPageObj_GetType(page_object) != FPDF_PAGEOBJ_IMAGE) {
|
||||
return bitmap;
|
||||
}
|
||||
|
||||
// OCR needs the image with the highest available quality. To get it, the
|
||||
// image transform matrix is reset to no-scale, the bitmap is extracted,
|
||||
// and then the original matrix is restored.
|
||||
FS_MATRIX original_matrix;
|
||||
if (!FPDFPageObj_GetMatrix(page_object, &original_matrix)) {
|
||||
return bitmap;
|
||||
}
|
||||
|
||||
// Get the actual image size.
|
||||
unsigned int width;
|
||||
unsigned int height;
|
||||
if (!FPDFImageObj_GetImagePixelSize(page_object, &width, &height)) {
|
||||
return bitmap;
|
||||
}
|
||||
|
||||
// Resize the matrix to actual size.
|
||||
FS_MATRIX new_matrix = {static_cast<float>(width), 0, 0,
|
||||
static_cast<float>(height), 0, 0};
|
||||
if (!FPDFPageObj_SetMatrix(page_object, &new_matrix)) {
|
||||
return bitmap;
|
||||
}
|
||||
|
||||
ScopedFPDFBitmap raw_bitmap(
|
||||
FPDFImageObj_GetRenderedBitmap(engine_->doc(), page, page_object));
|
||||
|
||||
// Restore the original matrix.
|
||||
CHECK(FPDFPageObj_SetMatrix(page_object, &original_matrix));
|
||||
|
||||
if (!raw_bitmap) {
|
||||
return SkBitmap();
|
||||
}
|
||||
|
||||
CHECK_EQ(FPDFBitmap_GetFormat(raw_bitmap.get()), FPDFBitmap_BGRA);
|
||||
SkImageInfo info =
|
||||
SkImageInfo::Make(FPDFBitmap_GetWidth(raw_bitmap.get()),
|
||||
FPDFBitmap_GetHeight(raw_bitmap.get()),
|
||||
kBGRA_8888_SkColorType, kOpaque_SkAlphaType);
|
||||
const size_t row_bytes = FPDFBitmap_GetStride(raw_bitmap.get());
|
||||
SkPixmap pixels(info, FPDFBitmap_GetBuffer(raw_bitmap.get()), row_bytes);
|
||||
if (!bitmap.tryAllocPixels(info, row_bytes)) {
|
||||
return bitmap;
|
||||
}
|
||||
bitmap.writePixels(pixels);
|
||||
SkBitmap bitmap =
|
||||
::chrome_pdf::GetImageForOcr(engine_->doc(), page, page_object_index);
|
||||
|
||||
SkBitmapOperations::RotationAmount rotation;
|
||||
switch (FPDFPage_GetRotation(page)) {
|
||||
@ -846,6 +797,11 @@ SkBitmap PDFiumPage::GetImageForOcr(int page_object_index) {
|
||||
break;
|
||||
}
|
||||
|
||||
// TODO(crbug/40068467): Currently, `::chrome_pdf::GetImageForOcr` returns the
|
||||
// full image stored in the PDF without applying the transformation matrix. To
|
||||
// ensure the image sent to OCR matches how users view it on the browser,
|
||||
// rotate the bitmap by the page's rotation. We may also need to consider the
|
||||
// transformation of the image.
|
||||
return SkBitmapOperations::Rotate(bitmap, rotation);
|
||||
}
|
||||
|
||||
|
@ -21,6 +21,7 @@
|
||||
#include "third_party/pdfium/public/fpdf_doc.h"
|
||||
#include "third_party/pdfium/public/fpdf_formfill.h"
|
||||
#include "third_party/pdfium/public/fpdf_text.h"
|
||||
#include "third_party/pdfium/public/fpdfview.h"
|
||||
#include "third_party/skia/include/core/SkBitmap.h"
|
||||
#include "ui/gfx/geometry/point_f.h"
|
||||
#include "ui/gfx/geometry/rect.h"
|
||||
|
259
pdf/pdfium/pdfium_searchify.cc
Normal file
259
pdf/pdfium/pdfium_searchify.cc
Normal file
@ -0,0 +1,259 @@
|
||||
// Copyright 2024 The Chromium Authors
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
#include "pdf/pdfium/pdfium_searchify.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <numbers>
|
||||
#include <vector>
|
||||
|
||||
#include "base/check.h"
|
||||
#include "base/check_op.h"
|
||||
#include "base/functional/callback.h"
|
||||
#include "base/strings/utf_string_conversions.h"
|
||||
#include "pdf/pdfium/pdfium_mem_buffer_file_write.h"
|
||||
#include "pdf/pdfium/pdfium_ocr.h"
|
||||
#include "pdf/pdfium/pdfium_searchify_font.h"
|
||||
#include "services/screen_ai/public/mojom/screen_ai_service.mojom.h"
|
||||
#include "third_party/pdfium/public/cpp/fpdf_scopers.h"
|
||||
#include "third_party/pdfium/public/fpdf_edit.h"
|
||||
#include "third_party/pdfium/public/fpdf_save.h"
|
||||
#include "third_party/pdfium/public/fpdfview.h"
|
||||
#include "third_party/skia/include/core/SkBitmap.h"
|
||||
#include "third_party/skia/include/core/SkImageInfo.h"
|
||||
#include "third_party/skia/include/core/SkPixmap.h"
|
||||
|
||||
namespace chrome_pdf {
|
||||
|
||||
namespace {
|
||||
|
||||
std::vector<uint32_t> Utf8ToCharcodes(const std::string& string) {
|
||||
std::u16string utf16_str = base::UTF8ToUTF16(string);
|
||||
std::vector<uint32_t> charcodes;
|
||||
charcodes.reserve(utf16_str.size());
|
||||
for (auto c : utf16_str) {
|
||||
charcodes.push_back(c);
|
||||
}
|
||||
return charcodes;
|
||||
}
|
||||
|
||||
struct BoundingBoxOrigin {
|
||||
double x;
|
||||
double y;
|
||||
double theta;
|
||||
};
|
||||
|
||||
// The coordinate systems between OCR and PDF are different. OCR's origin is at
|
||||
// top-left, so we need to convert them to PDF's bottom-left.
|
||||
BoundingBoxOrigin ConvertToPdfOrigin(int x,
|
||||
int y,
|
||||
int width,
|
||||
int height,
|
||||
double angle,
|
||||
double coordinate_system_height) {
|
||||
double theta = angle * std::numbers::pi / 180;
|
||||
return {.x = x - (sin(theta) * height),
|
||||
.y = coordinate_system_height - (y + cos(theta) * height),
|
||||
.theta = -theta};
|
||||
}
|
||||
|
||||
// Project the text object's origin to the baseline's origin.
|
||||
BoundingBoxOrigin ProjectToBaseline(const BoundingBoxOrigin& origin,
|
||||
const BoundingBoxOrigin& baseline_origin) {
|
||||
// The length between `origin` and `baseline_origin`.
|
||||
double length = (origin.x - baseline_origin.x) * cos(baseline_origin.theta) +
|
||||
(origin.y - baseline_origin.y) * sin(baseline_origin.theta);
|
||||
return {.x = baseline_origin.x + length * cos(baseline_origin.theta),
|
||||
.y = baseline_origin.y + length * sin(baseline_origin.theta),
|
||||
.theta = baseline_origin.theta};
|
||||
}
|
||||
|
||||
// Adds one invisible text object to `page` for every word in `annotation`,
// placed over `image`.
//
// `annotation` is the OCR result for the bitmap extracted from `image`, so its
// boxes are expressed in image-pixel coordinates with a top-left origin. Each
// word is:
//   1. created with `font` and scaled to the word's pixel-size bounding box,
//   2. rotated and moved to its position on the full-size image, with the
//      origin flipped to bottom-left,
//   3. scaled from image-pixel size to the size the image is rendered at,
//   4. transformed by the image's matrix with its scaling removed.
//
// Words that cannot be processed are skipped. Nothing is added when the image
// geometry cannot be obtained or is degenerate.
void AddTextOnImage(FPDF_DOCUMENT document,
                    FPDF_PAGE page,
                    FPDF_FONT font,
                    FPDF_PAGEOBJECT image,
                    screen_ai::mojom::VisualAnnotationPtr annotation) {
  FS_QUADPOINTSF quadpoints;
  if (!FPDFPageObj_GetRotatedBounds(image, &quadpoints)) {
    DLOG(ERROR) << "Failed to get image rendered dimensions";
    return;
  }
  // Length of the edge between two corners of the rotated bounds.
  const auto edge_length = [](float x_a, float y_a, float x_b, float y_b) {
    const double dx = x_a - x_b;
    const double dy = y_a - y_b;
    return sqrt(dx * dx + dy * dy);
  };
  const double image_rendered_width = edge_length(
      quadpoints.x1, quadpoints.y1, quadpoints.x2, quadpoints.y2);
  const double image_rendered_height = edge_length(
      quadpoints.x2, quadpoints.y2, quadpoints.x3, quadpoints.y3);

  unsigned int image_pixel_width = 0;
  unsigned int image_pixel_height = 0;
  if (!FPDFImageObj_GetImagePixelSize(image, &image_pixel_width,
                                      &image_pixel_height)) {
    DLOG(ERROR) << "Failed to get image dimensions";
    return;
  }
  // All four sizes are used as divisors below; a zero would produce a
  // non-finite transform.
  if (image_rendered_width == 0 || image_rendered_height == 0 ||
      image_pixel_width == 0 || image_pixel_height == 0) {
    DLOG(ERROR) << "Got degenerate image dimensions";
    return;
  }

  FS_MATRIX image_matrix;
  if (!FPDFPageObj_GetMatrix(image, &image_matrix)) {
    DLOG(ERROR) << "Failed to get image matrix";
    return;
  }

  for (const auto& line : annotation->lines) {
    // OCR coordinates are in image pixels, so the vertical flip has to use
    // the image's pixel height. The conversion to rendered size happens
    // later, in the pixel-to-rendered scaling step.
    BoundingBoxOrigin baseline_origin = ConvertToPdfOrigin(
        line->baseline_box.x(), line->baseline_box.y(),
        line->baseline_box.width(), line->baseline_box.height(),
        line->baseline_box_angle, image_pixel_height);

    for (const auto& word : line->words) {
      double width = word->bounding_box.width();
      double height = word->bounding_box.height();

      if (width == 0 || height == 0) {
        continue;
      }

      std::string word_string = word->word;
      // TODO(crbug.com/41487613): A more accurate width would be the distance
      // from current word's origin to next word's origin.
      if (word->has_space_after) {
        word_string.push_back(' ');
      }

      // Checked before creating the text object so that nothing is allocated
      // for a word that is going to be skipped.
      if (word_string.empty()) {
        DLOG(ERROR) << "Got empty word";
        continue;
      }

      ScopedFPDFPageObject text(
          FPDFPageObj_CreateTextObj(document, font, height));
      CHECK(text);

      std::vector<uint32_t> charcodes = Utf8ToCharcodes(word_string);
      if (!FPDFText_SetCharcodes(text.get(), charcodes.data(),
                                 charcodes.size())) {
        DLOG(ERROR) << "Failed to set charcodes";
        continue;
      }

      // Make text invisible
      if (!FPDFTextObj_SetTextRenderMode(text.get(),
                                         FPDF_TEXTRENDERMODE_INVISIBLE)) {
        DLOG(ERROR) << "Failed to make text invisible";
        continue;
      }

      // Stretch the freshly created text object so that its bounds match the
      // OCR bounding box of the word.
      float left;
      float bottom;
      float right;
      float top;
      if (!FPDFPageObj_GetBounds(text.get(), &left, &bottom, &right, &top)) {
        DLOG(ERROR) << "Failed to get the bounding box of original text object";
        continue;
      }
      double original_text_object_width = right - left;
      double original_text_object_height = top - bottom;
      CHECK_GT(original_text_object_width, 0);
      CHECK_GT(original_text_object_height, 0);
      double width_scale = width / original_text_object_width;
      double height_scale = height / original_text_object_height;
      FPDFPageObj_Transform(text.get(), width_scale, 0, 0, height_scale, 0, 0);

      // Move text object to the corresponding text position on the full image.
      BoundingBoxOrigin origin = ConvertToPdfOrigin(
          word->bounding_box.x(), word->bounding_box.y(), width, height,
          word->bounding_box_angle, image_pixel_height);
      origin = ProjectToBaseline(origin, baseline_origin);
      double a = cos(origin.theta);
      double b = sin(origin.theta);
      double c = -sin(origin.theta);
      double d = cos(origin.theta);
      double e = origin.x;
      double f = origin.y;
      if (word->direction ==
          screen_ai::mojom::Direction::DIRECTION_RIGHT_TO_LEFT) {
        // Mirror the text along its own x axis and start it from the far end
        // of the word box.
        a = -a;
        b = -b;
        e += cos(origin.theta) * width;
        f += sin(origin.theta) * width;
      }
      FPDFPageObj_Transform(text.get(), a, b, c, d, e, f);

      // Scale from full image size to rendered image size on the PDF.
      FPDFPageObj_Transform(text.get(),
                            image_rendered_width / image_pixel_width, 0, 0,
                            image_rendered_height / image_pixel_height, 0, 0);

      // Apply the image's transformation matrix on the PDF page without the
      // scaling matrix.
      FPDFPageObj_Transform(text.get(), image_matrix.a / image_rendered_width,
                            image_matrix.b / image_rendered_width,
                            image_matrix.c / image_rendered_height,
                            image_matrix.d / image_rendered_height,
                            image_matrix.e, image_matrix.f);

      // Ownership is released here and the object is handed to the page.
      FPDFPage_InsertObject(page, text.release());
    }
  }
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// Produces a searchable copy of the PDF in `pdf_buffer`.
//
// Every page object that GetImageForOcr() yields a non-empty bitmap for is
// passed to `perform_ocr_callback`, and the returned annotation is added to
// the page through AddTextOnImage(). Pages and images that fail along the way
// are skipped. Returns the bytes of the saved document, or an empty vector if
// the document cannot be loaded, has no pages, or cannot be saved.
std::vector<uint8_t> PDFiumSearchify(
    base::span<const uint8_t> pdf_buffer,
    base::RepeatingCallback<screen_ai::mojom::VisualAnnotationPtr(
        const SkBitmap& bitmap)> perform_ocr_callback) {
  ScopedFPDFDocument doc(
      FPDF_LoadMemDocument64(pdf_buffer.data(), pdf_buffer.size(), nullptr));
  if (!doc) {
    DLOG(ERROR) << "Failed to load document";
    return {};
  }

  const int num_pages = FPDF_GetPageCount(doc.get());
  if (num_pages == 0) {
    DLOG(ERROR) << "Got zero page count";
    return {};
  }

  // A single font, loaded once, is used for every text object that is added.
  std::vector<uint8_t> cid_to_gid_map(CreateCidToGidMap());
  ScopedFPDFFont text_font(FPDFText_LoadCidType2Font(
      doc.get(), kPdfTtf, kPdfTtfSize, kToUnicodeCMap, cid_to_gid_map.data(),
      cid_to_gid_map.size()));
  CHECK(text_font);

  for (int page_num = 0; page_num < num_pages; ++page_num) {
    ScopedFPDFPage current_page(FPDF_LoadPage(doc.get(), page_num));
    if (!current_page) {
      DLOG(ERROR) << "Failed to load page";
      continue;
    }

    // The count is taken once, up front, so objects inserted into the page
    // while processing it are not visited by this loop.
    const int num_objects = FPDFPage_CountObjects(current_page.get());
    for (int obj = 0; obj < num_objects; ++obj) {
      SkBitmap ocr_input = GetImageForOcr(doc.get(), current_page.get(), obj);
      // The object is not an image or failed to get the bitmap from the image.
      if (ocr_input.empty()) {
        continue;
      }

      FPDF_PAGEOBJECT image_object =
          FPDFPage_GetObject(current_page.get(), obj);
      if (!image_object) {
        DLOG(ERROR) << "Failed to get image object";
        continue;
      }

      auto ocr_result = perform_ocr_callback.Run(ocr_input);
      if (!ocr_result) {
        DLOG(ERROR) << "Failed to get OCR annotation on the image";
        continue;
      }

      AddTextOnImage(doc.get(), current_page.get(), text_font.get(),
                     image_object, std::move(ocr_result));
    }

    // Failure here is only logged; processing moves on to the next page.
    if (!FPDFPage_GenerateContent(current_page.get())) {
      DLOG(ERROR) << "Failed to generate content";
    }
  }

  PDFiumMemBufferFileWrite writer;
  if (!FPDF_SaveAsCopy(doc.get(), &writer, 0)) {
    DLOG(ERROR) << "Failed to save the document";
    return {};
  }
  return writer.TakeBuffer();
}
|
||||
|
||||
} // namespace chrome_pdf
|
24
pdf/pdfium/pdfium_searchify.h
Normal file
24
pdf/pdfium/pdfium_searchify.h
Normal file
@ -0,0 +1,24 @@
|
||||
// Copyright 2024 The Chromium Authors
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
#ifndef PDF_PDFIUM_PDFIUM_SEARCHIFY_H_
|
||||
#define PDF_PDFIUM_PDFIUM_SEARCHIFY_H_
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "base/containers/span.h"
|
||||
#include "base/functional/callback_forward.h"
|
||||
#include "services/screen_ai/public/mojom/screen_ai_service.mojom-forward.h"
|
||||
#include "third_party/skia/include/core/SkBitmap.h"
|
||||
|
||||
namespace chrome_pdf {
|
||||
|
||||
std::vector<uint8_t> PDFiumSearchify(
|
||||
base::span<const uint8_t> pdf_buffer,
|
||||
base::RepeatingCallback<screen_ai::mojom::VisualAnnotationPtr(
|
||||
const SkBitmap& bitmap)> perform_ocr_callback);
|
||||
|
||||
} // namespace chrome_pdf
|
||||
|
||||
#endif // PDF_PDFIUM_PDFIUM_SEARCHIFY_H_
|
96
pdf/pdfium/pdfium_searchify_font.cc
Normal file
96
pdf/pdfium/pdfium_searchify_font.cc
Normal file
@ -0,0 +1,96 @@
|
||||
// Copyright 2024 The Chromium Authors
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
#include "pdf/pdfium/pdfium_searchify_font.h"
|
||||
|
||||
#include <cstdint>
|
||||
#include <vector>
|
||||
|
||||
namespace chrome_pdf {
|
||||
|
||||
// Raw bytes of a small font file embedded in the binary. The data starts with
// a TrueType header (version 0x00010000) whose table directory lists 10
// tables: OS/2, cmap, glyf, head, hhea, hmtx, loca, maxp, name and post.
// NOTE(review): presumably this is the font attached to the text that
// searchify adds to a page - confirm at the use site.
const uint8_t kPdfTtf[] = {
|
||||
0x0, 0x1, 0x0, 0x0, 0x0, 0xa, 0x0, 0x80, 0x0, 0x3, 0x0, 0x20,
|
||||
0x4f, 0x53, 0x2f, 0x32, 0x56, 0xde, 0xc8, 0x94, 0x0, 0x0, 0x1, 0x28,
|
||||
0x0, 0x0, 0x0, 0x60, 0x63, 0x6d, 0x61, 0x70, 0x0, 0xa, 0x0, 0x34,
|
||||
0x0, 0x0, 0x1, 0x90, 0x0, 0x0, 0x0, 0x1e, 0x67, 0x6c, 0x79, 0x66,
|
||||
0x15, 0x22, 0x41, 0x24, 0x0, 0x0, 0x1, 0xb8, 0x0, 0x0, 0x0, 0x18,
|
||||
0x68, 0x65, 0x61, 0x64, 0xb, 0x78, 0xf1, 0x65, 0x0, 0x0, 0x0, 0xac,
|
||||
0x0, 0x0, 0x0, 0x36, 0x68, 0x68, 0x65, 0x61, 0xc, 0x2, 0x4, 0x2,
|
||||
0x0, 0x0, 0x0, 0xe4, 0x0, 0x0, 0x0, 0x24, 0x68, 0x6d, 0x74, 0x78,
|
||||
0x4, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1, 0x88, 0x0, 0x0, 0x0, 0x8,
|
||||
0x6c, 0x6f, 0x63, 0x61, 0x0, 0xc, 0x0, 0x0, 0x0, 0x0, 0x1, 0xb0,
|
||||
0x0, 0x0, 0x0, 0x6, 0x6d, 0x61, 0x78, 0x70, 0x0, 0x4, 0x0, 0x5,
|
||||
0x0, 0x0, 0x1, 0x8, 0x0, 0x0, 0x0, 0x20, 0x6e, 0x61, 0x6d, 0x65,
|
||||
0xf2, 0xeb, 0x16, 0xda, 0x0, 0x0, 0x1, 0xd0, 0x0, 0x0, 0x0, 0x4b,
|
||||
0x70, 0x6f, 0x73, 0x74, 0x0, 0x1, 0x0, 0x1, 0x0, 0x0, 0x2, 0x1c,
|
||||
0x0, 0x0, 0x0, 0x20, 0x0, 0x1, 0x0, 0x0, 0x0, 0x1, 0x0, 0x0,
|
||||
0xb0, 0x94, 0x71, 0x10, 0x5f, 0xf, 0x3c, 0xf5, 0x4, 0x7, 0x8, 0x0,
|
||||
0x0, 0x0, 0x0, 0x0, 0xcf, 0x9a, 0xfc, 0x6e, 0x0, 0x0, 0x0, 0x0,
|
||||
0xd4, 0xc3, 0xa7, 0xf2, 0x0, 0x0, 0x0, 0x0, 0x4, 0x0, 0x8, 0x0,
|
||||
0x0, 0x0, 0x0, 0x10, 0x0, 0x2, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
|
||||
0x0, 0x1, 0x0, 0x0, 0x8, 0x0, 0xff, 0xff, 0x0, 0x0, 0x4, 0x0,
|
||||
0x0, 0x0, 0x0, 0x0, 0x4, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0, 0x0,
|
||||
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x2,
|
||||
0x0, 0x1, 0x0, 0x0, 0x0, 0x2, 0x0, 0x4, 0x0, 0x1, 0x0, 0x0,
|
||||
0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
|
||||
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x3, 0x0, 0x0,
|
||||
0x1, 0x90, 0x0, 0x5, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
|
||||
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
|
||||
0x0, 0x0, 0x0, 0x0, 0x5, 0x0, 0x1, 0x0, 0x1, 0x0, 0x0, 0x0,
|
||||
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
|
||||
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x47, 0x4f, 0x4f, 0x47, 0x0, 0x40,
|
||||
0x0, 0x0, 0x0, 0x0, 0x0, 0x1, 0xff, 0xff, 0x0, 0x0, 0x0, 0x1,
|
||||
0x0, 0x1, 0x80, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
|
||||
0x0, 0x0, 0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
|
||||
0x4, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x2, 0x0, 0x1, 0x0, 0x0,
|
||||
0x0, 0x0, 0x0, 0x14, 0x0, 0x3, 0x0, 0x0, 0x0, 0x0, 0x0, 0x14,
|
||||
0x0, 0x6, 0x0, 0xa, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
|
||||
0x0, 0x0, 0x0, 0x0, 0x0, 0xc, 0x0, 0x0, 0x0, 0x1, 0x0, 0x0,
|
||||
0x0, 0x0, 0x4, 0x0, 0x8, 0x0, 0x0, 0x3, 0x0, 0x0, 0x31, 0x21,
|
||||
0x11, 0x21, 0x4, 0x0, 0xfc, 0x0, 0x8, 0x0, 0x0, 0x0, 0x0, 0x3,
|
||||
0x0, 0x2a, 0x0, 0x0, 0x0, 0x3, 0x0, 0x0, 0x0, 0x5, 0x0, 0x16,
|
||||
0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x5, 0x0, 0xb,
|
||||
0x0, 0x16, 0x0, 0x3, 0x0, 0x1, 0x4, 0x9, 0x0, 0x5, 0x0, 0x16,
|
||||
0x0, 0x0, 0x0, 0x56, 0x0, 0x65, 0x0, 0x72, 0x0, 0x73, 0x0, 0x69,
|
||||
0x0, 0x6f, 0x0, 0x6e, 0x0, 0x20, 0x0, 0x31, 0x0, 0x2e, 0x0, 0x30,
|
||||
0x56, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x20, 0x31, 0x2e, 0x30, 0x0,
|
||||
0x0, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
|
||||
0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
|
||||
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
|
||||
|
||||
// Number of bytes in `kPdfTtf`. `sizeof` yields a size_t; the conversion to
// the 32-bit type declared for this constant is spelled out explicitly.
const uint32_t kPdfTtfSize = static_cast<uint32_t>(sizeof(kPdfTtf));
|
||||
|
||||
// Text of a CMap program named /Adobe-Identity-H (CMapType 2). It declares a
// single two-byte codespace range <0000>-<FFFF> and a single bfrange entry,
// `<0000> <FFFF> <0000>`.
// NOTE(review): going by the variable name this is meant to be attached to a
// font as its ToUnicode CMap - confirm at the use site.
// The raw string literal begins right after `R"(`, so the value starts with a
// newline character.
const char kToUnicodeCMap[] = R"(
|
||||
/CIDInit /ProcSet findresource begin
|
||||
12 dict begin
|
||||
begincmap
|
||||
/CIDSystemInfo <<
|
||||
/Registry (Adobe)
|
||||
/Ordering (Identity)
|
||||
/Supplement 0
|
||||
>> def
|
||||
/CMapName /Adobe-Identity-H def
|
||||
/CMapType 2 def
|
||||
1 begincodespacerange
|
||||
<0000> <FFFF>
|
||||
endcodespacerange
|
||||
1 beginbfrange
|
||||
<0000> <FFFF> <0000>
|
||||
endbfrange
|
||||
endcmap
|
||||
CMapName currentdict /CMap defineresource pop
|
||||
end
|
||||
end
|
||||
)";
|
||||
|
||||
// Builds the byte table for a CID-to-GID map covering 1 << 16 entries.
//
// Every entry is two bytes long and holds the same value, {0x00, 0x01}, so
// the returned vector has 2 * 65536 = 131072 bytes alternating 0, 1, 0, 1...
// NOTE(review): read as a big-endian 16-bit number each entry is 1, i.e. every
// CID presumably maps to glyph index 1 of the accompanying font - confirm
// against the code that consumes this table.
std::vector<uint8_t> CreateCidToGidMap() {
  constexpr int kCidCount = 1 << 16;
  constexpr int kBytesPerEntry = 2;

  std::vector<uint8_t> arr;
  // The final size is known up front; reserving once avoids the repeated
  // reallocate-and-copy cycles that growing byte by byte would trigger.
  arr.reserve(kBytesPerEntry * kCidCount);
  for (int cid = 0; cid < kCidCount; ++cid) {
    arr.push_back(0);  // High byte of the entry.
    arr.push_back(1);  // Low byte of the entry.
  }
  return arr;
}
|
||||
|
||||
} // namespace chrome_pdf
|
22
pdf/pdfium/pdfium_searchify_font.h
Normal file
22
pdf/pdfium/pdfium_searchify_font.h
Normal file
@ -0,0 +1,22 @@
|
||||
// Copyright 2024 The Chromium Authors
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
#ifndef PDF_PDFIUM_PDFIUM_SEARCHIFY_FONT_H_
|
||||
#define PDF_PDFIUM_PDFIUM_SEARCHIFY_FONT_H_
|
||||
|
||||
#include <cstdint>
|
||||
#include <vector>
|
||||
|
||||
namespace chrome_pdf {
|
||||
|
||||
// Raw bytes of an embedded TrueType font file.
extern const uint8_t kPdfTtf[];
|
||||
// Size of `kPdfTtf` in bytes.
extern const uint32_t kPdfTtfSize;
|
||||
|
||||
// NUL-terminated text of a CMap program that declares the codespace range
// <0000>-<FFFF> and one bfrange entry starting at <0000>.
extern const char kToUnicodeCMap[];
|
||||
|
||||
// Returns a freshly built 131072-byte table: 65536 two-byte entries, each
// equal to {0x00, 0x01}.
std::vector<uint8_t> CreateCidToGidMap();
|
||||
|
||||
} // namespace chrome_pdf
|
||||
|
||||
#endif // PDF_PDFIUM_PDFIUM_SEARCHIFY_FONT_H_
|
Reference in New Issue
Block a user