Move PdfiumProgressiveSearchifier class to a separate file.
`PdfiumProgressiveSearchifier` class is moved to a separate file to make it clearer to add `PdfiumOnDemandSearchifier` in next CL. This CL does not change any behavior. Bug: 360803943 Change-Id: Iefbd35b7d11e77dcecff0f9818e8a5106fcb82d9 Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/5864309 Reviewed-by: Lei Zhang <thestig@chromium.org> Commit-Queue: Ramin Halavati <rhalavati@chromium.org> Cr-Commit-Position: refs/heads/main@{#1356287}
This commit is contained in:

committed by
Chromium LUCI CQ

parent
e710ab6499
commit
f57d9cd67e
@ -231,6 +231,8 @@ if (enable_pdf) {
|
||||
|
||||
if (enable_screen_ai_service) {
|
||||
sources += [
|
||||
"pdfium/pdfium_progressive_searchifier.cc",
|
||||
"pdfium/pdfium_progressive_searchifier.h",
|
||||
"pdfium/pdfium_searchify.cc",
|
||||
"pdfium/pdfium_searchify.h",
|
||||
"pdfium/pdfium_searchify_font.cc",
|
||||
|
@ -43,6 +43,7 @@
|
||||
|
||||
#include "base/functional/callback.h"
|
||||
#include "pdf/pdf_progressive_searchifier.h"
|
||||
#include "pdf/pdfium/pdfium_progressive_searchifier.h"
|
||||
#include "pdf/pdfium/pdfium_searchify.h"
|
||||
#include "services/screen_ai/public/mojom/screen_ai_service.mojom.h"
|
||||
#include "third_party/skia/include/core/SkBitmap.h"
|
||||
|
90
pdf/pdfium/pdfium_progressive_searchifier.cc
Normal file
90
pdf/pdfium/pdfium_progressive_searchifier.cc
Normal file
@ -0,0 +1,90 @@
|
||||
// Copyright 2024 The Chromium Authors
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
#include "pdf/pdfium/pdfium_progressive_searchifier.h"
|
||||
|
||||
#include "base/check.h"
|
||||
#include "base/compiler_specific.h"
|
||||
#include "pdf/pdfium/pdfium_engine.h"
|
||||
#include "pdf/pdfium/pdfium_mem_buffer_file_write.h"
|
||||
#include "pdf/pdfium/pdfium_searchify.h"
|
||||
#include "services/screen_ai/public/mojom/screen_ai_service.mojom.h"
|
||||
#include "ui/gfx/codec/jpeg_codec.h"
|
||||
|
||||
namespace {
|
||||
|
||||
int GetBlockForJpeg(void* param,
|
||||
unsigned long pos,
|
||||
unsigned char* buf,
|
||||
unsigned long size) {
|
||||
auto data_vector = *static_cast<base::span<const uint8_t>*>(param);
|
||||
if (pos + size < pos || pos + size > data_vector.size()) {
|
||||
return 0;
|
||||
}
|
||||
// TODO(tsepez): spanify arguments to remove the error.
|
||||
base::span<uint8_t> UNSAFE_TODO(buf_span(buf, size));
|
||||
buf_span.copy_from(data_vector.subspan(pos, size));
|
||||
return 1;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
namespace chrome_pdf {
|
||||
|
||||
PdfiumProgressiveSearchifier::ScopedSdkInitializer::ScopedSdkInitializer() {
|
||||
// TODO(thestig): Check the default value of `use_skia`.
|
||||
InitializeSDK(false, false, FontMappingMode::kNoMapping);
|
||||
}
|
||||
|
||||
PdfiumProgressiveSearchifier::ScopedSdkInitializer::~ScopedSdkInitializer() {
|
||||
ShutdownSDK();
|
||||
}
|
||||
|
||||
PdfiumProgressiveSearchifier::PdfiumProgressiveSearchifier()
|
||||
: doc_(FPDF_CreateNewDocument()), font_(CreateFont(doc_.get())) {
|
||||
CHECK(doc_);
|
||||
CHECK(font_);
|
||||
}
|
||||
|
||||
PdfiumProgressiveSearchifier::~PdfiumProgressiveSearchifier() = default;
|
||||
|
||||
// TODO(chuhsuan): Return bool instead of crashing on error.
|
||||
void PdfiumProgressiveSearchifier::AddPage(
|
||||
const SkBitmap& bitmap,
|
||||
uint32_t page_index,
|
||||
screen_ai::mojom::VisualAnnotationPtr annotation) {
|
||||
CHECK(annotation);
|
||||
// Replace the page if it already exists.
|
||||
DeletePage(page_index);
|
||||
int width = bitmap.width();
|
||||
int height = bitmap.height();
|
||||
ScopedFPDFPage page(FPDFPage_New(doc_.get(), page_index, width, height));
|
||||
CHECK(page);
|
||||
ScopedFPDFPageObject image(FPDFPageObj_NewImageObj(doc_.get()));
|
||||
CHECK(image);
|
||||
std::vector<uint8_t> encoded;
|
||||
CHECK(gfx::JPEGCodec::Encode(bitmap, 100, &encoded));
|
||||
FPDF_FILEACCESS file_access{
|
||||
.m_FileLen = static_cast<unsigned long>(encoded.size()),
|
||||
.m_GetBlock = &GetBlockForJpeg,
|
||||
.m_Param = &encoded};
|
||||
CHECK(FPDFImageObj_LoadJpegFileInline(nullptr, 0, image.get(), &file_access));
|
||||
CHECK(FPDFImageObj_SetMatrix(image.get(), width, 0, 0, height, 0, 0));
|
||||
AddTextOnImage(doc_.get(), page.get(), font_.get(), image.get(),
|
||||
std::move(annotation), gfx::Size(width, height));
|
||||
FPDFPage_InsertObject(page.get(), image.release());
|
||||
CHECK(FPDFPage_GenerateContent(page.get()));
|
||||
}
|
||||
|
||||
void PdfiumProgressiveSearchifier::DeletePage(uint32_t page_index) {
|
||||
FPDFPage_Delete(doc_.get(), page_index);
|
||||
}
|
||||
|
||||
std::vector<uint8_t> PdfiumProgressiveSearchifier::Save() {
|
||||
PDFiumMemBufferFileWrite output_file_write;
|
||||
CHECK(FPDF_SaveAsCopy(doc_.get(), &output_file_write, 0));
|
||||
return output_file_write.TakeBuffer();
|
||||
}
|
||||
|
||||
} // namespace chrome_pdf
|
49
pdf/pdfium/pdfium_progressive_searchifier.h
Normal file
49
pdf/pdfium/pdfium_progressive_searchifier.h
Normal file
@ -0,0 +1,49 @@
|
||||
// Copyright 2024 The Chromium Authors
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
#ifndef PDF_PDFIUM_PDFIUM_PROGRESSIVE_SEARCHIFIER_H_
|
||||
#define PDF_PDFIUM_PDFIUM_PROGRESSIVE_SEARCHIFIER_H_
|
||||
|
||||
#include "pdf/pdf_progressive_searchifier.h"
|
||||
#include "services/screen_ai/public/mojom/screen_ai_service.mojom-forward.h"
|
||||
#include "third_party/pdfium/public/cpp/fpdf_scopers.h"
|
||||
|
||||
namespace chrome_pdf {
|
||||
|
||||
class PdfiumProgressiveSearchifier : public PdfProgressiveSearchifier {
|
||||
public:
|
||||
PdfiumProgressiveSearchifier();
|
||||
PdfiumProgressiveSearchifier(const PdfiumProgressiveSearchifier&) = delete;
|
||||
PdfiumProgressiveSearchifier& operator=(const PdfiumProgressiveSearchifier&) =
|
||||
delete;
|
||||
~PdfiumProgressiveSearchifier() override;
|
||||
|
||||
// PdfProgressiveSearchifier:
|
||||
void AddPage(const SkBitmap& bitmap,
|
||||
uint32_t page_index,
|
||||
screen_ai::mojom::VisualAnnotationPtr annotation) override;
|
||||
void DeletePage(uint32_t page_index) override;
|
||||
std::vector<uint8_t> Save() override;
|
||||
|
||||
private:
|
||||
// TODO(chuhsuan): Consider moving this to pdf_init.h as
|
||||
// pdfium_engine_exports_unittest.cc and pdf.cc have similar ones.
|
||||
class ScopedSdkInitializer {
|
||||
public:
|
||||
ScopedSdkInitializer();
|
||||
|
||||
ScopedSdkInitializer(const ScopedSdkInitializer&) = delete;
|
||||
ScopedSdkInitializer& operator=(const ScopedSdkInitializer&) = delete;
|
||||
|
||||
~ScopedSdkInitializer();
|
||||
};
|
||||
|
||||
ScopedSdkInitializer sdk_initializer_;
|
||||
ScopedFPDFDocument doc_;
|
||||
ScopedFPDFFont font_;
|
||||
};
|
||||
|
||||
} // namespace chrome_pdf
|
||||
|
||||
#endif // PDF_PDFIUM_PDFIUM_PROGRESSIVE_SEARCHIFIER_H_
|
@ -32,7 +32,6 @@
|
||||
#include "third_party/skia/include/core/SkBitmap.h"
|
||||
#include "third_party/skia/include/core/SkImageInfo.h"
|
||||
#include "third_party/skia/include/core/SkPixmap.h"
|
||||
#include "ui/gfx/codec/jpeg_codec.h"
|
||||
#include "ui/gfx/geometry/point_f.h"
|
||||
#include "ui/gfx/geometry/rect.h"
|
||||
#include "ui/gfx/geometry/size.h"
|
||||
@ -170,81 +169,6 @@ void AddWordOnImage(FPDF_DOCUMENT document,
|
||||
FPDFPage_InsertObject(page, text.release());
|
||||
}
|
||||
|
||||
void AddTextOnImage(FPDF_DOCUMENT document,
|
||||
FPDF_PAGE page,
|
||||
FPDF_FONT font,
|
||||
FPDF_PAGEOBJECT image,
|
||||
screen_ai::mojom::VisualAnnotationPtr annotation,
|
||||
const gfx::Size& image_pixel_size) {
|
||||
const gfx::SizeF image_rendered_size = GetRenderedImageSize(image);
|
||||
if (image_rendered_size.IsEmpty()) {
|
||||
DLOG(ERROR) << "Failed to get image rendered dimensions";
|
||||
return;
|
||||
}
|
||||
|
||||
// The transformation matrices is applied as follows:
|
||||
std::array<FS_MATRIX, 3> transform_matrices;
|
||||
// Move text object to the corresponding text position on the full image.
|
||||
FS_MATRIX& move_matrix = transform_matrices[0];
|
||||
// Scale from full image size to rendered image size on the PDF.
|
||||
FS_MATRIX& image_scale_matrix = transform_matrices[1];
|
||||
// Apply the image's transformation matrix on the PDF page without the
|
||||
// scaling matrix.
|
||||
FS_MATRIX& image_without_scaling_matrix = transform_matrices[2];
|
||||
|
||||
image_scale_matrix = {
|
||||
image_rendered_size.width() / image_pixel_size.width(), 0, 0,
|
||||
image_rendered_size.height() / image_pixel_size.height(), 0, 0};
|
||||
if (!CalculateImageWithoutScalingMatrix(image, image_rendered_size,
|
||||
image_without_scaling_matrix)) {
|
||||
DLOG(ERROR) << "Failed to get image matrix";
|
||||
return;
|
||||
}
|
||||
|
||||
for (const auto& line : annotation->lines) {
|
||||
SearchifyBoundingBoxOrigin baseline_origin =
|
||||
ConvertToPdfOrigin(line->baseline_box, line->baseline_box_angle,
|
||||
image_pixel_size.height());
|
||||
|
||||
for (const auto& word : line->words) {
|
||||
if (word->bounding_box.IsEmpty()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
SearchifyBoundingBoxOrigin origin =
|
||||
ConvertToPdfOrigin(word->bounding_box, word->bounding_box_angle,
|
||||
image_pixel_size.height());
|
||||
move_matrix = CalculateWordMoveMatrix(
|
||||
ProjectToBaseline(origin.point, baseline_origin),
|
||||
word->bounding_box.width(),
|
||||
word->direction ==
|
||||
screen_ai::mojom::Direction::DIRECTION_RIGHT_TO_LEFT);
|
||||
AddWordOnImage(document, page, font, word, transform_matrices);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ScopedFPDFFont CreateFont(FPDF_DOCUMENT document) {
|
||||
std::vector<uint8_t> cid_to_gid_map(CreateCidToGidMap());
|
||||
return ScopedFPDFFont(
|
||||
FPDFText_LoadCidType2Font(document, kPdfTtf, kPdfTtfSize, kToUnicodeCMap,
|
||||
cid_to_gid_map.data(), cid_to_gid_map.size()));
|
||||
}
|
||||
|
||||
int GetBlockForJpeg(void* param,
|
||||
unsigned long pos,
|
||||
unsigned char* buf,
|
||||
unsigned long size) {
|
||||
auto data_vector = *static_cast<base::span<const uint8_t>*>(param);
|
||||
if (pos + size < pos || pos + size > data_vector.size()) {
|
||||
return 0;
|
||||
}
|
||||
// TODO(tsepez): spanify arguments to remove the error.
|
||||
base::span<uint8_t> UNSAFE_TODO(buf_span(buf, size));
|
||||
buf_span.copy_from(data_vector.subspan(pos, size));
|
||||
return 1;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
std::vector<uint8_t> PDFiumSearchify(
|
||||
@ -301,6 +225,60 @@ std::vector<uint8_t> PDFiumSearchify(
|
||||
return output_file_write.TakeBuffer();
|
||||
}
|
||||
|
||||
void AddTextOnImage(FPDF_DOCUMENT document,
|
||||
FPDF_PAGE page,
|
||||
FPDF_FONT font,
|
||||
FPDF_PAGEOBJECT image,
|
||||
screen_ai::mojom::VisualAnnotationPtr annotation,
|
||||
const gfx::Size& image_pixel_size) {
|
||||
const gfx::SizeF image_rendered_size = GetRenderedImageSize(image);
|
||||
if (image_rendered_size.IsEmpty()) {
|
||||
DLOG(ERROR) << "Failed to get image rendered dimensions";
|
||||
return;
|
||||
}
|
||||
|
||||
// The transformation matrices is applied as follows:
|
||||
std::array<FS_MATRIX, 3> transform_matrices;
|
||||
// Move text object to the corresponding text position on the full image.
|
||||
FS_MATRIX& move_matrix = transform_matrices[0];
|
||||
// Scale from full image size to rendered image size on the PDF.
|
||||
FS_MATRIX& image_scale_matrix = transform_matrices[1];
|
||||
// Apply the image's transformation matrix on the PDF page without the
|
||||
// scaling matrix.
|
||||
FS_MATRIX& image_without_scaling_matrix = transform_matrices[2];
|
||||
|
||||
image_scale_matrix = {
|
||||
image_rendered_size.width() / image_pixel_size.width(), 0, 0,
|
||||
image_rendered_size.height() / image_pixel_size.height(), 0, 0};
|
||||
if (!CalculateImageWithoutScalingMatrix(image, image_rendered_size,
|
||||
image_without_scaling_matrix)) {
|
||||
DLOG(ERROR) << "Failed to get image matrix";
|
||||
return;
|
||||
}
|
||||
|
||||
for (const auto& line : annotation->lines) {
|
||||
SearchifyBoundingBoxOrigin baseline_origin =
|
||||
ConvertToPdfOrigin(line->baseline_box, line->baseline_box_angle,
|
||||
image_pixel_size.height());
|
||||
|
||||
for (const auto& word : line->words) {
|
||||
if (word->bounding_box.IsEmpty()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
SearchifyBoundingBoxOrigin origin =
|
||||
ConvertToPdfOrigin(word->bounding_box, word->bounding_box_angle,
|
||||
image_pixel_size.height());
|
||||
move_matrix = CalculateWordMoveMatrix(
|
||||
ProjectToBaseline(origin.point, baseline_origin),
|
||||
word->bounding_box.width(),
|
||||
word->direction ==
|
||||
screen_ai::mojom::Direction::DIRECTION_RIGHT_TO_LEFT);
|
||||
AddWordOnImage(document, page, font, word, transform_matrices);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
SearchifyBoundingBoxOrigin ConvertToPdfOriginForTesting(
|
||||
const gfx::Rect& rect,
|
||||
float angle,
|
||||
@ -315,59 +293,11 @@ FS_MATRIX CalculateWordMoveMatrixForTesting(
|
||||
return CalculateWordMoveMatrix(origin, word_bounding_box_width, word_is_rtl);
|
||||
}
|
||||
|
||||
PdfiumProgressiveSearchifier::ScopedSdkInitializer::ScopedSdkInitializer() {
|
||||
// TODO(thestig): Check the default value of `use_skia`.
|
||||
InitializeSDK(false, false, FontMappingMode::kNoMapping);
|
||||
}
|
||||
|
||||
PdfiumProgressiveSearchifier::ScopedSdkInitializer::~ScopedSdkInitializer() {
|
||||
ShutdownSDK();
|
||||
}
|
||||
|
||||
PdfiumProgressiveSearchifier::PdfiumProgressiveSearchifier()
|
||||
: doc_(FPDF_CreateNewDocument()), font_(CreateFont(doc_.get())) {
|
||||
CHECK(doc_);
|
||||
CHECK(font_);
|
||||
}
|
||||
|
||||
PdfiumProgressiveSearchifier::~PdfiumProgressiveSearchifier() = default;
|
||||
|
||||
// TODO(chuhsuan): Return bool instead of crashing on error.
|
||||
void PdfiumProgressiveSearchifier::AddPage(
|
||||
const SkBitmap& bitmap,
|
||||
uint32_t page_index,
|
||||
screen_ai::mojom::VisualAnnotationPtr annotation) {
|
||||
CHECK(annotation);
|
||||
// Replace the page if it already exists.
|
||||
DeletePage(page_index);
|
||||
int width = bitmap.width();
|
||||
int height = bitmap.height();
|
||||
ScopedFPDFPage page(FPDFPage_New(doc_.get(), page_index, width, height));
|
||||
CHECK(page);
|
||||
ScopedFPDFPageObject image(FPDFPageObj_NewImageObj(doc_.get()));
|
||||
CHECK(image);
|
||||
std::vector<uint8_t> encoded;
|
||||
CHECK(gfx::JPEGCodec::Encode(bitmap, 100, &encoded));
|
||||
FPDF_FILEACCESS file_access{
|
||||
.m_FileLen = static_cast<unsigned long>(encoded.size()),
|
||||
.m_GetBlock = &GetBlockForJpeg,
|
||||
.m_Param = &encoded};
|
||||
CHECK(FPDFImageObj_LoadJpegFileInline(nullptr, 0, image.get(), &file_access));
|
||||
CHECK(FPDFImageObj_SetMatrix(image.get(), width, 0, 0, height, 0, 0));
|
||||
AddTextOnImage(doc_.get(), page.get(), font_.get(), image.get(),
|
||||
std::move(annotation), gfx::Size(width, height));
|
||||
FPDFPage_InsertObject(page.get(), image.release());
|
||||
CHECK(FPDFPage_GenerateContent(page.get()));
|
||||
}
|
||||
|
||||
void PdfiumProgressiveSearchifier::DeletePage(uint32_t page_index) {
|
||||
FPDFPage_Delete(doc_.get(), page_index);
|
||||
}
|
||||
|
||||
std::vector<uint8_t> PdfiumProgressiveSearchifier::Save() {
|
||||
PDFiumMemBufferFileWrite output_file_write;
|
||||
CHECK(FPDF_SaveAsCopy(doc_.get(), &output_file_write, 0));
|
||||
return output_file_write.TakeBuffer();
|
||||
ScopedFPDFFont CreateFont(FPDF_DOCUMENT document) {
|
||||
std::vector<uint8_t> cid_to_gid_map(CreateCidToGidMap());
|
||||
return ScopedFPDFFont(
|
||||
FPDFText_LoadCidType2Font(document, kPdfTtf, kPdfTtfSize, kToUnicodeCMap,
|
||||
cid_to_gid_map.data(), cid_to_gid_map.size()));
|
||||
}
|
||||
|
||||
} // namespace chrome_pdf
|
||||
|
@ -10,7 +10,6 @@
|
||||
|
||||
#include "base/containers/span.h"
|
||||
#include "base/functional/callback_forward.h"
|
||||
#include "pdf/pdf_progressive_searchifier.h"
|
||||
#include "services/screen_ai/public/mojom/screen_ai_service.mojom-forward.h"
|
||||
#include "third_party/pdfium/public/cpp/fpdf_scopers.h"
|
||||
#include "third_party/pdfium/public/fpdfview.h"
|
||||
@ -19,6 +18,7 @@
|
||||
|
||||
namespace gfx {
|
||||
class Rect;
|
||||
class Size;
|
||||
} // namespace gfx
|
||||
|
||||
namespace chrome_pdf {
|
||||
@ -33,6 +33,18 @@ std::vector<uint8_t> PDFiumSearchify(
|
||||
base::RepeatingCallback<screen_ai::mojom::VisualAnnotationPtr(
|
||||
const SkBitmap& bitmap)> perform_ocr_callback);
|
||||
|
||||
// Creates an invisible font.
|
||||
ScopedFPDFFont CreateFont(FPDF_DOCUMENT document);
|
||||
|
||||
// Adds the recognized text in `annotation` to the given `page`, to be written
|
||||
// over `image`.
|
||||
void AddTextOnImage(FPDF_DOCUMENT document,
|
||||
FPDF_PAGE page,
|
||||
FPDF_FONT font,
|
||||
FPDF_PAGEOBJECT image,
|
||||
screen_ai::mojom::VisualAnnotationPtr annotation,
|
||||
const gfx::Size& image_pixel_size);
|
||||
|
||||
// Internal functions exposed for testing.
|
||||
SearchifyBoundingBoxOrigin ConvertToPdfOriginForTesting(
|
||||
const gfx::Rect& rect,
|
||||
@ -43,39 +55,6 @@ FS_MATRIX CalculateWordMoveMatrixForTesting(
|
||||
int word_bounding_box_width,
|
||||
bool word_is_rtl);
|
||||
|
||||
class PdfiumProgressiveSearchifier : public PdfProgressiveSearchifier {
|
||||
public:
|
||||
PdfiumProgressiveSearchifier();
|
||||
PdfiumProgressiveSearchifier(const PdfiumProgressiveSearchifier&) = delete;
|
||||
PdfiumProgressiveSearchifier& operator=(const PdfiumProgressiveSearchifier&) =
|
||||
delete;
|
||||
~PdfiumProgressiveSearchifier() override;
|
||||
|
||||
// PdfProgressiveSearchifier:
|
||||
void AddPage(const SkBitmap& bitmap,
|
||||
uint32_t page_index,
|
||||
screen_ai::mojom::VisualAnnotationPtr annotation) override;
|
||||
void DeletePage(uint32_t page_index) override;
|
||||
std::vector<uint8_t> Save() override;
|
||||
|
||||
private:
|
||||
// TODO(chuhsuan): Consider moving this to pdf_init.h as
|
||||
// pdfium_engine_exports_unittest.cc and pdf.cc have similar ones.
|
||||
class ScopedSdkInitializer {
|
||||
public:
|
||||
ScopedSdkInitializer();
|
||||
|
||||
ScopedSdkInitializer(const ScopedSdkInitializer&) = delete;
|
||||
ScopedSdkInitializer& operator=(const ScopedSdkInitializer&) = delete;
|
||||
|
||||
~ScopedSdkInitializer();
|
||||
};
|
||||
|
||||
ScopedSdkInitializer sdk_initializer_;
|
||||
ScopedFPDFDocument doc_;
|
||||
ScopedFPDFFont font_;
|
||||
};
|
||||
|
||||
} // namespace chrome_pdf
|
||||
|
||||
#endif // PDF_PDFIUM_PDFIUM_SEARCHIFY_H_
|
||||
|
Reference in New Issue
Block a user