Simplify ConvertToPdfOrigin() inside pdfium_searchify.cc and add tests
Expose the function as ConvertToPdfOriginForTesting() to add some unit tests. Also remove the unused width parameter.

Change-Id: Iaa7242f138fbe1a132c58643652daac80cc03aac
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/5691718
Commit-Queue: Lei Zhang <thestig@chromium.org>
Reviewed-by: Chu-Hsuan Yang <chuhsuan@chromium.org>
Cr-Commit-Position: refs/heads/main@{#1325951}
This commit is contained in:

committed by
Chromium LUCI CQ

parent
c3e6dac201
commit
ece448f559
@ -477,6 +477,10 @@ if (enable_pdf) {
|
||||
]
|
||||
}
|
||||
|
||||
if (enable_screen_ai_service) {
|
||||
sources += [ "pdfium/pdfium_searchify_unittest.cc" ]
|
||||
}
|
||||
|
||||
if (v8_use_external_startup_data) {
|
||||
deps += [
|
||||
"//tools/v8_context_snapshot",
|
||||
|
@ -47,20 +47,13 @@ std::vector<uint32_t> Utf8ToCharcodes(const std::string& string) {
|
||||
return charcodes;
|
||||
}
|
||||
|
||||
// Origin point of a bounding box together with its rotation. This is the
// return type of ConvertToPdfOrigin() and the argument type of
// ProjectToBaseline().
struct BoundingBoxOrigin {
|
||||
// Horizontal position of the origin.
double x;
|
||||
// Vertical position of the origin.
double y;
|
||||
// Rotation of the box. Its users pass it directly to cos()/sin(), so it is
// expressed in radians.
double theta;
|
||||
};
|
||||
|
||||
// The coordinate systems between OCR and PDF are different. OCR's origin is at
|
||||
// top-left, so we need to convert them to PDF's bottom-left.
|
||||
BoundingBoxOrigin ConvertToPdfOrigin(int x,
|
||||
int y,
|
||||
int width,
|
||||
int height,
|
||||
double angle,
|
||||
double coordinate_system_height) {
|
||||
SearchifyBoundingBoxOrigin ConvertToPdfOrigin(int x,
|
||||
int y,
|
||||
int height,
|
||||
double angle,
|
||||
double coordinate_system_height) {
|
||||
const double theta = base::DegToRad(angle);
|
||||
return {.x = x - (sin(theta) * height),
|
||||
.y = coordinate_system_height - (y + cos(theta) * height),
|
||||
@ -68,8 +61,9 @@ BoundingBoxOrigin ConvertToPdfOrigin(int x,
|
||||
}
|
||||
|
||||
// Project the text object's origin to the baseline's origin.
|
||||
BoundingBoxOrigin ProjectToBaseline(const BoundingBoxOrigin& origin,
|
||||
const BoundingBoxOrigin& baseline_origin) {
|
||||
SearchifyBoundingBoxOrigin ProjectToBaseline(
|
||||
const SearchifyBoundingBoxOrigin& origin,
|
||||
const SearchifyBoundingBoxOrigin& baseline_origin) {
|
||||
// The length between `origin` and `baseline_origin`.
|
||||
double length = (origin.x - baseline_origin.x) * cos(baseline_origin.theta) +
|
||||
(origin.y - baseline_origin.y) * sin(baseline_origin.theta);
|
||||
@ -107,10 +101,10 @@ void AddTextOnImage(FPDF_DOCUMENT document,
|
||||
}
|
||||
|
||||
for (const auto& line : annotation->lines) {
|
||||
BoundingBoxOrigin baseline_origin = ConvertToPdfOrigin(
|
||||
line->baseline_box.x(), line->baseline_box.y(),
|
||||
line->baseline_box.width(), line->baseline_box.height(),
|
||||
line->baseline_box_angle, image_rendered_height);
|
||||
SearchifyBoundingBoxOrigin baseline_origin =
|
||||
ConvertToPdfOrigin(line->baseline_box.x(), line->baseline_box.y(),
|
||||
line->baseline_box.height(),
|
||||
line->baseline_box_angle, image_rendered_height);
|
||||
|
||||
for (const auto& word : line->words) {
|
||||
double width = word->bounding_box.width();
|
||||
@ -167,8 +161,8 @@ void AddTextOnImage(FPDF_DOCUMENT document,
|
||||
FPDFPageObj_Transform(text.get(), width_scale, 0, 0, height_scale, 0, 0);
|
||||
|
||||
// Move text object to the corresponding text position on the full image.
|
||||
BoundingBoxOrigin origin = ConvertToPdfOrigin(
|
||||
word->bounding_box.x(), word->bounding_box.y(), width, height,
|
||||
SearchifyBoundingBoxOrigin origin = ConvertToPdfOrigin(
|
||||
word->bounding_box.x(), word->bounding_box.y(), height,
|
||||
word->bounding_box_angle, image_rendered_height);
|
||||
origin = ProjectToBaseline(origin, baseline_origin);
|
||||
double a = cos(origin.theta);
|
||||
@ -280,6 +274,15 @@ std::vector<uint8_t> PDFiumSearchify(
|
||||
return output_file_write.TakeBuffer();
|
||||
}
|
||||
|
||||
// Test-only entry point: forwards all of its arguments unchanged to
// ConvertToPdfOrigin() and returns that function's result, so unit tests can
// exercise the conversion.
SearchifyBoundingBoxOrigin ConvertToPdfOriginForTesting(
|
||||
int x,
|
||||
int y,
|
||||
int height,
|
||||
double angle,
|
||||
double coordinate_system_height) {
|
||||
return ConvertToPdfOrigin(x, y, height, angle, coordinate_system_height);
|
||||
}
|
||||
|
||||
PdfiumProgressiveSearchifier::ScopedSdkInitializer::ScopedSdkInitializer() {
|
||||
// TODO(thestig): Check the default value of `use_skia`.
|
||||
InitializeSDK(false, false, FontMappingMode::kNoMapping);
|
||||
|
@ -18,11 +18,25 @@
|
||||
|
||||
namespace chrome_pdf {
|
||||
|
||||
// Origin of a bounding box as produced by the OCR-to-PDF origin conversion
// (this is the return type of ConvertToPdfOriginForTesting()).
struct SearchifyBoundingBoxOrigin {
|
||||
// Horizontal position of the origin.
double x;
|
||||
// Vertical position of the origin.
// NOTE(review): presumably measured from the bottom of the page, as in PDF
// coordinates -- confirm against ConvertToPdfOrigin().
double y;
|
||||
// Rotation in radians. The unit tests expect the negated input angle, e.g.
// -pi/4 for an input angle of 45 degrees.
double theta;
|
||||
};
|
||||
|
||||
// Takes the bytes in `pdf_buffer` and a callback that performs OCR on a
// bitmap and returns a visual annotation, and returns a byte buffer.
// NOTE(review): presumably `pdf_buffer` holds a PDF and the returned buffer is
// the searchified PDF -- confirm against the definition.
std::vector<uint8_t> PDFiumSearchify(
|
||||
base::span<const uint8_t> pdf_buffer,
|
||||
base::RepeatingCallback<screen_ai::mojom::VisualAnnotationPtr(
|
||||
const SkBitmap& bitmap)> perform_ocr_callback);
|
||||
|
||||
// Internal function exposed for testing. Wraps ConvertToPdfOrigin(), which
// converts a bounding box origin from OCR coordinates (origin at top-left) to
// PDF coordinates (origin at bottom-left).
//   `x`, `y`, `height`: position and height of the box in OCR coordinates.
//   `angle`: rotation of the box in degrees.
//   `coordinate_system_height`: height of the coordinate system, used to flip
//       the y axis.
SearchifyBoundingBoxOrigin ConvertToPdfOriginForTesting(
|
||||
int x,
|
||||
int y,
|
||||
int height,
|
||||
double angle,
|
||||
double coordinate_system_height);
|
||||
|
||||
class PdfiumProgressiveSearchifier : public PdfProgressiveSearchifier {
|
||||
public:
|
||||
PdfiumProgressiveSearchifier();
|
||||
|
73
pdf/pdfium/pdfium_searchify_unittest.cc
Normal file
73
pdf/pdfium/pdfium_searchify_unittest.cc
Normal file
@ -0,0 +1,73 @@
|
||||
// Copyright 2024 The Chromium Authors
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
#include "pdf/pdfium/pdfium_searchify.h"
|
||||
|
||||
#include <numbers>
|
||||
|
||||
#include "testing/gtest/include/gtest/gtest.h"
|
||||
|
||||
namespace chrome_pdf {
|
||||
|
||||
// Exercises ConvertToPdfOrigin() through its test-only wrapper. Every case
// uses the same box (x=100, y=50, height=30) in a coordinate system of height
// 792 and varies only the angle. In all cases theta is expected to be the
// negated angle, converted to radians.
TEST(PdfiumSearchifyTest, ConvertToPdfOrigin) {
|
||||
// No rotation: x is unchanged and y becomes 792 - (50 + 30).
{
|
||||
SearchifyBoundingBoxOrigin result = ConvertToPdfOriginForTesting(
|
||||
/*x=*/100,
|
||||
/*y=*/50,
|
||||
/*height=*/30,
|
||||
/*angle=*/0,
|
||||
/*coordinate_system_height=*/792);
|
||||
EXPECT_DOUBLE_EQ(100, result.x);
|
||||
EXPECT_DOUBLE_EQ(712, result.y);
|
||||
EXPECT_DOUBLE_EQ(0, result.theta);
|
||||
}
|
||||
|
||||
// 45 degrees: x shrinks by 30 * sin(45 deg) and y becomes
// 792 - (50 + 30 * cos(45 deg)).
{
|
||||
SearchifyBoundingBoxOrigin result = ConvertToPdfOriginForTesting(
|
||||
/*x=*/100,
|
||||
/*y=*/50,
|
||||
/*height=*/30,
|
||||
/*angle=*/45,
|
||||
/*coordinate_system_height=*/792);
|
||||
EXPECT_DOUBLE_EQ(78.786796564403573, result.x);
|
||||
EXPECT_DOUBLE_EQ(720.78679656440363, result.y);
|
||||
EXPECT_DOUBLE_EQ(-std::numbers::pi / 4, result.theta);
|
||||
}
|
||||
|
||||
// 90 degrees: the full height is subtracted from x and y becomes 792 - 50.
{
|
||||
SearchifyBoundingBoxOrigin result = ConvertToPdfOriginForTesting(
|
||||
/*x=*/100,
|
||||
/*y=*/50,
|
||||
/*height=*/30,
|
||||
/*angle=*/90,
|
||||
/*coordinate_system_height=*/792);
|
||||
EXPECT_DOUBLE_EQ(70, result.x);
|
||||
EXPECT_DOUBLE_EQ(742, result.y);
|
||||
EXPECT_DOUBLE_EQ(-std::numbers::pi / 2, result.theta);
|
||||
}
|
||||
// 180 degrees: x is unchanged and y becomes 792 - (50 - 30).
{
|
||||
SearchifyBoundingBoxOrigin result = ConvertToPdfOriginForTesting(
|
||||
/*x=*/100,
|
||||
/*y=*/50,
|
||||
/*height=*/30,
|
||||
/*angle=*/180,
|
||||
/*coordinate_system_height=*/792);
|
||||
EXPECT_DOUBLE_EQ(100, result.x);
|
||||
EXPECT_DOUBLE_EQ(772, result.y);
|
||||
EXPECT_DOUBLE_EQ(-std::numbers::pi, result.theta);
|
||||
}
|
||||
// -90 degrees: the full height is added to x, y becomes 792 - 50, and theta
// is positive.
{
|
||||
SearchifyBoundingBoxOrigin result = ConvertToPdfOriginForTesting(
|
||||
/*x=*/100,
|
||||
/*y=*/50,
|
||||
/*height=*/30,
|
||||
/*angle=*/-90,
|
||||
/*coordinate_system_height=*/792);
|
||||
EXPECT_DOUBLE_EQ(130, result.x);
|
||||
EXPECT_DOUBLE_EQ(742, result.y);
|
||||
EXPECT_DOUBLE_EQ(std::numbers::pi / 2, result.theta);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace chrome_pdf
|
Reference in New Issue
Block a user