Simplify ConvertToPdfOrigin() inside pdfium_searchify.cc and add tests
Expose the function as ConvertToPdfOriginForTesting() to add some unit tests. Also remove the unused width parameter.

Change-Id: Iaa7242f138fbe1a132c58643652daac80cc03aac
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/5691718
Commit-Queue: Lei Zhang <thestig@chromium.org>
Reviewed-by: Chu-Hsuan Yang <chuhsuan@chromium.org>
Cr-Commit-Position: refs/heads/main@{#1325951}
This commit is contained in:

committed by
Chromium LUCI CQ

parent
c3e6dac201
commit
ece448f559
@ -477,6 +477,10 @@ if (enable_pdf) {
|
|||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (enable_screen_ai_service) {
|
||||||
|
sources += [ "pdfium/pdfium_searchify_unittest.cc" ]
|
||||||
|
}
|
||||||
|
|
||||||
if (v8_use_external_startup_data) {
|
if (v8_use_external_startup_data) {
|
||||||
deps += [
|
deps += [
|
||||||
"//tools/v8_context_snapshot",
|
"//tools/v8_context_snapshot",
|
||||||
|
@ -47,20 +47,13 @@ std::vector<uint32_t> Utf8ToCharcodes(const std::string& string) {
|
|||||||
return charcodes;
|
return charcodes;
|
||||||
}
|
}
|
||||||
|
|
||||||
struct BoundingBoxOrigin {
|
|
||||||
double x;
|
|
||||||
double y;
|
|
||||||
double theta;
|
|
||||||
};
|
|
||||||
|
|
||||||
// The coordinate systems between OCR and PDF are different. OCR's origin is at
|
// The coordinate systems between OCR and PDF are different. OCR's origin is at
|
||||||
// top-left, so we need to convert them to PDF's bottom-left.
|
// top-left, so we need to convert them to PDF's bottom-left.
|
||||||
BoundingBoxOrigin ConvertToPdfOrigin(int x,
|
SearchifyBoundingBoxOrigin ConvertToPdfOrigin(int x,
|
||||||
int y,
|
int y,
|
||||||
int width,
|
int height,
|
||||||
int height,
|
double angle,
|
||||||
double angle,
|
double coordinate_system_height) {
|
||||||
double coordinate_system_height) {
|
|
||||||
const double theta = base::DegToRad(angle);
|
const double theta = base::DegToRad(angle);
|
||||||
return {.x = x - (sin(theta) * height),
|
return {.x = x - (sin(theta) * height),
|
||||||
.y = coordinate_system_height - (y + cos(theta) * height),
|
.y = coordinate_system_height - (y + cos(theta) * height),
|
||||||
@ -68,8 +61,9 @@ BoundingBoxOrigin ConvertToPdfOrigin(int x,
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Project the text object's origin to the baseline's origin.
|
// Project the text object's origin to the baseline's origin.
|
||||||
BoundingBoxOrigin ProjectToBaseline(const BoundingBoxOrigin& origin,
|
SearchifyBoundingBoxOrigin ProjectToBaseline(
|
||||||
const BoundingBoxOrigin& baseline_origin) {
|
const SearchifyBoundingBoxOrigin& origin,
|
||||||
|
const SearchifyBoundingBoxOrigin& baseline_origin) {
|
||||||
// The length between `origin` and `baseline_origin`.
|
// The length between `origin` and `baseline_origin`.
|
||||||
double length = (origin.x - baseline_origin.x) * cos(baseline_origin.theta) +
|
double length = (origin.x - baseline_origin.x) * cos(baseline_origin.theta) +
|
||||||
(origin.y - baseline_origin.y) * sin(baseline_origin.theta);
|
(origin.y - baseline_origin.y) * sin(baseline_origin.theta);
|
||||||
@ -107,10 +101,10 @@ void AddTextOnImage(FPDF_DOCUMENT document,
|
|||||||
}
|
}
|
||||||
|
|
||||||
for (const auto& line : annotation->lines) {
|
for (const auto& line : annotation->lines) {
|
||||||
BoundingBoxOrigin baseline_origin = ConvertToPdfOrigin(
|
SearchifyBoundingBoxOrigin baseline_origin =
|
||||||
line->baseline_box.x(), line->baseline_box.y(),
|
ConvertToPdfOrigin(line->baseline_box.x(), line->baseline_box.y(),
|
||||||
line->baseline_box.width(), line->baseline_box.height(),
|
line->baseline_box.height(),
|
||||||
line->baseline_box_angle, image_rendered_height);
|
line->baseline_box_angle, image_rendered_height);
|
||||||
|
|
||||||
for (const auto& word : line->words) {
|
for (const auto& word : line->words) {
|
||||||
double width = word->bounding_box.width();
|
double width = word->bounding_box.width();
|
||||||
@ -167,8 +161,8 @@ void AddTextOnImage(FPDF_DOCUMENT document,
|
|||||||
FPDFPageObj_Transform(text.get(), width_scale, 0, 0, height_scale, 0, 0);
|
FPDFPageObj_Transform(text.get(), width_scale, 0, 0, height_scale, 0, 0);
|
||||||
|
|
||||||
// Move text object to the corresponding text position on the full image.
|
// Move text object to the corresponding text position on the full image.
|
||||||
BoundingBoxOrigin origin = ConvertToPdfOrigin(
|
SearchifyBoundingBoxOrigin origin = ConvertToPdfOrigin(
|
||||||
word->bounding_box.x(), word->bounding_box.y(), width, height,
|
word->bounding_box.x(), word->bounding_box.y(), height,
|
||||||
word->bounding_box_angle, image_rendered_height);
|
word->bounding_box_angle, image_rendered_height);
|
||||||
origin = ProjectToBaseline(origin, baseline_origin);
|
origin = ProjectToBaseline(origin, baseline_origin);
|
||||||
double a = cos(origin.theta);
|
double a = cos(origin.theta);
|
||||||
@ -280,6 +274,15 @@ std::vector<uint8_t> PDFiumSearchify(
|
|||||||
return output_file_write.TakeBuffer();
|
return output_file_write.TakeBuffer();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
SearchifyBoundingBoxOrigin ConvertToPdfOriginForTesting(
|
||||||
|
int x,
|
||||||
|
int y,
|
||||||
|
int height,
|
||||||
|
double angle,
|
||||||
|
double coordinate_system_height) {
|
||||||
|
return ConvertToPdfOrigin(x, y, height, angle, coordinate_system_height);
|
||||||
|
}
|
||||||
|
|
||||||
PdfiumProgressiveSearchifier::ScopedSdkInitializer::ScopedSdkInitializer() {
|
PdfiumProgressiveSearchifier::ScopedSdkInitializer::ScopedSdkInitializer() {
|
||||||
// TODO(thestig): Check the default value of `use_skia`.
|
// TODO(thestig): Check the default value of `use_skia`.
|
||||||
InitializeSDK(false, false, FontMappingMode::kNoMapping);
|
InitializeSDK(false, false, FontMappingMode::kNoMapping);
|
||||||
|
@ -18,11 +18,25 @@
|
|||||||
|
|
||||||
namespace chrome_pdf {
|
namespace chrome_pdf {
|
||||||
|
|
||||||
|
struct SearchifyBoundingBoxOrigin {
|
||||||
|
double x;
|
||||||
|
double y;
|
||||||
|
double theta;
|
||||||
|
};
|
||||||
|
|
||||||
std::vector<uint8_t> PDFiumSearchify(
|
std::vector<uint8_t> PDFiumSearchify(
|
||||||
base::span<const uint8_t> pdf_buffer,
|
base::span<const uint8_t> pdf_buffer,
|
||||||
base::RepeatingCallback<screen_ai::mojom::VisualAnnotationPtr(
|
base::RepeatingCallback<screen_ai::mojom::VisualAnnotationPtr(
|
||||||
const SkBitmap& bitmap)> perform_ocr_callback);
|
const SkBitmap& bitmap)> perform_ocr_callback);
|
||||||
|
|
||||||
|
// Internal function exposed for testing.
|
||||||
|
SearchifyBoundingBoxOrigin ConvertToPdfOriginForTesting(
|
||||||
|
int x,
|
||||||
|
int y,
|
||||||
|
int height,
|
||||||
|
double angle,
|
||||||
|
double coordinate_system_height);
|
||||||
|
|
||||||
class PdfiumProgressiveSearchifier : public PdfProgressiveSearchifier {
|
class PdfiumProgressiveSearchifier : public PdfProgressiveSearchifier {
|
||||||
public:
|
public:
|
||||||
PdfiumProgressiveSearchifier();
|
PdfiumProgressiveSearchifier();
|
||||||
|
73
pdf/pdfium/pdfium_searchify_unittest.cc
Normal file
73
pdf/pdfium/pdfium_searchify_unittest.cc
Normal file
@ -0,0 +1,73 @@
|
|||||||
|
// Copyright 2024 The Chromium Authors
|
||||||
|
// Use of this source code is governed by a BSD-style license that can be
|
||||||
|
// found in the LICENSE file.
|
||||||
|
|
||||||
|
#include "pdf/pdfium/pdfium_searchify.h"
|
||||||
|
|
||||||
|
#include <numbers>
|
||||||
|
|
||||||
|
#include "testing/gtest/include/gtest/gtest.h"
|
||||||
|
|
||||||
|
namespace chrome_pdf {
|
||||||
|
|
||||||
|
TEST(PdfiumSearchifyTest, ConvertToPdfOrigin) {
|
||||||
|
{
|
||||||
|
SearchifyBoundingBoxOrigin result = ConvertToPdfOriginForTesting(
|
||||||
|
/*x=*/100,
|
||||||
|
/*y=*/50,
|
||||||
|
/*height=*/30,
|
||||||
|
/*angle=*/0,
|
||||||
|
/*coordinate_system_height=*/792);
|
||||||
|
EXPECT_DOUBLE_EQ(100, result.x);
|
||||||
|
EXPECT_DOUBLE_EQ(712, result.y);
|
||||||
|
EXPECT_DOUBLE_EQ(0, result.theta);
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
SearchifyBoundingBoxOrigin result = ConvertToPdfOriginForTesting(
|
||||||
|
/*x=*/100,
|
||||||
|
/*y=*/50,
|
||||||
|
/*height=*/30,
|
||||||
|
/*angle=*/45,
|
||||||
|
/*coordinate_system_height=*/792);
|
||||||
|
EXPECT_DOUBLE_EQ(78.786796564403573, result.x);
|
||||||
|
EXPECT_DOUBLE_EQ(720.78679656440363, result.y);
|
||||||
|
EXPECT_DOUBLE_EQ(-std::numbers::pi / 4, result.theta);
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
SearchifyBoundingBoxOrigin result = ConvertToPdfOriginForTesting(
|
||||||
|
/*x=*/100,
|
||||||
|
/*y=*/50,
|
||||||
|
/*height=*/30,
|
||||||
|
/*angle=*/90,
|
||||||
|
/*coordinate_system_height=*/792);
|
||||||
|
EXPECT_DOUBLE_EQ(70, result.x);
|
||||||
|
EXPECT_DOUBLE_EQ(742, result.y);
|
||||||
|
EXPECT_DOUBLE_EQ(-std::numbers::pi / 2, result.theta);
|
||||||
|
}
|
||||||
|
{
|
||||||
|
SearchifyBoundingBoxOrigin result = ConvertToPdfOriginForTesting(
|
||||||
|
/*x=*/100,
|
||||||
|
/*y=*/50,
|
||||||
|
/*height=*/30,
|
||||||
|
/*angle=*/180,
|
||||||
|
/*coordinate_system_height=*/792);
|
||||||
|
EXPECT_DOUBLE_EQ(100, result.x);
|
||||||
|
EXPECT_DOUBLE_EQ(772, result.y);
|
||||||
|
EXPECT_DOUBLE_EQ(-std::numbers::pi, result.theta);
|
||||||
|
}
|
||||||
|
{
|
||||||
|
SearchifyBoundingBoxOrigin result = ConvertToPdfOriginForTesting(
|
||||||
|
/*x=*/100,
|
||||||
|
/*y=*/50,
|
||||||
|
/*height=*/30,
|
||||||
|
/*angle=*/-90,
|
||||||
|
/*coordinate_system_height=*/792);
|
||||||
|
EXPECT_DOUBLE_EQ(130, result.x);
|
||||||
|
EXPECT_DOUBLE_EQ(742, result.y);
|
||||||
|
EXPECT_DOUBLE_EQ(std::numbers::pi / 2, result.theta);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace chrome_pdf
|
Reference in New Issue
Block a user