0

Rotate images in rotated PDFs before sending them to OCR.

If a PDF page is marked as rotated, the images on that page are rotated
before being sent to the OCR service.

Bug: 1469236
Change-Id: I82d8981df86f0d7224886765589ed10dec551338
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/4735395
Commit-Queue: Ramin Halavati <rhalavati@chromium.org>
Reviewed-by: Lei Zhang <thestig@chromium.org>
Cr-Commit-Position: refs/heads/main@{#1180140}
This commit is contained in:
Ramin Halavati
2023-08-07 06:05:13 +00:00
committed by Chromium LUCI CQ
parent d096e5402f
commit 6e0174a2c8
5 changed files with 158 additions and 2 deletions

@ -43,6 +43,7 @@
#include "ui/gfx/geometry/vector2d.h"
#include "ui/gfx/geometry/vector2d_f.h"
#include "ui/gfx/range/range.h"
#include "ui/gfx/skbitmap_operations.h"
using printing::ConvertUnitFloat;
using printing::kPixelsPerInch;
@ -1322,8 +1323,27 @@ void PDFiumPage::CalculateImages() {
kBGRA_8888_SkColorType, kOpaque_SkAlphaType);
const size_t row_bytes = FPDFBitmap_GetStride(bitmap.get());
SkPixmap pixels(info, FPDFBitmap_GetBuffer(bitmap.get()), row_bytes);
if (image.image_data.tryAllocPixels(info, row_bytes))
image.image_data.writePixels(pixels);
if (!image.image_data.tryAllocPixels(info, row_bytes)) {
continue;
}
image.image_data.writePixels(pixels);
SkBitmapOperations::RotationAmount rotation;
switch (FPDFPage_GetRotation(page)) {
case 0:
continue;
case 1:
rotation = SkBitmapOperations::RotationAmount::ROTATION_90_CW;
break;
case 2:
rotation = SkBitmapOperations::RotationAmount::ROTATION_180_CW;
break;
case 3:
rotation = SkBitmapOperations::RotationAmount::ROTATION_270_CW;
break;
}
image.image_data = SkBitmapOperations::Rotate(image.image_data, rotation);
}
}

@ -231,6 +231,7 @@ class PDFiumPage {
FRIEND_TEST_ALL_PREFIXES(PDFiumPageImageTest, CalculateImages);
FRIEND_TEST_ALL_PREFIXES(PDFiumPageImageTest, ImageAltText);
FRIEND_TEST_ALL_PREFIXES(PDFiumPageImageDataTest, ImageData);
FRIEND_TEST_ALL_PREFIXES(PDFiumPageImageDataTest, RotatedPageImageData);
FRIEND_TEST_ALL_PREFIXES(PDFiumPageLinkTest, AnnotLinkGeneration);
FRIEND_TEST_ALL_PREFIXES(PDFiumPageLinkTest, GetLinkTarget);
FRIEND_TEST_ALL_PREFIXES(PDFiumPageLinkTest, GetUTF8LinkTarget);

@ -553,6 +553,23 @@ TEST_P(PDFiumPageImageDataTest, ImageData) {
EXPECT_EQ(page.images_[1].image_data.height(), 50);
}
// Loads a single-page document whose page is rotated and checks the
// dimensions of the image data produced by CalculateImages().
TEST_P(PDFiumPageImageDataTest, RotatedPageImageData) {
  TestClient test_client;
  std::unique_ptr<PDFiumEngine> pdf_engine =
      InitializeEngine(&test_client, FILE_PATH_LITERAL("rotated_page.pdf"));
  ASSERT_TRUE(pdf_engine);
  ASSERT_EQ(1, pdf_engine->GetNumberOfPages());

  PDFiumPage& rotated_page = GetPDFiumPageForTest(*pdf_engine, 0);
  rotated_page.CalculateImages();
  ASSERT_EQ(1u, rotated_page.images_.size());

  // The stored image is 100x25, but because the page is rotated the
  // extracted image data is expected to come out as 25x100.
  const auto& extracted_bitmap = rotated_page.images_[0].image_data;
  EXPECT_EQ(extracted_bitmap.width(), 25);
  EXPECT_EQ(extracted_bitmap.height(), 100);
}
INSTANTIATE_TEST_SUITE_P(All, PDFiumPageImageDataTest, testing::Bool());
using PDFiumPageTextTest = PDFiumTestBase;

@ -0,0 +1,53 @@
{{header}}
{{object 1 0}} <<
/Type /Catalog
/Pages 2 0 R
>>
endobj
{{object 2 0}} <<
/Type /Pages
/MediaBox [0 0 200 200]
/Count 1
/Kids [3 0 R]
>>
endobj
{{object 3 0}} <<
/Type /Page
/Parent 2 0 R
/Contents 4 0 R
/Rotate 270
/Resources <<
/XObject <<
/Img 5 0 R
>>
>>
>>
endobj
{{object 4 0}} <<
{{streamlen}}
>>
stream
q
30 -30 40 40 100 100 cm
/Img Do
Q
endstream
endobj
{{object 5 0}} <<
/Type /XObject
/Subtype /Image
/Width 100
/Height 25
/BitsPerComponent 8
/ColorSpace /DeviceRGB
/Filter [/ASCIIHexDecode /FlateDecode]
{{streamlen}}
>>
stream
789cedc2310d00000c03a07f2aaab3ea7bcf03842655555555555555f5bf01cc7818dc
endstream
endobj
{{xref}}
{{trailer}}
{{startxref}}
%%EOF

@ -0,0 +1,65 @@
%PDF-1.7
%<25><><EFBFBD><EFBFBD>
1 0 obj <<
/Type /Catalog
/Pages 2 0 R
>>
endobj
2 0 obj <<
/Type /Pages
/MediaBox [0 0 200 200]
/Count 1
/Kids [3 0 R]
>>
endobj
3 0 obj <<
/Type /Page
/Parent 2 0 R
/Contents 4 0 R
/Rotate 270
/Resources <<
/XObject <<
/Img 5 0 R
>>
>>
>>
endobj
4 0 obj <<
/Length 36
>>
stream
q
30 -30 40 40 100 100 cm
/Img Do
Q
endstream
endobj
5 0 obj <<
/Type /XObject
/Subtype /Image
/Width 100
/Height 25
/BitsPerComponent 8
/ColorSpace /DeviceRGB
/Filter [/ASCIIHexDecode /FlateDecode]
/Length 71
>>
stream
789cedc2310d00000c03a07f2aaab3ea7bcf03842655555555555555f5bf01cc7818dc
endstream
endobj
xref
0 6
0000000000 65535 f
0000000015 00000 n
0000000068 00000 n
0000000157 00000 n
0000000301 00000 n
0000000388 00000 n
trailer <<
/Root 1 0 R
/Size 6
>>
startxref
659
%%EOF