Rotate images in rotated PDFs before sending them to OCR.
If a PDF page is marked as rotated, the images on that page are rotated before being sent to the OCR service. Bug: 1469236 Change-Id: I82d8981df86f0d7224886765589ed10dec551338 Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/4735395 Commit-Queue: Ramin Halavati <rhalavati@chromium.org> Reviewed-by: Lei Zhang <thestig@chromium.org> Cr-Commit-Position: refs/heads/main@{#1180140}
This commit is contained in:

committed by
Chromium LUCI CQ

parent
d096e5402f
commit
6e0174a2c8
@ -43,6 +43,7 @@
|
|||||||
#include "ui/gfx/geometry/vector2d.h"
|
#include "ui/gfx/geometry/vector2d.h"
|
||||||
#include "ui/gfx/geometry/vector2d_f.h"
|
#include "ui/gfx/geometry/vector2d_f.h"
|
||||||
#include "ui/gfx/range/range.h"
|
#include "ui/gfx/range/range.h"
|
||||||
|
#include "ui/gfx/skbitmap_operations.h"
|
||||||
|
|
||||||
using printing::ConvertUnitFloat;
|
using printing::ConvertUnitFloat;
|
||||||
using printing::kPixelsPerInch;
|
using printing::kPixelsPerInch;
|
||||||
@ -1322,8 +1323,27 @@ void PDFiumPage::CalculateImages() {
|
|||||||
kBGRA_8888_SkColorType, kOpaque_SkAlphaType);
|
kBGRA_8888_SkColorType, kOpaque_SkAlphaType);
|
||||||
const size_t row_bytes = FPDFBitmap_GetStride(bitmap.get());
|
const size_t row_bytes = FPDFBitmap_GetStride(bitmap.get());
|
||||||
SkPixmap pixels(info, FPDFBitmap_GetBuffer(bitmap.get()), row_bytes);
|
SkPixmap pixels(info, FPDFBitmap_GetBuffer(bitmap.get()), row_bytes);
|
||||||
if (image.image_data.tryAllocPixels(info, row_bytes))
|
if (!image.image_data.tryAllocPixels(info, row_bytes)) {
|
||||||
image.image_data.writePixels(pixels);
|
continue;
|
||||||
|
}
|
||||||
|
image.image_data.writePixels(pixels);
|
||||||
|
|
||||||
|
SkBitmapOperations::RotationAmount rotation;
|
||||||
|
switch (FPDFPage_GetRotation(page)) {
|
||||||
|
case 0:
|
||||||
|
continue;
|
||||||
|
case 1:
|
||||||
|
rotation = SkBitmapOperations::RotationAmount::ROTATION_90_CW;
|
||||||
|
break;
|
||||||
|
case 2:
|
||||||
|
rotation = SkBitmapOperations::RotationAmount::ROTATION_180_CW;
|
||||||
|
break;
|
||||||
|
case 3:
|
||||||
|
rotation = SkBitmapOperations::RotationAmount::ROTATION_270_CW;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
image.image_data = SkBitmapOperations::Rotate(image.image_data, rotation);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -231,6 +231,7 @@ class PDFiumPage {
|
|||||||
FRIEND_TEST_ALL_PREFIXES(PDFiumPageImageTest, CalculateImages);
|
FRIEND_TEST_ALL_PREFIXES(PDFiumPageImageTest, CalculateImages);
|
||||||
FRIEND_TEST_ALL_PREFIXES(PDFiumPageImageTest, ImageAltText);
|
FRIEND_TEST_ALL_PREFIXES(PDFiumPageImageTest, ImageAltText);
|
||||||
FRIEND_TEST_ALL_PREFIXES(PDFiumPageImageDataTest, ImageData);
|
FRIEND_TEST_ALL_PREFIXES(PDFiumPageImageDataTest, ImageData);
|
||||||
|
FRIEND_TEST_ALL_PREFIXES(PDFiumPageImageDataTest, RotatedPageImageData);
|
||||||
FRIEND_TEST_ALL_PREFIXES(PDFiumPageLinkTest, AnnotLinkGeneration);
|
FRIEND_TEST_ALL_PREFIXES(PDFiumPageLinkTest, AnnotLinkGeneration);
|
||||||
FRIEND_TEST_ALL_PREFIXES(PDFiumPageLinkTest, GetLinkTarget);
|
FRIEND_TEST_ALL_PREFIXES(PDFiumPageLinkTest, GetLinkTarget);
|
||||||
FRIEND_TEST_ALL_PREFIXES(PDFiumPageLinkTest, GetUTF8LinkTarget);
|
FRIEND_TEST_ALL_PREFIXES(PDFiumPageLinkTest, GetUTF8LinkTarget);
|
||||||
|
@ -553,6 +553,23 @@ TEST_P(PDFiumPageImageDataTest, ImageData) {
|
|||||||
EXPECT_EQ(page.images_[1].image_data.height(), 50);
|
EXPECT_EQ(page.images_[1].image_data.height(), 50);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Checks that the image data extracted by CalculateImages() from the
// single-page test file rotated_page.pdf has the expected dimensions.
TEST_P(PDFiumPageImageDataTest, RotatedPageImageData) {
|
||||||
|
TestClient client;
|
||||||
|
std::unique_ptr<PDFiumEngine> engine =
|
||||||
|
InitializeEngine(&client, FILE_PATH_LITERAL("rotated_page.pdf"));
|
||||||
|
ASSERT_TRUE(engine);
|
||||||
|
ASSERT_EQ(1, engine->GetNumberOfPages());
|
||||||
|
|
||||||
|
PDFiumPage& page = GetPDFiumPageForTest(*engine, 0);
|
||||||
|
page.CalculateImages();
|
||||||
|
ASSERT_EQ(1u, page.images_.size());
|
||||||
|
|
||||||
|
// The page in rotated_page.pdf is marked with /Rotate 270, so the extracted
|
||||||
|
// image is expected to be 25x100 even though the stored image is 100x25.
|
||||||
|
EXPECT_EQ(page.images_[0].image_data.width(), 25);
|
||||||
|
EXPECT_EQ(page.images_[0].image_data.height(), 100);
|
||||||
|
}
|
||||||
|
|
||||||
INSTANTIATE_TEST_SUITE_P(All, PDFiumPageImageDataTest, testing::Bool());
|
INSTANTIATE_TEST_SUITE_P(All, PDFiumPageImageDataTest, testing::Bool());
|
||||||
|
|
||||||
using PDFiumPageTextTest = PDFiumTestBase;
|
using PDFiumPageTextTest = PDFiumTestBase;
|
||||||
|
53
pdf/test/data/rotated_page.in
Normal file
53
pdf/test/data/rotated_page.in
Normal file
@ -0,0 +1,53 @@
|
|||||||
|
{{header}}
|
||||||
|
{{object 1 0}} <<
|
||||||
|
/Type /Catalog
|
||||||
|
/Pages 2 0 R
|
||||||
|
>>
|
||||||
|
endobj
|
||||||
|
{{object 2 0}} <<
|
||||||
|
/Type /Pages
|
||||||
|
/MediaBox [0 0 200 200]
|
||||||
|
/Count 1
|
||||||
|
/Kids [3 0 R]
|
||||||
|
>>
|
||||||
|
endobj
|
||||||
|
{{object 3 0}} <<
|
||||||
|
/Type /Page
|
||||||
|
/Parent 2 0 R
|
||||||
|
/Contents 4 0 R
|
||||||
|
/Rotate 270
|
||||||
|
/Resources <<
|
||||||
|
/XObject <<
|
||||||
|
/Img 5 0 R
|
||||||
|
>>
|
||||||
|
>>
|
||||||
|
>>
|
||||||
|
endobj
|
||||||
|
{{object 4 0}} <<
|
||||||
|
{{streamlen}}
|
||||||
|
>>
|
||||||
|
stream
|
||||||
|
q
|
||||||
|
30 -30 40 40 100 100 cm
|
||||||
|
/Img Do
|
||||||
|
Q
|
||||||
|
endstream
|
||||||
|
endobj
|
||||||
|
{{object 5 0}} <<
|
||||||
|
/Type /XObject
|
||||||
|
/Subtype /Image
|
||||||
|
/Width 100
|
||||||
|
/Height 25
|
||||||
|
/BitsPerComponent 8
|
||||||
|
/ColorSpace /DeviceRGB
|
||||||
|
/Filter [/ASCIIHexDecode /FlateDecode]
|
||||||
|
{{streamlen}}
|
||||||
|
>>
|
||||||
|
stream
|
||||||
|
789cedc2310d00000c03a07f2aaab3ea7bcf03842655555555555555f5bf01cc7818dc
|
||||||
|
endstream
|
||||||
|
endobj
|
||||||
|
{{xref}}
|
||||||
|
{{trailer}}
|
||||||
|
{{startxref}}
|
||||||
|
%%EOF
|
65
pdf/test/data/rotated_page.pdf
Normal file
65
pdf/test/data/rotated_page.pdf
Normal file
@ -0,0 +1,65 @@
|
|||||||
|
%PDF-1.7
|
||||||
|
%<25><><EFBFBD><EFBFBD>
|
||||||
|
1 0 obj <<
|
||||||
|
/Type /Catalog
|
||||||
|
/Pages 2 0 R
|
||||||
|
>>
|
||||||
|
endobj
|
||||||
|
2 0 obj <<
|
||||||
|
/Type /Pages
|
||||||
|
/MediaBox [0 0 200 200]
|
||||||
|
/Count 1
|
||||||
|
/Kids [3 0 R]
|
||||||
|
>>
|
||||||
|
endobj
|
||||||
|
3 0 obj <<
|
||||||
|
/Type /Page
|
||||||
|
/Parent 2 0 R
|
||||||
|
/Contents 4 0 R
|
||||||
|
/Rotate 270
|
||||||
|
/Resources <<
|
||||||
|
/XObject <<
|
||||||
|
/Img 5 0 R
|
||||||
|
>>
|
||||||
|
>>
|
||||||
|
>>
|
||||||
|
endobj
|
||||||
|
4 0 obj <<
|
||||||
|
/Length 36
|
||||||
|
>>
|
||||||
|
stream
|
||||||
|
q
|
||||||
|
30 -30 40 40 100 100 cm
|
||||||
|
/Img Do
|
||||||
|
Q
|
||||||
|
endstream
|
||||||
|
endobj
|
||||||
|
5 0 obj <<
|
||||||
|
/Type /XObject
|
||||||
|
/Subtype /Image
|
||||||
|
/Width 100
|
||||||
|
/Height 25
|
||||||
|
/BitsPerComponent 8
|
||||||
|
/ColorSpace /DeviceRGB
|
||||||
|
/Filter [/ASCIIHexDecode /FlateDecode]
|
||||||
|
/Length 71
|
||||||
|
>>
|
||||||
|
stream
|
||||||
|
789cedc2310d00000c03a07f2aaab3ea7bcf03842655555555555555f5bf01cc7818dc
|
||||||
|
endstream
|
||||||
|
endobj
|
||||||
|
xref
|
||||||
|
0 6
|
||||||
|
0000000000 65535 f
|
||||||
|
0000000015 00000 n
|
||||||
|
0000000068 00000 n
|
||||||
|
0000000157 00000 n
|
||||||
|
0000000301 00000 n
|
||||||
|
0000000388 00000 n
|
||||||
|
trailer <<
|
||||||
|
/Root 1 0 R
|
||||||
|
/Size 6
|
||||||
|
>>
|
||||||
|
startxref
|
||||||
|
659
|
||||||
|
%%EOF
|
Reference in New Issue
Block a user