Fix PDFiumRange's handling of FPDFText_GetText() results.
Use FPDFText_GetText()'s returned count as the source of truth for how many characters are in the output string, instead of trusting that this count is consistent with the FPDFText_CountChars() return value. In some cases, the two values are not the same. Bug: 1357385 Change-Id: I355a573c036e15f96972901cd562e3bcee1be74a Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/3908491 Reviewed-by: Tom Sepez <tsepez@chromium.org> Commit-Queue: Lei Zhang <thestig@chromium.org> Cr-Commit-Position: refs/heads/main@{#1050686}
This commit is contained in:

committed by
Chromium LUCI CQ

parent
f949c8de94
commit
161d189731
@ -864,6 +864,18 @@ TEST_F(PDFiumEngineTest, SelectLinkAreaWithNoText) {
|
||||
EXPECT_EQ(kExpectedText, engine->GetSelectedText());
|
||||
}
|
||||
|
||||
// Regression test for https://crbug.com/1357385: loads bug_1357385.pdf, whose
// text string (per the accompanying bug_1357385.in) wraps "Hello, world!" in
// \003 characters, and checks that selecting everything yields exactly
// "Hello, world!".
TEST_F(PDFiumEngineTest, SelectTextWithNonPrintableCharacter) {
|
||||
NiceMock<MockTestClient> client;
|
||||
std::unique_ptr<PDFiumEngine> engine =
|
||||
InitializeEngine(&client, FILE_PATH_LITERAL("bug_1357385.pdf"));
|
||||
// Abort the test early if the engine could not be created.
ASSERT_TRUE(engine);
|
||||
|
||||
// Before any selection is made, the selected text is expected to be empty.
EXPECT_THAT(engine->GetSelectedText(), IsEmpty());
|
||||
|
||||
engine->SelectAll();
|
||||
// With everything selected, the reported text must match the visible string
// exactly, with no extra characters before or after it.
EXPECT_EQ("Hello, world!", engine->GetSelectedText());
|
||||
}
|
||||
|
||||
using PDFiumEngineDeathTest = PDFiumEngineTest;
|
||||
|
||||
TEST_F(PDFiumEngineDeathTest, RequestThumbnailRedundant) {
|
||||
|
@ -8,6 +8,7 @@
|
||||
#include "base/containers/cxx20_erase.h"
|
||||
#include "base/strings/string_util.h"
|
||||
#include "pdf/pdfium/pdfium_api_string_buffer_adapter.h"
|
||||
#include "third_party/pdfium/public/fpdf_searchex.h"
|
||||
#include "ui/gfx/geometry/point.h"
|
||||
#include "ui/gfx/geometry/rect.h"
|
||||
#include "ui/gfx/geometry/rect_f.h"
|
||||
@ -105,20 +106,46 @@ std::u16string PDFiumRange::GetText() const {
|
||||
AdjustForBackwardsRange(index, count);
|
||||
if (count > 0) {
|
||||
// Note that the `expected_size` value includes the NUL terminator.
|
||||
//
|
||||
// Cannot set `check_expected_size` to true here because the fix to
|
||||
// https://crbug.com/pdfium/1139 made it such that FPDFText_GetText() is
|
||||
// not always consistent with FPDFText_CountChars() and may trim characters.
|
||||
//
|
||||
// Instead, treat `count` as the requested count, but use the size of
|
||||
// `result` as the source of truth for how many characters
|
||||
// FPDFText_GetText() actually wrote out.
|
||||
PDFiumAPIStringBufferAdapter<std::u16string> api_string_adapter(
|
||||
&result, /*expected_size=*/count + 1, /*check_expected_size=*/true);
|
||||
&result, /*expected_size=*/count + 1, /*check_expected_size=*/false);
|
||||
unsigned short* data =
|
||||
reinterpret_cast<unsigned short*>(api_string_adapter.GetData());
|
||||
int written = FPDFText_GetText(page_->GetTextPage(), index, count, data);
|
||||
// FPDFText_GetText() returns 0 on failure. Never negative value.
|
||||
DCHECK_GE(written, 0);
|
||||
api_string_adapter.Close(written);
|
||||
|
||||
const gfx::RectF page_bounds = page_->GetCroppedRect();
|
||||
std::u16string in_bound_text;
|
||||
in_bound_text.reserve(result.size());
|
||||
for (int i = 0; i < count; ++i) {
|
||||
|
||||
// If FPDFText_GetText() trimmed off characters, figure out how many were
|
||||
// trimmed from the front. Store the result in `index_offset`, so the
|
||||
// IsCharInPageBounds() calls below can have the correct index.
|
||||
CHECK_GE(static_cast<size_t>(count), result.size());
|
||||
size_t trimmed_count = static_cast<size_t>(count) - result.size();
|
||||
int index_offset = 0;
|
||||
while (trimmed_count) {
|
||||
if (FPDFText_GetTextIndexFromCharIndex(page_->GetTextPage(),
|
||||
index + index_offset) >= 0) {
|
||||
break;
|
||||
}
|
||||
--trimmed_count;
|
||||
++index_offset;
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < result.size(); ++i) {
|
||||
// Filter out characters outside the page bounds, which are semantically
|
||||
// not part of the page.
|
||||
if (page_->IsCharInPageBounds(index + i, page_bounds))
|
||||
if (page_->IsCharInPageBounds(index + index_offset + i, page_bounds))
|
||||
in_bound_text += result[i];
|
||||
}
|
||||
result = in_bound_text;
|
||||
|
46
pdf/test/data/bug_1357385.in
Normal file
46
pdf/test/data/bug_1357385.in
Normal file
@ -0,0 +1,46 @@
|
||||
{{header}}
|
||||
{{object 1 0}} <<
|
||||
/Type /Catalog
|
||||
/Pages 2 0 R
|
||||
>>
|
||||
endobj
|
||||
{{object 2 0}} <<
|
||||
/Type /Pages
|
||||
/MediaBox [0 0 200 200]
|
||||
/Count 1
|
||||
/Kids [3 0 R]
|
||||
>>
|
||||
endobj
|
||||
{{object 3 0}} <<
|
||||
/Type /Page
|
||||
/Parent 2 0 R
|
||||
/Resources <<
|
||||
/Font <<
|
||||
/F1 4 0 R
|
||||
>>
|
||||
>>
|
||||
/Contents 5 0 R
|
||||
>>
|
||||
endobj
|
||||
{{object 4 0}} <<
|
||||
/Type /Font
|
||||
/Subtype /Type1
|
||||
/BaseFont /Times-Roman
|
||||
>>
|
||||
endobj
|
||||
{{object 5 0}} <<
|
||||
{{streamlen}}
|
||||
>>
|
||||
stream
|
||||
BT
|
||||
20 50 Td
|
||||
/F1 12 Tf
|
||||
% Modified in a way similar to PDFium's bug_1139.in.
|
||||
(\003\003\003Hello, world!\003) Tj
|
||||
ET
|
||||
endstream
|
||||
endobj
|
||||
{{xref}}
|
||||
{{trailer}}
|
||||
{{startxref}}
|
||||
%%EOF
|
58
pdf/test/data/bug_1357385.pdf
Normal file
58
pdf/test/data/bug_1357385.pdf
Normal file
@ -0,0 +1,58 @@
|
||||
%PDF-1.7
|
||||
%<25><><EFBFBD><EFBFBD>
|
||||
1 0 obj <<
|
||||
/Type /Catalog
|
||||
/Pages 2 0 R
|
||||
>>
|
||||
endobj
|
||||
2 0 obj <<
|
||||
/Type /Pages
|
||||
/MediaBox [0 0 200 200]
|
||||
/Count 1
|
||||
/Kids [3 0 R]
|
||||
>>
|
||||
endobj
|
||||
3 0 obj <<
|
||||
/Type /Page
|
||||
/Parent 2 0 R
|
||||
/Resources <<
|
||||
/Font <<
|
||||
/F1 4 0 R
|
||||
>>
|
||||
>>
|
||||
/Contents 5 0 R
|
||||
>>
|
||||
endobj
|
||||
4 0 obj <<
|
||||
/Type /Font
|
||||
/Subtype /Type1
|
||||
/BaseFont /Times-Roman
|
||||
>>
|
||||
endobj
|
||||
5 0 obj <<
|
||||
/Length 113
|
||||
>>
|
||||
stream
|
||||
BT
|
||||
20 50 Td
|
||||
/F1 12 Tf
|
||||
% Modified in a way similar to PDFium's bug_1139.in.
|
||||
(\003\003\003Hello, world!\003) Tj
|
||||
ET
|
||||
endstream
|
||||
endobj
|
||||
xref
|
||||
0 6
|
||||
0000000000 65535 f
|
||||
0000000015 00000 n
|
||||
0000000068 00000 n
|
||||
0000000157 00000 n
|
||||
0000000283 00000 n
|
||||
0000000361 00000 n
|
||||
trailer <<
|
||||
/Root 1 0 R
|
||||
/Size 6
|
||||
>>
|
||||
startxref
|
||||
526
|
||||
%%EOF
|
Reference in New Issue
Block a user