Add PDF engine method to get all text in the document
This will be used by the Lens Overlay to pass a fraction of the document to be used for suggest signals while the full PDF is being processed. Retrieving the text in the Lens overlay controller will come in a follow-up CL.

Bug: 379344946
Change-Id: Ie52df82022916a3bb367150207d1b70e03fbce8a
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/6032353
Reviewed-by: Lei Zhang <thestig@chromium.org>
Commit-Queue: Duncan Mercer <mercerd@google.com>
Cr-Commit-Position: refs/heads/main@{#1385831}
This commit is contained in:

committed by
Chromium LUCI CQ

parent
738dff33b7
commit
8d44e4899d
@ -2248,6 +2248,24 @@ void PDFiumEngine::DisplayAnnotations(bool display) {
|
||||
InvalidateAllPages();
|
||||
}
|
||||
|
||||
std::u16string PDFiumEngine::GetAllText(uint32_t size_limit) {
|
||||
std::u16string all_pages_text;
|
||||
|
||||
for (auto& page : pages_) {
|
||||
// Add the current page's text to the output.
|
||||
auto range = PDFiumRange::AllTextOnPage(page.get());
|
||||
all_pages_text.append(range.GetText());
|
||||
|
||||
// Truncate and exit early if over the size limit.
|
||||
if (all_pages_text.size() > size_limit) {
|
||||
all_pages_text.resize(size_limit);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return all_pages_text;
|
||||
}
|
||||
|
||||
void PDFiumEngine::InvalidateAllPages() {
|
||||
CancelPaints();
|
||||
StopFind();
|
||||
|
@ -222,6 +222,11 @@ class PDFiumEngine : public DocumentLoader::Client, public IFSDK_PAUSE {
|
||||
void SetDocumentLayout(DocumentLayout::PageSpread page_spread);
|
||||
void DisplayAnnotations(bool display);
|
||||
|
||||
// Returns the text contained in the PDF. If the size of the text is larger
|
||||
// than `size_limit`, the remaining text will be truncated, and a string with
|
||||
// the first `size_limit` UTF-16 code units will be returned.
|
||||
std::u16string GetAllText(uint32_t size_limit);
|
||||
|
||||
// Applies the document layout options proposed by a call to
|
||||
// PDFiumEngineClient::ProposeDocumentLayout(), returning the overall size of
|
||||
// the new effective layout.
|
||||
|
@ -1097,6 +1097,32 @@ TEST_P(PDFiumEngineTest, DrawTextSelectionsBigtableMicro) {
|
||||
*engine, /*page_index=*/0, "bigtable_micro_selection.png");
|
||||
}
|
||||
|
||||
// GetAllText() with a limit larger than the expected text should return that
// text in full, without truncation.
TEST_P(PDFiumEngineTest, GetAllText) {
  NiceMock<MockTestClient> mock_client;
  std::unique_ptr<PDFiumEngine> pdf_engine =
      InitializeEngine(&mock_client, FILE_PATH_LITERAL("hello_world2.pdf"));
  ASSERT_TRUE(pdf_engine);

  // The expected text is shorter than the limit of 100 passed below.
  constexpr char16_t kWantText[] =
      u"Hello, world!\r\nGoodbye, world!Hello, world!\r\nGoodbye, world!";
  const std::u16string got_text = pdf_engine->GetAllText(/*size_limit=*/100);

  EXPECT_EQ(kWantText, got_text);
}
|
||||
|
||||
// GetAllText() with a limit smaller than the document's full text should
// return only the leading `size_limit` code units.
TEST_P(PDFiumEngineTest, GetAllTextSizeLimit) {
  NiceMock<MockTestClient> mock_client;
  std::unique_ptr<PDFiumEngine> pdf_engine =
      InitializeEngine(&mock_client, FILE_PATH_LITERAL("hello_world2.pdf"));
  ASSERT_TRUE(pdf_engine);

  const std::u16string got_text = pdf_engine->GetAllText(/*size_limit=*/30);

  // Check the length first so that a wrong size aborts the test before the
  // content comparison.
  ASSERT_EQ(30u, got_text.size());

  constexpr char16_t kWantText[] = u"Hello, world!\r\nGoodbye, world!";
  EXPECT_EQ(kWantText, got_text);
}
|
||||
|
||||
TEST_P(PDFiumEngineTest, LinkNavigates) {
|
||||
NiceMock<MockTestClient> client;
|
||||
std::unique_ptr<PDFiumEngine> engine =
|
||||
|
Reference in New Issue
Block a user