Reland "Add PDF engine method to get all text in the document"
This is a reland of commit 8d44e4899d
The original CL added a method to grab all the text at once because that was
the format needed by the LensOverlayController that was going to use
this method. The required format has since changed, so this CL modifies the
original method to return the text on a given page instead of all the
text in the document at once.
Original change's description:
> Add PDF engine method to get all text in the document
>
> This will be used by the Lens Overlay to pass a fraction of the document
> to be used for suggest signals while the full PDF is being processed.
> Retrieving the text in the Lens overlay controller will come in a
> followup CL.
>
> Bug: 379344946
> Change-Id: Ie52df82022916a3bb367150207d1b70e03fbce8a
> Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/6032353
> Reviewed-by: Lei Zhang <thestig@chromium.org>
> Commit-Queue: Duncan Mercer <mercerd@google.com>
> Cr-Commit-Position: refs/heads/main@{#1385831}
Bug: 379344946
Change-Id: I91d09c610fb64b050ae12e29ed519804dd1dfe38
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/6042085
Reviewed-by: Lei Zhang <thestig@chromium.org>
Commit-Queue: Duncan Mercer <mercerd@google.com>
Cr-Commit-Position: refs/heads/main@{#1386411}
This commit is contained in:

committed by
Chromium LUCI CQ

parent
20b8655eba
commit
fba49a40ba
@ -2248,6 +2248,12 @@ void PDFiumEngine::DisplayAnnotations(bool display) {
|
||||
InvalidateAllPages();
|
||||
}
|
||||
|
||||
std::u16string PDFiumEngine::GetPageText(int page_index) {
|
||||
CHECK(PageIndexInBounds(page_index));
|
||||
auto range = PDFiumRange::AllTextOnPage(pages_[page_index].get());
|
||||
return range.GetText();
|
||||
}
|
||||
|
||||
void PDFiumEngine::InvalidateAllPages() {
|
||||
CancelPaints();
|
||||
StopFind();
|
||||
|
@ -222,6 +222,10 @@ class PDFiumEngine : public DocumentLoader::Client, public IFSDK_PAUSE {
|
||||
void SetDocumentLayout(DocumentLayout::PageSpread page_spread);
|
||||
void DisplayAnnotations(bool display);
|
||||
|
||||
// Returns the text contained on the page at `page_index`. The caller is
|
||||
// responsible for passing a valid `page_index`; the implementation CHECKs
// that the index is in bounds before reading the page.
|
||||
std::u16string GetPageText(int page_index);
|
||||
|
||||
// Applies the document layout options proposed by a call to
|
||||
// PDFiumEngineClient::ProposeDocumentLayout(), returning the overall size of
|
||||
// the new effective layout.
|
||||
|
@ -1097,6 +1097,18 @@ TEST_P(PDFiumEngineTest, DrawTextSelectionsBigtableMicro) {
|
||||
*engine, /*page_index=*/0, "bigtable_micro_selection.png");
|
||||
}
|
||||
|
||||
// Loads hello_world2.pdf and checks that GetPageText() returns the same
// expected string for page index 0 and page index 1.
TEST_P(PDFiumEngineTest, GetPageText) {
  static constexpr char16_t kExpectedPageText[] =
      u"Hello, world!\r\nGoodbye, world!";

  NiceMock<MockTestClient> client;
  std::unique_ptr<PDFiumEngine> engine =
      InitializeEngine(&client, FILE_PATH_LITERAL("hello_world2.pdf"));
  ASSERT_TRUE(engine);

  // Query both pages up front; the named locals make a failure message
  // identify which page mismatched.
  const std::u16string first_page_text = engine->GetPageText(/*page_index=*/0);
  const std::u16string second_page_text =
      engine->GetPageText(/*page_index=*/1);

  EXPECT_EQ(kExpectedPageText, first_page_text);
  EXPECT_EQ(kExpectedPageText, second_page_text);
}
|
||||
|
||||
TEST_P(PDFiumEngineTest, LinkNavigates) {
|
||||
NiceMock<MockTestClient> client;
|
||||
std::unique_ptr<PDFiumEngine> engine =
|
||||
|
Reference in New Issue
Block a user