0

Revert "Add PDF engine method to get all text in the document"

This reverts commit 8d44e4899d.

Reason for revert: The format agreed upon with Lens has changed, so this method now needs to retrieve the text of a single page instead of all the text at once. Reverting and relanding with the new approach so that it is easier to cherry-pick.

Original change's description:
> Add PDF engine method to get all text in the document
>
> This will be used by the Lens Overlay to pass a fraction of the document
> to be used for suggest signals while the full PDF is being processed.
> Retrieving the text in the Lens overlay controller will come in a
> followup CL.
>
> Bug: 379344946
> Change-Id: Ie52df82022916a3bb367150207d1b70e03fbce8a
> Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/6032353
> Reviewed-by: Lei Zhang <thestig@chromium.org>
> Commit-Queue: Duncan Mercer <mercerd@google.com>
> Cr-Commit-Position: refs/heads/main@{#1385831}

Bug: 379344946
Change-Id: I87138767e09d82f9b6b93d7a10f177828a0f1291
No-Presubmit: true
No-Tree-Checks: true
No-Try: true
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/6042413
Reviewed-by: Lei Zhang <thestig@chromium.org>
Auto-Submit: Duncan Mercer <mercerd@google.com>
Commit-Queue: Lei Zhang <thestig@chromium.org>
Bot-Commit: Rubber Stamper <rubber-stamper@appspot.gserviceaccount.com>
Cr-Commit-Position: refs/heads/main@{#1386340}
This commit is contained in:
Duncan Mercer
2024-11-21 18:41:36 +00:00
committed by Chromium LUCI CQ
parent 2bbdb5f961
commit cf5c6c0b58
3 changed files with 0 additions and 49 deletions

@ -2248,24 +2248,6 @@ void PDFiumEngine::DisplayAnnotations(bool display) {
InvalidateAllPages();
}
// Returns the text of the document's pages, concatenated in the order the
// pages appear in `pages_` with no separator inserted between pages.
//
// `size_limit` is the maximum length of the result, measured in UTF-16 code
// units (the unit of `std::u16string::size()`). Once the accumulated text
// reaches the limit it is truncated to exactly `size_limit` code units and no
// further pages are read.
//
// NOTE(review): truncation is done on code units, so a surrogate pair that
// straddles the limit would be cut in half - confirm callers tolerate this.
std::u16string PDFiumEngine::GetAllText(uint32_t size_limit) {
  std::u16string all_pages_text;
  for (const auto& page : pages_) {
    // Add the current page's text to the output.
    auto range = PDFiumRange::AllTextOnPage(page.get());
    all_pages_text.append(range.GetText());

    // Stop as soon as the limit is reached. Using `>=` rather than `>` avoids
    // extracting the next page's text only to throw all of it away when the
    // accumulated text lands exactly on the limit. The resize is a no-op in
    // that exact-fit case and a truncation otherwise.
    if (all_pages_text.size() >= size_limit) {
      all_pages_text.resize(size_limit);
      break;
    }
  }
  return all_pages_text;
}
void PDFiumEngine::InvalidateAllPages() {
CancelPaints();
StopFind();

@ -222,11 +222,6 @@ class PDFiumEngine : public DocumentLoader::Client, public IFSDK_PAUSE {
void SetDocumentLayout(DocumentLayout::PageSpread page_spread);
void DisplayAnnotations(bool display);
// Returns the text contained in the PDF, formed by concatenating the text of
// each page in page order with no separator added between pages. If the size
// of the text is larger than `size_limit`, the remaining text will be
// truncated, and a string with the first `size_limit` UTF-16 code units will
// be returned.
// NOTE(review): because the limit is applied to code units, truncation may
// split a surrogate pair - confirm callers tolerate this.
std::u16string GetAllText(uint32_t size_limit);
// Applies the document layout options proposed by a call to
// PDFiumEngineClient::ProposeDocumentLayout(), returning the overall size of
// the new effective layout.

@ -1097,32 +1097,6 @@ TEST_P(PDFiumEngineTest, DrawTextSelectionsBigtableMicro) {
*engine, /*page_index=*/0, "bigtable_micro_selection.png");
}
// Checks that GetAllText() returns the complete document text when that text
// is shorter than the requested size limit.
TEST_P(PDFiumEngineTest, GetAllText) {
  constexpr char16_t kExpectedText[] =
      u"Hello, world!\r\nGoodbye, world!Hello, world!\r\nGoodbye, world!";

  NiceMock<MockTestClient> client;
  std::unique_ptr<PDFiumEngine> engine =
      InitializeEngine(&client, FILE_PATH_LITERAL("hello_world2.pdf"));
  ASSERT_TRUE(engine);

  // The limit is larger than the expected text, so nothing is truncated.
  const std::u16string all_text = engine->GetAllText(/*size_limit=*/100);
  EXPECT_EQ(kExpectedText, all_text);
}
// Checks that GetAllText() truncates its result to exactly `size_limit` code
// units when the document contains more text than the limit allows.
TEST_P(PDFiumEngineTest, GetAllTextSizeLimit) {
  constexpr char16_t kExpectedText[] = u"Hello, world!\r\nGoodbye, world!";

  NiceMock<MockTestClient> client;
  std::unique_ptr<PDFiumEngine> engine =
      InitializeEngine(&client, FILE_PATH_LITERAL("hello_world2.pdf"));
  ASSERT_TRUE(engine);

  const std::u16string truncated_text = engine->GetAllText(/*size_limit=*/30);

  // Verify the length first so a size mismatch aborts before the content
  // comparison.
  ASSERT_EQ(30u, truncated_text.size());
  EXPECT_EQ(kExpectedText, truncated_text);
}
TEST_P(PDFiumEngineTest, LinkNavigates) {
NiceMock<MockTestClient> client;
std::unique_ptr<PDFiumEngine> engine =