PDF a11y: Use style info in the text-runs break heuristic
The text-runs break heuristic now considers the text style. With this change, a text run will break if it encounters a difference in any of the following text style properties:
* font-family
* font-weight
* rendering mode
* font-size
* fill color
* stroke color
* italic flag
* bold flag

Before this change, it used to break only if a font-size difference was observed. I added an "ultimate" dump test that tests for all of those properties.

Bug: 985604
Change-Id: Ice4b4a2dcdd8cc9642ca0e9d069a415c5b2700fd
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/1771987
Commit-Queue: Benjamin Beaudry <benjamin.beaudry@microsoft.com>
Reviewed-by: Lei Zhang <thestig@chromium.org>
Reviewed-by: Ian Prest <iapres@microsoft.com>
Cr-Commit-Position: refs/heads/master@{#710522}
This commit is contained in:

committed by
Commit Bot

parent
f6f8dbb311
commit
8feb637ac1
chrome
browser
test
data
pdf
accessibility
text-format-expected-blink.txt
text-run-style-heuristic-expected-auralinux.txt
text-run-style-heuristic-expected-blink.txt
text-run-style-heuristic-expected-mac.txt
text-run-style-heuristic-expected-uia-win.txt
text-run-style-heuristic-expected-win.txt
text-run-style-heuristic.in
text-run-style-heuristic.pdf
pdf
@ -2617,3 +2617,8 @@ IN_PROC_BROWSER_TEST_P(PDFExtensionAccessibilityTreeDumpTest,
|
||||
LinksImagesAndText) {
|
||||
RunPDFTest(FILE_PATH_LITERAL("text-image-link.pdf"));
|
||||
}
|
||||
|
||||
// Runs the accessibility tree dump test on text-run-style-heuristic.pdf, the
// fixture added for the style-based text-run break heuristic.
// NOTE(review): presumably RunPDFTest() compares the dump against the
// text-run-style-heuristic-expected-*.txt files -- confirm.
IN_PROC_BROWSER_TEST_P(PDFExtensionAccessibilityTreeDumpTest,
|
||||
TextRunStyleHeuristic) {
|
||||
RunPDFTest(FILE_PATH_LITERAL("text-run-style-heuristic.pdf"));
|
||||
}
|
||||
|
@ -0,0 +1,15 @@
|
||||
embeddedObject fontFamily='Roboto' color=-16777216 fontSize=13.00 fontWeight=400.00
|
||||
++document fontFamily='Roboto' restriction=readOnly
|
||||
++++region fontFamily='Roboto' name='Page 1' restriction=readOnly isPageBreakingObject=true
|
||||
++++++paragraph fontFamily='Roboto' restriction=readOnly
|
||||
++++++++staticText fontFamily='Roboto' name='Case 1<newline>' restriction=readOnly
|
||||
++++++++++inlineTextBox fontFamily='Helvetica-Bold-Italic' name='Case 1<newline>' restriction=readOnly color=-65536 fontSize=38.00 fontWeight=600.00
|
||||
++++++paragraph fontFamily='Roboto' restriction=readOnly
|
||||
++++++++staticText fontFamily='Roboto' name='Case 2<newline>' restriction=readOnly
|
||||
++++++++++inlineTextBox fontFamily='Helvetica-Bold-Italic' name='Case 2<newline>' restriction=readOnly color=-16711936 fontSize=38.00 fontWeight=600.00
|
||||
++++++paragraph fontFamily='Roboto' restriction=readOnly
|
||||
++++++++staticText fontFamily='Roboto' name='Case 3<newline>' restriction=readOnly
|
||||
++++++++++inlineTextBox fontFamily='Helvetica-Bold-Italic' name='Case 3<newline>' restriction=readOnly color=-65536 fontSize=38.00 fontWeight=600.00
|
||||
++++++paragraph fontFamily='Roboto' restriction=readOnly
|
||||
++++++++staticText fontFamily='Roboto' name='Case 4' restriction=readOnly
|
||||
++++++++++inlineTextBox fontFamily='Helvetica-Bold-Italic' name='Case 4' restriction=readOnly fontSize=38.00 fontWeight=600.00
|
@ -0,0 +1,5 @@
|
||||
[embedded component]
|
||||
++[document frame]
|
||||
++++[landmark] name='Page 1'
|
||||
++++++[paragraph]
|
||||
++++++++[text] name='Hello world! One word equals to one text run.'
|
@ -0,0 +1,14 @@
|
||||
embeddedObject
|
||||
++document restriction=readOnly
|
||||
++++region name='Page 1' restriction=readOnly isPageBreakingObject=true
|
||||
++++++paragraph restriction=readOnly
|
||||
++++++++staticText name='Hello world! One word equals to one text run.' restriction=readOnly
|
||||
++++++++++inlineTextBox name='Hello ' restriction=readOnly
|
||||
++++++++++inlineTextBox name='world! ' restriction=readOnly
|
||||
++++++++++inlineTextBox name='One ' restriction=readOnly
|
||||
++++++++++inlineTextBox name='word ' restriction=readOnly
|
||||
++++++++++inlineTextBox name='equals ' restriction=readOnly
|
||||
++++++++++inlineTextBox name='to ' restriction=readOnly
|
||||
++++++++++inlineTextBox name='one ' restriction=readOnly
|
||||
++++++++++inlineTextBox name='text ' restriction=readOnly
|
||||
++++++++++inlineTextBox name='run.' restriction=readOnly
|
@ -0,0 +1,5 @@
|
||||
AXGroup AXDescription='Page 1'
|
||||
++AXGroup
|
||||
++++AXGroup AXDescription='Page 1'
|
||||
++++++AXGroup
|
||||
++++++++AXStaticText AXValue='Hello world! One word equals to one text run.'
|
@ -0,0 +1,5 @@
|
||||
group
|
||||
++document
|
||||
++++region Name='Page 1'
|
||||
++++++group
|
||||
++++++++description Name='Hello world! One word equals to one text run.'
|
@ -0,0 +1,5 @@
|
||||
ROLE_SYSTEM_GROUPING FOCUSABLE
|
||||
++ROLE_SYSTEM_DOCUMENT READONLY FOCUSABLE
|
||||
++++IA2_ROLE_LANDMARK name='Page 1' READONLY
|
||||
++++++IA2_ROLE_PARAGRAPH READONLY
|
||||
++++++++ROLE_SYSTEM_STATICTEXT name='Hello world! One word equals to one text run.' READONLY
|
143
chrome/test/data/pdf/accessibility/text-run-style-heuristic.in
Normal file
143
chrome/test/data/pdf/accessibility/text-run-style-heuristic.in
Normal file
@ -0,0 +1,143 @@
|
||||
{{header}}
|
||||
{{object 1 0}} <<
|
||||
/Type /Catalog
|
||||
/Pages 2 0 R
|
||||
>>
|
||||
endobj
|
||||
{{object 2 0}} <<
|
||||
/Type /Pages
|
||||
/MediaBox [ 0 0 200 200 ]
|
||||
/Count 1
|
||||
/Kids [ 3 0 R ]
|
||||
>>
|
||||
endobj
|
||||
{{object 3 0}} <<
|
||||
/Type /Page
|
||||
/Parent 2 0 R
|
||||
/Resources <<
|
||||
/Font <<
|
||||
/F1 4 0 R %Basic font. Times-Roman, no style.
|
||||
/F2 5 0 R %Basic font. Helvetica, no style.
|
||||
/F3 6 0 R %F2 + font-weight (/stemV and /Weight)
|
||||
/F4 8 0 R %F3 + italic flag
|
||||
>>
|
||||
>>
|
||||
/Contents 10 0 R
|
||||
>>
|
||||
endobj
|
||||
{{object 4 0}} <<
|
||||
/Type /Font
|
||||
/Subtype /Type1
|
||||
/BaseFont /Times-Roman
|
||||
>>
|
||||
endobj
|
||||
{{object 5 0}} <<
|
||||
/Type /Font
|
||||
/Subtype /Type1
|
||||
/BaseFont /Helvetica
|
||||
>>
|
||||
endobj
|
||||
{{object 6 0}} <<
|
||||
/Type /Font
|
||||
/Subtype /Type1
|
||||
/BaseFont /Helvetica-Bold
|
||||
/FontDescriptor 7 0 R
|
||||
>>
|
||||
endobj
|
||||
{{object 7 0}} <<
|
||||
/Type /FontDescriptor
|
||||
/FontName /Helvetica-Bold
|
||||
/FontWeight 600
|
||||
/StemV 80
|
||||
/ItalicAngle 0
|
||||
/Ascent 776
|
||||
/Flags 0
|
||||
/FontBBox [-250 -236 2827 1000]
|
||||
/CapHeight 763
|
||||
/Descent -223
|
||||
>>
|
||||
endobj
|
||||
{{object 8 0}} <<
|
||||
/Type /Font
|
||||
/Subtype /Type1
|
||||
/BaseFont /Helvetica-Bold-Italic
|
||||
/FontDescriptor 9 0 R
|
||||
>>
|
||||
endobj
|
||||
{{object 9 0}} <<
|
||||
/Type /FontDescriptor
|
||||
/FontName /Helvetica-Bold-Italic
|
||||
/FontWeight 600
|
||||
/StemV 80
|
||||
/ItalicAngle 45
|
||||
/Ascent 776
|
||||
/Flags 64
|
||||
/FontBBox [-250 -236 2827 1000]
|
||||
/CapHeight 763
|
||||
/Descent -223
|
||||
>>
|
||||
endobj
|
||||
{{object 10 0}} <<
|
||||
{{streamlen}}
|
||||
>>
|
||||
stream
|
||||
BT
|
||||
0 0 Td
|
||||
/F1 9 Tf
|
||||
1 0 0 1 2 30 Tm
|
||||
(Hello) Tj
|
||||
|
||||
0 0 Td
|
||||
/F2 9 Tf
|
||||
1 0 0 1 22 30 Tm
|
||||
( world!) Tj
|
||||
|
||||
0 0 Td
|
||||
/F2 8 Tf
|
||||
1 0 0 1 48 30 Tm
|
||||
( One) Tj
|
||||
|
||||
0 0 Td
|
||||
/F3 8 Tf
|
||||
1 0 0 1 65 30 Tm
|
||||
( word) Tj
|
||||
|
||||
0 0 Td
|
||||
/F4 8 Tf
|
||||
1 0 0 1 85 30 Tm
|
||||
( equals) Tj
|
||||
|
||||
0 0 Td
|
||||
/F4 8 Tf
|
||||
1 0 0 1 121 30 Tm
|
||||
3 Tr
|
||||
( to) Tj
|
||||
|
||||
0 0 Td
|
||||
/F4 8 Tf
|
||||
1 0 0 1 137 30 Tm
|
||||
1 Tr
|
||||
( one) Tj
|
||||
|
||||
0 0 Td
|
||||
/F4 8 Tf
|
||||
1 0 0 1 154 30 Tm
|
||||
1 0 0 rg
|
||||
0 1 0 RG
|
||||
2 Tr
|
||||
( text) Tj
|
||||
|
||||
0 0 Td
|
||||
/F4 8 Tf
|
||||
1 0 0 1 170 30 Tm
|
||||
0 0 1 rg
|
||||
1 0 0 RG
|
||||
1 Tr
|
||||
( run.) Tj
|
||||
ET
|
||||
endstream
|
||||
endobj
|
||||
{{xref}}
|
||||
{{trailer}}
|
||||
{{startxref}}
|
||||
%%EOF
|
160
chrome/test/data/pdf/accessibility/text-run-style-heuristic.pdf
Normal file
160
chrome/test/data/pdf/accessibility/text-run-style-heuristic.pdf
Normal file
@ -0,0 +1,160 @@
|
||||
%PDF-1.7
|
||||
%<25><><EFBFBD><EFBFBD>
|
||||
1 0 obj <<
|
||||
/Type /Catalog
|
||||
/Pages 2 0 R
|
||||
>>
|
||||
endobj
|
||||
2 0 obj <<
|
||||
/Type /Pages
|
||||
/MediaBox [ 0 0 200 200 ]
|
||||
/Count 1
|
||||
/Kids [ 3 0 R ]
|
||||
>>
|
||||
endobj
|
||||
3 0 obj <<
|
||||
/Type /Page
|
||||
/Parent 2 0 R
|
||||
/Resources <<
|
||||
/Font <<
|
||||
/F1 4 0 R %Basic font. Times-Roman, no style.
|
||||
/F2 5 0 R %Basic font. Helvetica, no style.
|
||||
/F3 6 0 R %F2 + font-weight (/stemV and /Weight)
|
||||
/F4 8 0 R %F3 + italic flag
|
||||
>>
|
||||
>>
|
||||
/Contents 10 0 R
|
||||
>>
|
||||
endobj
|
||||
4 0 obj <<
|
||||
/Type /Font
|
||||
/Subtype /Type1
|
||||
/BaseFont /Times-Roman
|
||||
>>
|
||||
endobj
|
||||
5 0 obj <<
|
||||
/Type /Font
|
||||
/Subtype /Type1
|
||||
/BaseFont /Helvetica
|
||||
>>
|
||||
endobj
|
||||
6 0 obj <<
|
||||
/Type /Font
|
||||
/Subtype /Type1
|
||||
/BaseFont /Helvetica-Bold
|
||||
/FontDescriptor 7 0 R
|
||||
>>
|
||||
endobj
|
||||
7 0 obj <<
|
||||
/Type /FontDescriptor
|
||||
/FontName /Helvetica-Bold
|
||||
/FontWeight 600
|
||||
/StemV 80
|
||||
/ItalicAngle 0
|
||||
/Ascent 776
|
||||
/Flags 0
|
||||
/FontBBox [-250 -236 2827 1000]
|
||||
/CapHeight 763
|
||||
/Descent -223
|
||||
>>
|
||||
endobj
|
||||
8 0 obj <<
|
||||
/Type /Font
|
||||
/Subtype /Type1
|
||||
/BaseFont /Helvetica-Bold-Italic
|
||||
/FontDescriptor 9 0 R
|
||||
>>
|
||||
endobj
|
||||
9 0 obj <<
|
||||
/Type /FontDescriptor
|
||||
/FontName /Helvetica-Bold-Italic
|
||||
/FontWeight 600
|
||||
/StemV 80
|
||||
/ItalicAngle 45
|
||||
/Ascent 776
|
||||
/Flags 64
|
||||
/FontBBox [-250 -236 2827 1000]
|
||||
/CapHeight 763
|
||||
/Descent -223
|
||||
>>
|
||||
endobj
|
||||
10 0 obj <<
|
||||
/Length 469
|
||||
>>
|
||||
stream
|
||||
BT
|
||||
0 0 Td
|
||||
/F1 9 Tf
|
||||
1 0 0 1 2 30 Tm
|
||||
(Hello) Tj
|
||||
|
||||
0 0 Td
|
||||
/F2 9 Tf
|
||||
1 0 0 1 22 30 Tm
|
||||
( world!) Tj
|
||||
|
||||
0 0 Td
|
||||
/F2 8 Tf
|
||||
1 0 0 1 48 30 Tm
|
||||
( One) Tj
|
||||
|
||||
0 0 Td
|
||||
/F3 8 Tf
|
||||
1 0 0 1 65 30 Tm
|
||||
( word) Tj
|
||||
|
||||
0 0 Td
|
||||
/F4 8 Tf
|
||||
1 0 0 1 85 30 Tm
|
||||
( equals) Tj
|
||||
|
||||
0 0 Td
|
||||
/F4 8 Tf
|
||||
1 0 0 1 121 30 Tm
|
||||
3 Tr
|
||||
( to) Tj
|
||||
|
||||
0 0 Td
|
||||
/F4 8 Tf
|
||||
1 0 0 1 137 30 Tm
|
||||
1 Tr
|
||||
( one) Tj
|
||||
|
||||
0 0 Td
|
||||
/F4 8 Tf
|
||||
1 0 0 1 154 30 Tm
|
||||
1 0 0 rg
|
||||
0 1 0 RG
|
||||
2 Tr
|
||||
( text) Tj
|
||||
|
||||
0 0 Td
|
||||
/F4 8 Tf
|
||||
1 0 0 1 170 30 Tm
|
||||
0 0 1 rg
|
||||
1 0 0 RG
|
||||
1 Tr
|
||||
( run.) Tj
|
||||
ET
|
||||
endstream
|
||||
endobj
|
||||
xref
|
||||
0 11
|
||||
0000000000 65535 f
|
||||
0000000015 00000 n
|
||||
0000000068 00000 n
|
||||
0000000161 00000 n
|
||||
0000000463 00000 n
|
||||
0000000541 00000 n
|
||||
0000000617 00000 n
|
||||
0000000722 00000 n
|
||||
0000000934 00000 n
|
||||
0000001046 00000 n
|
||||
0000001267 00000 n
|
||||
trailer <<
|
||||
/Root 1 0 R
|
||||
/Size 11
|
||||
>>
|
||||
startxref
|
||||
1789
|
||||
%%EOF
|
@ -376,9 +376,9 @@ class PDFEngine {
|
||||
// Get a given unicode character on a given page.
|
||||
virtual uint32_t GetCharUnicode(int page_index, int char_index) = 0;
|
||||
// Given a start char index, find the longest continuous run of text that's
|
||||
// in a single direction and with the same style and font size. Return a
|
||||
// filled out pp::PDF::PrivateAccessibilityTextRunInfo on success or
|
||||
// base::nullopt on failure. e.g. When |start_char_index| is out of bounds.
|
||||
// in a single direction and with the same text style. Return a filled out
|
||||
// pp::PDF::PrivateAccessibilityTextRunInfo on success or base::nullopt on
|
||||
// failure. e.g. When |start_char_index| is out of bounds.
|
||||
virtual base::Optional<pp::PDF::PrivateAccessibilityTextRunInfo>
|
||||
GetTextRunInfo(int page_index, int start_char_index) = 0;
|
||||
// Gets the number of links on a given page.
|
||||
|
@ -326,6 +326,21 @@ void PDFiumPage::CalculateTextRunStyleInfo(
|
||||
style_info->render_mode = FPDFText_GetTextRenderMode(text_page, char_index);
|
||||
}
|
||||
|
||||
// Returns true when the character at |char_index| has the same text style as
// |style|, and false as soon as any compared member differs. The character's
// style is obtained by filling a local PrivateAccessibilityTextStyleInfo via
// CalculateTextRunStyleInfo().
bool PDFiumPage::AreTextStyleEqual(
|
||||
int char_index,
|
||||
const pp::PDF::PrivateAccessibilityTextStyleInfo& style) {
|
||||
pp::PDF::PrivateAccessibilityTextStyleInfo char_style;
|
||||
CalculateTextRunStyleInfo(char_index, &char_style);
|
||||
// Member-wise comparison. font_size is the only member compared through
// DoubleEquals(); every other member uses operator==.
return char_style.font_name == style.font_name &&
|
||||
char_style.font_weight == style.font_weight &&
|
||||
char_style.render_mode == style.render_mode &&
|
||||
DoubleEquals(char_style.font_size, style.font_size) &&
|
||||
char_style.fill_color == style.fill_color &&
|
||||
char_style.stroke_color == style.stroke_color &&
|
||||
char_style.is_italic == style.is_italic &&
|
||||
char_style.is_bold == style.is_bold;
|
||||
}
|
||||
|
||||
base::Optional<pp::PDF::PrivateAccessibilityTextRunInfo>
|
||||
PDFiumPage::GetTextRunInfo(int start_char_index) {
|
||||
FPDF_PAGE page = GetPage();
|
||||
@ -406,11 +421,9 @@ PDFiumPage::GetTextRunInfo(int start_char_index) {
|
||||
GetFloatCharRectInPixels(page, text_page, char_index);
|
||||
|
||||
if (!base::IsUnicodeWhitespace(character)) {
|
||||
// TODO (bebeaudr): add text run break heuristic to break on style.
|
||||
|
||||
// Heuristic: End the text run if different font size is encountered.
|
||||
double font_size = FPDFText_GetFontSize(text_page, char_index);
|
||||
if (!DoubleEquals(font_size, text_run_font_size))
|
||||
// Heuristic: End the text run if the text style of the current character
|
||||
// is different from the text run's style.
|
||||
if (!AreTextStyleEqual(char_index, info.style))
|
||||
break;
|
||||
|
||||
// Heuristic: End text run if character isn't going in the same direction.
|
||||
|
@ -201,6 +201,11 @@ class PDFiumPage {
|
||||
void CalculateTextRunStyleInfo(
|
||||
int char_index,
|
||||
pp::PDF::PrivateAccessibilityTextStyleInfo* style_info);
|
||||
// Returns a boolean indicating if the character at index |char_index| has the
|
||||
// same text style as the text run.
|
||||
bool AreTextStyleEqual(
|
||||
int char_index,
|
||||
const pp::PDF::PrivateAccessibilityTextStyleInfo& style);
|
||||
|
||||
// Key : Marked content id for the image element as specified in the
|
||||
// struct tree.
|
||||
|
Reference in New Issue
Block a user