Save-Page-As-Complete-HTML: Even better handling of <object> elements.
crrev.com/1416113012 improved handling of <object> elements by
save-page-as-complete-html, but always treated them as resources.
The current CL tries to report them as subframes if they contain
a html document.
This CL also adjusts WebFrame::fromFrameOwnerElement and
WebLocalFrameImpl::fromFrameOwnerElement so that they truly work with
instances of HTMLFrameOwnerElement (as the name of these methods
suggests), rather than only with instances of HTMLFrameElementBase (a
subclass of HTMLFrameOwnerElement that only covers <frame> and <iframe>
elements and not <object> element or other frame-owning elements).
FWIW, I tracked down the comment in Web(Local)FrameImpl.cpp that questions
why we only check for <iframe> and <frame> to the following commit:
abarth@webkit.org fd5b347
2012-09-28 18:31:12 :
// FIXME: Why do we check specifically for <iframe> and <frame> here?
// Why can't we get the WebFrameImpl from an <object> element,
// for example.
BUG=553478
Review URL: https://codereview.chromium.org/1442463002
Cr-Commit-Position: refs/heads/master@{#362165}
This commit is contained in:
chrome
content/renderer
third_party/WebKit/Source/web
@ -1010,12 +1010,14 @@ IN_PROC_BROWSER_TEST_P(SavePageMultiFrameBrowserTest, ObjectElements) {
|
||||
|
||||
// 4 = main frame + iframe + object w/ html doc + object w/ pdf doc
|
||||
// (svg and png objects do not get a separate frame)
|
||||
int expected_number_of_frames = 4;
|
||||
int expected_number_of_frames = 6;
|
||||
|
||||
std::vector<std::string> expected_substrings{
|
||||
"frames-objects.htm: 8da13db4-a512-4d9b-b1c5-dc1c134234b9",
|
||||
"a.htm: 1b8aae2b-e164-462f-bd5b-98aa366205f2",
|
||||
"b.htm: 3a35f7fa-96a9-4487-9f18-4470263907fa",
|
||||
"frames-nested.htm: 4388232f-8d45-4d2e-9807-721b381be153",
|
||||
"frames-nested2.htm: 6d23dc47-f283-4977-96ec-66bcf72301a4",
|
||||
};
|
||||
|
||||
GURL url(
|
||||
|
@ -9,7 +9,7 @@
|
||||
|
||||
Html object:
|
||||
<br>
|
||||
<object data="b.htm" border="1">err_no_object_data</object>
|
||||
<object data="frames-nested.htm" border="1">err_no_object_data</object>
|
||||
<br>
|
||||
<br>
|
||||
|
||||
|
@ -34,41 +34,63 @@ using blink::WebView;
|
||||
namespace content {
|
||||
namespace {
|
||||
|
||||
// Get all savable resource links from current element. One element might
|
||||
// have more than one resource link. It is possible to have some links
|
||||
// in one CSS stylesheet.
|
||||
// Returns |true| if |web_frame| contains (or should be assumed to contain)
|
||||
// a html document.
|
||||
bool DoesFrameContainHtmlDocument(const WebFrame& web_frame,
|
||||
const WebElement& element) {
|
||||
if (web_frame.isWebLocalFrame()) {
|
||||
WebDocument doc = web_frame.document();
|
||||
return doc.isHTMLDocument() || doc.isXHTMLDocument();
|
||||
}
|
||||
|
||||
// Cannot inspect contents of a remote frame, so we use a heuristic:
|
||||
// Assume that <iframe> and <frame> elements contain a html document,
|
||||
// and other elements (i.e. <object>) contain plugins or other resources.
|
||||
// If the heuristic is wrong (i.e. the remote frame in <object> does
|
||||
// contain an html document), then things will still work, but with the
|
||||
// following caveats: 1) original frame content will be saved and 2) links
|
||||
// in frame's html doc will not be rewritten to point to locally saved
|
||||
// files.
|
||||
return element.hasHTMLTagName("iframe") || element.hasHTMLTagName("frame");
|
||||
}
|
||||
|
||||
// If present and valid, then push the link associated with |element|
|
||||
// into either SavableResourcesResult::subframes or
|
||||
// SavableResourcesResult::resources_list.
|
||||
void GetSavableResourceLinkForElement(
|
||||
const WebElement& element,
|
||||
const WebDocument& current_doc,
|
||||
SavableResourcesResult* result) {
|
||||
if (element.hasHTMLTagName("iframe") || element.hasHTMLTagName("frame")) {
|
||||
GURL complete_url = current_doc.completeURL(element.getAttribute("src"));
|
||||
WebFrame* web_frame = WebFrame::fromFrameOwnerElement(element);
|
||||
|
||||
SavableSubframe subframe;
|
||||
subframe.original_url = complete_url;
|
||||
subframe.routing_id = GetRoutingIdForFrameOrProxy(web_frame);
|
||||
|
||||
result->subframes->push_back(subframe);
|
||||
return;
|
||||
}
|
||||
|
||||
// Check whether the node has sub resource URL or not.
|
||||
WebString value = GetSubResourceLinkFromElement(element);
|
||||
if (value.isNull())
|
||||
return;
|
||||
|
||||
// Get absolute URL.
|
||||
GURL u = current_doc.completeURL(value);
|
||||
// ignore invalid URL
|
||||
if (!u.is_valid())
|
||||
GURL element_url = current_doc.completeURL(value);
|
||||
|
||||
// See whether to report this element as a subframe.
|
||||
WebFrame* web_frame = WebFrame::fromFrameOwnerElement(element);
|
||||
if (web_frame && DoesFrameContainHtmlDocument(*web_frame, element)) {
|
||||
SavableSubframe subframe;
|
||||
subframe.original_url = element_url;
|
||||
subframe.routing_id = GetRoutingIdForFrameOrProxy(web_frame);
|
||||
result->subframes->push_back(subframe);
|
||||
return;
|
||||
}
|
||||
|
||||
// Ignore invalid URL.
|
||||
if (!element_url.is_valid())
|
||||
return;
|
||||
|
||||
// Ignore those URLs which are not standard protocols. Because FTP
|
||||
// protocol does no have cache mechanism, we will skip all
|
||||
// sub-resources if they use FTP protocol.
|
||||
if (!u.SchemeIsHTTPOrHTTPS() && !u.SchemeIs(url::kFileScheme))
|
||||
if (!element_url.SchemeIsHTTPOrHTTPS() &&
|
||||
!element_url.SchemeIs(url::kFileScheme))
|
||||
return;
|
||||
|
||||
result->resources_list->push_back(u);
|
||||
result->resources_list->push_back(element_url);
|
||||
}
|
||||
|
||||
} // namespace
|
||||
@ -112,6 +134,8 @@ bool GetSavableResourceLinksForFrame(WebFrame* current_frame,
|
||||
WebString GetSubResourceLinkFromElement(const WebElement& element) {
|
||||
const char* attribute_name = NULL;
|
||||
if (element.hasHTMLTagName("img") ||
|
||||
element.hasHTMLTagName("frame") ||
|
||||
element.hasHTMLTagName("iframe") ||
|
||||
element.hasHTMLTagName("script")) {
|
||||
attribute_name = "src";
|
||||
} else if (element.hasHTMLTagName("input")) {
|
||||
@ -130,10 +154,6 @@ WebString GetSubResourceLinkFromElement(const WebElement& element) {
|
||||
element.hasHTMLTagName("ins")) {
|
||||
attribute_name = "cite";
|
||||
} else if (element.hasHTMLTagName("object")) {
|
||||
// TODO(lukasza): When <object> contains a html document, it should be
|
||||
// reported as a subframe, not as a savable resource (reporting as a
|
||||
// savable resource works, but will save original html contents, not
|
||||
// current html contents of the frame).
|
||||
attribute_name = "data";
|
||||
} else if (element.hasHTMLTagName("link")) {
|
||||
// If the link element is not linked to css, ignore it.
|
||||
|
4
third_party/WebKit/Source/web/WebFrame.cpp
vendored
4
third_party/WebKit/Source/web/WebFrame.cpp
vendored
@ -266,9 +266,9 @@ WebFrame* WebFrame::fromFrameOwnerElement(const WebElement& webElement)
|
||||
{
|
||||
Element* element = PassRefPtrWillBeRawPtr<Element>(webElement).get();
|
||||
|
||||
if (!isHTMLFrameElementBase(element))
|
||||
if (!element->isFrameOwnerElement())
|
||||
return nullptr;
|
||||
return fromFrame(toHTMLFrameElementBase(element)->contentFrame());
|
||||
return fromFrame(toHTMLFrameOwnerElement(element)->contentFrame());
|
||||
}
|
||||
|
||||
bool WebFrame::isLoading() const
|
||||
|
@ -1858,7 +1858,7 @@ void WebLocalFrameImpl::createFrameView()
|
||||
WebLocalFrameImpl* WebLocalFrameImpl::fromFrame(LocalFrame* frame)
|
||||
{
|
||||
if (!frame)
|
||||
return 0;
|
||||
return nullptr;
|
||||
return fromFrame(*frame);
|
||||
}
|
||||
|
||||
@ -1866,22 +1866,21 @@ WebLocalFrameImpl* WebLocalFrameImpl::fromFrame(LocalFrame& frame)
|
||||
{
|
||||
FrameLoaderClient* client = frame.loader().client();
|
||||
if (!client || !client->isFrameLoaderClientImpl())
|
||||
return 0;
|
||||
return nullptr;
|
||||
return toFrameLoaderClientImpl(client)->webFrame();
|
||||
}
|
||||
|
||||
WebLocalFrameImpl* WebLocalFrameImpl::fromFrameOwnerElement(Element* element)
|
||||
{
|
||||
// FIXME: Why do we check specifically for <iframe> and <frame> here? Why can't we get the WebLocalFrameImpl from an <object> element, for example.
|
||||
if (!isHTMLFrameElementBase(element))
|
||||
return 0;
|
||||
return fromFrame(toLocalFrame(toHTMLFrameElementBase(element)->contentFrame()));
|
||||
if (!element->isFrameOwnerElement())
|
||||
return nullptr;
|
||||
return fromFrame(toLocalFrame(toHTMLFrameOwnerElement(element)->contentFrame()));
|
||||
}
|
||||
|
||||
WebViewImpl* WebLocalFrameImpl::viewImpl() const
|
||||
{
|
||||
if (!frame())
|
||||
return 0;
|
||||
return nullptr;
|
||||
return WebViewImpl::fromPage(frame()->page());
|
||||
}
|
||||
|
||||
|
Reference in New Issue
Block a user