lib/child/frameScript.js - Issue 29338242: Issue 3792 - Fix to support multiprocess firefox

Delta Between Two Patch Sets: lib/child/frameScript.js

Issue 29338242: Issue 3792 - Fix to support multiprocess firefox (Closed)

Left Patch Set: fix race condition Created April 22, 2016, 12:32 p.m.

Right Patch Set: change comment Created Sept. 30, 2016, 12:43 p.m.

Left:
Right:

Use n/p to move between diff chunks; N/P to move between comments.

Jump to:

Left: Side by side diff | Download
Right: Side by side diff | Download

LEFT	RIGHT
1 /*	1 /*

2 * This Source Code is subject to the terms of the Mozilla Public License	2 * This Source Code is subject to the terms of the Mozilla Public License

3 * version 2.0 (the "License"). You can obtain a copy of the License at	3 * version 2.0 (the "License"). You can obtain a copy of the License at

4 * http://mozilla.org/MPL/2.0/.	4 * http://mozilla.org/MPL/2.0/.

5 */	5 */

	6

6 "use strict";	7 "use strict";
Wladimir Palant 2016/09/14 16:11:47 Nit: How about an empty line before that? Nit: How about an empty line before that? sergei 2016/09/29 09:58:11 Done. Show quoted text On 2016/09/14 16:11:47, Wladimir Palant wrote: > Nit: How about an empty line before that? Done.
7	8

8 const {classes: Cc, interfaces: Ci, utils: Cu, Cr: results} = Components;	9 const {classes: Cc, interfaces: Ci, utils: Cu, results: Cr} = Components;

9	10

10 /**	11 /**

11 * @param e exception	12 * @param e exception

12 */	13 */

13 function reportException(e)	14 function reportException(e)

14 {	15 {

15 let stack = "";	16 let stack = "";

16 if (e && typeof e == "object" && "stack" in e)	17 if (e && typeof e == "object" && "stack" in e)

17 stack = e.stack + "\n";	18 stack = e.stack + "\n";

18	19

19 Cu.reportError(e);	20 Cu.reportError(e);

20 dump(e + "\n" + stack + "\n");	21 dump(e + "\n" + stack + "\n");

21 }	22 }

22	23

23 let {Services} = Cu.import("resource://gre/modules/Services.jsm", {});	24 const {XPCOMUtils} = Cu.import("resource://gre/modules/XPCOMUtils.jsm", {});

24 let {XPCOMUtils} = Cu.import("resource://gre/modules/XPCOMUtils.jsm", {});

25	25

26 /**	26 /**

27 * Waits for finishing of the page loading, calls `gatherPageInfo` and sends	27 * Progress listener capturing the data of the current page and calling

28 * gahter information using "abpcrawler:pageInfoGathered" message.	28 * onPageLoaded(data) when loading is finished, where data contains

29 * https://developer.mozilla.org/en-US/docs/Mozilla/Tech/XPCOM/Reference/Interface/nsIWebProgressListener	29 * HTTP status and headers.
Wladimir Palant 2016/09/14 16:11:48 Nit: How about: * Progress listener capturing the Nit: How about: * Progress listener capturing the data of any page to finish loading and * sending this data via "abpcrawler:pageInfoGathered" message. * * @type nsIWebProgressListener Note that type is enough to look up documentation. sergei 2016/09/29 09:58:12 I have changed the comment. In addition webProgres Show quoted text On 2016/09/14 16:11:48, Wladimir Palant wrote: > Nit: How about: > > * Progress listener capturing the data of any page to finish loading and * > sending this data via "abpcrawler:pageInfoGathered" message. > * > * @type nsIWebProgressListener > > Note that type is enough to look up documentation. I have changed the comment. In addition webProgressListener does not send the message, instead it calls onPageLoaded because in future version this function will be more complicated and it's better to keep such code out of webProgressListener.
	30 *

	31 * @type nsIWebProgressListener

30 */	32 */

31 let webProgressListener =	33 let webProgressListener =

32 {	34 {

33 onStateChange: function(webProgress, request, flags, status)	35 onStateChange: function(webProgress, request, flags, status)

34 {	36 {

35 // use isTopLevel to filter beacon requests out	37 if (webProgress.DOMWindow == content &&
Wladimir Palant 2016/09/14 16:11:46 This comment doesn't make sense - not just beacon This comment doesn't make sense - not just beacon requests, you don't want to be bothered with frames either. Either way, we don't want to risk receiving messages for the wrong window (e.g. another tab). How about checking `webProgress.DOMWindow == content` instead of isTopLevel here? Note that isTopLevel was introduced for the scenario where your progress listener runs in a different process but that's not what you have here. sergei 2016/09/29 09:58:12 Done. It seems it does indeed work, however I thin Show quoted text On 2016/09/14 16:11:46, Wladimir Palant wrote: > This comment doesn't make sense - not just beacon requests, you don't want to be > bothered with frames either. Either way, we don't want to risk receiving > messages for the wrong window (e.g. another tab). How about checking > `webProgress.DOMWindow == content` instead of isTopLevel here? Note that > isTopLevel was introduced for the scenario where your progress listener runs in > a different process but that's not what you have here. Done. It seems it does indeed work, however I think I have tried it and it was not working.
36 if (webProgress.isTopLevel &&	38 (flags & Ci.nsIWebProgressListener.STATE_STOP))

37 (flags & Ci.nsIWebProgressListener.STATE_STOP) &&

38 (flags & Ci.nsIWebProgressListener.STATE_IS_WINDOW))

39 {	39 {

	40 // First time we receive STATE_STOP for about:blank and the second time

	41 // for our interested URL which is distinct from about:blank.

	42 // However we should not process about:blank because it can happen that

	43 // the message with information about about:blank is delivered when the

	44 // code in crawler.js is already waiting for a message from this tab.

	45 // Another case we are not interested in is about:newtab.

	46 if (content.location.protocol == "about:")

	47 return;

	48 let pageInfo = {channelStatus: status};

40 if (request instanceof Ci.nsIHttpChannel)	49 if (request instanceof Ci.nsIHttpChannel)

41 {	50 {

42 let pageInfo = {headers: []};
Wladimir Palant 2016/09/14 16:11:48 What about `channelStatus: status`? We used to cap What about `channelStatus: status`? We used to capture that. sergei 2016/09/29 09:58:11 Restored. Show quoted text On 2016/09/14 16:11:48, Wladimir Palant wrote: > What about `channelStatus: status`? We used to capture that. Restored.
43 try	51 try

44 {	52 {

	53 pageInfo.headers = [];

45 pageInfo.headers.push("HTTP/x.x " + request.responseStatus + " " + request.responseStatusText);	54 pageInfo.headers.push("HTTP/x.x " + request.responseStatus + " " + request.responseStatusText);

46 request.visitResponseHeaders((header, value) => pageInfo.headers.push(header + ": " + value));	55 request.visitResponseHeaders((header, value) => pageInfo.headers.push(header + ": " + value));

47 }	56 }

48 catch (e)	57 catch (e)

49 {	58 {

50 // Ignore if called before the response has been received (before

51 // onStartRequest()).

52 if (e.result == Cr.NS_ERROR_NOT_AVAILABLE)

53 return;
Wladimir Palant 2016/09/14 16:11:48 So what happens if a host name doesn't resolve? We So what happens if a host name doesn't resolve? We just time out without collecting any data whatsoever? We have the channel status, it's useful information and should be sent. Maybe it's a good idea to not report exceptions in case of NS_ERROR_NOT_AVAILABLE, but just returning definitely isn't. sergei 2016/09/29 09:58:13 If the hostname is not resolved we don't get here Show quoted text On 2016/09/14 16:11:48, Wladimir Palant wrote: > So what happens if a host name doesn't resolve? We just time out without > collecting any data whatsoever? We have the channel status, it's useful > information and should be sent. Maybe it's a good idea to not report exceptions > in case of NS_ERROR_NOT_AVAILABLE, but just returning definitely isn't. If the hostname is not resolved we don't get here anyway, so, yes, we do wait for the timeout, https://issues.adblockplus.org/ticket/3975. It was used here as workaround for race condition, the exception is thrown when we try to access request.responseStatus but the page has not been loaded yet, so I think that simply returning and waiting for the real finish of loading was a good idea. I have added the check of the location.protocol above and removed that check, so if it happens on a real page it will dump the exception info and call onPageLoaded. Wladimir Palant 2016/09/29 11:44:57 I don't really see why the page isn't loaded when Show quoted text On 2016/09/29 09:58:13, sergei wrote: > It was used here as workaround for race condition, the exception is thrown when > we try to access request.responseStatus but the page has not been loaded yet, I don't really see why the page isn't loaded when you get STATE_STOP. 
sergei 2016/09/29 15:36:21 Sorry for confusion, I meant under the page the pa Show quoted text On 2016/09/29 11:44:57, Wladimir Palant wrote: > On 2016/09/29 09:58:13, sergei wrote: > > It was used here as workaround for race condition, the exception is thrown > when > > we try to access request.responseStatus but the page has not been loaded yet, > > I don't really see why the page isn't loaded when you get STATE_STOP. On 2016/09/29 11:44:58, Wladimir Palant wrote: > This needs an explanation. Is that check necessary because you load about:blank > into unused tabs and you don't want to report that load? At the very least, this > needs a comment saying so. > > Still, what issue does this check solve? My guess is that without this check the > code will collect data from about:blank, send a message and this message will be > ignored. Wasting a few CPU cycles, so what? Do we really need this special case? > Or maybe we should rather work correctly even if we get something like > about:newtab on our URL list? Sorry for confusion, I meant under the page the page with URL distinct from about:blank. First time we receive STATE_STOP for about:blank and the second time for our URL. We should not process about:blank because it can happen that the message with information about about:blank is delivered when the code in crawler.js is already waiting for a message from that tab. When about:blank is loaded first time, request.responseStatus is not available. I have put a comment about it into the code. BTW, I have just discovered yet one issue with https://github.com/adblockplus/abpcrawler/blob/master/lib/crawler.js#L111. When we navigate to about:blank from our URL we get STATE_STOP the third time but the value of content.location.href is sometimes (!) still old (our URL) and it tries to get the page info yet one time, and according to the log it fails trying to get screenshot. I guess it happens because the tab is going to be closed. 
It's easy to reproduce if run the crawler with one tab because in that case it happens very often. I would propose to come back to creation of a new tab in TabAllocator.releaseTab to avoid any side effects of state of already processed tab. sergei 2016/10/04 10:57:31 What about creation of a new tab in TabAllocator.r Show quoted text On 2016/09/29 15:36:21, sergei wrote: > On 2016/09/29 11:44:57, Wladimir Palant wrote: > > On 2016/09/29 09:58:13, sergei wrote: > > > It was used here as workaround for race condition, the exception is thrown > > when > > > we try to access request.responseStatus but the page has not been loaded > yet, > > > > I don't really see why the page isn't loaded when you get STATE_STOP. > > On 2016/09/29 11:44:58, Wladimir Palant wrote: > > This needs an explanation. Is that check necessary because you load > about:blank > > into unused tabs and you don't want to report that load? At the very least, > this > > needs a comment saying so. > > > > Still, what issue does this check solve? My guess is that without this check > the > > code will collect data from about:blank, send a message and this message will > be > > ignored. Wasting a few CPU cycles, so what? Do we really need this special > case? > > Or maybe we should rather work correctly even if we get something like > > about:newtab on our URL list? > > Sorry for confusion, I meant under the page the page with URL distinct from > about:blank. First time we receive STATE_STOP for about:blank and the second > time for our URL. We should not process about:blank because it can happen that > the message with information about about:blank is delivered when the code in > crawler.js is already waiting for a message from that tab. When about:blank is > loaded first time, request.responseStatus is not available. > I have put a comment about it into the code. > > BTW, I have just discovered yet one issue with > https://github.com/adblockplus/abpcrawler/blob/master/lib/crawler.js#L111. 
When > we navigate to about:blank from our URL we get STATE_STOP the third time but the > value of content.location.href is sometimes (!) still old (our URL) and it tries > to get the page info yet one time, and according to the log it fails trying to > get screenshot. I guess it happens because the tab is going to be closed. It's > easy to reproduce if run the crawler with one tab because in that case it > happens very often. > > I would propose to come back to creation of a new tab in TabAllocator.releaseTab > to avoid any side effects of state of already processed tab. What about creation of a new tab in TabAllocator.releaseTab? Wladimir Palant 2016/10/04 11:10:38 No, I'd rather not make the code more complicated Show quoted text On 2016/10/04 10:57:31, sergei wrote: > What about creation of a new tab in TabAllocator.releaseTab? No, I'd rather not make the code more complicated than it already is for this edge case. I'd suggest that you push your patch and deal with it in a follow-up issue - this took long enough already. Show quoted text > When we navigate to about:blank from our URL we get STATE_STOP the third time but the value of content.location.href is sometimes (!) still old (our URL) Sure that this is STATE_STOP for about:blank? I'd suspect that the page timed out, so it's actually getting its first STATE_STOP when you navigate away to about:blank - and content.location.href is set correctly at that point. I'm not sure why creating a screenshot fails at that point but maybe document.readyState will help you recognize this situation. sergei 2016/10/06 14:26:58 Of course it should be in a follow-up issue, the q Show quoted text On 2016/10/04 11:10:38, Wladimir Palant wrote: > On 2016/10/04 10:57:31, sergei wrote: > > What about creation of a new tab in TabAllocator.releaseTab? > > No, I'd rather not make the code more complicated than it already is for this > edge case. 
I'd suggest that you push your patch and deal with it in a follow-up > issue - this took long enough already. Of course it should be in a follow-up issue, the question is whether we need it or not. Show quoted text > > > When we navigate to about:blank from our URL we get STATE_STOP the third time > but the value of content.location.href is sometimes (!) still old (our URL) > > Sure that this is STATE_STOP for about:blank? I'd suspect that the page timed > out, so it's actually getting its first STATE_STOP when you navigate away to > about:blank - and content.location.href is set correctly at that point. I'm not > sure why creating a screenshot fails at that point but maybe document.readyState > will help you recognize this situation. Yes, sure, there is simply no space to time out. Here is the log when we get STATE_STOP and the number of tabs is 1. console.log: owinID: 14, href: about:blank, rstate: uninitialized console.log: owinID: 16, href: about:newtab, rstate: complete console.log: owinID: 14, href: ftp://speedtest.tele2.net/, rstate: complete console.log: owinID: 14, href: ftp://speedtest.tele2.net/, rstate: complete console.log: owinID: 20, href: about:blank, rstate: uninitialized console.log: owinID: 20, href: https://www.amazon.com/, rstate: complete console.log: owinID: 20, href: https://www.amazon.com/, rstate: complete console.log: owinID: 27, href: about:blank, rstate: uninitialized console.log: owinID: 27, href: https://codereview.adblockplus.org/, rstate: complete console.log: owinID: 27, href: https://codereview.adblockplus.org/, rstate: complete console.log: owinID: 30, href: about:blank, rstate: uninitialized console.log: owinID: 30, href: https://twitter.com/, rstate: complete console.log: owinID: 30, href: about:blank, rstate: complete console.log: owinID: 43, href: about:blank, rstate: complete console.log: owinID: 43, href: http://edition.cnn.com/, rstate: complete console.log: owinID: 43, href: http://edition.cnn.com/, rstate: complete 
console.log: owinID: 94, href: about:blank, rstate: uninitialized console.log: owinID: 94, href: http://bcc.com/, rstate: complete console.log: owinID: 94, href: http://bcc.com/, rstate: complete console.log: owinID: 97, href: about:blank, rstate: uninitialized console.log: owinID: 97, href: https://adblockplus.org/, rstate: complete console.log: owinID: 97, href: about:blank, rstate: complete owinID - outerWindowID rstate - document.readyState I have tried it with commented line tab.linkedBrowser.loadURI("about:blank", null, null); in releaseTab and it looks very similar however there is no any third entry for the same outer window ID.
54 reportException(e);	59 reportException(e);

55 }	60 }

56 Object.assign(pageInfo, gatherPageInfo(content));

57 sendAsyncMessage("abpcrawler:pageInfoGathered", pageInfo);
Wladimir Palant 2016/09/14 16:11:47 The two lines above don't depend on the request ob The two lines above don't depend on the request object and should be outside the `if` block. If we happen to have a page served via FTP and we cannot get the headers - we would still like to capture all other page data. sergei 2016/09/29 09:58:12 Moved. Show quoted text On 2016/09/14 16:11:47, Wladimir Palant wrote: > The two lines above don't depend on the request object and should be outside the > `if` block. If we happen to have a page served via FTP and we cannot get the > headers - we would still like to capture all other page data. Moved.
58 }	61 }

	62 onPageLoaded(pageInfo);

59 }	63 }

60 },	64 },

61	65

62 // definitions of the remaining functions see related documentation	66 onLocationChange: function() {},
Wladimir Palant 2016/09/14 16:11:48 Nit: this comment doesn't add any value, remove? Nit: this comment doesn't add any value, remove? sergei 2016/09/29 09:58:11 Done. Show quoted text On 2016/09/14 16:11:48, Wladimir Palant wrote: > Nit: this comment doesn't add any value, remove? Done.
63 onLocationChange: function(webProgress, request, URI, flag) {},	67 onProgressChange: function() {},

64 onProgressChange: function(aWebProgress, aRequest, curSelf, maxSelf, curTot, maxTot) {},	68 onStatusChange: function() {},
Wladimir Palant 2016/09/14 16:11:47 So, Hungarian notation or not? Our style guide say So, Hungarian notation or not? Our style guide says: not. But you can also just leave out the parameters since you aren't actually implementing these methods (and they won't even be called with proper filtering in place). sergei 2016/09/29 09:58:12 Done. Show quoted text On 2016/09/14 16:11:47, Wladimir Palant wrote: > So, Hungarian notation or not? Our style guide says: not. But you can also just > leave out the parameters since you aren't actually implementing these methods > (and they won't even be called with proper filtering in place). Done.
65 onStatusChange: function(aWebProgress, aRequest, aStatus, aMessage) {},	69 onSecurityChange: function() {},

66 onSecurityChange: function(aWebProgress, aRequest, aState) {},	70

67

68 QueryInterface: XPCOMUtils.generateQI([Ci.nsIWebProgressListener, Ci.nsISupportsWeakReference])	71 QueryInterface: XPCOMUtils.generateQI([Ci.nsIWebProgressListener, Ci.nsISupportsWeakReference])
Wladimir Palant 2016/09/14 16:11:48 I don't think that progress listeners can be weak I don't think that progress listeners can be weak references, exposing nsIWebProgressListener should be sufficient. sergei 2016/09/29 09:58:13 According to the documentation, "This object must Show quoted text On 2016/09/14 16:11:48, Wladimir Palant wrote: > I don't think that progress listeners can be weak references, exposing > nsIWebProgressListener should be sufficient. According to the documentation, "This object must also implement nsISupportsWeakReference." https://developer.mozilla.org/en-US/docs/Mozilla/Tech/XPCOM/Reference/Interfa....
69 };	72 };

70	73

71 let filter = Cc["@mozilla.org/appshell/component/browser-status-filter;1"]	74 function onPageLoaded(pageInfo)

72 .createInstance(Ci.nsIWebProgress);	75 {
Wladimir Palant 2016/09/14 16:11:48 Nit: we usually align the dot with the [ on the pr Nit: we usually align the dot with the [ on the previous line. Either way, why do we even bother with browser-status-filter if we already registered our progress listener on the docShell for the current tab? sergei 2016/09/29 09:58:12 Removed. BTW, in the example from mozilla they are Show quoted text On 2016/09/14 16:11:48, Wladimir Palant wrote: > Nit: we usually align the dot with the [ on the previous line. > > Either way, why do we even bother with browser-status-filter if we already > registered our progress listener on the docShell for the current tab? Removed. BTW, in the example from mozilla they are also using filter but it does indeed work without it. Wladimir Palant 2016/09/29 11:44:57 Yes, there is an example mentioning it - but no do Show quoted text On 2016/09/29 09:58:12, sergei wrote: > Removed. BTW, in the example from mozilla they are also using filter but it does > indeed work without it. Yes, there is an example mentioning it - but no documentation explaining what it does. The point of this filter is apparently limiting the number of times C++ needs to call into JavaScript, for performance reasons. For that it filters out some messages using an undocumented approach. This has all the signs on an internal component meant to work with the progress listener installed by Firefox but non necessary doing the right thing for us. We shouldn't use it, even if the results are correct right now they won't necessarily be in the next Firefox release. sergei 2016/09/29 15:36:21 Acknowledged. Show quoted text On 2016/09/29 11:44:57, Wladimir Palant wrote: > On 2016/09/29 09:58:12, sergei wrote: > > Removed. BTW, in the example from mozilla they are also using filter but it > does > > indeed work without it. > > Yes, there is an example mentioning it - but no documentation explaining what it > does. 
The point of this filter is apparently limiting the number of times C++ > needs to call into JavaScript, for performance reasons. For that it filters out > some messages using an undocumented approach. This has all the signs on an > internal component meant to work with the progress listener installed by Firefox > but non necessary doing the right thing for us. We shouldn't use it, even if the > results are correct right now they won't necessarily be in the next Firefox > release. Acknowledged.
73 filter.addProgressListener(webProgressListener, Ci.nsIWebProgress.NOTIFY_ALL);	76 Object.assign(pageInfo, gatherPageInfo(content));

	77 sendAsyncMessage("abpcrawler:pageInfoGathered", pageInfo);

	78 };

74	79

75 let webProgress = docShell.QueryInterface(Ci.nsIInterfaceRequestor)	80 let webProgress = docShell.QueryInterface(Ci.nsIInterfaceRequestor).getInterface(Ci.nsIWebProgress);

76 .getInterface(Ci.nsIWebProgress);	81 webProgress.addProgressListener(webProgressListener, Ci.nsIWebProgress.NOTIFY_STATE_WINDOW);

77 webProgress.addProgressListener(filter, Ci.nsIWebProgress.NOTIFY_ALL);
Wladimir Palant 2016/09/14 16:11:48 The reason why the original code is receiving all The reason why the original code is receiving all progress events: tabbrowser doesn't support filtering. nsIWebProgress supports filtering however, so you should use it rather than requesting all kind of stuff you don't need. Ci.nsIWebProgress.NOTIFY_STATE_WINDOW should do. sergei 2016/09/29 09:58:12 Done. Show quoted text On 2016/09/14 16:11:48, Wladimir Palant wrote: > The reason why the original code is receiving all progress events: tabbrowser > doesn't support filtering. nsIWebProgress supports filtering however, so you > should use it rather than requesting all kind of stuff you don't need. > Ci.nsIWebProgress.NOTIFY_STATE_WINDOW should do. Done.
78	82

79 /**	83 /**

80 * Gathers information about page using DOM window.	84 * Gathers information about a DOM window.
Wladimir Palant 2016/09/14 16:11:47 Just "Gathers information about a DOM window." may Just "Gathers information about a DOM window." maybe? sergei 2016/09/29 09:58:13 Done. Show quoted text On 2016/09/14 16:11:47, Wladimir Palant wrote: > Just "Gathers information about a DOM window." maybe? Done.
81 * Currently	85 * Currently

82 * - creates a screenshot of the page	86 * - creates a screenshot of the page

83 * - serializes the page source code	87 * - serializes the page source code

84 * @param {nsIDOMWindow} wnd window to process	88 * @param {nsIDOMWindow} wnd window to process

85 * @return {Object} the object containing "screenshot" and "source" properties.	89 * @return {Object} the object containing "screenshot" and "source" properties.

86 */	90 */

87 function gatherPageInfo(wnd)	91 function gatherPageInfo(wnd)

88 {	92 {

89 let document = wnd.document;	93 let document = wnd.document;

90 let result = {errors:[]};	94 let result = {errors:[]};

91 if (document.documentElement)	95 if (!document.documentElement)

92 {	96 {

93 try	97 result.errors.push("No document.documentElement");

94 {	98 return result;

95 let canvas = document.createElementNS("http://www.w3.org/1999/xhtml", "canvas");

96 // Firefox does not work with large canvas elements.

97 // http://stackoverflow.com/questions/6081483/maximum-size-of-a-canvas-element
Wladimir Palant 2016/09/14 16:11:48 Great, you could have linked to http://stackoverfl Great, you could have linked to http://stackoverflow.com/a/12644047/785541 just as well :-( As I mentioned back in 2012 - these are all implementation details and subject to change any time. Nor are the values you hardcoded here actually correct (didn't you consider the number 472907776 rather unlikely?). The reason why I decided to just let the canvas fail is because the actual restrictions are hard to pinpoint and one wouldn't want to cut off screenshots unnecessarily. This is an unrelated change, please put it into a separate review and we can discuss the best approach there. sergei 2016/09/29 09:58:11 Removed from here. Show quoted text On 2016/09/14 16:11:48, Wladimir Palant wrote: > Great, you could have linked to http://stackoverflow.com/a/12644047/785541 just > as well :-( > > As I mentioned back in 2012 - these are all implementation details and subject > to change any time. Nor are the values you hardcoded here actually correct > (didn't you consider the number 472907776 rather unlikely?). The reason why I > decided to just let the canvas fail is because the actual restrictions are hard > to pinpoint and one wouldn't want to cut off screenshots unnecessarily. This is > an unrelated change, please put it into a separate review and we can discuss the > best approach there. Removed from here.
98 canvas.width = document.documentElement.scrollWidth;

99 canvas.height = document.documentElement.scrollHeight;

100 if (canvas.width > 0 && canvas.height > 0)

101 {

102 if (canvas.width > 32767)

103 {

104 result.errors.push("Width exceeds supported limit, " + canvas.width);

105 canvas.width = 32767;

106 }

107 if (canvas.height > 32767)

108 {

109 result.errors.push("Height exceeds supported limit, " + canvas.height);

110 canvas.height = 32767;

111 }

112 if (canvas.width * canvas.height > 472907776)

113 {

114 result.errrors.push("Area exceeds supported limit, " + canvas.height + " * " + canvas.width);

115 canvas.height = 472907776 / canvas.width;

116 }

117 let context = canvas.getContext("2d");

118 context.drawWindow(wnd, 0, 0, canvas.width, canvas.height, "rgb(255, 255, 255)");

119 result.screenshot = canvas.toDataURL("image/jpeg", 0.8);

120 }

121 // TODO: Capture frames as well?

122 let serializer = new wnd.XMLSerializer();

123 result.source = serializer.serializeToString(document.documentElement);

124 }

125 catch (e)

126 {

127 reportException(e);

128 result.errors.push("Cannot gather page info");
Wladimir Palant 2016/09/14 16:11:47 The original code had the canvas in a separate try The original code had the canvas in a separate try block - this code is known to fail, it shouldn't affect the other data we are capturing. sergei 2016/09/29 09:58:13 Done. Show quoted text On 2016/09/14 16:11:47, Wladimir Palant wrote: > The original code had the canvas in a separate try block - this code is known to > fail, it shouldn't affect the other data we are capturing. Done.
129 }

130 }	99 }

	100

	101 try

	102 {

	103 let canvas = document.createElementNS("http://www.w3.org/1999/xhtml", "canvas");

	104 canvas.width = document.documentElement.scrollWidth;

	105 canvas.height = document.documentElement.scrollHeight;

	106 let context = canvas.getContext("2d");

	107 context.drawWindow(wnd, 0, 0, canvas.width, canvas.height, "rgb(255, 255, 255)");

	108 result.screenshot = canvas.toDataURL("image/jpeg", 0.8);

	109 }

	110 catch (e)

	111 {

	112 reportException(e);

	113 result.errors.push("Cannot make page screenshot");

	114 }

	115

	116 try

	117 {

	118 // TODO: Capture frames as well?

	119 let serializer = new wnd.XMLSerializer();

	120 result.source = serializer.serializeToString(document.documentElement);

	121 }

	122 catch(e)

	123 {

	124 reportException(e);

	125 result.errors.push("Cannot obtain page source code");

	126 }

	127

131 return result;	128 return result;

132 }	129 }

LEFT	RIGHT