Left: | ||
Right: |
LEFT | RIGHT |
---|---|
1 /* | 1 /* |
2 * This Source Code is subject to the terms of the Mozilla Public License | 2 * This Source Code is subject to the terms of the Mozilla Public License |
3 * version 2.0 (the "License"). You can obtain a copy of the License at | 3 * version 2.0 (the "License"). You can obtain a copy of the License at |
4 * http://mozilla.org/MPL/2.0/. | 4 * http://mozilla.org/MPL/2.0/. |
5 */ | 5 */ |
6 | |
6 "use strict"; | 7 "use strict"; |
Wladimir Palant
2016/09/14 16:11:47
Nit: How about an empty line before that?
sergei
2016/09/29 09:58:11
Done.
| |
7 | 8 |
8 const {classes: Cc, interfaces: Ci, utils: Cu, Cr: results} = Components; | 9 const {classes: Cc, interfaces: Ci, utils: Cu, results: Cr} = Components; |
9 | 10 |
10 /** | 11 /** |
11 * @param e exception | 12 * @param e exception |
12 */ | 13 */ |
13 function reportException(e) | 14 function reportException(e) |
14 { | 15 { |
15 let stack = ""; | 16 let stack = ""; |
16 if (e && typeof e == "object" && "stack" in e) | 17 if (e && typeof e == "object" && "stack" in e) |
17 stack = e.stack + "\n"; | 18 stack = e.stack + "\n"; |
18 | 19 |
19 Cu.reportError(e); | 20 Cu.reportError(e); |
20 dump(e + "\n" + stack + "\n"); | 21 dump(e + "\n" + stack + "\n"); |
21 } | 22 } |
22 | 23 |
23 let {Services} = Cu.import("resource://gre/modules/Services.jsm", {}); | 24 const {XPCOMUtils} = Cu.import("resource://gre/modules/XPCOMUtils.jsm", {}); |
24 let {XPCOMUtils} = Cu.import("resource://gre/modules/XPCOMUtils.jsm", {}); | |
25 | 25 |
26 /** | 26 /** |
27 * Waits for finishing of the page loading, calls `gatherPageInfo` and sends | 27 * Progress listener capturing the data of the current page and calling |
28 * gahter information using "abpcrawler:pageInfoGathered" message. | 28 * onPageLoaded(data) when loading is finished, where data contains |
29 * https://developer.mozilla.org/en-US/docs/Mozilla/Tech/XPCOM/Reference/Interfa ce/nsIWebProgressListener | 29 * HTTP status and headers. |
Wladimir Palant
2016/09/14 16:11:48
Nit: How about:
* Progress listener capturing the
sergei
2016/09/29 09:58:12
I have changed the comment. In addition webProgres
| |
30 * | |
31 * @type nsIWebProgressListener | |
30 */ | 32 */ |
31 let webProgressListener = | 33 let webProgressListener = |
32 { | 34 { |
33 onStateChange: function(webProgress, request, flags, status) | 35 onStateChange: function(webProgress, request, flags, status) |
34 { | 36 { |
35 // use isTopLevel to filter beacon requests out | 37 if (webProgress.DOMWindow == content && |
Wladimir Palant
2016/09/14 16:11:46
This comment doesn't make sense - not just beacon
sergei
2016/09/29 09:58:12
Done. It seems it does indeed work, however I thin
| |
36 if (webProgress.isTopLevel && | 38 (flags & Ci.nsIWebProgressListener.STATE_STOP)) |
37 (flags & Ci.nsIWebProgressListener.STATE_STOP) && | |
38 (flags & Ci.nsIWebProgressListener.STATE_IS_WINDOW)) | |
39 { | 39 { |
40 // First time we receive STATE_STOP for about:blank and the second time | |
41 // for our interested URL which is distinct from about:blank. | |
42 // However we should not process about:blank because it can happen that | |
43 // the message with information about about:blank is delivered when the | |
44 // code in crawler.js is already waiting for a message from this tab. | |
45 // Another case we are not interested in is about:newtab. | |
46 if (content.location.protocol == "about:") | |
47 return; | |
48 let pageInfo = {channelStatus: status}; | |
40 if (request instanceof Ci.nsIHttpChannel) | 49 if (request instanceof Ci.nsIHttpChannel) |
41 { | 50 { |
42 let pageInfo = {headers: []}; | |
Wladimir Palant
2016/09/14 16:11:48
What about `channelStatus: status`? We used to cap
sergei
2016/09/29 09:58:11
Restored.
| |
43 try | 51 try |
44 { | 52 { |
53 pageInfo.headers = []; | |
45 pageInfo.headers.push("HTTP/x.x " + request.responseStatus + " " + req uest.responseStatusText); | 54 pageInfo.headers.push("HTTP/x.x " + request.responseStatus + " " + req uest.responseStatusText); |
46 request.visitResponseHeaders((header, value) => pageInfo.headers.push( header + ": " + value)); | 55 request.visitResponseHeaders((header, value) => pageInfo.headers.push( header + ": " + value)); |
47 } | 56 } |
48 catch (e) | 57 catch (e) |
49 { | 58 { |
50 // Ignore if called before the response has been received (before | |
51 // onStartRequest()). | |
52 if (e.result == Cr.NS_ERROR_NOT_AVAILABLE) | |
53 return; | |
Wladimir Palant
2016/09/14 16:11:48
So what happens if a host name doesn't resolve? We
sergei
2016/09/29 09:58:13
If the hostname is not resolved we don't get here
Wladimir Palant
2016/09/29 11:44:57
I don't really see why the page isn't loaded when
sergei
2016/09/29 15:36:21
Sorry for confusion, I meant under the page the pa
sergei
2016/10/04 10:57:31
What about creation of a new tab in TabAllocator.r
Wladimir Palant
2016/10/04 11:10:38
No, I'd rather not make the code more complicated
sergei
2016/10/06 14:26:58
Of course it should be in a follow-up issue, the q
| |
54 reportException(e); | 59 reportException(e); |
55 } | 60 } |
56 Object.assign(pageInfo, gatherPageInfo(content)); | |
57 sendAsyncMessage("abpcrawler:pageInfoGathered", pageInfo); | |
Wladimir Palant
2016/09/14 16:11:47
The two lines above don't depend on the request ob
sergei
2016/09/29 09:58:12
Moved.
| |
58 } | 61 } |
62 onPageLoaded(pageInfo); | |
59 } | 63 } |
60 }, | 64 }, |
61 | 65 |
62 // definitions of the remaining functions see related documentation | 66 onLocationChange: function() {}, |
Wladimir Palant
2016/09/14 16:11:48
Nit: this comment doesn't add any value, remove?
sergei
2016/09/29 09:58:11
Done.
| |
63 onLocationChange: function(webProgress, request, URI, flag) {}, | 67 onProgressChange: function() {}, |
64 onProgressChange: function(aWebProgress, aRequest, curSelf, maxSelf, curTot, m axTot) {}, | 68 onStatusChange: function() {}, |
Wladimir Palant
2016/09/14 16:11:47
So, Hungarian notation or not? Our style guide say
sergei
2016/09/29 09:58:12
Done.
| |
65 onStatusChange: function(aWebProgress, aRequest, aStatus, aMessage) {}, | 69 onSecurityChange: function() {}, |
66 onSecurityChange: function(aWebProgress, aRequest, aState) {}, | 70 |
67 | |
68 QueryInterface: XPCOMUtils.generateQI([Ci.nsIWebProgressListener, Ci.nsISuppor tsWeakReference]) | 71 QueryInterface: XPCOMUtils.generateQI([Ci.nsIWebProgressListener, Ci.nsISuppor tsWeakReference]) |
Wladimir Palant
2016/09/14 16:11:48
I don't think that progress listeners can be weak
sergei
2016/09/29 09:58:13
According to the documentation, "This object must
| |
69 }; | 72 }; |
70 | 73 |
71 let filter = Cc["@mozilla.org/appshell/component/browser-status-filter;1"] | 74 function onPageLoaded(pageInfo) |
72 .createInstance(Ci.nsIWebProgress); | 75 { |
Wladimir Palant
2016/09/14 16:11:48
Nit: we usually align the dot with the [ on the pr
sergei
2016/09/29 09:58:12
Removed. BTW, in the example from mozilla they are
Wladimir Palant
2016/09/29 11:44:57
Yes, there is an example mentioning it - but no do
sergei
2016/09/29 15:36:21
Acknowledged.
| |
73 filter.addProgressListener(webProgressListener, Ci.nsIWebProgress.NOTIFY_ALL); | 76 Object.assign(pageInfo, gatherPageInfo(content)); |
77 sendAsyncMessage("abpcrawler:pageInfoGathered", pageInfo); | |
78 }; | |
74 | 79 |
75 let webProgress = docShell.QueryInterface(Ci.nsIInterfaceRequestor) | 80 let webProgress = docShell.QueryInterface(Ci.nsIInterfaceRequestor).getInterface (Ci.nsIWebProgress); |
76 .getInterface(Ci.nsIWebProgress); | 81 webProgress.addProgressListener(webProgressListener, Ci.nsIWebProgress.NOTIFY_ST ATE_WINDOW); |
77 webProgress.addProgressListener(filter, Ci.nsIWebProgress.NOTIFY_ALL); | |
Wladimir Palant
2016/09/14 16:11:48
The reason why the original code is receiving all
sergei
2016/09/29 09:58:12
Done.
| |
78 | 82 |
79 /** | 83 /** |
80 * Gathers information about page using DOM window. | 84 * Gathers information about a DOM window. |
Wladimir Palant
2016/09/14 16:11:47
Just "Gathers information about a DOM window." may
sergei
2016/09/29 09:58:13
Done.
| |
81 * Currently | 85 * Currently |
82 * - creates a screenshot of the page | 86 * - creates a screenshot of the page |
83 * - serializes the page source code | 87 * - serializes the page source code |
84 * @param {nsIDOMWindow} wnd window to process | 88 * @param {nsIDOMWindow} wnd window to process |
85 * @return {Object} the object containing "screenshot" and "source" properties. | 89 * @return {Object} the object containing "screenshot" and "source" properties. |
86 */ | 90 */ |
87 function gatherPageInfo(wnd) | 91 function gatherPageInfo(wnd) |
88 { | 92 { |
89 let document = wnd.document; | 93 let document = wnd.document; |
90 let result = {errors:[]}; | 94 let result = {errors:[]}; |
91 if (document.documentElement) | 95 if (!document.documentElement) |
92 { | 96 { |
93 try | 97 result.errors.push("No document.documentElement"); |
94 { | 98 return result; |
95 let canvas = document.createElementNS("http://www.w3.org/1999/xhtml", "can vas"); | |
96 // Firefox does not work with large canvas elements. | |
97 // http://stackoverflow.com/questions/6081483/maximum-size-of-a-canvas-ele ment | |
Wladimir Palant
2016/09/14 16:11:48
Great, you could have linked to http://stackoverfl
sergei
2016/09/29 09:58:11
Removed from here.
| |
98 canvas.width = document.documentElement.scrollWidth; | |
99 canvas.height = document.documentElement.scrollHeight; | |
100 if (canvas.width > 0 && canvas.height > 0) | |
101 { | |
102 if (canvas.width > 32767) | |
103 { | |
104 result.errors.push("Width exceeds supported limit, " + canvas.width); | |
105 canvas.width = 32767; | |
106 } | |
107 if (canvas.height > 32767) | |
108 { | |
109 result.errors.push("Height exceeds supported limit, " + canvas.height) ; | |
110 canvas.height = 32767; | |
111 } | |
112 if (canvas.width * canvas.height > 472907776) | |
113 { | |
114 result.errrors.push("Area exceeds supported limit, " + canvas.height + " * " + canvas.width); | |
115 canvas.height = 472907776 / canvas.width; | |
116 } | |
117 let context = canvas.getContext("2d"); | |
118 context.drawWindow(wnd, 0, 0, canvas.width, canvas.height, "rgb(255, 255 , 255)"); | |
119 result.screenshot = canvas.toDataURL("image/jpeg", 0.8); | |
120 } | |
121 // TODO: Capture frames as well? | |
122 let serializer = new wnd.XMLSerializer(); | |
123 result.source = serializer.serializeToString(document.documentElement); | |
124 } | |
125 catch (e) | |
126 { | |
127 reportException(e); | |
128 result.errors.push("Cannot gather page info"); | |
Wladimir Palant
2016/09/14 16:11:47
The original code had the canvas in a separate try
sergei
2016/09/29 09:58:13
Done.
| |
129 } | |
130 } | 99 } |
100 | |
101 try | |
102 { | |
103 let canvas = document.createElementNS("http://www.w3.org/1999/xhtml", "canva s"); | |
104 canvas.width = document.documentElement.scrollWidth; | |
105 canvas.height = document.documentElement.scrollHeight; | |
106 let context = canvas.getContext("2d"); | |
107 context.drawWindow(wnd, 0, 0, canvas.width, canvas.height, "rgb(255, 255, 25 5)"); | |
108 result.screenshot = canvas.toDataURL("image/jpeg", 0.8); | |
109 } | |
110 catch (e) | |
111 { | |
112 reportException(e); | |
113 result.errors.push("Cannot make page screenshot"); | |
114 } | |
115 | |
116 try | |
117 { | |
118 // TODO: Capture frames as well? | |
119 let serializer = new wnd.XMLSerializer(); | |
120 result.source = serializer.serializeToString(document.documentElement); | |
121 } | |
122 catch(e) | |
123 { | |
124 reportException(e); | |
125 result.errors.push("Cannot obtain page source code"); | |
126 } | |
127 | |
131 return result; | 128 return result; |
132 } | 129 } |
LEFT | RIGHT |