LEFT | RIGHT |
1 /* | 1 /* |
2 * This Source Code is subject to the terms of the Mozilla Public License | 2 * This Source Code is subject to the terms of the Mozilla Public License |
3 * version 2.0 (the "License"). You can obtain a copy of the License at | 3 * version 2.0 (the "License"). You can obtain a copy of the License at |
4 * http://mozilla.org/MPL/2.0/. | 4 * http://mozilla.org/MPL/2.0/. |
5 */ | 5 */ |
6 | 6 |
| 7 "use strict"; |
| 8 |
7 /** | 9 /** |
8 * @module crawler | 10 * @module crawler |
9 */ | 11 */ |
10 | 12 |
11 Cu.import("resource://gre/modules/Services.jsm"); | 13 const {Services} = Cu.import("resource://gre/modules/Services.jsm", {}); |
12 Cu.import("resource://gre/modules/Task.jsm"); | 14 const {XPCOMUtils} = Cu.import("resource://gre/modules/XPCOMUtils.jsm", {}); |
13 Cu.import("resource://gre/modules/Promise.jsm"); | 15 const {Task} = Cu.import("resource://gre/modules/Task.jsm", {}); |
14 Cu.import("resource://gre/modules/Timer.jsm"); | 16 const {setTimeout, clearTimeout} = Cu.import("resource://gre/modules/Timer.jsm",
{}); |
15 | 17 |
/**
 * Asks Adblock Plus for one of its modules by broadcasting an
 * "adblockplus-require" observer notification. The notification subject is
 * an object exposing itself as wrappedJSObject; whatever the observer stores
 * in its exports property is handed back to the caller.
 *
 * @param {String} module
 *    Name of the module being requested
 * @return {*} value of the exports property after the notification, it stays
 *    undefined if no observer has set it
 */
function abprequire(module)
{
  let request = {};
  request.wrappedJSObject = request;
  Services.obs.notifyObservers(request, "adblockplus-require", module);

  let {exports: moduleExports} = request;
  return moduleExports;
}
23 | 25 |
24 let {RequestNotifier} = abprequire("requestNotifier"); | 26 let {RequestNotifier} = abprequire("requestNotifier"); |
25 let {FilterNotifier} = abprequire("filterNotifier"); | 27 let {FilterNotifier} = abprequire("filterNotifier"); |
26 let {FilterStorage} = abprequire("filterStorage"); | 28 let {FilterStorage} = abprequire("filterStorage"); |
27 | 29 |
/**
 * Allocates tabs on request but not more than maxtabs at the same time.
 *
 * @param {tabbrowser} browser
 *    The tabbed browser where tabs should be created
 * @param {int} maxtabs
 *    The maximum number of tabs to be allocated
 * @constructor
 */
function TabAllocator(browser, maxtabs)
{
  this._browser = browser;
  this._tabs = 0;
  this._maxtabs = maxtabs;
  // The queue containing resolve functions of promises waiting for a tab.
  this._resolvers = [];
  // Keep at least one tab alive to prevent browser from closing itself.
  this._tabKeepingWindowAlive = this._browser.tabs[0];
  this._browser.removeAllTabsBut(this._tabKeepingWindowAlive);
}
TabAllocator.prototype = {
  /**
   * Closes the tab which was only kept to keep the window alive, if there is
   * one. Calling this repeatedly is harmless.
   */
  _removeTabKeepingWindowAlive: function()
  {
    if (!this._tabKeepingWindowAlive)
      return;
    this._browser.removeTab(this._tabKeepingWindowAlive);
    this._tabKeepingWindowAlive = null;
  },

  /**
   * Creates a blank tab in this._browser.
   *
   * @return {Promise.<tab>} promise which resolves once the tab is fully
   *    initialized.
   */
  _createTab: function()
  {
    let tab = this._browser.addTab("about:blank");
    // Count the tab only after addTab() succeeded: if it throws, the slot
    // must not stay occupied, otherwise all later requests would be queued
    // without ever getting a tab.
    this._tabs++;
    if (tab.linkedBrowser.outerWindowID)
    {
      this._removeTabKeepingWindowAlive();
      return Promise.resolve(tab);
    }
    return new Promise((resolve, reject) =>
    {
      let messageManager = tab.linkedBrowser.messageManager;
      let onBrowserInit = (msg) =>
      {
        messageManager.removeMessageListener("Browser:Init", onBrowserInit);
        this._removeTabKeepingWindowAlive();
        resolve(tab);
      };
      // "Browser:Init" message is sent once the browser is ready, see
      // https://bugzil.la/1256602#c1
      messageManager.addMessageListener("Browser:Init", onBrowserInit);
    });
  },

  /**
   * Returns a promise that will resolve into a tab once a tab is allocated.
   * The tab cannot be used by other tasks until releaseTab() is called.
   *
   * @return {Promise.<tab>}
   */
  getTab: function()
  {
    if (this._tabs < this._maxtabs)
      return this._createTab();
    return new Promise((resolve, reject) => this._resolvers.push(resolve));
  },

  /**
   * Adds a tab back to the pool so that it can be used by other tasks.
   *
   * @param {tab} tab
   */
  releaseTab: function(tab)
  {
    // If we are about to close last tab don't close it immediately to keep
    // the window alive. It will be closed when a new tab is created.
    if (this._tabs > 1)
      this._browser.removeTab(tab);
    else
    {
      // navigate away from previously opened URL
      tab.linkedBrowser.loadURI("about:blank", null, null);
      this._tabKeepingWindowAlive = tab;
    }

    this._tabs--;
    // Hand the freed slot over to the request that has been waiting longest.
    if (this._resolvers.length && this._tabs < this._maxtabs)
    {
      this._resolvers.shift()(this._createTab());
    }
  },
};
115 | 125 |
116 /** | 126 /** |
117 * Once created, this object will make sure all new windows are dismissed | 127 * Once created, this object will make sure all new windows are dismissed |
118 * immediately. | 128 * immediately. |
119 * | 129 * |
120 * @constructor | 130 * @constructor |
121 */ | 131 */ |
122 function WindowCloser() | 132 function WindowCloser() |
123 { | 133 { |
(...skipping 16 matching lines...) Expand all Loading... |
140 { | 150 { |
141 if (window.document.documentElement.localName == 'dialog') | 151 if (window.document.documentElement.localName == 'dialog') |
142 window.document.documentElement.acceptDialog(); | 152 window.document.documentElement.acceptDialog(); |
143 else | 153 else |
144 window.close(); | 154 window.close(); |
145 }, false); | 155 }, false); |
146 }, | 156 }, |
147 | 157 |
148 QueryInterface: XPCOMUtils.generateQI([Ci.nsIObserver, Ci.nsISupportsWeakRefer
ence]) | 158 QueryInterface: XPCOMUtils.generateQI([Ci.nsIObserver, Ci.nsISupportsWeakRefer
ence]) |
149 }; | 159 }; |
| 160 |
| 161 function configureFrameScript() |
| 162 { |
| 163 const info = require("info"); |
| 164 let frameScriptPath = info.addonRoot + "/lib/child/frameScript.js"; |
| 165 Services.mm.loadFrameScript(frameScriptPath, true); |
| 166 |
| 167 onShutdown.add(() => |
| 168 { |
| 169 Services.mm.removeDelayedFrameScript(frameScriptPath); |
| 170 }); |
| 171 } |
150 | 172 |
/**
 * Starts the crawling session. The crawler opens each URL in a tab and stores
 * the results.
 *
 * The frame script is configured first. Crawling itself begins once
 * FilterStorage has at least one subscription, which is either the case
 * immediately or after FilterNotifier reported a "load" action. Any failure
 * in that chain is passed to reportException.
 *
 * @param {Window} window
 *    The browser window we're operating in
 * @param {String[]} urls
 *    URLs to be crawled
 * @param {int} timeout
 *    Load timeout in milliseconds
 * @param {int} maxtabs
 *    Maximum number of tabs to be opened
 * @param {String} targetURL
 *    URL that should receive the results
 * @param {Function} onDone
 *    The callback which is called after finishing of crawling of all URLs.
 */
function run(window, urls, timeout, maxtabs, targetURL, onDone)
{
  configureFrameScript();

  let filtersReady = new Promise((resolve, reject) =>
  {
    if (FilterStorage.subscriptions.length > 0)
    {
      resolve();
      return;
    }

    // Nothing loaded yet, wait for the notification announcing it.
    let listener = (action, item, newValue, oldValue) =>
    {
      if (action != "load")
        return;
      FilterNotifier.removeListener(listener);
      resolve();
    };
    FilterNotifier.addListener(listener);
  });

  let startCrawling = () =>
    crawl_urls(window, urls, timeout, maxtabs, targetURL, onDone);
  filtersReady.then(startCrawling).catch(reportException);
}
189 exports.run = run; | 212 exports.run = run; |
190 | 213 |
191 /** | 214 /** |
192 * Spawns a {Task} task to crawl each url from `urls` argument and calls | 215 * Spawns a {Task} task to crawl each url from urls argument and calls |
193 * `onDone` when all tasks are finished. | 216 * onDone when all tasks are finished. |
194 * @param {Window} window | 217 * @param {Window} window |
195 * The browser window we're operating in | 218 * The browser window we're operating in |
196 * @param {String[]} urls | 219 * @param {String[]} urls |
197 * URLs to be crawled | 220 * URLs to be crawled |
198 * @param {int} timeout | 221 * @param {int} timeout |
199 * Load timeout in milliseconds | 222 * Load timeout in milliseconds |
200 * @param {int} maxtabs | 223 * @param {int} maxtabs |
201 * Maximum number of tabs to be opened | 224 * Maximum number of tabs to be opened |
202 * @param {String} targetURL | 225 * @param {String} targetURL |
203 * URL that should receive the results | 226 * URL that should receive the results |
(...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
243 startTime: Date.now(), | 266 startTime: Date.now(), |
244 error: String(exception) | 267 error: String(exception) |
245 })); | 268 })); |
246 }.bind(null, url)); | 269 }.bind(null, url)); |
247 } | 270 } |
248 } | 271 } |
249 | 272 |
/**
 * Waits for the "abpcrawler:pageInfoGathered" message on the message manager
 * of the given tab and delivers its data. If no such message arrives within
 * `timeout` milliseconds the promise is resolved with {error: "timeout"}
 * instead. In both cases the message listener is removed and the timer is
 * cleared, the promise is never rejected by this function itself.
 *
 * @param tab
 *    Tab whose page info is awaited
 * @param {int} timeout
 *    Timeout in milliseconds
 * @return {Promise} promise which will be resolved with the received page
 *    info or with the timeout error object
 */
function getPageInfo(tab, timeout)
{
  return new Promise((resolve) =>
  {
    let messageManager = tab.linkedBrowser.messageManager;
    let timer;
    let finish = (message) =>
    {
      messageManager.removeMessageListener("abpcrawler:pageInfoGathered",
                                           finish);
      clearTimeout(timer);
      resolve(message.data);
    };
    messageManager.addMessageListener("abpcrawler:pageInfoGathered", finish);

    // The timeout is reported through the same path as a real message.
    let onTimeout = () => finish({data: {error: "timeout"}});
    timer = setTimeout(onTimeout, timeout);
  });
}
276 | 299 |
277 /** | 300 /** |
278 * Crawls a URL. This is a generator meant to be used via a Task object. | 301 * Crawls a URL. This is a generator meant to be used via a Task object. |
279 * | 302 * |
280 * @param {String} url | 303 * @param {String} url |
281 * @param {TabAllocator} tabAllocator | 304 * @param {TabAllocator} tabAllocator |
282 * @param {int} timeout | 305 * @param {int} timeout |
283 * Load timeout in milliseconds | 306 * Load timeout in milliseconds |
(...skipping 12 matching lines...) Expand all Loading... |
296 function(entry, scanComplete) | 319 function(entry, scanComplete) |
297 { | 320 { |
298 if (!entry) | 321 if (!entry) |
299 return; | 322 return; |
300 let {type: contentType, location, filter} = entry; | 323 let {type: contentType, location, filter} = entry; |
301 result.requests.push({location, contentType, filter}); | 324 result.requests.push({location, contentType, filter}); |
302 }); | 325 }); |
303 | 326 |
304 tab.linkedBrowser.loadURI(url, null, null); | 327 tab.linkedBrowser.loadURI(url, null, null); |
305 | 328 |
| 329 Object.assign(result, yield getPageInfo(tab, timeout)); |
306 result.finalUrl = tab.linkedBrowser.currentURI.spec; | 330 result.finalUrl = tab.linkedBrowser.currentURI.spec; |
307 Object.assign(result, yield getPageInfo(tab, timeout)); | |
308 result.endTime = Date.now(); | 331 result.endTime = Date.now(); |
309 } | 332 } |
310 finally | 333 finally |
311 { | 334 { |
312 if (requestNotifier) | 335 if (requestNotifier) |
313 requestNotifier.shutdown(); | 336 requestNotifier.shutdown(); |
314 tabAllocator.releaseTab(tab); | 337 tabAllocator.releaseTab(tab); |
315 } | 338 } |
316 return result; | 339 return result; |
317 } | 340 } |
318 | 341 |
/**
 * Reports an exception via Cu.reportError() and additionally writes it via
 * dump(), followed by its stack trace if the value is an object having a
 * stack property.
 *
 * @param {*} e
 *    The value which was thrown
 */
function reportException(e)
{
  let hasStack = e && typeof e == "object" && "stack" in e;
  let trace = hasStack ? e.stack + "\n" : "";

  Cu.reportError(e);
  dump(e + "\n" + trace + "\n");
}
328 | |
329 let {addonRoot} = require("info"); | |
330 let frameScriptPath = addonRoot + "/lib/child/frameScript.js"; | |
331 let globalMessageManager = Services.mm; | |
332 globalMessageManager.loadFrameScript(frameScriptPath, true); | |
333 | |
334 onShutdown.add(() => | |
335 { | |
336 globalMessageManager.removeDelayedFrameScript(frameScriptPath); | |
337 }); | |
LEFT | RIGHT |