| Left: | ||
| Right: |
| OLD | NEW |
|---|---|
| 1 /* | 1 /* |
| 2 * This Source Code is subject to the terms of the Mozilla Public License | 2 * This Source Code is subject to the terms of the Mozilla Public License |
| 3 * version 2.0 (the "License"). You can obtain a copy of the License at | 3 * version 2.0 (the "License"). You can obtain a copy of the License at |
| 4 * http://mozilla.org/MPL/2.0/. | 4 * http://mozilla.org/MPL/2.0/. |
| 5 */ | 5 */ |
| 6 | 6 |
| 7 /** | 7 /** |
| 8 * @module crawler | 8 * @module crawler |
| 9 */ | 9 */ |
| 10 | 10 |
| 11 Cu.import("resource://gre/modules/Services.jsm"); | 11 Cu.import("resource://gre/modules/Services.jsm"); |
| 12 Cu.import("resource://gre/modules/Task.jsm"); | 12 Cu.import("resource://gre/modules/Task.jsm"); |
| 13 Cu.import("resource://gre/modules/Promise.jsm"); | 13 Cu.import("resource://gre/modules/Promise.jsm"); |
| 14 | 14 |
| 15 function abprequire(module) | 15 function abprequire(module) |
| 16 { | 16 { |
| 17 let result = {}; | 17 let result = {}; |
| 18 result.wrappedJSObject = result; | 18 result.wrappedJSObject = result; |
| 19 Services.obs.notifyObservers(result, "adblockplus-require", module); | 19 Services.obs.notifyObservers(result, "adblockplus-require", module); |
| 20 return result.exports; | 20 return result.exports; |
| 21 } | 21 } |
| 22 | 22 |
| 23 let {RequestNotifier} = abprequire("requestNotifier"); | 23 let {RequestNotifier} = abprequire("requestNotifier"); |
| 24 let {FilterNotifier} = abprequire("filterNotifier"); | 24 let {FilterNotifier} = abprequire("filterNotifier"); |
| 25 let {FilterStorage} = abprequire("filterStorage"); | 25 let {FilterStorage} = abprequire("filterStorage"); |
| 26 | 26 |
| 27 /** | 27 /** |
| 28 * Creates a pool of tabs and allocates them to tasks on request. | 28 * Allocates tabs on request but not more than maxtabs at the same time. |
| 29 * | 29 * |
| 30 * @param {tabbrowser} browser | 30 * @param {tabbrowser} browser |
| 31 * The tabbed browser where tabs should be created | 31 * The tabbed browser where tabs should be created |
| 32 * @param {int} maxtabs | 32 * @param {int} maxtabs |
| 33 * The maximum number of tabs to be allocated | 33 * The maximum number of tabs to be allocated |
| 34 * @constructor | 34 * @constructor |
| 35 */ | 35 */ |
| 36 function TabAllocator(browser, maxtabs) | 36 function TabAllocator(browser, maxtabs) |
| 37 { | 37 { |
| 38 browser.removeAllTabsBut(browser.tabs[0]) | 38 this._browser = browser; |
| 39 | 39 this._tabs = 0; |
| 40 this._tabs = []; | 40 this._maxtabs = maxtabs; |
| 41 for (let i = 0; i < maxtabs; i++) | 41 // The queue containing resolve functions of promises waiting for a tab. |
| 42 this._tabs.push(browser.addTab("about:blank")); | 42 this._resolvers = []; |
| 43 | 43 // Keep at least one tab alive to prevent browser from closing itself. |
| 44 browser.removeTab(browser.tabs[0]); | 44 this._tabKeepingWindowAlive = this._browser.tabs[0]; |
| 45 | 45 this._browser.removeAllTabsBut(this._tabKeepingWindowAlive); |
| 46 this._deferred = []; | |
| 47 } | 46 } |
| 48 TabAllocator.prototype = { | 47 TabAllocator.prototype = { |
| 48 _removeTabKeepingWindowAlive: function() | |
| 49 { | |
| 50 if (!this._tabKeepingWindowAlive) | |
| 51 return; | |
| 52 this._browser.removeTab(this._tabKeepingWindowAlive); | |
| 53 delete this._tabKeepingWindowAlive; | |
| 54 }, | |
| 55 | |
| 49 /** | 56 /** |
| 50 * Returns a promise that will resolve into a tab once a tab can be allocated. | 57 * Creates a blank tab in this._browser. |
| 58 * | |
| 59 * @return {Promise.<tab>} promise which resolves once the tab is fully initialized. | |
| 60 */ | |
| 61 _createTab: function() | |
| 62 { | |
| 63 this._tabs++; | |
| 64 let tab = this._browser.addTab("about:blank"); | |
| 65 if (tab.linkedBrowser.outerWindowID) | |
| 66 { | |
| 67 this._removeTabKeepingWindowAlive(); | |
| 68 return Promise.resolve(tab); | |
|
Wladimir Palant
2016/09/16 07:10:37
I think that rather than introducing a _removeTabK
sergei
2016/09/16 12:34:13
It's a valid point but I would like to keep it thi
| |
| 69 } | |
| 70 return new Promise((resolve, reject) => | |
| 71 { | |
| 72 let onBrowserInit = (msg) => | |
| 73 { | |
| 74 tab.linkedBrowser.messageManager.removeMessageListener("Browser:Init", onBrowserInit); | |
| 75 this._removeTabKeepingWindowAlive(); | |
| 76 resolve(tab); | |
| 77 }; | |
| 78 // "Browser:Init" message is sent once the browser is ready, see | |
| 79 // https://bugzil.la/1256602#c1 | |
| 80 tab.linkedBrowser.messageManager.addMessageListener("Browser:Init", onBrowserInit); | |
| 81 }); | |
| 82 }, | |
| 83 | |
|
Wladimir Palant
2016/09/16 07:10:37
Nit: Don't add this extra newline please.
sergei
2016/09/16 12:34:13
Done.
| |
| 84 | |
| 85 /** | |
| 86 * Returns a promise that will resolve into a tab once a tab is allocated. | |
| 51 * The tab cannot be used by other tasks until releaseTab() is called. | 87 * The tab cannot be used by other tasks until releaseTab() is called. |
| 52 * | 88 * |
| 53 * @result {Promise} | 89 * @result {Promise.<tab>} |
| 54 */ | 90 */ |
| 55 getTab: function() | 91 getTab: function() |
| 56 { | 92 { |
| 57 if (this._tabs.length) | 93 if (this._tabs < this._maxtabs) |
| 58 return this._tabs.shift(); | 94 return this._createTab(); |
| 59 else | 95 return new Promise((resolve, reject) => this._resolvers.push(resolve)); |
| 60 { | |
| 61 let deferred = Promise.defer(); | |
| 62 this._deferred.push(deferred); | |
| 63 return deferred.promise; | |
| 64 } | |
| 65 }, | 96 }, |
| 66 | 97 |
| 67 /** | 98 /** |
| 68 * Adds a tab back to the pool so that it can be used by other tasks. | 99 * Adds a tab back to the pool so that it can be used by other tasks. |
| 69 * | 100 * |
| 70 * @param {tab} tab | 101 * @param {tab} tab |
| 71 */ | 102 */ |
| 72 releaseTab: function(tab) | 103 releaseTab: function(tab) |
| 73 { | 104 { |
| 74 let browser = tab.parentNode.tabbrowser; | 105 // If we are about to close last tab don't close it immediately to keep |
| 75 browser.removeTab(tab); | 106 // the window alive. It will be closed when a new tab is created. |
| 76 tab = browser.addTab("about:blank"); | 107 if (this._tabs > 1) |
| 108 this._browser.removeTab(tab); | |
| 109 else | |
| 110 { | |
| 111 // navigate away from early opened URL | |
| 112 tab.linkedBrowser.loadURI('about:blank', null, null); | |
|
Wladimir Palant
2016/09/16 07:10:37
What's the point of navigating away if we are igno
sergei
2016/09/16 12:34:13
I have a version when the crawler is always runnin
| |
| 113 this._tabKeepingWindowAlive = tab; | |
| 114 } | |
| 77 | 115 |
| 78 if (this._deferred.length) | 116 this._tabs--; |
| 79 this._deferred.shift().resolve(tab); | 117 if (this._resolvers.length && this._tabs < this._maxtabs) |
| 80 else | 118 { |
| 81 this._tabs.push(tab); | 119 this._resolvers.shift()(this._createTab()); |
| 82 } | 120 } |
| 121 }, | |
| 83 }; | 122 }; |
| 84 | 123 |
| 85 /** | 124 /** |
| 86 * Observes page loads in a particular tabbed browser. | 125 * Observes page loads in a particular tabbed browser. |
| 87 * | 126 * |
| 88 * @param {tabbrowser} browser | 127 * @param {tabbrowser} browser |
| 89 * The tabbed browser to be observed | 128 * The tabbed browser to be observed |
| 90 * @param {int} timeout | 129 * @param {int} timeout |
| 91 * Load timeout in milliseconds | 130 * Load timeout in milliseconds |
| 92 * @constructor | 131 * @constructor |
| (...skipping 131 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 224 resolve(); | 263 resolve(); |
| 225 } | 264 } |
| 226 }; | 265 }; |
| 227 FilterNotifier.addListener(onFiltersLoaded); | 266 FilterNotifier.addListener(onFiltersLoaded); |
| 228 }).then(() => crawl_urls(window, urls, timeout, maxtabs, targetURL, onDone)) | 267 }).then(() => crawl_urls(window, urls, timeout, maxtabs, targetURL, onDone)) |
| 229 .catch(reportException); | 268 .catch(reportException); |
| 230 } | 269 } |
| 231 exports.run = run; | 270 exports.run = run; |
| 232 | 271 |
| 233 /** | 272 /** |
| 234 * Spawns a {Task} task to crawl each url from `urls` argument and calls | 273 * Spawns a {Task} task to crawl each url from urls argument and calls |
| 235 * `onDone` when all tasks are finished. | 274 * onDone when all tasks are finished. |
| 236 * @param {Window} window | 275 * @param {Window} window |
| 237 * The browser window we're operating in | 276 * The browser window we're operating in |
| 238 * @param {String[]} urls | 277 * @param {String[]} urls |
| 239 * URLs to be crawled | 278 * URLs to be crawled |
| 240 * @param {int} timeout | 279 * @param {int} timeout |
| 241 * Load timeout in milliseconds | 280 * Load timeout in milliseconds |
| 242 * @param {int} maxtabs | 281 * @param {int} maxtabs |
| 243 * Maximum number of tabs to be opened | 282 * Maximum number of tabs to be opened |
| 244 * @param {String} targetURL | 283 * @param {String} targetURL |
| 245 * URL that should receive the results | 284 * URL that should receive the results |
| (...skipping 110 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 356 | 395 |
| 357 function reportException(e) | 396 function reportException(e) |
| 358 { | 397 { |
| 359 let stack = ""; | 398 let stack = ""; |
| 360 if (e && typeof e == "object" && "stack" in e) | 399 if (e && typeof e == "object" && "stack" in e) |
| 361 stack = e.stack + "\n"; | 400 stack = e.stack + "\n"; |
| 362 | 401 |
| 363 Cu.reportError(e); | 402 Cu.reportError(e); |
| 364 dump(e + "\n" + stack + "\n"); | 403 dump(e + "\n" + stack + "\n"); |
| 365 } | 404 } |
| OLD | NEW |