| Left: | ||
| Right: |
| LEFT | RIGHT |
|---|---|
| 1 /* | 1 /* |
| 2 * This Source Code is subject to the terms of the Mozilla Public License | 2 * This Source Code is subject to the terms of the Mozilla Public License |
| 3 * version 2.0 (the "License"). You can obtain a copy of the License at | 3 * version 2.0 (the "License"). You can obtain a copy of the License at |
| 4 * http://mozilla.org/MPL/2.0/. | 4 * http://mozilla.org/MPL/2.0/. |
| 5 */ | 5 */ |
| 6 | 6 |
| 7 "use strict"; | |
| 8 | |
| 7 /** | 9 /** |
| 8 * @module crawler | 10 * @module crawler |
| 9 */ | 11 */ |
| 10 | 12 |
| 11 Cu.import("resource://gre/modules/Services.jsm"); | 13 const {Services} = Cu.import("resource://gre/modules/Services.jsm", {}); |
| 12 Cu.import("resource://gre/modules/Task.jsm"); | 14 const {XPCOMUtils} = Cu.import("resource://gre/modules/XPCOMUtils.jsm", {}); |
| 13 Cu.import("resource://gre/modules/Promise.jsm"); | 15 const {Task} = Cu.import("resource://gre/modules/Task.jsm", {}); |
| 14 Cu.import("resource://gre/modules/Timer.jsm"); | 16 const {setTimeout, clearTimeout} = Cu.import("resource://gre/modules/Timer.jsm", {}); |
| 15 | 17 |
| 16 function abprequire(module) | 18 function abprequire(module) |
| 17 { | 19 { |
| 18 let result = {}; | 20 let result = {}; |
| 19 result.wrappedJSObject = result; | 21 result.wrappedJSObject = result; |
| 20 Services.obs.notifyObservers(result, "adblockplus-require", module); | 22 Services.obs.notifyObservers(result, "adblockplus-require", module); |
| 21 return result.exports; | 23 return result.exports; |
| 22 } | 24 } |
| 23 | 25 |
| 24 let {RequestNotifier} = abprequire("requestNotifier"); | 26 let {RequestNotifier} = abprequire("requestNotifier"); |
| 25 let {FilterNotifier} = abprequire("filterNotifier"); | 27 let {FilterNotifier} = abprequire("filterNotifier"); |
| 26 let {FilterStorage} = abprequire("filterStorage"); | 28 let {FilterStorage} = abprequire("filterStorage"); |
| 27 | 29 |
| 28 /** | 30 /** |
| 29 * Creates a pool of tabs and allocates them to tasks on request. | 31 * Allocates tabs on request but not more than maxtabs at the same time. |
| 30 * | 32 * |
| 31 * @param {tabbrowser} browser | 33 * @param {tabbrowser} browser |
| 32 * The tabbed browser where tabs should be created | 34 * The tabbed browser where tabs should be created |
| 33 * @param {int} maxtabs | 35 * @param {int} maxtabs |
| 34 * The maximum number of tabs to be allocated | 36 * The maximum number of tabs to be allocated |
| 35 * @constructor | 37 * @constructor |
| 36 */ | 38 */ |
| 37 function TabAllocator(browser, maxtabs) | 39 function TabAllocator(browser, maxtabs) |
| 38 { | 40 { |
| 39 browser.removeAllTabsBut(browser.tabs[0]) | 41 this._browser = browser; |
| 40 | 42 this._tabs = 0; |
| 41 this._tabs = []; | 43 this._maxtabs = maxtabs; |
| 42 for (let i = 0; i < maxtabs; i++) | 44 // The queue containing resolve functions of promises waiting for a tab. |
| 43 this._tabs.push(browser.addTab("about:blank")); | 45 this._resolvers = []; |
| 44 | 46 // Keep at least one tab alive to prevent browser from closing itself. |
| 45 browser.removeTab(browser.tabs[0]); | 47 this._tabKeepingWindowAlive = this._browser.tabs[0]; |
| 46 | 48 this._browser.removeAllTabsBut(this._tabKeepingWindowAlive); |
| 47 this._deferred = []; | |
| 48 } | 49 } |
| 49 TabAllocator.prototype = { | 50 TabAllocator.prototype = { |
| 51 _removeTabKeepingWindowAlive: function() | |
| 52 { | |
| 53 if (!this._tabKeepingWindowAlive) | |
| 54 return; | |
| 55 this._browser.removeTab(this._tabKeepingWindowAlive); | |
| 56 delete this._tabKeepingWindowAlive; | |
| 57 }, | |
| 58 | |
| 50 /** | 59 /** |
| 51 * Returns a promise that will resolve into a tab once a tab can be allocated. | 60 * Creates a blank tab in this._browser. |
| 61 * | |
| 62 * @return {Promise.<tab>} promise which resolves once the tab is fully initialized. | |
| 63 */ | |
| 64 _createTab: function() | |
| 65 { | |
| 66 this._tabs++; | |
| 67 let tab = this._browser.addTab("about:blank"); | |
| 68 if (tab.linkedBrowser.outerWindowID) | |
| 69 { | |
| 70 this._removeTabKeepingWindowAlive(); | |
| 71 return Promise.resolve(tab); | |
| 72 } | |
| 73 return new Promise((resolve, reject) => | |
| 74 { | |
| 75 let onBrowserInit = (msg) => | |
| 76 { | |
| 77 tab.linkedBrowser.messageManager.removeMessageListener("Browser:Init", onBrowserInit); | |
| 78 this._removeTabKeepingWindowAlive(); | |
| 79 resolve(tab); | |
| 80 }; | |
| 81 // "Browser:Init" message is sent once the browser is ready, see | |
| 82 // https://bugzil.la/1256602#c1 | |
| 83 tab.linkedBrowser.messageManager.addMessageListener("Browser:Init", onBrowserInit); | |
| 84 }); | |
| 85 }, | |
| 86 | |
| 87 /** | |
| 88 * Returns a promise that will resolve into a tab once a tab is allocated. | |
| 52 * The tab cannot be used by other tasks until releaseTab() is called. | 89 * The tab cannot be used by other tasks until releaseTab() is called. |
| 53 * | 90 * |
| 54 * @result {Promise} | 91 * @result {Promise.<tab>} |
| 55 */ | 92 */ |
| 56 getTab: function() | 93 getTab: function() |
| 57 { | 94 { |
| 58 if (this._tabs.length) | 95 if (this._tabs < this._maxtabs) |
| 59 return this._tabs.shift(); | 96 return this._createTab(); |
| 60 else | 97 return new Promise((resolve, reject) => this._resolvers.push(resolve)); |
| 61 { | |
| 62 let deferred = Promise.defer(); | |
| 63 this._deferred.push(deferred); | |
| 64 return deferred.promise; | |
| 65 } | |
| 66 }, | 98 }, |
| 67 | 99 |
| 68 /** | 100 /** |
| 69 * Adds a tab back to the pool so that it can be used by other tasks. | 101 * Adds a tab back to the pool so that it can be used by other tasks. |
| 70 * | 102 * |
| 71 * @param {tab} tab | 103 * @param {tab} tab |
| 72 */ | 104 */ |
| 73 releaseTab: function(tab) | 105 releaseTab: function(tab) |
| 74 { | 106 { |
| 75 let browser = tab.parentNode.tabbrowser; | 107 // If we are about to close last tab don't close it immediately to keep |
| 76 browser.removeTab(tab); | 108 // the window alive. It will be closed when a new tab is created. |
| 77 tab = browser.addTab("about:blank"); | 109 if (this._tabs > 1) |
| 78 | 110 this._browser.removeTab(tab); |
| 79 if (this._deferred.length) | |
| 80 this._deferred.shift().resolve(tab); | |
| 81 else | 111 else |
| 82 this._tabs.push(tab); | 112 { |
| 83 } | 113 // navigate away from previously opened URL |
| 114 tab.linkedBrowser.loadURI("about:blank", null, null); | |
| 115 this._tabKeepingWindowAlive = tab; | |
| 116 } | |
| 117 | |
| 118 this._tabs--; | |
| 119 if (this._resolvers.length && this._tabs < this._maxtabs) | |
| 120 { | |
| 121 this._resolvers.shift()(this._createTab()); | |
| 122 } | |
| 123 }, | |
| 84 }; | 124 }; |
| 85 | 125 |
| 86 /** | 126 /** |
| 87 * Once created, this object will make sure all new windows are dismissed | 127 * Once created, this object will make sure all new windows are dismissed |
| 88 * immediately. | 128 * immediately. |
| 89 * | 129 * |
| 90 * @constructor | 130 * @constructor |
| 91 */ | 131 */ |
| 92 function WindowCloser() | 132 function WindowCloser() |
| 93 { | 133 { |
| (...skipping 16 matching lines...) Expand all Loading... | |
| 110 { | 150 { |
| 111 if (window.document.documentElement.localName == 'dialog') | 151 if (window.document.documentElement.localName == 'dialog') |
| 112 window.document.documentElement.acceptDialog(); | 152 window.document.documentElement.acceptDialog(); |
| 113 else | 153 else |
| 114 window.close(); | 154 window.close(); |
| 115 }, false); | 155 }, false); |
| 116 }, | 156 }, |
| 117 | 157 |
| 118 QueryInterface: XPCOMUtils.generateQI([Ci.nsIObserver, Ci.nsISupportsWeakReference]) | 158 QueryInterface: XPCOMUtils.generateQI([Ci.nsIObserver, Ci.nsISupportsWeakReference]) |
| 119 }; | 159 }; |
| 160 | |
| 161 function configureFrameScript() | |
| 162 { | |
| 163 const info = require("info"); | |
| 164 let frameScriptPath = info.addonRoot + "/lib/child/frameScript.js"; | |
| 165 Services.mm.loadFrameScript(frameScriptPath, true); | |
| 166 | |
| 167 onShutdown.add(() => | |
| 168 { | |
| 169 Services.mm.removeDelayedFrameScript(frameScriptPath); | |
| 170 }); | |
| 171 } | |
| 120 | 172 |
| 121 /** | 173 /** |
| 122 * Starts the crawling session. The crawler opens each URL in a tab and stores | 174 * Starts the crawling session. The crawler opens each URL in a tab and stores |
| 123 * the results. | 175 * the results. |
| 124 * | 176 * |
| 125 * @param {Window} window | 177 * @param {Window} window |
| 126 * The browser window we're operating in | 178 * The browser window we're operating in |
| 127 * @param {String[]} urls | 179 * @param {String[]} urls |
| 128 * URLs to be crawled | 180 * URLs to be crawled |
| 129 * @param {int} number_of_tabs | 181 * @param {int} timeout |
| 182 * Load timeout in milliseconds | |
| 183 * @param {int} maxtabs | |
| 130 * Maximum number of tabs to be opened | 184 * Maximum number of tabs to be opened |
| 131 * @param {String} targetURL | 185 * @param {String} targetURL |
| 132 * URL that should receive the results | 186 * URL that should receive the results |
| 187 * @param {Function} onDone | |
| 188 * The callback which is called after finishing of crawling of all URLs. | |
| 133 */ | 189 */ |
| 134 function run(window, urls, timeout, maxtabs, targetURL, onDone) | 190 function run(window, urls, timeout, maxtabs, targetURL, onDone) |
| 191 { | |
| 192 configureFrameScript(); | |
| 193 new Promise((resolve, reject) => | |
| 194 { | |
| 195 if (FilterStorage.subscriptions.length > 0) | |
| 196 { | |
| 197 resolve(); | |
| 198 return; | |
| 199 } | |
| 200 let onFiltersLoaded = (action, item, newValue, oldValue) => | |
| 201 { | |
| 202 if (action == "load") | |
| 203 { | |
| 204 FilterNotifier.removeListener(onFiltersLoaded); | |
| 205 resolve(); | |
| 206 } | |
| 207 }; | |
| 208 FilterNotifier.addListener(onFiltersLoaded); | |
| 209 }).then(() => crawl_urls(window, urls, timeout, maxtabs, targetURL, onDone)) | |
| 210 .catch(reportException); | |
| 211 } | |
| 212 exports.run = run; | |
| 213 | |
| 214 /** | |
| 215 * Spawns a {Task} task to crawl each url from urls argument and calls | |
| 216 * onDone when all tasks are finished. | |
| 217 * @param {Window} window | |
| 218 * The browser window we're operating in | |
| 219 * @param {String[]} urls | |
| 220 * URLs to be crawled | |
| 221 * @param {int} timeout | |
| 222 * Load timeout in milliseconds | |
| 223 * @param {int} maxtabs | |
| 224 * Maximum number of tabs to be opened | |
| 225 * @param {String} targetURL | |
| 226 * URL that should receive the results | |
| 227 * @param {Function} onDone | |
| 228 * The callback which is called after finishing of all tasks. | |
| 229 */ | |
| 230 function crawl_urls(window, urls, timeout, maxtabs, targetURL, onDone) | |
| 135 { | 231 { |
| 136 let tabAllocator = new TabAllocator(window.getBrowser(), maxtabs); | 232 let tabAllocator = new TabAllocator(window.getBrowser(), maxtabs); |
| 137 | 233 |
| 138 let running = 0; | 234 let running = 0; |
| 139 let windowCloser = new WindowCloser(); | 235 let windowCloser = new WindowCloser(); |
| 140 let taskDone = function() | 236 let taskDone = function() |
| 141 { | 237 { |
| 142 running--; | 238 running--; |
| 143 if (running <= 0) | 239 if (running <= 0) |
| 144 { | 240 { |
| 145 windowCloser.stop(); | 241 windowCloser.stop(); |
| 146 onDone(); | 242 onDone(); |
| 147 } | 243 } |
| 148 }; | 244 }; |
| 149 | 245 |
| 150 new Promise(function(resolve, reject) | 246 for (let url of urls) |
| 151 { | 247 { |
| 152 if (FilterStorage.subscriptions.length > 0 && !FilterStorage._loading) | 248 running++; |
| 153 { | 249 Task.spawn(crawl_url.bind(null, url, tabAllocator, timeout)).then(function(result) |
| 154 resolve(); | 250 { |
| 155 return; | 251 let request = new XMLHttpRequest(); |
| 156 } | 252 request.open("POST", targetURL); |
| 157 FilterNotifier.addListener((action, item, newValue, oldValue) => | 253 request.addEventListener("load", taskDone, false); |
| 158 { | 254 request.addEventListener("error", taskDone, false); |
| 159 if (action === "load") | 255 request.send(JSON.stringify(result)); |
| 160 { | 256 }, function(url, exception) |
| 161 resolve(); | 257 { |
| 162 } | 258 reportException(exception); |
| 163 }); | 259 |
| 164 }).then(_ => | 260 let request = new XMLHttpRequest(); |
| 165 { | 261 request.open("POST", targetURL); |
| 166 for (let url of urls) | 262 request.addEventListener("load", taskDone, false); |
| 167 { | 263 request.addEventListener("error", taskDone, false); |
| 168 running++; | 264 request.send(JSON.stringify({ |
| 169 Task.spawn(crawl_url.bind(null, url, tabAllocator, timeout)).then(function (result) | 265 url: url, |
| 170 { | 266 startTime: Date.now(), |
| 171 let request = new XMLHttpRequest(); | 267 error: String(exception) |
| 172 request.open("POST", targetURL); | 268 })); |
| 173 request.addEventListener("load", taskDone, false); | 269 }.bind(null, url)); |
| 174 request.addEventListener("error", taskDone, false); | 270 } |
| 175 request.send(JSON.stringify(result)); | 271 } |
| 176 }, function(url, exception) | 272 |
| 177 { | 273 /** |
| 178 reportException(exception); | 274 * Expects to receive page info gathered in a content process for the specified |
| 179 | 275 * `tab`. If there is no relevant message within specified `timeout` then |
| 180 let request = new XMLHttpRequest(); | 276 * the result promise is resolved with error object. |
| 181 request.open("POST", targetURL); | 277 * @param tab |
| 182 request.addEventListener("load", taskDone, false); | 278 * Tab in which we are interested in |
| 183 request.addEventListener("error", taskDone, false); | 279 * @param {int} timeout |
| 184 request.send(JSON.stringify({ | 280 * Timeout in milliseconds |
| 185 url: url, | 281 * @return {Promise} promise which will be resolved with the received page info |
| 186 startTime: Date.now(), | 282 */ |
| 187 error: String(exception) | 283 function getPageInfo(tab, timeout) |
| 188 })); | 284 { |
| 189 }.bind(null, url)); | 285 return new Promise((resolve, result) => |
| 190 } | 286 { |
| 191 // Be careful, `catch` does not catch exeptions from this `then` handler because | 287 let mm = tab.linkedBrowser.messageManager; |
| 192 // the latter one does not return an array of promises of asynchrounous tasks | 288 let timerID; |
| 193 // and does not contain any waiting code. | 289 let onDone = (msg) => |
| 194 }).catch(reportException); | 290 { |
| 195 } | 291 mm.removeMessageListener("abpcrawler:pageInfoGathered", onDone); |
| 196 exports.run = run; | 292 clearTimeout(timerID); |
| 293 resolve(msg.data); | |
| 294 } | |
| 295 mm.addMessageListener("abpcrawler:pageInfoGathered", onDone); | |
| 296 timerID = setTimeout(() => onDone({data: {error: "timeout"}}), timeout); | |
| 297 }); | |
| 298 } | |
| 197 | 299 |
| 198 /** | 300 /** |
| 199 * Crawls a URL. This is a generator meant to be used via a Task object. | 301 * Crawls a URL. This is a generator meant to be used via a Task object. |
| 200 * | 302 * |
| 201 * @param {String} url | 303 * @param {String} url |
| 202 * @param {TabAllocator} tabAllocator | 304 * @param {TabAllocator} tabAllocator |
| 305 * @param {int} timeout | |
| 306 * Load timeout in milliseconds | |
| 203 * @result {Object} | 307 * @result {Object} |
| 204 * Crawling result | 308 * Crawling result |
| 205 */ | 309 */ |
| 206 function* crawl_url(url, tabAllocator, timeout) | 310 function* crawl_url(url, tabAllocator, timeout) |
| 207 { | 311 { |
| 208 let tab = yield tabAllocator.getTab(); | 312 let tab = yield tabAllocator.getTab(); |
| 209 let result = {url, requests: []}; | 313 let result = {url, requests: []}; |
| 210 | 314 let requestNotifier; |
| 211 try | 315 try |
| 212 { | 316 { |
| 213 result.startTime = Date.now(); | 317 result.startTime = Date.now(); |
| 214 let requestNotifier = new RequestNotifier(tab.linkedBrowser.outerWindowID, function({type, location, filter}, scanComplete) | 318 requestNotifier = new RequestNotifier(tab.linkedBrowser.outerWindowID, |
|
sergei
2016/03/15 16:40:10
BTW, in addition, this part stops to work, I have
sergei
2016/03/16 14:44:23
https://issues.adblockplus.org/ticket/3815
| |
| 215 { | 319 function(entry, scanComplete) |
| 216 result.requests.push({location, contentType: type, filter}); | 320 { |
| 321 if (!entry) | |
| 322 return; | |
| 323 let {type: contentType, location, filter} = entry; | |
| 324 result.requests.push({location, contentType, filter}); | |
| 217 }); | 325 }); |
| 218 | 326 |
| 219 tab.linkedBrowser.loadURI(url, null, null); | 327 tab.linkedBrowser.loadURI(url, null, null); |
| 220 | 328 |
| 221 let mm = tab.linkedBrowser.messageManager; | 329 Object.assign(result, yield getPageInfo(tab, timeout)); |
| 222 let pageInfoFuture = new Promise((resolve, result) => | |
| 223 { | |
| 224 let timerID; | |
| 225 let onDone = (pageInfo) => | |
| 226 { | |
| 227 mm.removeMessageListener("abpcrawler:pageInfoGathered", onDone); | |
|
Wladimir Palant
2016/03/15 10:07:10
So, which tab did you get the page info for?
The
sergei
2016/03/16 14:44:23
For the `tab`, it's "browser message manager" it a
| |
| 228 clearTimeout(timerID); | |
| 229 resolve(pageInfo); | |
| 230 } | |
| 231 mm.addMessageListener("abpcrawler:pageInfoGathered", (msg) => onDone(msg.data));; | |
| 232 timerID = setTimeout(onDone.bind(this, {error: "timeout"}), timeout); | |
| 233 }); | |
|
Wladimir Palant
2016/03/15 10:07:10
Please move this functionality into a separate fun
sergei
2016/03/16 14:44:23
Done.
| |
| 234 | |
| 235 let pageInfo = yield pageInfoFuture; | |
| 236 | |
| 237 result.finalUrl = tab.linkedBrowser.currentURI.spec; | 330 result.finalUrl = tab.linkedBrowser.currentURI.spec; |
| 238 Object.assign(result, pageInfo); | |
| 239 result.endTime = Date.now(); | 331 result.endTime = Date.now(); |
| 240 } | 332 } |
| 241 finally | 333 finally |
| 242 { | 334 { |
| 335 if (requestNotifier) | |
| 336 requestNotifier.shutdown(); | |
| 243 tabAllocator.releaseTab(tab); | 337 tabAllocator.releaseTab(tab); |
| 244 } | 338 } |
| 245 return result; | 339 return result; |
| 246 } | 340 } |
| 247 | 341 |
| 248 function reportException(e) | 342 function reportException(e) |
| 249 { | 343 { |
| 250 let stack = ""; | 344 let stack = ""; |
| 251 if (e && typeof e == "object" && "stack" in e) | 345 if (e && typeof e == "object" && "stack" in e) |
| 252 stack = e.stack + "\n"; | 346 stack = e.stack + "\n"; |
| 253 | 347 |
| 254 Cu.reportError(e); | 348 Cu.reportError(e); |
| 255 dump(e + "\n" + stack + "\n"); | 349 dump(e + "\n" + stack + "\n"); |
| 256 } | 350 } |
| 257 | |
| 258 let {addonRoot} = require("info"); | |
| 259 let frameScriptPath = addonRoot + "/lib/child/frameScript.js"; | |
| 260 let globalMessageManager = Services.mm; | |
| 261 globalMessageManager.loadFrameScript(frameScriptPath, true); | |
|
Wladimir Palant
2016/03/15 10:07:10
This should be a process script, no point using a
| |
| 262 | |
| 263 let onReportException = function(msg) | |
| 264 { | |
| 265 reportException(msg.objects); | |
|
Wladimir Palant
2016/03/15 10:07:10
Please don't use msg.objects - ever. That's a wrap
sergei
2016/03/16 14:44:23
Acknowledged. Actually, I wanted to avoid duplicat
sergei
2016/03/16 14:44:23
I know, I used `msg.objects` because we don't know
Wladimir Palant
2016/09/14 16:11:46
Worst-case scenario: deadlocks because all of that
| |
| 266 } | |
| 267 globalMessageManager.addMessageListener("abpcrawler:reportException", onReportException); | |
| 268 | |
| 269 onShutdown.add(() => | |
| 270 { | |
| 271 globalMessageManager.removeMessageListener("abpcrawler:reportException", onReportException); | |
| 272 globalMessageManager.removeDelayedFrameScript(frameScriptPath); | |
| 273 }); | |
| LEFT | RIGHT |