| OLD | NEW |
| 1 /* | 1 /* |
| 2 * This Source Code is subject to the terms of the Mozilla Public License | 2 * This Source Code is subject to the terms of the Mozilla Public License |
| 3 * version 2.0 (the "License"). You can obtain a copy of the License at | 3 * version 2.0 (the "License"). You can obtain a copy of the License at |
| 4 * http://mozilla.org/MPL/2.0/. | 4 * http://mozilla.org/MPL/2.0/. |
| 5 */ | 5 */ |
| 6 | 6 |
| 7 "use strict"; | 7 "use strict"; |
| 8 | 8 |
| 9 /** | 9 /** |
| 10 * @module crawler | 10 * @module crawler |
| 11 */ | 11 */ |
| 12 | 12 |
| 13 const {Services} = Cu.import("resource://gre/modules/Services.jsm", {}); | 13 const {Services} = Cu.import("resource://gre/modules/Services.jsm", {}); |
| 14 const {XPCOMUtils} = Cu.import("resource://gre/modules/XPCOMUtils.jsm", {}); | 14 const {XPCOMUtils} = Cu.import("resource://gre/modules/XPCOMUtils.jsm", {}); |
| 15 const {Task} = Cu.import("resource://gre/modules/Task.jsm", {}); | 15 const {Task} = Cu.import("resource://gre/modules/Task.jsm", {}); |
| 16 const {setTimeout, clearTimeout} = Cu.import("resource://gre/modules/Timer.jsm",
{}); |
| 16 | 17 |
/**
 * Requests a module through the "adblockplus-require" observer topic.
 *
 * A carrier object that exposes itself as `wrappedJSObject` is broadcast
 * together with the module name. Whatever is found in the carrier's `exports`
 * property once the notification returns is handed back to the caller
 * (presumably filled in by Adblock Plus; nothing in this function sets it).
 *
 * @param {String} module
 *   Name of the requested module
 * @return {*}
 *   The carrier's `exports` value, undefined if no observer provided one
 */
function abprequire(module)
{
  const carrier = {};
  carrier.wrappedJSObject = carrier;

  Services.obs.notifyObservers(carrier, "adblockplus-require", module);

  return carrier.exports;
}
| 24 | 25 |
| 25 let {RequestNotifier} = abprequire("requestNotifier"); | 26 let {RequestNotifier} = abprequire("requestNotifier"); |
| (...skipping 76 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 102 * @param {tab} tab | 103 * @param {tab} tab |
| 103 */ | 104 */ |
| 104 releaseTab: function(tab) | 105 releaseTab: function(tab) |
| 105 { | 106 { |
| 106 // If we are about to close last tab don't close it immediately to keep | 107 // If we are about to close last tab don't close it immediately to keep |
| 107 // the window alive. It will be closed when a new tab is created. | 108 // the window alive. It will be closed when a new tab is created. |
| 108 if (this._tabs > 1) | 109 if (this._tabs > 1) |
| 109 this._browser.removeTab(tab); | 110 this._browser.removeTab(tab); |
| 110 else | 111 else |
| 111 { | 112 { |
| 112 // navigate away from early opened URL | 113 // navigate away from previously opened URL |
| 113 tab.linkedBrowser.loadURI('about:blank', null, null); | 114 tab.linkedBrowser.loadURI("about:blank", null, null); |
| 114 this._tabKeepingWindowAlive = tab; | 115 this._tabKeepingWindowAlive = tab; |
| 115 } | 116 } |
| 116 | 117 |
| 117 this._tabs--; | 118 this._tabs--; |
| 118 if (this._resolvers.length && this._tabs < this._maxtabs) | 119 if (this._resolvers.length && this._tabs < this._maxtabs) |
| 119 { | 120 { |
| 120 this._resolvers.shift()(this._createTab()); | 121 this._resolvers.shift()(this._createTab()); |
| 121 } | 122 } |
| 122 }, | 123 }, |
| 123 }; | 124 }; |
| 124 | 125 |
| 125 /** | 126 /** |
| 126 * Observes page loads in a particular tabbed browser. | |
| 127 * | |
| 128 * @param {tabbrowser} browser | |
| 129 * The tabbed browser to be observed | |
| 130 * @param {int} timeout | |
| 131 * Load timeout in milliseconds | |
| 132 * @constructor | |
| 133 */ | |
| 134 function LoadListener(browser, timeout) | |
| 135 { | |
| 136 this._browser = browser; | |
| 137 this._deferred = new Map(); | |
| 138 this._timeout = timeout; | |
| 139 browser.addTabsProgressListener(this); | |
| 140 } | |
| 141 LoadListener.prototype = { | |
| 142 /** | |
| 143 * Returns a promise that will be resolved when the page in the specified tab | |
| 144 * finishes loading. Loading will be stopped if the timeout is reached. | |
| 145 * | |
| 146 * @param {tab} tab | |
| 147 * @result {Promise} | |
| 148 */ | |
| 149 waitForLoad: function(tab) | |
| 150 { | |
| 151 let deferred = Promise.defer(); | |
| 152 this._deferred.set(tab.linkedBrowser, deferred); | |
| 153 | |
| 154 tab.ownerDocument.defaultView.setTimeout(function() | |
| 155 { | |
| 156 tab.linkedBrowser.stop(); | |
| 157 }, this._timeout); | |
| 158 | |
| 159 return deferred.promise; | |
| 160 }, | |
| 161 | |
| 162 /** | |
| 163 * Deactivates this object. | |
| 164 */ | |
| 165 stop: function() | |
| 166 { | |
| 167 this._browser.removeTabsProgressListener(this); | |
| 168 }, | |
| 169 | |
| 170 onStateChange: function(browser, progress, request, flags, status) | |
| 171 { | |
| 172 if ((flags & Ci.nsIWebProgressListener.STATE_STOP) && (flags & Ci.nsIWebProg
ressListener.STATE_IS_WINDOW)) | |
| 173 { | |
| 174 let deferred = this._deferred.get(browser); | |
| 175 if (deferred) | |
| 176 { | |
| 177 this._deferred.delete(browser); | |
| 178 | |
| 179 let headers = []; | |
| 180 if (request instanceof Ci.nsIHttpChannel) | |
| 181 { | |
| 182 try | |
| 183 { | |
| 184 headers.push("HTTP/x.x " + request.responseStatus + " " + request.re
sponseStatusText); | |
| 185 request.visitResponseHeaders((header, value) => headers.push(header
+ ": " + value)); | |
| 186 } | |
| 187 catch (e) | |
| 188 { | |
| 189 // Exceptions are expected here | |
| 190 } | |
| 191 } | |
| 192 deferred.resolve([status, headers]); | |
| 193 } | |
| 194 } | |
| 195 } | |
| 196 }; | |
| 197 | |
| 198 /** | |
| 199 * Once created, this object will make sure all new windows are dismissed | 127 * Once created, this object will make sure all new windows are dismissed |
| 200 * immediately. | 128 * immediately. |
| 201 * | 129 * |
| 202 * @constructor | 130 * @constructor |
| 203 */ | 131 */ |
| 204 function WindowCloser() | 132 function WindowCloser() |
| 205 { | 133 { |
| 206 Services.obs.addObserver(this, "xul-window-registered", true) | 134 Services.obs.addObserver(this, "xul-window-registered", true) |
| 207 } | 135 } |
| 208 WindowCloser.prototype = { | 136 WindowCloser.prototype = { |
| (...skipping 14 matching lines...) Expand all Loading... |
| 223 if (window.document.documentElement.localName == 'dialog') | 151 if (window.document.documentElement.localName == 'dialog') |
| 224 window.document.documentElement.acceptDialog(); | 152 window.document.documentElement.acceptDialog(); |
| 225 else | 153 else |
| 226 window.close(); | 154 window.close(); |
| 227 }, false); | 155 }, false); |
| 228 }, | 156 }, |
| 229 | 157 |
| 230 QueryInterface: XPCOMUtils.generateQI([Ci.nsIObserver, Ci.nsISupportsWeakRefer
ence]) | 158 QueryInterface: XPCOMUtils.generateQI([Ci.nsIObserver, Ci.nsISupportsWeakRefer
ence]) |
| 231 }; | 159 }; |
| 232 | 160 |
// True while the frame script is registered with the global message manager.
let frameScriptConfigured = false;

/**
 * Registers the crawler's frame script with the global message manager,
 * allowing delayed load, and arranges for that registration to be removed
 * again on shutdown.
 *
 * The function is idempotent: calling it again before shutdown does nothing,
 * so repeated crawling sessions neither load the script a second time nor
 * pile up shutdown handlers.
 */
function configureFrameScript()
{
  if (frameScriptConfigured)
    return;

  const frameScriptPath = info.addonRoot + "/lib/child/frameScript.js";
  Services.mm.loadFrameScript(frameScriptPath, true);
  // Only mark as configured once loading did not throw, so that a failed
  // attempt can be retried.
  frameScriptConfigured = true;

  onShutdown.add(() =>
  {
    frameScriptConfigured = false;
    Services.mm.removeDelayedFrameScript(frameScriptPath);
  });
}
| 171 |
| 233 /** | 172 /** |
| 234 * Starts the crawling session. The crawler opens each URL in a tab and stores | 173 * Starts the crawling session. The crawler opens each URL in a tab and stores |
| 235 * the results. | 174 * the results. |
| 236 * | 175 * |
| 237 * @param {Window} window | 176 * @param {Window} window |
| 238 * The browser window we're operating in | 177 * The browser window we're operating in |
| 239 * @param {String[]} urls | 178 * @param {String[]} urls |
| 240 * URLs to be crawled | 179 * URLs to be crawled |
| 241 * @param {int} timeout | 180 * @param {int} timeout |
| 242 * Load timeout in milliseconds | 181 * Load timeout in milliseconds |
| 243 * @param {int} maxtabs | 182 * @param {int} maxtabs |
| 244 * Maximum number of tabs to be opened | 183 * Maximum number of tabs to be opened |
| 245 * @param {String} targetURL | 184 * @param {String} targetURL |
| 246 * URL that should receive the results | 185 * URL that should receive the results |
| 247 * @param {Function} onDone | 186 * @param {Function} onDone |
| 248 * The callback which is called after finishing of crawling of all URLs. | 187 * The callback which is called after finishing of crawling of all URLs. |
| 249 */ | 188 */ |
| 250 function run(window, urls, timeout, maxtabs, targetURL, onDone) | 189 function run(window, urls, timeout, maxtabs, targetURL, onDone) |
| 251 { | 190 { |
| 191 configureFrameScript(); |
| 252 new Promise((resolve, reject) => | 192 new Promise((resolve, reject) => |
| 253 { | 193 { |
| 254 if (FilterStorage.subscriptions.length > 0) | 194 if (FilterStorage.subscriptions.length > 0) |
| 255 { | 195 { |
| 256 resolve(); | 196 resolve(); |
| 257 return; | 197 return; |
| 258 } | 198 } |
| 259 let onFiltersLoaded = (action, item, newValue, oldValue) => | 199 let onFiltersLoaded = (action, item, newValue, oldValue) => |
| 260 { | 200 { |
| 261 if (action == "load") | 201 if (action == "load") |
| (...skipping 20 matching lines...) Expand all Loading... |
| 282 * @param {int} maxtabs | 222 * @param {int} maxtabs |
| 283 * Maximum number of tabs to be opened | 223 * Maximum number of tabs to be opened |
| 284 * @param {String} targetURL | 224 * @param {String} targetURL |
| 285 * URL that should receive the results | 225 * URL that should receive the results |
| 286 * @param {Function} onDone | 226 * @param {Function} onDone |
| 287 * The callback which is called after finishing of all tasks. | 227 * The callback which is called after finishing of all tasks. |
| 288 */ | 228 */ |
function crawl_urls(window, urls, timeout, maxtabs, targetURL, onDone)
{
  let tabAllocator = new TabAllocator(window.getBrowser(), maxtabs);
  let windowCloser = new WindowCloser();

  // Number of URLs whose result has not been delivered to targetURL yet.
  let running = 0;

  // Ends the session: stops dismissing new windows and notifies the caller.
  let finish = function()
  {
    windowCloser.stop();
    onDone();
  };

  let taskDone = function()
  {
    running--;
    if (running <= 0)
      finish();
  };

  // POSTs one payload to targetURL. The task counts as done as soon as the
  // request either completes, fails, or cannot be issued at all; each payload
  // is counted exactly once.
  let sendResult = function(payload)
  {
    let counted = false;
    let done = function()
    {
      if (counted)
        return;
      counted = true;
      taskDone();
    };

    try
    {
      let request = new XMLHttpRequest();
      request.open("POST", targetURL);
      request.addEventListener("load", done, false);
      request.addEventListener("error", done, false);
      request.send(JSON.stringify(payload));
    }
    catch (e)
    {
      // Without this the counter would never reach zero and onDone would
      // never be called.
      reportException(e);
      done();
    }
  };

  for (let url of urls)
  {
    running++;
    Task.spawn(crawl_url.bind(null, url, tabAllocator, timeout)).then(
      sendResult,
      function(exception)
      {
        reportException(exception);
        sendResult({
          url: url,
          startTime: Date.now(),
          error: String(exception)
        });
      }
    );
  }

  // Nothing was scheduled (empty URL list): no task will ever complete, so
  // finish right away instead of waiting forever.
  if (running == 0)
    finish();
}
| 332 | 271 |
/**
 * Waits for the page info gathered in a content process for the specified
 * `tab`, delivered as an "abpcrawler:pageInfoGathered" message on the tab's
 * message manager. If no such message arrives within `timeout`, the returned
 * promise is resolved (not rejected) with `{error: "timeout"}`.
 *
 * @param {tab} tab
 *   Tab whose page info is awaited
 * @param {int} timeout
 *   Timeout in milliseconds
 * @return {Promise}
 *   Promise which is resolved with the data of the received message or with
 *   the timeout error object; it is never rejected
 */
function getPageInfo(tab, timeout)
{
  return new Promise((resolve) =>
  {
    const mm = tab.linkedBrowser.messageManager;
    let timerID;
    // Shared by both outcomes so that the listener and the timer are always
    // cleaned up together, whichever fires first.
    const onDone = (msg) =>
    {
      mm.removeMessageListener("abpcrawler:pageInfoGathered", onDone);
      clearTimeout(timerID);
      resolve(msg.data);
    };
    mm.addMessageListener("abpcrawler:pageInfoGathered", onDone);
    timerID = setTimeout(() => onDone({data: {error: "timeout"}}), timeout);
  });
}
| 298 |
/**
 * Generator performing the crawl of a single URL; it is meant to be driven by
 * a Task object. It obtains a tab, records the requests reported for it while
 * the URL loads, merges the page info received for the tab into the result and
 * always gives the tab back to the allocator afterwards.
 *
 * @param {String} url
 *   URL to be loaded
 * @param {TabAllocator} tabAllocator
 *   Provider of the tab to load the URL in
 * @param {int} timeout
 *   Time in milliseconds to wait for the page info
 * @result {Object}
 *   Crawling result
 */
function* crawl_url(url, tabAllocator, timeout)
{
  const tab = yield tabAllocator.getTab();
  const crawlResult = {url, requests: []};
  let notifier = null;

  // Collects every reported request; calls without an entry are ignored.
  const recordRequest = (entry, scanComplete) =>
  {
    if (entry)
    {
      let contentType = entry.type;
      let location = entry.location;
      let filter = entry.filter;
      crawlResult.requests.push({location, contentType, filter});
    }
  };

  try
  {
    crawlResult.startTime = Date.now();
    notifier = new RequestNotifier(tab.linkedBrowser.outerWindowID,
                                   recordRequest);

    tab.linkedBrowser.loadURI(url, null, null);

    // Whatever the page info contains is copied onto the result as is.
    let pageInfo = yield getPageInfo(tab, timeout);
    Object.assign(crawlResult, pageInfo);
    crawlResult.finalUrl = tab.linkedBrowser.currentURI.spec;
    crawlResult.endTime = Date.now();
  }
  finally
  {
    // Runs on success and on failure alike, so neither the notifier nor the
    // tab is leaked.
    if (notifier)
      notifier.shutdown();
    tabAllocator.releaseTab(tab);
  }

  return crawlResult;
}
| 396 | 340 |
/**
 * Reports an exception twice: it is handed to Cu.reportError() and also
 * written via dump(), followed by its stack trace when the value is an object
 * carrying a `stack` property.
 *
 * @param {*} e
 *   The thrown value; does not have to be an Error instance
 */
function reportException(e)
{
  const hasStack = e && typeof e == "object" && "stack" in e;
  const stack = hasStack ? e.stack + "\n" : "";

  Cu.reportError(e);
  dump(e + "\n" + stack + "\n");
}
| OLD | NEW |