| OLD | NEW |
| 1 /* | 1 /* |
| 2 * This Source Code is subject to the terms of the Mozilla Public License | 2 * This Source Code is subject to the terms of the Mozilla Public License |
| 3 * version 2.0 (the "License"). You can obtain a copy of the License at | 3 * version 2.0 (the "License"). You can obtain a copy of the License at |
| 4 * http://mozilla.org/MPL/2.0/. | 4 * http://mozilla.org/MPL/2.0/. |
| 5 */ | 5 */ |
| 6 | 6 |
| 7 /** | 7 /** |
| 8 * @module crawler | 8 * @module crawler |
| 9 */ | 9 */ |
| 10 | 10 |
| 11 Cu.import("resource://gre/modules/Services.jsm"); | 11 Cu.import("resource://gre/modules/Services.jsm"); |
| 12 Cu.import("resource://gre/modules/Task.jsm"); | 12 Cu.import("resource://gre/modules/Task.jsm"); |
| 13 Cu.import("resource://gre/modules/Promise.jsm"); | 13 Cu.import("resource://gre/modules/Promise.jsm"); |
| 14 | 14 |
| 15 function abprequire(module) | 15 function abprequire(module) |
| 16 { | 16 { |
| 17 let result = {}; | 17 let result = {}; |
| 18 result.wrappedJSObject = result; | 18 result.wrappedJSObject = result; |
| 19 Services.obs.notifyObservers(result, "adblockplus-require", module); | 19 Services.obs.notifyObservers(result, "adblockplus-require", module); |
| 20 return result.exports; | 20 return result.exports; |
| 21 } | 21 } |
| 22 | 22 |
| 23 let {Policy} = abprequire("contentPolicy"); | |
| 24 let {RequestNotifier} = abprequire("requestNotifier"); | 23 let {RequestNotifier} = abprequire("requestNotifier"); |
| 25 let {Utils} = abprequire("utils"); | |
| 26 | 24 |
| 27 let dataForTab = new WeakMap(); | |
| 28 | 25 |
| 29 /** | 26 /** |
| 30 * Creates a pool of tabs and allocates them to tasks on request. | 27 * Creates a pool of tabs and allocates them to tasks on request. |
| 31 * | 28 * |
| 32 * @param {tabbrowser} browser | 29 * @param {tabbrowser} browser |
| 33 * The tabbed browser where tabs should be created | 30 * The tabbed browser where tabs should be created |
| 34 * @param {int} maxtabs | 31 * @param {int} maxtabs |
| 35 * The maximum number of tabs to be allocated | 32 * The maximum number of tabs to be allocated |
| 36 * @constructor | 33 * @constructor |
| 37 */ | 34 */ |
| (...skipping 148 matching lines...) | (...skipping 148 matching lines...) |
| 186 window.document.documentElement.acceptDialog(); | 183 window.document.documentElement.acceptDialog(); |
| 187 else | 184 else |
| 188 window.close(); | 185 window.close(); |
| 189 }, false); | 186 }, false); |
| 190 }, | 187 }, |
| 191 | 188 |
| 192 QueryInterface: XPCOMUtils.generateQI([Ci.nsIObserver, Ci.nsISupportsWeakReference]) | 189 QueryInterface: XPCOMUtils.generateQI([Ci.nsIObserver, Ci.nsISupportsWeakReference]) |
| 193 }; | 190 }; |
| 194 | 191 |
| 195 /** | 192 /** |
| 196 * Retrieves crawler results associated with a particular content window. | |
| 197 * | |
| 198 * @param {Window} window | |
| 199 * Content window to retrieve crawler results for | |
| 200 * @result {Object} | |
| 201 * Crawler results or undefined if the window wasn't created by the crawler. | |
| 202 */ | |
| 203 function getDataForWindow(window) | |
| 204 { | |
| 205 let topWindow = window.top; | |
| 206 if (!topWindow.document) | |
| 207 throw new Error("No document associated with the node's top window"); | |
| 208 let tabbrowser = Utils.getChromeWindow(topWindow).getBrowser(); | |
| 209 if (!tabbrowser) | |
| 210 throw new Error("Unable to get a tabbrowser reference from the window"); | |
| 211 let browser = tabbrowser.getBrowserForDocument(topWindow.document); | |
| 212 if (!browser) | |
| 213 throw new Error("Unable to get browser for the content window"); | |
| 214 let tab = tabbrowser.getTabForBrowser(browser); | |
| 215 if (!tab) | |
| 216 throw new Error("Unable to get tab for the browser"); | |
| 217 return dataForTab.get(tab); | |
| 218 }; | |
| 219 | |
| 220 /** | |
| 221 * Starts the crawling session. The crawler opens each URL in a tab and stores | 193 * Starts the crawling session. The crawler opens each URL in a tab and stores |
| 222 * the results. | 194 * the results. |
| 223 * | 195 * |
| 224 * @param {Window} window | 196 * @param {Window} window |
| 225 * The browser window we're operating in | 197 * The browser window we're operating in |
| 226 * @param {String[]} urls | 198 * @param {String[]} urls |
| 227 * URLs to be crawled | 199 * URLs to be crawled |
| 228 * @param {int} maxtabs | 200 * @param {int} maxtabs |
| 229 * Maximum number of tabs to be opened | 201 * Maximum number of tabs to be opened |
| 230 * @param {String} targetURL | 202 * @param {String} targetURL |
| 231 * URL that should receive the results | 203 * URL that should receive the results |
| 232 */ | 204 */ |
| 233 function run(window, urls, timeout, maxtabs, targetURL, onDone) | 205 function run(window, urls, timeout, maxtabs, targetURL, onDone) |
| 234 { | 206 { |
| 235 let requestNotifier = new RequestNotifier(null, function() {}); | |
| 236 | |
| 237 let origProcessNode = Policy.processNode; | |
| 238 Policy.processNode = processNodeReplacement.bind(null, origProcessNode, requestNotifier); | |
| 239 | |
| 240 let tabAllocator = new TabAllocator(window.getBrowser(), maxtabs); | 207 let tabAllocator = new TabAllocator(window.getBrowser(), maxtabs); |
| 241 let loadListener = new LoadListener(window.getBrowser(), timeout); | 208 let loadListener = new LoadListener(window.getBrowser(), timeout); |
| 242 let running = 0; | 209 let running = 0; |
| 243 let windowCloser = new WindowCloser(); | 210 let windowCloser = new WindowCloser(); |
| 244 let taskDone = function() | 211 let taskDone = function() |
| 245 { | 212 { |
| 246 running--; | 213 running--; |
| 247 if (running <= 0) | 214 if (running <= 0) |
| 248 { | 215 { |
| 249 Policy.processNode = origProcessNode; | |
| 250 requestNotifier.shutdown(); | |
| 251 loadListener.stop(); | 216 loadListener.stop(); |
| 252 windowCloser.stop(); | 217 windowCloser.stop(); |
| 253 onDone(); | 218 onDone(); |
| 254 } | 219 } |
| 255 }; | 220 }; |
| 256 | 221 |
| 257 for (let url of urls) | 222 for (let url of urls) |
| 258 { | 223 { |
| 259 running++; | 224 running++; |
| 260 Task.spawn(crawl_url.bind(null, url, tabAllocator, loadListener)).then(function(result) | 225 Task.spawn(crawl_url.bind(null, url, tabAllocator, loadListener)).then(function(result) |
| (...skipping 26 matching lines...) | (...skipping 26 matching lines...) |
| 287 * | 252 * |
| 288 * @param {String} url | 253 * @param {String} url |
| 289 * @param {TabAllocator} tabAllocator | 254 * @param {TabAllocator} tabAllocator |
| 290 * @param {LoadListener} loadListener | 255 * @param {LoadListener} loadListener |
| 291 * @result {Object} | 256 * @result {Object} |
| 292 * Crawling result | 257 * Crawling result |
| 293 */ | 258 */ |
| 294 function* crawl_url(url, tabAllocator, loadListener) | 259 function* crawl_url(url, tabAllocator, loadListener) |
| 295 { | 260 { |
| 296 let tab = yield tabAllocator.getTab(); | 261 let tab = yield tabAllocator.getTab(); |
| 297 let result = {url: url}; | 262 let result = {url, requests: []}; |
| 298 | 263 let requestNotifier; |
| 299 dataForTab.set(tab, result); | |
| 300 try | 264 try |
| 301 { | 265 { |
| 302 result.startTime = Date.now(); | 266 result.startTime = Date.now(); |
| 267 requestNotifier = new RequestNotifier(tab.linkedBrowser.outerWindowID, |
| 268 function(entry, scanComplete) |
| 269 { |
| 270 if (!entry) |
| 271 return; |
| 272 let {type: contentType, location, filter} = entry; |
| 273 result.requests.push({location, contentType, filter}); |
| 274 }); |
| 275 |
| 303 tab.linkedBrowser.loadURI(url, null, null); | 276 tab.linkedBrowser.loadURI(url, null, null); |
| 304 [result.channelStatus, result.headers] = yield loadListener.waitForLoad(tab); | 277 [result.channelStatus, result.headers] = yield loadListener.waitForLoad(tab); |
| 305 result.endTime = Date.now(); | 278 result.endTime = Date.now(); |
| 306 result.finalUrl = tab.linkedBrowser.currentURI.spec; | 279 result.finalUrl = tab.linkedBrowser.currentURI.spec; |
| 307 | 280 |
| 308 let document = tab.linkedBrowser.contentDocument; | 281 let document = tab.linkedBrowser.contentDocument; |
| 309 if (document.documentElement) | 282 if (document.documentElement) |
| 310 { | 283 { |
| 311 try | 284 try |
| 312 { | 285 { |
| (...skipping 11 matching lines...) | (...skipping 11 matching lines...) |
| 324 result.error = "Capturing screenshot failed: " + e; | 297 result.error = "Capturing screenshot failed: " + e; |
| 325 } | 298 } |
| 326 | 299 |
| 327 // TODO: Capture frames as well? | 300 // TODO: Capture frames as well? |
| 328 let serializer = new tab.ownerDocument.defaultView.XMLSerializer(); | 301 let serializer = new tab.ownerDocument.defaultView.XMLSerializer(); |
| 329 result.source = serializer.serializeToString(document.documentElement); | 302 result.source = serializer.serializeToString(document.documentElement); |
| 330 } | 303 } |
| 331 } | 304 } |
| 332 finally | 305 finally |
| 333 { | 306 { |
| 307 if (requestNotifier) |
| 308 requestNotifier.shutdown(); |
| 334 tabAllocator.releaseTab(tab); | 309 tabAllocator.releaseTab(tab); |
| 335 } | 310 } |
| 336 return result; | 311 return result; |
| 337 } | 312 } |
| 338 | 313 |
| 339 function reportException(e) | 314 function reportException(e) |
| 340 { | 315 { |
| 341 let stack = ""; | 316 let stack = ""; |
| 342 if (e && typeof e == "object" && "stack" in e) | 317 if (e && typeof e == "object" && "stack" in e) |
| 343 stack = e.stack + "\n"; | 318 stack = e.stack + "\n"; |
| 344 | 319 |
| 345 Cu.reportError(e); | 320 Cu.reportError(e); |
| 346 dump(e + "\n" + stack + "\n"); | 321 dump(e + "\n" + stack + "\n"); |
| 347 } | 322 } |
| 348 | |
| 349 /** | |
| 350 * Wrapper for the Policy.processNode() function in ABP. Calls the original | |
| 351 * function and records all the data. | |
| 352 * | |
| 353 * @param {Function} origProcessNode | |
| 354 * The original processNode function. | |
| 355 * @param {RequestNotifier} requestNotifier | |
| 356 * The crawler's RequestNotifier object instance. | |
| 357 * @param {nsIDOMWindow} wnd | |
| 358 * @param {nsIDOMElement} node | |
| 359 * @param {Number} contentType | |
| 360 * @param {nsIURI} location | |
| 361 * @param {Boolean} collapse | |
| 362 * @return {Boolean} | |
| 363 */ | |
| 364 function processNodeReplacement(origProcessNode, requestNotifier, wnd, node, contentType, location, collapse) | |
| 365 { | |
| 366 let filters = []; | |
| 367 let origListener = requestNotifier.listener; | |
| 368 requestNotifier.listener = function(window, node, entry) | |
| 369 { | |
| 370 if (entry.filter) | |
| 371 filters.push(entry.filter.text); | |
| 372 }; | |
| 373 | |
| 374 /* | |
| 375 * Call the original processNode. If the original throws, then we will too, so the try block below has no catch clause; its finally only restores the listener. | |
| 376 */ | |
| 377 let result; | |
| 378 try | |
| 379 { | |
| 380 result = origProcessNode(wnd, node, contentType, location, collapse); | |
| 381 } | |
| 382 finally | |
| 383 { | |
| 384 requestNotifier.listener = origListener; | |
| 385 } | |
| 386 | |
| 387 try | |
| 388 { | |
| 389 let data = getDataForWindow(wnd); | |
| 390 if (data) | |
| 391 { | |
| 392 if (!("requests" in data)) | |
| 393 data.requests = []; | |
| 394 data.requests.push({ | |
| 395 contentType: contentType, | |
| 396 location: (contentType == Policy.type.ELEMHIDE ? location.text : location.spec), | |
| 397 blocked: result != Ci.nsIContentPolicy.ACCEPT, | |
| 398 filters: filters | |
| 399 }); | |
| 400 } | |
| 401 } | |
| 402 catch (e) | |
| 403 { | |
| 404 reportException(e); | |
| 405 } | |
| 406 return result; | |
| 407 }; | |
| OLD | NEW |