| Left: | ||
| Right: | 
| OLD | NEW | 
|---|---|
| 1 /* | 1 /* | 
| 2 * This Source Code is subject to the terms of the Mozilla Public License | 2 * This Source Code is subject to the terms of the Mozilla Public License | 
| 3 * version 2.0 (the "License"). You can obtain a copy of the License at | 3 * version 2.0 (the "License"). You can obtain a copy of the License at | 
| 4 * http://mozilla.org/MPL/2.0/. | 4 * http://mozilla.org/MPL/2.0/. | 
| 5 */ | 5 */ | 
| 6 | 6 | 
| 7 /** | 7 /** | 
| 8 * @module crawler | 8 * @module crawler | 
| 9 */ | 9 */ | 
| 10 | 10 | 
| 11 Cu.import("resource://gre/modules/Services.jsm"); | 11 Cu.import("resource://gre/modules/Services.jsm"); | 
| 12 Cu.import("resource://gre/modules/Task.jsm"); | 12 Cu.import("resource://gre/modules/Task.jsm"); | 
| 13 Cu.import("resource://gre/modules/Promise.jsm"); | 13 Cu.import("resource://gre/modules/Promise.jsm"); | 
| 14 | 14 | 
| 15 function abprequire(module) | 15 function abprequire(module) | 
| 16 { | 16 { | 
| 17 let result = {}; | 17 let result = {}; | 
| 18 result.wrappedJSObject = result; | 18 result.wrappedJSObject = result; | 
| 19 Services.obs.notifyObservers(result, "adblockplus-require", module); | 19 Services.obs.notifyObservers(result, "adblockplus-require", module); | 
| 20 return result.exports; | 20 return result.exports; | 
| 21 } | 21 } | 
| 22 | 22 | 
| 23 let {Policy} = abprequire("contentPolicy"); | |
| 24 let {RequestNotifier} = abprequire("requestNotifier"); | 23 let {RequestNotifier} = abprequire("requestNotifier"); | 
| 25 let {Utils} = abprequire("utils"); | |
| 26 | 24 | 
| 27 let dataForTab = new WeakMap(); | |
| 28 | 25 | 
| 29 /** | 26 /** | 
| 30 * Creates a pool of tabs and allocates them to tasks on request. | 27 * Creates a pool of tabs and allocates them to tasks on request. | 
| 31 * | 28 * | 
| 32 * @param {tabbrowser} browser | 29 * @param {tabbrowser} browser | 
| 33 * The tabbed browser where tabs should be created | 30 * The tabbed browser where tabs should be created | 
| 34 * @param {int} maxtabs | 31 * @param {int} maxtabs | 
| 35 * The maximum number of tabs to be allocated | 32 * The maximum number of tabs to be allocated | 
| 36 * @constructor | 33 * @constructor | 
| 37 */ | 34 */ | 
| (...skipping 148 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 186 window.document.documentElement.acceptDialog(); | 183 window.document.documentElement.acceptDialog(); | 
| 187 else | 184 else | 
| 188 window.close(); | 185 window.close(); | 
| 189 }, false); | 186 }, false); | 
| 190 }, | 187 }, | 
| 191 | 188 | 
| 192 QueryInterface: XPCOMUtils.generateQI([Ci.nsIObserver, Ci.nsISupportsWeakReference]) | 189 QueryInterface: XPCOMUtils.generateQI([Ci.nsIObserver, Ci.nsISupportsWeakReference]) | 
| 193 }; | 190 }; | 
| 194 | 191 | 
| 195 /** | 192 /** | 
| 196 * Retrieves crawler results associated with a particular content window. | |
| 197 * | |
| 198 * @param {Window} window | |
| 199 * Content window to retrieve crawler results for | |
| 200 * @result {Object} | |
| 201 * Crawler results or undefined if the window wasn't created by the crawler. | |
| 202 */ | |
| 203 function getDataForWindow(window) | |
| 204 { | |
| 205 let topWindow = window.top; | |
| 206 if (!topWindow.document) | |
| 207 throw new Error("No document associated with the node's top window"); | |
| 208 let tabbrowser = Utils.getChromeWindow(topWindow).getBrowser(); | |
| 209 if (!tabbrowser) | |
| 210 throw new Error("Unable to get a tabbrowser reference from the window"); | |
| 211 let browser = tabbrowser.getBrowserForDocument(topWindow.document); | |
| 212 if (!browser) | |
| 213 throw new Error("Unable to get browser for the content window"); | |
| 214 let tab = tabbrowser.getTabForBrowser(browser); | |
| 215 if (!tab) | |
| 216 throw new Error("Unable to get tab for the browser"); | |
| 217 return dataForTab.get(tab); | |
| 218 }; | |
| 219 | |
| 220 /** | |
| 221 * Starts the crawling session. The crawler opens each URL in a tab and stores | 193 * Starts the crawling session. The crawler opens each URL in a tab and stores | 
| 222 * the results. | 194 * the results. | 
| 223 * | 195 * | 
| 224 * @param {Window} window | 196 * @param {Window} window | 
| 225 * The browser window we're operating in | 197 * The browser window we're operating in | 
| 226 * @param {String[]} urls | 198 * @param {String[]} urls | 
| 227 * URLs to be crawled | 199 * URLs to be crawled | 
| 228 * @param {int} number_of_tabs | 200 * @param {int} number_of_tabs | 
| 229 * Maximum number of tabs to be opened | 201 * Maximum number of tabs to be opened | 
| 230 * @param {String} targetURL | 202 * @param {String} targetURL | 
| 231 * URL that should receive the results | 203 * URL that should receive the results | 
| 232 */ | 204 */ | 
| 233 function run(window, urls, timeout, maxtabs, targetURL, onDone) | 205 function run(window, urls, timeout, maxtabs, targetURL, onDone) | 
| 234 { | 206 { | 
| 235 let requestNotifier = new RequestNotifier(null, function() {}); | |
| 236 | |
| 237 let origProcessNode = Policy.processNode; | |
| 238 Policy.processNode = processNodeReplacement.bind(null, origProcessNode, requestNotifier); | |
| 239 | |
| 240 let tabAllocator = new TabAllocator(window.getBrowser(), maxtabs); | 207 let tabAllocator = new TabAllocator(window.getBrowser(), maxtabs); | 
| 241 let loadListener = new LoadListener(window.getBrowser(), timeout); | 208 let loadListener = new LoadListener(window.getBrowser(), timeout); | 
| 242 let running = 0; | 209 let running = 0; | 
| 243 let windowCloser = new WindowCloser(); | 210 let windowCloser = new WindowCloser(); | 
| 244 let taskDone = function() | 211 let taskDone = function() | 
| 245 { | 212 { | 
| 246 running--; | 213 running--; | 
| 247 if (running <= 0) | 214 if (running <= 0) | 
| 248 { | 215 { | 
| 249 Policy.processNode = origProcessNode; | |
| 250 requestNotifier.shutdown(); | |
| 251 loadListener.stop(); | 216 loadListener.stop(); | 
| 252 windowCloser.stop(); | 217 windowCloser.stop(); | 
| 253 onDone(); | 218 onDone(); | 
| 254 } | 219 } | 
| 255 }; | 220 }; | 
| 256 | 221 | 
| 257 for (let url of urls) | 222 for (let url of urls) | 
| 258 { | 223 { | 
| 259 running++; | 224 running++; | 
| 260 Task.spawn(crawl_url.bind(null, url, tabAllocator, loadListener)).then(function(result) | 225 Task.spawn(crawl_url.bind(null, url, tabAllocator, loadListener)).then(function(result) | 
| (...skipping 26 matching lines...) Expand all Loading... | |
| 287 * | 252 * | 
| 288 * @param {String} url | 253 * @param {String} url | 
| 289 * @param {TabAllocator} tabAllocator | 254 * @param {TabAllocator} tabAllocator | 
| 290 * @param {loadListener} loadListener | 255 * @param {loadListener} loadListener | 
| 291 * @result {Object} | 256 * @result {Object} | 
| 292 * Crawling result | 257 * Crawling result | 
| 293 */ | 258 */ | 
| 294 function* crawl_url(url, tabAllocator, loadListener) | 259 function* crawl_url(url, tabAllocator, loadListener) | 
| 295 { | 260 { | 
| 296 let tab = yield tabAllocator.getTab(); | 261 let tab = yield tabAllocator.getTab(); | 
| 297 let result = {url: url}; | 262 let result = {url, requests: []}; | 
| 298 | 263 | 
| 299 dataForTab.set(tab, result); | |
| 300 try | 264 try | 
| 301 { | 265 { | 
| 302 result.startTime = Date.now(); | 266 result.startTime = Date.now(); | 
| 267 let requestNotifier = new RequestNotifier(tab.linkedBrowser.outerWindowID, function({type, location, filter}, scanComplete) | |
| 
 
Wladimir Palant
2016/03/14 19:50:43
When the scan is completed the listener is called
 
sergei
2016/03/15 10:59:56
Fixed, is the indentation correct now?
 
sergei
2016/03/15 10:59:56
Fixed.
However, I guess, it should be documented
 
Wladimir Palant
2016/03/15 11:05:44
Yes, it should be documented. As to reproducing -
 
 | |
| 268 { | |
| 269 result.requests.push({location, contentType: type, filter}); | |
| 270 }); | |
| 
 
Wladimir Palant
2016/03/14 19:50:43
You need to shut down this notifier when you are d
 
sergei
2016/03/15 10:59:56
Done. Thanks, overlooked it.
 
 | |
| 271 | |
| 303 tab.linkedBrowser.loadURI(url, null, null); | 272 tab.linkedBrowser.loadURI(url, null, null); | 
| 304 [result.channelStatus, result.headers] = yield loadListener.waitForLoad(tab); | 273 [result.channelStatus, result.headers] = yield loadListener.waitForLoad(tab); | 
| 305 result.endTime = Date.now(); | 274 result.endTime = Date.now(); | 
| 306 result.finalUrl = tab.linkedBrowser.currentURI.spec; | 275 result.finalUrl = tab.linkedBrowser.currentURI.spec; | 
| 307 | 276 | 
| 308 let document = tab.linkedBrowser.contentDocument; | 277 let document = tab.linkedBrowser.contentDocument; | 
| 309 if (document.documentElement) | 278 if (document.documentElement) | 
| 310 { | 279 { | 
| 311 try | 280 try | 
| 312 { | 281 { | 
| (...skipping 25 matching lines...) Expand all Loading... | |
| 338 | 307 | 
| 339 function reportException(e) | 308 function reportException(e) | 
| 340 { | 309 { | 
| 341 let stack = ""; | 310 let stack = ""; | 
| 342 if (e && typeof e == "object" && "stack" in e) | 311 if (e && typeof e == "object" && "stack" in e) | 
| 343 stack = e.stack + "\n"; | 312 stack = e.stack + "\n"; | 
| 344 | 313 | 
| 345 Cu.reportError(e); | 314 Cu.reportError(e); | 
| 346 dump(e + "\n" + stack + "\n"); | 315 dump(e + "\n" + stack + "\n"); | 
| 347 } | 316 } | 
| 348 | |
| 349 /** | |
| 350 * Wrapper for the Policy.processNode() function in ABP. Calls the original | |
| 351 * function and records all the data. | |
| 352 * | |
| 353 * @param {Function} origProcessNode | |
| 354 * The original processNode function. | |
| 355 * @param {RequestNotifier} requestNotifier | |
| 356 * The crawler's RequestNotifier object instance. | |
| 357 * @param {nsIDOMWindow} wnd | |
| 358 * @param {nsIDOMElement} node | |
| 359 * @param {Number} contentType | |
| 360 * @param {nsIURI} location | |
| 361 * @param {Boolean} collapse | |
| 362 * @return {Boolean} | |
| 363 */ | |
| 364 function processNodeReplacement(origProcessNode, requestNotifier, wnd, node, contentType, location, collapse) | |
| 365 { | |
| 366 let filters = []; | |
| 367 let origListener = requestNotifier.listener; | |
| 368 requestNotifier.listener = function(window, node, entry) | |
| 369 { | |
| 370 if (entry.filter) | |
| 371 filters.push(entry.filter.text); | |
| 372 }; | |
| 373 | |
| 374 /* | |
| 375 * Call the original processNode. If the original throws, then we will too, so this is outside a try clause. | |
| 376 */ | |
| 377 let result; | |
| 378 try | |
| 379 { | |
| 380 result = origProcessNode(wnd, node, contentType, location, collapse); | |
| 381 } | |
| 382 finally | |
| 383 { | |
| 384 requestNotifier.listener = origListener; | |
| 385 } | |
| 386 | |
| 387 try | |
| 388 { | |
| 389 let data = getDataForWindow(wnd); | |
| 390 if (data) | |
| 391 { | |
| 392 if (!("requests" in data)) | |
| 393 data.requests = []; | |
| 394 data.requests.push({ | |
| 395 contentType: contentType, | |
| 396 location: (contentType == Policy.type.ELEMHIDE ? location.text : location.spec), | |
| 397 blocked: result != Ci.nsIContentPolicy.ACCEPT, | |
| 398 filters: filters | |
| 399 }); | |
| 400 } | |
| 401 } | |
| 402 catch (e) | |
| 403 { | |
| 404 reportException(e); | |
| 405 } | |
| 406 return result; | |
| 407 }; | |
| OLD | NEW |