| Left: | ||
| Right: |
| OLD | NEW |
|---|---|
| 1 /* | 1 /* |
| 2 * This Source Code is subject to the terms of the Mozilla Public License | 2 * This Source Code is subject to the terms of the Mozilla Public License |
| 3 * version 2.0 (the "License"). You can obtain a copy of the License at | 3 * version 2.0 (the "License"). You can obtain a copy of the License at |
| 4 * http://mozilla.org/MPL/2.0/. | 4 * http://mozilla.org/MPL/2.0/. |
| 5 */ | 5 */ |
| 6 | 6 |
| 7 /** | 7 /** |
| 8 * @module crawler | 8 * @module crawler |
| 9 */ | 9 */ |
| 10 | 10 |
| 11 Cu.import("resource://gre/modules/Services.jsm"); | 11 Cu.import("resource://gre/modules/Services.jsm"); |
| 12 Cu.import("resource://gre/modules/Task.jsm"); | 12 Cu.import("resource://gre/modules/Task.jsm"); |
| 13 Cu.import("resource://gre/modules/Promise.jsm"); | 13 Cu.import("resource://gre/modules/Promise.jsm"); |
| 14 Cu.import("resource://gre/modules/Timer.jsm"); | |
| 14 | 15 |
| 15 function abprequire(module) | 16 function abprequire(module) |
| 16 { | 17 { |
| 17 let result = {}; | 18 let result = {}; |
| 18 result.wrappedJSObject = result; | 19 result.wrappedJSObject = result; |
| 19 Services.obs.notifyObservers(result, "adblockplus-require", module); | 20 Services.obs.notifyObservers(result, "adblockplus-require", module); |
| 20 return result.exports; | 21 return result.exports; |
| 21 } | 22 } |
| 22 | 23 |
| 23 let {RequestNotifier} = abprequire("requestNotifier"); | 24 let {RequestNotifier} = abprequire("requestNotifier"); |
| 24 | |
| 25 let {FilterNotifier} = abprequire("filterNotifier"); | 25 let {FilterNotifier} = abprequire("filterNotifier"); |
| 26 let {FilterStorage} = abprequire("filterStorage"); | 26 let {FilterStorage} = abprequire("filterStorage"); |
| 27 | 27 |
| 28 /** | 28 /** |
| 29 * Creates a pool of tabs and allocates them to tasks on request. | 29 * Creates a pool of tabs and allocates them to tasks on request. |
| 30 * | 30 * |
| 31 * @param {tabbrowser} browser | 31 * @param {tabbrowser} browser |
| 32 * The tabbed browser where tabs should be created | 32 * The tabbed browser where tabs should be created |
| 33 * @param {int} maxtabs | 33 * @param {int} maxtabs |
| 34 * The maximum number of tabs to be allocated | 34 * The maximum number of tabs to be allocated |
| (...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 77 tab = browser.addTab("about:blank"); | 77 tab = browser.addTab("about:blank"); |
| 78 | 78 |
| 79 if (this._deferred.length) | 79 if (this._deferred.length) |
| 80 this._deferred.shift().resolve(tab); | 80 this._deferred.shift().resolve(tab); |
| 81 else | 81 else |
| 82 this._tabs.push(tab); | 82 this._tabs.push(tab); |
| 83 } | 83 } |
| 84 }; | 84 }; |
| 85 | 85 |
| 86 /** | 86 /** |
| 87 * Observes page loads in a particular tabbed browser. | |
| 88 * | |
| 89 * @param {tabbrowser} browser | |
| 90 * The tabbed browser to be observed | |
| 91 * @param {int} timeout | |
| 92 * Load timeout in milliseconds | |
| 93 * @constructor | |
| 94 */ | |
| 95 function LoadListener(browser, timeout) | |
| 96 { | |
| 97 this._browser = browser; | |
| 98 this._deferred = new Map(); | |
| 99 this._timeout = timeout; | |
| 100 browser.addTabsProgressListener(this); | |
| 101 } | |
| 102 LoadListener.prototype = { | |
| 103 /** | |
| 104 * Returns a promise that will be resolved when the page in the specified tab | |
| 105 * finishes loading. Loading will be stopped if the timeout is reached. | |
| 106 * | |
| 107 * @param {tab} tab | |
| 108 * @result {Promise} | |
| 109 */ | |
| 110 waitForLoad: function(tab) | |
| 111 { | |
| 112 let deferred = Promise.defer(); | |
| 113 this._deferred.set(tab.linkedBrowser, deferred); | |
| 114 | |
| 115 tab.ownerDocument.defaultView.setTimeout(function() | |
| 116 { | |
| 117 tab.linkedBrowser.stop(); | |
| 118 }, this._timeout); | |
| 119 | |
| 120 return deferred.promise; | |
| 121 }, | |
| 122 | |
| 123 /** | |
| 124 * Deactivates this object. | |
| 125 */ | |
| 126 stop: function() | |
| 127 { | |
| 128 this._browser.removeTabsProgressListener(this); | |
| 129 }, | |
| 130 | |
| 131 onStateChange: function(browser, progress, request, flags, status) | |
| 132 { | |
| 133 if ((flags & Ci.nsIWebProgressListener.STATE_STOP) && (flags & Ci.nsIWebProgressListener.STATE_IS_WINDOW)) | |
| 134 { | |
| 135 let deferred = this._deferred.get(browser); | |
| 136 if (deferred) | |
| 137 { | |
| 138 this._deferred.delete(browser); | |
| 139 | |
| 140 let headers = []; | |
| 141 if (request instanceof Ci.nsIHttpChannel) | |
| 142 { | |
| 143 try | |
| 144 { | |
| 145 headers.push("HTTP/x.x " + request.responseStatus + " " + request.responseStatusText); | |
| 146 request.visitResponseHeaders((header, value) => headers.push(header + ": " + value)); | |
| 147 } | |
| 148 catch (e) | |
| 149 { | |
| 150 // Exceptions are expected here | |
| 151 } | |
| 152 } | |
| 153 deferred.resolve([status, headers]); | |
| 154 } | |
| 155 } | |
| 156 } | |
| 157 }; | |
|
Wladimir Palant
2016/03/15 10:07:10
Why did you move this functionality into the conte
sergei
2016/03/15 16:40:10
In e10s it does not work in chrome process, we can
| |
| 158 | |
| 159 /** | |
| 160 * Once created, this object will make sure all new windows are dismissed | 87 * Once created, this object will make sure all new windows are dismissed |
| 161 * immediately. | 88 * immediately. |
| 162 * | 89 * |
| 163 * @constructor | 90 * @constructor |
| 164 */ | 91 */ |
| 165 function WindowCloser() | 92 function WindowCloser() |
| 166 { | 93 { |
| 167 Services.obs.addObserver(this, "xul-window-registered", true) | 94 Services.obs.addObserver(this, "xul-window-registered", true) |
| 168 } | 95 } |
| 169 WindowCloser.prototype = { | 96 WindowCloser.prototype = { |
| (...skipping 30 matching lines...) Expand all Loading... | |
| 200 * @param {String[]} urls | 127 * @param {String[]} urls |
| 201 * URLs to be crawled | 128 * URLs to be crawled |
| 202 * @param {int} number_of_tabs | 129 * @param {int} number_of_tabs |
| 203 * Maximum number of tabs to be opened | 130 * Maximum number of tabs to be opened |
| 204 * @param {String} targetURL | 131 * @param {String} targetURL |
| 205 * URL that should receive the results | 132 * URL that should receive the results |
| 206 */ | 133 */ |
| 207 function run(window, urls, timeout, maxtabs, targetURL, onDone) | 134 function run(window, urls, timeout, maxtabs, targetURL, onDone) |
| 208 { | 135 { |
| 209 let tabAllocator = new TabAllocator(window.getBrowser(), maxtabs); | 136 let tabAllocator = new TabAllocator(window.getBrowser(), maxtabs); |
| 210 let loadListener = new LoadListener(window.getBrowser(), timeout); | 137 |
| 211 let running = 0; | 138 let running = 0; |
| 212 let windowCloser = new WindowCloser(); | 139 let windowCloser = new WindowCloser(); |
| 213 let taskDone = function() | 140 let taskDone = function() |
| 214 { | 141 { |
| 215 running--; | 142 running--; |
| 216 if (running <= 0) | 143 if (running <= 0) |
| 217 { | 144 { |
| 218 loadListener.stop(); | |
| 219 windowCloser.stop(); | 145 windowCloser.stop(); |
| 220 onDone(); | 146 onDone(); |
| 221 } | 147 } |
| 222 }; | 148 }; |
| 223 | 149 |
| 224 new Promise(function(resolve, reject) | 150 new Promise(function(resolve, reject) |
| 225 { | 151 { |
| 226 if (FilterStorage.subscriptions.length > 0 && !FilterStorage._loading) | 152 if (FilterStorage.subscriptions.length > 0 && !FilterStorage._loading) |
| 227 { | 153 { |
| 228 resolve(); | 154 resolve(); |
| 229 return; | 155 return; |
| 230 } | 156 } |
| 231 FilterNotifier.addListener((action, item, newValue, oldValue) => | 157 FilterNotifier.addListener((action, item, newValue, oldValue) => |
| 232 { | 158 { |
| 233 if (action === "load") | 159 if (action === "load") |
| 234 { | 160 { |
| 235 resolve(); | 161 resolve(); |
| 236 } | 162 } |
| 237 }); | 163 }); |
| 238 }).then(_ => | 164 }).then(_ => |
| 239 { | 165 { |
| 240 for (let url of urls) | 166 for (let url of urls) |
| 241 { | 167 { |
| 242 running++; | 168 running++; |
| 243 Task.spawn(crawl_url.bind(null, url, tabAllocator, loadListener)).then(function(result) | 169 Task.spawn(crawl_url.bind(null, url, tabAllocator, timeout)).then(function(result) |
| 244 { | 170 { |
| 245 let request = new XMLHttpRequest(); | 171 let request = new XMLHttpRequest(); |
| 246 request.open("POST", targetURL); | 172 request.open("POST", targetURL); |
| 247 request.addEventListener("load", taskDone, false); | 173 request.addEventListener("load", taskDone, false); |
| 248 request.addEventListener("error", taskDone, false); | 174 request.addEventListener("error", taskDone, false); |
| 249 request.send(JSON.stringify(result)); | 175 request.send(JSON.stringify(result)); |
| 250 }, function(url, exception) | 176 }, function(url, exception) |
| 251 { | 177 { |
| 252 reportException(exception); | 178 reportException(exception); |
| 253 | 179 |
| (...skipping 13 matching lines...) Expand all Loading... | |
| 267 // and does not contain any waiting code. | 193 // and does not contain any waiting code. |
| 268 }).catch(reportException); | 194 }).catch(reportException); |
| 269 } | 195 } |
| 270 exports.run = run; | 196 exports.run = run; |
| 271 | 197 |
| 272 /** | 198 /** |
| 273 * Crawls a URL. This is a generator meant to be used via a Task object. | 199 * Crawls a URL. This is a generator meant to be used via a Task object. |
| 274 * | 200 * |
| 275 * @param {String} url | 201 * @param {String} url |
| 276 * @param {TabAllocator} tabAllocator | 202 * @param {TabAllocator} tabAllocator |
| 277 * @param {loadListener} loadListener | |
| 278 * @result {Object} | 203 * @result {Object} |
| 279 * Crawling result | 204 * Crawling result |
| 280 */ | 205 */ |
| 281 function* crawl_url(url, tabAllocator, loadListener) | 206 function* crawl_url(url, tabAllocator, timeout) |
| 282 { | 207 { |
| 283 let tab = yield tabAllocator.getTab(); | 208 let tab = yield tabAllocator.getTab(); |
| 284 let result = {url, requests: []}; | 209 let result = {url, requests: []}; |
| 285 | 210 |
| 286 try | 211 try |
| 287 { | 212 { |
| 288 result.startTime = Date.now(); | 213 result.startTime = Date.now(); |
| 289 let requestNotifier = new RequestNotifier(tab.linkedBrowser.outerWindowID, function({type, location, filter}, scanComplete) | 214 let requestNotifier = new RequestNotifier(tab.linkedBrowser.outerWindowID, function({type, location, filter}, scanComplete) |
|
sergei
2016/03/15 16:40:10
BTW, in addition, this part stops to work, I have
sergei
2016/03/16 14:44:23
https://issues.adblockplus.org/ticket/3815
| |
| 290 { | 215 { |
| 291 result.requests.push({location, contentType: type, filter}); | 216 result.requests.push({location, contentType: type, filter}); |
| 292 }); | 217 }); |
| 293 | 218 |
| 294 tab.linkedBrowser.loadURI(url, null, null); | 219 tab.linkedBrowser.loadURI(url, null, null); |
| 295 [result.channelStatus, result.headers] = yield loadListener.waitForLoad(tab); | 220 |
| 221 let mm = tab.linkedBrowser.messageManager; | |
| 222 let pageInfoFuture = new Promise((resolve, result) => | |
| 223 { | |
| 224 let timerID; | |
| 225 let onDone = (pageInfo) => | |
| 226 { | |
| 227 mm.removeMessageListener("abpcrawler:pageInfoGathered", onDone); | |
|
Wladimir Palant
2016/03/15 10:07:10
So, which tab did you get the page info for?
The
sergei
2016/03/16 14:44:23
For the `tab`, it's "browser message manager" it a
| |
| 228 clearTimeout(timerID); | |
| 229 resolve(pageInfo); | |
| 230 } | |
| 231 mm.addMessageListener("abpcrawler:pageInfoGathered", (msg) => onDone(msg.data));; | |
| 232 timerID = setTimeout(onDone.bind(this, {error: "timeout"}), timeout); | |
| 233 }); | |
|
Wladimir Palant
2016/03/15 10:07:10
Please move this functionality into a separate fun
sergei
2016/03/16 14:44:23
Done.
| |
| 234 | |
| 235 let pageInfo = yield pageInfoFuture; | |
| 236 | |
| 237 result.finalUrl = tab.linkedBrowser.currentURI.spec; | |
| 238 Object.assign(result, pageInfo); | |
| 296 result.endTime = Date.now(); | 239 result.endTime = Date.now(); |
| 297 result.finalUrl = tab.linkedBrowser.currentURI.spec; | |
| 298 | |
| 299 let document = tab.linkedBrowser.contentDocument; | |
| 300 if (document.documentElement) | |
| 301 { | |
| 302 try | |
| 303 { | |
| 304 let canvas = document.createElementNS("http://www.w3.org/1999/xhtml", "canvas"); | |
| 305 canvas.width = document.documentElement.scrollWidth; | |
| 306 canvas.height = document.documentElement.scrollHeight; | |
| 307 | |
| 308 let context = canvas.getContext("2d"); | |
| 309 context.drawWindow(document.defaultView, 0, 0, canvas.width, canvas.height, "rgb(255, 255, 255)"); | |
| 310 result.screenshot = canvas.toDataURL("image/jpeg", 0.8); | |
| 311 } | |
| 312 catch (e) | |
| 313 { | |
| 314 reportException(e); | |
| 315 result.error = "Capturing screenshot failed: " + e; | |
| 316 } | |
| 317 | |
| 318 // TODO: Capture frames as well? | |
| 319 let serializer = new tab.ownerDocument.defaultView.XMLSerializer(); | |
| 320 result.source = serializer.serializeToString(document.documentElement); | |
| 321 } | |
| 322 } | 240 } |
| 323 finally | 241 finally |
| 324 { | 242 { |
| 325 tabAllocator.releaseTab(tab); | 243 tabAllocator.releaseTab(tab); |
| 326 } | 244 } |
| 327 return result; | 245 return result; |
| 328 } | 246 } |
| 329 | 247 |
| 330 function reportException(e) | 248 function reportException(e) |
| 331 { | 249 { |
| 332 let stack = ""; | 250 let stack = ""; |
| 333 if (e && typeof e == "object" && "stack" in e) | 251 if (e && typeof e == "object" && "stack" in e) |
| 334 stack = e.stack + "\n"; | 252 stack = e.stack + "\n"; |
| 335 | 253 |
| 336 Cu.reportError(e); | 254 Cu.reportError(e); |
| 337 dump(e + "\n" + stack + "\n"); | 255 dump(e + "\n" + stack + "\n"); |
| 338 } | 256 } |
| 257 | |
| 258 let {addonRoot} = require("info"); | |
| 259 let frameScriptPath = addonRoot + "/lib/child/frameScript.js"; | |
| 260 let globalMessageManager = Services.mm; | |
| 261 globalMessageManager.loadFrameScript(frameScriptPath, true); | |
|
Wladimir Palant
2016/03/15 10:07:10
This should be a process script, no point using a
| |
| 262 | |
| 263 let onReportException = function(msg) | |
| 264 { | |
| 265 reportException(msg.objects); | |
|
Wladimir Palant
2016/03/15 10:07:10
Please don't use msg.objects - ever. That's a wrap
sergei
2016/03/16 14:44:23
Acknowledged. Actually, I wanted to avoid duplicat
sergei
2016/03/16 14:44:23
I know, I used `msg.objects` because we don't know
Wladimir Palant
2016/09/14 16:11:46
Worst-case scenario: deadlocks because all of that
| |
| 266 } | |
| 267 globalMessageManager.addMessageListener("abpcrawler:reportException", onReportException); | |
| 268 | |
| 269 onShutdown.add(() => | |
| 270 { | |
| 271 globalMessageManager.removeMessageListener("abpcrawler:reportException", onReportException); | |
| 272 globalMessageManager.removeDelayedFrameScript(frameScriptPath); | |
| 273 }); | |
| OLD | NEW |