OLD | NEW |
1 /* | 1 /* |
2 * This Source Code is subject to the terms of the Mozilla Public License | 2 * This Source Code is subject to the terms of the Mozilla Public License |
3 * version 2.0 (the "License"). You can obtain a copy of the License at | 3 * version 2.0 (the "License"). You can obtain a copy of the License at |
4 * http://mozilla.org/MPL/2.0/. | 4 * http://mozilla.org/MPL/2.0/. |
5 */ | 5 */ |
6 | 6 |
7 /** | 7 /** |
8 * @module crawler | 8 * @module crawler |
9 */ | 9 */ |
10 | 10 |
11 Cu.import("resource://gre/modules/Services.jsm"); | 11 Cu.import("resource://gre/modules/Services.jsm"); |
12 Cu.import("resource://gre/modules/Task.jsm"); | 12 Cu.import("resource://gre/modules/Task.jsm"); |
13 Cu.import("resource://gre/modules/Promise.jsm"); | 13 Cu.import("resource://gre/modules/Promise.jsm"); |
| 14 Cu.import("resource://gre/modules/Timer.jsm"); |
14 | 15 |
15 function abprequire(module) | 16 function abprequire(module) |
16 { | 17 { |
17 let result = {}; | 18 let result = {}; |
18 result.wrappedJSObject = result; | 19 result.wrappedJSObject = result; |
19 Services.obs.notifyObservers(result, "adblockplus-require", module); | 20 Services.obs.notifyObservers(result, "adblockplus-require", module); |
20 return result.exports; | 21 return result.exports; |
21 } | 22 } |
22 | 23 |
23 let {RequestNotifier} = abprequire("requestNotifier"); | 24 let {RequestNotifier} = abprequire("requestNotifier"); |
(...skipping 79 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
103 browser.removeTab(tab); | 104 browser.removeTab(tab); |
104 | 105 |
105 if (this._resolvers.length) | 106 if (this._resolvers.length) |
106 this._resolvers.shift()(createTab(this._browser)); | 107 this._resolvers.shift()(createTab(this._browser)); |
107 else | 108 else |
108 this._tabs--; | 109 this._tabs--; |
109 } | 110 } |
110 }; | 111 }; |
111 | 112 |
112 /** | 113 /** |
113 * Observes page loads in a particular tabbed browser. | |
114 * | |
115 * @param {tabbrowser} browser | |
116 * The tabbed browser to be observed | |
117 * @param {int} timeout | |
118 * Load timeout in milliseconds | |
119 * @constructor | |
120 */ | |
121 function LoadListener(browser, timeout) | |
122 { | |
123 this._browser = browser; | |
124 this._deferred = new Map(); | |
125 this._timeout = timeout; | |
126 browser.addTabsProgressListener(this); | |
127 } | |
128 LoadListener.prototype = { | |
129 /** | |
130 * Returns a promise that will be resolved when the page in the specified tab | |
131 * finishes loading. Loading will be stopped if the timeout is reached. | |
132 * | |
133 * @param {tab} tab | |
134 * @result {Promise} | |
135 */ | |
136 waitForLoad: function(tab) | |
137 { | |
138 let deferred = Promise.defer(); | |
139 this._deferred.set(tab.linkedBrowser, deferred); | |
140 | |
141 tab.ownerDocument.defaultView.setTimeout(function() | |
142 { | |
143 tab.linkedBrowser.stop(); | |
144 }, this._timeout); | |
145 | |
146 return deferred.promise; | |
147 }, | |
148 | |
149 /** | |
150 * Deactivates this object. | |
151 */ | |
152 stop: function() | |
153 { | |
154 this._browser.removeTabsProgressListener(this); | |
155 }, | |
156 | |
157 onStateChange: function(browser, progress, request, flags, status) | |
158 { | |
159 if ((flags & Ci.nsIWebProgressListener.STATE_STOP) && (flags & Ci.nsIWebProg
ressListener.STATE_IS_WINDOW)) | |
160 { | |
161 let deferred = this._deferred.get(browser); | |
162 if (deferred) | |
163 { | |
164 this._deferred.delete(browser); | |
165 | |
166 let headers = []; | |
167 if (request instanceof Ci.nsIHttpChannel) | |
168 { | |
169 try | |
170 { | |
171 headers.push("HTTP/x.x " + request.responseStatus + " " + request.re
sponseStatusText); | |
172 request.visitResponseHeaders((header, value) => headers.push(header
+ ": " + value)); | |
173 } | |
174 catch (e) | |
175 { | |
176 // Exceptions are expected here | |
177 } | |
178 } | |
179 deferred.resolve([status, headers]); | |
180 } | |
181 } | |
182 } | |
183 }; | |
184 | |
185 /** | |
186 * Once created, this object will make sure all new windows are dismissed | 114 * Once created, this object will make sure all new windows are dismissed |
187 * immediately. | 115 * immediately. |
188 * | 116 * |
189 * @constructor | 117 * @constructor |
190 */ | 118 */ |
191 function WindowCloser() | 119 function WindowCloser() |
192 { | 120 { |
193 Services.obs.addObserver(this, "xul-window-registered", true) | 121 Services.obs.addObserver(this, "xul-window-registered", true) |
194 } | 122 } |
195 WindowCloser.prototype = { | 123 WindowCloser.prototype = { |
(...skipping 73 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
269 * @param {int} maxtabs | 197 * @param {int} maxtabs |
270 * Maximum number of tabs to be opened | 198 * Maximum number of tabs to be opened |
271 * @param {String} targetURL | 199 * @param {String} targetURL |
272 * URL that should receive the results | 200 * URL that should receive the results |
273 * @param {Function} onDone | 201 * @param {Function} onDone |
274 * The callback which is called after finishing of all tasks. | 202 * The callback which is called after finishing of all tasks. |
275 */ | 203 */ |
276 function crawl_urls(window, urls, timeout, maxtabs, targetURL, onDone) | 204 function crawl_urls(window, urls, timeout, maxtabs, targetURL, onDone) |
277 { | 205 { |
278 let tabAllocator = new TabAllocator(window.getBrowser(), maxtabs); | 206 let tabAllocator = new TabAllocator(window.getBrowser(), maxtabs); |
279 let loadListener = new LoadListener(window.getBrowser(), timeout); | 207 |
280 let running = 0; | 208 let running = 0; |
281 let windowCloser = new WindowCloser(); | 209 let windowCloser = new WindowCloser(); |
282 let taskDone = function() | 210 let taskDone = function() |
283 { | 211 { |
284 running--; | 212 running--; |
285 if (running <= 0) | 213 if (running <= 0) |
286 { | 214 { |
287 loadListener.stop(); | |
288 windowCloser.stop(); | 215 windowCloser.stop(); |
289 onDone(); | 216 onDone(); |
290 } | 217 } |
291 }; | 218 }; |
292 | 219 |
293 for (let url of urls) | 220 for (let url of urls) |
294 { | 221 { |
295 running++; | 222 running++; |
296 Task.spawn(crawl_url.bind(null, url, tabAllocator, loadListener)).then(funct
ion(result) | 223 Task.spawn(crawl_url.bind(null, url, tabAllocator, timeout)).then(function(r
esult) |
297 { | 224 { |
298 let request = new XMLHttpRequest(); | 225 let request = new XMLHttpRequest(); |
299 request.open("POST", targetURL); | 226 request.open("POST", targetURL); |
300 request.addEventListener("load", taskDone, false); | 227 request.addEventListener("load", taskDone, false); |
301 request.addEventListener("error", taskDone, false); | 228 request.addEventListener("error", taskDone, false); |
302 request.send(JSON.stringify(result)); | 229 request.send(JSON.stringify(result)); |
303 }, function(url, exception) | 230 }, function(url, exception) |
304 { | 231 { |
305 reportException(exception); | 232 reportException(exception); |
306 | 233 |
307 let request = new XMLHttpRequest(); | 234 let request = new XMLHttpRequest(); |
308 request.open("POST", targetURL); | 235 request.open("POST", targetURL); |
309 request.addEventListener("load", taskDone, false); | 236 request.addEventListener("load", taskDone, false); |
310 request.addEventListener("error", taskDone, false); | 237 request.addEventListener("error", taskDone, false); |
311 request.send(JSON.stringify({ | 238 request.send(JSON.stringify({ |
312 url: url, | 239 url: url, |
313 startTime: Date.now(), | 240 startTime: Date.now(), |
314 error: String(exception) | 241 error: String(exception) |
315 })); | 242 })); |
316 }.bind(null, url)); | 243 }.bind(null, url)); |
317 } | 244 } |
318 } | 245 } |
319 | 246 |
320 /** | 247 /** |
| 248 * Expects to receive page info gathered in a content process for the specified |
| 249 * `tab`. If no relevant message arrives within the specified `timeout`, |
| 250 * the returned promise is resolved with an error object. |
| 251 * @param tab |
| 252 * Tab we are interested in |
| 253 * @param {int} timeout |
| 254 * Timeout in milliseconds |
| 255 * @return {Promise} promise which will be resolved with the received page info |
| 256 */ |
| 257 function getPageInfo(tab, timeout) |
| 258 { |
| 259 return new Promise((resolve, result) => |
| 260 { |
| 261 let mm = tab.linkedBrowser.messageManager; |
| 262 let timerID; |
| 263 let onDone = (pageInfo) => |
| 264 { |
| 265 mm.removeMessageListener("abpcrawler:pageInfoGathered", onDone); |
| 266 clearTimeout(timerID); |
| 267 resolve(pageInfo); |
| 268 } |
| 269 mm.addMessageListener("abpcrawler:pageInfoGathered", (msg) => onDone(msg.dat
a));; |
| 270 timerID = setTimeout(onDone.bind(this, {error: "timeout"}), timeout); |
| 271 }); |
| 272 } |
| 273 |
| 274 /** |
321 * Crawls a URL. This is a generator meant to be used via a Task object. | 275 * Crawls a URL. This is a generator meant to be used via a Task object. |
322 * | 276 * |
323 * @param {String} url | 277 * @param {String} url |
324 * @param {TabAllocator} tabAllocator | 278 * @param {TabAllocator} tabAllocator |
325 * @param {loadListener} loadListener | 279 * @param {int} timeout |
| 280 * Load timeout in milliseconds |
326 * @result {Object} | 281 * @result {Object} |
327 * Crawling result | 282 * Crawling result |
328 */ | 283 */ |
329 function* crawl_url(url, tabAllocator, loadListener) | 284 function* crawl_url(url, tabAllocator, timeout) |
330 { | 285 { |
331 let tab = yield tabAllocator.getTab(); | 286 let tab = yield tabAllocator.getTab(); |
332 let result = {url, requests: []}; | 287 let result = {url, requests: []}; |
333 let requestNotifier; | 288 let requestNotifier; |
334 try | 289 try |
335 { | 290 { |
336 result.startTime = Date.now(); | 291 result.startTime = Date.now(); |
337 requestNotifier = new RequestNotifier(tab.linkedBrowser.outerWindowID, | 292 requestNotifier = new RequestNotifier(tab.linkedBrowser.outerWindowID, |
338 function(entry, scanComplete) | 293 function(entry, scanComplete) |
339 { | 294 { |
340 if (!entry) | 295 if (!entry) |
341 return; | 296 return; |
342 let {type: contentType, location, filter} = entry; | 297 let {type: contentType, location, filter} = entry; |
343 result.requests.push({location, contentType, filter}); | 298 result.requests.push({location, contentType, filter}); |
344 }); | 299 }); |
345 | 300 |
346 tab.linkedBrowser.loadURI(url, null, null); | 301 tab.linkedBrowser.loadURI(url, null, null); |
347 [result.channelStatus, result.headers] = yield loadListener.waitForLoad(tab)
; | 302 |
| 303 result.finalUrl = tab.linkedBrowser.currentURI.spec; |
| 304 Object.assign(result, yield getPageInfo(tab, timeout)); |
348 result.endTime = Date.now(); | 305 result.endTime = Date.now(); |
349 result.finalUrl = tab.linkedBrowser.currentURI.spec; | |
350 | |
351 let document = tab.linkedBrowser.contentDocument; | |
352 if (document.documentElement) | |
353 { | |
354 try | |
355 { | |
356 let canvas = document.createElementNS("http://www.w3.org/1999/xhtml", "c
anvas"); | |
357 canvas.width = document.documentElement.scrollWidth; | |
358 canvas.height = document.documentElement.scrollHeight; | |
359 | |
360 let context = canvas.getContext("2d"); | |
361 context.drawWindow(document.defaultView, 0, 0, canvas.width, canvas.heig
ht, "rgb(255, 255, 255)"); | |
362 result.screenshot = canvas.toDataURL("image/jpeg", 0.8); | |
363 } | |
364 catch (e) | |
365 { | |
366 reportException(e); | |
367 result.error = "Capturing screenshot failed: " + e; | |
368 } | |
369 | |
370 // TODO: Capture frames as well? | |
371 let serializer = new tab.ownerDocument.defaultView.XMLSerializer(); | |
372 result.source = serializer.serializeToString(document.documentElement); | |
373 } | |
374 } | 306 } |
375 finally | 307 finally |
376 { | 308 { |
377 if (requestNotifier) | 309 if (requestNotifier) |
378 requestNotifier.shutdown(); | 310 requestNotifier.shutdown(); |
379 tabAllocator.releaseTab(tab); | 311 tabAllocator.releaseTab(tab); |
380 } | 312 } |
381 return result; | 313 return result; |
382 } | 314 } |
383 | 315 |
384 function reportException(e) | 316 function reportException(e) |
385 { | 317 { |
386 let stack = ""; | 318 let stack = ""; |
387 if (e && typeof e == "object" && "stack" in e) | 319 if (e && typeof e == "object" && "stack" in e) |
388 stack = e.stack + "\n"; | 320 stack = e.stack + "\n"; |
389 | 321 |
390 Cu.reportError(e); | 322 Cu.reportError(e); |
391 dump(e + "\n" + stack + "\n"); | 323 dump(e + "\n" + stack + "\n"); |
392 } | 324 } |
| 325 |
| 326 let {addonRoot} = require("info"); |
| 327 let frameScriptPath = addonRoot + "/lib/child/frameScript.js"; |
| 328 let globalMessageManager = Services.mm; |
| 329 globalMessageManager.loadFrameScript(frameScriptPath, true); |
| 330 |
| 331 onShutdown.add(() => |
| 332 { |
| 333 globalMessageManager.removeDelayedFrameScript(frameScriptPath); |
| 334 }); |
OLD | NEW |