OLD | NEW |
1 /* | 1 /* |
2 * This Source Code is subject to the terms of the Mozilla Public License | 2 * This Source Code is subject to the terms of the Mozilla Public License |
3 * version 2.0 (the "License"). You can obtain a copy of the License at | 3 * version 2.0 (the "License"). You can obtain a copy of the License at |
4 * http://mozilla.org/MPL/2.0/. | 4 * http://mozilla.org/MPL/2.0/. |
5 */ | 5 */ |
6 | 6 |
7 /** | 7 /** |
8 * @module crawler | 8 * @module crawler |
9 */ | 9 */ |
10 | 10 |
11 Cu.import("resource://gre/modules/Services.jsm"); | 11 Cu.import("resource://gre/modules/Services.jsm"); |
12 Cu.import("resource://gre/modules/Task.jsm"); | 12 Cu.import("resource://gre/modules/Task.jsm"); |
13 Cu.import("resource://gre/modules/Promise.jsm"); | 13 Cu.import("resource://gre/modules/Promise.jsm"); |
| 14 Cu.import("resource://gre/modules/Timer.jsm"); |
14 | 15 |
15 function abprequire(module) | 16 function abprequire(module) |
16 { | 17 { |
17 let result = {}; | 18 let result = {}; |
18 result.wrappedJSObject = result; | 19 result.wrappedJSObject = result; |
19 Services.obs.notifyObservers(result, "adblockplus-require", module); | 20 Services.obs.notifyObservers(result, "adblockplus-require", module); |
20 return result.exports; | 21 return result.exports; |
21 } | 22 } |
22 | 23 |
23 let {RequestNotifier} = abprequire("requestNotifier"); | 24 let {RequestNotifier} = abprequire("requestNotifier"); |
(...skipping 82 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
106 browser.removeTab(tab); | 107 browser.removeTab(tab); |
107 | 108 |
108 if (this._resolvers.length) | 109 if (this._resolvers.length) |
109 this._resolvers.shift()(createTab(this._browser)); | 110 this._resolvers.shift()(createTab(this._browser)); |
110 else | 111 else |
111 this._tabs--; | 112 this._tabs--; |
112 } | 113 } |
113 }; | 114 }; |
114 | 115 |
115 /** | 116 /** |
116 * Observes page loads in a particular tabbed browser. | |
117 * | |
118 * @param {tabbrowser} browser | |
119 * The tabbed browser to be observed | |
120 * @param {int} timeout | |
121 * Load timeout in milliseconds | |
122 * @constructor | |
123 */ | |
124 function LoadListener(browser, timeout) | |
125 { | |
126 this._browser = browser; | |
127 this._deferred = new Map(); | |
128 this._timeout = timeout; | |
129 browser.addTabsProgressListener(this); | |
130 } | |
131 LoadListener.prototype = { | |
132 /** | |
133 * Returns a promise that will be resolved when the page in the specified tab | |
134 * finishes loading. Loading will be stopped if the timeout is reached. | |
135 * | |
136 * @param {tab} tab | |
137 * @result {Promise} | |
138 */ | |
139 waitForLoad: function(tab) | |
140 { | |
141 let deferred = Promise.defer(); | |
142 this._deferred.set(tab.linkedBrowser, deferred); | |
143 | |
144 tab.ownerDocument.defaultView.setTimeout(function() | |
145 { | |
146 tab.linkedBrowser.stop(); | |
147 }, this._timeout); | |
148 | |
149 return deferred.promise; | |
150 }, | |
151 | |
152 /** | |
153 * Deactivates this object. | |
154 */ | |
155 stop: function() | |
156 { | |
157 this._browser.removeTabsProgressListener(this); | |
158 }, | |
159 | |
160 onStateChange: function(browser, progress, request, flags, status) | |
161 { | |
162 if ((flags & Ci.nsIWebProgressListener.STATE_STOP) && (flags & Ci.nsIWebProg
ressListener.STATE_IS_WINDOW)) | |
163 { | |
164 let deferred = this._deferred.get(browser); | |
165 if (deferred) | |
166 { | |
167 this._deferred.delete(browser); | |
168 | |
169 let headers = []; | |
170 if (request instanceof Ci.nsIHttpChannel) | |
171 { | |
172 try | |
173 { | |
174 headers.push("HTTP/x.x " + request.responseStatus + " " + request.re
sponseStatusText); | |
175 request.visitResponseHeaders((header, value) => headers.push(header
+ ": " + value)); | |
176 } | |
177 catch (e) | |
178 { | |
179 // Exceptions are expected here | |
180 } | |
181 } | |
182 deferred.resolve([status, headers]); | |
183 } | |
184 } | |
185 } | |
186 }; | |
187 | |
188 /** | |
189 * Once created, this object will make sure all new windows are dismissed | 117 * Once created, this object will make sure all new windows are dismissed |
190 * immediately. | 118 * immediately. |
191 * | 119 * |
192 * @constructor | 120 * @constructor |
193 */ | 121 */ |
194 function WindowCloser() | 122 function WindowCloser() |
195 { | 123 { |
196 Services.obs.addObserver(this, "xul-window-registered", true) | 124 Services.obs.addObserver(this, "xul-window-registered", true) |
197 } | 125 } |
198 WindowCloser.prototype = { | 126 WindowCloser.prototype = { |
(...skipping 73 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
272 * @param {int} maxtabs | 200 * @param {int} maxtabs |
273 * Maximum number of tabs to be opened | 201 * Maximum number of tabs to be opened |
274 * @param {String} targetURL | 202 * @param {String} targetURL |
275 * URL that should receive the results | 203 * URL that should receive the results |
276 * @param {Function} onDone | 204 * @param {Function} onDone |
277 * The callback which is called after all tasks have finished. | 205 * The callback which is called after all tasks have finished. |
278 */ | 206 */ |
279 function crawl_urls(window, urls, timeout, maxtabs, targetURL, onDone) | 207 function crawl_urls(window, urls, timeout, maxtabs, targetURL, onDone) |
280 { | 208 { |
281 let tabAllocator = new TabAllocator(window.getBrowser(), maxtabs); | 209 let tabAllocator = new TabAllocator(window.getBrowser(), maxtabs); |
282 let loadListener = new LoadListener(window.getBrowser(), timeout); | 210 |
283 let running = 0; | 211 let running = 0; |
284 let windowCloser = new WindowCloser(); | 212 let windowCloser = new WindowCloser(); |
285 let taskDone = function() | 213 let taskDone = function() |
286 { | 214 { |
287 running--; | 215 running--; |
288 if (running <= 0) | 216 if (running <= 0) |
289 { | 217 { |
290 loadListener.stop(); | |
291 windowCloser.stop(); | 218 windowCloser.stop(); |
292 onDone(); | 219 onDone(); |
293 } | 220 } |
294 }; | 221 }; |
295 | 222 |
296 for (let url of urls) | 223 for (let url of urls) |
297 { | 224 { |
298 running++; | 225 running++; |
299 Task.spawn(crawl_url.bind(null, url, tabAllocator, loadListener)).then(funct
ion(result) | 226 Task.spawn(crawl_url.bind(null, url, tabAllocator, timeout)).then(function(r
esult) |
300 { | 227 { |
301 let request = new XMLHttpRequest(); | 228 let request = new XMLHttpRequest(); |
302 request.open("POST", targetURL); | 229 request.open("POST", targetURL); |
303 request.addEventListener("load", taskDone, false); | 230 request.addEventListener("load", taskDone, false); |
304 request.addEventListener("error", taskDone, false); | 231 request.addEventListener("error", taskDone, false); |
305 request.send(JSON.stringify(result)); | 232 request.send(JSON.stringify(result)); |
306 }, function(url, exception) | 233 }, function(url, exception) |
307 { | 234 { |
308 reportException(exception); | 235 reportException(exception); |
309 | 236 |
310 let request = new XMLHttpRequest(); | 237 let request = new XMLHttpRequest(); |
311 request.open("POST", targetURL); | 238 request.open("POST", targetURL); |
312 request.addEventListener("load", taskDone, false); | 239 request.addEventListener("load", taskDone, false); |
313 request.addEventListener("error", taskDone, false); | 240 request.addEventListener("error", taskDone, false); |
314 request.send(JSON.stringify({ | 241 request.send(JSON.stringify({ |
315 url: url, | 242 url: url, |
316 startTime: Date.now(), | 243 startTime: Date.now(), |
317 error: String(exception) | 244 error: String(exception) |
318 })); | 245 })); |
319 }.bind(null, url)); | 246 }.bind(null, url)); |
320 } | 247 } |
321 } | 248 } |
322 | 249 |
323 /** | 250 /** |
| 251 * Expects to receive page info gathered in a content process for the specified |
| 252 * `tab`. If there is no relevant message within the specified `timeout` then |
| 253 * the resulting promise is resolved with an error object. |
| 254 * @param tab |
| 256 * Tab we are interested in |
| 256 * @param {int} timeout |
| 257 * Timeout in milliseconds |
| 258 * @return {Promise} promise which will be resolved with the received page info |
| 259 */ |
| 260 function getPageInfo(tab, timeout) |
| 261 { |
| 262 return new Promise((resolve, result) => |
| 263 { |
| 264 let mm = tab.linkedBrowser.messageManager; |
| 265 let timerID; |
| 266 let onDone = (pageInfo) => |
| 267 { |
| 268 mm.removeMessageListener("abpcrawler:pageInfoGathered", onDone); |
| 269 clearTimeout(timerID); |
| 270 resolve(pageInfo); |
| 271 } |
| 272 mm.addMessageListener("abpcrawler:pageInfoGathered", (msg) => onDone(msg.dat
a));; |
| 273 timerID = setTimeout(onDone.bind(this, {error: "timeout"}), timeout); |
| 274 }); |
| 275 } |
| 276 |
| 277 /** |
324 * Crawls a URL. This is a generator meant to be used via a Task object. | 278 * Crawls a URL. This is a generator meant to be used via a Task object. |
325 * | 279 * |
326 * @param {String} url | 280 * @param {String} url |
327 * @param {TabAllocator} tabAllocator | 281 * @param {TabAllocator} tabAllocator |
328 * @param {loadListener} loadListener | 282 * @param {int} timeout |
| 283 * Load timeout in milliseconds |
329 * @result {Object} | 284 * @result {Object} |
330 * Crawling result | 285 * Crawling result |
331 */ | 286 */ |
332 function* crawl_url(url, tabAllocator, loadListener) | 287 function* crawl_url(url, tabAllocator, timeout) |
333 { | 288 { |
334 let tab = yield tabAllocator.getTab(); | 289 let tab = yield tabAllocator.getTab(); |
335 let result = {url, requests: []}; | 290 let result = {url, requests: []}; |
336 let requestNotifier; | 291 let requestNotifier; |
337 try | 292 try |
338 { | 293 { |
339 result.startTime = Date.now(); | 294 result.startTime = Date.now(); |
340 requestNotifier = new RequestNotifier(tab.linkedBrowser.outerWindowID, | 295 requestNotifier = new RequestNotifier(tab.linkedBrowser.outerWindowID, |
341 function(entry, scanComplete) | 296 function(entry, scanComplete) |
342 { | 297 { |
343 if (!entry) | 298 if (!entry) |
344 return; | 299 return; |
345 let {type: contentType, location, filter} = entry; | 300 let {type: contentType, location, filter} = entry; |
346 result.requests.push({location, contentType, filter}); | 301 result.requests.push({location, contentType, filter}); |
347 }); | 302 }); |
348 | 303 |
349 tab.linkedBrowser.loadURI(url, null, null); | 304 tab.linkedBrowser.loadURI(url, null, null); |
350 [result.channelStatus, result.headers] = yield loadListener.waitForLoad(tab)
; | 305 |
| 306 result.finalUrl = tab.linkedBrowser.currentURI.spec; |
| 307 Object.assign(result, yield getPageInfo(tab, timeout)); |
351 result.endTime = Date.now(); | 308 result.endTime = Date.now(); |
352 result.finalUrl = tab.linkedBrowser.currentURI.spec; | |
353 | |
354 let document = tab.linkedBrowser.contentDocument; | |
355 if (document.documentElement) | |
356 { | |
357 try | |
358 { | |
359 let canvas = document.createElementNS("http://www.w3.org/1999/xhtml", "c
anvas"); | |
360 canvas.width = document.documentElement.scrollWidth; | |
361 canvas.height = document.documentElement.scrollHeight; | |
362 | |
363 let context = canvas.getContext("2d"); | |
364 context.drawWindow(document.defaultView, 0, 0, canvas.width, canvas.heig
ht, "rgb(255, 255, 255)"); | |
365 result.screenshot = canvas.toDataURL("image/jpeg", 0.8); | |
366 } | |
367 catch (e) | |
368 { | |
369 reportException(e); | |
370 result.error = "Capturing screenshot failed: " + e; | |
371 } | |
372 | |
373 // TODO: Capture frames as well? | |
374 let serializer = new tab.ownerDocument.defaultView.XMLSerializer(); | |
375 result.source = serializer.serializeToString(document.documentElement); | |
376 } | |
377 } | 309 } |
378 finally | 310 finally |
379 { | 311 { |
380 if (requestNotifier) | 312 if (requestNotifier) |
381 requestNotifier.shutdown(); | 313 requestNotifier.shutdown(); |
382 tabAllocator.releaseTab(tab); | 314 tabAllocator.releaseTab(tab); |
383 } | 315 } |
384 return result; | 316 return result; |
385 } | 317 } |
386 | 318 |
387 function reportException(e) | 319 function reportException(e) |
388 { | 320 { |
389 let stack = ""; | 321 let stack = ""; |
390 if (e && typeof e == "object" && "stack" in e) | 322 if (e && typeof e == "object" && "stack" in e) |
391 stack = e.stack + "\n"; | 323 stack = e.stack + "\n"; |
392 | 324 |
393 Cu.reportError(e); | 325 Cu.reportError(e); |
394 dump(e + "\n" + stack + "\n"); | 326 dump(e + "\n" + stack + "\n"); |
395 } | 327 } |
| 328 |
| 329 let {addonRoot} = require("info"); |
| 330 let frameScriptPath = addonRoot + "/lib/child/frameScript.js"; |
| 331 let globalMessageManager = Services.mm; |
| 332 globalMessageManager.loadFrameScript(frameScriptPath, true); |
| 333 |
| 334 onShutdown.add(() => |
| 335 { |
| 336 globalMessageManager.removeDelayedFrameScript(frameScriptPath); |
| 337 }); |
OLD | NEW |