OLD | NEW |
1 /* | 1 /* |
2 * This Source Code is subject to the terms of the Mozilla Public License | 2 * This Source Code is subject to the terms of the Mozilla Public License |
3 * version 2.0 (the "License"). You can obtain a copy of the License at | 3 * version 2.0 (the "License"). You can obtain a copy of the License at |
4 * http://mozilla.org/MPL/2.0/. | 4 * http://mozilla.org/MPL/2.0/. |
5 */ | 5 */ |
6 | 6 |
7 "use strict"; | 7 "use strict"; |
8 | 8 |
9 /** | 9 /** |
10 * @module crawler | 10 * @module crawler |
11 */ | 11 */ |
12 | 12 |
13 const {Services} = Cu.import("resource://gre/modules/Services.jsm", {}); | 13 const {Services} = Cu.import("resource://gre/modules/Services.jsm", {}); |
14 const {XPCOMUtils} = Cu.import("resource://gre/modules/XPCOMUtils.jsm", {}); | 14 const {XPCOMUtils} = Cu.import("resource://gre/modules/XPCOMUtils.jsm", {}); |
15 const {Task} = Cu.import("resource://gre/modules/Task.jsm", {}); | 15 const {Task} = Cu.import("resource://gre/modules/Task.jsm", {}); |
| 16 const {setTimeout, clearTimeout} = Cu.import("resource://gre/modules/Timer.jsm",
{}); |
16 | 17 |
/**
 * Requests a module through the "adblockplus-require" observer notification.
 *
 * An object that points to itself via `wrappedJSObject` is passed as the
 * notification subject; whatever ends up in its `exports` property after the
 * observers have run is returned (presumably set by Adblock Plus - verify
 * against the observer implementation).
 *
 * @param {String} module
 *   Name of the module, passed as the notification data
 * @return {*}
 *   Value of the subject's `exports` property, undefined if nobody set it
 */
function abprequire(module)
{
  let carrier = {};
  // Self-reference so that observers can reach the plain JS object.
  carrier.wrappedJSObject = carrier;
  Services.obs.notifyObservers(carrier, "adblockplus-require", module);

  let moduleExports = carrier.exports;
  return moduleExports;
}
24 | 25 |
25 let {RequestNotifier} = abprequire("requestNotifier"); | 26 let {RequestNotifier} = abprequire("requestNotifier"); |
(...skipping 76 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
102 * @param {tab} tab | 103 * @param {tab} tab |
103 */ | 104 */ |
104 releaseTab: function(tab) | 105 releaseTab: function(tab) |
105 { | 106 { |
106 // If we are about to close last tab don't close it immediately to keep | 107 // If we are about to close last tab don't close it immediately to keep |
107 // the window alive. It will be closed when a new tab is created. | 108 // the window alive. It will be closed when a new tab is created. |
108 if (this._tabs > 1) | 109 if (this._tabs > 1) |
109 this._browser.removeTab(tab); | 110 this._browser.removeTab(tab); |
110 else | 111 else |
111 { | 112 { |
112 // navigate away from early opened URL | 113 // navigate away from previously opened URL |
113 tab.linkedBrowser.loadURI('about:blank', null, null); | 114 tab.linkedBrowser.loadURI("about:blank", null, null); |
114 this._tabKeepingWindowAlive = tab; | 115 this._tabKeepingWindowAlive = tab; |
115 } | 116 } |
116 | 117 |
117 this._tabs--; | 118 this._tabs--; |
118 if (this._resolvers.length && this._tabs < this._maxtabs) | 119 if (this._resolvers.length && this._tabs < this._maxtabs) |
119 { | 120 { |
120 this._resolvers.shift()(this._createTab()); | 121 this._resolvers.shift()(this._createTab()); |
121 } | 122 } |
122 }, | 123 }, |
123 }; | 124 }; |
124 | 125 |
125 /** | 126 /** |
126 * Observes page loads in a particular tabbed browser. | |
127 * | |
128 * @param {tabbrowser} browser | |
129 * The tabbed browser to be observed | |
130 * @param {int} timeout | |
131 * Load timeout in milliseconds | |
132 * @constructor | |
133 */ | |
134 function LoadListener(browser, timeout) | |
135 { | |
136 this._browser = browser; | |
137 this._deferred = new Map(); | |
138 this._timeout = timeout; | |
139 browser.addTabsProgressListener(this); | |
140 } | |
141 LoadListener.prototype = { | |
142 /** | |
143 * Returns a promise that will be resolved when the page in the specified tab | |
144 * finishes loading. Loading will be stopped if the timeout is reached. | |
145 * | |
146 * @param {tab} tab | |
147 * @result {Promise} | |
148 */ | |
149 waitForLoad: function(tab) | |
150 { | |
151 return new Promise((resolve, reject) => | |
152 { | |
153 this._deferred.set(tab.linkedBrowser, resolve); | |
154 | |
155 tab.ownerDocument.defaultView.setTimeout(function() | |
156 { | |
157 tab.linkedBrowser.stop(); | |
158 }, this._timeout); | |
159 }); | |
160 }, | |
161 | |
162 /** | |
163 * Deactivates this object. | |
164 */ | |
165 stop: function() | |
166 { | |
167 this._browser.removeTabsProgressListener(this); | |
168 }, | |
169 | |
170 onStateChange: function(browser, progress, request, flags, status) | |
171 { | |
172 if ((flags & Ci.nsIWebProgressListener.STATE_STOP) && (flags & Ci.nsIWebProg
ressListener.STATE_IS_WINDOW)) | |
173 { | |
174 let resolve = this._deferred.get(browser); | |
175 if (resolve) | |
176 { | |
177 this._deferred.delete(browser); | |
178 | |
179 let headers = []; | |
180 if (request instanceof Ci.nsIHttpChannel) | |
181 { | |
182 try | |
183 { | |
184 headers.push("HTTP/x.x " + request.responseStatus + " " + request.re
sponseStatusText); | |
185 request.visitResponseHeaders((header, value) => headers.push(header
+ ": " + value)); | |
186 } | |
187 catch (e) | |
188 { | |
189 // Exceptions are expected here | |
190 } | |
191 } | |
192 resolve([status, headers]); | |
193 } | |
194 } | |
195 } | |
196 }; | |
197 | |
198 /** | |
199 * Once created, this object will make sure all new windows are dismissed | 127 * Once created, this object will make sure all new windows are dismissed |
200 * immediately. | 128 * immediately. |
201 * | 129 * |
202 * @constructor | 130 * @constructor |
203 */ | 131 */ |
function WindowCloser()
{
  // Registers this object for the "xul-window-registered" notification. The
  // final `true` is the observer service's "owns weak" flag - presumably the
  // reason the prototype advertises nsISupportsWeakReference.
  const topic = "xul-window-registered";
  const ownsWeak = true;
  Services.obs.addObserver(this, topic, ownsWeak);
}
208 WindowCloser.prototype = { | 136 WindowCloser.prototype = { |
(...skipping 14 matching lines...) Expand all Loading... |
223 if (window.document.documentElement.localName == 'dialog') | 151 if (window.document.documentElement.localName == 'dialog') |
224 window.document.documentElement.acceptDialog(); | 152 window.document.documentElement.acceptDialog(); |
225 else | 153 else |
226 window.close(); | 154 window.close(); |
227 }, false); | 155 }, false); |
228 }, | 156 }, |
229 | 157 |
230 QueryInterface: XPCOMUtils.generateQI([Ci.nsIObserver, Ci.nsISupportsWeakRefer
ence]) | 158 QueryInterface: XPCOMUtils.generateQI([Ci.nsIObserver, Ci.nsISupportsWeakRefer
ence]) |
231 }; | 159 }; |
232 | 160 |
| 161 function configureFrameScript() |
| 162 { |
| 163 const info = require("info"); |
| 164 let frameScriptPath = info.addonRoot + "/lib/child/frameScript.js"; |
| 165 Services.mm.loadFrameScript(frameScriptPath, true); |
| 166 |
| 167 onShutdown.add(() => |
| 168 { |
| 169 Services.mm.removeDelayedFrameScript(frameScriptPath); |
| 170 }); |
| 171 } |
| 172 |
233 /** | 173 /** |
234 * Starts the crawling session. The crawler opens each URL in a tab and stores | 174 * Starts the crawling session. The crawler opens each URL in a tab and stores |
235 * the results. | 175 * the results. |
236 * | 176 * |
237 * @param {Window} window | 177 * @param {Window} window |
238 * The browser window we're operating in | 178 * The browser window we're operating in |
239 * @param {String[]} urls | 179 * @param {String[]} urls |
240 * URLs to be crawled | 180 * URLs to be crawled |
241 * @param {int} timeout | 181 * @param {int} timeout |
242 * Load timeout in milliseconds | 182 * Load timeout in milliseconds |
243 * @param {int} maxtabs | 183 * @param {int} maxtabs |
244 * Maximum number of tabs to be opened | 184 * Maximum number of tabs to be opened |
245 * @param {String} targetURL | 185 * @param {String} targetURL |
246 * URL that should receive the results | 186 * URL that should receive the results |
247 * @param {Function} onDone | 187 * @param {Function} onDone |
248 * The callback which is called after finishing of crawling of all URLs. | 188 * The callback which is called after finishing of crawling of all URLs. |
249 */ | 189 */ |
250 function run(window, urls, timeout, maxtabs, targetURL, onDone) | 190 function run(window, urls, timeout, maxtabs, targetURL, onDone) |
251 { | 191 { |
| 192 configureFrameScript(); |
252 new Promise((resolve, reject) => | 193 new Promise((resolve, reject) => |
253 { | 194 { |
254 if (FilterStorage.subscriptions.length > 0) | 195 if (FilterStorage.subscriptions.length > 0) |
255 { | 196 { |
256 resolve(); | 197 resolve(); |
257 return; | 198 return; |
258 } | 199 } |
259 let onFiltersLoaded = (action, item, newValue, oldValue) => | 200 let onFiltersLoaded = (action, item, newValue, oldValue) => |
260 { | 201 { |
261 if (action == "load") | 202 if (action == "load") |
(...skipping 20 matching lines...) Expand all Loading... |
282 * @param {int} maxtabs | 223 * @param {int} maxtabs |
283 * Maximum number of tabs to be opened | 224 * Maximum number of tabs to be opened |
284 * @param {String} targetURL | 225 * @param {String} targetURL |
285 * URL that should receive the results | 226 * URL that should receive the results |
286 * @param {Function} onDone | 227 * @param {Function} onDone |
287 * The callback which is called after finishing of all tasks. | 228 * The callback which is called after finishing of all tasks. |
288 */ | 229 */ |
function crawl_urls(window, urls, timeout, maxtabs, targetURL, onDone)
{
  let tabAllocator = new TabAllocator(window.getBrowser(), maxtabs);
  let windowCloser = new WindowCloser();

  // Number of URLs whose result has not been delivered to targetURL yet.
  let running = 0;

  // Ends the session: stops dismissing new windows and notifies the caller.
  let finish = function()
  {
    windowCloser.stop();
    onDone();
  };

  let taskDone = function()
  {
    running--;
    if (running <= 0)
      finish();
  };

  // POSTs one result object to targetURL. The task counts as done once the
  // request has either loaded or failed. If the request cannot even be
  // started (e.g. serialization or send() throws) the task is completed
  // right away, otherwise the session would never finish.
  let submit = function(payload)
  {
    try
    {
      let request = new XMLHttpRequest();
      request.open("POST", targetURL);
      request.addEventListener("load", taskDone, false);
      request.addEventListener("error", taskDone, false);
      request.send(JSON.stringify(payload));
    }
    catch (e)
    {
      reportException(e);
      taskDone();
    }
  };

  for (let url of urls)
  {
    running++;
    Task.spawn(crawl_url.bind(null, url, tabAllocator, timeout)).then(submit, function(exception)
    {
      reportException(exception);

      // Crawling itself failed, report the failure instead of a result.
      submit({
        url: url,
        startTime: Date.now(),
        error: String(exception)
      });
    });
  }

  // With no URLs no task will ever call taskDone(), finish immediately.
  if (running == 0)
    finish();
}
332 | 272 |
/**
 * Waits for the "abpcrawler:pageInfoGathered" message on the message manager
 * of the given tab's browser and resolves with the data of that message. If
 * no such message arrives within `timeout` milliseconds, the promise is
 * resolved with `{error: "timeout"}` instead. In both cases the message
 * listener is removed and the timer is cancelled. The promise is never
 * rejected deliberately; the page load itself is not stopped on timeout.
 *
 * @param {tab} tab
 *   Tab whose page info is awaited
 * @param {int} timeout
 *   Timeout in milliseconds
 * @return {Promise}
 *   Promise which will be resolved with the received page info or with an
 *   object having an `error` property
 */
function getPageInfo(tab, timeout)
{
  return new Promise((resolve, reject) =>
  {
    let mm = tab.linkedBrowser.messageManager;
    let timerID;
    let onDone = (msg) =>
    {
      mm.removeMessageListener("abpcrawler:pageInfoGathered", onDone);
      clearTimeout(timerID);
      resolve(msg.data);
    };
    mm.addMessageListener("abpcrawler:pageInfoGathered", onDone);
    // The timeout goes through the same path as a real message, so that
    // cleanup happens in exactly one place.
    timerID = setTimeout(() => onDone({data: {error: "timeout"}}), timeout);
  });
}
| 299 |
/**
 * Crawls a single URL. This is a generator meant to be driven by a Task
 * object: it yields the value returned by `tabAllocator.getTab()` and later
 * the promise returned by `getPageInfo()`, and expects to be resumed with
 * their results.
 *
 * While the page is loading, every entry reported by the RequestNotifier is
 * recorded in `requests`. The page info is merged into the result, followed
 * by `finalUrl` and `endTime`. The notifier is shut down and the tab is
 * released in any case, even if an exception occurs.
 *
 * @param {String} url
 * @param {TabAllocator} tabAllocator
 * @param {int} timeout
 *   Timeout in milliseconds for receiving the page info
 * @return {Object}
 *   Crawling result
 */
function* crawl_url(url, tabAllocator, timeout)
{
  let tab = yield tabAllocator.getTab();
  let result = {url: url, requests: []};
  let notifier = null;

  // Called by the RequestNotifier; calls without an entry are ignored.
  let recordRequest = (entry, scanComplete) =>
  {
    if (entry)
    {
      let {type, location, filter} = entry;
      result.requests.push({location: location, contentType: type, filter: filter});
    }
  };

  try
  {
    result.startTime = Date.now();
    notifier = new RequestNotifier(tab.linkedBrowser.outerWindowID, recordRequest);

    tab.linkedBrowser.loadURI(url, null, null);

    let pageInfo = yield getPageInfo(tab, timeout);
    Object.assign(result, pageInfo);
    result.finalUrl = tab.linkedBrowser.currentURI.spec;
    result.endTime = Date.now();
  }
  finally
  {
    if (notifier)
      notifier.shutdown();
    tabAllocator.releaseTab(tab);
  }
  return result;
}
396 | 341 |
/**
 * Reports an exception via Cu.reportError() and additionally writes it,
 * followed by its stack trace if it has one, using dump().
 *
 * @param {*} e
 *   The exception; any value is accepted
 */
function reportException(e)
{
  // Only objects can carry a stack; the `in` operator would throw for
  // primitives, hence the type check first.
  let hasStack = e && typeof e == "object" && "stack" in e;
  let stack = hasStack ? e.stack + "\n" : "";

  Cu.reportError(e);
  dump(e + "\n" + stack + "\n");
}
OLD | NEW |