OLD | NEW |
1 /* | 1 /* |
2 * This Source Code is subject to the terms of the Mozilla Public License | 2 * This Source Code is subject to the terms of the Mozilla Public License |
3 * version 2.0 (the "License"). You can obtain a copy of the License at | 3 * version 2.0 (the "License"). You can obtain a copy of the License at |
4 * http://mozilla.org/MPL/2.0/. | 4 * http://mozilla.org/MPL/2.0/. |
5 */ | 5 */ |
6 | 6 |
7 /** | 7 /** |
8 * @module crawler | 8 * @module crawler |
9 */ | 9 */ |
10 | 10 |
11 Cu.import("resource://gre/modules/Services.jsm"); | 11 Cu.import("resource://gre/modules/Services.jsm"); |
12 Cu.import("resource://gre/modules/Task.jsm"); | 12 Cu.import("resource://gre/modules/Task.jsm"); |
13 Cu.import("resource://gre/modules/Promise.jsm"); | 13 Cu.import("resource://gre/modules/Promise.jsm"); |
14 | 14 |
/**
 * Requests a module through the observer service.
 *
 * A fresh carrier object, which exposes itself via its own wrappedJSObject
 * property, is broadcast under the "adblockplus-require" topic together with
 * the module name. Whichever observer handles the notification is presumably
 * expected to store the module on the carrier's "exports" property.
 *
 * @param {String} module
 *   Name of the module to request
 * @result {Object}
 *   The value an observer stored in the carrier's "exports" property
 * @throws {Error}
 *   If no observer set "exports" on the carrier at all
 */
function abprequire(module)
{
  let result = {};
  result.wrappedJSObject = result;
  Services.obs.notifyObservers(result, "adblockplus-require", module);

  // Without this check a missing provider only surfaces later, as an opaque
  // TypeError where the caller destructures the (undefined) return value.
  if (!("exports" in result))
  {
    throw new Error("Module \"" + module + "\" is unavailable: no observer " +
                    "of \"adblockplus-require\" provided any exports");
  }
  return result.exports;
}
22 | 22 |
23 let {Policy} = abprequire("contentPolicy"); | |
24 let {RequestNotifier} = abprequire("requestNotifier"); | 23 let {RequestNotifier} = abprequire("requestNotifier"); |
25 let {Utils} = abprequire("utils"); | |
26 | 24 |
27 let dataForTab = new WeakMap(); | |
28 | 25 |
29 /** | 26 /** |
30 * Creates a pool of tabs and allocates them to tasks on request. | 27 * Creates a pool of tabs and allocates them to tasks on request. |
31 * | 28 * |
32 * @param {tabbrowser} browser | 29 * @param {tabbrowser} browser |
33 * The tabbed browser where tabs should be created | 30 * The tabbed browser where tabs should be created |
34 * @param {int} maxtabs | 31 * @param {int} maxtabs |
35 * The maximum number of tabs to be allocated | 32 * The maximum number of tabs to be allocated |
36 * @constructor | 33 * @constructor |
37 */ | 34 */ |
(...skipping 148 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
186 window.document.documentElement.acceptDialog(); | 183 window.document.documentElement.acceptDialog(); |
187 else | 184 else |
188 window.close(); | 185 window.close(); |
189 }, false); | 186 }, false); |
190 }, | 187 }, |
191 | 188 |
192 QueryInterface: XPCOMUtils.generateQI([Ci.nsIObserver, Ci.nsISupportsWeakRefer
ence]) | 189 QueryInterface: XPCOMUtils.generateQI([Ci.nsIObserver, Ci.nsISupportsWeakRefer
ence]) |
193 }; | 190 }; |
194 | 191 |
195 /** | 192 /** |
196 * Retrieves crawler results associated with a particular content window. | |
197 * | |
198 * @param {Window} window | |
199 * Content window to retrieve crawler results for | |
200 * @result {Object} | |
201 * Crawler results or undefined if the window wasn't created by the crawler. | |
202 */ | |
function getDataForWindow(window)
{
  // Returns the value unchanged if it is truthy, otherwise throws an Error
  // carrying the given message.
  let need = function(value, message)
  {
    if (!value)
      throw new Error(message);
    return value;
  };

  // Walk from the content window up to the tab that hosts it:
  // top window -> chrome window's tabbrowser -> browser -> tab.
  let {top} = window;
  need(top.document, "No document associated with the node's top window");

  let chromeWindow = Utils.getChromeWindow(top);
  let tabs = need(chromeWindow.getBrowser(),
                  "Unable to get a tabbrowser reference from the window");
  let contentBrowser = need(tabs.getBrowserForDocument(top.document),
                            "Unable to get browser for the content window");
  let owningTab = need(tabs.getTabForBrowser(contentBrowser),
                       "Unable to get tab for the browser");

  // Yields undefined when nothing was stored for this tab.
  return dataForTab.get(owningTab);
}
219 | |
220 /** | |
221 * Starts the crawling session. The crawler opens each URL in a tab and stores | 193 * Starts the crawling session. The crawler opens each URL in a tab and stores |
222 * the results. | 194 * the results. |
223 * | 195 * |
224 * @param {Window} window | 196 * @param {Window} window |
225 * The browser window we're operating in | 197 * The browser window we're operating in |
226 * @param {String[]} urls | 198 * @param {String[]} urls |
227 * URLs to be crawled | 199 * URLs to be crawled |
228 * @param {int} maxtabs | 200 * @param {int} maxtabs |
229 * Maximum number of tabs to be opened | 201 * Maximum number of tabs to be opened |
230 * @param {String} targetURL | 202 * @param {String} targetURL |
231 * URL that should receive the results | 203 * URL that should receive the results |
232 */ | 204 */ |
233 function run(window, urls, timeout, maxtabs, targetURL, onDone) | 205 function run(window, urls, timeout, maxtabs, targetURL, onDone) |
234 { | 206 { |
235 let requestNotifier = new RequestNotifier(null, function() {}); | |
236 | |
237 let origProcessNode = Policy.processNode; | |
238 Policy.processNode = processNodeReplacement.bind(null, origProcessNode, reques
tNotifier); | |
239 | |
240 let tabAllocator = new TabAllocator(window.getBrowser(), maxtabs); | 207 let tabAllocator = new TabAllocator(window.getBrowser(), maxtabs); |
241 let loadListener = new LoadListener(window.getBrowser(), timeout); | 208 let loadListener = new LoadListener(window.getBrowser(), timeout); |
242 let running = 0; | 209 let running = 0; |
243 let windowCloser = new WindowCloser(); | 210 let windowCloser = new WindowCloser(); |
244 let taskDone = function() | 211 let taskDone = function() |
245 { | 212 { |
246 running--; | 213 running--; |
247 if (running <= 0) | 214 if (running <= 0) |
248 { | 215 { |
249 Policy.processNode = origProcessNode; | |
250 requestNotifier.shutdown(); | |
251 loadListener.stop(); | 216 loadListener.stop(); |
252 windowCloser.stop(); | 217 windowCloser.stop(); |
253 onDone(); | 218 onDone(); |
254 } | 219 } |
255 }; | 220 }; |
256 | 221 |
257 for (let url of urls) | 222 for (let url of urls) |
258 { | 223 { |
259 running++; | 224 running++; |
260 Task.spawn(crawl_url.bind(null, url, tabAllocator, loadListener)).then(funct
ion(result) | 225 Task.spawn(crawl_url.bind(null, url, tabAllocator, loadListener)).then(funct
ion(result) |
(...skipping 26 matching lines...) Expand all Loading... |
287 * | 252 * |
288 * @param {String} url | 253 * @param {String} url |
289 * @param {TabAllocator} tabAllocator | 254 * @param {TabAllocator} tabAllocator |
290 * @param {LoadListener} loadListener | 255 * @param {LoadListener} loadListener |
291 * @result {Object} | 256 * @result {Object} |
292 * Crawling result | 257 * Crawling result |
293 */ | 258 */ |
294 function* crawl_url(url, tabAllocator, loadListener) | 259 function* crawl_url(url, tabAllocator, loadListener) |
295 { | 260 { |
296 let tab = yield tabAllocator.getTab(); | 261 let tab = yield tabAllocator.getTab(); |
297 let result = {url: url}; | 262 let result = {url, requests: []}; |
298 | 263 let requestNotifier; |
299 dataForTab.set(tab, result); | |
300 try | 264 try |
301 { | 265 { |
302 result.startTime = Date.now(); | 266 result.startTime = Date.now(); |
| 267 requestNotifier = new RequestNotifier(tab.linkedBrowser.outerWindowID, |
| 268 function(entry, scanComplete) |
| 269 { |
| 270 if (!entry) |
| 271 return; |
| 272 let {type: contentType, location, filter} = entry; |
| 273 result.requests.push({location, contentType, filter}); |
| 274 }); |
| 275 |
303 tab.linkedBrowser.loadURI(url, null, null); | 276 tab.linkedBrowser.loadURI(url, null, null); |
304 [result.channelStatus, result.headers] = yield loadListener.waitForLoad(tab)
; | 277 [result.channelStatus, result.headers] = yield loadListener.waitForLoad(tab)
; |
305 result.endTime = Date.now(); | 278 result.endTime = Date.now(); |
306 result.finalUrl = tab.linkedBrowser.currentURI.spec; | 279 result.finalUrl = tab.linkedBrowser.currentURI.spec; |
307 | 280 |
308 let document = tab.linkedBrowser.contentDocument; | 281 let document = tab.linkedBrowser.contentDocument; |
309 if (document.documentElement) | 282 if (document.documentElement) |
310 { | 283 { |
311 try | 284 try |
312 { | 285 { |
(...skipping 11 matching lines...) Expand all Loading... |
324 result.error = "Capturing screenshot failed: " + e; | 297 result.error = "Capturing screenshot failed: " + e; |
325 } | 298 } |
326 | 299 |
327 // TODO: Capture frames as well? | 300 // TODO: Capture frames as well? |
328 let serializer = new tab.ownerDocument.defaultView.XMLSerializer(); | 301 let serializer = new tab.ownerDocument.defaultView.XMLSerializer(); |
329 result.source = serializer.serializeToString(document.documentElement); | 302 result.source = serializer.serializeToString(document.documentElement); |
330 } | 303 } |
331 } | 304 } |
332 finally | 305 finally |
333 { | 306 { |
| 307 if (requestNotifier) |
| 308 requestNotifier.shutdown(); |
334 tabAllocator.releaseTab(tab); | 309 tabAllocator.releaseTab(tab); |
335 } | 310 } |
336 return result; | 311 return result; |
337 } | 312 } |
338 | 313 |
/**
 * Reports an exception by handing it to Cu.reportError() and by writing its
 * string form, followed by its stack if it has one, via dump().
 *
 * @param {*} e
 *   The thrown value; it does not have to be an Error instance
 */
function reportException(e)
{
  let stack = "";
  if (e && typeof e == "object" && "stack" in e)
    stack = e.stack + "\n";

  Cu.reportError(e);

  // An error reporter must not throw itself. Concatenating a Symbol or an
  // object without toString/valueOf raises a TypeError, so fall back to the
  // generic "[object ...]" tag in that case. Every value that converted
  // before still produces exactly the same text.
  let description;
  try
  {
    description = e + "";
  }
  catch (conversionError)
  {
    description = Object.prototype.toString.call(e);
  }
  dump(description + "\n" + stack + "\n");
}
348 | |
/**
 * Stand-in for Policy.processNode() that forwards to the original function
 * and records each processed request in the crawler results of the tab the
 * window belongs to.
 *
 * While the original function runs, the notifier's listener is swapped for
 * one that collects the text of every filter reported. The previous listener
 * is put back afterwards, even if the original function throws; such an
 * exception is passed on to the caller. Failures while recording the request
 * are reported via reportException() and do not affect the return value.
 *
 * @param {Function} origProcessNode
 *   The original processNode function.
 * @param {RequestNotifier} requestNotifier
 *   The crawler's RequestNotifier object instance.
 * @param {nsIDOMWindow} wnd
 * @param {nsIDOMElement} node
 * @param {Number} contentType
 * @param {nsIURI} location
 * @param {Boolean} collapse
 * @return {Boolean}
 *   Whatever the original processNode function returned.
 */
function processNodeReplacement(origProcessNode, requestNotifier, wnd, node, contentType, location, collapse)
{
  let matchedFilters = [];
  let previousListener = requestNotifier.listener;
  requestNotifier.listener = function(window, node, entry)
  {
    let {filter} = entry;
    if (filter)
      matchedFilters.push(filter.text);
  };

  // No catch here on purpose: if the original throws, so do we. The finally
  // clause only guarantees that the listener gets restored.
  let verdict;
  try
  {
    verdict = origProcessNode(wnd, node, contentType, location, collapse);
  }
  finally
  {
    requestNotifier.listener = previousListener;
  }

  try
  {
    let record = getDataForWindow(wnd);
    if (!record)
      return verdict;

    if (!("requests" in record))
      record.requests = [];

    // Element hiding entries are identified by "text", everything else by
    // "spec".
    let isElemHide = (contentType == Policy.type.ELEMHIDE);
    record.requests.push({
      contentType,
      location: isElemHide ? location.text : location.spec,
      blocked: verdict != Ci.nsIContentPolicy.ACCEPT,
      filters: matchedFilters
    });
  }
  catch (e)
  {
    reportException(e);
  }
  return verdict;
}
OLD | NEW |