Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Side by Side Diff: lib/crawler.js

Issue 29338121: Issue 3775 - fix saving of requests (Closed)
Patch Set: Created March 11, 2016, 9:41 a.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * This Source Code is subject to the terms of the Mozilla Public License 2 * This Source Code is subject to the terms of the Mozilla Public License
3 * version 2.0 (the "License"). You can obtain a copy of the License at 3 * version 2.0 (the "License"). You can obtain a copy of the License at
4 * http://mozilla.org/MPL/2.0/. 4 * http://mozilla.org/MPL/2.0/.
5 */ 5 */
6 6
7 /** 7 /**
8 * @module crawler 8 * @module crawler
9 */ 9 */
10 10
11 Cu.import("resource://gre/modules/Services.jsm"); 11 Cu.import("resource://gre/modules/Services.jsm");
12 Cu.import("resource://gre/modules/Task.jsm"); 12 Cu.import("resource://gre/modules/Task.jsm");
13 Cu.import("resource://gre/modules/Promise.jsm"); 13 Cu.import("resource://gre/modules/Promise.jsm");
14 14
15 function abprequire(module) 15 function abprequire(module)
16 { 16 {
17 let result = {}; 17 let result = {};
18 result.wrappedJSObject = result; 18 result.wrappedJSObject = result;
19 Services.obs.notifyObservers(result, "adblockplus-require", module); 19 Services.obs.notifyObservers(result, "adblockplus-require", module);
20 return result.exports; 20 return result.exports;
21 } 21 }
22 22
23 let {Policy} = abprequire("contentPolicy");
24 let {RequestNotifier} = abprequire("requestNotifier"); 23 let {RequestNotifier} = abprequire("requestNotifier");
25 let {Utils} = abprequire("utils");
26 24
27 let dataForTab = new WeakMap();
28 25
29 /** 26 /**
30 * Creates a pool of tabs and allocates them to tasks on request. 27 * Creates a pool of tabs and allocates them to tasks on request.
31 * 28 *
32 * @param {tabbrowser} browser 29 * @param {tabbrowser} browser
33 * The tabbed browser where tabs should be created 30 * The tabbed browser where tabs should be created
34 * @param {int} maxtabs 31 * @param {int} maxtabs
35 * The maximum number of tabs to be allocated 32 * The maximum number of tabs to be allocated
36 * @constructor 33 * @constructor
37 */ 34 */
(...skipping 148 matching lines...) Expand 10 before | Expand all | Expand 10 after
186 window.document.documentElement.acceptDialog(); 183 window.document.documentElement.acceptDialog();
187 else 184 else
188 window.close(); 185 window.close();
189 }, false); 186 }, false);
190 }, 187 },
191 188
192 QueryInterface: XPCOMUtils.generateQI([Ci.nsIObserver, Ci.nsISupportsWeakReference]) 189 QueryInterface: XPCOMUtils.generateQI([Ci.nsIObserver, Ci.nsISupportsWeakReference])
193 }; 190 };
194 191
195 /** 192 /**
196 * Retrieves crawler results associated with a particular content window.
197 *
198 * @param {Window} window
199 * Content window to retrieve crawler results for
200 * @result {Object}
201 * Crawler results or undefined if the window wasn't created by the crawler.
202 */
203 function getDataForWindow(window)
204 {
205 let topWindow = window.top;
206 if (!topWindow.document)
207 throw new Error("No document associated with the node's top window");
208 let tabbrowser = Utils.getChromeWindow(topWindow).getBrowser();
209 if (!tabbrowser)
210 throw new Error("Unable to get a tabbrowser reference from the window");
211 let browser = tabbrowser.getBrowserForDocument(topWindow.document);
212 if (!browser)
213 throw new Error("Unable to get browser for the content window");
214 let tab = tabbrowser.getTabForBrowser(browser);
215 if (!tab)
216 throw new Error("Unable to get tab for the browser");
217 return dataForTab.get(tab);
218 };
219
220 /**
221 * Starts the crawling session. The crawler opens each URL in a tab and stores 193 * Starts the crawling session. The crawler opens each URL in a tab and stores
222 * the results. 194 * the results.
223 * 195 *
224 * @param {Window} window 196 * @param {Window} window
225 * The browser window we're operating in 197 * The browser window we're operating in
226 * @param {String[]} urls 198 * @param {String[]} urls
227 * URLs to be crawled 199 * URLs to be crawled
228 * @param {int} number_of_tabs 200 * @param {int} number_of_tabs
229 * Maximum number of tabs to be opened 201 * Maximum number of tabs to be opened
230 * @param {String} targetURL 202 * @param {String} targetURL
231 * URL that should receive the results 203 * URL that should receive the results
232 */ 204 */
233 function run(window, urls, timeout, maxtabs, targetURL, onDone) 205 function run(window, urls, timeout, maxtabs, targetURL, onDone)
234 { 206 {
235 let requestNotifier = new RequestNotifier(null, function() {});
236
237 let origProcessNode = Policy.processNode;
238 Policy.processNode = processNodeReplacement.bind(null, origProcessNode, requestNotifier);
239
240 let tabAllocator = new TabAllocator(window.getBrowser(), maxtabs); 207 let tabAllocator = new TabAllocator(window.getBrowser(), maxtabs);
241 let loadListener = new LoadListener(window.getBrowser(), timeout); 208 let loadListener = new LoadListener(window.getBrowser(), timeout);
242 let running = 0; 209 let running = 0;
243 let windowCloser = new WindowCloser(); 210 let windowCloser = new WindowCloser();
244 let taskDone = function() 211 let taskDone = function()
245 { 212 {
246 running--; 213 running--;
247 if (running <= 0) 214 if (running <= 0)
248 { 215 {
249 Policy.processNode = origProcessNode;
250 requestNotifier.shutdown();
251 loadListener.stop(); 216 loadListener.stop();
252 windowCloser.stop(); 217 windowCloser.stop();
253 onDone(); 218 onDone();
254 } 219 }
255 }; 220 };
256 221
257 for (let url of urls) 222 for (let url of urls)
258 { 223 {
259 running++; 224 running++;
260 Task.spawn(crawl_url.bind(null, url, tabAllocator, loadListener)).then(function(result) 225 Task.spawn(crawl_url.bind(null, url, tabAllocator, loadListener)).then(function(result)
(...skipping 26 matching lines...) Expand all
287 * 252 *
288 * @param {String} url 253 * @param {String} url
289 * @param {TabAllocator} tabAllocator 254 * @param {TabAllocator} tabAllocator
290 * @param {loadListener} loadListener 255 * @param {loadListener} loadListener
291 * @result {Object} 256 * @result {Object}
292 * Crawling result 257 * Crawling result
293 */ 258 */
294 function* crawl_url(url, tabAllocator, loadListener) 259 function* crawl_url(url, tabAllocator, loadListener)
295 { 260 {
296 let tab = yield tabAllocator.getTab(); 261 let tab = yield tabAllocator.getTab();
297 let result = {url: url}; 262 let result = {url, requests: []};
298 263
299 dataForTab.set(tab, result);
300 try 264 try
301 { 265 {
302 result.startTime = Date.now(); 266 result.startTime = Date.now();
267 let requestNotifier = new RequestNotifier(tab.linkedBrowser.outerWindowID, function({type, location, filter}, scanComplete)
Wladimir Palant 2016/03/14 19:50:43 When the scan is completed the listener is called
sergei 2016/03/15 10:59:56 Fixed, is the indentation correct now?
sergei 2016/03/15 10:59:56 Fixed. However, I guess, it should be documented
Wladimir Palant 2016/03/15 11:05:44 Yes, it should be documented. As to reproducing -
268 {
269 result.requests.push({location, contentType: type, filter});
270 });
Wladimir Palant 2016/03/14 19:50:43 You need to shut down this notifier when you are d
sergei 2016/03/15 10:59:56 Done. Thanks, overlooked it.
271
303 tab.linkedBrowser.loadURI(url, null, null); 272 tab.linkedBrowser.loadURI(url, null, null);
304 [result.channelStatus, result.headers] = yield loadListener.waitForLoad(tab); 273 [result.channelStatus, result.headers] = yield loadListener.waitForLoad(tab);
305 result.endTime = Date.now(); 274 result.endTime = Date.now();
306 result.finalUrl = tab.linkedBrowser.currentURI.spec; 275 result.finalUrl = tab.linkedBrowser.currentURI.spec;
307 276
308 let document = tab.linkedBrowser.contentDocument; 277 let document = tab.linkedBrowser.contentDocument;
309 if (document.documentElement) 278 if (document.documentElement)
310 { 279 {
311 try 280 try
312 { 281 {
(...skipping 25 matching lines...) Expand all
338 307
339 function reportException(e) 308 function reportException(e)
340 { 309 {
341 let stack = ""; 310 let stack = "";
342 if (e && typeof e == "object" && "stack" in e) 311 if (e && typeof e == "object" && "stack" in e)
343 stack = e.stack + "\n"; 312 stack = e.stack + "\n";
344 313
345 Cu.reportError(e); 314 Cu.reportError(e);
346 dump(e + "\n" + stack + "\n"); 315 dump(e + "\n" + stack + "\n");
347 } 316 }
348
349 /**
350 * Wrapper for the Policy.processNode() function in ABP. Calls the original
351 * function and records all the data.
352 *
353 * @param {Function} origProcessNode
354 * The original processNode function.
355 * @param {RequestNotifier} requestNotifier
356 * The crawler's RequestNotifier object instance.
357 * @param {nsIDOMWindow} wnd
358 * @param {nsIDOMElement} node
359 * @param {Number} contentType
360 * @param {nsIURI} location
361 * @param {Boolean} collapse
362 * @return {Boolean}
363 */
364 function processNodeReplacement(origProcessNode, requestNotifier, wnd, node, contentType, location, collapse)
365 {
366 let filters = [];
367 let origListener = requestNotifier.listener;
368 requestNotifier.listener = function(window, node, entry)
369 {
370 if (entry.filter)
371 filters.push(entry.filter.text);
372 };
373
374 /*
375 * Call the original processNode. If the original throws, then we will too, so this is outside a try clause.
376 */
377 let result;
378 try
379 {
380 result = origProcessNode(wnd, node, contentType, location, collapse);
381 }
382 finally
383 {
384 requestNotifier.listener = origListener;
385 }
386
387 try
388 {
389 let data = getDataForWindow(wnd);
390 if (data)
391 {
392 if (!("requests" in data))
393 data.requests = [];
394 data.requests.push({
395 contentType: contentType,
396 location: (contentType == Policy.type.ELEMHIDE ? location.text : location.spec),
397 blocked: result != Ci.nsIContentPolicy.ACCEPT,
398 filters: filters
399 });
400 }
401 }
402 catch (e)
403 {
404 reportException(e);
405 }
406 return result;
407 };
OLDNEW
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld