Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Side by Side Diff: lib/crawler.js

Issue 29338153: Issue 3780 - wait for the loading of filters and only afterwards start to fetch pages (Closed)
Patch Set: make crawl_urls function Created March 15, 2016, 2:36 p.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * This Source Code is subject to the terms of the Mozilla Public License 2 * This Source Code is subject to the terms of the Mozilla Public License
3 * version 2.0 (the "License"). You can obtain a copy of the License at 3 * version 2.0 (the "License"). You can obtain a copy of the License at
4 * http://mozilla.org/MPL/2.0/. 4 * http://mozilla.org/MPL/2.0/.
5 */ 5 */
6 6
7 /** 7 /**
8 * @module crawler 8 * @module crawler
9 */ 9 */
10 10
11 Cu.import("resource://gre/modules/Services.jsm"); 11 Cu.import("resource://gre/modules/Services.jsm");
12 Cu.import("resource://gre/modules/Task.jsm"); 12 Cu.import("resource://gre/modules/Task.jsm");
13 Cu.import("resource://gre/modules/Promise.jsm"); 13 Cu.import("resource://gre/modules/Promise.jsm");
14 14
15 function abprequire(module) 15 function abprequire(module)
16 { 16 {
17 let result = {}; 17 let result = {};
18 result.wrappedJSObject = result; 18 result.wrappedJSObject = result;
19 Services.obs.notifyObservers(result, "adblockplus-require", module); 19 Services.obs.notifyObservers(result, "adblockplus-require", module);
20 return result.exports; 20 return result.exports;
21 } 21 }
22 22
23 let {RequestNotifier} = abprequire("requestNotifier"); 23 let {RequestNotifier} = abprequire("requestNotifier");
24 24
25 let {FilterNotifier} = abprequire("filterNotifier");
26 let {FilterStorage} = abprequire("filterStorage");
25 27
26 /** 28 /**
27 * Creates a pool of tabs and allocates them to tasks on request. 29 * Creates a pool of tabs and allocates them to tasks on request.
28 * 30 *
29 * @param {tabbrowser} browser 31 * @param {tabbrowser} browser
30 * The tabbed browser where tabs should be created 32 * The tabbed browser where tabs should be created
31 * @param {int} maxtabs 33 * @param {int} maxtabs
32 * The maximum number of tabs to be allocated 34 * The maximum number of tabs to be allocated
33 * @constructor 35 * @constructor
34 */ 36 */
(...skipping 155 matching lines...) Expand 10 before | Expand all | Expand 10 after
190 }; 192 };
191 193
192 /** 194 /**
193 * Starts the crawling session. The crawler opens each URL in a tab and stores 195 * Starts the crawling session. The crawler opens each URL in a tab and stores
194 * the results. 196 * the results.
195 * 197 *
196 * @param {Window} window 198 * @param {Window} window
197 * The browser window we're operating in 199 * The browser window we're operating in
198 * @param {String[]} urls 200 * @param {String[]} urls
199 * URLs to be crawled 201 * URLs to be crawled
200 * @param {int} number_of_tabs 202 * @param {int} timeout
203 * Load timeout in milliseconds
204 * @param {int} maxtabs
201 * Maximum number of tabs to be opened 205 * Maximum number of tabs to be opened
202 * @param {String} targetURL 206 * @param {String} targetURL
203 * URL that should receive the results 207 * URL that should receive the results
208 * @param {Function} onDone
209 * The callback which is called after finishing of crawling of all URLs.
204 */ 210 */
205 function run(window, urls, timeout, maxtabs, targetURL, onDone) 211 function run(window, urls, timeout, maxtabs, targetURL, onDone)
206 { 212 {
213 new Promise((resolve, reject) =>
214 {
215 if (FilterStorage.subscriptions.length > 0 && !FilterStorage._loading)
216 {
217 resolve();
218 return;
219 }
220 FilterNotifier.addListener((action, item, newValue, oldValue) =>
221 {
222 if (action == "load")
223 {
224 resolve();
225 }
226 });
 227 }).then(() => crawl_urls(window, urls, timeout, maxtabs, targetURL, onDone)).catch(reportException);
228 }
229 exports.run = run;
230
231 /**
232 * Spawns a {Task} task to crawl each url from `urls` argument and calls
233 * `onDone` when all tasks are finished.
234 * @param {Window} window
235 * The browser window we're operating in
236 * @param {String[]} urls
237 * URLs to be crawled
238 * @param {int} timeout
239 * Load timeout in milliseconds
240 * @param {int} maxtabs
241 * Maximum number of tabs to be opened
242 * @param {String} targetURL
243 * URL that should receive the results
244 * @param {Function} onDone
245 * The callback which is called after finishing of all tasks.
246 */
247 function crawl_urls(window, urls, timeout, maxtabs, targetURL, onDone)
248 {
207 let tabAllocator = new TabAllocator(window.getBrowser(), maxtabs); 249 let tabAllocator = new TabAllocator(window.getBrowser(), maxtabs);
208 let loadListener = new LoadListener(window.getBrowser(), timeout); 250 let loadListener = new LoadListener(window.getBrowser(), timeout);
209 let running = 0; 251 let running = 0;
210 let windowCloser = new WindowCloser(); 252 let windowCloser = new WindowCloser();
211 let taskDone = function() 253 let taskDone = function()
212 { 254 {
213 running--; 255 running--;
214 if (running <= 0) 256 if (running <= 0)
215 { 257 {
216 loadListener.stop(); 258 loadListener.stop();
(...skipping 21 matching lines...) Expand all
238 request.addEventListener("load", taskDone, false); 280 request.addEventListener("load", taskDone, false);
239 request.addEventListener("error", taskDone, false); 281 request.addEventListener("error", taskDone, false);
240 request.send(JSON.stringify({ 282 request.send(JSON.stringify({
241 url: url, 283 url: url,
242 startTime: Date.now(), 284 startTime: Date.now(),
243 error: String(exception) 285 error: String(exception)
244 })); 286 }));
245 }.bind(null, url)); 287 }.bind(null, url));
246 } 288 }
247 } 289 }
248 exports.run = run;
249 290
250 /** 291 /**
251 * Crawls a URL. This is a generator meant to be used via a Task object. 292 * Crawls a URL. This is a generator meant to be used via a Task object.
252 * 293 *
253 * @param {String} url 294 * @param {String} url
254 * @param {TabAllocator} tabAllocator 295 * @param {TabAllocator} tabAllocator
255 * @param {loadListener} loadListener 296 * @param {loadListener} loadListener
256 * @result {Object} 297 * @result {Object}
257 * Crawling result 298 * Crawling result
258 */ 299 */
(...skipping 54 matching lines...) Expand 10 before | Expand all | Expand 10 after
313 354
314 function reportException(e) 355 function reportException(e)
315 { 356 {
316 let stack = ""; 357 let stack = "";
317 if (e && typeof e == "object" && "stack" in e) 358 if (e && typeof e == "object" && "stack" in e)
318 stack = e.stack + "\n"; 359 stack = e.stack + "\n";
319 360
320 Cu.reportError(e); 361 Cu.reportError(e);
321 dump(e + "\n" + stack + "\n"); 362 dump(e + "\n" + stack + "\n");
322 } 363 }
OLDNEW
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld