Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Side by Side Diff: lib/crawler.js

Issue 29338153: Issue 3780 - wait for the loading of filters and only afterwards start to fetch pages (Closed)
Patch Set: Created March 11, 2016, 3:55 p.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * This Source Code is subject to the terms of the Mozilla Public License 2 * This Source Code is subject to the terms of the Mozilla Public License
3 * version 2.0 (the "License"). You can obtain a copy of the License at 3 * version 2.0 (the "License"). You can obtain a copy of the License at
4 * http://mozilla.org/MPL/2.0/. 4 * http://mozilla.org/MPL/2.0/.
5 */ 5 */
6 6
7 /** 7 /**
8 * @module crawler 8 * @module crawler
9 */ 9 */
10 10
11 Cu.import("resource://gre/modules/Services.jsm"); 11 Cu.import("resource://gre/modules/Services.jsm");
12 Cu.import("resource://gre/modules/Task.jsm"); 12 Cu.import("resource://gre/modules/Task.jsm");
13 Cu.import("resource://gre/modules/Promise.jsm"); 13 Cu.import("resource://gre/modules/Promise.jsm");
14 14
15 function abprequire(module) 15 function abprequire(module)
16 { 16 {
17 let result = {}; 17 let result = {};
18 result.wrappedJSObject = result; 18 result.wrappedJSObject = result;
19 Services.obs.notifyObservers(result, "adblockplus-require", module); 19 Services.obs.notifyObservers(result, "adblockplus-require", module);
20 return result.exports; 20 return result.exports;
21 } 21 }
22 22
23 let {RequestNotifier} = abprequire("requestNotifier"); 23 let {RequestNotifier} = abprequire("requestNotifier");
24 24
25 let {FilterNotifier} = abprequire("filterNotifier");
26 let {FilterStorage} = abprequire("filterStorage");
25 27
26 /** 28 /**
27 * Creates a pool of tabs and allocates them to tasks on request. 29 * Creates a pool of tabs and allocates them to tasks on request.
28 * 30 *
29 * @param {tabbrowser} browser 31 * @param {tabbrowser} browser
30 * The tabbed browser where tabs should be created 32 * The tabbed browser where tabs should be created
31 * @param {int} maxtabs 33 * @param {int} maxtabs
32 * The maximum number of tabs to be allocated 34 * The maximum number of tabs to be allocated
33 * @constructor 35 * @constructor
34 */ 36 */
(...skipping 177 matching lines...) Expand 10 before | Expand all | Expand 10 after
212 { 214 {
213 running--; 215 running--;
214 if (running <= 0) 216 if (running <= 0)
215 { 217 {
216 loadListener.stop(); 218 loadListener.stop();
217 windowCloser.stop(); 219 windowCloser.stop();
218 onDone(); 220 onDone();
219 } 221 }
220 }; 222 };
221 223
222 for (let url of urls) 224 new Promise(function(resolve, reject)
Wladimir Palant 2016/03/15 09:13:10 Nit: Why not use arrow functions consistently? (re
sergei 2016/03/15 12:16:23 We don't need to capture `this` here, so I have no
Wladimir Palant 2016/03/15 13:44:22 We generally use arrow functions for inline callba
223 { 225 {
224 running++; 226 if (FilterStorage.subscriptions.length > 0 && !FilterStorage._loading)
Wladimir Palant 2016/03/15 09:13:10 _loading is an internal flag to prevent reentrance
Wladimir Palant 2016/03/15 09:18:11 Actually, I think that this is a bad assumption fr
sergei 2016/03/15 12:16:23 I thought about it. It does happen that "load" ev
Wladimir Palant 2016/03/15 13:44:22 No, subscriptions are added all at once when loadi
sergei 2016/03/15 14:44:25 Acknowledged.
225 Task.spawn(crawl_url.bind(null, url, tabAllocator, loadListener)).then(funct ion(result)
226 { 227 {
227 let request = new XMLHttpRequest(); 228 resolve();
228 request.open("POST", targetURL); 229 return;
229 request.addEventListener("load", taskDone, false); 230 }
230 request.addEventListener("error", taskDone, false); 231 FilterNotifier.addListener((action, item, newValue, oldValue) =>
231 request.send(JSON.stringify(result));
232 }, function(url, exception)
233 { 232 {
234 reportException(exception); 233 if (action === "load")
Wladimir Palant 2016/03/15 09:13:11 Nit: We don't usually use strict equality, and it
sergei 2016/03/15 12:16:23 Fixed.
234 {
235 resolve();
236 }
237 });
238 }).then(_ =>
Wladimir Palant 2016/03/15 09:13:11 No pointless parameter please, () =>
sergei 2016/03/15 12:16:23 Acknowledged.
239 {
240 for (let url of urls)
241 {
242 running++;
243 Task.spawn(crawl_url.bind(null, url, tabAllocator, loadListener)).then(fun ction(result)
244 {
245 let request = new XMLHttpRequest();
246 request.open("POST", targetURL);
247 request.addEventListener("load", taskDone, false);
248 request.addEventListener("error", taskDone, false);
249 request.send(JSON.stringify(result));
250 }, function(url, exception)
251 {
252 reportException(exception);
235 253
236 let request = new XMLHttpRequest(); 254 let request = new XMLHttpRequest();
237 request.open("POST", targetURL); 255 request.open("POST", targetURL);
238 request.addEventListener("load", taskDone, false); 256 request.addEventListener("load", taskDone, false);
239 request.addEventListener("error", taskDone, false); 257 request.addEventListener("error", taskDone, false);
240 request.send(JSON.stringify({ 258 request.send(JSON.stringify({
241 url: url, 259 url: url,
242 startTime: Date.now(), 260 startTime: Date.now(),
243 error: String(exception) 261 error: String(exception)
244 })); 262 }));
245 }.bind(null, url)); 263 }.bind(null, url));
246 } 264 }
265 // Be careful, `catch` does not catch exeptions from any asynchronous calls
Wladimir Palant 2016/03/15 09:13:11 exeptions => exceptions.
sergei 2016/03/15 12:16:23 Done. Basically, I have removed the comment.
266 // of this `then` handler because the latter one does not return an array of
267 // promises of asynchrounous tasks and does not contain any waiting code.
268 }).catch(reportException);
Wladimir Palant 2016/03/15 09:13:11 I'm not really happy with the way this is structur
sergei 2016/03/15 12:16:23 Done.
247 } 269 }
248 exports.run = run; 270 exports.run = run;
249 271
250 /** 272 /**
251 * Crawls a URL. This is a generator meant to be used via a Task object. 273 * Crawls a URL. This is a generator meant to be used via a Task object.
252 * 274 *
253 * @param {String} url 275 * @param {String} url
254 * @param {TabAllocator} tabAllocator 276 * @param {TabAllocator} tabAllocator
255 * @param {loadListener} loadListener 277 * @param {loadListener} loadListener
256 * @result {Object} 278 * @result {Object}
(...skipping 50 matching lines...) Expand 10 before | Expand all | Expand 10 after
307 329
308 function reportException(e) 330 function reportException(e)
309 { 331 {
310 let stack = ""; 332 let stack = "";
311 if (e && typeof e == "object" && "stack" in e) 333 if (e && typeof e == "object" && "stack" in e)
312 stack = e.stack + "\n"; 334 stack = e.stack + "\n";
313 335
314 Cu.reportError(e); 336 Cu.reportError(e);
315 dump(e + "\n" + stack + "\n"); 337 dump(e + "\n" + stack + "\n");
316 } 338 }
OLDNEW
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld