Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Delta Between Two Patch Sets: lib/crawler.js

Issue 29338242: Issue 3792 - Fix to support multiprocess firefox (Closed)
Left Patch Set: rebase on #3815 and address some trivial comments Created March 16, 2016, 2:41 p.m.
Right Patch Set: change comment Created Sept. 30, 2016, 12:43 p.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
Left: Side by side diff | Download
Right: Side by side diff | Download
« no previous file with change/comment | « lib/child/frameScript.js ('k') | no next file » | no next file with change/comment »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
LEFT RIGHT
1 /* 1 /*
2 * This Source Code is subject to the terms of the Mozilla Public License 2 * This Source Code is subject to the terms of the Mozilla Public License
3 * version 2.0 (the "License"). You can obtain a copy of the License at 3 * version 2.0 (the "License"). You can obtain a copy of the License at
4 * http://mozilla.org/MPL/2.0/. 4 * http://mozilla.org/MPL/2.0/.
5 */ 5 */
6 6
7 "use strict";
8
7 /** 9 /**
8 * @module crawler 10 * @module crawler
9 */ 11 */
10 12
11 Cu.import("resource://gre/modules/Services.jsm"); 13 const {Services} = Cu.import("resource://gre/modules/Services.jsm", {});
12 Cu.import("resource://gre/modules/Task.jsm"); 14 const {XPCOMUtils} = Cu.import("resource://gre/modules/XPCOMUtils.jsm", {});
13 Cu.import("resource://gre/modules/Promise.jsm"); 15 const {Task} = Cu.import("resource://gre/modules/Task.jsm", {});
14 Cu.import("resource://gre/modules/Timer.jsm"); 16 const {setTimeout, clearTimeout} = Cu.import("resource://gre/modules/Timer.jsm", {});
15 17
16 function abprequire(module) 18 function abprequire(module)
17 { 19 {
18 let result = {}; 20 let result = {};
19 result.wrappedJSObject = result; 21 result.wrappedJSObject = result;
20 Services.obs.notifyObservers(result, "adblockplus-require", module); 22 Services.obs.notifyObservers(result, "adblockplus-require", module);
21 return result.exports; 23 return result.exports;
22 } 24 }
23 25
24 let {RequestNotifier} = abprequire("requestNotifier"); 26 let {RequestNotifier} = abprequire("requestNotifier");
25 let {FilterNotifier} = abprequire("filterNotifier"); 27 let {FilterNotifier} = abprequire("filterNotifier");
26 let {FilterStorage} = abprequire("filterStorage"); 28 let {FilterStorage} = abprequire("filterStorage");
27 29
28 /** 30 /**
29 * Allocates a new tab "about:blank" in the `browser`. 31 * Allocates tabs on request but not more than maxtabs at the same time.
30 * The method returns a {Promise} promise which is resolved with the `tab`
31 * when `outerWindowID` is already initialized.
32 *
33 * See: https://bugzilla.mozilla.org/show_bug.cgi?id=1256602#c1
34 *
35 * @param {tabbrowser} browser
36 * The tabbed browser where tabs should be created
37 * @return {Promise} promise which will be resolved with the tab.
38 */
39 function createTab(browser)
40 {
41 let tab = browser.addTab("about:blank");
42 if (tab.linkedBrowser.outerWindowID)
43 return Promise.resolve(tab);
44 return new Promise((resolve, reject)=>
45 {
46 let onBrowserInit = (msg) =>
47 {
48 tab.linkedBrowser.messageManager.removeMessageListener("Browser:Init", onBrowserInit);
49 resolve(tab);
50 };
51 tab.linkedBrowser.messageManager.addMessageListener("Browser:Init", onBrowserInit);
52 });
53 }
54
55 /**
56 * Allocates tabs on request but not more than `maxtabs` at the same time.
57 * 32 *
58 * @param {tabbrowser} browser 33 * @param {tabbrowser} browser
59 * The tabbed browser where tabs should be created 34 * The tabbed browser where tabs should be created
60 * @param {int} maxtabs 35 * @param {int} maxtabs
61 * The maximum number of tabs to be allocated 36 * The maximum number of tabs to be allocated
62 * @constructor 37 * @constructor
63 */ 38 */
64 function TabAllocator(browser, maxtabs) 39 function TabAllocator(browser, maxtabs)
65 { 40 {
66 this._browser = browser; 41 this._browser = browser;
67 this._tabs = 0; 42 this._tabs = 0;
68 this._maxtabs = maxtabs; 43 this._maxtabs = maxtabs;
69 // the array of `resolve` functions of {Promise} promises returned by `getTab`. 44 // The queue containing resolve functions of promises waiting for a tab.
70 this._resolvers = []; 45 this._resolvers = [];
71 // Keep at least one tab alive to prevent browser from closing of it self. 46 // Keep at least one tab alive to prevent browser from closing itself.
72 // That tab will be removed when the first tab is requested. 47 this._tabKeepingWindowAlive = this._browser.tabs[0];
73 browser.removeAllTabsBut(browser.tabs[0]) 48 this._browser.removeAllTabsBut(this._tabKeepingWindowAlive);
74 } 49 }
75 TabAllocator.prototype = { 50 TabAllocator.prototype = {
51 _removeTabKeepingWindowAlive: function()
52 {
53 if (!this._tabKeepingWindowAlive)
54 return;
55 this._browser.removeTab(this._tabKeepingWindowAlive);
56 delete this._tabKeepingWindowAlive;
57 },
58
59 /**
60 * Creates a blank tab in this._browser.
61 *
62 * @return {Promise.<tab>} promise which resolves once the tab is fully initialized.
63 */
64 _createTab: function()
65 {
66 this._tabs++;
67 let tab = this._browser.addTab("about:blank");
68 if (tab.linkedBrowser.outerWindowID)
69 {
70 this._removeTabKeepingWindowAlive();
71 return Promise.resolve(tab);
72 }
73 return new Promise((resolve, reject) =>
74 {
75 let onBrowserInit = (msg) =>
76 {
77 tab.linkedBrowser.messageManager.removeMessageListener("Browser:Init", onBrowserInit);
78 this._removeTabKeepingWindowAlive();
79 resolve(tab);
80 };
81 // "Browser:Init" message is sent once the browser is ready, see
82 // https://bugzil.la/1256602#c1
83 tab.linkedBrowser.messageManager.addMessageListener("Browser:Init", onBrowserInit);
84 });
85 },
86
76 /** 87 /**
77 * Returns a promise that will resolve into a tab once a tab is allocated. 88 * Returns a promise that will resolve into a tab once a tab is allocated.
78 * The tab cannot be used by other tasks until releaseTab() is called. 89 * The tab cannot be used by other tasks until releaseTab() is called.
79 * 90 *
80 * @result {Promise} 91 * @result {Promise.<tab>}
81 */ 92 */
82 getTab: function() 93 getTab: function()
83 { 94 {
84 if (this._tabs < this._maxtabs) 95 if (this._tabs < this._maxtabs)
85 { 96 return this._createTab();
86 let tab = createTab(this._browser); 97 return new Promise((resolve, reject) => this._resolvers.push(resolve));
87 // close initial tab, we don't need it anymore.
88 if (this._tabs == 0)
89 this._browser.removeTab(this._browser.tabs[0]);
90 this._tabs++;
91 return tab;
92 }
93 return new Promise((resolve, reject) =>
94 {
95 this._resolvers.push(resolve);
96 });
97 }, 98 },
98 99
99 /** 100 /**
100 * Adds a tab back to the pool so that it can be used by other tasks. 101 * Adds a tab back to the pool so that it can be used by other tasks.
101 * 102 *
102 * @param {tab} tab 103 * @param {tab} tab
103 */ 104 */
104 releaseTab: function(tab) 105 releaseTab: function(tab)
105 { 106 {
106 let browser = tab.parentNode.tabbrowser; 107 // If we are about to close last tab don't close it immediately to keep
107 browser.removeTab(tab); 108 // the window alive. It will be closed when a new tab is created.
108 109 if (this._tabs > 1)
109 if (this._resolvers.length) 110 this._browser.removeTab(tab);
110 this._resolvers.shift()(createTab(this._browser));
111 else 111 else
112 this._tabs--; 112 {
113 } 113 // navigate away from previously opened URL
114 tab.linkedBrowser.loadURI("about:blank", null, null);
115 this._tabKeepingWindowAlive = tab;
116 }
117
118 this._tabs--;
119 if (this._resolvers.length && this._tabs < this._maxtabs)
120 {
121 this._resolvers.shift()(this._createTab());
122 }
123 },
114 }; 124 };
115 125
116 /** 126 /**
117 * Once created, this object will make sure all new windows are dismissed 127 * Once created, this object will make sure all new windows are dismissed
118 * immediately. 128 * immediately.
119 * 129 *
120 * @constructor 130 * @constructor
121 */ 131 */
122 function WindowCloser() 132 function WindowCloser()
123 { 133 {
(...skipping 16 matching lines...) Expand all
140 { 150 {
141 if (window.document.documentElement.localName == 'dialog') 151 if (window.document.documentElement.localName == 'dialog')
142 window.document.documentElement.acceptDialog(); 152 window.document.documentElement.acceptDialog();
143 else 153 else
144 window.close(); 154 window.close();
145 }, false); 155 }, false);
146 }, 156 },
147 157
148 QueryInterface: XPCOMUtils.generateQI([Ci.nsIObserver, Ci.nsISupportsWeakReference]) 158 QueryInterface: XPCOMUtils.generateQI([Ci.nsIObserver, Ci.nsISupportsWeakReference])
149 }; 159 };
160
161 function configureFrameScript()
162 {
163 const info = require("info");
164 let frameScriptPath = info.addonRoot + "/lib/child/frameScript.js";
165 Services.mm.loadFrameScript(frameScriptPath, true);
166
167 onShutdown.add(() =>
168 {
169 Services.mm.removeDelayedFrameScript(frameScriptPath);
170 });
171 }
150 172
151 /** 173 /**
152 * Starts the crawling session. The crawler opens each URL in a tab and stores 174 * Starts the crawling session. The crawler opens each URL in a tab and stores
153 * the results. 175 * the results.
154 * 176 *
155 * @param {Window} window 177 * @param {Window} window
156 * The browser window we're operating in 178 * The browser window we're operating in
157 * @param {String[]} urls 179 * @param {String[]} urls
158 * URLs to be crawled 180 * URLs to be crawled
159 * @param {int} timeout 181 * @param {int} timeout
160 * Load timeout in milliseconds 182 * Load timeout in milliseconds
161 * @param {int} maxtabs 183 * @param {int} maxtabs
162 * Maximum number of tabs to be opened 184 * Maximum number of tabs to be opened
163 * @param {String} targetURL 185 * @param {String} targetURL
164 * URL that should receive the results 186 * URL that should receive the results
165 * @param {Function} onDone 187 * @param {Function} onDone
166 * The callback which is called after finishing of crawling of all URLs. 188 * The callback which is called after finishing of crawling of all URLs.
167 */ 189 */
168 function run(window, urls, timeout, maxtabs, targetURL, onDone) 190 function run(window, urls, timeout, maxtabs, targetURL, onDone)
169 { 191 {
192 configureFrameScript();
170 new Promise((resolve, reject) => 193 new Promise((resolve, reject) =>
171 { 194 {
172 if (FilterStorage.subscriptions.length > 0) 195 if (FilterStorage.subscriptions.length > 0)
173 { 196 {
174 resolve(); 197 resolve();
175 return; 198 return;
176 } 199 }
177 let onFiltersLoaded = (action, item, newValue, oldValue) => 200 let onFiltersLoaded = (action, item, newValue, oldValue) =>
178 { 201 {
179 if (action == "load") 202 if (action == "load")
180 { 203 {
181 FilterNotifier.removeListener(onFiltersLoaded); 204 FilterNotifier.removeListener(onFiltersLoaded);
182 resolve(); 205 resolve();
183 } 206 }
184 }; 207 };
185 FilterNotifier.addListener(onFiltersLoaded); 208 FilterNotifier.addListener(onFiltersLoaded);
186 }).then(() => crawl_urls(window, urls, timeout, maxtabs, targetURL, onDone)) 209 }).then(() => crawl_urls(window, urls, timeout, maxtabs, targetURL, onDone))
187 .catch(reportException); 210 .catch(reportException);
188 } 211 }
189 exports.run = run; 212 exports.run = run;
190 213
191 /** 214 /**
192 * Spawns a {Task} task to crawl each url from `urls` argument and calls 215 * Spawns a {Task} task to crawl each url from urls argument and calls
193 * `onDone` when all tasks are finished. 216 * onDone when all tasks are finished.
194 * @param {Window} window 217 * @param {Window} window
195 * The browser window we're operating in 218 * The browser window we're operating in
196 * @param {String[]} urls 219 * @param {String[]} urls
197 * URLs to be crawled 220 * URLs to be crawled
198 * @param {int} timeout 221 * @param {int} timeout
199 * Load timeout in milliseconds 222 * Load timeout in milliseconds
200 * @param {int} maxtabs 223 * @param {int} maxtabs
201 * Maximum number of tabs to be opened 224 * Maximum number of tabs to be opened
202 * @param {String} targetURL 225 * @param {String} targetURL
203 * URL that should receive the results 226 * URL that should receive the results
(...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after
243 startTime: Date.now(), 266 startTime: Date.now(),
244 error: String(exception) 267 error: String(exception)
245 })); 268 }));
246 }.bind(null, url)); 269 }.bind(null, url));
247 } 270 }
248 } 271 }
249 272
250 /** 273 /**
251 * Expects to receive page info gathered in a content process for the specified 274 * Expects to receive page info gathered in a content process for the specified
252 * `tab`. If there is no relevant message within specified `timeout` then 275 * `tab`. If there is no relevant message within specified `timeout` then
253 * the result promise is resolve with error object. 276 * the result promise is resolved with error object.
254 * @param tab 277 * @param tab
255 * Tab in which we are interested in 278 * Tab in which we are interested in
256 * @param {int} timeout 279 * @param {int} timeout
257 * Timeout in milliseconds 280 * Timeout in milliseconds
258 * @return {Promise} promise which will be resolved with the received page info 281 * @return {Promise} promise which will be resolved with the received page info
259 */ 282 */
260 function getPageInfo(tab, timeout) 283 function getPageInfo(tab, timeout)
261 { 284 {
262 return new Promise((resolve, result) => 285 return new Promise((resolve, result) =>
263 { 286 {
264 let mm = tab.linkedBrowser.messageManager; 287 let mm = tab.linkedBrowser.messageManager;
265 let timerID; 288 let timerID;
266 let onDone = (pageInfo) => 289 let onDone = (msg) =>
267 { 290 {
268 mm.removeMessageListener("abpcrawler:pageInfoGathered", onDone); 291 mm.removeMessageListener("abpcrawler:pageInfoGathered", onDone);
269 clearTimeout(timerID); 292 clearTimeout(timerID);
270 resolve(pageInfo); 293 resolve(msg.data);
271 } 294 }
272 mm.addMessageListener("abpcrawler:pageInfoGathered", (msg) => onDone(msg.data));; 295 mm.addMessageListener("abpcrawler:pageInfoGathered", onDone);
273 timerID = setTimeout(onDone.bind(this, {error: "timeout"}), timeout); 296 timerID = setTimeout(() => onDone({data: {error: "timeout"}}), timeout);
274 }); 297 });
275 } 298 }
276 299
277 /** 300 /**
278 * Crawls a URL. This is a generator meant to be used via a Task object. 301 * Crawls a URL. This is a generator meant to be used via a Task object.
279 * 302 *
280 * @param {String} url 303 * @param {String} url
281 * @param {TabAllocator} tabAllocator 304 * @param {TabAllocator} tabAllocator
282 * @param {int} timeout 305 * @param {int} timeout
283 * Load timeout in milliseconds 306 * Load timeout in milliseconds
(...skipping 12 matching lines...) Expand all
296 function(entry, scanComplete) 319 function(entry, scanComplete)
297 { 320 {
298 if (!entry) 321 if (!entry)
299 return; 322 return;
300 let {type: contentType, location, filter} = entry; 323 let {type: contentType, location, filter} = entry;
301 result.requests.push({location, contentType, filter}); 324 result.requests.push({location, contentType, filter});
302 }); 325 });
303 326
304 tab.linkedBrowser.loadURI(url, null, null); 327 tab.linkedBrowser.loadURI(url, null, null);
305 328
329 Object.assign(result, yield getPageInfo(tab, timeout));
306 result.finalUrl = tab.linkedBrowser.currentURI.spec; 330 result.finalUrl = tab.linkedBrowser.currentURI.spec;
307 Object.assign(result, yield getPageInfo(tab, timeout));
308 result.endTime = Date.now(); 331 result.endTime = Date.now();
309 } 332 }
310 finally 333 finally
311 { 334 {
312 if (requestNotifier) 335 if (requestNotifier)
313 requestNotifier.shutdown(); 336 requestNotifier.shutdown();
314 tabAllocator.releaseTab(tab); 337 tabAllocator.releaseTab(tab);
315 } 338 }
316 return result; 339 return result;
317 } 340 }
318 341
319 function reportException(e) 342 function reportException(e)
320 { 343 {
321 let stack = ""; 344 let stack = "";
322 if (e && typeof e == "object" && "stack" in e) 345 if (e && typeof e == "object" && "stack" in e)
323 stack = e.stack + "\n"; 346 stack = e.stack + "\n";
324 347
325 Cu.reportError(e); 348 Cu.reportError(e);
326 dump(e + "\n" + stack + "\n"); 349 dump(e + "\n" + stack + "\n");
327 } 350 }
328
329 let {addonRoot} = require("info");
330 let frameScriptPath = addonRoot + "/lib/child/frameScript.js";
331 let globalMessageManager = Services.mm;
332 globalMessageManager.loadFrameScript(frameScriptPath, true);
333
334 onShutdown.add(() =>
335 {
336 globalMessageManager.removeDelayedFrameScript(frameScriptPath);
337 });
LEFT RIGHT

Powered by Google App Engine
This is Rietveld