Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Delta Between Two Patch Sets: lib/crawler.js

Issue 29338242: Issue 3792 - Fix to support multiprocess firefox (Closed)
Left Patch Set: fix issue with beacons Created April 7, 2016, 8:31 p.m.
Right Patch Set: change comment Created Sept. 30, 2016, 12:43 p.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
Left: Side by side diff | Download
Right: Side by side diff | Download
« no previous file with change/comment | « lib/child/frameScript.js ('k') | no next file » | no next file with change/comment »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
LEFTRIGHT
1 /* 1 /*
2 * This Source Code is subject to the terms of the Mozilla Public License 2 * This Source Code is subject to the terms of the Mozilla Public License
3 * version 2.0 (the "License"). You can obtain a copy of the License at 3 * version 2.0 (the "License"). You can obtain a copy of the License at
4 * http://mozilla.org/MPL/2.0/. 4 * http://mozilla.org/MPL/2.0/.
5 */ 5 */
6 6
7 "use strict";
8
7 /** 9 /**
8 * @module crawler 10 * @module crawler
9 */ 11 */
10 12
11 Cu.import("resource://gre/modules/Services.jsm"); 13 const {Services} = Cu.import("resource://gre/modules/Services.jsm", {});
12 Cu.import("resource://gre/modules/Task.jsm"); 14 const {XPCOMUtils} = Cu.import("resource://gre/modules/XPCOMUtils.jsm", {});
13 Cu.import("resource://gre/modules/Promise.jsm"); 15 const {Task} = Cu.import("resource://gre/modules/Task.jsm", {});
14 Cu.import("resource://gre/modules/Timer.jsm"); 16 const {setTimeout, clearTimeout} = Cu.import("resource://gre/modules/Timer.jsm", {});
15 17
16 function abprequire(module) 18 function abprequire(module)
17 { 19 {
18 let result = {}; 20 let result = {};
19 result.wrappedJSObject = result; 21 result.wrappedJSObject = result;
20 Services.obs.notifyObservers(result, "adblockplus-require", module); 22 Services.obs.notifyObservers(result, "adblockplus-require", module);
21 return result.exports; 23 return result.exports;
22 } 24 }
23 25
24 let {RequestNotifier} = abprequire("requestNotifier"); 26 let {RequestNotifier} = abprequire("requestNotifier");
25 let {FilterNotifier} = abprequire("filterNotifier"); 27 let {FilterNotifier} = abprequire("filterNotifier");
26 let {FilterStorage} = abprequire("filterStorage"); 28 let {FilterStorage} = abprequire("filterStorage");
27
28 /**
29 * Creates a blank tab in the browser.
30 *
31 * @param {tabbrowser} browser
32 * The tabbed browser where the tab should be created.
33 * @return {Promise.<tab>} promise which resolves once the tab is fully initialized.
34 */
35 function createTab(browser)
36 {
37 let tab = browser.addTab("about:blank");
38 if (tab.linkedBrowser.outerWindowID)
39 return Promise.resolve(tab);
40 return new Promise((resolve, reject) =>
41 {
42 let onBrowserInit = (msg) =>
43 {
44 // https://bugzilla.mozilla.org/show_bug.cgi?id=1256602#c1
45 tab.linkedBrowser.messageManager.removeMessageListener("Browser:Init", onBrowserInit);
46 resolve(tab);
47 };
48 tab.linkedBrowser.messageManager.addMessageListener("Browser:Init", onBrowserInit);
49 });
50 }
51 29
52 /** 30 /**
53 * Allocates tabs on request but not more than maxtabs at the same time. 31 * Allocates tabs on request but not more than maxtabs at the same time.
54 * 32 *
55 * @param {tabbrowser} browser 33 * @param {tabbrowser} browser
56 * The tabbed browser where tabs should be created 34 * The tabbed browser where tabs should be created
57 * @param {int} maxtabs 35 * @param {int} maxtabs
58 * The maximum number of tabs to be allocated 36 * The maximum number of tabs to be allocated
59 * @constructor 37 * @constructor
60 */ 38 */
61 function TabAllocator(browser, maxtabs) 39 function TabAllocator(browser, maxtabs)
62 { 40 {
63 this._browser = browser; 41 this._browser = browser;
64 this._tabs = 0; 42 this._tabs = 0;
65 this._maxtabs = maxtabs; 43 this._maxtabs = maxtabs;
66 // The queue containing resolve functions of promises waiting for a tab. 44 // The queue containing resolve functions of promises waiting for a tab.
67 this._resolvers = []; 45 this._resolvers = [];
68 // Keep at least one tab alive to prevent browser from closing itself. 46 // Keep at least one tab alive to prevent browser from closing itself.
69 // That tab will be removed when the first tab is requested. 47 this._tabKeepingWindowAlive = this._browser.tabs[0];
70 browser.removeAllTabsBut(browser.tabs[0]) 48 this._browser.removeAllTabsBut(this._tabKeepingWindowAlive);
71 } 49 }
72 TabAllocator.prototype = { 50 TabAllocator.prototype = {
51 _removeTabKeepingWindowAlive: function()
52 {
53 if (!this._tabKeepingWindowAlive)
54 return;
55 this._browser.removeTab(this._tabKeepingWindowAlive);
56 delete this._tabKeepingWindowAlive;
57 },
58
59 /**
60 * Creates a blank tab in this._browser.
61 *
62 * @return {Promise.<tab>} promise which resolves once the tab is fully initialized.
63 */
64 _createTab: function()
65 {
66 this._tabs++;
67 let tab = this._browser.addTab("about:blank");
68 if (tab.linkedBrowser.outerWindowID)
69 {
70 this._removeTabKeepingWindowAlive();
71 return Promise.resolve(tab);
72 }
73 return new Promise((resolve, reject) =>
74 {
75 let onBrowserInit = (msg) =>
76 {
77 tab.linkedBrowser.messageManager.removeMessageListener("Browser:Init", onBrowserInit);
78 this._removeTabKeepingWindowAlive();
79 resolve(tab);
80 };
81 // "Browser:Init" message is sent once the browser is ready, see
82 // https://bugzil.la/1256602#c1
83 tab.linkedBrowser.messageManager.addMessageListener("Browser:Init", onBrowserInit);
84 });
85 },
86
73 /** 87 /**
74 * Returns a promise that will resolve into a tab once a tab is allocated. 88 * Returns a promise that will resolve into a tab once a tab is allocated.
75 * The tab cannot be used by other tasks until releaseTab() is called. 89 * The tab cannot be used by other tasks until releaseTab() is called.
76 * 90 *
77 * @result {Promise.<tab>} 91 * @result {Promise.<tab>}
78 */ 92 */
79 getTab: function() 93 getTab: function()
80 { 94 {
81 if (this._tabs < this._maxtabs) 95 if (this._tabs < this._maxtabs)
82 { 96 return this._createTab();
83 let tab = createTab(this._browser); 97 return new Promise((resolve, reject) => this._resolvers.push(resolve));
84 // Close initial tab, we don't need it anymore.
85 if (this._tabs == 0)
86 this._browser.removeTab(this._browser.tabs[0]);
87 this._tabs++;
88 return tab;
89 }
90 return new Promise((resolve, reject) =>
91 {
92 this._resolvers.push(resolve);
93 });
94 }, 98 },
95 99
96 /** 100 /**
97 * Adds a tab back to the pool so that it can be used by other tasks. 101 * Adds a tab back to the pool so that it can be used by other tasks.
98 * 102 *
99 * @param {tab} tab 103 * @param {tab} tab
100 */ 104 */
101 releaseTab: function(tab) 105 releaseTab: function(tab)
102 { 106 {
103 let browser = tab.parentNode.tabbrowser; 107 // If we are about to close last tab don't close it immediately to keep
104 browser.removeTab(tab); 108 // the window alive. It will be closed when a new tab is created.
105 109 if (this._tabs > 1)
106 if (this._resolvers.length) 110 this._browser.removeTab(tab);
107 this._resolvers.shift()(createTab(this._browser));
108 else 111 else
109 this._tabs--; 112 {
110 } 113 // navigate away from previously opened URL
114 tab.linkedBrowser.loadURI("about:blank", null, null);
115 this._tabKeepingWindowAlive = tab;
116 }
117
118 this._tabs--;
119 if (this._resolvers.length && this._tabs < this._maxtabs)
120 {
121 this._resolvers.shift()(this._createTab());
122 }
123 },
111 }; 124 };
112 125
113 /** 126 /**
114 * Once created, this object will make sure all new windows are dismissed 127 * Once created, this object will make sure all new windows are dismissed
115 * immediately. 128 * immediately.
116 * 129 *
117 * @constructor 130 * @constructor
118 */ 131 */
119 function WindowCloser() 132 function WindowCloser()
120 { 133 {
(...skipping 16 matching lines...) Expand all
137 { 150 {
138 if (window.document.documentElement.localName == 'dialog') 151 if (window.document.documentElement.localName == 'dialog')
139 window.document.documentElement.acceptDialog(); 152 window.document.documentElement.acceptDialog();
140 else 153 else
141 window.close(); 154 window.close();
142 }, false); 155 }, false);
143 }, 156 },
144 157
145 QueryInterface: XPCOMUtils.generateQI([Ci.nsIObserver, Ci.nsISupportsWeakReference]) 158 QueryInterface: XPCOMUtils.generateQI([Ci.nsIObserver, Ci.nsISupportsWeakReference])
146 }; 159 };
160
161 function configureFrameScript()
162 {
163 const info = require("info");
164 let frameScriptPath = info.addonRoot + "/lib/child/frameScript.js";
165 Services.mm.loadFrameScript(frameScriptPath, true);
166
167 onShutdown.add(() =>
168 {
169 Services.mm.removeDelayedFrameScript(frameScriptPath);
170 });
171 }
147 172
148 /** 173 /**
149 * Starts the crawling session. The crawler opens each URL in a tab and stores 174 * Starts the crawling session. The crawler opens each URL in a tab and stores
150 * the results. 175 * the results.
151 * 176 *
152 * @param {Window} window 177 * @param {Window} window
153 * The browser window we're operating in 178 * The browser window we're operating in
154 * @param {String[]} urls 179 * @param {String[]} urls
155 * URLs to be crawled 180 * URLs to be crawled
156 * @param {int} timeout 181 * @param {int} timeout
157 * Load timeout in milliseconds 182 * Load timeout in milliseconds
158 * @param {int} maxtabs 183 * @param {int} maxtabs
159 * Maximum number of tabs to be opened 184 * Maximum number of tabs to be opened
160 * @param {String} targetURL 185 * @param {String} targetURL
161 * URL that should receive the results 186 * URL that should receive the results
162 * @param {Function} onDone 187 * @param {Function} onDone
163 * The callback which is called after finishing of crawling of all URLs. 188 * The callback which is called after finishing of crawling of all URLs.
164 */ 189 */
165 function run(window, urls, timeout, maxtabs, targetURL, onDone) 190 function run(window, urls, timeout, maxtabs, targetURL, onDone)
166 { 191 {
192 configureFrameScript();
167 new Promise((resolve, reject) => 193 new Promise((resolve, reject) =>
168 { 194 {
169 if (FilterStorage.subscriptions.length > 0) 195 if (FilterStorage.subscriptions.length > 0)
170 { 196 {
171 resolve(); 197 resolve();
172 return; 198 return;
173 } 199 }
174 let onFiltersLoaded = (action, item, newValue, oldValue) => 200 let onFiltersLoaded = (action, item, newValue, oldValue) =>
175 { 201 {
176 if (action == "load") 202 if (action == "load")
(...skipping 63 matching lines...) Expand 10 before | Expand all | Expand 10 after
240 startTime: Date.now(), 266 startTime: Date.now(),
241 error: String(exception) 267 error: String(exception)
242 })); 268 }));
243 }.bind(null, url)); 269 }.bind(null, url));
244 } 270 }
245 } 271 }
246 272
247 /** 273 /**
248 * Expects to receive page info gathered in a content process for the specified 274 * Expects to receive page info gathered in a content process for the specified
249 * `tab`. If there is no relevant message within specified `timeout` then 275 * `tab`. If there is no relevant message within specified `timeout` then
250 * the result promise is resolve with error object. 276 * the result promise is resolved with error object.
251 * @param tab 277 * @param tab
252 * Tab in which we are interested in 278 * Tab in which we are interested in
253 * @param {int} timeout 279 * @param {int} timeout
254 * Timeout in milliseconds 280 * Timeout in milliseconds
255 * @return {Promise} promise which will be resolved with the received page info 281 * @return {Promise} promise which will be resolved with the received page info
256 */ 282 */
257 function getPageInfo(tab, timeout) 283 function getPageInfo(tab, timeout)
258 { 284 {
259 return new Promise((resolve, result) => 285 return new Promise((resolve, result) =>
260 { 286 {
261 let mm = tab.linkedBrowser.messageManager; 287 let mm = tab.linkedBrowser.messageManager;
262 let timerID; 288 let timerID;
263 let onDone = (pageInfo) => 289 let onDone = (msg) =>
264 { 290 {
265 mm.removeMessageListener("abpcrawler:pageInfoGathered", onDone); 291 mm.removeMessageListener("abpcrawler:pageInfoGathered", onDone);
266 clearTimeout(timerID); 292 clearTimeout(timerID);
267 resolve(pageInfo); 293 resolve(msg.data);
268 } 294 }
269 mm.addMessageListener("abpcrawler:pageInfoGathered", (msg) => onDone(msg.data));; 295 mm.addMessageListener("abpcrawler:pageInfoGathered", onDone);
270 timerID = setTimeout(onDone.bind(this, {error: "timeout"}), timeout); 296 timerID = setTimeout(() => onDone({data: {error: "timeout"}}), timeout);
271 }); 297 });
272 } 298 }
273 299
274 /** 300 /**
275 * Crawls a URL. This is a generator meant to be used via a Task object. 301 * Crawls a URL. This is a generator meant to be used via a Task object.
276 * 302 *
277 * @param {String} url 303 * @param {String} url
278 * @param {TabAllocator} tabAllocator 304 * @param {TabAllocator} tabAllocator
279 * @param {int} timeout 305 * @param {int} timeout
280 * Load timeout in milliseconds 306 * Load timeout in milliseconds
(...skipping 12 matching lines...) Expand all
293 function(entry, scanComplete) 319 function(entry, scanComplete)
294 { 320 {
295 if (!entry) 321 if (!entry)
296 return; 322 return;
297 let {type: contentType, location, filter} = entry; 323 let {type: contentType, location, filter} = entry;
298 result.requests.push({location, contentType, filter}); 324 result.requests.push({location, contentType, filter});
299 }); 325 });
300 326
301 tab.linkedBrowser.loadURI(url, null, null); 327 tab.linkedBrowser.loadURI(url, null, null);
302 328
329 Object.assign(result, yield getPageInfo(tab, timeout));
303 result.finalUrl = tab.linkedBrowser.currentURI.spec; 330 result.finalUrl = tab.linkedBrowser.currentURI.spec;
304 Object.assign(result, yield getPageInfo(tab, timeout));
305 result.endTime = Date.now(); 331 result.endTime = Date.now();
306 } 332 }
307 finally 333 finally
308 { 334 {
309 if (requestNotifier) 335 if (requestNotifier)
310 requestNotifier.shutdown(); 336 requestNotifier.shutdown();
311 tabAllocator.releaseTab(tab); 337 tabAllocator.releaseTab(tab);
312 } 338 }
313 return result; 339 return result;
314 } 340 }
315 341
316 function reportException(e) 342 function reportException(e)
317 { 343 {
318 let stack = ""; 344 let stack = "";
319 if (e && typeof e == "object" && "stack" in e) 345 if (e && typeof e == "object" && "stack" in e)
320 stack = e.stack + "\n"; 346 stack = e.stack + "\n";
321 347
322 Cu.reportError(e); 348 Cu.reportError(e);
323 dump(e + "\n" + stack + "\n"); 349 dump(e + "\n" + stack + "\n");
324 } 350 }
325
326 let {addonRoot} = require("info");
327 let frameScriptPath = addonRoot + "/lib/child/frameScript.js";
328 let globalMessageManager = Services.mm;
329 globalMessageManager.loadFrameScript(frameScriptPath, true);
330
331 onShutdown.add(() =>
332 {
333 globalMessageManager.removeDelayedFrameScript(frameScriptPath);
334 });
LEFTRIGHT

Powered by Google App Engine
This is Rietveld