Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Delta Between Two Patch Sets: lib/crawler.js

Issue 29338242: Issue 3792 - Fix to support multiprocess firefox (Closed)
Left Patch Set: rebase, fix race condition and support canvas limits Created April 21, 2016, 5:13 p.m.
Right Patch Set: change comment Created Sept. 30, 2016, 12:43 p.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
Left: Side by side diff | Download
Right: Side by side diff | Download
« no previous file with change/comment | « lib/child/frameScript.js ('k') | no next file » | no next file with change/comment »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
LEFT RIGHT
1 /* 1 /*
2 * This Source Code is subject to the terms of the Mozilla Public License 2 * This Source Code is subject to the terms of the Mozilla Public License
3 * version 2.0 (the "License"). You can obtain a copy of the License at 3 * version 2.0 (the "License"). You can obtain a copy of the License at
4 * http://mozilla.org/MPL/2.0/. 4 * http://mozilla.org/MPL/2.0/.
5 */ 5 */
6 6
7 "use strict";
8
7 /** 9 /**
8 * @module crawler 10 * @module crawler
9 */ 11 */
10 12
11 Cu.import("resource://gre/modules/Services.jsm"); 13 const {Services} = Cu.import("resource://gre/modules/Services.jsm", {});
12 Cu.import("resource://gre/modules/Task.jsm"); 14 const {XPCOMUtils} = Cu.import("resource://gre/modules/XPCOMUtils.jsm", {});
13 Cu.import("resource://gre/modules/Promise.jsm"); 15 const {Task} = Cu.import("resource://gre/modules/Task.jsm", {});
14 Cu.import("resource://gre/modules/Timer.jsm"); 16 const {setTimeout, clearTimeout} = Cu.import("resource://gre/modules/Timer.jsm", {});
15 17
16 function abprequire(module) 18 function abprequire(module)
17 { 19 {
18 let result = {}; 20 let result = {};
19 result.wrappedJSObject = result; 21 result.wrappedJSObject = result;
20 Services.obs.notifyObservers(result, "adblockplus-require", module); 22 Services.obs.notifyObservers(result, "adblockplus-require", module);
21 return result.exports; 23 return result.exports;
22 } 24 }
23 25
24 let {RequestNotifier} = abprequire("requestNotifier"); 26 let {RequestNotifier} = abprequire("requestNotifier");
(...skipping 10 matching lines...) Expand all
35 * @constructor 37 * @constructor
36 */ 38 */
37 function TabAllocator(browser, maxtabs) 39 function TabAllocator(browser, maxtabs)
38 { 40 {
39 this._browser = browser; 41 this._browser = browser;
40 this._tabs = 0; 42 this._tabs = 0;
41 this._maxtabs = maxtabs; 43 this._maxtabs = maxtabs;
42 // The queue containing resolve functions of promises waiting for a tab. 44 // The queue containing resolve functions of promises waiting for a tab.
43 this._resolvers = []; 45 this._resolvers = [];
44 // Keep at least one tab alive to prevent browser from closing itself. 46 // Keep at least one tab alive to prevent browser from closing itself.
45 let tabToRemove = this._browser.tabs[0]; 47 this._tabKeepingWindowAlive = this._browser.tabs[0];
46 this._browser.removeAllTabsBut(tabToRemove); 48 this._browser.removeAllTabsBut(this._tabKeepingWindowAlive);
47 // this._tab is a keep alive tab
48 this._tab = this._createTab().then(tab =>
49 {
50 // Starting from Firefox 48 (nightly) the sequence of calls addTab and
51 // removeTab can cause a closing of the browser because a new tab is still
52 // not here. Because of that we need to remove the previous tab only after
53 // the new tab is ready.
54 this._browser.removeTab(tabToRemove);
55 return tab;
56 });
57 } 49 }
58 TabAllocator.prototype = { 50 TabAllocator.prototype = {
51 _removeTabKeepingWindowAlive: function()
52 {
53 if (!this._tabKeepingWindowAlive)
54 return;
55 this._browser.removeTab(this._tabKeepingWindowAlive);
56 delete this._tabKeepingWindowAlive;
57 },
58
59 /** 59 /**
60 * Creates a blank tab in this._browser. 60 * Creates a blank tab in this._browser.
61 * 61 *
62 * @return {Promise.<tab>} promise which resolves once the tab is fully initialized. 62 * @return {Promise.<tab>} promise which resolves once the tab is fully initialized.
63 */ 63 */
64 _createTab: function() 64 _createTab: function()
65 { 65 {
66 this._tabs++; 66 this._tabs++;
67 let tab = this._browser.addTab("about:blank"); 67 let tab = this._browser.addTab("about:blank");
68 if (tab.linkedBrowser.outerWindowID) 68 if (tab.linkedBrowser.outerWindowID)
69 {
70 this._removeTabKeepingWindowAlive();
69 return Promise.resolve(tab); 71 return Promise.resolve(tab);
72 }
70 return new Promise((resolve, reject) => 73 return new Promise((resolve, reject) =>
71 { 74 {
72 let onBrowserInit = (msg) => 75 let onBrowserInit = (msg) =>
73 { 76 {
74 // https://bugzilla.mozilla.org/show_bug.cgi?id=1256602#c1
75 tab.linkedBrowser.messageManager.removeMessageListener("Browser:Init", onBrowserInit); 77 tab.linkedBrowser.messageManager.removeMessageListener("Browser:Init", onBrowserInit);
78 this._removeTabKeepingWindowAlive();
76 resolve(tab); 79 resolve(tab);
77 }; 80 };
81 // "Browser:Init" message is sent once the browser is ready, see
82 // https://bugzil.la/1256602#c1
78 tab.linkedBrowser.messageManager.addMessageListener("Browser:Init", onBrowserInit); 83 tab.linkedBrowser.messageManager.addMessageListener("Browser:Init", onBrowserInit);
79 }); 84 });
80 }, 85 },
81 86
82 /** 87 /**
83 * Returns a promise that will resolve into a tab once a tab is allocated. 88 * Returns a promise that will resolve into a tab once a tab is allocated.
84 * The tab cannot be used by other tasks until releaseTab() is called. 89 * The tab cannot be used by other tasks until releaseTab() is called.
85 * 90 *
86 * @result {Promise.<tab>} 91 * @result {Promise.<tab>}
87 */ 92 */
88 getTab: function() 93 getTab: function()
89 { 94 {
90 if (this._tab)
91 {
92 let tab = this._tab;
93 delete this._tab;
94 return tab;
95 }
96 if (this._tabs < this._maxtabs) 95 if (this._tabs < this._maxtabs)
97 return this._createTab(); 96 return this._createTab();
98 return new Promise((resolve, reject) => this._resolvers.push(resolve)); 97 return new Promise((resolve, reject) => this._resolvers.push(resolve));
99 }, 98 },
100 99
101 /** 100 /**
102 * Adds a tab back to the pool so that it can be used by other tasks. 101 * Adds a tab back to the pool so that it can be used by other tasks.
103 * 102 *
104 * @param {tab} tab 103 * @param {tab} tab
105 */ 104 */
106 releaseTab: function(tab) 105 releaseTab: function(tab)
107 { 106 {
108 // If we are about to close last tab don't close it immediately rather 107 // If we are about to close last tab don't close it immediately to keep
109 // allocate a new blank tab and close the current one afterwards. 108 // the window alive. It will be closed when a new tab is created.
110 if (this._tabs == 1) 109 if (this._tabs > 1)
111 { 110 this._browser.removeTab(tab);
112 this._tab = this._createTab().then((resultTab) => 111 else
113 { 112 {
114 this.releaseTab(tab); 113 // navigate away from previously opened URL
115 return resultTab; 114 tab.linkedBrowser.loadURI("about:blank", null, null);
116 }); 115 this._tabKeepingWindowAlive = tab;
117 return; 116 }
118 } 117
119
120 this._browser.removeTab(tab);
121 this._tabs--; 118 this._tabs--;
122 if (this._resolvers.length) 119 if (this._resolvers.length && this._tabs < this._maxtabs)
123 { 120 {
124 if (this._tab) 121 this._resolvers.shift()(this._createTab());
125 {
126 this._resolvers.shift()(this._tab);
127 delete this._tab;
128 }
129 else if (this._tabs < this._maxtabs)
130 {
131 this._resolvers.shift()(this._createTab());
132 }
133 } 122 }
134 }, 123 },
135 }; 124 };
136 125
137 /** 126 /**
138 * Once created, this object will make sure all new windows are dismissed 127 * Once created, this object will make sure all new windows are dismissed
139 * immediately. 128 * immediately.
140 * 129 *
141 * @constructor 130 * @constructor
142 */ 131 */
(...skipping 18 matching lines...) Expand all
161 { 150 {
162 if (window.document.documentElement.localName == 'dialog') 151 if (window.document.documentElement.localName == 'dialog')
163 window.document.documentElement.acceptDialog(); 152 window.document.documentElement.acceptDialog();
164 else 153 else
165 window.close(); 154 window.close();
166 }, false); 155 }, false);
167 }, 156 },
168 157
169 QueryInterface: XPCOMUtils.generateQI([Ci.nsIObserver, Ci.nsISupportsWeakReference]) 158 QueryInterface: XPCOMUtils.generateQI([Ci.nsIObserver, Ci.nsISupportsWeakReference])
170 }; 159 };
160
161 function configureFrameScript()
162 {
163 const info = require("info");
164 let frameScriptPath = info.addonRoot + "/lib/child/frameScript.js";
165 Services.mm.loadFrameScript(frameScriptPath, true);
166
167 onShutdown.add(() =>
168 {
169 Services.mm.removeDelayedFrameScript(frameScriptPath);
170 });
171 }
171 172
172 /** 173 /**
173 * Starts the crawling session. The crawler opens each URL in a tab and stores 174 * Starts the crawling session. The crawler opens each URL in a tab and stores
174 * the results. 175 * the results.
175 * 176 *
176 * @param {Window} window 177 * @param {Window} window
177 * The browser window we're operating in 178 * The browser window we're operating in
178 * @param {String[]} urls 179 * @param {String[]} urls
179 * URLs to be crawled 180 * URLs to be crawled
180 * @param {int} timeout 181 * @param {int} timeout
181 * Load timeout in milliseconds 182 * Load timeout in milliseconds
182 * @param {int} maxtabs 183 * @param {int} maxtabs
183 * Maximum number of tabs to be opened 184 * Maximum number of tabs to be opened
184 * @param {String} targetURL 185 * @param {String} targetURL
185 * URL that should receive the results 186 * URL that should receive the results
186 * @param {Function} onDone 187 * @param {Function} onDone
187 * The callback which is called after finishing of crawling of all URLs. 188 * The callback which is called after finishing of crawling of all URLs.
188 */ 189 */
189 function run(window, urls, timeout, maxtabs, targetURL, onDone) 190 function run(window, urls, timeout, maxtabs, targetURL, onDone)
190 { 191 {
192 configureFrameScript();
191 new Promise((resolve, reject) => 193 new Promise((resolve, reject) =>
192 { 194 {
193 if (FilterStorage.subscriptions.length > 0) 195 if (FilterStorage.subscriptions.length > 0)
194 { 196 {
195 resolve(); 197 resolve();
196 return; 198 return;
197 } 199 }
198 let onFiltersLoaded = (action, item, newValue, oldValue) => 200 let onFiltersLoaded = (action, item, newValue, oldValue) =>
199 { 201 {
200 if (action == "load") 202 if (action == "load")
(...skipping 63 matching lines...) Expand 10 before | Expand all | Expand 10 after
264 startTime: Date.now(), 266 startTime: Date.now(),
265 error: String(exception) 267 error: String(exception)
266 })); 268 }));
267 }.bind(null, url)); 269 }.bind(null, url));
268 } 270 }
269 } 271 }
270 272
271 /** 273 /**
272 * Expects to receive page info gathered in a content process for the specified 274 * Expects to receive page info gathered in a content process for the specified
273 * `tab`. If there is no relevant message within specified `timeout` then 275 * `tab`. If there is no relevant message within specified `timeout` then
274 * the result promise is resolve with error object. 276 * the result promise is resolved with error object.
275 * @param tab 277 * @param tab
276 * Tab in which we are interested in 278 * Tab in which we are interested in
277 * @param {int} timeout 279 * @param {int} timeout
278 * Timeout in milliseconds 280 * Timeout in milliseconds
279 * @return {Promise} promise which will be resolved with the received page info 281 * @return {Promise} promise which will be resolved with the received page info
280 */ 282 */
281 function getPageInfo(tab, timeout) 283 function getPageInfo(tab, timeout)
282 { 284 {
283 return new Promise((resolve, result) => 285 return new Promise((resolve, result) =>
284 { 286 {
285 let mm = tab.linkedBrowser.messageManager; 287 let mm = tab.linkedBrowser.messageManager;
286 let timerID; 288 let timerID;
287 let onDone = (pageInfo) => 289 let onDone = (msg) =>
288 { 290 {
289 mm.removeMessageListener("abpcrawler:pageInfoGathered", onDone); 291 mm.removeMessageListener("abpcrawler:pageInfoGathered", onDone);
290 clearTimeout(timerID); 292 clearTimeout(timerID);
291 resolve(pageInfo); 293 resolve(msg.data);
292 } 294 }
293 mm.addMessageListener("abpcrawler:pageInfoGathered", (msg) => onDone(msg.data));; 295 mm.addMessageListener("abpcrawler:pageInfoGathered", onDone);
294 timerID = setTimeout(onDone.bind(this, {error: "timeout"}), timeout); 296 timerID = setTimeout(() => onDone({data: {error: "timeout"}}), timeout);
295 }); 297 });
296 } 298 }
297 299
298 /** 300 /**
299 * Crawls a URL. This is a generator meant to be used via a Task object. 301 * Crawls a URL. This is a generator meant to be used via a Task object.
300 * 302 *
301 * @param {String} url 303 * @param {String} url
302 * @param {TabAllocator} tabAllocator 304 * @param {TabAllocator} tabAllocator
303 * @param {int} timeout 305 * @param {int} timeout
304 * Load timeout in milliseconds 306 * Load timeout in milliseconds
(...skipping 12 matching lines...) Expand all
317 function(entry, scanComplete) 319 function(entry, scanComplete)
318 { 320 {
319 if (!entry) 321 if (!entry)
320 return; 322 return;
321 let {type: contentType, location, filter} = entry; 323 let {type: contentType, location, filter} = entry;
322 result.requests.push({location, contentType, filter}); 324 result.requests.push({location, contentType, filter});
323 }); 325 });
324 326
325 tab.linkedBrowser.loadURI(url, null, null); 327 tab.linkedBrowser.loadURI(url, null, null);
326 328
329 Object.assign(result, yield getPageInfo(tab, timeout));
327 result.finalUrl = tab.linkedBrowser.currentURI.spec; 330 result.finalUrl = tab.linkedBrowser.currentURI.spec;
328 Object.assign(result, yield getPageInfo(tab, timeout));
329 result.endTime = Date.now(); 331 result.endTime = Date.now();
330 } 332 }
331 finally 333 finally
332 { 334 {
333 if (requestNotifier) 335 if (requestNotifier)
334 requestNotifier.shutdown(); 336 requestNotifier.shutdown();
335 tabAllocator.releaseTab(tab); 337 tabAllocator.releaseTab(tab);
336 } 338 }
337 return result; 339 return result;
338 } 340 }
339 341
340 function reportException(e) 342 function reportException(e)
341 { 343 {
342 let stack = ""; 344 let stack = "";
343 if (e && typeof e == "object" && "stack" in e) 345 if (e && typeof e == "object" && "stack" in e)
344 stack = e.stack + "\n"; 346 stack = e.stack + "\n";
345 347
346 Cu.reportError(e); 348 Cu.reportError(e);
347 dump(e + "\n" + stack + "\n"); 349 dump(e + "\n" + stack + "\n");
348 } 350 }
349
350 let {addonRoot} = require("info");
351 let frameScriptPath = addonRoot + "/lib/child/frameScript.js";
352 let globalMessageManager = Services.mm;
353 globalMessageManager.loadFrameScript(frameScriptPath, true);
354
355 onShutdown.add(() =>
356 {
357 globalMessageManager.removeDelayedFrameScript(frameScriptPath);
358 });
LEFT RIGHT

Powered by Google App Engine
This is Rietveld