Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Side by Side Diff: lib/crawler.js

Issue 29338242: Issue 3792 - Fix to support multiprocess firefox (Closed)
Patch Set: fix race condition Created April 22, 2016, 12:32 p.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View unified diff | Download patch
« lib/child/frameScript.js ('K') | « lib/child/frameScript.js ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * This Source Code is subject to the terms of the Mozilla Public License 2 * This Source Code is subject to the terms of the Mozilla Public License
3 * version 2.0 (the "License"). You can obtain a copy of the License at 3 * version 2.0 (the "License"). You can obtain a copy of the License at
4 * http://mozilla.org/MPL/2.0/. 4 * http://mozilla.org/MPL/2.0/.
5 */ 5 */
6 6
7 /** 7 /**
8 * @module crawler 8 * @module crawler
9 */ 9 */
10 10
11 Cu.import("resource://gre/modules/Services.jsm"); 11 Cu.import("resource://gre/modules/Services.jsm");
12 Cu.import("resource://gre/modules/Task.jsm"); 12 Cu.import("resource://gre/modules/Task.jsm");
13 Cu.import("resource://gre/modules/Promise.jsm"); 13 Cu.import("resource://gre/modules/Promise.jsm");
Wladimir Palant 2016/09/14 16:13:18 Just realized that this import hasn't been removed
14 Cu.import("resource://gre/modules/Timer.jsm");
Wladimir Palant 2016/09/14 16:13:18 Nit: We don't import like that any more, symbols s
sergei 2016/09/29 09:58:14 Fixed and the rest related to coding style is addr
14 15
15 function abprequire(module) 16 function abprequire(module)
16 { 17 {
17 let result = {}; 18 let result = {};
18 result.wrappedJSObject = result; 19 result.wrappedJSObject = result;
19 Services.obs.notifyObservers(result, "adblockplus-require", module); 20 Services.obs.notifyObservers(result, "adblockplus-require", module);
20 return result.exports; 21 return result.exports;
21 } 22 }
22 23
23 let {RequestNotifier} = abprequire("requestNotifier"); 24 let {RequestNotifier} = abprequire("requestNotifier");
(...skipping 103 matching lines...) Expand 10 before | Expand all | Expand 10 after
127 } 128 }
128 else if (this._tabs < this._maxtabs) 129 else if (this._tabs < this._maxtabs)
129 { 130 {
130 this._resolvers.shift()(this._createTab()); 131 this._resolvers.shift()(this._createTab());
131 } 132 }
132 } 133 }
133 }, 134 },
134 }; 135 };
135 136
136 /** 137 /**
137 * Observes page loads in a particular tabbed browser.
138 *
139 * @param {tabbrowser} browser
140 * The tabbed browser to be observed
141 * @param {int} timeout
142 * Load timeout in milliseconds
143 * @constructor
144 */
145 function LoadListener(browser, timeout)
146 {
147 this._browser = browser;
148 this._deferred = new Map();
149 this._timeout = timeout;
150 browser.addTabsProgressListener(this);
151 }
152 LoadListener.prototype = {
153 /**
154 * Returns a promise that will be resolved when the page in the specified tab
155 * finishes loading. Loading will be stopped if the timeout is reached.
156 *
157 * @param {tab} tab
158 * @result {Promise}
159 */
160 waitForLoad: function(tab)
161 {
162 let deferred = Promise.defer();
163 this._deferred.set(tab.linkedBrowser, deferred);
164
165 tab.ownerDocument.defaultView.setTimeout(function()
166 {
167 tab.linkedBrowser.stop();
168 }, this._timeout);
169
170 return deferred.promise;
171 },
172
173 /**
174 * Deactivates this object.
175 */
176 stop: function()
177 {
178 this._browser.removeTabsProgressListener(this);
179 },
180
181 onStateChange: function(browser, progress, request, flags, status)
182 {
183 if ((flags & Ci.nsIWebProgressListener.STATE_STOP) && (flags & Ci.nsIWebProgressListener.STATE_IS_WINDOW))
184 {
185 let deferred = this._deferred.get(browser);
186 if (deferred)
187 {
188 this._deferred.delete(browser);
189
190 let headers = [];
191 if (request instanceof Ci.nsIHttpChannel)
192 {
193 try
194 {
195 headers.push("HTTP/x.x " + request.responseStatus + " " + request.responseStatusText);
196 request.visitResponseHeaders((header, value) => headers.push(header + ": " + value));
197 }
198 catch (e)
199 {
200 // Exceptions are expected here
201 }
202 }
203 deferred.resolve([status, headers]);
204 }
205 }
206 }
207 };
208
209 /**
210 * Once created, this object will make sure all new windows are dismissed 138 * Once created, this object will make sure all new windows are dismissed
211 * immediately. 139 * immediately.
212 * 140 *
213 * @constructor 141 * @constructor
214 */ 142 */
215 function WindowCloser() 143 function WindowCloser()
216 { 144 {
217 Services.obs.addObserver(this, "xul-window-registered", true) 145 Services.obs.addObserver(this, "xul-window-registered", true)
218 } 146 }
219 WindowCloser.prototype = { 147 WindowCloser.prototype = {
(...skipping 73 matching lines...) Expand 10 before | Expand all | Expand 10 after
293 * @param {int} maxtabs 221 * @param {int} maxtabs
294 * Maximum number of tabs to be opened 222 * Maximum number of tabs to be opened
295 * @param {String} targetURL 223 * @param {String} targetURL
296 * URL that should receive the results 224 * URL that should receive the results
297 * @param {Function} onDone 225 * @param {Function} onDone
298 * The callback which is called after finishing of all tasks. 226 * The callback which is called after finishing of all tasks.
299 */ 227 */
300 function crawl_urls(window, urls, timeout, maxtabs, targetURL, onDone) 228 function crawl_urls(window, urls, timeout, maxtabs, targetURL, onDone)
301 { 229 {
302 let tabAllocator = new TabAllocator(window.getBrowser(), maxtabs); 230 let tabAllocator = new TabAllocator(window.getBrowser(), maxtabs);
303 let loadListener = new LoadListener(window.getBrowser(), timeout); 231
304 let running = 0; 232 let running = 0;
305 let windowCloser = new WindowCloser(); 233 let windowCloser = new WindowCloser();
306 let taskDone = function() 234 let taskDone = function()
307 { 235 {
308 running--; 236 running--;
309 if (running <= 0) 237 if (running <= 0)
310 { 238 {
311 loadListener.stop();
312 windowCloser.stop(); 239 windowCloser.stop();
313 onDone(); 240 onDone();
314 } 241 }
315 }; 242 };
316 243
317 for (let url of urls) 244 for (let url of urls)
318 { 245 {
319 running++; 246 running++;
320 Task.spawn(crawl_url.bind(null, url, tabAllocator, loadListener)).then(function(result) 247 Task.spawn(crawl_url.bind(null, url, tabAllocator, timeout)).then(function(result)
321 { 248 {
322 let request = new XMLHttpRequest(); 249 let request = new XMLHttpRequest();
323 request.open("POST", targetURL); 250 request.open("POST", targetURL);
324 request.addEventListener("load", taskDone, false); 251 request.addEventListener("load", taskDone, false);
325 request.addEventListener("error", taskDone, false); 252 request.addEventListener("error", taskDone, false);
326 request.send(JSON.stringify(result)); 253 request.send(JSON.stringify(result));
327 }, function(url, exception) 254 }, function(url, exception)
328 { 255 {
329 reportException(exception); 256 reportException(exception);
330 257
331 let request = new XMLHttpRequest(); 258 let request = new XMLHttpRequest();
332 request.open("POST", targetURL); 259 request.open("POST", targetURL);
333 request.addEventListener("load", taskDone, false); 260 request.addEventListener("load", taskDone, false);
334 request.addEventListener("error", taskDone, false); 261 request.addEventListener("error", taskDone, false);
335 request.send(JSON.stringify({ 262 request.send(JSON.stringify({
336 url: url, 263 url: url,
337 startTime: Date.now(), 264 startTime: Date.now(),
338 error: String(exception) 265 error: String(exception)
339 })); 266 }));
340 }.bind(null, url)); 267 }.bind(null, url));
341 } 268 }
342 } 269 }
343 270
344 /** 271 /**
272 * Expects to receive page info gathered in a content process for the specified
273 * `tab`. If there is no relevant message within specified `timeout` then
274 * the result promise is resolved with an error object.
275 * @param tab
276 * Tab in which we are interested in
277 * @param {int} timeout
278 * Timeout in milliseconds
279 * @return {Promise} promise which will be resolved with the received page info
280 */
281 function getPageInfo(tab, timeout)
282 {
283 return new Promise((resolve, result) =>
284 {
285 let mm = tab.linkedBrowser.messageManager;
286 let timerID;
287 let onDone = (pageInfo) =>
288 {
289 mm.removeMessageListener("abpcrawler:pageInfoGathered", onDone);
290 clearTimeout(timerID);
291 resolve(pageInfo);
292 }
293 mm.addMessageListener("abpcrawler:pageInfoGathered", (msg) => onDone(msg.data));;
Wladimir Palant 2016/09/14 16:11:48 That's not the callback you are removing above. Al
sergei 2016/09/29 09:58:13 Fixed. Sorry, overlooked.
294 timerID = setTimeout(onDone.bind(this, {error: "timeout"}), timeout);
Wladimir Palant 2016/09/14 16:11:49 How about not using bind() here, for clarity and c
sergei 2016/09/29 09:58:13 Done. Good idea.
295 });
296 }
297
298 /**
345 * Crawls a URL. This is a generator meant to be used via a Task object. 299 * Crawls a URL. This is a generator meant to be used via a Task object.
346 * 300 *
347 * @param {String} url 301 * @param {String} url
348 * @param {TabAllocator} tabAllocator 302 * @param {TabAllocator} tabAllocator
349 * @param {loadListener} loadListener 303 * @param {int} timeout
304 * Load timeout in milliseconds
350 * @result {Object} 305 * @result {Object}
351 * Crawling result 306 * Crawling result
352 */ 307 */
353 function* crawl_url(url, tabAllocator, loadListener) 308 function* crawl_url(url, tabAllocator, timeout)
354 { 309 {
355 let tab = yield tabAllocator.getTab(); 310 let tab = yield tabAllocator.getTab();
356 let result = {url, requests: []}; 311 let result = {url, requests: []};
357 let requestNotifier; 312 let requestNotifier;
358 try 313 try
359 { 314 {
360 result.startTime = Date.now(); 315 result.startTime = Date.now();
361 requestNotifier = new RequestNotifier(tab.linkedBrowser.outerWindowID, 316 requestNotifier = new RequestNotifier(tab.linkedBrowser.outerWindowID,
362 function(entry, scanComplete) 317 function(entry, scanComplete)
363 { 318 {
364 if (!entry) 319 if (!entry)
365 return; 320 return;
366 let {type: contentType, location, filter} = entry; 321 let {type: contentType, location, filter} = entry;
367 result.requests.push({location, contentType, filter}); 322 result.requests.push({location, contentType, filter});
368 }); 323 });
369 324
370 tab.linkedBrowser.loadURI(url, null, null); 325 tab.linkedBrowser.loadURI(url, null, null);
371 [result.channelStatus, result.headers] = yield loadListener.waitForLoad(tab); 326
327 Object.assign(result, yield getPageInfo(tab, timeout));
328 result.finalUrl = tab.linkedBrowser.currentURI.spec;
372 result.endTime = Date.now(); 329 result.endTime = Date.now();
373 result.finalUrl = tab.linkedBrowser.currentURI.spec;
374
375 let document = tab.linkedBrowser.contentDocument;
376 if (document.documentElement)
377 {
378 try
379 {
380 let canvas = document.createElementNS("http://www.w3.org/1999/xhtml", "canvas");
381 canvas.width = document.documentElement.scrollWidth;
382 canvas.height = document.documentElement.scrollHeight;
383
384 let context = canvas.getContext("2d");
385 context.drawWindow(document.defaultView, 0, 0, canvas.width, canvas.height, "rgb(255, 255, 255)");
386 result.screenshot = canvas.toDataURL("image/jpeg", 0.8);
387 }
388 catch (e)
389 {
390 reportException(e);
391 result.error = "Capturing screenshot failed: " + e;
392 }
393
394 // TODO: Capture frames as well?
395 let serializer = new tab.ownerDocument.defaultView.XMLSerializer();
396 result.source = serializer.serializeToString(document.documentElement);
397 }
398 } 330 }
399 finally 331 finally
400 { 332 {
401 if (requestNotifier) 333 if (requestNotifier)
402 requestNotifier.shutdown(); 334 requestNotifier.shutdown();
403 tabAllocator.releaseTab(tab); 335 tabAllocator.releaseTab(tab);
404 } 336 }
405 return result; 337 return result;
406 } 338 }
407 339
408 function reportException(e) 340 function reportException(e)
409 { 341 {
410 let stack = ""; 342 let stack = "";
411 if (e && typeof e == "object" && "stack" in e) 343 if (e && typeof e == "object" && "stack" in e)
412 stack = e.stack + "\n"; 344 stack = e.stack + "\n";
413 345
414 Cu.reportError(e); 346 Cu.reportError(e);
415 dump(e + "\n" + stack + "\n"); 347 dump(e + "\n" + stack + "\n");
416 } 348 }
349
350 let {addonRoot} = require("info");
351 let frameScriptPath = addonRoot + "/lib/child/frameScript.js";
352 let globalMessageManager = Services.mm;
353 globalMessageManager.loadFrameScript(frameScriptPath, true);
354
355 onShutdown.add(() =>
356 {
357 globalMessageManager.removeDelayedFrameScript(frameScriptPath);
358 });
OLDNEW
« lib/child/frameScript.js ('K') | « lib/child/frameScript.js ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld