Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Unified Diff: lib/crawler.js

Issue 29338242: Issue 3792 - Fix to support multiprocess firefox (Closed)
Patch Set: Created March 14, 2016, 2:41 p.m.
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View side-by-side diff with in-line comments
Download patch
« lib/child/frameScript.js ('K') | « lib/child/frameScript.js ('k') | no next file » | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show/Hide Comments ('s')
Index: lib/crawler.js
diff --git a/lib/crawler.js b/lib/crawler.js
index 83c5f40e4fe873373b70dcc465bc22958610edf3..86b5e3bd70d68316dcfbbc54e98c03bad3893aaf 100644
--- a/lib/crawler.js
+++ b/lib/crawler.js
@@ -11,6 +11,7 @@
Cu.import("resource://gre/modules/Services.jsm");
Cu.import("resource://gre/modules/Task.jsm");
Cu.import("resource://gre/modules/Promise.jsm");
+Cu.import("resource://gre/modules/Timer.jsm");
function abprequire(module)
{
@@ -21,7 +22,6 @@ function abprequire(module)
}
let {RequestNotifier} = abprequire("requestNotifier");
-
let {FilterNotifier} = abprequire("filterNotifier");
let {FilterStorage} = abprequire("filterStorage");
@@ -84,79 +84,6 @@ TabAllocator.prototype = {
};
/**
- * Observes page loads in a particular tabbed browser.
- *
- * @param {tabbrowser} browser
- * The tabbed browser to be observed
- * @param {int} timeout
- * Load timeout in milliseconds
- * @constructor
- */
-function LoadListener(browser, timeout)
-{
- this._browser = browser;
- this._deferred = new Map();
- this._timeout = timeout;
- browser.addTabsProgressListener(this);
-}
-LoadListener.prototype = {
- /**
- * Returns a promise that will be resolved when the page in the specified tab
- * finishes loading. Loading will be stopped if the timeout is reached.
- *
- * @param {tab} tab
- * @result {Promise}
- */
- waitForLoad: function(tab)
- {
- let deferred = Promise.defer();
- this._deferred.set(tab.linkedBrowser, deferred);
-
- tab.ownerDocument.defaultView.setTimeout(function()
- {
- tab.linkedBrowser.stop();
- }, this._timeout);
-
- return deferred.promise;
- },
-
- /**
- * Deactivates this object.
- */
- stop: function()
- {
- this._browser.removeTabsProgressListener(this);
- },
-
- onStateChange: function(browser, progress, request, flags, status)
- {
- if ((flags & Ci.nsIWebProgressListener.STATE_STOP) && (flags & Ci.nsIWebProgressListener.STATE_IS_WINDOW))
- {
- let deferred = this._deferred.get(browser);
- if (deferred)
- {
- this._deferred.delete(browser);
-
- let headers = [];
- if (request instanceof Ci.nsIHttpChannel)
- {
- try
- {
- headers.push("HTTP/x.x " + request.responseStatus + " " + request.responseStatusText);
- request.visitResponseHeaders((header, value) => headers.push(header + ": " + value));
- }
- catch (e)
- {
- // Exceptions are expected here
- }
- }
- deferred.resolve([status, headers]);
- }
- }
- }
-};
Wladimir Palant 2016/03/15 10:07:10 Why did you move this functionality into the conte…
sergei 2016/03/15 16:40:10 In e10s it does not work in chrome process, we can…
-
-/**
* Once created, this object will make sure all new windows are dismissed
* immediately.
*
@@ -207,7 +134,7 @@ WindowCloser.prototype = {
function run(window, urls, timeout, maxtabs, targetURL, onDone)
{
let tabAllocator = new TabAllocator(window.getBrowser(), maxtabs);
- let loadListener = new LoadListener(window.getBrowser(), timeout);
+
let running = 0;
let windowCloser = new WindowCloser();
let taskDone = function()
@@ -215,7 +142,6 @@ function run(window, urls, timeout, maxtabs, targetURL, onDone)
running--;
if (running <= 0)
{
- loadListener.stop();
windowCloser.stop();
onDone();
}
@@ -240,7 +166,7 @@ function run(window, urls, timeout, maxtabs, targetURL, onDone)
for (let url of urls)
{
running++;
- Task.spawn(crawl_url.bind(null, url, tabAllocator, loadListener)).then(function(result)
+ Task.spawn(crawl_url.bind(null, url, tabAllocator, timeout)).then(function(result)
{
let request = new XMLHttpRequest();
request.open("POST", targetURL);
@@ -274,11 +200,10 @@ exports.run = run;
*
* @param {String} url
* @param {TabAllocator} tabAllocator
- * @param {loadListener} loadListener
* @result {Object}
* Crawling result
*/
-function* crawl_url(url, tabAllocator, loadListener)
+function* crawl_url(url, tabAllocator, timeout)
{
let tab = yield tabAllocator.getTab();
let result = {url, requests: []};
@@ -292,33 +217,26 @@ function* crawl_url(url, tabAllocator, loadListener)
});
tab.linkedBrowser.loadURI(url, null, null);
- [result.channelStatus, result.headers] = yield loadListener.waitForLoad(tab);
- result.endTime = Date.now();
- result.finalUrl = tab.linkedBrowser.currentURI.spec;
- let document = tab.linkedBrowser.contentDocument;
- if (document.documentElement)
+ let mm = tab.linkedBrowser.messageManager;
+ let pageInfoFuture = new Promise((resolve, result) =>
{
- try
+ let timerID;
+ let onDone = (pageInfo) =>
{
- let canvas = document.createElementNS("http://www.w3.org/1999/xhtml", "canvas");
- canvas.width = document.documentElement.scrollWidth;
- canvas.height = document.documentElement.scrollHeight;
-
- let context = canvas.getContext("2d");
- context.drawWindow(document.defaultView, 0, 0, canvas.width, canvas.height, "rgb(255, 255, 255)");
- result.screenshot = canvas.toDataURL("image/jpeg", 0.8);
- }
- catch (e)
- {
- reportException(e);
- result.error = "Capturing screenshot failed: " + e;
+ mm.removeMessageListener("abpcrawler:pageInfoGathered", onDone);
Wladimir Palant 2016/03/15 10:07:10 So, which tab did you get the page info for? The…
sergei 2016/03/16 14:44:23 For the `tab`, it's "browser message manager" it a…
+ clearTimeout(timerID);
+ resolve(pageInfo);
}
+ mm.addMessageListener("abpcrawler:pageInfoGathered", (msg) => onDone(msg.data));;
+ timerID = setTimeout(onDone.bind(this, {error: "timeout"}), timeout);
+ });
Wladimir Palant 2016/03/15 10:07:10 Please move this functionality into a separate fun…
sergei 2016/03/16 14:44:23 Done.
- // TODO: Capture frames as well?
- let serializer = new tab.ownerDocument.defaultView.XMLSerializer();
- result.source = serializer.serializeToString(document.documentElement);
- }
+ let pageInfo = yield pageInfoFuture;
+
+ result.finalUrl = tab.linkedBrowser.currentURI.spec;
+ Object.assign(result, pageInfo);
+ result.endTime = Date.now();
}
finally
{
@@ -336,3 +254,20 @@ function reportException(e)
Cu.reportError(e);
dump(e + "\n" + stack + "\n");
}
+
+let {addonRoot} = require("info");
+let frameScriptPath = addonRoot + "/lib/child/frameScript.js";
+let globalMessageManager = Services.mm;
+globalMessageManager.loadFrameScript(frameScriptPath, true);
Wladimir Palant 2016/03/15 10:07:10 This should be a process script, no point using a…
+
+let onReportException = function(msg)
+{
+ reportException(msg.objects);
Wladimir Palant 2016/03/15 10:07:10 Please don't use msg.objects - ever. That's a wrap…
sergei 2016/03/16 14:44:23 Acknowledged. Actually, I wanted to avoid duplicat…
sergei 2016/03/16 14:44:23 I know, I used `msg.objects` because we don't know…
Wladimir Palant 2016/09/14 16:11:46 Worst-case scenario: deadlocks because all of that…
+}
+globalMessageManager.addMessageListener("abpcrawler:reportException", onReportException);
+
+onShutdown.add(() =>
+{
+ globalMessageManager.removeMessageListener("abpcrawler:reportException", onReportException);
+ globalMessageManager.removeDelayedFrameScript(frameScriptPath);
+});
« lib/child/frameScript.js ('K') | « lib/child/frameScript.js ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld