| Index: lib/crawler.js |
| diff --git a/lib/crawler.js b/lib/crawler.js |
| index 83c5f40e4fe873373b70dcc465bc22958610edf3..86b5e3bd70d68316dcfbbc54e98c03bad3893aaf 100644 |
| --- a/lib/crawler.js |
| +++ b/lib/crawler.js |
| @@ -11,6 +11,7 @@ |
| Cu.import("resource://gre/modules/Services.jsm"); |
| Cu.import("resource://gre/modules/Task.jsm"); |
| Cu.import("resource://gre/modules/Promise.jsm"); |
| +Cu.import("resource://gre/modules/Timer.jsm"); |
| function abprequire(module) |
| { |
| @@ -21,7 +22,6 @@ function abprequire(module) |
| } |
| let {RequestNotifier} = abprequire("requestNotifier"); |
| - |
| let {FilterNotifier} = abprequire("filterNotifier"); |
| let {FilterStorage} = abprequire("filterStorage"); |
| @@ -84,79 +84,6 @@ TabAllocator.prototype = { |
| }; |
| /** |
| - * Observes page loads in a particular tabbed browser. |
| - * |
| - * @param {tabbrowser} browser |
| - * The tabbed browser to be observed |
| - * @param {int} timeout |
| - * Load timeout in milliseconds |
| - * @constructor |
| - */ |
| -function LoadListener(browser, timeout) |
| -{ |
| - this._browser = browser; |
| - this._deferred = new Map(); |
| - this._timeout = timeout; |
| - browser.addTabsProgressListener(this); |
| -} |
| -LoadListener.prototype = { |
| - /** |
| - * Returns a promise that will be resolved when the page in the specified tab |
| - * finishes loading. Loading will be stopped if the timeout is reached. |
| - * |
| - * @param {tab} tab |
| - * @result {Promise} |
| - */ |
| - waitForLoad: function(tab) |
| - { |
| - let deferred = Promise.defer(); |
| - this._deferred.set(tab.linkedBrowser, deferred); |
| - |
| - tab.ownerDocument.defaultView.setTimeout(function() |
| - { |
| - tab.linkedBrowser.stop(); |
| - }, this._timeout); |
| - |
| - return deferred.promise; |
| - }, |
| - |
| - /** |
| - * Deactivates this object. |
| - */ |
| - stop: function() |
| - { |
| - this._browser.removeTabsProgressListener(this); |
| - }, |
| - |
| - onStateChange: function(browser, progress, request, flags, status) |
| - { |
| - if ((flags & Ci.nsIWebProgressListener.STATE_STOP) && (flags & Ci.nsIWebProgressListener.STATE_IS_WINDOW)) |
| - { |
| - let deferred = this._deferred.get(browser); |
| - if (deferred) |
| - { |
| - this._deferred.delete(browser); |
| - |
| - let headers = []; |
| - if (request instanceof Ci.nsIHttpChannel) |
| - { |
| - try |
| - { |
| - headers.push("HTTP/x.x " + request.responseStatus + " " + request.responseStatusText); |
| - request.visitResponseHeaders((header, value) => headers.push(header + ": " + value)); |
| - } |
| - catch (e) |
| - { |
| - // Exceptions are expected here |
| - } |
| - } |
| - deferred.resolve([status, headers]); |
| - } |
| - } |
| - } |
| -}; |
|
Wladimir Palant
2016/03/15 10:07:10
Why did you move this functionality into the conte
sergei
2016/03/15 16:40:10
In e10s it does not work in chrome process, we can
|
| - |
| -/** |
| * Once created, this object will make sure all new windows are dismissed |
| * immediately. |
| * |
| @@ -207,7 +134,7 @@ WindowCloser.prototype = { |
| function run(window, urls, timeout, maxtabs, targetURL, onDone) |
| { |
| let tabAllocator = new TabAllocator(window.getBrowser(), maxtabs); |
| - let loadListener = new LoadListener(window.getBrowser(), timeout); |
| + |
| let running = 0; |
| let windowCloser = new WindowCloser(); |
| let taskDone = function() |
| @@ -215,7 +142,6 @@ function run(window, urls, timeout, maxtabs, targetURL, onDone) |
| running--; |
| if (running <= 0) |
| { |
| - loadListener.stop(); |
| windowCloser.stop(); |
| onDone(); |
| } |
| @@ -240,7 +166,7 @@ function run(window, urls, timeout, maxtabs, targetURL, onDone) |
| for (let url of urls) |
| { |
| running++; |
| - Task.spawn(crawl_url.bind(null, url, tabAllocator, loadListener)).then(function(result) |
| + Task.spawn(crawl_url.bind(null, url, tabAllocator, timeout)).then(function(result) |
| { |
| let request = new XMLHttpRequest(); |
| request.open("POST", targetURL); |
| @@ -274,11 +200,10 @@ exports.run = run; |
| * |
| * @param {String} url |
| * @param {TabAllocator} tabAllocator |
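| - * @param {loadListener} loadListener |
| + * @param {int} timeout |
| + * Time in milliseconds to wait for the page info before giving up |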
| * @result {Object} |
| * Crawling result |
| */ |
| -function* crawl_url(url, tabAllocator, loadListener) |
| +function* crawl_url(url, tabAllocator, timeout) |
| { |
| let tab = yield tabAllocator.getTab(); |
| let result = {url, requests: []}; |
| @@ -292,33 +217,26 @@ function* crawl_url(url, tabAllocator, loadListener) |
| }); |
| tab.linkedBrowser.loadURI(url, null, null); |
| - [result.channelStatus, result.headers] = yield loadListener.waitForLoad(tab); |
| - result.endTime = Date.now(); |
| - result.finalUrl = tab.linkedBrowser.currentURI.spec; |
| - let document = tab.linkedBrowser.contentDocument; |
| - if (document.documentElement) |
| + let mm = tab.linkedBrowser.messageManager; |
| + // Resolves with the data of the first "abpcrawler:pageInfoGathered" message |
| + // received on this tab's message manager, or with {error: "timeout"} if no |
| + // such message arrives within `timeout` milliseconds. Never rejects. |
| + let pageInfoFuture = new Promise((resolve, reject) => |
| { |
| - try |
| + let timerID; |
| + let onDone = (pageInfo) => |
| { |
| - let canvas = document.createElementNS("http://www.w3.org/1999/xhtml", "canvas"); |
| - canvas.width = document.documentElement.scrollWidth; |
| - canvas.height = document.documentElement.scrollHeight; |
| - |
| - let context = canvas.getContext("2d"); |
| - context.drawWindow(document.defaultView, 0, 0, canvas.width, canvas.height, "rgb(255, 255, 255)"); |
| - result.screenshot = canvas.toDataURL("image/jpeg", 0.8); |
| - } |
| - catch (e) |
| - { |
| - reportException(e); |
| - result.error = "Capturing screenshot failed: " + e; |
| + mm.removeMessageListener("abpcrawler:pageInfoGathered", onMessage); |

Wladimir Palant
2016/03/15 10:07:10
So, which tab did you get the page info for?
The
sergei
2016/03/16 14:44:23
For the `tab`, it's "browser message manager" it a
|
| + clearTimeout(timerID); |
| + resolve(pageInfo); |
| } |
| + // Register a named function so that onDone can remove this very listener. |
| + let onMessage = (msg) => onDone(msg.data); |
| + mm.addMessageListener("abpcrawler:pageInfoGathered", onMessage); |
| + timerID = setTimeout(onDone.bind(this, {error: "timeout"}), timeout); |
| + }); |
|
Wladimir Palant
2016/03/15 10:07:10
Please move this functionality into a separate fun
sergei
2016/03/16 14:44:23
Done.
|
| - // TODO: Capture frames as well? |
| - let serializer = new tab.ownerDocument.defaultView.XMLSerializer(); |
| - result.source = serializer.serializeToString(document.documentElement); |
| - } |
| + let pageInfo = yield pageInfoFuture; |
| + |
| + result.finalUrl = tab.linkedBrowser.currentURI.spec; |
| + Object.assign(result, pageInfo); |
| + result.endTime = Date.now(); |
| } |
| finally |
| { |
| @@ -336,3 +254,20 @@ function reportException(e) |
| Cu.reportError(e); |
| dump(e + "\n" + stack + "\n"); |
| } |
| + |
| +let {addonRoot} = require("info"); |
| +let frameScriptPath = addonRoot + "/lib/child/frameScript.js"; |
| +let globalMessageManager = Services.mm; |
| +globalMessageManager.loadFrameScript(frameScriptPath, true); |
|
Wladimir Palant
2016/03/15 10:07:10
This should be a process script, no point using a
|
| + |
| +let onReportException = function(msg) |
| +{ |
| + reportException(msg.objects); |
|
Wladimir Palant
2016/03/15 10:07:10
Please don't use msg.objects - ever. That's a wrap
sergei
2016/03/16 14:44:23
Acknowledged. Actually, I wanted to avoid duplicat
sergei
2016/03/16 14:44:23
I know, I used `msg.objects` because we don't know
Wladimir Palant
2016/09/14 16:11:46
Worst-case scenario: deadlocks because all of that
|
| +} |
| +globalMessageManager.addMessageListener("abpcrawler:reportException", onReportException); |
| + |
| +onShutdown.add(() => |
| +{ |
| + globalMessageManager.removeMessageListener("abpcrawler:reportException", onReportException); |
| + globalMessageManager.removeDelayedFrameScript(frameScriptPath); |
| +}); |