Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Unified Diff: lib/crawler.js

Issue 29338121: Issue 3775 - fix saving of requests (Closed)
Patch Set: Created March 11, 2016, 9:41 a.m.
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments / Hide Comments ('s')
Index: lib/crawler.js
diff --git a/lib/crawler.js b/lib/crawler.js
index fae18c76edffa77382910c4ef55a45167635d02b..54df1501957a2a585f608eaca8042c75bc477051 100644
--- a/lib/crawler.js
+++ b/lib/crawler.js
@@ -20,11 +20,8 @@ function abprequire(module)
return result.exports;
}
-let {Policy} = abprequire("contentPolicy");
let {RequestNotifier} = abprequire("requestNotifier");
-let {Utils} = abprequire("utils");
-let dataForTab = new WeakMap();
/**
* Creates a pool of tabs and allocates them to tasks on request.
@@ -193,31 +190,6 @@ WindowCloser.prototype = {
};
/**
- * Retrieves crawler results associated with a particular content window.
- *
- * @param {Window} window
- * Content window to retrieve crawler results for
- * @result {Object}
- * Crawler results or undefined if the window wasn't created by the crawler.
- */
-function getDataForWindow(window)
-{
- let topWindow = window.top;
- if (!topWindow.document)
- throw new Error("No document associated with the node's top window");
- let tabbrowser = Utils.getChromeWindow(topWindow).getBrowser();
- if (!tabbrowser)
- throw new Error("Unable to get a tabbrowser reference from the window");
- let browser = tabbrowser.getBrowserForDocument(topWindow.document);
- if (!browser)
- throw new Error("Unable to get browser for the content window");
- let tab = tabbrowser.getTabForBrowser(browser);
- if (!tab)
- throw new Error("Unable to get tab for the browser");
- return dataForTab.get(tab);
-};
-
-/**
* Starts the crawling session. The crawler opens each URL in a tab and stores
* the results.
*
@@ -232,11 +204,6 @@ function getDataForWindow(window)
*/
function run(window, urls, timeout, maxtabs, targetURL, onDone)
{
- let requestNotifier = new RequestNotifier(null, function() {});
-
- let origProcessNode = Policy.processNode;
- Policy.processNode = processNodeReplacement.bind(null, origProcessNode, requestNotifier);
-
let tabAllocator = new TabAllocator(window.getBrowser(), maxtabs);
let loadListener = new LoadListener(window.getBrowser(), timeout);
let running = 0;
@@ -246,8 +213,6 @@ function run(window, urls, timeout, maxtabs, targetURL, onDone)
running--;
if (running <= 0)
{
- Policy.processNode = origProcessNode;
- requestNotifier.shutdown();
loadListener.stop();
windowCloser.stop();
onDone();
@@ -294,12 +259,16 @@ exports.run = run;
function* crawl_url(url, tabAllocator, loadListener)
{
let tab = yield tabAllocator.getTab();
- let result = {url: url};
+ let result = {url, requests: []};
- dataForTab.set(tab, result);
try
{
result.startTime = Date.now();
+ let requestNotifier = new RequestNotifier(tab.linkedBrowser.outerWindowID, function({type, location, filter}, scanComplete)
Wladimir Palant 2016/03/14 19:50:43 When the scan is completed the listener is called…
sergei 2016/03/15 10:59:56 Fixed, is the indentation correct now?
sergei 2016/03/15 10:59:56 Fixed. However, I guess, it should be documented…
Wladimir Palant 2016/03/15 11:05:44 Yes, it should be documented. As to reproducing -…
+ {
+ result.requests.push({location, contentType: type, filter});
+ });
Wladimir Palant 2016/03/14 19:50:43 You need to shut down this notifier when you are d…
sergei 2016/03/15 10:59:56 Done. Thanks, overlooked it.
+
tab.linkedBrowser.loadURI(url, null, null);
[result.channelStatus, result.headers] = yield loadListener.waitForLoad(tab);
result.endTime = Date.now();
@@ -345,63 +314,3 @@ function reportException(e)
Cu.reportError(e);
dump(e + "\n" + stack + "\n");
}
-
-/**
- * Wrapper for the Policy.processNode() function in ABP. Calls the original
- * function and records all the data.
- *
- * @param {Function} origProcessNode
- * The original processNode function.
- * @param {RequestNotifier} requestNotifier
- * The crawler's RequestNotifier object instance.
- * @param {nsIDOMWindow} wnd
- * @param {nsIDOMElement} node
- * @param {Number} contentType
- * @param {nsIURI} location
- * @param {Boolean} collapse
- * @return {Boolean}
- */
-function processNodeReplacement(origProcessNode, requestNotifier, wnd, node, contentType, location, collapse)
-{
- let filters = [];
- let origListener = requestNotifier.listener;
- requestNotifier.listener = function(window, node, entry)
- {
- if (entry.filter)
- filters.push(entry.filter.text);
- };
-
- /*
- * Call the original processNode. If the original throws, then we will too, so this is outside a try clause.
- */
- let result;
- try
- {
- result = origProcessNode(wnd, node, contentType, location, collapse);
- }
- finally
- {
- requestNotifier.listener = origListener;
- }
-
- try
- {
- let data = getDataForWindow(wnd);
- if (data)
- {
- if (!("requests" in data))
- data.requests = [];
- data.requests.push({
- contentType: contentType,
- location: (contentType == Policy.type.ELEMHIDE ? location.text : location.spec),
- blocked: result != Ci.nsIContentPolicy.ACCEPT,
- filters: filters
- });
- }
- }
- catch (e)
- {
- reportException(e);
- }
- return result;
-};
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld