Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Unified Diff: lib/crawler.js

Issue 8402021: Crawler frontend (Closed)
Patch Set: Created Sept. 26, 2012, 8:25 a.m.
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « lib/client.js ('k') | lib/main.js » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: lib/crawler.js
===================================================================
new file mode 100644
--- /dev/null
+++ b/lib/crawler.js
@@ -0,0 +1,132 @@
+Cu.import("resource://gre/modules/Services.jsm");
+
+function abprequire(module)
+{
+ let result = {};
+ result.wrappedJSObject = result;
+ Services.obs.notifyObservers(result, "adblockplus-require", module);
+ return result.exports;
+}
+
+let {Storage} = require("storage");
+let {Client} = require("client");
+
+let {Policy} = abprequire("contentPolicy");
+let {Filter} = abprequire("filterClasses");
+let {Utils} = abprequire("utils");
+
+let origProcessNode = Policy.processNode; // implementation in place at load time; Crawler.crawl puts it back when a crawl ends
+
+let siteTabs; // WeakMap of browser -> site URL; created by Crawler.crawl, set to null after the crawl
+let currentTabs; // count of site loads currently in progress; reset to 0 by Crawler.crawl
+
+function processNode(wnd, node, contentType, location, collapse)
+{
+ let result = origProcessNode.apply(this, arguments);
+ let url = (contentType === Policy.type.ELEMHIDE) ? location.text :
+ location.spec;
+
+ let topWindow = wnd.top;
+ if (!topWindow.document)
+ {
+ Cu.reportError("No document associated with the node's top window");
+ return result;
+ }
+
+ let tabbrowser = Utils.getChromeWindow(topWindow).gBrowser;
+ if (!tabbrowser)
+ {
+ Cu.reportError("Unable to get a tabbrowser reference");
+ return result;
+ }
+
+ let browser = tabbrowser.getBrowserForDocument(topWindow.document);
+ if (!browser)
+ {
+ Cu.reportError("Unable to get browser for the tab");
+ return result;
+ }
+
+ let site = siteTabs.get(browser);
+ let filtered = !result;
+ Storage.write([url, site, filtered]);
+ return result;
+}
+
+function loadSite(site, window, callback)
+{
+ if (!site)
+ return;
+
+ let tabbrowser = window.gBrowser;
+ let tab = tabbrowser.addTab(site);
+ let browser = tabbrowser.getBrowserForTab(tab);
+
+ siteTabs.set(browser, site);
+
+ let progressListener = {
+ onStateChange: function(aBrowser, aWebProgress, aRequest, aStateFlags, aStatus)
+ {
+ if (browser !== aBrowser)
+ return;
+
+ if (!(aStateFlags & Ci.nsIWebProgressListener.STATE_STOP))
+ return;
+
+ tabbrowser.removeTabsProgressListener(progressListener);
+ tabbrowser.removeTab(tab);
+ callback();
+ }
+ };
+ tabbrowser.addTabsProgressListener(progressListener);
+}
+
+function loadSites(backendUrl, parallelTabs, window, sites, callback)
+{
+ while (currentTabs < parallelTabs && sites.length)
+ {
+ currentTabs++;
+ let site = sites.shift();
+ loadSite(site, window, function()
+ {
+ currentTabs--;
+ if (!sites.length && !currentTabs)
+ {
+ Storage.finish();
+ let dataFilePath = Storage.dataFile.path;
+ Client.sendCrawlerDataFile(backendUrl, dataFilePath, function()
+ {
+ Storage.destroy();
+ callback();
+ });
+ }
+ else
+ loadSites(backendUrl, parallelTabs, window, sites, callback);
+ });
+ }
+}
+
+let Crawler = exports.Crawler = {};
+
+Crawler.crawl = function(backendUrl, parallelTabs, window, callback)
+{
+ if (Policy.processNode != origProcessNode)
+ return;
+
+ Policy.processNode = processNode;
+
+ siteTabs = new WeakMap();
+ currentTabs = 0;
+
+ Storage.init();
+
+ Client.fetchCrawlableSites(backendUrl, function(sites)
+ {
+ loadSites(backendUrl, parallelTabs, window, sites, function()
+ {
+ Policy.processNode = origProcessNode;
+ siteTabs = null;
+ callback();
+ });
+ });
+};
« no previous file with comments | « lib/client.js ('k') | lib/main.js » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld