Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Unified Diff: lib/crawler.js

Issue 8402021: Crawler frontend (Closed)
Patch Set: Created Sept. 21, 2012, 1:16 p.m.
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: lib/crawler.js
===================================================================
new file mode 100644
--- /dev/null
+++ b/lib/crawler.js
@@ -0,0 +1,126 @@
+Cu.import("resource://gre/modules/Services.jsm");
+
+function require(module) // Looks up a module by sending an "abpcrawler-require" observer notification and returning whatever ended up in the holder's `exports`.
+{
Wladimir Palant 2012/09/21 15:36:18 This is unnecessary - module in lib/ directory can
+ let result = {}; // holder object passed to observers as the notification subject
+ result.wrappedJSObject = result; // self-reference; presumably lets the observer reach the plain JS object behind the XPCOM subject - TODO confirm
+ Services.obs.notifyObservers(result, "abpcrawler-require", module); // the module name travels as the notification data
+ return result.exports; // undefined if nothing assigned `exports` during the notification
+}
+
+function abprequire(module) // Resolves an Adblock Plus module: first through an "adblockplus-require" observer notification, otherwise by importing the matching .jsm file.
+{
+ let result = {}; // holder object passed to observers as the notification subject
+ result.wrappedJSObject = result; // self-reference; presumably lets the observer reach the plain JS object behind the XPCOM subject - TODO confirm
+ Services.obs.notifyObservers(result, "adblockplus-require", module);
+ if ("exports" in result) // something answered the notification by setting `exports`
+ return result.exports;
+ else
+ return Cu.import("chrome://adblockplus-modules/content/" + // fallback: module URL uses the name with its first letter upper-cased, e.g. "contentPolicy" -> ".../ContentPolicy.jsm"
Wladimir Palant 2012/09/21 15:36:18 This is unnecessary - Diagnostics is currently com
+ module[0].toUpperCase() + module.substr(1) + ".jsm", null);
+}
+
+let {Storage} = require("storage"); // the crawler's own modules, resolved through require()
+let {Client} = require("client");
+
+let {Policy} = abprequire("contentPolicy"); // Adblock Plus modules, resolved through abprequire()
+let {Filter} = abprequire("filterClasses"); // NOTE(review): Filter is not referenced anywhere else in the visible code
+
+let origProcessNode = Policy.processNode; // original implementation: processNode() delegates to it and Crawler.crawl() restores it when done
+
+let siteTabs; // object mapping a tab's URL to the crawled site the tab was opened for; reset by Crawler.crawl()
+let currentTabs; // number of crawl tabs currently open; reset to 0 by Crawler.crawl()
+
+function processNode(wnd, node, contentType, location, collapse) // Replacement for Policy.processNode: delegates to the original and writes one record per call to Storage.
+{
+ let result = origProcessNode.apply(this, arguments); // run the original first; its return value is handed back unchanged
+ let url = location.spec;
+ if (url)
Wladimir Palant 2012/09/21 15:36:18 I think that you can remove that check - an nsIURI
+ {
+ let site = siteTabs[wnd.top.location.href]; // keyed by the top-level page URL; undefined if loadSite() never recorded that URL
+ let filtered = !result; // a falsy result is recorded as "filtered" - presumably meaning the request was blocked; confirm against Policy.processNode
+ Storage.write([url, site, filtered]);
+ }
+ return result;
+}
+
+function loadSite(site, window, callback) // Opens `site` in a new tab of `window`, closes the tab on the first STATE_STOP notification for it, then calls `callback`.
+{
+ if (!site)
+ return; // NOTE(review): callback is never invoked on this path, so a caller counting on it will wait forever
+
+ let tabbrowser = window.gBrowser;
+ let tab = tabbrowser.addTab(site);
+ let browser = tabbrowser.getBrowserForTab(tab);
+
+ let progressListener = {
+ onStateChange: function(aBrowser, aWebProgress, aRequest, aStateFlags, aStatus)
+ {
+ if (browser !== aBrowser) // ignore notifications that belong to other tabs
+ return;
+
+ if (!(aStateFlags & Ci.nsIWebProgressListener.STATE_STOP)) // NOTE(review): only STATE_STOP is tested, not STATE_IS_WINDOW/STATE_IS_NETWORK - confirm this cannot fire for a single request before the page has finished
+ return;
+
+ tabbrowser.removeTabsProgressListener(progressListener); // unregister first so the steps below run only once per tab
+ tabbrowser.removeTab(tab);
+ callback();
+ },
+ onLocationChange: function(aBrowser, aWebProgress, aRequest, aLocation, aFlags)
+ {
+ if (browser === aBrowser)
+ siteTabs[aLocation.spec] = site; // record the site under every URL this tab navigates to; processNode() reads this map
Wladimir Palant 2012/09/21 15:36:18 Ok, that's a hack. What if two webpages in differe
+ }
+ };
+ tabbrowser.addTabsProgressListener(progressListener);
+}
+
+function loadSites(backendUrl, parallelTabs, window, sites, callback)
+{
+ while (currentTabs < parallelTabs && sites.length)
+ {
+ currentTabs++;
+ let site = sites[0];
+ sites = sites.slice(1);
Wladimir Palant 2012/09/21 15:36:18 let site = sites.shift()?
+ loadSite(site, window, function()
+ {
+ currentTabs--;
+ if (!sites.length && !currentTabs)
+ {
+ Storage.finish();
+ let dataFilePath = Storage.dataFile.path;
+ Client.sendCrawlerDataFile(backendUrl, window, dataFilePath, function()
+ {
+ Storage.destroy();
+ callback();
+ });
+ }
+ else
+ loadSites(backendUrl, parallelTabs, window, sites, callback);
+ });
+ }
+}
+
+let Crawler = exports.Crawler = {};
+
+Crawler.crawl = function(backendUrl, parallelTabs, window, callback)
+{
+ if (Policy.processNode != origProcessNode)
+ return;
+
+ Policy.processNode = processNode;
+
+ siteTabs = {};
+ currentTabs = 0;
+
+ Storage.init();
+
+ Client.fetchCrawlableSites(backendUrl, function(sites)
+ {
+ loadSites(backendUrl, parallelTabs, window, sites, function()
+ {
+ Policy.processNode = origProcessNode;
+ callback();
+ });
+ });
+};

Powered by Google App Engine
This is Rietveld