| Index: lib/crawler.js |
| =================================================================== |
| new file mode 100644 |
| --- /dev/null |
| +++ b/lib/crawler.js |
| @@ -0,0 +1,126 @@ |
| +Cu.import("resource://gre/modules/Services.jsm"); |
| + |
| +/** |
| + * Requests one of this extension's modules by sending an |
| + * "abpcrawler-require" observer notification. |
| + * @param {String} module module name, passed as the notification data |
| + * @return the exports property found on the subject object after the |
| + * notification (undefined if nothing set it) |
| + */ |
| +function require(module) |
| +{ |

Wladimir Palant
2012/09/21 15:36:18
This is unnecessary - module in lib/ directory can
|
| + let result = {}; |
| + // Self-reference under wrappedJSObject - presumably so that the observer can |
| + // reach this plain JS object and attach exports to it (verify against the |
| + // code handling the notification) |
| + result.wrappedJSObject = result; |
| + Services.obs.notifyObservers(result, "abpcrawler-require", module); |
| + // exports is read right away, so this relies on observers having run by now |
| + return result.exports; |
| +} |
| + |
| +/** |
| + * Requests an Adblock Plus module. First sends an "adblockplus-require" |
| + * observer notification; if no exports property shows up on the subject |
| + * object, falls back to importing a JavaScript code module from |
| + * chrome://adblockplus-modules/content/. |
| + * @param {String} module module name, e.g. "contentPolicy" |
| + * @return the exports property set during the notification, otherwise the |
| + * result of Cu.import() |
| + */ |
| +function abprequire(module) |
| +{ |
| + let result = {}; |
| + // Self-reference under wrappedJSObject - presumably so that the observer can |
| + // reach this plain JS object and attach exports to it (verify against the |
| + // code handling the notification) |
| + result.wrappedJSObject = result; |
| + Services.obs.notifyObservers(result, "adblockplus-require", module); |
| + if ("exports" in result) |
| + return result.exports; |
| + else |
| + // Fallback file name is the module name with its first character |
| + // uppercased plus ".jsm", e.g. "contentPolicy" -> "ContentPolicy.jsm" |
| + return Cu.import("chrome://adblockplus-modules/content/" + |

Wladimir Palant
2012/09/21 15:36:18
This is unnecessary - Diagnostics is currently com
|
| + module[0].toUpperCase() + module.substr(1) + ".jsm", null); |
| +} |
| + |
| +// Modules requested via the "abpcrawler-require" notification, see require() |
| +let {Storage} = require("storage"); |
| +let {Client} = require("client"); |
| + |
| +// Adblock Plus modules, see abprequire(). Filter is not referenced anywhere |
| +// else in the lines shown here. |
| +let {Policy} = abprequire("contentPolicy"); |
| +let {Filter} = abprequire("filterClasses"); |
| + |
| +// Policy.processNode as found at load time; called by the processNode() |
| +// wrapper and put back once a crawl has finished |
| +let origProcessNode = Policy.processNode; |
| + |
| +// Maps a location URL reported for a crawler tab to the site that tab was |
| +// opened for; reset to an empty object by Crawler.crawl() |
| +let siteTabs; |
| +// Number of sites currently being loaded: incremented before loadSite() is |
| +// called, decremented in its completion callback; reset by Crawler.crawl() |
| +let currentTabs; |
| + |
| +/** |
| + * Wrapper installed in place of Policy.processNode while a crawl is running. |
| + * Calls the original implementation with the same this value and arguments, |
| + * records the outcome via Storage.write() and passes the original result |
| + * through. Only wnd and location are used directly; all parameters are |
| + * forwarded unchanged. |
| + * @return result of the original Policy.processNode call |
| + */ |
| +function processNode(wnd, node, contentType, location, collapse) |
| +{ |
| + let result = origProcessNode.apply(this, arguments); |
| + let url = location.spec; |
| + if (url) |

Wladimir Palant
2012/09/21 15:36:18
I think that you can remove that check - an nsIURI
|
| + { |
| + // Look up the crawled site by the URL of the top-level window; the mapping |
| + // is filled in by the onLocationChange handler in loadSite(). The lookup |
| + // yields undefined if that URL was never recorded. |
| + let site = siteTabs[wnd.top.location.href]; |
| + // A falsy result from the original call is recorded as "filtered" |
| + let filtered = !result; |
| + Storage.write([url, site, filtered]); |
| + } |
| + return result; |
| +} |
| + |
| +/** |
| + * Opens the given site in a new tab of window.gBrowser and calls callback |
| + * once a STATE_STOP state change is seen for that tab's browser; the tab and |
| + * the progress listener are removed before the callback runs. |
| + * NOTE(review): if site is falsy the function returns without ever calling |
| + * callback - confirm that callers do not wait for it in that case. |
| + * @param site value passed to tabbrowser.addTab() |
| + * @param window window providing gBrowser |
| + * @param {Function} callback called without arguments when loading stopped |
| + */ |
| +function loadSite(site, window, callback) |
| +{ |
| + if (!site) |
| + return; |
| + |
| + let tabbrowser = window.gBrowser; |
| + let tab = tabbrowser.addTab(site); |
| + let browser = tabbrowser.getBrowserForTab(tab); |
| + |
| + // Registered via addTabsProgressListener(), so the handlers receive the |
| + // browser as first argument and have to ignore events of other tabs |
| + let progressListener = { |
| + onStateChange: function(aBrowser, aWebProgress, aRequest, aStateFlags, aStatus) |
| + { |
| + if (browser !== aBrowser) |
| + return; |
| + |
| + // NOTE(review): only the STATE_STOP bit is tested, no other state flags - |
| + // confirm that the first stop event seen here means the page is done |
| + if (!(aStateFlags & Ci.nsIWebProgressListener.STATE_STOP)) |
| + return; |
| + |
| + tabbrowser.removeTabsProgressListener(progressListener); |
| + tabbrowser.removeTab(tab); |
| + callback(); |
| + }, |
| + onLocationChange: function(aBrowser, aWebProgress, aRequest, aLocation, aFlags) |
| + { |
| + // Remember which site each location of this tab belongs to, this is |
| + // what processNode() uses to attribute requests to a site |
| + if (browser === aBrowser) |
| + siteTabs[aLocation.spec] = site; |

Wladimir Palant
2012/09/21 15:36:18
Ok, that's a hack. What if two webpages in differe
|
| + } |
| + }; |
| + tabbrowser.addTabsProgressListener(progressListener); |
| +} |
| + |
| +function loadSites(backendUrl, parallelTabs, window, sites, callback) |
| +{ |
| + while (currentTabs < parallelTabs && sites.length) |
| + { |
| + currentTabs++; |
| + let site = sites[0]; |
| + sites = sites.slice(1); |
|
Wladimir Palant
2012/09/21 15:36:18
let site = sites.shift()?
|
| + loadSite(site, window, function() |
| + { |
| + currentTabs--; |
| + if (!sites.length && !currentTabs) |
| + { |
| + Storage.finish(); |
| + let dataFilePath = Storage.dataFile.path; |
| + Client.sendCrawlerDataFile(backendUrl, window, dataFilePath, function() |
| + { |
| + Storage.destroy(); |
| + callback(); |
| + }); |
| + } |
| + else |
| + loadSites(backendUrl, parallelTabs, window, sites, callback); |
| + }); |
| + } |
| +} |
| + |
| +let Crawler = exports.Crawler = {}; |
| + |
| +Crawler.crawl = function(backendUrl, parallelTabs, window, callback) |
| +{ |
| + if (Policy.processNode != origProcessNode) |
| + return; |
| + |
| + Policy.processNode = processNode; |
| + |
| + siteTabs = {}; |
| + currentTabs = 0; |
| + |
| + Storage.init(); |
| + |
| + Client.fetchCrawlableSites(backendUrl, function(sites) |
| + { |
| + loadSites(backendUrl, parallelTabs, window, sites, function() |
| + { |
| + Policy.processNode = origProcessNode; |
| + callback(); |
| + }); |
| + }); |
| +}; |