Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Unified Diff: lib/crawler.js

Issue 8492018: abpcrawler: Collect unmatched filters (Closed)
Patch Set: Created Oct. 2, 2012, 5:01 a.m.
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « lib/client.js ('k') | lib/storage.js » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments / Hide Comments ('s')
Index: lib/crawler.js
===================================================================
--- a/lib/crawler.js
+++ b/lib/crawler.js
@@ -12,13 +12,16 @@
let {Client} = require("client");
let {Policy} = abprequire("contentPolicy");
+let {RequestNotifier} = abprequire("requestNotifier");
let {Filter} = abprequire("filterClasses");
let {Utils} = abprequire("utils");
let origProcessNode = Policy.processNode;
+let requestNotifier;
let siteTabs;
let currentTabs;
+let currentFilter;
Wladimir Palant (2012/12/13 16:14:01): Initialize with null?
Felix Dahlke (2012/12/14 10:11:58): I'd rather keep it undefined, like currentTabs etc.
function processNode(wnd, node, contentType, location, collapse)
{
@@ -49,10 +52,37 @@
let site = siteTabs.get(browser);
let filtered = !result;
- Storage.write([url, site, filtered]);
+ let data = [url, site, filtered];
+ if (currentFilter) {
Wladimir Palant (2012/12/14 16:43:01): Missed on the first pass - bracket should be on the next line.
+ data.push(currentFilter);
+ currentFilter = null;
+ }
+ Storage.write(data);
return result;
}
+function handleFilterHit(wnd, node, data)
+{
+ if (data.filter)
+ currentFilter = data.filter.text;
+}
+
+function prepare()
+{
+ if (Policy.processNode != origProcessNode)
+ return false;
+
+ Policy.processNode = processNode;
+
+ requestNotifier = new RequestNotifier(null, handleFilterHit);
+ siteTabs = new WeakMap();
+ currentTabs = 0;
+
+ Storage.init();
+
+ return true;
+}
+
function loadSite(site, window, callback)
{
if (!site)
@@ -93,8 +123,8 @@
if (!sites.length && !currentTabs)
{
Storage.finish();
- let dataFilePath = Storage.dataFile.path;
- Client.sendCrawlerDataFile(backendUrl, dataFilePath, function()
+ let requestsFilePath = Storage.requestsFile.path;
+ Client.sendRequestsFile(backendUrl, requestsFilePath, function()
{
Storage.destroy();
callback();
@@ -106,26 +136,24 @@
}
}
+function cleanUp()
+{
+ Policy.processNode = origProcessNode;
+ siteTabs = null;
+}
+
let Crawler = exports.Crawler = {};
Crawler.crawl = function(backendUrl, parallelTabs, window, callback)
{
- if (Policy.processNode != origProcessNode)
+ if (!prepare())
return;
- Policy.processNode = processNode;
-
- siteTabs = new WeakMap();
- currentTabs = 0;
-
- Storage.init();
-
Client.fetchCrawlableSites(backendUrl, function(sites)
{
loadSites(backendUrl, parallelTabs, window, sites, function()
{
- Policy.processNode = origProcessNode;
- siteTabs = null;
+ cleanUp();
callback();
});
});
« no previous file with comments | « lib/client.js ('k') | lib/storage.js » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld