Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Unified Diff: sitescripts/crawler/schema.sql

Issue 8492019: sitescripts: Collect unmatched filters (Closed)
Patch Set: Created Oct. 2, 2012, 5:02 a.m.
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « sitescripts/crawler/bin/import_sites.py ('k') | sitescripts/crawler/web/crawler.py » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments / Hide Comments ('s')
Index: sitescripts/crawler/schema.sql
===================================================================
--- a/sitescripts/crawler/schema.sql
+++ b/sitescripts/crawler/schema.sql
@@ -1,11 +1,14 @@
DROP TABLE IF EXISTS crawler_sites;
DROP TABLE IF EXISTS crawler_runs;
-DROP TABLE IF EXISTS crawler_data;
+DROP TABLE IF EXISTS crawler_requests;
+DROP TABLE IF EXISTS crawler_domains;
+DROP TABLE IF EXISTS crawler_filters;
+DROP TABLE IF EXISTS crawler_domain_filters;
CREATE TABLE crawler_sites (
id INT NOT NULL AUTO_INCREMENT,
PRIMARY KEY (id),
- url VARCHAR(512) NOT NULL,
+ url VARCHAR(1000) BINARY NOT NULL,
UNIQUE (url)
);
@@ -15,7 +18,7 @@
timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
-CREATE TABLE crawler_data (
+CREATE TABLE crawler_requests (
id INT NOT NULL AUTO_INCREMENT,
PRIMARY KEY (id),
run INT NOT NULL,
@@ -23,5 +26,30 @@
site INT NOT NULL,
FOREIGN KEY (site) REFERENCES crawler_sites (id),
url VARCHAR(512) NOT NULL,
- filtered BOOLEAN NOT NULL
+ filtered BOOLEAN NOT NULL,
+ filter INT,
+ FOREIGN KEY (filter) REFERENCES crawler_filters (id)
);
+
+CREATE TABLE crawler_domains (
+ id INT NOT NULL AUTO_INCREMENT,
+ PRIMARY KEY (id),
+ domain VARCHAR(512) BINARY NOT NULL,
+ UNIQUE (domain)
+);
+
+CREATE TABLE crawler_filters (
+ id INT NOT NULL AUTO_INCREMENT,
+ PRIMARY KEY (id),
+ filter VARCHAR(2000) NOT NULL,
+ filter_hash VARCHAR(40) NOT NULL,
+ UNIQUE (filter_hash)
+);
+
+CREATE TABLE crawler_domain_filters (
+ domain INT NOT NULL,
+ FOREIGN KEY (domain) REFERENCES crawler_domains (id),
+ filter INT NOT NULL,
+ FOREIGN KEY (filter) REFERENCES crawler_filters (id),
+ PRIMARY KEY (domain, filter)
+);
« no previous file with comments | « sitescripts/crawler/bin/import_sites.py ('k') | sitescripts/crawler/web/crawler.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld