Index: sitescripts/crawler/schema.sql |
=================================================================== |
--- a/sitescripts/crawler/schema.sql |
+++ b/sitescripts/crawler/schema.sql |
@@ -1,11 +1,14 @@ |
-DROP TABLE IF EXISTS crawler_sites; |
-DROP TABLE IF EXISTS crawler_runs; |
-DROP TABLE IF EXISTS crawler_data; |
+DROP TABLE IF EXISTS crawler_domain_filters; -- children first: InnoDB refuses to drop a table still referenced by a foreign key |
+DROP TABLE IF EXISTS crawler_requests; -- references crawler_sites and crawler_filters |
+DROP TABLE IF EXISTS crawler_domains; |
+DROP TABLE IF EXISTS crawler_filters; |
+DROP TABLE IF EXISTS crawler_runs; |
+DROP TABLE IF EXISTS crawler_sites; |
CREATE TABLE crawler_sites ( |
id INT NOT NULL AUTO_INCREMENT, |
PRIMARY KEY (id), |
- url VARCHAR(512) NOT NULL, |
+ url VARCHAR(1000) BINARY NOT NULL, -- BINARY: case-sensitive UNIQUE; NOTE(review): 1000-char key may exceed the engine's index key-length limit with multi-byte charsets; confirm |
UNIQUE (url) |
); |
@@ -15,7 +18,7 @@ |
timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP |
); |
-CREATE TABLE crawler_data ( |
+CREATE TABLE crawler_requests ( -- formerly crawler_data |
id INT NOT NULL AUTO_INCREMENT, |
PRIMARY KEY (id), |
run INT NOT NULL, |
@@ -23,5 +26,38 @@ |
site INT NOT NULL, |
FOREIGN KEY (site) REFERENCES crawler_sites (id), |
url VARCHAR(512) NOT NULL, |
- filtered BOOLEAN NOT NULL |
+ filtered BOOLEAN NOT NULL, |
+ filter INT -- nullable; its foreign key to crawler_filters is added at the end of this file |
); |
+ |
+-- Distinct domains; BINARY gives the UNIQUE constraint case-sensitive matching. |
+CREATE TABLE crawler_domains ( |
+ id INT NOT NULL AUTO_INCREMENT, |
+ PRIMARY KEY (id), |
+ domain VARCHAR(512) BINARY NOT NULL, |
+ UNIQUE (domain) |
+); |
+ |
+-- Filter texts; uniqueness is enforced on filter_hash rather than on the long filter column. |
+-- NOTE(review): VARCHAR(40) presumably holds a hex SHA-1 digest; confirm with the code that writes it. |
+CREATE TABLE crawler_filters ( |
+ id INT NOT NULL AUTO_INCREMENT, |
+ PRIMARY KEY (id), |
+ filter VARCHAR(2000) NOT NULL, |
+ filter_hash VARCHAR(40) NOT NULL, |
+ UNIQUE (filter_hash) |
+); |
+ |
+-- Junction table pairing domains with filters; the composite primary key forbids duplicate pairs. |
+CREATE TABLE crawler_domain_filters ( |
+ domain INT NOT NULL, |
+ FOREIGN KEY (domain) REFERENCES crawler_domains (id), |
+ filter INT NOT NULL, |
+ FOREIGN KEY (filter) REFERENCES crawler_filters (id), |
+ PRIMARY KEY (domain, filter) |
+); |
+ |
+-- Added only after crawler_filters exists: InnoDB rejects a foreign key whose |
+-- parent table has not been created yet (unless foreign_key_checks is disabled). |
+ALTER TABLE crawler_requests |
+ ADD FOREIGN KEY (filter) REFERENCES crawler_filters (id); |