| Index: sitescripts/crawler/schema.sql |
| =================================================================== |
| --- a/sitescripts/crawler/schema.sql |
| +++ b/sitescripts/crawler/schema.sql |
| @@ -1,11 +1,14 @@ |
| +DROP TABLE IF EXISTS crawler_domain_filters; -- referencing tables are dropped before the tables they reference |
| +DROP TABLE IF EXISTS crawler_requests; |
| +DROP TABLE IF EXISTS crawler_domains; |
| +DROP TABLE IF EXISTS crawler_filters; |
| -DROP TABLE IF EXISTS crawler_sites; |
| DROP TABLE IF EXISTS crawler_runs; |
| -DROP TABLE IF EXISTS crawler_data; |
| +DROP TABLE IF EXISTS crawler_sites; -- parent of crawler_requests.site, so dropped after it |
| CREATE TABLE crawler_sites ( |
| id INT NOT NULL AUTO_INCREMENT, |
| PRIMARY KEY (id), |
| - url VARCHAR(512) NOT NULL, |
| + url VARCHAR(1000) BINARY NOT NULL, -- BINARY selects the binary (_bin) collation, so comparisons and UNIQUE (url) are case-sensitive |
| UNIQUE (url) |
| ); |
| @@ -15,7 +18,7 @@ |
| timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP |
| ); |
| -CREATE TABLE crawler_data ( |
| +CREATE TABLE crawler_requests ( |
| id INT NOT NULL AUTO_INCREMENT, |
| PRIMARY KEY (id), |
| run INT NOT NULL, |
| @@ -23,5 +26,36 @@ |
| site INT NOT NULL, |
| FOREIGN KEY (site) REFERENCES crawler_sites (id), |
| url VARCHAR(512) NOT NULL, |
| - filtered BOOLEAN NOT NULL |
| + filtered BOOLEAN NOT NULL, |
| + filter INT -- nullable reference to crawler_filters.id; the constraint is added after that table is created |
| ); |
| + |
| +-- One row per distinct domain; BINARY makes the UNIQUE (domain) comparison case-sensitive. |
| +CREATE TABLE crawler_domains ( |
| + id INT NOT NULL AUTO_INCREMENT, |
| + PRIMARY KEY (id), |
| + domain VARCHAR(512) BINARY NOT NULL, |
| + UNIQUE (domain) |
| +); |
| + |
| +-- Filter texts; uniqueness is enforced on filter_hash instead of the 2000-character filter column. |
| +CREATE TABLE crawler_filters ( |
| + id INT NOT NULL AUTO_INCREMENT, |
| + PRIMARY KEY (id), |
| + filter VARCHAR(2000) NOT NULL, |
| + filter_hash VARCHAR(40) NOT NULL, -- presumably a 40-character hex digest of filter; confirm against the code that writes it |
| + UNIQUE (filter_hash) |
| +); |
| + |
| +-- crawler_requests is created before crawler_filters, so its foreign key on filter |
| +-- can only be declared here, once the referenced table exists. |
| +ALTER TABLE crawler_requests |
| + ADD FOREIGN KEY (filter) REFERENCES crawler_filters (id); |
| + |
| +CREATE TABLE crawler_domain_filters ( -- junction table pairing rows of crawler_domains with rows of crawler_filters |
| + domain INT NOT NULL, |
| + FOREIGN KEY (domain) REFERENCES crawler_domains (id), |
| + filter INT NOT NULL, |
| + FOREIGN KEY (filter) REFERENCES crawler_filters (id), |
| + PRIMARY KEY (domain, filter) -- composite key: each (domain, filter) pair can be stored at most once |
| +); |