Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Side by Side Diff: sitescripts/crawler/schema.sql

Issue 8492019: sitescripts: Collect unmatched filters (Closed)
Patch Set: Created Oct. 2, 2012, 5:02 a.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View unified diff | Download patch
« no previous file with comments | « sitescripts/crawler/bin/import_sites.py ('k') | sitescripts/crawler/web/crawler.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments / Hide Comments ('s')
OLD | NEW
1 DROP TABLE IF EXISTS crawler_sites; 1 DROP TABLE IF EXISTS crawler_sites;
2 DROP TABLE IF EXISTS crawler_runs; 2 DROP TABLE IF EXISTS crawler_runs;
3 DROP TABLE IF EXISTS crawler_data; 3 DROP TABLE IF EXISTS crawler_requests;
4 DROP TABLE IF EXISTS crawler_domains;
5 DROP TABLE IF EXISTS crawler_filters;
6 DROP TABLE IF EXISTS crawler_domain_filters;
4 7
5 CREATE TABLE crawler_sites ( 8 CREATE TABLE crawler_sites (
6 id INT NOT NULL AUTO_INCREMENT, 9 id INT NOT NULL AUTO_INCREMENT,
7 PRIMARY KEY (id), 10 PRIMARY KEY (id),
8 url VARCHAR(512) NOT NULL, 11 url VARCHAR(1000) BINARY NOT NULL,
9 UNIQUE (url) 12 UNIQUE (url)
10 ); 13 );
11 14
12 CREATE TABLE crawler_runs ( 15 CREATE TABLE crawler_runs (
13 id INT NOT NULL AUTO_INCREMENT, 16 id INT NOT NULL AUTO_INCREMENT,
14 PRIMARY KEY (id), 17 PRIMARY KEY (id),
15 timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP 18 timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP
16 ); 19 );
17 20
18 CREATE TABLE crawler_data ( 21 CREATE TABLE crawler_requests (
19 id INT NOT NULL AUTO_INCREMENT, 22 id INT NOT NULL AUTO_INCREMENT,
20 PRIMARY KEY (id), 23 PRIMARY KEY (id),
21 run INT NOT NULL, 24 run INT NOT NULL,
22 FOREIGN KEY (run) REFERENCES crawler_runs (id), 25 FOREIGN KEY (run) REFERENCES crawler_runs (id),
23 site INT NOT NULL, 26 site INT NOT NULL,
24 FOREIGN KEY (site) REFERENCES crawler_sites (id), 27 FOREIGN KEY (site) REFERENCES crawler_sites (id),
25 url VARCHAR(512) NOT NULL, 28 url VARCHAR(512) NOT NULL,
26 filtered BOOLEAN NOT NULL 29 filtered BOOLEAN NOT NULL,
30 filter INT,
31 FOREIGN KEY (filter) REFERENCES crawler_filters (id)
27 ); 32 );
33
34 CREATE TABLE crawler_domains (
35 id INT NOT NULL AUTO_INCREMENT,
36 PRIMARY KEY (id),
37 domain VARCHAR(512) BINARY NOT NULL,
38 UNIQUE (domain)
39 );
40
41 CREATE TABLE crawler_filters (
42 id INT NOT NULL AUTO_INCREMENT,
43 PRIMARY KEY (id),
44 filter VARCHAR(2000) NOT NULL,
45 filter_hash VARCHAR(40) NOT NULL,
46 UNIQUE (filter_hash)
47 );
48
49 CREATE TABLE crawler_domain_filters (
50 domain INT NOT NULL,
51 FOREIGN KEY (domain) REFERENCES crawler_domains (id),
52 filter INT NOT NULL,
53 FOREIGN KEY (filter) REFERENCES crawler_filters (id),
54 PRIMARY KEY (domain, filter)
55 );
OLD | NEW
« no previous file with comments | « sitescripts/crawler/bin/import_sites.py ('k') | sitescripts/crawler/web/crawler.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld