Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Side by Side Diff: generate_lists.py

Issue 29328894: Issue 3168 - Add a script for generating new content blocker lists (Closed)
Patch Set: Created Oct. 6, 2015, 6:22 p.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 #!/usr/bin/env python
2 # coding: utf-8
3
4 # This file is part of Adblock Plus <https://adblockplus.org/>,
5 # Copyright (C) 2006-2015 Eyeo GmbH
6 #
7 # Adblock Plus is free software: you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License version 3 as
9 # published by the Free Software Foundation.
10 #
11 # Adblock Plus is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
15 #
16 # You should have received a copy of the GNU General Public License
17 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.
18
19 import os
20 import shutil
21 import subprocess
22 import tempfile
23 import urllib2
24
# Directory containing this script; every other path below is relative to it.
BASE_PATH = os.path.dirname(os.path.abspath(__file__))

# Mercurial repository of the abp2blocklist converter and its local checkout.
ABP2BLOCKLIST_URL = "https://hg.adblockplus.org/abp2blocklist"
ABP2BLOCKLIST_PATH = os.path.join(BASE_PATH, "abp2blocklist")

# Filter lists that are downloaded and converted.
EASYLIST_URL = "https://easylist-downloads.adblockplus.org/easylist_noadult.txt"
EXCEPTIONRULES_URL = "https://easylist-downloads.adblockplus.org/exceptionrules.txt"

# Output files: EasyList on its own, and EasyList combined with the
# exception rules.
EASYLIST_CONTENT_BLOCKER_PATH = os.path.join(
    BASE_PATH, "easylist_content_blocker.json")
COMBINED_CONTENT_BLOCKER_PATH = os.path.join(
    BASE_PATH, "easylist+exceptionrules_content_blocker.json")
32
def _update_abp2blocklist():
    """Bring the local abp2blocklist checkout up to date.

    Pulls and updates the checkout at ABP2BLOCKLIST_PATH if that directory
    exists, otherwise clones ABP2BLOCKLIST_URL into it. Afterwards the
    "tldjs" npm package is installed inside the checkout.

    Raises subprocess.CalledProcessError if any of the commands fails.
    """
    checkout_exists = os.path.isdir(ABP2BLOCKLIST_PATH)
    if checkout_exists:
        hg_command = ["hg", "pull", "-u", "-R", ABP2BLOCKLIST_PATH]
    else:
        hg_command = ["hg", "clone", ABP2BLOCKLIST_URL, ABP2BLOCKLIST_PATH]
    subprocess.check_call(hg_command)

    # The converter needs tldjs; install it within the checkout directory.
    npm_command = ["npm", "install", "tldjs"]
    subprocess.check_call(npm_command, cwd=ABP2BLOCKLIST_PATH)
Felix Dahlke 2015/10/06 18:23:58 Well, this is a bit ugly. Once my patch that adds
Sebastian Noack 2015/10/12 13:08:31 This can be addressed now.
Felix Dahlke 2015/10/21 20:36:21 Done.
40
# Review thread attached to this function in the patch set (the comments are
# truncated in the page capture and reproduced here as captured):
#   Sebastian Noack 2015/10/12 13:08:31 Please close the file like object returned by urlo[...]
#   Felix Dahlke 2015/10/21 20:36:22 Done.
#   Sebastian Noack 2015/10/12 13:08:31 Always add "b" to the mode when dealing with binar[...]
#   Sebastian Noack 2015/10/12 13:08:31 How about, using mode NamedTemporaryFile("wb+"), p[...]
#   Felix Dahlke 2015/10/21 20:36:21 Passing around RW file objects seemed pretty hacky[...]
#   Felix Dahlke 2015/10/21 20:36:22 Those are ASCII files, however, should I still add[...]
#   Sebastian Noack 2015/10/21 20:56:23 Either you treat it as a binary file. Then you sho[...]
#   Felix Dahlke 2015/10/22 02:43:45 I see, done.
#   Sebastian Noack 2015/10/12 13:08:31 Please use shutil.copyfileobj consistently.
#   Felix Dahlke 2015/10/21 20:36:21 Done.


def _download_to_temporary_file(url):
    """Download url into a new temporary file and return the file's path.

    The file is created with delete=False, so it outlives this function and
    the caller is responsible for removing it.
    """
    response = urllib2.urlopen(url)
    try:
        # Binary mode: the response is copied as raw bytes, untouched.
        with tempfile.NamedTemporaryFile(mode="wb",
                                         delete=False) as temporary_file:
            # Stream in chunks instead of reading the whole body into memory.
            shutil.copyfileobj(response, temporary_file)
    finally:
        # Always release the connection, even if writing the file failed.
        response.close()
    return temporary_file.name


def _download_filter_lists():
    """Download EasyList and the exception rules into temporary files.

    Returns a tuple (easylist_path, exceptionrules_path). Both files persist
    after this function returns; the caller has to delete them.
    """
    easylist_path = _download_to_temporary_file(EASYLIST_URL)
    try:
        exceptionrules_path = _download_to_temporary_file(EXCEPTIONRULES_URL)
    except Exception:
        # The caller never learns about the first file if the second download
        # fails, so remove it here rather than leaking it.
        os.remove(easylist_path)
        raise
    return (easylist_path, exceptionrules_path)
49
def _concatenate_files(*source_paths):
    """Concatenate the given files into a new temporary file.

    The contents of source_paths are appended in the order given and copied
    byte for byte. Returns the path of the resulting file, which is created
    with delete=False; the caller is responsible for removing it.

    If any source file cannot be read, the partially written temporary file
    is removed and the exception is re-raised.
    """
    # Binary mode on both sides keeps the data untouched: no newline
    # translation and no decoding errors on non-ASCII bytes.
    destination_file = tempfile.NamedTemporaryFile(mode="wb", delete=False)
    try:
        with destination_file:
            for source_path in source_paths:
                with open(source_path, "rb") as source_file:
                    shutil.copyfileobj(source_file, destination_file)
    except Exception:
        # Don't leave a half-written file behind that nobody knows about.
        os.remove(destination_file.name)
        raise
    return destination_file.name
56
def _convert_filter_list(source_path, destination_path):
    """Convert a filter list into a content blocker list.

    Runs "node abp2blocklist.js" inside the abp2blocklist checkout, feeding
    it the file at source_path on stdin and writing its stdout to
    destination_path (which is created or overwritten).

    Raises subprocess.CalledProcessError if the converter exits non-zero.
    """
    converter_command = ["node", "abp2blocklist.js"]
    with open(source_path, "r") as filter_list:
        with open(destination_path, "w") as content_blocker_list:
            subprocess.check_call(converter_command,
                                  cwd=ABP2BLOCKLIST_PATH,
                                  stdin=filter_list,
                                  stdout=content_blocker_list)
63
if __name__ == "__main__":
    # Step 1: make sure the converter checkout exists and is current.
    print("Fetching/updating abp2blocklist ...")
    _update_abp2blocklist()

    # Step 2: fetch both filter lists into temporary files.
    print("Downloading filter lists ...")
    downloaded_paths = _download_filter_lists()
    easylist_path, exceptionrules_path = downloaded_paths

    try:
        # Step 3: EasyList on its own.
        easylist_name = os.path.basename(EASYLIST_CONTENT_BLOCKER_PATH)
        print("Generating %s ..." % easylist_name)
        _convert_filter_list(easylist_path, EASYLIST_CONTENT_BLOCKER_PATH)

        # Step 4: EasyList followed by the exception rules.
        combined_name = os.path.basename(COMBINED_CONTENT_BLOCKER_PATH)
        print("Generating %s ..." % combined_name)
        combined_path = _concatenate_files(easylist_path, exceptionrules_path)
        try:
            _convert_filter_list(combined_path, COMBINED_CONTENT_BLOCKER_PATH)
        finally:
            # The concatenated list is only an intermediate file.
            os.remove(combined_path)
    finally:
        # Remove the downloads whether or not the conversion succeeded.
        for downloaded_path in (easylist_path, exceptionrules_path):
            os.remove(downloaded_path)
OLDNEW
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld