Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Delta Between Two Patch Sets: sitescripts/content_blocker_lists/bin/generate_lists.py

Issue 29329537: Issue 3168 - Add a script for generating content blocker lists (Closed)
Left Patch Set: Created Nov. 13, 2015, 7:43 a.m.
Right Patch Set: Remove retcode variable, move return code check out of the with block Created Nov. 20, 2015, 7 a.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
Left: Side by side diff | Download
Right: Side by side diff | Download
« no previous file with change/comment | « sitescripts/content_blocker_lists/bin/__init__.py ('k') | no next file » | no next file with change/comment »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
LEFT | RIGHT
1 #!/usr/bin/env python 1 #!/usr/bin/env python
2 # coding: utf-8 2 # coding: utf-8
3 3
4 # This file is part of Adblock Plus <https://adblockplus.org/>, 4 # This file is part of Adblock Plus <https://adblockplus.org/>,
5 # Copyright (C) 2006-2015 Eyeo GmbH 5 # Copyright (C) 2006-2015 Eyeo GmbH
6 # 6 #
7 # Adblock Plus is free software: you can redistribute it and/or modify 7 # Adblock Plus is free software: you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License version 3 as 8 # it under the terms of the GNU General Public License version 3 as
9 # published by the Free Software Foundation. 9 # published by the Free Software Foundation.
10 # 10 #
11 # Adblock Plus is distributed in the hope that it will be useful, 11 # Adblock Plus is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of 12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details. 14 # GNU General Public License for more details.
15 # 15 #
16 # You should have received a copy of the GNU General Public License 16 # You should have received a copy of the GNU General Public License
17 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. 17 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.
18 18
19 import os 19 import os
20 import shutil
21 import subprocess 20 import subprocess
22 import tempfile
23 import urllib2 21 import urllib2
24 22
25 from sitescripts.utils import get_config 23 from sitescripts.utils import get_config
26 24
27 def _update_abp2blocklist(): 25 def _update_abp2blocklist():
28 abp2blocklist_path = get_config().get("content_blocker_lists", 26 with open(os.devnull, "w") as devnull:
29 "abp2blocklist_path") 27 config = get_config()
30 if os.path.isdir(abp2blocklist_path): 28 abp2blocklist_path = config.get("content_blocker_lists",
31 subprocess.check_call(["hg", "pull", "-u", "-R", abp2blocklist_path]) 29 "abp2blocklist_path")
32 else: 30 if os.path.isdir(abp2blocklist_path):
33 abp2blocklist_url = get_config().get("content_blocker_lists", 31 subprocess.check_call(("hg", "pull", "-u", "-R", abp2blocklist_path),
34 "abp2blocklist_url") 32 stdout=devnull)
35 subprocess.check_call(["hg", "clone", abp2blocklist_url, 33 else:
36 abp2blocklist_path]) 34 abp2blocklist_url = config.get("content_blocker_lists",
37 subprocess.check_call(["npm", "install"], cwd=abp2blocklist_path) 35 "abp2blocklist_url")
36 subprocess.check_call(("hg", "clone", abp2blocklist_url,
37 abp2blocklist_path), stdout=devnull)
38 subprocess.check_call(("npm", "install"), cwd=abp2blocklist_path,
39 stdout=devnull)
38 40
39 def _download_filter_lists(): 41 def _download(url_key):
42 url = get_config().get("content_blocker_lists", url_key)
43 response = urllib2.urlopen(url)
40 try: 44 try:
41 easylist_url = get_config().get("content_blocker_lists", "easylist_url") 45 return response.read()
42 easylist_response = urllib2.urlopen(easylist_url)
43 easylist_file = tempfile.NamedTemporaryFile(mode="wb+")
44 shutil.copyfileobj(easylist_response, easylist_file)
45 finally: 46 finally:
46 easylist_response.close() 47 response.close()
47 48
48 try: 49 def _convert_filter_list(sources, destination_path_key):
49 exceptionrules_url = get_config().get("content_blocker_lists", 50 config = get_config()
50 "exceptionrules_url") 51 destination_path = config.get("content_blocker_lists", destination_path_key)
51 exceptionrules_response = urllib2.urlopen(exceptionrules_url) 52 with open(destination_path, "wb") as destination_file:
52 exceptionrules_file = tempfile.NamedTemporaryFile(mode="wb+") 53 abp2blocklist_path = config.get("content_blocker_lists",
53 shutil.copyfileobj(exceptionrules_response, exceptionrules_file) 54 "abp2blocklist_path")
54 finally: 55 process = subprocess.Popen(("node", "abp2blocklist.js"),
55 exceptionrules_response.close() 56 cwd=abp2blocklist_path, stdin=subprocess.PIPE,
57 stdout=destination_file)
58 try:
59 for source in sources:
60 print >>process.stdin, source
61 finally:
62 process.stdin.close()
63 process.wait()
56 64
57 return (easylist_file, exceptionrules_file) 65 if process.returncode:
58 66 raise Exception("abp2blocklist returned %s" % process.returncode)
59 def _convert_filter_list(source_file, destination_path):
60 source_file.seek(0)
61 with open(destination_path, "wb") as destination_file:
62 abp2blocklist_path = get_config().get("content_blocker_lists",
63 "abp2blocklist_path")
64 subprocess.check_call(["node", "abp2blocklist.js"],
65 cwd=abp2blocklist_path, stdin=source_file,
66 stdout=destination_file)
67
68 def _concatenate_files(*source_files):
69 destination_file = tempfile.NamedTemporaryFile(mode="wb+")
70 for source_file in source_files:
71 source_file.seek(0)
72 shutil.copyfileobj(source_file, destination_file)
73 return destination_file
74 67
75 if __name__ == "__main__": 68 if __name__ == "__main__":
76 print "Fetching/updating abp2blocklist ..."
77 _update_abp2blocklist() 69 _update_abp2blocklist()
78 70
79 print "Downloading filter lists ..." 71 easylist = _download("easylist_url")
80 easylist_file, exceptionrules_file = _download_filter_lists() 72 exceptionrules = _download("exceptionrules_url")
81 73
82 try: 74 _convert_filter_list([easylist], "easylist_content_blocker_path")
83 easylist_content_blocker_path = get_config().get("content_blocker_lists", 75 _convert_filter_list([easylist, exceptionrules],
84 "easylist_content_blocker_path") 76 "combined_content_blocker_path")
85 print "Generating %s ..." % os.path.basename(easylist_content_blocker_path)
86 _convert_filter_list(easylist_file, easylist_content_blocker_path)
87
88 combined_content_blocker_path = get_config().get("content_blocker_lists",
89 "combined_content_blocker_path")
90 print "Generating %s ..." % os.path.basename(combined_content_blocker_path)
91 with _concatenate_files(easylist_file, exceptionrules_file) as combined_file:
92 _convert_filter_list(combined_file, combined_content_blocker_path)
93 finally:
94 easylist_file.close()
95 exceptionrules_file.close()
LEFT | RIGHT

Powered by Google App Engine
This is Rietveld