Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Side by Side Diff: sitescripts/content_blocker_lists/bin/generate_lists.py

Issue 29331148: Issue 3176 - Add metadata to content blocker lists (Closed)
Patch Set: Addressed more feedback from Felix and Sebastian (Created: Dec. 1, 2015, 12:10 p.m.)
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View unified diff | Download patch
« no previous file with comments | « .sitescripts.example ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments / Hide Comments ('s')
OLD | NEW
1 #!/usr/bin/env python 1 #!/usr/bin/env python
2 # coding: utf-8 2 # coding: utf-8
3 3
4 # This file is part of Adblock Plus <https://adblockplus.org/>, 4 # This file is part of Adblock Plus <https://adblockplus.org/>,
5 # Copyright (C) 2006-2015 Eyeo GmbH 5 # Copyright (C) 2006-2015 Eyeo GmbH
6 # 6 #
7 # Adblock Plus is free software: you can redistribute it and/or modify 7 # Adblock Plus is free software: you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License version 3 as 8 # it under the terms of the GNU General Public License version 3 as
9 # published by the Free Software Foundation. 9 # published by the Free Software Foundation.
10 # 10 #
11 # Adblock Plus is distributed in the hope that it will be useful, 11 # Adblock Plus is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of 12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details. 14 # GNU General Public License for more details.
15 # 15 #
16 # You should have received a copy of the GNU General Public License 16 # You should have received a copy of the GNU General Public License
17 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. 17 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.
18 18
19 from collections import OrderedDict
20 from contextlib import closing
21 import json
19 import os 22 import os
20 import subprocess 23 import subprocess
24 import threading
25 import time
26 import re
21 import urllib2 27 import urllib2
22 28
23 from sitescripts.utils import get_config 29 from sitescripts.utils import get_config
24 30
25 def _update_abp2blocklist(): 31 config = dict(get_config().items("content_blocker_lists"))
32
33 def update_abp2blocklist():
26 with open(os.devnull, "w") as devnull: 34 with open(os.devnull, "w") as devnull:
27 config = get_config() 35 abp2blocklist_path = config["abp2blocklist_path"]
28 abp2blocklist_path = config.get("content_blocker_lists",
29 "abp2blocklist_path")
30 if os.path.isdir(abp2blocklist_path): 36 if os.path.isdir(abp2blocklist_path):
31 subprocess.check_call(("hg", "pull", "-u", "-R", abp2blocklist_path), 37 subprocess.check_call(("hg", "pull", "-u", "-R", abp2blocklist_path),
32 stdout=devnull) 38 stdout=devnull)
33 else: 39 else:
34 abp2blocklist_url = config.get("content_blocker_lists", 40 subprocess.check_call(("hg", "clone", config["abp2blocklist_url"],
35 "abp2blocklist_url")
36 subprocess.check_call(("hg", "clone", abp2blocklist_url,
37 abp2blocklist_path), stdout=devnull) 41 abp2blocklist_path), stdout=devnull)
38 subprocess.check_call(("npm", "install"), cwd=abp2blocklist_path, 42 subprocess.check_call(("npm", "install"), cwd=abp2blocklist_path,
39 stdout=devnull) 43 stdout=devnull)
40 44
41 def _download(url_key): 45 def parse_filter_list_header(filter_list):
42 url = get_config().get("content_blocker_lists", url_key) 46 body_start = re.search(r"^[^![]", filter_list, re.MULTILINE).start()
43 response = urllib2.urlopen(url) 47 field_re = re.compile(r"^!\s*([^:\s]+):\s*(.+)$", re.MULTILINE)
Sebastian Noack 2015/12/07 12:38:24 This logic can be simplified, in particular since …
kzar 2015/12/08 12:52:21 IMHO that doesn't look easier to read. Also it see…
Felix Dahlke 2015/12/08 13:39:51 Reading both pieces of code for the first time, I …
kzar 2015/12/08 14:34:24 OK I've gone with a slightly simplified version of …
48 return { match.group(1): match.group(2)
49 for match in field_re.finditer(filter_list, 0, body_start) }
50
51 def download_filter_list(url):
52 with closing(urllib2.urlopen(url)) as response:
53 body = response.read()
54 header = parse_filter_list_header(body)
55 return body, url, header["Version"]
56
57 def generate_metadata(filter_lists, expires):
58 metadata = OrderedDict((
59 ("version", time.strftime("%Y%m%d%H%M", time.gmtime())),
60 ("expires", expires),
61 ("sources", [])
62 ))
63 for body, url, version in filter_lists:
64 metadata["sources"].append({ "url": url, "version": version })
Sebastian Noack 2015/12/07 12:38:24 See https://www.python.org/dev/peps/pep-0008/#pet-peeves …
kzar 2015/12/08 12:52:21 Done.
65 return metadata
66
67 def pipe_in(process, filter_lists):
44 try: 68 try:
45 return response.read() 69 for body, _, _ in filter_lists:
70 print >>process.stdin, body
46 finally: 71 finally:
47 response.close() 72 process.stdin.close()
73 process.wait()
48 74
49 def _convert_filter_list(sources, destination_path_key): 75 def write_block_list(filter_lists, path, expires):
50 config = get_config() 76 block_list = generate_metadata(filter_lists, expires)
51 destination_path = config.get("content_blocker_lists", destination_path_key) 77 process = subprocess.Popen(("node", "abp2blocklist.js"),
52 with open(destination_path, "wb") as destination_file: 78 cwd=config["abp2blocklist_path"],
53 abp2blocklist_path = config.get("content_blocker_lists", 79 stdin=subprocess.PIPE, stdout=subprocess.PIPE)
54 "abp2blocklist_path") 80 threading.Thread(target=pipe_in, args=(process, filter_lists)).start()
55 process = subprocess.Popen(("node", "abp2blocklist.js"), 81 block_list["rules"] = json.load(process.stdout)
56 cwd=abp2blocklist_path, stdin=subprocess.PIPE,
57 stdout=destination_file)
58 try:
59 for source in sources:
60 print >>process.stdin, source
61 finally:
62 process.stdin.close()
63 process.wait()
64
65 if process.returncode: 82 if process.returncode:
Sebastian Noack 2015/12/07 12:38:24 Note that returncode is set by the wait() method.
kzar 2015/12/08 12:52:21 Done.
66 raise Exception("abp2blocklist returned %s" % process.returncode) 83 raise Exception("abp2blocklist returned %s" % process.returncode)
67 84
85 with open(path, "wb") as destination_file:
86 json.dump(block_list, destination_file, indent=2, separators=(",", ": "))
87
68 if __name__ == "__main__": 88 if __name__ == "__main__":
69 _update_abp2blocklist() 89 update_abp2blocklist()
70 90
71 easylist = _download("easylist_url") 91 easylist = download_filter_list(config["easylist_url"])
72 exceptionrules = _download("exceptionrules_url") 92 exceptionrules = download_filter_list(config["exceptionrules_url"])
73 93
74 _convert_filter_list([easylist], "easylist_content_blocker_path") 94 write_block_list([easylist],
75 _convert_filter_list([easylist, exceptionrules], 95 config["easylist_content_blocker_path"],
76 "combined_content_blocker_path") 96 config["easylist_content_blocker_expires"])
97 write_block_list([easylist, exceptionrules],
98 config["combined_content_blocker_path"],
99 config["combined_content_blocker_expires"])
OLD | NEW
« no previous file with comments | « .sitescripts.example ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld