Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Delta Between Two Patch Sets: sitescripts/content_blocker_lists/bin/generate_lists.py

Issue 29331148: Issue 3176 - Add metadata to content blocker lists (Closed)
Left Patch Set: Addressed more feedback from Felix and Sebastian (Created Dec. 1, 2015, 12:10 p.m.)
Right Patch Set: Improved regexp (Created Dec. 8, 2015, 3:31 p.m.)
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
Left: Side by side diff | Download
Right: Side by side diff | Download
« no previous file with change/comment | « .sitescripts.example ('k') | no next file » | no next file with change/comment »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments / Hide Comments ('s')
LEFT | RIGHT
1 #!/usr/bin/env python 1 #!/usr/bin/env python
2 # coding: utf-8 2 # coding: utf-8
3 3
4 # This file is part of Adblock Plus <https://adblockplus.org/>, 4 # This file is part of Adblock Plus <https://adblockplus.org/>,
5 # Copyright (C) 2006-2015 Eyeo GmbH 5 # Copyright (C) 2006-2015 Eyeo GmbH
6 # 6 #
7 # Adblock Plus is free software: you can redistribute it and/or modify 7 # Adblock Plus is free software: you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License version 3 as 8 # it under the terms of the GNU General Public License version 3 as
9 # published by the Free Software Foundation. 9 # published by the Free Software Foundation.
10 # 10 #
(...skipping 24 matching lines...) Expand all
35 abp2blocklist_path = config["abp2blocklist_path"] 35 abp2blocklist_path = config["abp2blocklist_path"]
36 if os.path.isdir(abp2blocklist_path): 36 if os.path.isdir(abp2blocklist_path):
37 subprocess.check_call(("hg", "pull", "-u", "-R", abp2blocklist_path), 37 subprocess.check_call(("hg", "pull", "-u", "-R", abp2blocklist_path),
38 stdout=devnull) 38 stdout=devnull)
39 else: 39 else:
40 subprocess.check_call(("hg", "clone", config["abp2blocklist_url"], 40 subprocess.check_call(("hg", "clone", config["abp2blocklist_url"],
41 abp2blocklist_path), stdout=devnull) 41 abp2blocklist_path), stdout=devnull)
42 subprocess.check_call(("npm", "install"), cwd=abp2blocklist_path, 42 subprocess.check_call(("npm", "install"), cwd=abp2blocklist_path,
43 stdout=devnull) 43 stdout=devnull)
44 44
45 def parse_filter_list_header(filter_list):
46 body_start = re.search(r"^[^![]", filter_list, re.MULTILINE).start()
47 field_re = re.compile(r"^!\s*([^:\s]+):\s*(.+)$", re.MULTILINE)
Sebastian Noack 2015/12/07 12:38:24 This logic can be simplified, in particular since…
kzar 2015/12/08 12:52:21 IMHO that doesn't look easier to read. Also it see…
Felix Dahlke 2015/12/08 13:39:51 Reading both pieces of code for the first time, I…
kzar 2015/12/08 14:34:24 OK I've gone with a slightly simplified version of…
48 return { match.group(1): match.group(2)
49 for match in field_re.finditer(filter_list, 0, body_start) }
50
51 def download_filter_list(url): 45 def download_filter_list(url):
52 with closing(urllib2.urlopen(url)) as response: 46 with closing(urllib2.urlopen(url)) as response:
53 body = response.read() 47 body = response.read()
54 header = parse_filter_list_header(body) 48 version = re.search(r"^(?:[^[!])|^!\s*Version:\s*(.+)$",
55 return body, url, header["Version"] 49 body, re.MULTILINE).group(1)
50 return body, url, version
56 51
57 def generate_metadata(filter_lists, expires): 52 def generate_metadata(filter_lists, expires):
58 metadata = OrderedDict(( 53 metadata = OrderedDict((
59 ("version", time.strftime("%Y%m%d%H%M", time.gmtime())), 54 ("version", time.strftime("%Y%m%d%H%M", time.gmtime())),
60 ("expires", expires), 55 ("expires", expires),
61 ("sources", []) 56 ("sources", [])
62 )) 57 ))
63 for body, url, version in filter_lists: 58 for body, url, version in filter_lists:
64 metadata["sources"].append({ "url": url, "version": version }) 59 metadata["sources"].append({"url": url, "version": version})
Sebastian Noack 2015/12/07 12:38:24 See https://www.python.org/dev/peps/pep-0008/#pet-peeves
kzar 2015/12/08 12:52:21 Done.
65 return metadata 60 return metadata
66 61
67 def pipe_in(process, filter_lists): 62 def pipe_in(process, filter_lists):
68 try: 63 try:
69 for body, _, _ in filter_lists: 64 for body, _, _ in filter_lists:
70 print >>process.stdin, body 65 print >>process.stdin, body
71 finally: 66 finally:
72 process.stdin.close() 67 process.stdin.close()
73 process.wait()
74 68
75 def write_block_list(filter_lists, path, expires): 69 def write_block_list(filter_lists, path, expires):
76 block_list = generate_metadata(filter_lists, expires) 70 block_list = generate_metadata(filter_lists, expires)
77 process = subprocess.Popen(("node", "abp2blocklist.js"), 71 process = subprocess.Popen(("node", "abp2blocklist.js"),
78 cwd=config["abp2blocklist_path"], 72 cwd=config["abp2blocklist_path"],
79 stdin=subprocess.PIPE, stdout=subprocess.PIPE) 73 stdin=subprocess.PIPE, stdout=subprocess.PIPE)
80 threading.Thread(target=pipe_in, args=(process, filter_lists)).start() 74 threading.Thread(target=pipe_in, args=(process, filter_lists)).start()
81 block_list["rules"] = json.load(process.stdout) 75 block_list["rules"] = json.load(process.stdout)
82 if process.returncode: 76 if process.wait():
Sebastian Noack 2015/12/07 12:38:24 Note that returncode is set by the wait() method.
kzar 2015/12/08 12:52:21 Done.
83 raise Exception("abp2blocklist returned %s" % process.returncode) 77 raise Exception("abp2blocklist returned %s" % process.returncode)
84 78
85 with open(path, "wb") as destination_file: 79 with open(path, "wb") as destination_file:
86 json.dump(block_list, destination_file, indent=2, separators=(",", ": ")) 80 json.dump(block_list, destination_file, indent=2, separators=(",", ": "))
87 81
88 if __name__ == "__main__": 82 if __name__ == "__main__":
89 update_abp2blocklist() 83 update_abp2blocklist()
90 84
91 easylist = download_filter_list(config["easylist_url"]) 85 easylist = download_filter_list(config["easylist_url"])
92 exceptionrules = download_filter_list(config["exceptionrules_url"]) 86 exceptionrules = download_filter_list(config["exceptionrules_url"])
93 87
94 write_block_list([easylist], 88 write_block_list([easylist],
95 config["easylist_content_blocker_path"], 89 config["easylist_content_blocker_path"],
96 config["easylist_content_blocker_expires"]) 90 config["easylist_content_blocker_expires"])
97 write_block_list([easylist, exceptionrules], 91 write_block_list([easylist, exceptionrules],
98 config["combined_content_blocker_path"], 92 config["combined_content_blocker_path"],
99 config["combined_content_blocker_expires"]) 93 config["combined_content_blocker_expires"])
LEFT | RIGHT

Powered by Google App Engine
This is Rietveld