Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Delta Between Two Patch Sets: generate_lists.py

Issue 29328894: Issue 3168 - Add a script for generating new content blocker lists (Closed)
Left Patch Set: Created Oct. 6, 2015, 6:22 p.m.
Right Patch Set: Use the with statement Created Oct. 22, 2015, 3:15 a.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
Left: Side by side diff | Download
Right: Side by side diff | Download
« no previous file with change/comment | « no previous file | no next file » | no next file with change/comment »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
LEFTRIGHT
1 #!/usr/bin/env python 1 #!/usr/bin/env python
2 # coding: utf-8 2 # coding: utf-8
3 3
4 # This file is part of Adblock Plus <https://adblockplus.org/>, 4 # This file is part of Adblock Plus <https://adblockplus.org/>,
5 # Copyright (C) 2006-2015 Eyeo GmbH 5 # Copyright (C) 2006-2015 Eyeo GmbH
6 # 6 #
7 # Adblock Plus is free software: you can redistribute it and/or modify 7 # Adblock Plus is free software: you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License version 3 as 8 # it under the terms of the GNU General Public License version 3 as
9 # published by the Free Software Foundation. 9 # published by the Free Software Foundation.
10 # 10 #
(...skipping 18 matching lines...) Expand all
29 EXCEPTIONRULES_URL = "https://easylist-downloads.adblockplus.org/exceptionrules.txt" 29 EXCEPTIONRULES_URL = "https://easylist-downloads.adblockplus.org/exceptionrules.txt"
30 EASYLIST_CONTENT_BLOCKER_PATH = os.path.join(BASE_PATH, "easylist_content_blocker.json") 30 EASYLIST_CONTENT_BLOCKER_PATH = os.path.join(BASE_PATH, "easylist_content_blocker.json")
31 COMBINED_CONTENT_BLOCKER_PATH = os.path.join(BASE_PATH, "easylist+exceptionrules_content_blocker.json") 31 COMBINED_CONTENT_BLOCKER_PATH = os.path.join(BASE_PATH, "easylist+exceptionrules_content_blocker.json")
32 32
33 def _update_abp2blocklist(): 33 def _update_abp2blocklist():
34 if os.path.isdir(ABP2BLOCKLIST_PATH): 34 if os.path.isdir(ABP2BLOCKLIST_PATH):
35 subprocess.check_call(["hg", "pull", "-u", "-R", ABP2BLOCKLIST_PATH]) 35 subprocess.check_call(["hg", "pull", "-u", "-R", ABP2BLOCKLIST_PATH])
36 else: 36 else:
37 subprocess.check_call(["hg", "clone", ABP2BLOCKLIST_URL, 37 subprocess.check_call(["hg", "clone", ABP2BLOCKLIST_URL,
38 ABP2BLOCKLIST_PATH]) 38 ABP2BLOCKLIST_PATH])
39 subprocess.check_call(["npm", "install", "tldjs"], cwd=ABP2BLOCKLIST_PATH) 39 subprocess.check_call(["npm", "install"], cwd=ABP2BLOCKLIST_PATH)
Felix Dahlke 2015/10/06 18:23:58 Well, this is a bit ugly. Once my patch that adds
Sebastian Noack 2015/10/12 13:08:31 This can be addressed now.
Felix Dahlke 2015/10/21 20:36:21 Done.
40 40
41 def _download_filter_lists(): 41 def _download_filter_lists():
42 easylist_response = urllib2.urlopen(EASYLIST_URL) 42 try:
Sebastian Noack 2015/10/12 13:08:31 Please close the file like object returned by urlo
Felix Dahlke 2015/10/21 20:36:22 Done.
43 with tempfile.NamedTemporaryFile(mode="w", delete=False) as easylist_file: 43 easylist_response = urllib2.urlopen(EASYLIST_URL)
Sebastian Noack 2015/10/12 13:08:31 Always add "b" to the mode when dealing with binar
Sebastian Noack 2015/10/12 13:08:31 How about, using mode NamedTemporaryFile("wb+"), p
Felix Dahlke 2015/10/21 20:36:21 Passing around RW file objects seemed pretty hacky
Felix Dahlke 2015/10/21 20:36:22 Those are ASCII files, however, should I still add
Sebastian Noack 2015/10/21 20:56:23 Either you treat it as a binary file. Then you sho
Felix Dahlke 2015/10/22 02:43:45 I see, done.
44 easylist_file.write(easylist_response.read()) 44 easylist_file = tempfile.NamedTemporaryFile(mode="wb+")
Sebastian Noack 2015/10/12 13:08:31 Please use shutil.copyfileobj consistently.
Felix Dahlke 2015/10/21 20:36:21 Done.
45 exceptionrules_response = urllib2.urlopen(EXCEPTIONRULES_URL) 45 shutil.copyfileobj(easylist_response, easylist_file)
46 with tempfile.NamedTemporaryFile(mode="w", delete=False) as exceptionrules_file: 46 finally:
47 exceptionrules_file.write(exceptionrules_response.read()) 47 easylist_response.close()
48 return (easylist_file.name, exceptionrules_file.name)
49 48
50 def _concatenate_files(*source_paths): 49 try:
51 with tempfile.NamedTemporaryFile(mode="w", delete=False) as destination_file: 50 exceptionrules_response = urllib2.urlopen(EXCEPTIONRULES_URL)
52 for source_path in source_paths: 51 exceptionrules_file = tempfile.NamedTemporaryFile(mode="wb+")
53 with open(source_path, "r") as source_file: 52 shutil.copyfileobj(exceptionrules_response, exceptionrules_file)
54 shutil.copyfileobj(source_file, destination_file) 53 finally:
55 return destination_file.name 54 exceptionrules_response.close()
56 55
57 def _convert_filter_list(source_path, destination_path): 56 return (easylist_file, exceptionrules_file)
58 with open(source_path, "r") as source_file, \ 57
59 open(destination_path, "w") as destination_file: 58 def _convert_filter_list(source_file, destination_path):
59 source_file.seek(0)
60 with open(destination_path, "wb") as destination_file:
60 subprocess.check_call(["node", "abp2blocklist.js"], 61 subprocess.check_call(["node", "abp2blocklist.js"],
61 cwd=ABP2BLOCKLIST_PATH, stdin=source_file, 62 cwd=ABP2BLOCKLIST_PATH, stdin=source_file,
62 stdout=destination_file) 63 stdout=destination_file)
64
65 def _concatenate_files(*source_files):
66 destination_file = tempfile.NamedTemporaryFile(mode="wb+")
67 for source_file in source_files:
68 source_file.seek(0)
69 shutil.copyfileobj(source_file, destination_file)
70 return destination_file
63 71
64 if __name__ == "__main__": 72 if __name__ == "__main__":
65 print "Fetching/updating abp2blocklist ..." 73 print "Fetching/updating abp2blocklist ..."
66 _update_abp2blocklist() 74 _update_abp2blocklist()
67 75
68 print "Downloading filter lists ..." 76 print "Downloading filter lists ..."
69 easylist_path, exceptionrules_path = _download_filter_lists() 77 easylist_file, exceptionrules_file = _download_filter_lists()
70 78
71 try: 79 try:
72 print "Generating %s ..." % os.path.basename(EASYLIST_CONTENT_BLOCKER_PATH) 80 print "Generating %s ..." % os.path.basename(EASYLIST_CONTENT_BLOCKER_PATH)
73 _convert_filter_list(easylist_path, EASYLIST_CONTENT_BLOCKER_PATH) 81 _convert_filter_list(easylist_file, EASYLIST_CONTENT_BLOCKER_PATH)
74 82
75 print "Generating %s ..." % os.path.basename(COMBINED_CONTENT_BLOCKER_PATH) 83 print "Generating %s ..." % os.path.basename(COMBINED_CONTENT_BLOCKER_PATH)
76 combined_path = _concatenate_files(easylist_path, exceptionrules_path) 84 with _concatenate_files(easylist_file, exceptionrules_file) as combined_file:
77 try: 85 _convert_filter_list(combined_file, COMBINED_CONTENT_BLOCKER_PATH)
78 _convert_filter_list(combined_path, COMBINED_CONTENT_BLOCKER_PATH)
79 finally:
80 os.remove(combined_path)
81 finally: 86 finally:
82 os.remove(easylist_path) 87 easylist_file.close()
83 os.remove(exceptionrules_path) 88 exceptionrules_file.close()
LEFTRIGHT
« no previous file | no next file » | Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Toggle Comments ('s')

Powered by Google App Engine
This is Rietveld