| LEFT | RIGHT |
| 1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
| 2 # coding: utf-8 | 2 # coding: utf-8 |
| 3 | 3 |
| 4 # This file is part of the Adblock Plus web scripts, | 4 # This file is part of the Adblock Plus web scripts, |
| 5 # Copyright (C) 2006-2013 Eyeo GmbH | 5 # Copyright (C) 2006-2013 Eyeo GmbH |
| 6 # | 6 # |
| 7 # Adblock Plus is free software: you can redistribute it and/or modify | 7 # Adblock Plus is free software: you can redistribute it and/or modify |
| 8 # it under the terms of the GNU General Public License version 3 as | 8 # it under the terms of the GNU General Public License version 3 as |
| 9 # published by the Free Software Foundation. | 9 # published by the Free Software Foundation. |
| 10 # | 10 # |
| 11 # Adblock Plus is distributed in the hope that it will be useful, | 11 # Adblock Plus is distributed in the hope that it will be useful, |
| 12 # but WITHOUT ANY WARRANTY; without even the implied warranty of | 12 # but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 14 # GNU General Public License for more details. | 14 # GNU General Public License for more details. |
| 15 # | 15 # |
| 16 # You should have received a copy of the GNU General Public License | 16 # You should have received a copy of the GNU General Public License |
| 17 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. | 17 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. |
| 18 | 18 |
| 19 import sys, os, re, subprocess, urllib2, time, traceback, codecs, hashlib, base6
4 | 19 import sys, os, re, subprocess, urllib2, time, traceback, codecs, hashlib, base6
4, tempfile |
| 20 from getopt import getopt, GetoptError | 20 from getopt import getopt, GetoptError |
| 21 | 21 |
| 22 accepted_extensions = set([".txt"]) | 22 accepted_extensions = set([".txt"]) |
| 23 ignore = set(["Apache.txt", "CC-BY-SA.txt", "GPL.txt", "MPL.txt"]) | 23 ignore = set(["Apache.txt", "CC-BY-SA.txt", "GPL.txt", "MPL.txt"]) |
| 24 verbatim = set(["COPYING"]) | 24 verbatim = set(["COPYING"]) |
| 25 | 25 |
| 26 def combine_subscriptions(sources, target_dir, timeout=30): | 26 def combine_subscriptions(sources, target_dir, timeout=30, tempdir=None): |
| 27 if not os.path.exists(target_dir): | 27 if not os.path.exists(target_dir): |
| 28 os.makedirs(target_dir, 0755) | 28 os.makedirs(target_dir, 0755) |
| 29 |
| 30 def save_file(filename, data): |
| 31 handle = tempfile.NamedTemporaryFile(mode="wb", dir=tempdir, delete=False) |
| 32 handle.write(data.encode("utf-8")) |
| 33 handle.close() |
| 34 |
| 35 try: |
| 36 subprocess.check_output(["7za", "a", "-tgzip", "-mx=9", "-bd", "-mpass=5",
handle.name + ".gz", handle.name]) |
| 37 except: |
| 38 print >>sys.stderr, "Failed to compress file %s. Please ensure that p7zip
is installed on the system." % handle.name |
| 39 |
| 40 path = os.path.join(target_dir, filename) |
| 41 os.rename(handle.name, path) |
| 42 os.rename(handle.name + ".gz", path + ".gz") |
| 29 | 43 |
| 30 known = set() | 44 known = set() |
| 31 for source_name, source in sources.iteritems(): | 45 for source_name, source in sources.iteritems(): |
| 32 for filename in source.list_top_level_files(): | 46 for filename in source.list_top_level_files(): |
| 33 if filename in ignore or filename.startswith("."): | 47 if filename in ignore or filename.startswith("."): |
| 34 continue | 48 continue |
| 35 if filename in verbatim: | 49 if filename in verbatim: |
| 36 process_verbatim_file(source, target_dir, filename) | 50 process_verbatim_file(source, save_file, filename) |
| 37 elif not os.path.splitext(filename)[1] in accepted_extensions: | 51 elif not os.path.splitext(filename)[1] in accepted_extensions: |
| 38 continue | 52 continue |
| 39 else: | 53 else: |
| 40 try: | 54 try: |
| 41 process_subscription_file(source_name, sources, target_dir, filename,
timeout) | 55 process_subscription_file(source_name, sources, save_file, filename, t
imeout) |
| 42 except: | 56 except: |
| 43 print >>sys.stderr, 'Error processing subscription file "%s"' % filena
me | 57 print >>sys.stderr, 'Error processing subscription file "%s"' % filena
me |
| 44 traceback.print_exc() | 58 traceback.print_exc() |
| 45 print >>sys.stderr | 59 print >>sys.stderr |
| 46 known.add(os.path.splitext(filename)[0] + ".tpl") | 60 known.add(os.path.splitext(filename)[0] + ".tpl") |
| 47 known.add(os.path.splitext(filename)[0] + ".tpl.gz") | 61 known.add(os.path.splitext(filename)[0] + ".tpl.gz") |
| 48 known.add(filename) | 62 known.add(filename) |
| 49 known.add(filename + ".gz") | 63 known.add(filename + ".gz") |
| 50 | 64 |
| 51 for filename in os.listdir(target_dir): | 65 for filename in os.listdir(target_dir): |
| 52 if filename.startswith("."): | 66 if filename.startswith("."): |
| 53 continue | 67 continue |
| 54 if not filename in known: | 68 if not filename in known: |
| 55 os.remove(os.path.join(target_dir, filename)) | 69 os.remove(os.path.join(target_dir, filename)) |
| 56 | 70 |
| 57 def save_file(path, data): | 71 def process_verbatim_file(source, save_file, filename): |
| 58 handle = codecs.open(path, "wb", encoding="utf-8") | 72 save_file(filename, source.read_file(filename)) |
| 59 handle.write(data) | 73 |
| 60 handle.close() | 74 def process_subscription_file(source_name, sources, save_file, filename, timeout
): |
| 61 try: | |
| 62 subprocess.check_output(["7za", "a", "-tgzip", "-mx=9", "-bd", "-mpass=5", p
ath + ".gz", path]) | |
| 63 except: | |
| 64 print >>sys.stderr, "Failed to compress file %s. Please ensure that p7zip is
installed on the system." % path | |
| 65 | |
| 66 def process_verbatim_file(source, target_dir, filename): | |
| 67 save_file(os.path.join(target_dir, filename), source.read_file(filename)) | |
| 68 | |
| 69 def process_subscription_file(source_name, sources, target_dir, filename, timeou
t): | |
| 70 source = sources[source_name] | 75 source = sources[source_name] |
| 71 lines = source.read_file(filename).splitlines() | 76 lines = source.read_file(filename).splitlines() |
| 72 | 77 |
| 73 header = "" | 78 header = "" |
| 74 if len(lines) > 0: | 79 if len(lines) > 0: |
| 75 header = lines.pop(0) | 80 header = lines.pop(0) |
| 76 if not re.search(r"\[Adblock(?:\s*Plus\s*([\d\.]+)?)?\]", header, re.I): | 81 if not re.search(r"\[Adblock(?:\s*Plus\s*([\d\.]+)?)?\]", header, re.I): |
| 77 raise Exception("This is not a valid Adblock Plus subscription file.") | 82 raise Exception("This is not a valid Adblock Plus subscription file.") |
| 78 | 83 |
| 79 lines = resolve_includes(source_name, sources, lines, timeout) | 84 lines = resolve_includes(source_name, sources, lines, timeout) |
| 80 seen = set(["checksum", "version"]) | 85 seen = set(["checksum", "version"]) |
| 81 def check_line(line): | 86 def check_line(line): |
| 82 if line == "": | 87 if line == "": |
| 83 return False | 88 return False |
| 84 match = re.search(r"^\s*!\s*(Redirect|Homepage|Title|Checksum|Version)\s*:",
line, re.M | re.I) | 89 match = re.search(r"^\s*!\s*(Redirect|Homepage|Title|Checksum|Version)\s*:",
line, re.M | re.I) |
| 85 if not match: | 90 if not match: |
| 86 return True | 91 return True |
| 87 key = match.group(1).lower() | 92 key = match.group(1).lower() |
| 88 if key in seen: | 93 if key in seen: |
| 89 return False | 94 return False |
| 90 seen.add(key) | 95 seen.add(key) |
| 91 return True | 96 return True |
| 92 lines = filter(check_line, lines) | 97 lines = filter(check_line, lines) |
| 93 | 98 |
| 94 write_tpl(os.path.join(target_dir, os.path.splitext(filename)[0] + ".tpl"), li
nes) | 99 write_tpl(save_file, os.path.splitext(filename)[0] + ".tpl", lines) |
| 95 | 100 |
| 96 lines.insert(0, "! Version: %s" % time.strftime("%Y%m%d%H%M", time.gmtime())) | 101 lines.insert(0, "! Version: %s" % time.strftime("%Y%m%d%H%M", time.gmtime())) |
| 97 | 102 |
| 98 checksum = hashlib.md5() | 103 checksum = hashlib.md5() |
| 99 checksum.update("\n".join([header] + lines).encode("utf-8")) | 104 checksum.update("\n".join([header] + lines).encode("utf-8")) |
| 100 lines.insert(0, "! Checksum: %s" % base64.b64encode(checksum.digest()).rstrip(
"=")) | 105 lines.insert(0, "! Checksum: %s" % base64.b64encode(checksum.digest()).rstrip(
"=")) |
| 101 lines.insert(0, header) | 106 lines.insert(0, header) |
| 102 save_file(os.path.join(target_dir, filename), "\n".join(lines)) | 107 save_file(filename, "\n".join(lines)) |
| 103 | 108 |
| 104 def resolve_includes(source_name, sources, lines, timeout, level=0): | 109 def resolve_includes(source_name, sources, lines, timeout, level=0): |
| 105 if level > 5: | 110 if level > 5: |
| 106 raise Exception("There are too many nested includes, which is probably the r
esult of a circular reference somewhere.") | 111 raise Exception("There are too many nested includes, which is probably the r
esult of a circular reference somewhere.") |
| 107 | 112 |
| 108 result = [] | 113 result = [] |
| 109 for line in lines: | 114 for line in lines: |
| 110 match = re.search(r"^\s*%include\s+(.*)%\s*$", line) | 115 match = re.search(r"^\s*%include\s+(.*)%\s*$", line) |
| 111 if match: | 116 if match: |
| 112 filename = match.group(1) | 117 filename = match.group(1) |
| (...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 150 result.extend(newlines) | 155 result.extend(newlines) |
| 151 else: | 156 else: |
| 152 if line.find("%timestamp%") >= 0: | 157 if line.find("%timestamp%") >= 0: |
| 153 if level == 0: | 158 if level == 0: |
| 154 line = line.replace("%timestamp%", time.strftime("%d %b %Y %H:%M UTC",
time.gmtime())) | 159 line = line.replace("%timestamp%", time.strftime("%d %b %Y %H:%M UTC",
time.gmtime())) |
| 155 else: | 160 else: |
| 156 line = "" | 161 line = "" |
| 157 result.append(line) | 162 result.append(line) |
| 158 return result | 163 return result |
| 159 | 164 |
| 160 def write_tpl(path, lines): | 165 def write_tpl(save_file, filename, lines): |
| 161 result = [] | 166 result = [] |
| 162 result.append("msFilterList") | 167 result.append("msFilterList") |
| 163 for line in lines: | 168 for line in lines: |
| 164 if re.search(r"^\s*!", line): | 169 if re.search(r"^\s*!", line): |
| 165 # This is a comment. Handle "Expires" comment in a special way, keep the r
est. | 170 # This is a comment. Handle "Expires" comment in a special way, keep the r
est. |
| 166 match = re.search(r"\bExpires\s*(?::|after)\s*(\d+)\s*(h)?", line, re.I) | 171 match = re.search(r"\bExpires\s*(?::|after)\s*(\d+)\s*(h)?", line, re.I) |
| 167 if match: | 172 if match: |
| 168 interval = int(match.group(1)) | 173 interval = int(match.group(1)) |
| 169 if match.group(2): | 174 if match.group(2): |
| 170 interval = int(interval / 24) | 175 interval = int(interval / 24) |
| (...skipping 78 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 249 line = line[2:] | 254 line = line[2:] |
| 250 if domain: | 255 if domain: |
| 251 line = "%sd %s %s" % ("+" if is_exception else "-", domain, line) | 256 line = "%sd %s %s" % ("+" if is_exception else "-", domain, line) |
| 252 line = re.sub(r"\s+/$", "", line) | 257 line = re.sub(r"\s+/$", "", line) |
| 253 result.append(line) | 258 result.append(line) |
| 254 elif is_exception: | 259 elif is_exception: |
| 255 # Exception rules without domains are unsupported | 260 # Exception rules without domains are unsupported |
| 256 result.append("# " + origline) | 261 result.append("# " + origline) |
| 257 else: | 262 else: |
| 258 result.append("- " + line) | 263 result.append("- " + line) |
| 259 save_file(path, "\n".join(result) + "\n") | 264 save_file(filename, "\n".join(result) + "\n") |
| 260 | 265 |
| 261 class FileSource: | 266 class FileSource: |
| 262 def __init__(self, dir): | 267 def __init__(self, dir): |
| 263 self._dir = dir | 268 self._dir = dir |
| 264 if os.path.exists(os.path.join(dir, ".hg")): | 269 if os.path.exists(os.path.join(dir, ".hg")): |
| 265 # This is a Mercurial repository, try updating | 270 # This is a Mercurial repository, try updating |
| 266 subprocess.call(["hg", "-q", "-R", dir, "pull", "--update"]) | 271 subprocess.call(["hg", "-q", "-R", dir, "pull", "--update"]) |
| 267 | 272 |
| 268 def get_path(self, filename): | 273 def get_path(self, filename): |
| 269 return os.path.join(self._dir, *filename.split("/")) | 274 return os.path.join(self._dir, *filename.split("/")) |
| (...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 310 | 315 |
| 311 timeout = 30 | 316 timeout = 30 |
| 312 for option, value in opts: | 317 for option, value in opts: |
| 313 if option in ("-h", "--help"): | 318 if option in ("-h", "--help"): |
| 314 usage() | 319 usage() |
| 315 sys.exit() | 320 sys.exit() |
| 316 elif option in ("-t", "--timeout"): | 321 elif option in ("-t", "--timeout"): |
| 317 timeout = int(value) | 322 timeout = int(value) |
| 318 | 323 |
| 319 combine_subscriptions(sources, target_dir, timeout) | 324 combine_subscriptions(sources, target_dir, timeout) |
| LEFT | RIGHT |