| OLD | NEW | 
|    1 #!/usr/bin/env python |    1 #!/usr/bin/env python | 
|    2 # coding: utf-8 |    2 # coding: utf-8 | 
|    3  |    3  | 
|    4 # This file is part of the Adblock Plus web scripts, |    4 # This file is part of the Adblock Plus web scripts, | 
|    5 # Copyright (C) 2006-2013 Eyeo GmbH |    5 # Copyright (C) 2006-2013 Eyeo GmbH | 
|    6 # |    6 # | 
|    7 # Adblock Plus is free software: you can redistribute it and/or modify |    7 # Adblock Plus is free software: you can redistribute it and/or modify | 
|    8 # it under the terms of the GNU General Public License version 3 as |    8 # it under the terms of the GNU General Public License version 3 as | 
|    9 # published by the Free Software Foundation. |    9 # published by the Free Software Foundation. | 
|   10 # |   10 # | 
|   11 # Adblock Plus is distributed in the hope that it will be useful, |   11 # Adblock Plus is distributed in the hope that it will be useful, | 
|   12 # but WITHOUT ANY WARRANTY; without even the implied warranty of |   12 # but WITHOUT ANY WARRANTY; without even the implied warranty of | 
|   13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the |   13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | 
|   14 # GNU General Public License for more details. |   14 # GNU General Public License for more details. | 
|   15 # |   15 # | 
|   16 # You should have received a copy of the GNU General Public License |   16 # You should have received a copy of the GNU General Public License | 
|   17 # along with Adblock Plus.  If not, see <http://www.gnu.org/licenses/>. |   17 # along with Adblock Plus.  If not, see <http://www.gnu.org/licenses/>. | 
|   18  |   18  | 
|   19 import sys, os, re, subprocess, urllib2, time, traceback, codecs, hashlib, base6
     4 |   19 import sys, os, re, subprocess, urllib2, time, traceback, codecs, hashlib, base6
     4, tempfile | 
|   20 from getopt import getopt, GetoptError |   20 from getopt import getopt, GetoptError | 
|   21  |   21  | 
# File extensions that are treated as subscription lists.
accepted_extensions = {".txt"}
# Top-level files that are never copied into the output directory.
ignore = {"Apache.txt", "CC-BY-SA.txt", "GPL.txt", "MPL.txt"}
# Files that are copied into the output directory unchanged.
verbatim = {"COPYING"}
|   26   'Apache.txt': True, |   26 def combine_subscriptions(sources, target_dir, timeout=30, tempdir=None): | 
|   27   'CC-BY-SA.txt': True, |   27   if not os.path.exists(target_dir): | 
|   28   'GPL.txt': True, |   28     os.makedirs(target_dir, 0755) | 
|   29   'MPL.txt': True, |   29  | 
|   30 } |   30   def save_file(filename, data): | 
|   31 verbatim = { |   31     handle = tempfile.NamedTemporaryFile(mode="wb", dir=tempdir, delete=False) | 
|   32   'COPYING': True, |   32     handle.write(data.encode("utf-8")) | 
|   33 } |   33     handle.close() | 
|   34  |   34  | 
|   35 def combineSubscriptions(sourceDirs, targetDir, timeout=30): |   35     try: | 
|   36   global acceptedExtensions, ignore, verbatim |   36       subprocess.check_output(["7za", "a", "-tgzip", "-mx=9", "-bd", "-mpass=5",
      handle.name + ".gz", handle.name]) | 
|   37  |   37     except: | 
|   38   if isinstance(sourceDirs, basestring): |   38       print >>sys.stderr, "Failed to compress file %s. Please ensure that p7zip 
     is installed on the system." % handle.name | 
|   39     sourceDirs = {'': sourceDirs} |   39  | 
|   40  |   40     path = os.path.join(target_dir, filename) | 
|   41   if not os.path.exists(targetDir): |   41     os.rename(handle.name, path) | 
|   42     os.makedirs(targetDir, 0755) |   42     os.rename(handle.name + ".gz", path + ".gz") | 
|   43  |   43  | 
|   44   known = {} |   44   known = set() | 
|   45   for sourceName, sourceDir in sourceDirs.iteritems(): |   45   for source_name, source in sources.iteritems(): | 
|   46     for file in os.listdir(sourceDir): |   46     for filename in source.list_top_level_files(): | 
|   47       if file in ignore or file[0] == '.' or not os.path.isfile(os.path.join(sou
     rceDir, file)): |   47       if filename in ignore or filename.startswith("."): | 
|   48         continue |   48         continue | 
|   49       if file in verbatim: |   49       if filename in verbatim: | 
|   50         processVerbatimFile(sourceDir, targetDir, file) |   50         process_verbatim_file(source, save_file, filename) | 
|   51       elif not os.path.splitext(file)[1] in acceptedExtensions: |   51       elif not os.path.splitext(filename)[1] in accepted_extensions: | 
|   52         continue |   52         continue | 
|   53       else: |   53       else: | 
|   54         try: |   54         try: | 
|   55           processSubscriptionFile(sourceName, sourceDirs, targetDir, file, timeo
     ut) |   55           process_subscription_file(source_name, sources, save_file, filename, t
     imeout) | 
|   56         except: |   56         except: | 
|   57           print >>sys.stderr, 'Error processing subscription file "%s"' % file |   57           print >>sys.stderr, 'Error processing subscription file "%s"' % filena
     me | 
|   58           traceback.print_exc() |   58           traceback.print_exc() | 
|   59           print >>sys.stderr |   59           print >>sys.stderr | 
|   60         known[os.path.splitext(file)[0] + '.tpl'] = True |   60         known.add(os.path.splitext(filename)[0] + ".tpl") | 
|   61         known[os.path.splitext(file)[0] + '.tpl.gz'] = True |   61         known.add(os.path.splitext(filename)[0] + ".tpl.gz") | 
|   62       known[file] = True |   62       known.add(filename) | 
|   63       known[file + '.gz'] = True |   63       known.add(filename + ".gz") | 
|   64  |   64  | 
|   65   for file in os.listdir(targetDir): |   65   for filename in os.listdir(target_dir): | 
|   66     if file[0] == '.': |   66     if filename.startswith("."): | 
|   67       continue |   67       continue | 
|   68     if not file in known: |   68     if not filename in known: | 
|   69       os.remove(os.path.join(targetDir, file)) |   69       os.remove(os.path.join(target_dir, filename)) | 
|   70  |   70  | 
def process_verbatim_file(source, save_file, filename):
  """Copy `filename` from the source into the output unchanged."""
  contents = source.read_file(filename)
  save_file(filename, contents)
|   73   handle.write(data) |   73  | 
def process_subscription_file(source_name, sources, save_file, filename, timeout):
  """Convert one subscription file.

  Resolves %include% directives, drops duplicated metadata comments,
  writes a TPL-converted copy, and finally saves the list itself with a
  freshly generated version stamp and checksum.

  Raises an Exception if the file lacks a valid [Adblock] header.
  """
  source = sources[source_name]
  lines = source.read_file(filename).splitlines()

  header = ""
  if lines:
    header = lines.pop(0)
  if not re.search(r"\[Adblock(?:\s*Plus\s*([\d\.]+)?)?\]", header, re.I):
    raise Exception("This is not a valid Adblock Plus subscription file.")

  lines = resolve_includes(source_name, sources, lines, timeout)

  # Keep only the first occurrence of each special comment; Checksum and
  # Version comments are dropped entirely since they are regenerated below.
  seen_keys = {"checksum", "version"}
  def keep_line(line):
    if line == "":
      return False
    match = re.search(r"^\s*!\s*(Redirect|Homepage|Title|Checksum|Version)\s*:", line, re.M | re.I)
    if match is None:
      return True
    key = match.group(1).lower()
    if key in seen_keys:
      return False
    seen_keys.add(key)
    return True
  lines = [line for line in lines if keep_line(line)]

  write_tpl(save_file, os.path.splitext(filename)[0] + ".tpl", lines)

  lines.insert(0, "! Version: %s" % time.strftime("%Y%m%d%H%M", time.gmtime()))

  # Checksum covers the header plus everything below it (without the
  # checksum comment itself), base64-encoded with padding stripped.
  checksum = hashlib.md5()
  checksum.update("\n".join([header] + lines).encode("utf-8"))
  lines.insert(0, "! Checksum: %s" % base64.b64encode(checksum.digest()).rstrip("="))
  lines.insert(0, header)
  save_file(filename, "\n".join(lines))
|  123  |  108  | 
def resolve_includes(source_name, sources, lines, timeout, level=0):
  """Expand %include ...% directives in `lines`, recursively.

  Includes may be HTTP(S) URLs (fetched with up to three attempts) or
  "source:path" references into one of the known sources. The [Adblock]
  header of included files is stripped, and nesting deeper than five
  levels raises an Exception (circular-reference guard).
  """
  if level > 5:
    raise Exception("There are too many nested includes, which is probably the result of a circular reference somewhere.")

  result = []
  for line in lines:
    match = re.search(r"^\s*%include\s+(.*)%\s*$", line)
    if not match:
      # Plain line: substitute %timestamp% only at the top level so the
      # timestamp appears exactly once in the combined output.
      if "%timestamp%" in line:
        if level == 0:
          line = line.replace("%timestamp%", time.strftime("%d %b %Y %H:%M UTC", time.gmtime()))
        else:
          line = ""
      result.append(line)
      continue

    filename = match.group(1)
    if re.match(r"^https?://", filename):
      result.append("! *** Fetched from: %s ***" % filename)

      # Retry the download a few times before giving up.
      for attempt in range(3):
        try:
          request = urllib2.urlopen(filename, None, timeout)
          data = request.read()
          error = None
          break
        except urllib2.URLError as e:
          error = e
          time.sleep(5)
      if error:
        raise error

      # We should really get the charset from the headers rather than assuming
      # that it is UTF-8. However, some of the Google Code mirrors are
      # misconfigured and will return ISO-8859-1 as charset instead of UTF-8.
      included = data.decode("utf-8").splitlines()
      included = [l for l in included if not re.search(r"^\s*!.*?\bExpires\s*(?::|after)\s*(\d+)\s*(h)?", l, re.M | re.I)]
      included = [l for l in included if not re.search(r"^\s*!\s*(Redirect|Homepage|Title|Version)\s*:", l, re.M | re.I)]
    else:
      result.append("! *** %s ***" % filename)

      include_source = source_name
      if ":" in filename:
        include_source, filename = filename.split(":", 1)
      if include_source not in sources:
        raise Exception('Cannot include file from repository "%s", this repository is unknown' % include_source)

      included = sources[include_source].read_file(filename).splitlines()
      included = resolve_includes(include_source, sources, included, timeout, level + 1)

    # Drop the [Adblock] header of the included file.
    if included and re.search(r"\[Adblock(?:\s*Plus\s*([\d\.]+)?)?\]", included[0], re.I):
      del included[0]
    result.extend(included)
  return result
|  186  |  164  | 
def write_tpl(save_file, filename, lines):
  """Convert an Adblock Plus filter list into MSIE TPL format.

  Comments are kept (leading "!" turned into "#"), "Expires" comments
  become ": Expires=<days>" directives, element hiding rules are dropped,
  and blocking/exception rules are converted where the TPL format
  supports them; unconvertible rules are emitted as "# ..." comments.
  """
  # Options that MSIE cannot express but that are safe to ignore.
  IGNORABLE_OPTIONS = ("", "third-party", "~third-party", "match-case", "~match-case", "~other", "~donottrack")
  output = ["msFilterList"]
  for line in lines:
    if re.search(r"^\s*!", line):
      # This is a comment. Handle "Expires" comment in a special way, keep the rest.
      expires = re.search(r"\bExpires\s*(?::|after)\s*(\d+)\s*(h)?", line, re.I)
      if expires:
        interval = int(expires.group(1))
        if expires.group(2):
          # Interval was given in hours, convert to days.
          interval //= 24
        output.append(": Expires=%i" % interval)
      else:
        output.append(re.sub(r"^\s*!", "#", re.sub(r"--!$", "--#", line)))
      continue
    if "#" in line:
      # Element hiding rules are not supported in MSIE, drop them
      continue

    # We have a blocking or exception rule, try to convert it
    original = line

    is_exception = line.startswith("@@")
    if is_exception:
      line = line[2:]

    has_unsupported = False
    requires_script = False
    options_match = re.search(r"^(.*?)\$(.*)", line)
    if options_match:
      # This rule has options, check whether any of them are important
      line = options_match.group(1)
      options = options_match.group(2).replace("_", "-").lower().split(",")

      # Remove first-party only exceptions, we will allow an ad server everywhere otherwise
      if is_exception and "~third-party" in options:
        has_unsupported = True

      # A number of options are not supported in MSIE but can be safely ignored, remove them
      options = [o for o in options if o not in IGNORABLE_OPTIONS]

      # Also ignore domain negation of whitelists
      if is_exception:
        options = [o for o in options if not o.startswith("domain=~")]

      unsupported = [o for o in options if o in ("other", "elemhide")]
      if unsupported and len(unsupported) == len(options):
        # The rule only applies to types that are not supported in MSIE
        has_unsupported = True
      elif "donottrack" in options:
        # Do-Not-Track rules have to be removed even if $donottrack is combined with other options
        has_unsupported = True
      elif "script" in options and len(options) == len(unsupported) + 1:
        # Mark rules that only apply to scripts for approximate conversion
        requires_script = True
      elif options:
        # The rule has further options that aren't available in TPLs. For
        # exception rules that aren't specific to a domain we ignore all
        # remaining options to avoid potential false positives. Other rules
        # simply aren't included in the TPL file.
        if is_exception:
          has_unsupported = any(o.startswith("domain=") for o in options)
        else:
          has_unsupported = True

    if has_unsupported:
      # Do not include filters with unsupported options
      output.append("# " + original)
      continue

    line = line.replace("^", "/")  # Assume that separator placeholders mean slashes

    # Try to extract domain info
    domain = None
    domain_match = re.search(r"^(\|\||\|\w+://)([^*:/]+)(:\d+)?(/.*)", line)
    if domain_match:
      domain = domain_match.group(2)
      line = domain_match.group(4)
    else:
      # No domain info, remove anchors at the rule start
      line = re.sub(r"^\|\|", "http://", line)
      line = re.sub(r"^\|", "", line)
    # Remove anchors at the rule end
    line = re.sub(r"\|$", "", line)
    # Remove unnecessary asterisks at the ends of lines
    line = re.sub(r"\*$", "", line)
    # Emulate $script by appending *.js to the rule
    if requires_script:
      line += "*.js"
    if line.startswith("/*"):
      line = line[2:]
    if domain:
      entry = "%sd %s %s" % ("+" if is_exception else "-", domain, line)
      output.append(re.sub(r"\s+/$", "", entry))
    elif is_exception:
      # Exception rules without domains are unsupported
      output.append("# " + original)
    else:
      output.append("- " + line)
  save_file(filename, "\n".join(output) + "\n")
 |  265  | 
 |  266 class FileSource: | 
 |  267   def __init__(self, dir): | 
 |  268     self._dir = dir | 
 |  269     if os.path.exists(os.path.join(dir, ".hg")): | 
 |  270       # This is a Mercurial repository, try updating | 
 |  271       subprocess.call(["hg", "-q", "-R", dir, "pull", "--update"]) | 
 |  272  | 
 |  273   def get_path(self, filename): | 
 |  274     return os.path.join(self._dir, *filename.split("/")) | 
 |  275  | 
 |  276   def read_file(self, filename): | 
 |  277     path = self.get_path(filename) | 
 |  278     if os.path.relpath(path, self._dir).startswith("."): | 
 |  279       raise Exception("Attempt to access a file outside the repository") | 
 |  280     with codecs.open(path, "rb", encoding="utf-8") as handle: | 
 |  281       return handle.read() | 
 |  282  | 
 |  283   def list_top_level_files(self): | 
 |  284     for filename in os.listdir(self._dir): | 
 |  285       path = os.path.join(self._dir, filename) | 
 |  286       if os.path.isfile(path): | 
 |  287         yield filename | 
|  287  |  288  | 
def usage():
  """Print command-line usage information for this script to stdout."""
  message = """Usage: %s source_name=source_dir ... [output_dir]

Options:
  -h          --help              Print this message and exit
  -t seconds  --timeout=seconds   Timeout when fetching remote subscriptions
""" % os.path.basename(sys.argv[0])
  # The original print statement appended a trailing newline; keep it.
  sys.stdout.write(message + "\n")
|  295  |  296  | 
if __name__ == "__main__":
  try:
    opts, args = getopt(sys.argv[1:], "ht:", ["help", "timeout="])
  except GetoptError as e:
    sys.stdout.write(str(e) + "\n")
    usage()
    sys.exit(2)

  # Positional arguments: "name=dir" pairs define named sources, a bare
  # argument overrides the output directory.
  target_dir = "subscriptions"
  sources = {}
  for arg in args:
    if "=" in arg:
      name, _, directory = arg.partition("=")
      sources[name] = FileSource(directory)
    else:
      target_dir = arg
  if not sources:
    # Default: treat the current directory as the single unnamed source.
    sources[""] = FileSource(".")

  timeout = 30
  for option, value in opts:
    if option in ("-h", "--help"):
      usage()
      sys.exit()
    elif option in ("-t", "--timeout"):
      timeout = int(value)

  combine_subscriptions(sources, target_dir, timeout)
| OLD | NEW |