Index: generate_lists.py |
=================================================================== |
new file mode 100755 |
--- /dev/null |
+++ b/generate_lists.py |
@@ -0,0 +1,83 @@ |
+#!/usr/bin/env python |
+# coding: utf-8 |
+ |
+# This file is part of Adblock Plus <https://adblockplus.org/>, |
+# Copyright (C) 2006-2015 Eyeo GmbH |
+# |
+# Adblock Plus is free software: you can redistribute it and/or modify |
+# it under the terms of the GNU General Public License version 3 as |
+# published by the Free Software Foundation. |
+# |
+# Adblock Plus is distributed in the hope that it will be useful, |
+# but WITHOUT ANY WARRANTY; without even the implied warranty of |
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
+# GNU General Public License for more details. |
+# |
+# You should have received a copy of the GNU General Public License |
+# along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. |
+ |
+import os |
+import shutil |
+import subprocess |
+import tempfile |
+import urllib2 |
+ |
+BASE_PATH = os.path.dirname(os.path.abspath(__file__)) |
+ABP2BLOCKLIST_URL = "https://hg.adblockplus.org/abp2blocklist" |
+ABP2BLOCKLIST_PATH = os.path.join(BASE_PATH, "abp2blocklist") |
+EASYLIST_URL = "https://easylist-downloads.adblockplus.org/easylist_noadult.txt" |
+EXCEPTIONRULES_URL = "https://easylist-downloads.adblockplus.org/exceptionrules.txt" |
+EASYLIST_CONTENT_BLOCKER_PATH = os.path.join(BASE_PATH, "easylist_content_blocker.json") |
+COMBINED_CONTENT_BLOCKER_PATH = os.path.join(BASE_PATH, "easylist+exceptionrules_content_blocker.json") |
+ |
+def _update_abp2blocklist(): |
+ # Bring the abp2blocklist checkout at ABP2BLOCKLIST_PATH up to date: |
+ # "hg pull -u" when that directory already exists, "hg clone" from |
+ # ABP2BLOCKLIST_URL otherwise. subprocess.check_call raises |
+ # CalledProcessError when a command exits with a non-zero status. |
+ if os.path.isdir(ABP2BLOCKLIST_PATH): |
+ subprocess.check_call(["hg", "pull", "-u", "-R", ABP2BLOCKLIST_PATH]) |
+ else: |
+ subprocess.check_call(["hg", "clone", ABP2BLOCKLIST_URL, |
+ ABP2BLOCKLIST_PATH]) |
+ # Run "npm install tldjs" with the checkout as working directory. |
+ # NOTE(review): indentation was lost in this listing, so it is unclear |
+ # whether this runs only after a fresh clone or on every call - confirm. |
+ subprocess.check_call(["npm", "install", "tldjs"], cwd=ABP2BLOCKLIST_PATH) |
Felix Dahlke
2015/10/06 18:23:58
Well, this is a bit ugly. Once my patch that adds
Sebastian Noack
2015/10/12 13:08:31
This can be addressed now.
Felix Dahlke
2015/10/21 20:36:21
Done.
|
+ |
+def _download_filter_lists(): |
+ # Download EASYLIST_URL and EXCEPTIONRULES_URL into two temporary files and |
+ # return their paths as a tuple (easylist path, exception rules path). |
+ # Both files are created with delete=False, so they outlive this function |
+ # and the caller has to remove them. |
+ # NOTE(review): neither urlopen() response is closed in this function, and |
+ # each body is read into memory with a single read() before being written. |
+ easylist_response = urllib2.urlopen(EASYLIST_URL) |
Sebastian Noack
2015/10/12 13:08:31
Please close the file like object returned by urlo
Felix Dahlke
2015/10/21 20:36:22
Done.
|
+ with tempfile.NamedTemporaryFile(mode="w", delete=False) as easylist_file: |
Sebastian Noack
2015/10/12 13:08:31
Always add "b" to the mode when dealing with binar
Sebastian Noack
2015/10/12 13:08:31
How about, using mode NamedTemporaryFile("wb+"), p
Felix Dahlke
2015/10/21 20:36:21
Passing around RW file objects seemed pretty hacky
Felix Dahlke
2015/10/21 20:36:22
Those are ASCII files, however, should I still add
Sebastian Noack
2015/10/21 20:56:23
Either you treat it as a binary file. Then you sho
Felix Dahlke
2015/10/22 02:43:45
I see, done.
|
+ easylist_file.write(easylist_response.read()) |
Sebastian Noack
2015/10/12 13:08:31
Please use shutil.copyfileobj consistently.
Felix Dahlke
2015/10/21 20:36:21
Done.
|
+ # Same steps for the exception rules list. |
+ exceptionrules_response = urllib2.urlopen(EXCEPTIONRULES_URL) |
+ with tempfile.NamedTemporaryFile(mode="w", delete=False) as exceptionrules_file: |
+ exceptionrules_file.write(exceptionrules_response.read()) |
+ return (easylist_file.name, exceptionrules_file.name) |
+ |
+def _concatenate_files(*source_paths): |
+    """Concatenate the given files into a new temporary file. |
+ |
+    The sources are copied byte for byte, in the order given. The temporary |
+    file is created with delete=False, so the caller is responsible for |
+    removing it once it is no longer needed. |
+ |
+    Returns the path of the temporary file. If copying fails, the partially |
+    written temporary file is removed and the exception is re-raised. |
+    """ |
+    # Binary modes: the data is only copied, never interpreted as text, so |
+    # no newline translation should be applied on any platform. |
+    destination_file = tempfile.NamedTemporaryFile(mode="wb", delete=False) |
+    try: |
+        with destination_file: |
+            for source_path in source_paths: |
+                with open(source_path, "rb") as source_file: |
+                    shutil.copyfileobj(source_file, destination_file) |
+    except BaseException: |
+        # delete=False means nobody else cleans this up; the file is already |
+        # closed here because the with block has exited. |
+        os.remove(destination_file.name) |
+        raise |
+    return destination_file.name |
+ |
+def _convert_filter_list(source_path, destination_path): |
+    """Convert a filter list with abp2blocklist.js. |
+ |
+    Runs "node abp2blocklist.js" inside ABP2BLOCKLIST_PATH, feeding it the |
+    file at source_path on stdin and writing its stdout to destination_path |
+    (which is created or truncated). Raises subprocess.CalledProcessError |
+    if node exits with a non-zero status. |
+    """ |
+    command = ["node", "abp2blocklist.js"] |
+    with open(source_path, "r") as filter_list: |
+        with open(destination_path, "w") as block_list: |
+            subprocess.check_call(command, stdin=filter_list, |
+                                  stdout=block_list, cwd=ABP2BLOCKLIST_PATH) |
+ |
+if __name__ == "__main__": |
+ # Script entry point: refresh abp2blocklist, download both filter lists, |
+ # then write EASYLIST_CONTENT_BLOCKER_PATH (converted from EasyList alone) |
+ # and COMBINED_CONTENT_BLOCKER_PATH (converted from EasyList followed by |
+ # the exception rules). |
+ print "Fetching/updating abp2blocklist ..." |
+ _update_abp2blocklist() |
+ |
+ print "Downloading filter lists ..." |
+ easylist_path, exceptionrules_path = _download_filter_lists() |
+ |
+ try: |
+ print "Generating %s ..." % os.path.basename(EASYLIST_CONTENT_BLOCKER_PATH) |
+ _convert_filter_list(easylist_path, EASYLIST_CONTENT_BLOCKER_PATH) |
+ |
+ print "Generating %s ..." % os.path.basename(COMBINED_CONTENT_BLOCKER_PATH) |
+ combined_path = _concatenate_files(easylist_path, exceptionrules_path) |
+ # The combined list is a temporary file; remove it once the conversion |
+ # has been attempted, whether or not it succeeded. |
+ try: |
+ _convert_filter_list(combined_path, COMBINED_CONTENT_BLOCKER_PATH) |
+ finally: |
+ os.remove(combined_path) |
+ finally: |
+ # Remove the temporary files returned by _download_filter_lists(). |
+ # NOTE(review): indentation was lost in this listing; presumably both |
+ # removals belong to this finally clause - confirm. |
+ os.remove(easylist_path) |
+ os.remove(exceptionrules_path) |