Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Unified Diff: generate_lists.py

Issue 29328894: Issue 3168 - Add a script for generating new content blocker lists (Closed)
Patch Set: Created Oct. 6, 2015, 6:22 p.m.
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show/Hide Comments ('s')
Index: generate_lists.py
===================================================================
new file mode 100755
--- /dev/null
+++ b/generate_lists.py
@@ -0,0 +1,83 @@
+#!/usr/bin/env python
+# coding: utf-8
+
+# This file is part of Adblock Plus <https://adblockplus.org/>,
+# Copyright (C) 2006-2015 Eyeo GmbH
+#
+# Adblock Plus is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 3 as
+# published by the Free Software Foundation.
+#
+# Adblock Plus is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.
+
+import os
+import shutil
+import subprocess
+import tempfile
+import urllib2
+
+BASE_PATH = os.path.dirname(os.path.abspath(__file__))
+ABP2BLOCKLIST_URL = "https://hg.adblockplus.org/abp2blocklist"
+ABP2BLOCKLIST_PATH = os.path.join(BASE_PATH, "abp2blocklist")
+EASYLIST_URL = "https://easylist-downloads.adblockplus.org/easylist_noadult.txt"
+EXCEPTIONRULES_URL = "https://easylist-downloads.adblockplus.org/exceptionrules.txt"
+EASYLIST_CONTENT_BLOCKER_PATH = os.path.join(BASE_PATH, "easylist_content_blocker.json")
+COMBINED_CONTENT_BLOCKER_PATH = os.path.join(BASE_PATH, "easylist+exceptionrules_content_blocker.json")
+
+def _update_abp2blocklist():
+ if os.path.isdir(ABP2BLOCKLIST_PATH):
+ subprocess.check_call(["hg", "pull", "-u", "-R", ABP2BLOCKLIST_PATH])
+ else:
+ subprocess.check_call(["hg", "clone", ABP2BLOCKLIST_URL,
+ ABP2BLOCKLIST_PATH])
+ subprocess.check_call(["npm", "install", "tldjs"], cwd=ABP2BLOCKLIST_PATH)
Felix Dahlke 2015/10/06 18:23:58 Well, this is a bit ugly. Once my patch that adds…
Sebastian Noack 2015/10/12 13:08:31 This can be addressed now.
Felix Dahlke 2015/10/21 20:36:21 Done.
+
+def _download_filter_lists():
+ easylist_response = urllib2.urlopen(EASYLIST_URL)
Sebastian Noack 2015/10/12 13:08:31 Please close the file like object returned by urlo…
Felix Dahlke 2015/10/21 20:36:22 Done.
+ with tempfile.NamedTemporaryFile(mode="w", delete=False) as easylist_file:
Sebastian Noack 2015/10/12 13:08:31 Always add "b" to the mode when dealing with binar…
Sebastian Noack 2015/10/12 13:08:31 How about, using mode NamedTemporaryFile("wb+"), p…
Felix Dahlke 2015/10/21 20:36:21 Passing around RW file objects seemed pretty hacky…
Felix Dahlke 2015/10/21 20:36:22 Those are ASCII files, however, should I still add…
Sebastian Noack 2015/10/21 20:56:23 Either you treat it as a binary file. Then you sho…
Felix Dahlke 2015/10/22 02:43:45 I see, done.
+ easylist_file.write(easylist_response.read())
Sebastian Noack 2015/10/12 13:08:31 Please use shutil.copyfileobj consistently.
Felix Dahlke 2015/10/21 20:36:21 Done.
+ exceptionrules_response = urllib2.urlopen(EXCEPTIONRULES_URL)
+ with tempfile.NamedTemporaryFile(mode="w", delete=False) as exceptionrules_file:
+ exceptionrules_file.write(exceptionrules_response.read())
+ return (easylist_file.name, exceptionrules_file.name)
+
+def _concatenate_files(*source_paths):
+ with tempfile.NamedTemporaryFile(mode="w", delete=False) as destination_file:
+ for source_path in source_paths:
+ with open(source_path, "r") as source_file:
+ shutil.copyfileobj(source_file, destination_file)
+ return destination_file.name
+
+def _convert_filter_list(source_path, destination_path):
+ with open(source_path, "r") as source_file, \
+ open(destination_path, "w") as destination_file:
+ subprocess.check_call(["node", "abp2blocklist.js"],
+ cwd=ABP2BLOCKLIST_PATH, stdin=source_file,
+ stdout=destination_file)
+
+if __name__ == "__main__":
+ print "Fetching/updating abp2blocklist ..."
+ _update_abp2blocklist()
+
+ print "Downloading filter lists ..."
+ easylist_path, exceptionrules_path = _download_filter_lists()
+
+ try:
+ print "Generating %s ..." % os.path.basename(EASYLIST_CONTENT_BLOCKER_PATH)
+ _convert_filter_list(easylist_path, EASYLIST_CONTENT_BLOCKER_PATH)
+
+ print "Generating %s ..." % os.path.basename(COMBINED_CONTENT_BLOCKER_PATH)
+ combined_path = _concatenate_files(easylist_path, exceptionrules_path)
+ try:
+ _convert_filter_list(combined_path, COMBINED_CONTENT_BLOCKER_PATH)
+ finally:
+ os.remove(combined_path)
+ finally:
+ os.remove(easylist_path)
+ os.remove(exceptionrules_path)
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld