| Index: sitescripts/crawler/bin/extract_sites.py | 
| =================================================================== | 
| deleted file mode 100644 | 
| --- a/sitescripts/crawler/bin/extract_sites.py | 
| +++ /dev/null | 
| @@ -1,47 +0,0 @@ | 
| -# coding: utf-8 | 
| - | 
| -# This file is part of the Adblock Plus web scripts, | 
| -# Copyright (C) 2006-2012 Eyeo GmbH | 
| -# | 
| -# Adblock Plus is free software: you can redistribute it and/or modify | 
| -# it under the terms of the GNU General Public License version 3 as | 
| -# published by the Free Software Foundation. | 
| -# | 
| -# Adblock Plus is distributed in the hope that it will be useful, | 
| -# but WITHOUT ANY WARRANTY; without even the implied warranty of | 
| -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | 
| -# GNU General Public License for more details. | 
| -# | 
| -# You should have received a copy of the GNU General Public License | 
| -# along with Adblock Plus.  If not, see <http://www.gnu.org/licenses/>. | 
| - | 
| -import MySQLdb, os, re, subprocess | 
| -from sitescripts.utils import get_config | 
| - | 
| -def hg(args): | 
| -  return subprocess.Popen(["hg"] + args, stdout = subprocess.PIPE) | 
| - | 
| -def extract_urls(filter_list_dir): | 
| -  os.chdir(filter_list_dir) | 
| -  process = hg(["log", "--template", "{desc}\n"]) | 
| -  urls = set([]) | 
| - | 
| -  for line in process.stdout: | 
| -    match = re.search(r"\b(https?://\S*)", line) | 
| -    if not match: | 
| -      continue | 
| - | 
| -    url = match.group(1).strip() | 
| -    urls.add(url) | 
| - | 
| -  return urls | 
| - | 
| -def print_statements(urls): | 
| -  for url in urls: | 
| -    escaped_url = MySQLdb.escape_string(url) | 
| -    print "INSERT INTO crawler_sites (url) VALUES ('" + escaped_url + "');" | 
| - | 
| -if __name__ == "__main__": | 
| -  filter_list_dir = get_config().get("crawler", "filter_list_repository") | 
| -  urls = extract_urls(filter_list_dir) | 
| -  print_statements(urls) | 
|  |