Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Side by Side Diff: sitescripts/crawler/bin/import_sites.py

Issue 10942098: Make sure subprocess calls don't ignore result codes indicating errors. Fix JS docs generation whil… (Closed)
Patch Set: Fixed wrong argument format Created July 4, 2013, 1:01 p.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View unified diff | Download patch
OLD | NEW
1 # coding: utf-8 1 # coding: utf-8
2 2
3 # This file is part of the Adblock Plus web scripts, 3 # This file is part of the Adblock Plus web scripts,
4 # Copyright (C) 2006-2013 Eyeo GmbH 4 # Copyright (C) 2006-2013 Eyeo GmbH
5 # 5 #
6 # Adblock Plus is free software: you can redistribute it and/or modify 6 # Adblock Plus is free software: you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License version 3 as 7 # it under the terms of the GNU General Public License version 3 as
8 # published by the Free Software Foundation. 8 # published by the Free Software Foundation.
9 # 9 #
10 # Adblock Plus is distributed in the hope that it will be useful, 10 # Adblock Plus is distributed in the hope that it will be useful,
(...skipping 16 matching lines...) Expand all
27 return MySQLdb.connect(user=dbuser, passwd=dbpasswd, db=database, 27 return MySQLdb.connect(user=dbuser, passwd=dbpasswd, db=database,
28 use_unicode=True, charset="utf8", named_pipe=True) 28 use_unicode=True, charset="utf8", named_pipe=True)
29 else: 29 else:
30 return MySQLdb.connect(user=dbuser, passwd=dbpasswd, db=database, 30 return MySQLdb.connect(user=dbuser, passwd=dbpasswd, db=database,
31 use_unicode=True, charset="utf8") 31 use_unicode=True, charset="utf8")
32 32
33 def _get_cursor(): 33 def _get_cursor():
34 return _get_db().cursor(MySQLdb.cursors.DictCursor) 34 return _get_db().cursor(MySQLdb.cursors.DictCursor)
35 35
36 def _hg(args): 36 def _hg(args):
37 return subprocess.Popen(["hg"] + args, stdout = subprocess.PIPE) 37 return subprocess.check_output(["hg"] + args)
38 38
39 def _extract_sites(easylist_dir): 39 def _extract_sites(easylist_dir):
40 os.chdir(easylist_dir) 40 os.chdir(easylist_dir)
41 process = _hg(["log", "--template", "{desc}\n"]) 41 process = _hg(["log", "--template", "{desc}\n"])
42 urls = set([]) 42 urls = set([])
43 43
44 for line in process.stdout: 44 for line in process.stdout:
45 match = re.search(r"\b(https?://\S*)", line) 45 match = re.search(r"\b(https?://\S*)", line)
46 if not match: 46 if not match:
47 continue 47 continue
48 48
49 url = match.group(1).strip() 49 url = match.group(1).strip()
50 urls.add(url) 50 urls.add(url)
51 51
52 return urls 52 return urls
53 53
54 def _insert_sites(site_urls): 54 def _insert_sites(site_urls):
55 cursor = _get_cursor() 55 cursor = _get_cursor()
56 for url in site_urls: 56 for url in site_urls:
57 cursor.execute("INSERT IGNORE INTO crawler_sites (url) VALUES (%s)", url) 57 cursor.execute("INSERT IGNORE INTO crawler_sites (url) VALUES (%s)", url)
58 _get_db().commit(); 58 _get_db().commit();
59 59
60 if __name__ == "__main__": 60 if __name__ == "__main__":
61 easylist_dir = get_config().get("crawler", "easylist_repository") 61 easylist_dir = get_config().get("crawler", "easylist_repository")
62 site_urls = _extract_sites(easylist_dir) 62 site_urls = _extract_sites(easylist_dir)
63 _insert_sites(site_urls) 63 _insert_sites(site_urls)
OLD | NEW

Powered by Google App Engine
This is Rietveld