OLD | NEW |
1 # This file is part of the Adblock Plus web scripts, | 1 # This file is part of the Adblock Plus web scripts, |
2 # Copyright (C) 2006-present eyeo GmbH | 2 # Copyright (C) 2006-present eyeo GmbH |
3 # | 3 # |
4 # Adblock Plus is free software: you can redistribute it and/or modify | 4 # Adblock Plus is free software: you can redistribute it and/or modify |
5 # it under the terms of the GNU General Public License version 3 as | 5 # it under the terms of the GNU General Public License version 3 as |
6 # published by the Free Software Foundation. | 6 # published by the Free Software Foundation. |
7 # | 7 # |
8 # Adblock Plus is distributed in the hope that it will be useful, | 8 # Adblock Plus is distributed in the hope that it will be useful, |
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of | 9 # but WITHOUT ANY WARRANTY; without even the implied warranty of |
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
(...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
56 | 56 |
57 return urls | 57 return urls |
58 | 58 |
59 | 59 |
def _insert_sites(site_urls):
    """Store every URL from ``site_urls`` in the ``crawler_sites`` table.

    Each URL is written with ``INSERT IGNORE``, so a row the database
    rejects (presumably because the URL is already present -- verify
    against the table's unique key) is skipped instead of aborting the run.
    All inserts are committed together once the loop has finished.

    ``site_urls`` is any iterable of URL strings.
    """
    cursor = _get_cursor()
    query = 'INSERT IGNORE INTO crawler_sites (url) VALUES (%s)'
    for url in site_urls:
        # Parameters must be passed as a sequence: a bare string is itself
        # a sequence of characters, which DB-API drivers that iterate over
        # the arguments would expand into one parameter per character.
        cursor.execute(query, (url,))
    _get_db().commit()
65 | 65 |
| 66 |
if __name__ == '__main__':
    # Script entry point: read the EasyList repository location from the
    # 'crawler' configuration section, extract the site URLs from it and
    # store them in the database.
    config = get_config()
    repository_path = config.get('crawler', 'easylist_repository')
    _insert_sites(_extract_sites(repository_path))
OLD | NEW |