| OLD | NEW |
| 1 import MySQLdb, os, re, simplejson, sys | 1 import MySQLdb, os, re, simplejson, sys |
| 2 from sitescripts.utils import cached, get_config | 2 from sitescripts.utils import cached, get_config |
| 3 from sitescripts.web import url_handler, basic_auth | 3 from sitescripts.web import url_handler, basic_auth |
| 4 | 4 |
| 5 @cached(600) | 5 @cached(600) |
| 6 def _get_db(): | 6 def _get_db(): |
| 7 database = get_config().get("crawler", "database") | 7 database = get_config().get("crawler", "database") |
| 8 dbuser = get_config().get("crawler", "dbuser") | 8 dbuser = get_config().get("crawler", "dbuser") |
| 9 dbpasswd = get_config().get("crawler", "dbpassword") | 9 dbpasswd = get_config().get("crawler", "dbpassword") |
| 10 if os.name == "nt": | 10 if os.name == "nt": |
| (...skipping 65 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 76 return cursor.lastrowid | 76 return cursor.lastrowid |
| 77 | 77 |
def _insert_data(run_id, site, url, filtered):
    """Record one request of a crawler run in the crawler_requests table.

    The site is resolved to its database id with _find_site_id(). When no id
    is found, a message is written to stderr and nothing is inserted.
    """
    resolved_site = _find_site_id(site)
    if resolved_site is not None:
        row = (run_id, resolved_site, url, filtered)
        # Values are passed separately so the driver does the quoting.
        _get_cursor().execute("""
      INSERT INTO crawler_requests (run, site, url, filtered)
      VALUES (%s, %s, %s, %s)""", row)
        return

    print >>sys.stderr, "Unable to find site '%s' in the database" % site
| 89 | 89 |
@url_handler("/crawlerRequests")
@basic_auth("crawler")
def crawler_requests(environ, start_response):
    """Handle an upload of crawler results at /crawlerRequests.

    A new run is created with _create_run() and every line delivered by
    _read_multipart_lines() is parsed as JSON. Each line is expected to be an
    array whose first three elements are the URL, the site and the filtered
    flag, in that order. Malformed lines are reported on stderr and skipped.

    Responds with "200 OK" and an empty body on success, or with
    "400 Bad Request" and the error text as body if reading the upload
    raises ValueError.
    """
    def line_callback(line):
        # Keep the try body down to the one call that can raise
        # JSONDecodeError.
        try:
            data = simplejson.loads(line)
        except simplejson.JSONDecodeError:
            print >>sys.stderr, "Unable to parse JSON from '%s'" % line
            return

        # Valid JSON need not be an array: a number or null would make len()
        # raise TypeError, and an object or string cannot be indexed by
        # position the way the code below expects.
        if not isinstance(data, list):
            print >>sys.stderr, "Expected a JSON array in line '%s'" % line
            return
        if len(data) < 3:
            print >>sys.stderr, "Not enough elements in line '%s'" % line
            return

        # Extra trailing elements are tolerated and ignored.
        url, site, filtered = data[:3]
        _insert_data(run_id, site, url, filtered)

    run_id = _create_run()
    try:
        _read_multipart_lines(environ, line_callback)
    except ValueError as e:
        print >>sys.stderr, "Unable to read multipart data: %s" % e
        # 400, not 401: the request body is malformed; authentication is not
        # what failed here.
        start_response("400 Bad Request", [("Content-Type", "text/plain")])
        # Return the message text, matching the string returned on success,
        # rather than the exception object itself.
        return str(e)

    start_response("200 OK", [("Content-Type", "text/plain")])
    return ""
| OLD | NEW |