| Index: sitescripts/crawler/web/crawler.py |
| =================================================================== |
| --- a/sitescripts/crawler/web/crawler.py |
| +++ b/sitescripts/crawler/web/crawler.py |
| @@ -83,16 +83,22 @@ |
| cursor = _get_cursor() |
| cursor.execute(""" |
| -INSERT INTO crawler_data (run, site, url, filtered) |
| +INSERT INTO crawler_requests (run, site, url, filtered) |
| VALUES (%s, %s, %s, %s)""", |
| (run_id, site_id, url, filtered)) |
| -@url_handler("/crawlerData") |
| +@url_handler("/crawlerRequests") |
| @basic_auth("crawler") |
| -def crawler_data(environ, start_response): |
| +def crawler_requests(environ, start_response): |
| def line_callback(line): |
| try: |
| - url, site, filtered = simplejson.loads(line) |
| + data = simplejson.loads(line) |
| + if len(data) < 3: |
| + print >>sys.stderr, "Not enough elements in line '%s'" % line |
| + return |
| + url = data[0] |
| + site = data[1] |
| + filtered = data[2] |
| _insert_data(run_id, site, url, filtered) |
| except simplejson.JSONDecodeError: |
| print >>sys.stderr, "Unable to parse JSON from '%s'" % line |
| @@ -104,4 +110,5 @@ |
| return "" |
| except ValueError as e: |
| -    start_response("401 Bad Request", [("Content-Type", "text/plain")]) |
| +    start_response("400 Bad Request", [("Content-Type", "text/plain")]) |
| + print >>sys.stderr, "Unable to read multipart data: %s" % e |
| -    return e |
| +    return [str(e)] |