Index: sitescripts/crawler/web/crawler.py
===================================================================
--- a/sitescripts/crawler/web/crawler.py
+++ b/sitescripts/crawler/web/crawler.py
@@ -83,16 +83,22 @@
   cursor = _get_cursor()
   cursor.execute("""
-INSERT INTO crawler_data (run, site, url, filtered)
+INSERT INTO crawler_requests (run, site, url, filtered)
 VALUES (%s, %s, %s, %s)""",
                  (run_id, site_id, url, filtered))
-@url_handler("/crawlerData")
+@url_handler("/crawlerRequests")
 @basic_auth("crawler")
-def crawler_data(environ, start_response):
+def crawler_requests(environ, start_response):
   def line_callback(line):
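+    # Each line of the upload is expected to be a JSON array:
+    # [url, site, filtered].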
     try:
-      url, site, filtered = simplejson.loads(line)
+      data = simplejson.loads(line)
+      if len(data) < 3:
+        print >>sys.stderr, "Not enough elements in line '%s'" % line
+        return
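+      # Any elements beyond the first three are ignored.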
+      url, site, filtered = data[0:3]
       _insert_data(run_id, site, url, filtered)
     except simplejson.JSONDecodeError:
       print >>sys.stderr, "Unable to parse JSON from '%s'" % line
@@ -104,4 +110,5 @@
     return ""
   except ValueError as e:
     start_response("400 Bad Request", [("Content-Type", "text/plain")])
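+    # Also log the error to stderr so failed uploads show up in the server log.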
+    print >>sys.stderr, "Unable to read multipart data: %s" % e
     return e