OLD | NEW |
1 import MySQLdb, os, re, simplejson, sys | 1 import MySQLdb, os, re, simplejson, sys |
2 from sitescripts.utils import cached, get_config | 2 from sitescripts.utils import cached, get_config |
3 from sitescripts.web import url_handler, basic_auth | 3 from sitescripts.web import url_handler, basic_auth |
4 | 4 |
5 @cached(600) | 5 @cached(600) |
6 def _get_db(): | 6 def _get_db(): |
7 database = get_config().get("crawler", "database") | 7 database = get_config().get("crawler", "database") |
8 dbuser = get_config().get("crawler", "dbuser") | 8 dbuser = get_config().get("crawler", "dbuser") |
9 dbpasswd = get_config().get("crawler", "dbpassword") | 9 dbpasswd = get_config().get("crawler", "dbpassword") |
10 if os.name == "nt": | 10 if os.name == "nt": |
(...skipping 65 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
76 return cursor.lastrowid | 76 return cursor.lastrowid |
77 | 77 |
def _insert_data(run_id, site, url, filtered):
    """Store one crawled request for the given crawler run.

    The database id for ``site`` is looked up with ``_find_site_id``. When
    no id is found, a message is written to stderr and nothing is inserted.
    Otherwise one row (run, site, url, filtered) is added to the
    ``crawler_requests`` table through the cursor from ``_get_cursor``.
    """
    insert_sql = """
INSERT INTO crawler_requests (run, site, url, filtered)
VALUES (%s, %s, %s, %s)"""

    site_key = _find_site_id(site)
    if site_key is None:
        print >>sys.stderr, "Unable to find site '%s' in the database" % site
        return

    # Values are handed over as query parameters rather than being
    # formatted into the SQL text.
    row = (run_id, site_key, url, filtered)
    _get_cursor().execute(insert_sql, row)
89 | 89 |
@url_handler("/crawlerRequests")
@basic_auth("crawler")
def crawler_requests(environ, start_response):
    """Handle an upload of crawler request data.

    A new run is created with ``_create_run`` and the request body is read
    by ``_read_multipart_lines``, which is given ``line_callback``
    (presumably called once per uploaded line -- confirm against that
    helper). Each line is expected to be a JSON array whose first three
    elements are ``url``, ``site`` and ``filtered``; they are stored with
    ``_insert_data``. Lines that cannot be used are reported on stderr and
    skipped, so one bad line does not abort the upload.

    Returns an empty body with "200 OK" on success. If reading the
    multipart data raises ``ValueError``, the error is logged to stderr and
    its message is returned with "400 Bad Request".
    """
    def line_callback(line):
        # Only the parse is guarded, so a failure inside _insert_data is
        # never mistaken for a JSON problem.
        try:
            data = simplejson.loads(line)
        except simplejson.JSONDecodeError:
            print >>sys.stderr, "Unable to parse JSON from '%s'" % line
            return

        # Valid JSON may still be a number, string or object; len() or
        # positional access on those would fail or yield garbage.
        if not isinstance(data, list):
            print >>sys.stderr, "Expected a JSON array in line '%s'" % line
            return
        if len(data) < 3:
            print >>sys.stderr, "Not enough elements in line '%s'" % line
            return

        # Extra trailing elements are ignored, as before.
        url, site, filtered = data[:3]
        _insert_data(run_id, site, url, filtered)

    run_id = _create_run()
    try:
        _read_multipart_lines(environ, line_callback)
    except ValueError as e:
        print >>sys.stderr, "Unable to read multipart data: %s" % e
        # Malformed input is a client error: 400, not 401 (Unauthorized).
        start_response("400 Bad Request", [("Content-Type", "text/plain")])
        # The response body must be a string, not the exception object.
        return str(e)

    start_response("200 OK", [("Content-Type", "text/plain")])
    return ""
OLD | NEW |