Left: | ||
Right: |
LEFT | RIGHT |
---|---|
1 # coding: utf-8 | 1 # coding: utf-8 |
2 | 2 |
3 # This Source Code is subject to the terms of the Mozilla Public License | 3 # This Source Code is subject to the terms of the Mozilla Public License |
4 # version 2.0 (the "License"). You can obtain a copy of the License at | 4 # version 2.0 (the "License"). You can obtain a copy of the License at |
5 # http://mozilla.org/MPL/2.0/. | 5 # http://mozilla.org/MPL/2.0/. |
6 | 6 |
7 # import MySQLdb, os, simplejson, sys, re | |
8 # from sitescripts.utils import cached, get_config | |
9 # from sitescripts.web import url_handler, basic_auth | |
Wladimir Palant
2012/10/09 11:51:14
Any reason why the lines above are required? ;)
| |
10 | |
11 import os, MySQLdb, simplejson as json | 7 import os, MySQLdb, simplejson as json |
12 from urlparse import parse_qs | 8 from urlparse import parse_qs |
13 from math import sqrt | |
Wladimir Palant
2012/10/09 11:51:14
I would prefer to just import math and use the fun
| |
14 from sitescripts.web import url_handler | 9 from sitescripts.web import url_handler |
15 from sitescripts.utils import get_config, setupStderr | 10 from sitescripts.utils import cached, get_config, setupStderr |
16 | |
17 STATUS_TYPED = 1 | |
18 STATUS_TYPO = 2 | |
19 STATUS_CORRECTION = 3 | |
20 STATUS_FALSE_POSITIVE = 4 | |
21 | 11 |
22 @url_handler("/submitData") | 12 @url_handler("/submitData") |
23 def submit_data(environ, start_response): | 13 def submit_data(environ, start_response): |
24 setupStderr(environ['wsgi.errors']) | 14 setupStderr(environ['wsgi.errors']) |
25 | 15 |
26 if environ['REQUEST_METHOD'].upper() != 'POST': | 16 if environ["REQUEST_METHOD"].upper() != "POST": |
27 return showError('Unsupported request method', start_response) | 17 return showError("Unsupported request method", start_response) |
28 | |
29 output = "" | |
30 | 18 |
31 params = parse_qs(environ.get("QUERY_STRING", "")) | 19 params = parse_qs(environ.get("QUERY_STRING", "")) |
32 requestVersion = params.get("version", ["0"])[0] | 20 requestVersion = params.get("version", ["0"])[0] |
33 data = "{}" | 21 data = "{}" |
34 try: | 22 try: |
35 data_length = int(environ.get("CONTENT_LENGTH", "0")) | 23 data_length = int(environ.get("CONTENT_LENGTH", "0")) |
36 except ValueError: | 24 except ValueError: |
37 data_length = 0 | 25 data_length = 0 |
38 if data_length != 0: | 26 if data_length != 0: |
39 data = environ["wsgi.input"].read(data_length) | 27 data = environ["wsgi.input"].read(data_length) |
40 data = json.loads(data, ["{}"]) | 28 try: |
Wladimir Palant
2012/10/09 11:51:14
Not sure I understand the significance of the seco
| |
29 data = json.loads(data) | |
30 except json.decoder.JSONDecodeError: | |
31 return showError("Error while parsing JSON data.", start_response) | |
41 | 32 |
42 try: | 33 db = _get_db() |
43 database = get_config().get("urlfixer", "database") | |
44 dbuser = get_config().get("urlfixer", "dbuser") | |
45 dbpasswd = get_config().get("urlfixer", "dbpassword") | |
46 except Exception: | |
47 return showError("Error while reading config.", start_response) | |
Wladimir Palant
2012/10/09 11:51:14
That's a server configuration issue, not something
| |
48 | 34 |
49 if os.name == "nt": | 35 for domain, status in data.iteritems(): |
50 db = MySQLdb.connect(user=dbuser, passwd=dbpasswd, db=database, | 36 process_domain(db, domain, status) |
51 use_unicode=True, charset="utf8", named_pipe=True) | |
52 else: | |
53 db = MySQLdb.connect(user=dbuser, passwd=dbpasswd, db=database, | |
54 use_unicode=True, charset="utf8") | |
Wladimir Palant
2012/10/09 11:51:14
I suggest caching the database connection similarl
| |
55 | |
56 for domain in data: | |
Wladimir Palant
2012/10/09 11:51:14
I suggest:
for domain, status in data.iteritems
| |
57 process_domain(db, domain, data[domain]) | |
58 | 37 |
59 db.commit() | 38 db.commit() |
60 db.close() | |
Wladimir Palant
2012/10/09 11:51:14
Database shouldn't be closed if you share the data
| |
61 | 39 |
62 response_headers = [('Content-type', 'text/plain'), | 40 response_headers = [("Content-type", "text/plain")] |
63 ('Content-Length', str(len(output)))] | |
Wladimir Palant
2012/10/09 11:51:14
Output is empty and this won't change (nobody can
| |
64 start_response("200 OK", response_headers) | 41 start_response("200 OK", response_headers) |
65 return [output] | 42 return [] |
66 | 43 |
67 def process_domain(db, domain, status): | 44 def process_domain(db, domain, status): |
68 domain_id = _get_domain_id(db, domain) | 45 domain_id = _get_domain_id(db, domain) |
69 | |
70 _increment_entry(db, domain_id, status) | 46 _increment_entry(db, domain_id, status) |
71 | |
72 typed = _get_entry(db, domain_id, STATUS_TYPED) | |
73 typo = _get_entry(db, domain_id, STATUS_TYPO) | |
74 correction = _get_entry(db, domain_id, STATUS_CORRECTION) | |
75 false_positive = _get_entry(db, domain_id, STATUS_FALSE_POSITIVE) | |
76 | |
77 # Determine the occurences of the domain being correct as is | |
78 typed_count = get_weighted_count(typed) | |
79 # Determine the occurences of the domain as a typo | |
80 typo_count = get_weighted_count(typo) | |
81 # Determine the occurences of the domain as a correction | |
82 correction_count = get_weighted_count(correction) | |
83 # Determine the occurences of the domain as a false positive | |
84 false_positive_count = get_weighted_count(false_positive) | |
85 | |
86 # Determine the correctness of the domain and of the corrections (with a confi dence level of 0.95) | |
87 # http://www.evanmiller.org/how-not-to-sort-by-average-rating.html | |
88 domain_correctness_score = calculateCorrectness(correction_count + typed_count , false_positive_count + typo_count) | |
89 correction_correctness_score = calculateCorrectness(correction_count, false_po sitive_count) | |
90 | |
91 _update_scores(db, domain_id, domain_correctness_score, correction_correctness _score) | |
92 | |
93 def calculateCorrectness(positive, negative): | |
94 score = 0 | |
95 | |
96 if positive + negative > 0: | |
97 score = ((positive + 1.9208) / (positive + negative) - | |
98 1.96 * sqrt((positive * negative) / (positive + negative) + 0.9604) / | |
99 (positive + negative)) / (1 + 3.8416 / (positive + negative)) | |
100 | |
101 return score | |
102 | |
103 def get_weighted_count(result): | |
104 if result == None: | |
105 return 0 | |
106 | |
107 return result["curr_month"] * 0.4 + result["prev_month"] * 0.3 + result["curr_ year"] * 0.2 + result["prev_year"] * 0.1 | |
108 | 47 |
109 def showError(message, start_response): | 48 def showError(message, start_response): |
110 start_response('400 Processing Error', [('Content-Type', 'text/plain; charset= utf-8')]) | 49 start_response("400 Processing Error", [("Content-Type", "text/plain; charset= utf-8")]) |
111 return [message.encode('utf-8')] | 50 return [message.encode("utf-8")] |
51 | |
52 @cached(600) | |
53 def _get_db(): | |
54 database = get_config().get("urlfixer", "database") | |
55 dbuser = get_config().get("urlfixer", "dbuser") | |
56 dbpasswd = get_config().get("urlfixer", "dbpassword") | |
57 if os.name == "nt": | |
58 return MySQLdb.connect(user=dbuser, passwd=dbpasswd, db=database, | |
59 use_unicode=True, charset="utf8", named_pipe=True) | |
60 else: | |
61 return MySQLdb.connect(user=dbuser, passwd=dbpasswd, db=database, | |
62 use_unicode=True, charset="utf8") | |
112 | 63 |
113 def _get_domain_id(db, domain): | 64 def _get_domain_id(db, domain): |
114 cursor = db.cursor(MySQLdb.cursors.DictCursor) | 65 cursor = db.cursor(MySQLdb.cursors.DictCursor) |
115 cursor.execute("SELECT id FROM domains WHERE domain='%s'" % (domain)) | 66 cursor.execute("SELECT id FROM domains WHERE domain = %s", (domain)) |
Wladimir Palant
2012/10/09 11:51:14
Please don't ever generate queries dynamically, us
Thomas Greiner
2012/10/09 13:47:02
Actually, I thought that this was the syntax for p
| |
116 result = cursor.fetchone() | 67 result = cursor.fetchone() |
117 if result == None: | 68 if result == None: |
118 cursor.execute("INSERT INTO domains(domain) VALUES ('%s')" % (domain)) | 69 cursor.execute("INSERT INTO domains(domain) VALUES (%s)", (domain)) |
119 return _get_domain_id(db, domain) | 70 return db.insert_id() |
Wladimir Palant
2012/10/09 11:51:14
Should be:
return db.insert_id()
| |
120 else: | 71 else: |
121 return result["id"] | 72 return result["id"] |
122 | |
123 def _get_entry(db, domain_id, status): | |
124 cursor = db.cursor(MySQLdb.cursors.DictCursor) | |
125 cursor.execute("SELECT * FROM corrections WHERE domain=%d AND status=%d" % (do main_id, status)) | |
126 return cursor.fetchone() | |
127 | 73 |
128 def _increment_entry(db, domain_id, status): | 74 def _increment_entry(db, domain_id, status): |
129 cursor = db.cursor(MySQLdb.cursors.DictCursor) | 75 cursor = db.cursor(MySQLdb.cursors.DictCursor) |
130 cursor.execute("SELECT COUNT(*) AS count FROM corrections WHERE domain=%d AND status=%d" % (domain_id, status)) | 76 cursor.execute("INSERT INTO corrections(domain, status, curr_month, prev_month , curr_year, prev_year) VALUES (%s, %s, %s, %s, %s, %s) ON DUPLICATE KEY UPDATE curr_month = curr_month + 1, curr_year = curr_year + 1", (domain_id, status, 1, 0, 1, 0)) |
131 | |
132 if cursor.fetchone()["count"] < 1: | |
133 # Create and increment current period | |
134 cursor.execute("INSERT INTO corrections(domain, status, curr_month, prev_mon th, curr_year, prev_year) VALUES (%d, %d, %d, %d, %d, %d)" % (domain_id, status, 1, 0, 1, 0)) | |
135 else: | |
136 # Increment current period | |
137 cursor.execute("UPDATE corrections SET curr_month=curr_month+1, curr_year=cu rr_year+1 WHERE domain=%d AND status=%d" % (domain_id, status)) | |
Wladimir Palant
2012/10/09 11:51:14
http://dev.mysql.com/doc/refman/5.0/en/insert-on-d
Thomas Greiner
2012/10/09 13:47:02
Thanks for the tip!
| |
138 | |
139 def _update_scores(db, domain_id, domain_correctness, correction_correctness): | |
140 cursor = db.cursor() | |
141 cursor.execute("UPDATE domains SET domain_correct=%f, correction_correct=%f WH ERE id=%d" % (domain_correctness, correction_correctness, domain_id)) | |
LEFT | RIGHT |