OLD | NEW |
(Empty) | |
| 1 # coding: utf-8 |
| 2 |
| 3 # This Source Code is subject to the terms of the Mozilla Public License |
| 4 # version 2.0 (the "License"). You can obtain a copy of the License at |
| 5 # http://mozilla.org/MPL/2.0/. |
| 6 |
| 7 import os, MySQLdb, math |
| 8 from urlparse import parse_qs |
| 9 from sitescripts.web import url_handler, basic_auth |
| 10 from sitescripts.utils import cached, get_config, get_template, setupStderr |
| 11 |
# Values of the "status" column used when looking up rows of the
# "corrections" table (see _get_entry).
STATUS_TYPED = 1
STATUS_TYPO = 2
STATUS_CORRECTION = 3
STATUS_FALSE_POSITIVE = 4

# Accepted values of the "period" query parameter of /resetScores.
reset_period_values = ["month", "year"]
| 18 |
@url_handler("/showScores")
def show_scores(environ, start_response):
  """WSGI handler rendering all domains with the configured template.

  The rows returned by _get_domains() are passed to the template named by the
  "template" option of the "urlfixer" config section under the key "domains";
  the rendered output is sent back as text/html.
  """
  setupStderr(environ["wsgi.errors"])

  rows = _get_domains(_get_db())
  template_name = get_config().get("urlfixer", "template")
  page = get_template(template_name).render({"domains": rows})

  start_response("200 OK", [("Content-type", "text/html")])
  return [page]
| 30 |
@url_handler("/refreshScores")
@basic_auth("urlfixer")
def refresh_scores(environ, start_response):
  """WSGI handler recalculating the correctness scores of every domain.

  For each row returned by _get_domains() the weighted occurrence counts of
  the four statuses are read from the corrections table, converted into two
  scores with calculateCorrectness() and written back via _update_scores().
  All updates are committed together; if anything fails the pending updates
  are rolled back and the exception is re-raised.

  Responds with an empty text/plain body on success.
  """
  setupStderr(environ["wsgi.errors"])

  db = _get_db()

  try:
    for domain in _get_domains(db):
      domain_id = domain["id"]

      # Determine the occurrences of the domain being correct as is
      typed_count = get_weighted_count(
          _get_entry(db, domain_id, STATUS_TYPED))
      # Determine the occurrences of the domain as a typo
      typo_count = get_weighted_count(
          _get_entry(db, domain_id, STATUS_TYPO))
      # Determine the occurrences of the domain as a correction
      correction_count = get_weighted_count(
          _get_entry(db, domain_id, STATUS_CORRECTION))
      # Determine the occurrences of the domain as a false positive
      false_positive_count = get_weighted_count(
          _get_entry(db, domain_id, STATUS_FALSE_POSITIVE))

      # Determine the correctness of the domain and of the corrections (with
      # a confidence level of 0.95)
      # http://www.evanmiller.org/how-not-to-sort-by-average-rating.html
      domain_correctness_score = calculateCorrectness(
          correction_count + typed_count,
          false_positive_count + typo_count)
      correction_correctness_score = calculateCorrectness(
          correction_count, false_positive_count)

      _update_scores(db, domain_id, domain_correctness_score,
                     correction_correctness_score)

    db.commit()
  except Exception:
    # _get_db() is decorated with @cached, so this connection is presumably
    # reused by later requests. Discard the partial updates so that a later
    # commit() on the same connection cannot persist a half-finished refresh.
    db.rollback()
    raise

  response_headers = [("Content-type", "text/plain")]
  start_response("200 OK", response_headers)
  return []
| 67 |
@url_handler("/resetScores")
@basic_auth("urlfixer")
def reset_scores(environ, start_response):
  """WSGI handler rolling over the per-period counters of all corrections.

  Expects a "period" query parameter. With "month" the current month counter
  is moved to the previous month counter and reset to zero; with "year" the
  same is done for both the month and the year counters. A missing or
  unknown period is answered through showError().

  Responds with an empty text/plain body on success.
  """
  setupStderr(environ["wsgi.errors"])

  query = parse_qs(environ.get("QUERY_STRING", ""))
  period = query.get("period", [None])[0]

  if period is None:
    return showError("Parameter not defined: period", start_response)

  if period not in reset_period_values:
    allowed = ", ".join(reset_period_values)
    return showError(
        "Invalid value for parameter 'period'\nValid values: " + allowed,
        start_response)

  statements = {
    "month":
      """
        UPDATE corrections SET
          prev_month = curr_month,
          curr_month = 0
      """,
    "year":
      """
        UPDATE corrections SET
          prev_month = curr_month,
          prev_year = curr_year,
          curr_month = 0,
          curr_year = 0
      """
  }
  sql = statements.get(period)

  db = _get_db()
  db.cursor(MySQLdb.cursors.DictCursor).execute(sql)
  db.commit()

  start_response("200 OK", [("Content-type", "text/plain")])
  return []
| 107 |
def calculateCorrectness(positive, negative):
  """Return the lower bound of the Wilson score interval (95% confidence).

  See http://www.evanmiller.org/how-not-to-sort-by-average-rating.html

  positive -- (weighted) number of observations in favour
  negative -- (weighted) number of observations against

  Returns a score between 0 and 1, or 0 when there are no observations.
  """
  # Coerce to float so that integer arguments are not floor-divided under
  # Python 2; float arguments yield exactly the same result as before.
  positive = float(positive)
  negative = float(negative)
  total = positive + negative

  if not total > 0:
    return 0

  # z = 1.96 is the normal quantile for a 95% confidence level. The other
  # literals are z^2 / 2 (1.9208), z^2 / 4 (0.9604) and z^2 (3.8416).
  centre = (positive + 1.9208) / total
  margin = 1.96 * math.sqrt((positive * negative) / total + 0.9604) / total
  return (centre - margin) / (1 + 3.8416 / total)
| 117 |
def get_weighted_count(result):
  """Return the weighted occurrence count of a corrections row.

  result -- mapping with the keys "curr_month", "prev_month", "curr_year"
            and "prev_year", or None if there is no row

  The counters are weighted 0.4, 0.3, 0.2 and 0.1 respectively, so more
  recent periods count more. Returns 0 when result is None.
  """
  if result is None:
    return 0

  return (result["curr_month"] * 0.4 + result["prev_month"] * 0.3 +
          result["curr_year"] * 0.2 + result["prev_year"] * 0.1)
| 123 |
def showError(message, start_response):
  """Start a "400 Processing Error" response and return its body.

  message        -- error text, sent UTF-8 encoded as text/plain
  start_response -- the WSGI start_response callable

  Returns the response body as a one-element list.
  """
  headers = [("Content-Type", "text/plain; charset=utf-8")]
  start_response("400 Processing Error", headers)
  body = message.encode("utf-8")
  return [body]
| 127 |
@cached(600)
def _get_db():
  """Open a MySQL connection using the "urlfixer" config section.

  Reads the "database", "dbuser" and "dbpassword" options. On Windows
  (os.name == "nt") the connection is made through a named pipe.
  NOTE(review): the result is presumably reused for 600 seconds by the
  cached decorator -- confirm against sitescripts.utils.
  """
  connect_args = {
    "db": get_config().get("urlfixer", "database"),
    "user": get_config().get("urlfixer", "dbuser"),
    "passwd": get_config().get("urlfixer", "dbpassword"),
    "use_unicode": True,
    "charset": "utf8",
  }
  if os.name == "nt":
    connect_args["named_pipe"] = True

  return MySQLdb.connect(**connect_args)
| 139 |
def _get_domains(db):
  """Return all rows of the domains table, highest correction_correct first.

  db -- open database connection

  Rows are fetched through a DictCursor.
  """
  domains_cursor = db.cursor(MySQLdb.cursors.DictCursor)
  domains_cursor.execute("SELECT * FROM domains ORDER BY correction_correct DESC")
  return domains_cursor.fetchall()
| 145 |
def _get_entry(db, domain_id, status):
  """Return one corrections row for the given domain and status.

  db        -- open database connection
  domain_id -- value matched against the "domain" column
  status    -- value matched against the "status" column

  Returns whatever fetchone() yields for the query on a DictCursor.
  """
  query = "SELECT * FROM corrections WHERE domain=%s AND status=%s"
  entry_cursor = db.cursor(MySQLdb.cursors.DictCursor)
  entry_cursor.execute(query, (domain_id, status))
  return entry_cursor.fetchone()
| 150 |
| 151 def _update_scores(db, domain_id, domain_correctness, correction_correctness): |
| 152 cursor = db.cursor() |
| 153 cursor.execute("UPDATE domains SET domain_correct=%s, correction_correct=%s WH
ERE id=%s", (domain_correctness, correction_correctness, domain_id)) |
OLD | NEW |