Index: sitescripts/urlfixer/web/calculateScores.py |
=================================================================== |
new file mode 100644 |
--- /dev/null |
+++ b/sitescripts/urlfixer/web/calculateScores.py |
@@ -0,0 +1,153 @@ |
+# coding: utf-8 |
+ |
+# This Source Code is subject to the terms of the Mozilla Public License |
+# version 2.0 (the "License"). You can obtain a copy of the License at |
+# http://mozilla.org/MPL/2.0/. |
+ |
+import os, MySQLdb, math |
+from urlparse import parse_qs |
+from sitescripts.web import url_handler, basic_auth |
+from sitescripts.utils import cached, get_config, get_template, setupStderr |
+ |
# Values of the `status` column in the `corrections` table. refresh_scores()
# passes each of them to _get_entry() to fetch the matching record of a domain.
STATUS_TYPED, STATUS_TYPO, STATUS_CORRECTION, STATUS_FALSE_POSITIVE = 1, 2, 3, 4

# Accepted values of the `period` query parameter of /resetScores.
reset_period_values = ["month", "year"]
+ |
@url_handler("/showScores")
def show_scores(environ, start_response):
    """WSGI handler for /showScores: render the domain scores as an HTML page.

    Fetches the domain rows through _get_domains(), renders them with the
    template named by the ``template`` option of the ``urlfixer`` config
    section (the rows are exposed to the template as ``domains``) and
    returns the page as a single-element response body.
    """
    setupStderr(environ["wsgi.errors"])

    domains = _get_domains(_get_db())
    template = get_template(get_config().get("urlfixer", "template"))
    output = template.render({"domains": domains})

    # WSGI response bodies have to be byte strings. Encode text output as
    # UTF-8 (matching showError()); output that already is a byte string is
    # passed through untouched.
    if not isinstance(output, bytes):
        output = output.encode("utf-8")

    # Declare the charset so that it matches the encoding used above.
    response_headers = [("Content-type", "text/html; charset=utf-8")]
    start_response("200 OK", response_headers)
    return [output]
+ |
@url_handler("/refreshScores")
@basic_auth("urlfixer")
def refresh_scores(environ, start_response):
    """WSGI handler for /refreshScores: recompute the scores of every domain.

    For each row returned by _get_domains() the four `corrections` records
    (typed, typo, correction, false positive) are looked up, reduced to
    weighted counts and turned into two scores that are written back with
    _update_scores(). All updates are committed together at the end and an
    empty text/plain response is returned.
    """
    setupStderr(environ["wsgi.errors"])

    connection = _get_db()
    statuses = (
        STATUS_TYPED,
        STATUS_TYPO,
        STATUS_CORRECTION,
        STATUS_FALSE_POSITIVE,
    )

    for row in _get_domains(connection):
        row_id = row["id"]

        # Fetch the record for every status first, then determine the
        # weighted number of occurrences of the domain as: correct as typed,
        # a typo, a correction and a false positive.
        entries = [_get_entry(connection, row_id, status) for status in statuses]
        counts = [get_weighted_count(entry) for entry in entries]
        typed_count, typo_count, correction_count, false_positive_count = counts

        # Determine the correctness of the domain and of the corrections
        # (with a confidence level of 0.95), see
        # http://www.evanmiller.org/how-not-to-sort-by-average-rating.html
        domain_score = calculateCorrectness(
            correction_count + typed_count,
            false_positive_count + typo_count)
        correction_score = calculateCorrectness(
            correction_count, false_positive_count)

        _update_scores(connection, row_id, domain_score, correction_score)

    connection.commit()

    start_response("200 OK", [("Content-type", "text/plain")])
    return []
+ |
@url_handler("/resetScores")
@basic_auth("urlfixer")
def reset_scores(environ, start_response):
    """WSGI handler for /resetScores: roll the counters over to a new period.

    Reads the `period` parameter from the query string. A missing value or a
    value not listed in reset_period_values is answered through showError().
    Otherwise one UPDATE is run over the whole `corrections` table:

    * "month": curr_month is copied to prev_month and reset to 0.
    * "year": the same is done for the month and for the year counters.

    The change is committed and an empty text/plain response is returned.
    """
    setupStderr(environ["wsgi.errors"])

    query = parse_qs(environ.get("QUERY_STRING", ""))
    period = query.get("period", [None])[0]

    if period is None:
        return showError("Parameter not defined: period", start_response)

    if period not in reset_period_values:
        return showError(
            "Invalid value for parameter 'period'\nValid values: "
            + ", ".join(reset_period_values),
            start_response)

    statements = {
        "month":
            """
            UPDATE corrections SET
            prev_month = curr_month,
            curr_month = 0
            """,
        "year":
            """
            UPDATE corrections SET
            prev_month = curr_month,
            prev_year = curr_year,
            curr_month = 0,
            curr_year = 0
            """,
    }
    statement = statements.get(period)

    connection = _get_db()
    connection.cursor(MySQLdb.cursors.DictCursor).execute(statement)
    connection.commit()

    start_response("200 OK", [("Content-type", "text/plain")])
    return []
+ |
def calculateCorrectness(positive, negative):
    """Return the lower bound of the Wilson score interval (95% confidence).

    See http://www.evanmiller.org/how-not-to-sort-by-average-rating.html

    positive -- (weighted) number of positive observations
    negative -- (weighted) number of negative observations

    Returns 0 when there are no observations at all, otherwise a float. The
    literals below derive from z = 1.96 (the 0.95 confidence level):
    1.9208 = z^2 / 2, 0.9604 = z^2 / 4 and 3.8416 = z^2.
    """
    # Coerce to float so that integer counts are not truncated by Python 2's
    # integer division in the square-root term below.
    positive = float(positive)
    negative = float(negative)
    total = positive + negative

    if not total > 0:
        return 0

    centre = (positive + 1.9208) / total
    margin = 1.96 * math.sqrt((positive * negative) / total + 0.9604) / total
    return (centre - margin) / (1 + 3.8416 / total)
+ |
def get_weighted_count(result):
    """Return the weighted occurrence count of a `corrections` record.

    result -- mapping with the keys curr_month, prev_month, curr_year and
              prev_year, or None if there is no record

    The four counters are weighted with 0.4, 0.3, 0.2 and 0.1 respectively.
    A missing record (None) counts as 0.
    """
    # Identity check: only a genuinely missing record is treated as empty,
    # whatever equality semantics the row object implements.
    if result is None:
        return 0

    return (result["curr_month"] * 0.4 + result["prev_month"] * 0.3 +
            result["curr_year"] * 0.2 + result["prev_year"] * 0.1)
+ |
def showError(message, start_response):
    """Start a plain-text "400 Processing Error" response for *message*.

    Returns the response body: a list holding the UTF-8 encoded message.
    """
    headers = [("Content-Type", "text/plain; charset=utf-8")]
    start_response("400 Processing Error", headers)
    body = message.encode("utf-8")
    return [body]
+ |
@cached(600)
def _get_db():
    """Open a MySQL connection using the `urlfixer` config section.

    Reads the `database`, `dbuser` and `dbpassword` options and connects with
    unicode results and the utf8 charset. On Windows (os.name == "nt") the
    connection is made through a named pipe.

    NOTE(review): presumably @cached(600) reuses the returned connection for
    600 seconds -- confirm against sitescripts.utils.cached.
    """
    section = "urlfixer"
    connect_args = {
        "db": get_config().get(section, "database"),
        "user": get_config().get(section, "dbuser"),
        "passwd": get_config().get(section, "dbpassword"),
        "use_unicode": True,
        "charset": "utf8",
    }
    if os.name == "nt":
        connect_args["named_pipe"] = True

    return MySQLdb.connect(**connect_args)
+ |
def _get_domains(db):
    """Return all rows of the `domains` table as dictionaries.

    The rows are ordered by `correction_correct`, highest value first.
    """
    query = "SELECT * FROM domains ORDER BY correction_correct DESC"
    cursor = db.cursor(MySQLdb.cursors.DictCursor)
    cursor.execute(query)
    return cursor.fetchall()
+ |
def _get_entry(db, domain_id, status):
    """Return one `corrections` row for a domain and status as a dictionary.

    The result is whatever the cursor's fetchone() yields for the query, i.e.
    the first matching row.
    """
    query = "SELECT * FROM corrections WHERE domain=%s AND status=%s"
    params = (domain_id, status)
    cursor = db.cursor(MySQLdb.cursors.DictCursor)
    cursor.execute(query, params)
    row = cursor.fetchone()
    return row
+ |
def _update_scores(db, domain_id, domain_correctness, correction_correctness):
    """Store both scores on the `domains` row with the given id.

    Only executes the UPDATE; committing is left to the caller.
    """
    statement = "UPDATE domains SET domain_correct=%s, correction_correct=%s WHERE id=%s"
    values = (domain_correctness, correction_correctness, domain_id)
    db.cursor().execute(statement, values)