Index: lib/typoRules.js |
=================================================================== |
new file mode 100644 |
--- /dev/null |
+++ b/lib/typoRules.js |
@@ -0,0 +1,409 @@ |
+/* This Source Code Form is subject to the terms of the Mozilla Public |
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file, |
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */ |
+ |
+Cu.import("resource://gre/modules/Services.jsm"); |
+Cu.import("resource://gre/modules/FileUtils.jsm"); |
+ |
+let {Prefs} = require("prefs"); |
+ |
+let RULES_VERSION = 2; |
+ |
+let CUSTOM_RULE_PRIORITY = 0x7FFFFFFF; |
+ |
+let rules = {expressions: []}; |
+ |
+loadRules(); |
+ |
+// Make first attempt to update rules after five minutes |
+let updateTimer = null; |
+updateTimer = Cc["@mozilla.org/timer;1"].createInstance(Ci.nsITimer); |
+updateTimer.initWithCallback(onTimer, 1000 * 60 * 5, Ci.nsITimer.TYPE_REPEATING_SLACK); |
+onShutdown.add(function() updateTimer.cancel()); |
+ |
+function loadRules() |
+{ |
+ loadRulesFrom(Services.io.newFileURI(getRuleFile()).spec, false, function(success) |
+ { |
+ if (!success) |
+ loadRulesFrom(require("info").addonRoot + "defaults/typoRules.json", true); |
+ }); |
+} |
+ |
+function loadRulesFrom(url, ignoreVersion, callback) |
+{ |
+ if (typeof callback != "function") |
+ callback = function() {}; |
+ |
+ let request = Cc["@mozilla.org/xmlextras/xmlhttprequest;1"].createInstance(Ci.nsIXMLHttpRequest); |
+ request.open("GET", url); |
+ request.overrideMimeType("text/plain"); |
+ request.addEventListener("load", function() |
+ { |
+ try |
+ { |
+ // Remove comments from the file if any |
+ let data = JSON.parse(request.responseText.replace(/^\s*\/\/.*/mg, "")); |
+ if (ignoreVersion || data.version == RULES_VERSION) |
+ { |
+ rules = data; |
+ callback(true); |
+ |
+ // Add user-defined rules after calling the callback - if the callback |
+ // saves the rules then the custom rules won't be included. |
+ addCustomRules(); |
+ } |
+ else |
+ callback(false); |
+ } |
+ catch (e) |
+ { |
+ Cu.reportError(e); |
+ callback(false); |
+ } |
+ }, false); |
+ request.addEventListener("error", function() |
+ { |
+ callback(false); |
+ }, false); |
+ |
+ try |
+ { |
+ request.send(null); |
+ } |
+ catch (e) |
+ { |
+ if (e.result != Cr.NS_ERROR_FILE_NOT_FOUND) |
+ Cu.reportError(e); |
+ callback(false); |
+ } |
+} |
+ |
+function getRuleFile() |
+{ |
+ let result = FileUtils.getFile("ProfD", ["url-fixer-rules.json"]); |
+ |
+ getRuleFile = function() result; |
+ return getRuleFile(); |
+} |
+ |
+function addCustomRules() |
+{ |
+ for (let domain in Prefs.whitelist) |
+ onWhitelistEntryAdded(domain); |
+} |
+ |
+function onWhitelistEntryAdded(domain) |
+{ |
+ let reverse = domain.split("").reverse().join(""); |
+ addSuffix(rules.domain, reverse, CUSTOM_RULE_PRIORITY); |
+} |
+exports.onWhitelistEntryAdded = onWhitelistEntryAdded; |
+ |
+function onWhitelistEntryRemoved(domain) |
+{ |
+ let reverse = domain.split("").reverse().join(""); |
+ removeSuffix(rules.domain, reverse, CUSTOM_RULE_PRIORITY); |
+} |
+exports.onWhitelistEntryRemoved = onWhitelistEntryRemoved; |
+ |
+function addSuffix(tree, suffix, priority) |
+{ |
+ if (suffix.length == 0) |
+ { |
+ // We are at the last character, just put our priority here |
+ tree[""] = " " + priority; |
+ return; |
+ } |
+ |
+ let c = suffix[0]; |
+ if (c in tree) |
+ { |
+ let existing = tree[c]; |
+ if (typeof existing == "string") |
+ { |
+ // Single choice for this suffix, maybe the same entry? |
+ if (existing.substr(0, suffix.length - 1) == suffix.substr(1) && existing[suffix.length - 1] == " ") |
+ { |
+ // Same entry, simply replace it by new priority |
+ tree[c] = suffix.substr(1) + " " + priority; |
+ } |
+ else |
+ { |
+ // Different entry, need to add a new branching point and go deeper |
+ if (existing[0] == " ") |
+ tree[c] = {"": existing}; |
+ else |
+ { |
+ tree[c] = {}; |
+ tree[c][existing[0]] = existing.substr(1); |
+ } |
+ addSuffix(tree[c], suffix.substr(1), priority); |
+ } |
+ } |
+ else |
+ { |
+ // Multiple choices for this suffix - go deeper |
+ addSuffix(existing, suffix.substr(1), priority); |
+ } |
+ } |
+ else |
+ { |
+ // No existing entry yet, just add ours |
+ tree[c] = suffix.substr(1) + " " + priority; |
+ } |
+} |
+ |
+function removeSuffix(tree, suffix, priority) |
+{ |
+ if (suffix.length == 0) |
+ { |
+ // We are at the last character, check whether there is an entry with |
+ // matching priority |
+ if ("" in tree && tree[""] == " " + priority) |
+ delete tree[""]; |
+ return; |
+ } |
+ |
+ let c = suffix[0]; |
+ if (!(c in tree)) |
+ return; |
+ |
+ if (typeof tree[c] == "string") |
+ { |
+ // Single entry - check whether it is the right one |
+ if (tree[c] == suffix.substr(1) + " " + priority) |
+ delete tree[c]; |
+ } |
+ else |
+ { |
+ // Multiple entries, need to go deeper |
+ removeSuffix(tree[c], suffix.substr(1), priority); |
+ } |
+} |
+ |
+function onTimer() |
+{ |
+ // Next check in 1 hour |
+ updateTimer.delay = 1000 * 60 * 60; |
+ |
+ // Only download rules every three days |
+ let nextUpdate = Prefs.lastRuleUpdate + 60 * 60 * 24 * 3; |
+ if (nextUpdate > Date.now() / 1000) |
+ return; |
+ |
+ loadRulesFrom("http://urlfixer.org/download/rules.json?version=" + RULES_VERSION, false, function(success) |
+ { |
+ if (success) |
+ { |
+ rules.timestamp = Date.now(); |
+ |
+ try |
+ { |
+ // Save the rules to file. |
+ let rulesText = JSON.stringify(rules); |
+ let fileStream = FileUtils.openSafeFileOutputStream(getRuleFile()); |
+ let stream = Cc["@mozilla.org/intl/converter-output-stream;1"].createInstance(Ci.nsIConverterOutputStream); |
+ stream.init(fileStream, "UTF-8", 16384, Ci.nsIConverterInputStream.DEFAULT_REPLACEMENT_CHARACTER); |
+ stream.writeString(rulesText); |
+ stream.flush(); |
+ FileUtils.closeSafeFileOutputStream(fileStream); |
+ } |
+ catch(e) |
+ { |
+ Cu.reportError(e); |
+ } |
+ } |
+ }); |
+ |
+ Prefs.lastRuleUpdate = Date.now() / 1000; |
+} |
+ |
+exports.getSchemeCorrection = getSchemeCorrection; |
+function getSchemeCorrection(scheme) |
+{ |
+ return getBestMatch(scheme, rules.scheme, 1, null); |
+} |
+ |
+exports.isKnownScheme = isKnownScheme; |
+function isKnownScheme(scheme) |
+{ |
+ return (getSimilarStrings(scheme, rules.scheme, 0, null).length > 0); |
+} |
+ |
+exports.getDomainCorrection = getDomainCorrection; |
+function getDomainCorrection(domain) |
+{ |
+ // Apply user's custom changes first |
+ let customRules = Prefs.custom_replace; |
+ for (let searchString in customRules) |
+ { |
+ let replacement = customRules[searchString]; |
+ searchString = searchString.toLowerCase(); |
+ if (/^re:+/.test(searchString)) |
+ domain = domain.replace(new RegExp(RegExp.rightContext, "g"), replacement); |
+ else |
+ domain = domain.replace(searchString, replacement); |
+ } |
+ |
+ // Now apply our rules on the domain name |
+ for (let i = 0, l = rules.expressions.length; i < l; i++) |
+ domain = applyExpression(domain, rules.expressions[i]); |
+ |
+ // Find similar known domains, test domains without the www. prefix |
+ if (domain.substr(0, 4) == "www.") |
+ domain = "www." + getBestMatch(domain.substr(4), rules.domain, 1, "."); |
+ else |
+ domain = getBestMatch(domain, rules.domain, 1, "."); |
+ |
+ return domain; |
+} |
+ |
+exports.getDomainReferral = getDomainReferral; |
+function getDomainReferral(domain) |
+{ |
+ if ("domainReferrals" in rules && domain in rules.domainReferrals) |
+ return rules.domainReferrals[domain]; |
+ else |
+ return null; |
+} |
+ |
+function applyExpression(string, expression) |
+{ |
+ if (expression.nomatch && new RegExp(expression.nomatch).test(string)) |
+ return string; |
+ |
+ return string.replace(new RegExp(expression.find, "g"), expression.replace); |
+} |
+ |
+function getSimilarStrings(input, dictionary, maxDistance, separator) |
+{ |
+ // We use a non-deterministic final automaton to perform a search on all |
+ // strings in the dictionary simultaneously (see |
+ // http://blog.notdot.net/2010/07/Damn-Cool-Algorithms-Levenshtein-Automata |
+ // for the basic algorithm). However, we use Damerau-Levenshtein distance |
+ // (transposition of two adjacent letters counts as one operation), meaning |
+ // one additional transition for the automaton. The number of automaton states |
+ // can theoretically be extremely large, the maxDistance parameter limits the |
+ // number of states we need to process however. We process the input string |
+ // backwards to allow matching domain names for a longer host name. |
+ |
+ let results = []; |
+ |
+ function processState(entry, distance, position, result) |
+ { |
+ let isString = (typeof entry == "string"); |
+ |
+ // Do we have a result? |
+ if (position < 0 || input[position] == separator) |
+ { |
+ if (!isString && "" in entry) |
+ results.push([result, input.substr(position + 1), distance, parseInt(entry[""], 10)]); |
+ else if (isString && entry[0] == " ") |
+ results.push([result, input.substr(position + 1), distance, parseInt(entry, 10)]); |
+ } |
+ |
+ // Maybe there is a match |
+ if (position >= 0) |
+ { |
+ let nextChar = input[position]; |
+ if (!isString && nextChar in entry) |
+ processState(entry[nextChar], distance, position - 1, nextChar + result); |
+ else if (isString && entry[0] == nextChar) |
+ processState(entry.substr(1), distance, position - 1, nextChar + result); |
+ } |
+ |
+ // Mistakes |
+ if (distance < maxDistance) |
+ { |
+ // Deletion and substitution |
+ if (!isString) |
+ { |
+ for (let c in entry) |
+ { |
+ if (c != "") |
+ processState(entry[c], distance + 1, position, c + result); |
+ if (c != "" && position >= 0) |
+ processState(entry[c], distance + 1, position - 1, c + result); |
+ } |
+ } |
+ else if (entry[0] != " ") |
+ { |
+ processState(entry.substr(1), distance + 1, position, entry[0] + result); |
+ if (position >= 0) |
+ processState(entry.substr(1), distance + 1, position - 1, entry[0] + result); |
+ } |
+ |
+ // Insertion |
+ if (position >= 0) |
+ processState(entry, distance + 1, position - 1, result); |
+ |
+ // Transposition |
+ if (position >= 1) |
+ { |
+ let nextChar1 = input[position]; |
+ let nextChar2 = input[position - 1]; |
+ if (isString) |
+ { |
+ if (entry[0] == nextChar2 && entry[1] == nextChar1) |
+ processState(entry.substr(2), distance + 1, position - 2, nextChar1 + nextChar2 + result); |
+ } |
+ else if (nextChar2 in entry) |
+ { |
+ let nextEntry = entry[nextChar2]; |
+ if (typeof nextEntry != "string") |
+ { |
+ if (nextChar1 in nextEntry) |
+ processState(nextEntry[nextChar1], distance + 1, position - 2, nextChar1 + nextChar2 + result); |
+ } |
+ else |
+ { |
+ if (nextEntry[0] == nextChar1) |
+ processState(nextEntry.substr(1), distance + 1, position - 2, nextChar1 + nextChar2 + result); |
+ } |
+ } |
+ } |
+ } |
+ } |
+ |
+ processState(dictionary, 0, input.length - 1, ""); |
+ return results; |
+} |
+ |
+function getBestMatch(input, dictionary, maxDistance, separator) |
+{ |
+ let suggestions = getSimilarStrings(input, dictionary, maxDistance, separator); |
+ |
+ let bestSuggestion = null; |
+ let bestSuggestionDistance; |
+ let bestSuggestionMatched; |
+ let bestSuggestionPriority; |
+ for (let i = 0; i < suggestions.length; i++) |
+ { |
+ let [suggestion, matchedString, distance, priority] = suggestions[i]; |
+ if (suggestion == input) |
+ return suggestion; |
+ |
+ let matchedLen = matchedString.length; |
+ if (priority < 0 && matchedLen == input.length) |
+ { |
+ // TLDs should never be proposed as a replacement for the entire host name |
+ continue; |
+ } |
+ |
+ if (!bestSuggestion || |
+ bestSuggestionMatched < matchedLen || |
+ (bestSuggestionMatched == matchedLen && bestSuggestionDistance > distance) || |
+ (bestSuggestionMatched == matchedLen && bestSuggestionDistance == distance && bestSuggestionPriority < priority)) |
+ { |
+ bestSuggestion = suggestion; |
+ bestSuggestionDistance = distance; |
+ bestSuggestionMatched = matchedLen; |
+ bestSuggestionPriority = priority; |
+ } |
+ } |
+ if (bestSuggestion) |
+ return input.substr(0, input.length - bestSuggestionMatched) + bestSuggestion; |
+ else |
+ return input; |
+} |