OLD | NEW |
(Empty) | |
| 1 /* This Source Code Form is subject to the terms of the Mozilla Public |
| 2 * License, v. 2.0. If a copy of the MPL was not distributed with this file, |
| 3 * You can obtain one at http://mozilla.org/MPL/2.0/. */ |
| 4 |
| 5 Cu.import("resource://gre/modules/Services.jsm"); |
| 6 Cu.import("resource://gre/modules/FileUtils.jsm"); |
| 7 |
| 8 let {Prefs} = require("prefs"); |
| 9 |
| 10 let RULES_VERSION = 2; |
| 11 |
| 12 let CUSTOM_RULE_PRIORITY = 0x7FFFFFFF; |
| 13 |
| 14 let rules = {expressions: []}; |
| 15 |
| 16 loadRules(); |
| 17 |
| 18 // Make first attempt to update rules after five minutes |
| 19 let updateTimer = null; |
| 20 updateTimer = Cc["@mozilla.org/timer;1"].createInstance(Ci.nsITimer); |
| 21 updateTimer.initWithCallback(onTimer, 1000 * 60 * 5, Ci.nsITimer.TYPE_REPEATING_
SLACK); |
| 22 onShutdown.add(function() updateTimer.cancel()); |
| 23 |
| 24 function loadRules() |
| 25 { |
| 26 loadRulesFrom(Services.io.newFileURI(getRuleFile()).spec, false, function(succ
ess) |
| 27 { |
| 28 if (!success) |
| 29 loadRulesFrom(require("info").addonRoot + "defaults/typoRules.json", true)
; |
| 30 }); |
| 31 } |
| 32 |
| 33 function loadRulesFrom(url, ignoreVersion, callback) |
| 34 { |
| 35 if (typeof callback != "function") |
| 36 callback = function() {}; |
| 37 |
| 38 let request = Cc["@mozilla.org/xmlextras/xmlhttprequest;1"].createInstance(Ci.
nsIXMLHttpRequest); |
| 39 request.open("GET", url); |
| 40 request.overrideMimeType("text/plain"); |
| 41 request.addEventListener("load", function() |
| 42 { |
| 43 try |
| 44 { |
| 45 // Remove comments from the file if any |
| 46 let data = JSON.parse(request.responseText.replace(/^\s*\/\/.*/mg, "")); |
| 47 if (ignoreVersion || data.version == RULES_VERSION) |
| 48 { |
| 49 rules = data; |
| 50 callback(true); |
| 51 |
| 52 // Add user-defined rules after calling the callback - if the callback |
| 53 // saves the rules then the custom rules won't be included. |
| 54 addCustomRules(); |
| 55 } |
| 56 else |
| 57 callback(false); |
| 58 } |
| 59 catch (e) |
| 60 { |
| 61 Cu.reportError(e); |
| 62 callback(false); |
| 63 } |
| 64 }, false); |
| 65 request.addEventListener("error", function() |
| 66 { |
| 67 callback(false); |
| 68 }, false); |
| 69 |
| 70 try |
| 71 { |
| 72 request.send(null); |
| 73 } |
| 74 catch (e) |
| 75 { |
| 76 if (e.result != Cr.NS_ERROR_FILE_NOT_FOUND) |
| 77 Cu.reportError(e); |
| 78 callback(false); |
| 79 } |
| 80 } |
| 81 |
| 82 function getRuleFile() |
| 83 { |
| 84 let result = FileUtils.getFile("ProfD", ["url-fixer-rules.json"]); |
| 85 |
| 86 getRuleFile = function() result; |
| 87 return getRuleFile(); |
| 88 } |
| 89 |
| 90 function addCustomRules() |
| 91 { |
| 92 for (let domain in Prefs.whitelist) |
| 93 onWhitelistEntryAdded(domain); |
| 94 } |
| 95 |
| 96 function onWhitelistEntryAdded(domain) |
| 97 { |
| 98 let reverse = domain.split("").reverse().join(""); |
| 99 addSuffix(rules.domain, reverse, CUSTOM_RULE_PRIORITY); |
| 100 } |
| 101 exports.onWhitelistEntryAdded = onWhitelistEntryAdded; |
| 102 |
| 103 function onWhitelistEntryRemoved(domain) |
| 104 { |
| 105 let reverse = domain.split("").reverse().join(""); |
| 106 removeSuffix(rules.domain, reverse, CUSTOM_RULE_PRIORITY); |
| 107 } |
| 108 exports.onWhitelistEntryRemoved = onWhitelistEntryRemoved; |
| 109 |
| 110 function addSuffix(tree, suffix, priority) |
| 111 { |
| 112 if (suffix.length == 0) |
| 113 { |
| 114 // We are at the last character, just put our priority here |
| 115 tree[""] = " " + priority; |
| 116 return; |
| 117 } |
| 118 |
| 119 let c = suffix[0]; |
| 120 if (c in tree) |
| 121 { |
| 122 let existing = tree[c]; |
| 123 if (typeof existing == "string") |
| 124 { |
| 125 // Single choice for this suffix, maybe the same entry? |
| 126 if (existing.substr(0, suffix.length - 1) == suffix.substr(1) && existing[
suffix.length - 1] == " ") |
| 127 { |
| 128 // Same entry, simply replace it by new priority |
| 129 tree[c] = suffix.substr(1) + " " + priority; |
| 130 } |
| 131 else |
| 132 { |
| 133 // Different entry, need to add a new branching point and go deeper |
| 134 if (existing[0] == " ") |
| 135 tree[c] = {"": existing}; |
| 136 else |
| 137 { |
| 138 tree[c] = {}; |
| 139 tree[c][existing[0]] = existing.substr(1); |
| 140 } |
| 141 addSuffix(tree[c], suffix.substr(1), priority); |
| 142 } |
| 143 } |
| 144 else |
| 145 { |
| 146 // Multiple choices for this suffix - go deeper |
| 147 addSuffix(existing, suffix.substr(1), priority); |
| 148 } |
| 149 } |
| 150 else |
| 151 { |
| 152 // No existing entry yet, just add ours |
| 153 tree[c] = suffix.substr(1) + " " + priority; |
| 154 } |
| 155 } |
| 156 |
| 157 function removeSuffix(tree, suffix, priority) |
| 158 { |
| 159 if (suffix.length == 0) |
| 160 { |
| 161 // We are at the last character, check whether there is an entry with |
| 162 // matching priority |
| 163 if ("" in tree && tree[""] == " " + priority) |
| 164 delete tree[""]; |
| 165 return; |
| 166 } |
| 167 |
| 168 let c = suffix[0]; |
| 169 if (!(c in tree)) |
| 170 return; |
| 171 |
| 172 if (typeof tree[c] == "string") |
| 173 { |
| 174 // Single entry - check whether it is the right one |
| 175 if (tree[c] == suffix.substr(1) + " " + priority) |
| 176 delete tree[c]; |
| 177 } |
| 178 else |
| 179 { |
| 180 // Multiple entries, need to go deeper |
| 181 removeSuffix(tree[c], suffix.substr(1), priority); |
| 182 } |
| 183 } |
| 184 |
| 185 function onTimer() |
| 186 { |
| 187 // Next check in 1 hour |
| 188 updateTimer.delay = 1000 * 60 * 60; |
| 189 |
| 190 // Only download rules every three days |
| 191 let nextUpdate = Prefs.lastRuleUpdate + 60 * 60 * 24 * 3; |
| 192 if (nextUpdate > Date.now() / 1000) |
| 193 return; |
| 194 |
| 195 loadRulesFrom("http://urlfixer.org/download/rules.json?version=" + RULES_VERSI
ON, false, function(success) |
| 196 { |
| 197 if (success) |
| 198 { |
| 199 rules.timestamp = Date.now(); |
| 200 |
| 201 try |
| 202 { |
| 203 // Save the rules to file. |
| 204 let rulesText = JSON.stringify(rules); |
| 205 let fileStream = FileUtils.openSafeFileOutputStream(getRuleFile()); |
| 206 let stream = Cc["@mozilla.org/intl/converter-output-stream;1"].createIns
tance(Ci.nsIConverterOutputStream); |
| 207 stream.init(fileStream, "UTF-8", 16384, Ci.nsIConverterInputStream.DEFAU
LT_REPLACEMENT_CHARACTER); |
| 208 stream.writeString(rulesText); |
| 209 stream.flush(); |
| 210 FileUtils.closeSafeFileOutputStream(fileStream); |
| 211 } |
| 212 catch(e) |
| 213 { |
| 214 Cu.reportError(e); |
| 215 } |
| 216 } |
| 217 }); |
| 218 |
| 219 Prefs.lastRuleUpdate = Date.now() / 1000; |
| 220 } |
| 221 |
| 222 exports.getSchemeCorrection = getSchemeCorrection; |
| 223 function getSchemeCorrection(scheme) |
| 224 { |
| 225 return getBestMatch(scheme, rules.scheme, 1, null); |
| 226 } |
| 227 |
| 228 exports.isKnownScheme = isKnownScheme; |
| 229 function isKnownScheme(scheme) |
| 230 { |
| 231 return (getSimilarStrings(scheme, rules.scheme, 0, null).length > 0); |
| 232 } |
| 233 |
| 234 exports.getDomainCorrection = getDomainCorrection; |
| 235 function getDomainCorrection(domain) |
| 236 { |
| 237 // Apply user's custom changes first |
| 238 let customRules = Prefs.custom_replace; |
| 239 for (let searchString in customRules) |
| 240 { |
| 241 let replacement = customRules[searchString]; |
| 242 searchString = searchString.toLowerCase(); |
| 243 if (/^re:+/.test(searchString)) |
| 244 domain = domain.replace(new RegExp(RegExp.rightContext, "g"), replacement)
; |
| 245 else |
| 246 domain = domain.replace(searchString, replacement); |
| 247 } |
| 248 |
| 249 // Now apply our rules on the domain name |
| 250 for (let i = 0, l = rules.expressions.length; i < l; i++) |
| 251 domain = applyExpression(domain, rules.expressions[i]); |
| 252 |
| 253 // Find similar known domains, test domains without the www. prefix |
| 254 if (domain.substr(0, 4) == "www.") |
| 255 domain = "www." + getBestMatch(domain.substr(4), rules.domain, 1, "."); |
| 256 else |
| 257 domain = getBestMatch(domain, rules.domain, 1, "."); |
| 258 |
| 259 return domain; |
| 260 } |
| 261 |
| 262 exports.getDomainReferral = getDomainReferral; |
| 263 function getDomainReferral(domain) |
| 264 { |
| 265 if ("domainReferrals" in rules && domain in rules.domainReferrals) |
| 266 return rules.domainReferrals[domain]; |
| 267 else |
| 268 return null; |
| 269 } |
| 270 |
| 271 function applyExpression(string, expression) |
| 272 { |
| 273 if (expression.nomatch && new RegExp(expression.nomatch).test(string)) |
| 274 return string; |
| 275 |
| 276 return string.replace(new RegExp(expression.find, "g"), expression.replace); |
| 277 } |
| 278 |
| 279 function getSimilarStrings(input, dictionary, maxDistance, separator) |
| 280 { |
| 281 // We use a non-deterministic final automaton to perform a search on all |
| 282 // strings in the dictionary simultaneously (see |
| 283 // http://blog.notdot.net/2010/07/Damn-Cool-Algorithms-Levenshtein-Automata |
| 284 // for the basic algorithm). However, we use Damerau-Levenshtein distance |
| 285 // (transposition of two adjacent letters counts as one operation), meaning |
| 286 // one additional transition for the automaton. The number of automaton states |
| 287 // can theoretically be extremely large, the maxDistance parameter limits the |
| 288 // number of states we need to process however. We process the input string |
| 289 // backwards to allow matching domain names for a longer host name. |
| 290 |
| 291 let results = []; |
| 292 |
| 293 function processState(entry, distance, position, result) |
| 294 { |
| 295 let isString = (typeof entry == "string"); |
| 296 |
| 297 // Do we have a result? |
| 298 if (position < 0 || input[position] == separator) |
| 299 { |
| 300 if (!isString && "" in entry) |
| 301 results.push([result, input.substr(position + 1), distance, parseInt(ent
ry[""], 10)]); |
| 302 else if (isString && entry[0] == " ") |
| 303 results.push([result, input.substr(position + 1), distance, parseInt(ent
ry, 10)]); |
| 304 } |
| 305 |
| 306 // Maybe there is a match |
| 307 if (position >= 0) |
| 308 { |
| 309 let nextChar = input[position]; |
| 310 if (!isString && nextChar in entry) |
| 311 processState(entry[nextChar], distance, position - 1, nextChar + result)
; |
| 312 else if (isString && entry[0] == nextChar) |
| 313 processState(entry.substr(1), distance, position - 1, nextChar + result)
; |
| 314 } |
| 315 |
| 316 // Mistakes |
| 317 if (distance < maxDistance) |
| 318 { |
| 319 // Deletion and substitution |
| 320 if (!isString) |
| 321 { |
| 322 for (let c in entry) |
| 323 { |
| 324 if (c != "") |
| 325 processState(entry[c], distance + 1, position, c + result); |
| 326 if (c != "" && position >= 0) |
| 327 processState(entry[c], distance + 1, position - 1, c + result); |
| 328 } |
| 329 } |
| 330 else if (entry[0] != " ") |
| 331 { |
| 332 processState(entry.substr(1), distance + 1, position, entry[0] + result)
; |
| 333 if (position >= 0) |
| 334 processState(entry.substr(1), distance + 1, position - 1, entry[0] + r
esult); |
| 335 } |
| 336 |
| 337 // Insertion |
| 338 if (position >= 0) |
| 339 processState(entry, distance + 1, position - 1, result); |
| 340 |
| 341 // Transposition |
| 342 if (position >= 1) |
| 343 { |
| 344 let nextChar1 = input[position]; |
| 345 let nextChar2 = input[position - 1]; |
| 346 if (isString) |
| 347 { |
| 348 if (entry[0] == nextChar2 && entry[1] == nextChar1) |
| 349 processState(entry.substr(2), distance + 1, position - 2, nextChar1
+ nextChar2 + result); |
| 350 } |
| 351 else if (nextChar2 in entry) |
| 352 { |
| 353 let nextEntry = entry[nextChar2]; |
| 354 if (typeof nextEntry != "string") |
| 355 { |
| 356 if (nextChar1 in nextEntry) |
| 357 processState(nextEntry[nextChar1], distance + 1, position - 2, nex
tChar1 + nextChar2 + result); |
| 358 } |
| 359 else |
| 360 { |
| 361 if (nextEntry[0] == nextChar1) |
| 362 processState(nextEntry.substr(1), distance + 1, position - 2, next
Char1 + nextChar2 + result); |
| 363 } |
| 364 } |
| 365 } |
| 366 } |
| 367 } |
| 368 |
| 369 processState(dictionary, 0, input.length - 1, ""); |
| 370 return results; |
| 371 } |
| 372 |
| 373 function getBestMatch(input, dictionary, maxDistance, separator) |
| 374 { |
| 375 let suggestions = getSimilarStrings(input, dictionary, maxDistance, separator)
; |
| 376 |
| 377 let bestSuggestion = null; |
| 378 let bestSuggestionDistance; |
| 379 let bestSuggestionMatched; |
| 380 let bestSuggestionPriority; |
| 381 for (let i = 0; i < suggestions.length; i++) |
| 382 { |
| 383 let [suggestion, matchedString, distance, priority] = suggestions[i]; |
| 384 if (suggestion == input) |
| 385 return suggestion; |
| 386 |
| 387 let matchedLen = matchedString.length; |
| 388 if (priority < 0 && matchedLen == input.length) |
| 389 { |
| 390 // TLDs should never be proposed as a replacement for the entire host name |
| 391 continue; |
| 392 } |
| 393 |
| 394 if (!bestSuggestion || |
| 395 bestSuggestionMatched < matchedLen || |
| 396 (bestSuggestionMatched == matchedLen && bestSuggestionDistance > distanc
e) || |
| 397 (bestSuggestionMatched == matchedLen && bestSuggestionDistance == distan
ce && bestSuggestionPriority < priority)) |
| 398 { |
| 399 bestSuggestion = suggestion; |
| 400 bestSuggestionDistance = distance; |
| 401 bestSuggestionMatched = matchedLen; |
| 402 bestSuggestionPriority = priority; |
| 403 } |
| 404 } |
| 405 if (bestSuggestion) |
| 406 return input.substr(0, input.length - bestSuggestionMatched) + bestSuggestio
n; |
| 407 else |
| 408 return input; |
| 409 } |
OLD | NEW |