OLD | NEW |
| (Empty) |
1 /* This Source Code Form is subject to the terms of the Mozilla Public | |
2 * License, v. 2.0. If a copy of the MPL was not distributed with this file, | |
3 * You can obtain one at http://mozilla.org/MPL/2.0/. */ | |
4 | |
5 Cu.import("resource://gre/modules/Services.jsm"); | |
6 Cu.import("resource://gre/modules/FileUtils.jsm"); | |
7 | |
8 let {Prefs} = require("prefs"); | |
9 | |
10 let RULES_VERSION = 2; | |
11 | |
12 let CUSTOM_RULE_PRIORITY = 0x7FFFFFFF; | |
13 | |
14 let rules = {expressions: []}; | |
15 | |
16 loadRules(); | |
17 | |
18 // Make first attempt to update rules after five minutes | |
19 let updateTimer = null; | |
20 updateTimer = Cc["@mozilla.org/timer;1"].createInstance(Ci.nsITimer); | |
21 updateTimer.initWithCallback(onTimer, 1000 * 60 * 5, Ci.nsITimer.TYPE_REPEATING_
SLACK); | |
22 onShutdown.add(function() updateTimer.cancel()); | |
23 | |
24 function loadRules() | |
25 { | |
26 loadRulesFrom(Services.io.newFileURI(getRuleFile()).spec, false, function(succ
ess) | |
27 { | |
28 if (!success) | |
29 loadRulesFrom(require("info").addonRoot + "defaults/rules.json", true); | |
30 }); | |
31 } | |
32 | |
33 function loadRulesFrom(url, ignoreVersion, callback) | |
34 { | |
35 if (typeof callback != "function") | |
36 callback = function() {}; | |
37 | |
38 let request = Cc["@mozilla.org/xmlextras/xmlhttprequest;1"].createInstance(Ci.
nsIXMLHttpRequest); | |
39 request.open("GET", url); | |
40 request.overrideMimeType("text/plain"); | |
41 request.addEventListener("load", function() | |
42 { | |
43 try | |
44 { | |
45 // Remove comments from the file if any | |
46 let data = JSON.parse(request.responseText.replace(/^\s*\/\/.*/mg, "")); | |
47 if (ignoreVersion || data.version == RULES_VERSION) | |
48 { | |
49 rules = data; | |
50 callback(true); | |
51 | |
52 // Add user-defined rules after calling the callback - if the callback | |
53 // saves the rules then the custom rules won't be included. | |
54 addCustomRules(); | |
55 } | |
56 else | |
57 callback(false); | |
58 } | |
59 catch (e) | |
60 { | |
61 Cu.reportError(e); | |
62 callback(false); | |
63 } | |
64 }, false); | |
65 request.addEventListener("error", function() | |
66 { | |
67 callback(false); | |
68 }, false); | |
69 | |
70 try | |
71 { | |
72 request.send(null); | |
73 } | |
74 catch (e) | |
75 { | |
76 if (e.result != Cr.NS_ERROR_FILE_NOT_FOUND) | |
77 Cu.reportError(e); | |
78 callback(false); | |
79 } | |
80 } | |
81 | |
82 function getRuleFile() | |
83 { | |
84 let result = FileUtils.getFile("ProfD", ["url-fixer-rules.json"]); | |
85 | |
86 getRuleFile = function() result; | |
87 return getRuleFile(); | |
88 } | |
89 | |
90 function addCustomRules() | |
91 { | |
92 for (let domain in Prefs.whitelist) | |
93 onWhitelistEntryAdded(domain); | |
94 } | |
95 | |
96 function onWhitelistEntryAdded(domain) | |
97 { | |
98 let reverse = domain.split("").reverse().join(""); | |
99 addSuffix(rules.domain, reverse, CUSTOM_RULE_PRIORITY); | |
100 } | |
101 exports.onWhitelistEntryAdded = onWhitelistEntryAdded; | |
102 | |
103 function onWhitelistEntryRemoved(domain) | |
104 { | |
105 let reverse = domain.split("").reverse().join(""); | |
106 removeSuffix(rules.domain, reverse, CUSTOM_RULE_PRIORITY); | |
107 } | |
108 exports.onWhitelistEntryRemoved = onWhitelistEntryRemoved; | |
109 | |
110 function addSuffix(tree, suffix, priority) | |
111 { | |
112 if (suffix.length == 0) | |
113 { | |
114 // We are at the last character, just put our priority here | |
115 tree[""] = " " + priority; | |
116 return; | |
117 } | |
118 | |
119 let c = suffix[0]; | |
120 if (c in tree) | |
121 { | |
122 let existing = tree[c]; | |
123 if (typeof existing == "string") | |
124 { | |
125 // Single choice for this suffix, maybe the same entry? | |
126 if (existing.substr(0, suffix.length - 1) == suffix.substr(1) && existing[
suffix.length - 1] == " ") | |
127 { | |
128 // Same entry, simply replace it by new priority | |
129 tree[c] = suffix.substr(1) + " " + priority; | |
130 } | |
131 else | |
132 { | |
133 // Different entry, need to add a new branching point and go deeper | |
134 if (existing[0] == " ") | |
135 tree[c] = {"": existing}; | |
136 else | |
137 { | |
138 tree[c] = {}; | |
139 tree[c][existing[0]] = existing.substr(1); | |
140 } | |
141 addSuffix(tree[c], suffix.substr(1), priority); | |
142 } | |
143 } | |
144 else | |
145 { | |
146 // Multiple choices for this suffix - go deeper | |
147 addSuffix(existing, suffix.substr(1), priority); | |
148 } | |
149 } | |
150 else | |
151 { | |
152 // No existing entry yet, just add ours | |
153 tree[c] = suffix.substr(1) + " " + priority; | |
154 } | |
155 } | |
156 | |
157 function removeSuffix(tree, suffix, priority) | |
158 { | |
159 if (suffix.length == 0) | |
160 { | |
161 // We are at the last character, check whether there is an entry with | |
162 // matching priority | |
163 if ("" in tree && tree[""] == " " + priority) | |
164 delete tree[""]; | |
165 return; | |
166 } | |
167 | |
168 let c = suffix[0]; | |
169 if (!(c in tree)) | |
170 return; | |
171 | |
172 if (typeof tree[c] == "string") | |
173 { | |
174 // Single entry - check whether it is the right one | |
175 if (tree[c] == suffix.substr(1) + " " + priority) | |
176 delete tree[c]; | |
177 } | |
178 else | |
179 { | |
180 // Multiple entries, need to go deeper | |
181 removeSuffix(tree[c], suffix.substr(1), priority); | |
182 } | |
183 } | |
184 | |
185 function onTimer() | |
186 { | |
187 // Next check in 1 hour | |
188 updateTimer.delay = 1000 * 60 * 60; | |
189 | |
190 // Only download rules every three days | |
191 let nextUpdate = Prefs.lastRuleUpdate + 60 * 60 * 24 * 3; | |
192 if (nextUpdate > Date.now() / 1000) | |
193 return; | |
194 | |
195 loadRulesFrom("http://urlfixer.org/download/rules.json?version=" + RULES_VERSI
ON, false, function(success) | |
196 { | |
197 if (success) | |
198 { | |
199 rules.timestamp = Date.now(); | |
200 | |
201 try | |
202 { | |
203 // Save the rules to file. | |
204 let rulesText = JSON.stringify(rules); | |
205 let fileStream = FileUtils.openSafeFileOutputStream(getRuleFile()); | |
206 let stream = Cc["@mozilla.org/intl/converter-output-stream;1"].createIns
tance(Ci.nsIConverterOutputStream); | |
207 stream.init(fileStream, "UTF-8", 16384, Ci.nsIConverterInputStream.DEFAU
LT_REPLACEMENT_CHARACTER); | |
208 stream.writeString(rulesText); | |
209 stream.flush(); | |
210 FileUtils.closeSafeFileOutputStream(fileStream); | |
211 } | |
212 catch(e) | |
213 { | |
214 Cu.reportError(e); | |
215 } | |
216 } | |
217 }); | |
218 | |
219 Prefs.lastRuleUpdate = Date.now() / 1000; | |
220 } | |
221 | |
222 exports.getSchemeCorrection = getSchemeCorrection; | |
223 function getSchemeCorrection(scheme) | |
224 { | |
225 return getBestMatch(scheme, rules.scheme, 1, null); | |
226 } | |
227 | |
228 exports.isKnownScheme = isKnownScheme; | |
229 function isKnownScheme(scheme) | |
230 { | |
231 return (getSimilarStrings(scheme, rules.scheme, 0, null).length > 0); | |
232 } | |
233 | |
234 exports.getDomainCorrection = getDomainCorrection; | |
235 function getDomainCorrection(domain) | |
236 { | |
237 // Apply user's custom changes first | |
238 let customRules = Prefs.custom_replace; | |
239 for (let searchString in customRules) | |
240 { | |
241 let replacement = customRules[searchString]; | |
242 searchString = searchString.toLowerCase(); | |
243 if (/^re:+/.test(searchString)) | |
244 domain = domain.replace(new RegExp(RegExp.rightContext, "g"), replacement)
; | |
245 else | |
246 domain = domain.replace(searchString, replacement); | |
247 } | |
248 | |
249 // Now apply our rules on the domain name | |
250 for (let i = 0, l = rules.expressions.length; i < l; i++) | |
251 domain = applyExpression(domain, rules.expressions[i]); | |
252 | |
253 // Find similar known domains, test domains without the www. prefix | |
254 if (domain.substr(0, 4) == "www.") | |
255 domain = "www." + getBestMatch(domain.substr(4), rules.domain, 1, "."); | |
256 else | |
257 domain = getBestMatch(domain, rules.domain, 1, "."); | |
258 | |
259 return domain; | |
260 } | |
261 | |
262 exports.getDomainReferral = getDomainReferral; | |
263 function getDomainReferral(domain) | |
264 { | |
265 if ("domainReferrals" in rules && domain in rules.domainReferrals) | |
266 return rules.domainReferrals[domain]; | |
267 else | |
268 return null; | |
269 } | |
270 | |
271 function applyExpression(string, expression) | |
272 { | |
273 if (expression.nomatch && new RegExp(expression.nomatch).test(string)) | |
274 return string; | |
275 | |
276 return string.replace(new RegExp(expression.find, "g"), expression.replace); | |
277 } | |
278 | |
279 function getSimilarStrings(input, dictionary, maxDistance, separator) | |
280 { | |
281 // We use a non-deterministic final automaton to perform a search on all | |
282 // strings in the dictionary simultaneously (see | |
283 // http://blog.notdot.net/2010/07/Damn-Cool-Algorithms-Levenshtein-Automata | |
284 // for the basic algorithm). However, we use Damerau-Levenshtein distance | |
285 // (transposition of two adjacent letters counts as one operation), meaning | |
286 // one additional transition for the automaton. The number of automaton states | |
287 // can theoretically be extremely large, the maxDistance parameter limits the | |
288 // number of states we need to process however. We process the input string | |
289 // backwards to allow matching domain names for a longer host name. | |
290 | |
291 let results = []; | |
292 | |
293 function processState(entry, distance, position, result) | |
294 { | |
295 let isString = (typeof entry == "string"); | |
296 | |
297 // Do we have a result? | |
298 if (position < 0 || input[position] == separator) | |
299 { | |
300 if (!isString && "" in entry) | |
301 results.push([result, input.substr(position + 1), distance, parseInt(ent
ry[""], 10)]); | |
302 else if (isString && entry[0] == " ") | |
303 results.push([result, input.substr(position + 1), distance, parseInt(ent
ry, 10)]); | |
304 } | |
305 | |
306 // Maybe there is a match | |
307 if (position >= 0) | |
308 { | |
309 let nextChar = input[position]; | |
310 if (!isString && nextChar in entry) | |
311 processState(entry[nextChar], distance, position - 1, nextChar + result)
; | |
312 else if (isString && entry[0] == nextChar) | |
313 processState(entry.substr(1), distance, position - 1, nextChar + result)
; | |
314 } | |
315 | |
316 // Mistakes | |
317 if (distance < maxDistance) | |
318 { | |
319 // Deletion and substitution | |
320 if (!isString) | |
321 { | |
322 for (let c in entry) | |
323 { | |
324 if (c != "") | |
325 processState(entry[c], distance + 1, position, c + result); | |
326 if (c != "" && position >= 0) | |
327 processState(entry[c], distance + 1, position - 1, c + result); | |
328 } | |
329 } | |
330 else if (entry[0] != " ") | |
331 { | |
332 processState(entry.substr(1), distance + 1, position, entry[0] + result)
; | |
333 if (position >= 0) | |
334 processState(entry.substr(1), distance + 1, position - 1, entry[0] + r
esult); | |
335 } | |
336 | |
337 // Insertion | |
338 if (position >= 0) | |
339 processState(entry, distance + 1, position - 1, result); | |
340 | |
341 // Transposition | |
342 if (position >= 1) | |
343 { | |
344 let nextChar1 = input[position]; | |
345 let nextChar2 = input[position - 1]; | |
346 if (isString) | |
347 { | |
348 if (entry[0] == nextChar2 && entry[1] == nextChar1) | |
349 processState(entry.substr(2), distance + 1, position - 2, nextChar1
+ nextChar2 + result); | |
350 } | |
351 else if (nextChar2 in entry) | |
352 { | |
353 let nextEntry = entry[nextChar2]; | |
354 if (typeof nextEntry != "string") | |
355 { | |
356 if (nextChar1 in nextEntry) | |
357 processState(nextEntry[nextChar1], distance + 1, position - 2, nex
tChar1 + nextChar2 + result); | |
358 } | |
359 else | |
360 { | |
361 if (nextEntry[0] == nextChar1) | |
362 processState(nextEntry.substr(1), distance + 1, position - 2, next
Char1 + nextChar2 + result); | |
363 } | |
364 } | |
365 } | |
366 } | |
367 } | |
368 | |
369 processState(dictionary, 0, input.length - 1, ""); | |
370 return results; | |
371 } | |
372 | |
373 function getBestMatch(input, dictionary, maxDistance, separator) | |
374 { | |
375 let suggestions = getSimilarStrings(input, dictionary, maxDistance, separator)
; | |
376 | |
377 let bestSuggestion = null; | |
378 let bestSuggestionDistance; | |
379 let bestSuggestionMatched; | |
380 let bestSuggestionPriority; | |
381 for (let i = 0; i < suggestions.length; i++) | |
382 { | |
383 let [suggestion, matchedString, distance, priority] = suggestions[i]; | |
384 if (suggestion == input) | |
385 return suggestion; | |
386 | |
387 let matchedLen = matchedString.length; | |
388 if (priority < 0 && matchedLen == input.length) | |
389 { | |
390 // TLDs should never be proposed as a replacement for the entire host name | |
391 continue; | |
392 } | |
393 | |
394 if (!bestSuggestion || | |
395 bestSuggestionMatched < matchedLen || | |
396 (bestSuggestionMatched == matchedLen && bestSuggestionDistance > distanc
e) || | |
397 (bestSuggestionMatched == matchedLen && bestSuggestionDistance == distan
ce && bestSuggestionPriority < priority)) | |
398 { | |
399 bestSuggestion = suggestion; | |
400 bestSuggestionDistance = distance; | |
401 bestSuggestionMatched = matchedLen; | |
402 bestSuggestionPriority = priority; | |
403 } | |
404 } | |
405 if (bestSuggestion) | |
406 return input.substr(0, input.length - bestSuggestionMatched) + bestSuggestio
n; | |
407 else | |
408 return input; | |
409 } | |
OLD | NEW |