Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Side by Side Diff: lib/typoRules.js

Issue 8559070: Integrated URL Fixer into Adblock Plus (Closed)
Patch Set: Created Nov. 9, 2012, 3:21 p.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View unified diff | Download patch
« no previous file with comments | « lib/typoNetError.js ('k') | lib/typoSurvey.js » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 /* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this file,
3 * You can obtain one at http://mozilla.org/MPL/2.0/. */
4
5 Cu.import("resource://gre/modules/Services.jsm");
6 Cu.import("resource://gre/modules/FileUtils.jsm");
7
8 let {Prefs} = require("prefs");
9
10 let RULES_VERSION = 2;
11
12 let CUSTOM_RULE_PRIORITY = 0x7FFFFFFF;
13
14 let rules = {expressions: []};
15
16 loadRules();
17
18 // Make first attempt to update rules after five minutes
19 let updateTimer = null;
20 updateTimer = Cc["@mozilla.org/timer;1"].createInstance(Ci.nsITimer);
21 updateTimer.initWithCallback(onTimer, 1000 * 60 * 5, Ci.nsITimer.TYPE_REPEATING_ SLACK);
22 onShutdown.add(function() updateTimer.cancel());
23
24 function loadRules()
25 {
26 loadRulesFrom(Services.io.newFileURI(getRuleFile()).spec, false, function(succ ess)
27 {
28 if (!success)
29 loadRulesFrom(require("info").addonRoot + "defaults/typoRules.json", true) ;
30 });
31 }
32
33 function loadRulesFrom(url, ignoreVersion, callback)
34 {
35 if (typeof callback != "function")
36 callback = function() {};
37
38 let request = Cc["@mozilla.org/xmlextras/xmlhttprequest;1"].createInstance(Ci. nsIXMLHttpRequest);
39 request.open("GET", url);
40 request.overrideMimeType("text/plain");
41 request.addEventListener("load", function()
42 {
43 try
44 {
45 // Remove comments from the file if any
46 let data = JSON.parse(request.responseText.replace(/^\s*\/\/.*/mg, ""));
47 if (ignoreVersion || data.version == RULES_VERSION)
48 {
49 rules = data;
50 callback(true);
51
52 // Add user-defined rules after calling the callback - if the callback
53 // saves the rules then the custom rules won't be included.
54 addCustomRules();
55 }
56 else
57 callback(false);
58 }
59 catch (e)
60 {
61 Cu.reportError(e);
62 callback(false);
63 }
64 }, false);
65 request.addEventListener("error", function()
66 {
67 callback(false);
68 }, false);
69
70 try
71 {
72 request.send(null);
73 }
74 catch (e)
75 {
76 if (e.result != Cr.NS_ERROR_FILE_NOT_FOUND)
77 Cu.reportError(e);
78 callback(false);
79 }
80 }
81
82 function getRuleFile()
83 {
84 let result = FileUtils.getFile("ProfD", [require("info").addonName + "-rules.j son"]);
85
86 getRuleFile = function() result;
87 return getRuleFile();
88 }
89
90 function addCustomRules()
91 {
92 for (let domain in Prefs.whitelist)
93 onWhitelistEntryAdded(domain);
94 }
95
96 function onWhitelistEntryAdded(domain)
97 {
98 let reverse = domain.split("").reverse().join("");
99 addSuffix(rules.domain, reverse, CUSTOM_RULE_PRIORITY);
100 }
101 exports.onWhitelistEntryAdded = onWhitelistEntryAdded;
102
103 function onWhitelistEntryRemoved(domain)
104 {
105 let reverse = domain.split("").reverse().join("");
106 removeSuffix(rules.domain, reverse, CUSTOM_RULE_PRIORITY);
107 }
108 exports.onWhitelistEntryRemoved = onWhitelistEntryRemoved;
109
110 function addSuffix(tree, suffix, priority)
111 {
112 if (suffix.length == 0)
113 {
114 // We are at the last character, just put our priority here
115 tree[""] = " " + priority;
116 return;
117 }
118
119 let c = suffix[0];
120 if (c in tree)
121 {
122 let existing = tree[c];
123 if (typeof existing == "string")
124 {
125 // Single choice for this suffix, maybe the same entry?
126 if (existing.substr(0, suffix.length - 1) == suffix.substr(1) && existing[ suffix.length - 1] == " ")
127 {
128 // Same entry, simply replace it by new priority
129 tree[c] = suffix.substr(1) + " " + priority;
130 }
131 else
132 {
133 // Different entry, need to add a new branching point and go deeper
134 if (existing[0] == " ")
135 tree[c] = {"": existing};
136 else
137 {
138 tree[c] = {};
139 tree[c][existing[0]] = existing.substr(1);
140 }
141 addSuffix(tree[c], suffix.substr(1), priority);
142 }
143 }
144 else
145 {
146 // Multiple choices for this suffix - go deeper
147 addSuffix(existing, suffix.substr(1), priority);
148 }
149 }
150 else
151 {
152 // No existing entry yet, just add ours
153 tree[c] = suffix.substr(1) + " " + priority;
154 }
155 }
156
157 function removeSuffix(tree, suffix, priority)
158 {
159 if (suffix.length == 0)
160 {
161 // We are at the last character, check whether there is an entry with
162 // matching priority
163 if ("" in tree && tree[""] == " " + priority)
164 delete tree[""];
165 return;
166 }
167
168 let c = suffix[0];
169 if (!(c in tree))
170 return;
171
172 if (typeof tree[c] == "string")
173 {
174 // Single entry - check whether it is the right one
175 if (tree[c] == suffix.substr(1) + " " + priority)
176 delete tree[c];
177 }
178 else
179 {
180 // Multiple entries, need to go deeper
181 removeSuffix(tree[c], suffix.substr(1), priority);
182 }
183 }
184
185 function onTimer()
186 {
187 // Next check in 1 hour
188 updateTimer.delay = 1000 * 60 * 60;
189
190 // Only download rules every three days
191 let nextUpdate = Prefs.lastRuleUpdate + 60 * 60 * 24 * 3;
192 if (nextUpdate > Date.now() / 1000)
193 return;
194
195 loadRulesFrom("http://urlfixer.org/download/rules.json?version=" + RULES_VERSI ON, false, function(success)
196 {
197 if (success)
198 {
199 rules.timestamp = Date.now();
200
201 try
202 {
203 // Save the rules to file.
204 let rulesText = JSON.stringify(rules);
205 let fileStream = FileUtils.openSafeFileOutputStream(getRuleFile());
206 let stream = Cc["@mozilla.org/intl/converter-output-stream;1"].createIns tance(Ci.nsIConverterOutputStream);
207 stream.init(fileStream, "UTF-8", 16384, Ci.nsIConverterInputStream.DEFAU LT_REPLACEMENT_CHARACTER);
208 stream.writeString(rulesText);
209 stream.flush();
210 FileUtils.closeSafeFileOutputStream(fileStream);
211 }
212 catch(e)
213 {
214 Cu.reportError(e);
215 }
216 }
217 });
218
219 Prefs.lastRuleUpdate = Date.now() / 1000;
220 }
221
222 exports.getSchemeCorrection = getSchemeCorrection;
223 function getSchemeCorrection(scheme)
224 {
225 return getBestMatch(scheme, rules.scheme, 1, null);
226 }
227
228 exports.isKnownScheme = isKnownScheme;
229 function isKnownScheme(scheme)
230 {
231 return (getSimilarStrings(scheme, rules.scheme, 0, null).length > 0);
232 }
233
234 exports.getDomainCorrection = getDomainCorrection;
235 function getDomainCorrection(domain)
236 {
237 // Apply user's custom changes first
238 let customRules = Prefs.custom_replace;
239 for (let searchString in customRules)
240 {
241 let replacement = customRules[searchString];
242 searchString = searchString.toLowerCase();
243 if (/^re:+/.test(searchString))
244 domain = domain.replace(new RegExp(RegExp.rightContext, "g"), replacement) ;
245 else
246 domain = domain.replace(searchString, replacement);
247 }
248
249 // Now apply our rules on the domain name
250 for (let i = 0, l = rules.expressions.length; i < l; i++)
251 domain = applyExpression(domain, rules.expressions[i]);
252
253 // Find similar known domains, test domains without the www. prefix
254 if (domain.substr(0, 4) == "www.")
255 domain = "www." + getBestMatch(domain.substr(4), rules.domain, 1, ".");
256 else
257 domain = getBestMatch(domain, rules.domain, 1, ".");
258
259 return domain;
260 }
261
262 exports.getDomainReferral = getDomainReferral;
263 function getDomainReferral(domain)
264 {
265 if ("domainReferrals" in rules && domain in rules.domainReferrals)
266 return rules.domainReferrals[domain];
267 else
268 return null;
269 }
270
271 function applyExpression(string, expression)
272 {
273 if (expression.nomatch && new RegExp(expression.nomatch).test(string))
274 return string;
275
276 return string.replace(new RegExp(expression.find, "g"), expression.replace);
277 }
278
279 function getSimilarStrings(input, dictionary, maxDistance, separator)
280 {
281 // We use a non-deterministic final automaton to perform a search on all
282 // strings in the dictionary simultaneously (see
283 // http://blog.notdot.net/2010/07/Damn-Cool-Algorithms-Levenshtein-Automata
284 // for the basic algorithm). However, we use Damerau-Levenshtein distance
285 // (transposition of two adjacent letters counts as one operation), meaning
286 // one additional transition for the automaton. The number of automaton states
287 // can theoretically be extremely large, the maxDistance parameter limits the
288 // number of states we need to process however. We process the input string
289 // backwards to allow matching domain names for a longer host name.
290
291 let results = [];
292
293 function processState(entry, distance, position, result)
294 {
295 let isString = (typeof entry == "string");
296
297 // Do we have a result?
298 if (position < 0 || input[position] == separator)
299 {
300 if (!isString && "" in entry)
301 results.push([result, input.substr(position + 1), distance, parseInt(ent ry[""], 10)]);
302 else if (isString && entry[0] == " ")
303 results.push([result, input.substr(position + 1), distance, parseInt(ent ry, 10)]);
304 }
305
306 // Maybe there is a match
307 if (position >= 0)
308 {
309 let nextChar = input[position];
310 if (!isString && nextChar in entry)
311 processState(entry[nextChar], distance, position - 1, nextChar + result) ;
312 else if (isString && entry[0] == nextChar)
313 processState(entry.substr(1), distance, position - 1, nextChar + result) ;
314 }
315
316 // Mistakes
317 if (distance < maxDistance)
318 {
319 // Deletion and substitution
320 if (!isString)
321 {
322 for (let c in entry)
323 {
324 if (c != "")
325 processState(entry[c], distance + 1, position, c + result);
326 if (c != "" && position >= 0)
327 processState(entry[c], distance + 1, position - 1, c + result);
328 }
329 }
330 else if (entry[0] != " ")
331 {
332 processState(entry.substr(1), distance + 1, position, entry[0] + result) ;
333 if (position >= 0)
334 processState(entry.substr(1), distance + 1, position - 1, entry[0] + r esult);
335 }
336
337 // Insertion
338 if (position >= 0)
339 processState(entry, distance + 1, position - 1, result);
340
341 // Transposition
342 if (position >= 1)
343 {
344 let nextChar1 = input[position];
345 let nextChar2 = input[position - 1];
346 if (isString)
347 {
348 if (entry[0] == nextChar2 && entry[1] == nextChar1)
349 processState(entry.substr(2), distance + 1, position - 2, nextChar1 + nextChar2 + result);
350 }
351 else if (nextChar2 in entry)
352 {
353 let nextEntry = entry[nextChar2];
354 if (typeof nextEntry != "string")
355 {
356 if (nextChar1 in nextEntry)
357 processState(nextEntry[nextChar1], distance + 1, position - 2, nex tChar1 + nextChar2 + result);
358 }
359 else
360 {
361 if (nextEntry[0] == nextChar1)
362 processState(nextEntry.substr(1), distance + 1, position - 2, next Char1 + nextChar2 + result);
363 }
364 }
365 }
366 }
367 }
368
369 processState(dictionary, 0, input.length - 1, "");
370 return results;
371 }
372
373 function getBestMatch(input, dictionary, maxDistance, separator)
374 {
375 let suggestions = getSimilarStrings(input, dictionary, maxDistance, separator) ;
376
377 let bestSuggestion = null;
378 let bestSuggestionDistance;
379 let bestSuggestionMatched;
380 let bestSuggestionPriority;
381 for (let i = 0; i < suggestions.length; i++)
382 {
383 let [suggestion, matchedString, distance, priority] = suggestions[i];
384 if (suggestion == input)
385 return suggestion;
386
387 let matchedLen = matchedString.length;
388 if (priority < 0 && matchedLen == input.length)
389 {
390 // TLDs should never be proposed as a replacement for the entire host name
391 continue;
392 }
393
394 if (!bestSuggestion ||
395 bestSuggestionMatched < matchedLen ||
396 (bestSuggestionMatched == matchedLen && bestSuggestionDistance > distanc e) ||
397 (bestSuggestionMatched == matchedLen && bestSuggestionDistance == distan ce && bestSuggestionPriority < priority))
398 {
399 bestSuggestion = suggestion;
400 bestSuggestionDistance = distance;
401 bestSuggestionMatched = matchedLen;
402 bestSuggestionPriority = priority;
403 }
404 }
405 if (bestSuggestion)
406 return input.substr(0, input.length - bestSuggestionMatched) + bestSuggestio n;
407 else
408 return input;
409 }
OLDNEW
« no previous file with comments | « lib/typoNetError.js ('k') | lib/typoSurvey.js » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld