Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Side by Side Diff: assets/js/punycode.js

Issue 8482109: ABP/Android JavaScript code (Closed)
Patch Set: Created Oct. 5, 2012, 9:23 a.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 /*!
2 * Punycode.js <http://mths.be/punycode>
3 * Copyright 2011 Mathias Bynens <http://mathiasbynens.be/>
4 * Available under MIT license <http://mths.be/mit>
5 */
6
7 ;(function(root) {
8
9 /**
10 * The `punycode` object.
11 * @name punycode
12 * @type Object
13 */
var punycode,

/**
 * Detect the free variables `define`, `exports`, `module` and `require`.
 * Each variable holds the detected value when it exists with the expected
 * `typeof`, and a falsy value otherwise.
 */
freeDefine = typeof define == 'function' && typeof define.amd == 'object' &&
	define.amd && define,
freeExports = typeof exports == 'object' && exports,
freeModule = typeof module == 'object' && module,
freeRequire = typeof require == 'function' && require,

/** Highest positive signed 32-bit integer value */
maxInt = 2147483647, // aka. 0x7FFFFFFF or 2^31-1

/** Bootstring parameters */
base = 36,
tMin = 1,
tMax = 26,
skew = 38,
damp = 700,
initialBias = 72,
initialN = 128, // 0x80
delimiter = '-', // '\x2D'

/** Regular expressions */
regexASCII = /[^\x20-\x7e]/, // matches any character outside \x20-\x7E
regexPunycode = /^xn--/, // matches the `xn--` prefix (case-sensitive)

/** Error messages, keyed by the type passed to `error()` */
errors = {
	'overflow': 'Overflow: input needs wider integers to process.',
	'utf16decode': 'UTF-16(decode): illegal UTF-16 sequence',
	'utf16encode': 'UTF-16(encode): illegal UTF-16 value',
	'not-basic': 'Illegal input >= 0x80 (not a basic code point)',
	'invalid-input': 'Invalid input'
},

/** Convenience shortcuts */
baseMinusTMin = base - tMin,
floor = Math.floor,
stringFromCharCode = String.fromCharCode,

/** Temporary variable (loop key used when copying the API onto `exports`) */
key;
56
57 /*---------------------------------------------------------------------- ----*/
58
/**
 * Raises the error registered under `type` in the `errors` table.
 * @private
 * @param {String} type The key of the message to look up in `errors`.
 * @throws {RangeError} Always; the message is `errors[type]`.
 */
function error(type) {
	var message = errors[type];
	throw new RangeError(message);
}
68
/**
 * A generic `Array#map` utility function.
 * The input array is left untouched; the results are collected in a new
 * array of the same length.
 * @private
 * @param {Array} array The array to iterate over.
 * @param {Function} fn The function that gets called for every array
 * item. It receives the item as its only argument.
 * @returns {Array} A new array of values returned by the callback function.
 */
function map(array, fn) {
	var length = array.length,
	    result = new Array(length);
	// Items are visited from last to first.
	while (length--) {
		result[length] = fn(array[length]);
	}
	return result;
}
84
/**
 * Applies a callback to every dot-separated part of a domain name string
 * and glues the results back together with dots.
 * @private
 * @param {String} string The domain name.
 * @param {Function} fn The function that gets called for every
 * dot-separated part of the domain name.
 * @returns {String} A new string built from the values returned by the
 * callback function.
 */
function mapDomain(string, fn) {
	var separator = '.',
	    labels = string.split(separator),
	    converted = map(labels, fn);
	return converted.join(separator);
}
98
/**
 * Creates an array containing the decimal code points of each character in
 * the string. A surrogate pair in the input becomes a single code point.
 * @see `punycode.utf16.encode`
 * @see <http://tools.ietf.org/html/rfc2781>
 * @memberOf punycode.utf16
 * @name decode
 * @param {String} string The Unicode input string.
 * @returns {Array} The new array.
 * @throws {RangeError} Via `error('utf16decode')` when a surrogate code
 * unit is not part of a well-formed high/low pair.
 */
function utf16decode(string) {
	var output = [],
	    counter = 0,
	    length = string.length,
	    value,
	    extra;
	while (counter < length) {
		value = string.charCodeAt(counter++);
		if ((value & 0xF800) == 0xD800) {
			// `value` is a surrogate code unit: consume the unit after it and
			// require a high surrogate followed by a low surrogate.
			extra = string.charCodeAt(counter++);
			if ((value & 0xFC00) != 0xD800 || (extra & 0xFC00) != 0xDC00) {
				error('utf16decode');
			}
			// Combine the two 10-bit payloads and add the 0x10000 offset.
			value = ((value & 0x3FF) << 10) + (extra & 0x3FF) + 0x10000;
		}
		output.push(value);
	}
	return output;
}
128
/**
 * Creates a string based on an array of decimal code points. Code points
 * above 0xFFFF are written as a surrogate pair. The input array is not
 * modified.
 * @see `punycode.utf16.decode`
 * @see <http://tools.ietf.org/html/rfc2781>
 * @memberOf punycode.utf16
 * @name encode
 * @param {Array} array The array of decimal code points.
 * @returns {String} The new string.
 * @throws {RangeError} Via `error('utf16encode')` when a value lies in the
 * surrogate range 0xD800-0xDFFF.
 */
function utf16encode(array) {
	var output = '',
	    length = array.length,
	    index,
	    value;
	for (index = 0; index < length; ++index) {
		value = array[index];
		if ((value & 0xF800) == 0xD800) {
			error('utf16encode');
		}
		if (value > 0xFFFF) {
			// Emit the high surrogate now; the low surrogate is emitted below.
			value -= 0x10000;
			output += stringFromCharCode(value >>> 10 & 0x3FF | 0xD800);
			value = 0xDC00 | value & 0x3FF;
		}
		output += stringFromCharCode(value);
	}
	return output;
}
153
/**
 * Converts a basic code point into a digit/integer.
 * @see `digitToBasic()`
 * @private
 * @param {Number} codePoint The basic (decimal) code point.
 * @returns {Number} The numeric value of a basic code point (for use in
 * representing integers) in the range `0` to `base - 1`, or `base` if
 * the code point does not represent a value.
 */
function basicToDigit(codePoint) {
	// Both bounds are checked explicitly: a bare `codePoint - 48 < 10` only
	// works with unsigned arithmetic and is true for every smaller code
	// point in JavaScript.
	if (codePoint >= 48 && codePoint < 58) {
		// '0'..'9' map to 26..35
		return codePoint - 22;
	}
	if (codePoint >= 65 && codePoint < 91) {
		// 'A'..'Z' map to 0..25
		return codePoint - 65;
	}
	if (codePoint >= 97 && codePoint < 123) {
		// 'a'..'z' map to 0..25
		return codePoint - 97;
	}
	return base;
}
172
/**
 * Converts a digit/integer into a basic code point.
 * @see `basicToDigit()`
 * @private
 * @param {Number} digit The numeric value of a basic code point, expected
 * to be in the range `0` to `base - 1`.
 * @param {Number} flag Selects the uppercase form when it is not loosely
 * equal to `0`; the lowercase form is used otherwise. The behavior is
 * undefined if the uppercase form is requested for a `digit` that has none.
 * @returns {Number} The basic code point whose value (when used for
 * representing integers) is `digit`.
 */
function digitToBasic(digit, flag) {
	// 26..35 land on ASCII 0..9 after the offset of 22
	var codePoint = digit + 22;
	if (digit < 26) {
		// 0..25 need another 75 to land on ASCII a..z
		codePoint += 75;
	}
	if (flag != 0) {
		// 32 is the distance between a lowercase letter and its uppercase form
		codePoint -= 32;
	}
	return codePoint;
}
189
/**
 * Bias adaptation function as per section 3.4 of RFC 3492.
 * http://tools.ietf.org/html/rfc3492#section-3.4
 * @private
 * @param {Number} delta The delta to scale.
 * @param {Number} numPoints The divisor used for the proportional increase
 * of `delta`.
 * @param {Boolean} firstTime When truthy, `delta` is divided by `damp`;
 * otherwise it is halved.
 * @returns {Number} The new bias.
 */
function adapt(delta, numPoints, firstTime) {
	var threshold = baseMinusTMin * tMax >> 1,
	    k = 0;
	if (firstTime) {
		delta = floor(delta / damp);
	} else {
		delta = delta >> 1;
	}
	delta += floor(delta / numPoints);
	// Shrink `delta` until it is at most the threshold, counting `base`
	// for every division performed.
	while (delta > threshold) {
		delta = floor(delta / baseMinusTMin);
		k += base;
	}
	return floor(k + (baseMinusTMin + 1) * delta / (delta + skew));
}
204
/**
 * Converts a basic code point to lowercase if `flag` is falsy, or to
 * uppercase if `flag` is truthy. The code point is returned unchanged if
 * it is not an ASCII letter.
 * @private
 * @param {Number} codePoint The numeric value of a basic code point.
 * @param {*} flag Truthy to request uppercase, falsy to request lowercase.
 * @returns {Number} The resulting basic code point.
 */
function encodeBasic(codePoint, flag) {
	// Range checks are explicit on both ends; the unsigned-arithmetic idiom
	// `codePoint - 97 < 26` is true for every smaller code point in
	// JavaScript.
	if (codePoint >= 97 && codePoint <= 122) {
		// Normalise lowercase a..z to uppercase first.
		codePoint -= 32;
	}
	if (!flag && codePoint >= 65 && codePoint <= 90) {
		// Lowercase was requested: shift A..Z to a..z.
		codePoint += 32;
	}
	return codePoint;
}
218
/**
 * Converts a Punycode string of ASCII code points to a string of Unicode
 * code points.
 * @memberOf punycode
 * @param {String} input The Punycode string of ASCII code points.
 * @returns {String} The resulting string of Unicode code points.
 * @throws {RangeError} Via `error()`: `'not-basic'` when a code point
 * >= 0x80 precedes the last delimiter, `'invalid-input'` when the input
 * ends in the middle of a variable-length integer or contains a character
 * that is not a digit, and `'overflow'` when a value would exceed `maxInt`.
 */
function decode(input) {
	// Don't use UTF-16
	var output = [],
	    inputLength = input.length,
	    out,
	    i = 0,
	    n = initialN,
	    bias = initialBias,
	    basic,
	    j,
	    index,
	    oldi,
	    w,
	    k,
	    digit,
	    t,
	    /** Cached calculation results */
	    baseMinusT;

	// Handle the basic code points: let `basic` be the number of input code
	// points before the last delimiter, or `0` if there is none, then copy
	// the first basic code points to the output.

	basic = input.lastIndexOf(delimiter);
	if (basic < 0) {
		basic = 0;
	}

	for (j = 0; j < basic; ++j) {
		// if it's not a basic code point
		if (input.charCodeAt(j) >= 0x80) {
			error('not-basic');
		}
		output.push(input.charCodeAt(j));
	}

	// Main decoding loop: start just after the last delimiter if any basic code
	// points were copied; start at the beginning otherwise.

	for (index = basic > 0 ? basic + 1 : 0; index < inputLength; /* no final expression */) {

		// `index` is the index of the next character to be consumed.
		// Decode a generalized variable-length integer into `delta`,
		// which gets added to `i`. The overflow checking is easier
		// if we increase `i` as we go, then subtract off its starting
		// value at the end to obtain `delta`.
		for (oldi = i, w = 1, k = base; /* no condition */; k += base) {

			if (index >= inputLength) {
				error('invalid-input');
			}

			digit = basicToDigit(input.charCodeAt(index++));

			// A character that is not a digit is malformed input, not an
			// arithmetic overflow, so it is reported separately.
			if (digit >= base) {
				error('invalid-input');
			}

			if (digit > floor((maxInt - i) / w)) {
				error('overflow');
			}

			i += digit * w;
			t = k <= bias ? tMin : k >= bias + tMax ? tMax : k - bias;

			if (digit < t) {
				break;
			}

			baseMinusT = base - t;
			if (w > floor(maxInt / baseMinusT)) {
				error('overflow');
			}

			w *= baseMinusT;

		}

		out = output.length + 1;
		bias = adapt(i - oldi, out, oldi == 0);

		// `i` was supposed to wrap around from `out` to `0`,
		// incrementing `n` each time, so we'll fix that now:
		if (floor(i / out) > maxInt - n) {
			error('overflow');
		}

		n += floor(i / out);
		i %= out;

		// Insert `n` at position `i` of the output
		output.splice(i++, 0, n);

	}

	return utf16encode(output);
}
320
/**
 * Converts a string of Unicode code points to a Punycode string of ASCII
 * code points.
 * @memberOf punycode
 * @param {String} input The string of Unicode code points.
 * @returns {String} The resulting Punycode string of ASCII code points.
 * @throws {RangeError} Via `error('overflow')` when `delta` would exceed
 * `maxInt`; errors raised by `utf16decode()` also propagate.
 */
function encode(input) {
	var n,
	    delta,
	    handledCPCount,
	    basicLength,
	    bias,
	    j,
	    m,
	    q,
	    k,
	    t,
	    currentValue,
	    output = [],
	    /** `inputLength` will hold the number of code points in `input`. */
	    inputLength,
	    /** Cached calculation results */
	    handledCPCountPlusOne,
	    baseMinusT,
	    qMinusT;

	// Convert the input in UTF-16 to Unicode
	input = utf16decode(input);

	// Cache the length
	inputLength = input.length;

	// Initialize the state
	n = initialN;
	delta = 0;
	bias = initialBias;

	// Handle the basic code points
	for (j = 0; j < inputLength; ++j) {
		currentValue = input[j];
		if (currentValue < 0x80) {
			output.push(stringFromCharCode(currentValue));
		}
	}

	handledCPCount = basicLength = output.length;

	// `handledCPCount` is the number of code points that have been handled;
	// `basicLength` is the number of basic code points.

	// Finish the basic string - if it is not empty - with a delimiter
	if (basicLength) {
		output.push(delimiter);
	}

	// Main encoding loop:
	while (handledCPCount < inputLength) {

		// All non-basic code points < n have been handled already. Find the next
		// larger one:
		for (m = maxInt, j = 0; j < inputLength; ++j) {
			currentValue = input[j];
			if (currentValue >= n && currentValue < m) {
				m = currentValue;
			}
		}

		// Increase `delta` enough to advance the decoder's <n,i> state to <m,0>,
		// but guard against overflow
		handledCPCountPlusOne = handledCPCount + 1;
		if (m - n > floor((maxInt - delta) / handledCPCountPlusOne)) {
			error('overflow');
		}

		delta += (m - n) * handledCPCountPlusOne;
		n = m;

		for (j = 0; j < inputLength; ++j) {
			currentValue = input[j];

			if (currentValue < n && ++delta > maxInt) {
				error('overflow');
			}

			if (currentValue == n) {
				// Represent delta as a generalized variable-length integer
				for (q = delta, k = base; /* no condition */; k += base) {
					t = k <= bias ? tMin : k >= bias + tMax ? tMax : k - bias;
					if (q < t) {
						break;
					}
					qMinusT = q - t;
					baseMinusT = base - t;
					output.push(
						stringFromCharCode(digitToBasic(t + qMinusT % baseMinusT, 0))
					);
					q = floor(qMinusT / baseMinusT);
				}

				// The final digit is the remaining `q`, which is below `t`.
				output.push(stringFromCharCode(digitToBasic(q, 0)));
				bias = adapt(delta, handledCPCountPlusOne, handledCPCount == basicLength);
				delta = 0;
				++handledCPCount;
			}
		}

		++delta;
		++n;

	}
	return output.join('');
}
434
/**
 * Converts a Punycode string representing a domain name to Unicode. Only
 * the parts of the domain name that start with `xn--` are converted, so
 * calling it on a string that has already been converted to Unicode leaves
 * that string as it is.
 * @memberOf punycode
 * @param {String} domain The Punycode domain name to convert to Unicode.
 * @returns {String} The Unicode representation of the given Punycode
 * string.
 */
function toUnicode(domain) {
	function convertLabel(label) {
		if (!regexPunycode.test(label)) {
			return label;
		}
		// Drop the 4-character `xn--` prefix and lowercase the rest before
		// decoding.
		return decode(label.slice(4).toLowerCase());
	}
	return mapDomain(domain, convertLabel);
}
452
/**
 * Converts a Unicode string representing a domain name to Punycode. Only
 * the parts of the domain name matched by `regexASCII` are converted, so
 * calling it with a domain that is already in ASCII leaves that domain as
 * it is.
 * @memberOf punycode
 * @param {String} domain The domain name to convert, as a Unicode string.
 * @returns {String} The Punycode representation of the given domain name.
 */
function toASCII(domain) {
	function convertLabel(label) {
		if (!regexASCII.test(label)) {
			return label;
		}
		return 'xn--' + encode(label);
	}
	return mapDomain(domain, convertLabel);
}
468
469 /*---------------------------------------------------------------------- ----*/
470
/** Assemble the public API object */
punycode = {
	/**
	 * The version number of this Punycode.js build, as a string.
	 * @memberOf punycode
	 * @type String
	 */
	version: '0.2.0',
	/**
	 * Methods that convert between JavaScript strings (UTF-16 code units)
	 * and arrays of Unicode code points.
	 * @memberOf punycode
	 * @type Object
	 */
	utf16: {
		decode: utf16decode,
		encode: utf16encode
	},
	decode: decode,
	encode: encode,
	toASCII: toASCII,
	toUnicode: toUnicode
};
494
/** Publish `punycode` through whichever module system was detected */
if (freeExports) {
	if (freeModule && freeModule.exports == freeExports) {
		// `module.exports` is the `exports` object (Node.js or Ringo 0.8+):
		// replace it with the API object.
		freeModule.exports = punycode;
	} else {
		// Only `exports` is usable (Narwhal or Ringo 0.7-): copy the API's
		// own properties onto it one by one.
		for (key in punycode) {
			if (punycode.hasOwnProperty(key)) {
				freeExports[key] = punycode[key];
			}
		}
	}
} else if (freeDefine) {
	// AMD loader (curl.js or RequireJS): register under the name `punycode`
	define('punycode', punycode);
} else {
	// No module system (browser or Rhino): attach to the root object
	root.punycode = punycode;
}
513
514 }(this));
OLDNEW

Powered by Google App Engine
This is Rietveld