Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Side by Side Diff: lib/punycode.js

Issue 29350076: Issue 4350 - Update punycode.js to 2.0.0 (Closed)
Patch Set: Add license header and fix for our use Created Aug. 23, 2016, 3:29 p.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /*! http://mths.be/punycode v1.2.3 by @mathias */ 1 /*
2 /* Used under GPL 2.0, see https://github.com/bestiejs/punycode.js/blob/master/L ICENSE-GPL.txt */ 2 * Copyright Mathias Bynens <https://mathiasbynens.be/>
Sebastian Noack 2016/08/23 16:27:06 The MIT license header seems to require a copyrigh
kzar 2016/08/23 16:51:13 Done. (I put 2011-2016 after looking at his reposi
3 ;(function() { 3 *
4 * Permission is hereby granted, free of charge, to any person obtaining
5 * a copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sublicense, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be
13 * included in all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
19 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
20 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
21 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23
24 'use strict';
25
/** Largest positive signed 32-bit integer value (2^31 - 1). */
const maxInt = 0x7FFFFFFF; // 2147483647

/** Bootstring parameters */
const base = 36;
const tMin = 1;
const tMax = 26;
const skew = 38;
const damp = 700;
const initialBias = 72;
const initialN = 0x80; // 128
const delimiter = '-'; // '\x2D'

/** Regular expressions */
const regexPunycode = /^xn--/;
const regexNonASCII = /[^\x20-\x7E]/; // unprintable ASCII chars + non-ASCII chars
const regexSeparators = /[\x2E\u3002\uFF0E\uFF61]/g; // RFC 3490 separators

/** Error messages, keyed by the type string passed to `error()` */
const errors = {
	'overflow': 'Overflow: input needs wider integers to process',
	'not-basic': 'Illegal input >= 0x80 (not a basic code point)',
	'invalid-input': 'Invalid input'
};

/** Convenience shortcuts */
const baseMinusTMin = base - tMin;
const { floor } = Math;
const stringFromCharCode = String.fromCharCode;
55
56 /*--------------------------------------------------------------------------*/
57
/**
 * Raises the error registered under the given type.
 * @private
 * @param {String} type A key of the `errors` table.
 * @throws {RangeError} Always; its message is `errors[type]`.
 */
function error(type) {
	const message = errors[type];
	throw new RangeError(message);
}
67
/**
 * Minimal `Array#map` replacement that builds a fresh array.
 * The callback receives only the item, and items are visited from the
 * last index down to the first.
 * @private
 * @param {Array} array The array to iterate over.
 * @param {Function} fn Called once per item with that item.
 * @returns {Array} A new array holding the callback results at the same
 * indexes as their inputs.
 */
function map(array, fn) {
	const result = [];
	for (let index = array.length - 1; index >= 0; --index) {
		result[index] = fn(array[index]);
	}
	return result;
}
84
/**
 * A simple `Array#map`-like wrapper to work with domain name strings or email
 * addresses. The callback is applied to every label of the domain part.
 * @private
 * @param {String} string The domain name or email address.
 * @param {Function} fn The function that gets called for every label.
 * @returns {String} The input with each domain label replaced by the
 * callback's result, labels joined with `.`.
 */
function mapDomain(string, fn) {
	// In email addresses, only the domain name should be punycoded. Leave
	// the local part (i.e. everything up to the last `@`) intact. Splitting
	// at the LAST `@` keeps inputs with several `@` signs whole instead of
	// dropping everything after the second one.
	const at = string.lastIndexOf('@');
	const localPart = at === -1 ? '' : string.slice(0, at + 1);
	const domain = at === -1 ? string : string.slice(at + 1);
	// Avoid `split(regex)` for IE8 compatibility. See #17.
	const labels = domain.replace(regexSeparators, '\x2E').split('.');
	return localPart + map(labels, fn).join('.');
}
110
/**
 * Creates an array containing the numeric code points of each Unicode
 * character in the string. While JavaScript uses UCS-2 internally,
 * this function will convert a pair of surrogate halves (each of which
 * UCS-2 exposes as separate characters) into a single code point,
 * matching UTF-16. Unmatched surrogates are emitted unchanged.
 * @see `punycode.ucs2.encode`
 * @see <https://mathiasbynens.be/notes/javascript-encoding>
 * @memberOf punycode.ucs2
 * @name decode
 * @param {String} string The Unicode input string (UCS-2).
 * @returns {Array} The new array of code points.
 */
function ucs2decode(string) {
	const output = [];
	const length = string.length;
	let counter = 0;
	while (counter < length) {
		const value = string.charCodeAt(counter++);
		const isHighSurrogate = value >= 0xD800 && value <= 0xDBFF;
		if (!isHighSurrogate || counter >= length) {
			// Ordinary code unit, or a high surrogate with nothing after it.
			output.push(value);
			continue;
		}
		const extra = string.charCodeAt(counter);
		if ((extra & 0xFC00) == 0xDC00) {
			// Low surrogate: combine the pair and consume both code units.
			output.push(((value & 0x3FF) << 10) + (extra & 0x3FF) + 0x10000);
			counter++;
		} else {
			// Unmatched surrogate: append only this code unit and leave the
			// next one for the following iteration, in case it is itself
			// the high surrogate of a surrogate pair.
			output.push(value);
		}
	}
	return output;
}
147
/**
 * Creates a string based on an array of numeric code points.
 * The array is converted in fixed-size slices: handing a very long array
 * to a single `apply` call can exceed the engine's argument-count limit.
 * @see `punycode.ucs2.decode`
 * @memberOf punycode.ucs2
 * @name encode
 * @param {Array} array The array of numeric code points.
 * @returns {String} The new Unicode string (UCS-2).
 */
const ucs2encode = array => {
	const chunkSize = 0x2000;
	let result = '';
	for (let start = 0; start < array.length; start += chunkSize) {
		const chunk = Array.prototype.slice.call(array, start, start + chunkSize);
		result += String.fromCodePoint.apply(null, chunk);
	}
	return result;
};
157
/**
 * Converts a basic code point into a digit/integer.
 * @see `digitToBasic()`
 * @private
 * @param {Number} codePoint The basic numeric code point value.
 * @returns {Number} The numeric value of a basic code point (for use in
 * representing integers) in the range `0` to `base - 1`, or `base` if
 * the code point does not represent a value.
 */
const basicToDigit = function(codePoint) {
	// Each range needs both bounds: with only an upper bound, code points
	// below the range (e.g. `-` or `:`) produce a bogus or negative digit.
	if (codePoint >= 0x30 && codePoint < 0x3A) {
		// `0`..`9` map to 26..35
		return 26 + (codePoint - 0x30);
	}
	if (codePoint >= 0x41 && codePoint < 0x5B) {
		// `A`..`Z` map to 0..25
		return codePoint - 0x41;
	}
	if (codePoint >= 0x61 && codePoint < 0x7B) {
		// `a`..`z` map to 0..25
		return codePoint - 0x61;
	}
	return base;
};
179
/**
 * Converts a digit/integer into a basic code point.
 * @see `basicToDigit()`
 * @private
 * @param {Number} digit The numeric value of a basic code point, expected
 * to be in the range `0` to `base - 1`.
 * @param {Number} flag If non-zero, the uppercase form is used; else, the
 * lowercase form is used. The behavior is undefined if `flag` is non-zero
 * and `digit` has no uppercase form.
 * @returns {Number} The basic code point whose value (when used for
 * representing integers) is `digit`.
 */
const digitToBasic = function(digit, flag) {
	//  0..25 map to ASCII a..z or A..Z
	// 26..35 map to ASCII 0..9
	const letterOffset = digit < 26 ? 75 : 0;
	const caseShift = flag != 0 ? 32 : 0;
	return digit + 22 + letterOffset - caseShift;
};
196
/**
 * Bias adaptation function as per section 3.4 of RFC 3492.
 * https://tools.ietf.org/html/rfc3492#section-3.4
 * @private
 * @param {Number} delta The delta that was just encoded or decoded.
 * @param {Number} numPoints Divisor applied to the scaled delta.
 * @param {Boolean} firstTime When truthy, `delta` is divided by `damp`;
 * otherwise it is halved.
 * @returns {Number} The new bias.
 */
const adapt = function(delta, numPoints, firstTime) {
	let k = 0;
	delta = firstTime ? floor(delta / damp) : delta >> 1;
	delta += floor(delta / numPoints);
	// `*` binds tighter than `>>`: the threshold is half of
	// `baseMinusTMin * tMax`.
	const threshold = (baseMinusTMin * tMax) >> 1;
	while (delta > threshold) {
		delta = floor(delta / baseMinusTMin);
		k += base;
	}
	return floor(k + (baseMinusTMin + 1) * delta / (delta + skew));
};
211
/**
 * Converts a Punycode string of ASCII-only symbols to a string of Unicode
 * symbols.
 * @memberOf punycode
 * @param {String} input The Punycode string of ASCII-only symbols.
 * @returns {String} The resulting string of Unicode symbols.
 * @throws {RangeError} With the `not-basic` message if a code unit >= 0x80
 * precedes the last delimiter, the `invalid-input` message if the input
 * ends inside an integer or contains a character that is not a digit, and
 * the `overflow` message if a value would exceed `maxInt`.
 */
const decode = function(input) {
	// Don't use UCS-2.
	const output = [];
	const inputLength = input.length;
	let i = 0;
	let n = initialN;
	let bias = initialBias;

	// Handle the basic code points: let `basic` be the number of input code
	// points before the last delimiter, or `0` if there is none, then copy
	// the first basic code points to the output.
	let basic = input.lastIndexOf(delimiter);
	if (basic < 0) {
		basic = 0;
	}

	for (let j = 0; j < basic; ++j) {
		const codeUnit = input.charCodeAt(j);
		// if it's not a basic code point
		if (codeUnit >= 0x80) {
			error('not-basic');
		}
		output.push(codeUnit);
	}

	// Main decoding loop: start just after the last delimiter if any basic code
	// points were copied; start at the beginning otherwise.
	for (let index = basic > 0 ? basic + 1 : 0; index < inputLength; /* no final expression */) {

		// `index` is the index of the next character to be consumed.
		// Decode a generalized variable-length integer into `delta`,
		// which gets added to `i`. The overflow checking is easier
		// if we increase `i` as we go, then subtract off its starting
		// value at the end to obtain `delta`.
		const oldi = i;
		for (let w = 1, k = base; /* no condition */; k += base) {

			if (index >= inputLength) {
				error('invalid-input');
			}

			const digit = basicToDigit(input.charCodeAt(index++));

			// A character that is not a digit is malformed input, not an
			// arithmetic overflow, so it gets its own error.
			if (digit >= base) {
				error('invalid-input');
			}
			if (digit > floor((maxInt - i) / w)) {
				error('overflow');
			}

			i += digit * w;
			const t = k <= bias ? tMin : (k >= bias + tMax ? tMax : k - bias);

			if (digit < t) {
				break;
			}

			const baseMinusT = base - t;
			if (w > floor(maxInt / baseMinusT)) {
				error('overflow');
			}

			w *= baseMinusT;
		}

		const out = output.length + 1;
		bias = adapt(i - oldi, out, oldi == 0);

		// `i` was supposed to wrap around from `out` to `0`,
		// incrementing `n` each time, so we'll fix that now:
		if (floor(i / out) > maxInt - n) {
			error('overflow');
		}

		n += floor(i / out);
		i %= out;

		// Insert `n` at position `i` of the output.
		output.splice(i++, 0, n);
	}

	return String.fromCodePoint.apply(null, output);
};
302
/**
 * Converts a string of Unicode symbols (e.g. a domain name label) to a
 * Punycode string of ASCII-only symbols.
 * @memberOf punycode
 * @param {String} input The string of Unicode symbols.
 * @returns {String} The resulting Punycode string of ASCII-only symbols.
 * @throws {RangeError} With the `overflow` message if a delta would exceed
 * `maxInt`.
 */
const encode = function(input) {
	const output = [];

	// Convert the input in UCS-2 to an array of Unicode code points.
	input = ucs2decode(input);

	// Cache the length.
	const inputLength = input.length;

	// Initialize the state.
	let n = initialN;
	let delta = 0;
	let bias = initialBias;

	// Handle the basic code points.
	for (const currentValue of input) {
		if (currentValue < 0x80) {
			output.push(stringFromCharCode(currentValue));
		}
	}

	// `handledCPCount` is the number of code points that have been handled;
	// `basicLength` is the number of basic code points.
	const basicLength = output.length;
	let handledCPCount = basicLength;

	// Finish the basic string with a delimiter unless it's empty.
	if (basicLength) {
		output.push(delimiter);
	}

	// Main encoding loop:
	while (handledCPCount < inputLength) {

		// All non-basic code points < n have been handled already. Find the next
		// larger one:
		let m = maxInt;
		for (const currentValue of input) {
			if (currentValue >= n && currentValue < m) {
				m = currentValue;
			}
		}

		// Increase `delta` enough to advance the decoder's <n,i> state to <m,0>,
		// but guard against overflow.
		const handledCPCountPlusOne = handledCPCount + 1;
		if (m - n > floor((maxInt - delta) / handledCPCountPlusOne)) {
			error('overflow');
		}

		delta += (m - n) * handledCPCountPlusOne;
		n = m;

		for (const currentValue of input) {
			if (currentValue < n && ++delta > maxInt) {
				error('overflow');
			}
			if (currentValue == n) {
				// Represent delta as a generalized variable-length integer.
				let q = delta;
				for (let k = base; /* no condition */; k += base) {
					const t = k <= bias ? tMin : (k >= bias + tMax ? tMax : k - bias);
					if (q < t) {
						break;
					}
					const qMinusT = q - t;
					const baseMinusT = base - t;
					output.push(
						stringFromCharCode(digitToBasic(t + qMinusT % baseMinusT, 0))
					);
					q = floor(qMinusT / baseMinusT);
				}

				output.push(stringFromCharCode(digitToBasic(q, 0)));
				bias = adapt(delta, handledCPCountPlusOne, handledCPCount == basicLength);
				delta = 0;
				++handledCPCount;
			}
		}

		++delta;
		++n;
	}
	return output.join('');
};
397
/**
 * Converts a Punycode string representing a domain name or an email address
 * to Unicode. Only the Punycoded parts of the input will be converted, i.e.
 * it doesn't matter if you call it on a string that has already been
 * converted to Unicode.
 * @memberOf punycode
 * @param {String} input The Punycoded domain name or email address to
 * convert to Unicode.
 * @returns {String} The Unicode representation of the given Punycode
 * string.
 */
const toUnicode = function(input) {
	return mapDomain(input, function(string) {
		// The payload is lower-cased before decoding, so the `xn--` prefix
		// is matched case-insensitively as well; otherwise a label such as
		// `XN--...` would be passed through undecoded.
		const prefix = string.slice(0, 4).toLowerCase();
		return regexPunycode.test(prefix)
			? decode(string.slice(4).toLowerCase())
			: string;
	});
};
416
/**
 * Converts a Unicode string representing a domain name or an email address to
 * Punycode. Labels that match `regexNonASCII` are encoded and prefixed with
 * `xn--`; every other label is returned unchanged, so calling this on a
 * domain that is already in ASCII is harmless.
 * @memberOf punycode
 * @param {String} input The domain name or email address to convert, as a
 * Unicode string.
 * @returns {String} The Punycode representation of the given domain name or
 * email address.
 */
const toASCII = function(input) {
	return mapDomain(input, (label) => {
		if (regexNonASCII.test(label)) {
			return 'xn--' + encode(label);
		}
		return label;
	});
};
435
436 /*--------------------------------------------------------------------------*/
437
438 /** Define the public API */
439 const punycode = exports = {
Sebastian Noack 2016/08/23 16:27:06 It seems the punycode object isn't used anywhere i
kzar 2016/08/23 16:51:13 Done.
4 /** 440 /**
5 » * The `punycode` object. 441 » * A string representing the current Punycode.js version number.
6 » * @name punycode 442 » * @memberOf punycode
443 » * @type String
444 » */
445 » 'version': '2.0.0',
446 » /**
447 » * An object of methods to convert from JavaScript's internal character
448 » * representation (UCS-2) to Unicode code points, and back.
449 » * @see <https://mathiasbynens.be/notes/javascript-encoding>
450 » * @memberOf punycode
7 * @type Object 451 * @type Object
8 */ 452 */
9 » var punycode, 453 » 'ucs2': {
10 454 » » 'decode': ucs2decode,
11 » /** Highest positive signed 32-bit float value */ 455 » » 'encode': ucs2encode
12 » maxInt = 2147483647, // aka. 0x7FFFFFFF or 2^31-1
13
14 » /** Bootstring parameters */
15 » base = 36,
16 » tMin = 1,
17 » tMax = 26,
18 » skew = 38,
19 » damp = 700,
20 » initialBias = 72,
21 » initialN = 128, // 0x80
22 » delimiter = '-', // '\x2D'
23
24 » /** Regular expressions */
25 » regexPunycode = /^xn--/,
26 » regexNonASCII = /[^ -~]/, // unprintable ASCII chars + non-ASCII chars
27 » regexSeparators = /\x2E|\u3002|\uFF0E|\uFF61/g, // RFC 3490 separators
28
29 » /** Error messages */
30 » errors = {
31 » » 'overflow': 'Overflow: input needs wider integers to process',
32 » » 'not-basic': 'Illegal input >= 0x80 (not a basic code point)',
33 » » 'invalid-input': 'Invalid input'
34 }, 456 },
35 457 'decode': decode,
36 /** Convenience shortcuts */ 458 'encode': encode,
37 baseMinusTMin = base - tMin, 459 'toASCII': toASCII,
38 floor = Math.floor, 460 'toUnicode': toUnicode
39 stringFromCharCode = String.fromCharCode, 461 };
40
41 /** Temporary variable */
42 key;
43
44 /*---------------------------------------------------------------------- ----*/
45
/**
 * Raises the error registered under the given type.
 * @private
 * @param {String} type A key of the `errors` table.
 * @throws {RangeError} Always; its message is `errors[type]`.
 */
function error(type) {
	var message = errors[type];
	throw new RangeError(message);
}
55
/**
 * A generic `Array#map` utility function.
 * Items are visited from the last index down to the first, and the
 * callback receives only the item.
 * @private
 * @param {Array} array The array to iterate over. It is left unmodified.
 * @param {Function} fn The function that gets called for every array
 * item.
 * @returns {Array} A new array of values returned by the callback function.
 */
function map(array, fn) {
	var length = array.length;
	// Collect into a separate array: writing the results back into `array`
	// would overwrite the caller's data.
	var result = [];
	while (length--) {
		result[length] = fn(array[length]);
	}
	return result;
}
71
/**
 * Applies a callback to every label of a domain name string.
 * The string is split on `regexSeparators`, and the converted labels are
 * joined back together with `.`.
 * @private
 * @param {String} string The domain name.
 * @param {Function} fn The function that gets called for every label.
 * @returns {String} The domain name rebuilt from the callback results.
 */
function mapDomain(string, fn) {
	var labels = string.split(regexSeparators);
	var converted = map(labels, fn);
	return converted.join('.');
}
84
/**
 * Creates an array containing the numeric code points of each Unicode
 * character in the string. While JavaScript uses UCS-2 internally,
 * this function will convert a pair of surrogate halves (each of which
 * UCS-2 exposes as separate characters) into a single code point,
 * matching UTF-16. Unmatched surrogates are emitted unchanged.
 * @see `punycode.ucs2.encode`
 * @see <http://mathiasbynens.be/notes/javascript-encoding>
 * @memberOf punycode.ucs2
 * @name decode
 * @param {String} string The Unicode input string (UCS-2).
 * @returns {Array} The new array of code points.
 */
function ucs2decode(string) {
	var output = [];
	var length = string.length;
	var counter = 0;
	var value;
	var extra;
	while (counter < length) {
		value = string.charCodeAt(counter++);
		if (value < 0xD800 || value > 0xDBFF || counter >= length) {
			// ordinary code unit, or a high surrogate with nothing after it
			output.push(value);
			continue;
		}
		extra = string.charCodeAt(counter);
		if ((extra & 0xFC00) == 0xDC00) {
			// low surrogate: combine the pair and consume both code units
			output.push(((value & 0x3FF) << 10) + (extra & 0x3FF) + 0x10000);
			counter++;
		} else {
			// unmatched surrogate; only append this code unit, in case the next
			// code unit is the high surrogate of a surrogate pair
			output.push(value);
		}
	}
	return output;
}
123
/**
 * Creates a string based on an array of numeric code points.
 * Code points above 0xFFFF are written as a surrogate pair.
 * @see `punycode.ucs2.decode`
 * @memberOf punycode.ucs2
 * @name encode
 * @param {Array} array The array of numeric code points. It is only read,
 * never modified.
 * @returns {String} The new Unicode string (UCS-2).
 */
function ucs2encode(array) {
	var output = '';
	var length = array.length;
	var index;
	var value;
	// Build the string directly rather than mapping over `array`, so the
	// caller's code points are not replaced by strings.
	for (index = 0; index < length; ++index) {
		value = array[index];
		if (value > 0xFFFF) {
			value -= 0x10000;
			output += stringFromCharCode(value >>> 10 & 0x3FF | 0xD800);
			value = 0xDC00 | value & 0x3FF;
		}
		output += stringFromCharCode(value);
	}
	return output;
}
144
/**
 * Converts a basic code point into a digit/integer.
 * @see `digitToBasic()`
 * @private
 * @param {Number} codePoint The basic numeric code point value.
 * @returns {Number} The numeric value of a basic code point (for use in
 * representing integers) in the range `0` to `base - 1`, or `base` if
 * the code point does not represent a value.
 */
function basicToDigit(codePoint) {
	// Each range needs both bounds: with only an upper bound, code points
	// below the range (e.g. `-` or `:`) produce a bogus or negative digit.
	if (codePoint >= 48 && codePoint < 58) {
		// `0`..`9` map to 26..35
		return codePoint - 22;
	}
	if (codePoint >= 65 && codePoint < 91) {
		// `A`..`Z` map to 0..25
		return codePoint - 65;
	}
	if (codePoint >= 97 && codePoint < 123) {
		// `a`..`z` map to 0..25
		return codePoint - 97;
	}
	return base;
}
166
/**
 * Converts a digit/integer into a basic code point.
 * @see `basicToDigit()`
 * @private
 * @param {Number} digit The numeric value of a basic code point, expected
 * to be in the range `0` to `base - 1`.
 * @param {Number} flag If non-zero, the uppercase form is used; else, the
 * lowercase form is used. The behavior is undefined if `flag` is non-zero
 * and `digit` has no uppercase form.
 * @returns {Number} The basic code point whose value (when used for
 * representing integers) is `digit`.
 */
function digitToBasic(digit, flag) {
	//  0..25 map to ASCII a..z or A..Z
	// 26..35 map to ASCII 0..9
	var letterOffset = digit < 26 ? 75 : 0;
	var caseShift = flag != 0 ? 32 : 0;
	return digit + 22 + letterOffset - caseShift;
}
183
/**
 * Bias adaptation function as per section 3.4 of RFC 3492.
 * http://tools.ietf.org/html/rfc3492#section-3.4
 * @private
 * @param {Number} delta The delta that was just encoded or decoded.
 * @param {Number} numPoints Divisor applied to the scaled delta.
 * @param {Boolean} firstTime When truthy, `delta` is divided by `damp`;
 * otherwise it is halved.
 * @returns {Number} The new bias.
 */
function adapt(delta, numPoints, firstTime) {
	var k = 0;
	// `*` binds tighter than `>>`: the threshold is half of
	// `baseMinusTMin * tMax`.
	var threshold = (baseMinusTMin * tMax) >> 1;
	if (firstTime) {
		delta = floor(delta / damp);
	} else {
		delta = delta >> 1;
	}
	delta += floor(delta / numPoints);
	while (delta > threshold) {
		delta = floor(delta / baseMinusTMin);
		k += base;
	}
	return floor(k + (baseMinusTMin + 1) * delta / (delta + skew));
}
198
/**
 * Converts a Punycode string of ASCII-only symbols to a string of Unicode
 * symbols.
 * @memberOf punycode
 * @param {String} input The Punycode string of ASCII-only symbols.
 * @returns {String} The resulting string of Unicode symbols.
 * @throws {RangeError} With the `not-basic` message if a code unit >= 0x80
 * precedes the last delimiter, the `invalid-input` message if the input
 * ends inside an integer or contains a character that is not a digit, and
 * the `overflow` message if a value would exceed `maxInt`.
 */
function decode(input) {
	// Don't use UCS-2
	var output = [];
	var inputLength = input.length;
	var i = 0;
	var n = initialN;
	var bias = initialBias;
	var basic, j, index, oldi, w, k, digit, t, out;
	/** Cached calculation results */
	var baseMinusT;

	// Handle the basic code points: let `basic` be the number of input code
	// points before the last delimiter, or `0` if there is none, then copy
	// the first basic code points to the output.
	basic = input.lastIndexOf(delimiter);
	if (basic < 0) {
		basic = 0;
	}

	for (j = 0; j < basic; ++j) {
		// if it's not a basic code point
		if (input.charCodeAt(j) >= 0x80) {
			error('not-basic');
		}
		output.push(input.charCodeAt(j));
	}

	// Main decoding loop: start just after the last delimiter if any basic code
	// points were copied; start at the beginning otherwise.
	for (index = basic > 0 ? basic + 1 : 0; index < inputLength; /* no final expression */) {

		// `index` is the index of the next character to be consumed.
		// Decode a generalized variable-length integer into `delta`,
		// which gets added to `i`. The overflow checking is easier
		// if we increase `i` as we go, then subtract off its starting
		// value at the end to obtain `delta`.
		for (oldi = i, w = 1, k = base; /* no condition */; k += base) {

			if (index >= inputLength) {
				error('invalid-input');
			}

			digit = basicToDigit(input.charCodeAt(index++));

			// A character that is not a digit is malformed input, not an
			// arithmetic overflow, so it gets its own error.
			if (digit >= base) {
				error('invalid-input');
			}
			if (digit > floor((maxInt - i) / w)) {
				error('overflow');
			}

			i += digit * w;
			t = k <= bias ? tMin : (k >= bias + tMax ? tMax : k - bias);

			if (digit < t) {
				break;
			}

			baseMinusT = base - t;
			if (w > floor(maxInt / baseMinusT)) {
				error('overflow');
			}

			w *= baseMinusT;
		}

		out = output.length + 1;
		bias = adapt(i - oldi, out, oldi == 0);

		// `i` was supposed to wrap around from `out` to `0`,
		// incrementing `n` each time, so we'll fix that now:
		if (floor(i / out) > maxInt - n) {
			error('overflow');
		}

		n += floor(i / out);
		i %= out;

		// Insert `n` at position `i` of the output
		output.splice(i++, 0, n);
	}

	return ucs2encode(output);
}
300
/**
 * Converts a string of Unicode symbols to a Punycode string of ASCII-only
 * symbols.
 * @memberOf punycode
 * @param {String} input The string of Unicode symbols.
 * @returns {String} The resulting Punycode string of ASCII-only symbols.
 * @throws {RangeError} With the `overflow` message if a delta would exceed
 * `maxInt`.
 */
function encode(input) {
	var output = [];
	var n, delta, bias, handledCPCount, basicLength;
	var j, m, q, k, t, currentValue;
	/** `inputLength` will hold the number of code points in `input`. */
	var inputLength;
	/** Cached calculation results */
	var handledCPCountPlusOne, baseMinusT, qMinusT;

	// Convert the input in UCS-2 to Unicode
	input = ucs2decode(input);

	// Cache the length
	inputLength = input.length;

	// Initialize the state
	n = initialN;
	delta = 0;
	bias = initialBias;

	// Handle the basic code points
	for (j = 0; j < inputLength; ++j) {
		currentValue = input[j];
		if (currentValue < 0x80) {
			output.push(stringFromCharCode(currentValue));
		}
	}

	// `handledCPCount` is the number of code points that have been handled;
	// `basicLength` is the number of basic code points.
	handledCPCount = basicLength = output.length;

	// Finish the basic string - if it is not empty - with a delimiter
	if (basicLength) {
		output.push(delimiter);
	}

	// Main encoding loop:
	while (handledCPCount < inputLength) {

		// All non-basic code points < n have been handled already. Find the next
		// larger one:
		for (m = maxInt, j = 0; j < inputLength; ++j) {
			currentValue = input[j];
			if (currentValue >= n && currentValue < m) {
				m = currentValue;
			}
		}

		// Increase `delta` enough to advance the decoder's <n,i> state to <m,0>,
		// but guard against overflow
		handledCPCountPlusOne = handledCPCount + 1;
		if (m - n > floor((maxInt - delta) / handledCPCountPlusOne)) {
			error('overflow');
		}

		delta += (m - n) * handledCPCountPlusOne;
		n = m;

		for (j = 0; j < inputLength; ++j) {
			currentValue = input[j];

			if (currentValue < n && ++delta > maxInt) {
				error('overflow');
			}

			if (currentValue == n) {
				// Represent delta as a generalized variable-length integer
				for (q = delta, k = base; /* no condition */; k += base) {
					t = k <= bias ? tMin : (k >= bias + tMax ? tMax : k - bias);
					if (q < t) {
						break;
					}
					qMinusT = q - t;
					baseMinusT = base - t;
					output.push(
						stringFromCharCode(digitToBasic(t + qMinusT % baseMinusT, 0))
					);
					q = floor(qMinusT / baseMinusT);
				}

				output.push(stringFromCharCode(digitToBasic(q, 0)));
				bias = adapt(delta, handledCPCountPlusOne, handledCPCount == basicLength);
				delta = 0;
				++handledCPCount;
			}
		}

		++delta;
		++n;
	}
	return output.join('');
}
414
/**
 * Converts a Punycode string representing a domain name to Unicode. Only the
 * Punycoded parts of the domain name will be converted, i.e. it doesn't
 * matter if you call it on a string that has already been converted to
 * Unicode.
 * @memberOf punycode
 * @param {String} domain The Punycode domain name to convert to Unicode.
 * @returns {String} The Unicode representation of the given Punycode
 * string.
 */
function toUnicode(domain) {
	return mapDomain(domain, function(string) {
		// The payload is lower-cased before decoding, so the `xn--` prefix
		// is matched case-insensitively as well; otherwise a label such as
		// `XN--...` would be passed through undecoded.
		var prefix = string.slice(0, 4).toLowerCase();
		return regexPunycode.test(prefix)
			? decode(string.slice(4).toLowerCase())
			: string;
	});
}
432
/**
 * Converts a Unicode string representing a domain name to Punycode. Labels
 * that match `regexNonASCII` are encoded and prefixed with `xn--`; every
 * other label is returned unchanged, so calling this on a domain that is
 * already in ASCII is harmless.
 * @memberOf punycode
 * @param {String} domain The domain name to convert, as a Unicode string.
 * @returns {String} The Punycode representation of the given domain name.
 */
function toASCII(domain) {
	function convertLabel(label) {
		if (regexNonASCII.test(label)) {
			return 'xn--' + encode(label);
		}
		return label;
	}
	return mapDomain(domain, convertLabel);
}
448
449 /*---------------------------------------------------------------------- ----*/
450
/** Public API: the object assigned to `exports` */
exports = {
	/**
	 * The Punycode.js version number, as a string.
	 * @memberOf punycode
	 * @type String
	 */
	version: '1.2.3',
	/**
	 * Methods to convert from JavaScript's internal character
	 * representation (UCS-2) to Unicode code points, and back.
	 * @see <http://mathiasbynens.be/notes/javascript-encoding>
	 * @memberOf punycode
	 * @type Object
	 */
	ucs2: {
		decode: ucs2decode,
		encode: ucs2encode
	},
	decode: decode,
	encode: encode,
	toASCII: toASCII,
	toUnicode: toUnicode
};
475 }());
OLDNEW
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld