Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Side by Side Diff: lib/punycode.js

Issue 29350076: Issue 4350 - Update punycode.js to 2.0.0 (Closed)
Patch Set: Created Aug. 23, 2016, 3:29 p.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /*! http://mths.be/punycode v1.2.3 by @mathias */ 1 'use strict';
2 /* Used under GPL 2.0, see https://github.com/bestiejs/punycode.js/blob/master/LICENSE-GPL.txt */ 2
3 ;(function() { 3 /** Highest positive signed 32-bit integer value */
const maxInt = 2147483647; // aka. 0x7FFFFFFF or 2^31-1

/** Bootstring parameters */
const base = 36;
const tMin = 1;
const tMax = 26;
const skew = 38;
const damp = 700;
const initialBias = 72;
const initialN = 128; // 0x80
const delimiter = '-'; // '\x2D'

/** Regular expressions */
const regexPunycode = /^xn--/;
// Matches any code point outside the ASCII range. ASCII control characters
// and U+007F DEL are still ASCII, so they must not make a label look like it
// needs Punycode encoding.
const regexNonASCII = /[^\0-\x7F]/;
const regexSeparators = /[\x2E\u3002\uFF0E\uFF61]/g; // RFC 3490 separators

/** Error messages, keyed by the type passed to `error()` */
const errors = {
	'overflow': 'Overflow: input needs wider integers to process',
	'not-basic': 'Illegal input >= 0x80 (not a basic code point)',
	'invalid-input': 'Invalid input'
};

/** Convenience shortcuts */
const baseMinusTMin = base - tMin;
const floor = Math.floor;
const stringFromCharCode = String.fromCharCode;
32
33 /*--------------------------------------------------------------------------*/
34
/**
 * Raises the error registered under `type` in the `errors` table.
 * @private
 * @param {String} type The key of the message in `errors`.
 * @throws {RangeError} Always; the message is `errors[type]`.
 */
function error(type) {
	const message = errors[type];
	throw new RangeError(message);
}
44
/**
 * A minimal `Array#map` replacement that returns a fresh array.
 * @private
 * @param {Array} array The array to iterate over.
 * @param {Function} fn Called once per item with the item as its only
 * argument.
 * @returns {Array} A new array holding the value `fn` returned for each index.
 */
function map(array, fn) {
	const mapped = [];
	// Items are visited from the last index down to the first.
	for (let index = array.length - 1; index >= 0; --index) {
		mapped[index] = fn(array[index]);
	}
	return mapped;
}
61
/**
 * A simple `Array#map`-like wrapper to work with domain name strings or email
 * addresses.
 * @private
 * @param {String} string The domain name or email address.
 * @param {Function} fn The function that gets called for every label of the
 * domain name.
 * @returns {String} The input with every domain label replaced by the value
 * `fn` returned for it, labels joined with `.`.
 */
function mapDomain(string, fn) {
	let localPart = '';
	let domain = string;
	// In email addresses, only the domain name should be punycoded. The domain
	// is whatever follows the LAST `@`; everything up to and including it is
	// kept intact, so input with several `@` signs loses nothing.
	const atIndex = string.lastIndexOf('@');
	if (atIndex !== -1) {
		localPart = string.slice(0, atIndex + 1);
		domain = string.slice(atIndex + 1);
	}
	// Avoid `split(regex)` for IE8 compatibility. See #17.
	domain = domain.replace(regexSeparators, '\x2E');
	const labels = domain.split('.');
	const encoded = map(labels, fn).join('.');
	return localPart + encoded;
}
87
/**
 * Converts a string into an array of numeric Unicode code points. A
 * well-formed surrogate pair becomes one code point; a surrogate half without
 * its partner is emitted on its own as a single code unit.
 * @see `punycode.ucs2.encode`
 * @see <https://mathiasbynens.be/notes/javascript-encoding>
 * @memberOf punycode.ucs2
 * @name decode
 * @param {String} string The Unicode input string (UCS-2).
 * @returns {Array} The new array of code points.
 */
function ucs2decode(string) {
	const output = [];
	// String iteration walks by code point: a valid pair arrives as one
	// two-unit chunk, an unmatched surrogate arrives alone.
	for (const symbol of string) {
		output.push(symbol.codePointAt(0));
	}
	return output;
}
124
/**
 * Creates a string based on an array of numeric code points.
 * @see `punycode.ucs2.decode`
 * @memberOf punycode.ucs2
 * @name encode
 * @param {Array} array The array (or other iterable) of numeric code points.
 * @returns {String} The new Unicode string (UCS-2).
 * @throws {RangeError} If a value is not a valid Unicode code point.
 */
const ucs2encode = array => {
	const codePoints = Array.isArray(array) ? array : Array.from(array);
	// Spreading a very large array into a single call exceeds the engine's
	// argument limit, so convert in bounded slices instead.
	const chunkSize = 0x2000;
	let result = '';
	for (let start = 0; start < codePoints.length; start += chunkSize) {
		result += String.fromCodePoint(...codePoints.slice(start, start + chunkSize));
	}
	return result;
};
134
/**
 * Converts a basic code point into a digit/integer.
 * @see `digitToBasic()`
 * @private
 * @param {Number} codePoint The basic numeric code point value.
 * @returns {Number} The numeric value of a basic code point (for use in
 * representing integers) in the range `0` to `base - 1`, or `base` if
 * the code point does not represent a value.
 */
const basicToDigit = function(codePoint) {
	// Both ends of every range are checked: a subtraction-only test would also
	// accept every code point below the range start (e.g. `-` or `!`).
	if (codePoint >= 0x30 && codePoint < 0x3A) {
		// `0`..`9` map to 26..35
		return 26 + (codePoint - 0x30);
	}
	if (codePoint >= 0x41 && codePoint < 0x5B) {
		// `A`..`Z` map to 0..25
		return codePoint - 0x41;
	}
	if (codePoint >= 0x61 && codePoint < 0x7B) {
		// `a`..`z` map to 0..25
		return codePoint - 0x61;
	}
	return base;
};
156
/**
 * Converts a digit/integer into a basic code point.
 * @see `basicToDigit()`
 * @private
 * @param {Number} digit The numeric value of a basic code point, expected in
 * the range `0` to `base - 1`.
 * @param {Number} flag If non-zero, the uppercase form is used; else, the
 * lowercase form is used. The behavior is undefined if `flag` is non-zero
 * and `digit` has no uppercase form.
 * @returns {Number} The basic code point whose value (when used for
 * representing integers) is `digit`.
 */
const digitToBasic = function(digit, flag) {
	// 0..25 map to ASCII a..z (offset 97); 26..35 map to ASCII 0..9 (offset 22).
	const offset = digit < 26 ? 97 : 22;
	// Uppercase letters sit 32 code points below their lowercase forms.
	const caseShift = flag != 0 ? 32 : 0;
	return digit + offset - caseShift;
};
173
/**
 * Bias adaptation function as per section 3.4 of RFC 3492.
 * https://tools.ietf.org/html/rfc3492#section-3.4
 * @private
 * @param {Number} delta The delta that was just encoded or decoded.
 * @param {Number} numPoints Divisor used to scale the delta.
 * @param {Boolean} firstTime Whether this is the first adaptation; if so the
 * delta is divided by `damp` instead of being halved.
 * @returns {Number} The new bias.
 */
const adapt = function(delta, numPoints, firstTime) {
	let scaled = firstTime ? floor(delta / damp) : delta >> 1;
	scaled += floor(scaled / numPoints);

	const threshold = baseMinusTMin * tMax >> 1;
	let k = 0;
	while (scaled > threshold) {
		scaled = floor(scaled / baseMinusTMin);
		k += base;
	}
	return floor(k + (baseMinusTMin + 1) * scaled / (scaled + skew));
};
188
/**
 * Converts a Punycode string of ASCII-only symbols to a string of Unicode
 * symbols.
 * @memberOf punycode
 * @param {String} input The Punycode string of ASCII-only symbols.
 * @returns {String} The resulting string of Unicode symbols.
 * @throws {RangeError} `not-basic` if a code point >= 0x80 appears before the
 * last delimiter, `invalid-input` if the encoded part is truncated or
 * contains a character that is not a Punycode digit, `overflow` if the
 * decoded values exceed `maxInt`.
 */
const decode = function(input) {
	// Don't use UCS-2.
	const output = [];
	const inputLength = input.length;
	let i = 0;
	let n = initialN;
	let bias = initialBias;

	// Handle the basic code points: let `basic` be the number of input code
	// points before the last delimiter, or `0` if there is none, then copy
	// the first basic code points to the output.
	let basic = input.lastIndexOf(delimiter);
	if (basic < 0) {
		basic = 0;
	}

	for (let j = 0; j < basic; ++j) {
		const codeUnit = input.charCodeAt(j);
		// if it's not a basic code point
		if (codeUnit >= 0x80) {
			error('not-basic');
		}
		output.push(codeUnit);
	}

	// Main decoding loop: start just after the last delimiter if any basic code
	// points were copied; start at the beginning otherwise.
	for (let index = basic > 0 ? basic + 1 : 0; index < inputLength; /* no final expression */) {

		// `index` is the index of the next character to be consumed.
		// Decode a generalized variable-length integer into `delta`,
		// which gets added to `i`. The overflow checking is easier
		// if we increase `i` as we go, then subtract off its starting
		// value at the end to obtain `delta`.
		const oldi = i;
		for (let w = 1, k = base; /* no condition */; k += base) {

			if (index >= inputLength) {
				error('invalid-input');
			}

			const digit = basicToDigit(input.charCodeAt(index++));

			// A character that is not a digit is malformed input, not an
			// arithmetic overflow; report the two conditions separately.
			if (digit >= base) {
				error('invalid-input');
			}
			if (digit > floor((maxInt - i) / w)) {
				error('overflow');
			}

			i += digit * w;
			const t = k <= bias ? tMin : (k >= bias + tMax ? tMax : k - bias);

			if (digit < t) {
				break;
			}

			const baseMinusT = base - t;
			if (w > floor(maxInt / baseMinusT)) {
				error('overflow');
			}

			w *= baseMinusT;
		}

		const out = output.length + 1;
		bias = adapt(i - oldi, out, oldi === 0);

		// `i` was supposed to wrap around from `out` to `0`,
		// incrementing `n` each time, so we'll fix that now:
		if (floor(i / out) > maxInt - n) {
			error('overflow');
		}

		n += floor(i / out);
		i %= out;

		// Insert `n` at position `i` of the output.
		output.splice(i++, 0, n);
	}

	return String.fromCodePoint(...output);
};
279
/**
 * Converts a string of Unicode symbols (e.g. a domain name label) to a
 * Punycode string of ASCII-only symbols.
 * @memberOf punycode
 * @param {String} input The string of Unicode symbols.
 * @returns {String} The resulting Punycode string of ASCII-only symbols.
 * @throws {RangeError} `overflow` if a delta would exceed `maxInt`.
 */
const encode = function(input) {
	// Work on Unicode code points rather than on UCS-2 code units.
	const codePoints = ucs2decode(input);
	const totalCount = codePoints.length;
	const output = [];

	// Basic code points are copied through verbatim, in input order.
	for (const codePoint of codePoints) {
		if (codePoint < 0x80) {
			output.push(stringFromCharCode(codePoint));
		}
	}

	// `basicLength` is the number of basic code points;
	// `handledCPCount` is the number of code points handled so far.
	const basicLength = output.length;
	let handledCPCount = basicLength;

	// A non-empty basic part is terminated by the delimiter.
	if (basicLength) {
		output.push(delimiter);
	}

	// Encoder state.
	let n = initialN;
	let delta = 0;
	let bias = initialBias;

	while (handledCPCount < totalCount) {

		// Every non-basic code point below `n` is already handled; pick the
		// smallest one that is still pending.
		const m = codePoints.reduce(
			(smallest, codePoint) => (codePoint >= n && codePoint < smallest ? codePoint : smallest),
			maxInt
		);

		// Advance the decoder's <n,i> state to <m,0>, guarding against overflow.
		const handledCPCountPlusOne = handledCPCount + 1;
		if (m - n > floor((maxInt - delta) / handledCPCountPlusOne)) {
			error('overflow');
		}
		delta += (m - n) * handledCPCountPlusOne;
		n = m;

		for (const codePoint of codePoints) {
			if (codePoint < n && ++delta > maxInt) {
				error('overflow');
			}
			if (codePoint != n) {
				continue;
			}

			// Emit `delta` as a generalized variable-length integer.
			let q = delta;
			let k = base;
			while (true) {
				let t = k - bias;
				if (k <= bias) {
					t = tMin;
				} else if (k >= bias + tMax) {
					t = tMax;
				}
				if (q < t) {
					break;
				}
				const qMinusT = q - t;
				const baseMinusT = base - t;
				const digit = t + qMinusT % baseMinusT;
				output.push(stringFromCharCode(digitToBasic(digit, 0)));
				q = floor(qMinusT / baseMinusT);
				k += base;
			}

			output.push(stringFromCharCode(digitToBasic(q, 0)));
			bias = adapt(delta, handledCPCountPlusOne, handledCPCount == basicLength);
			delta = 0;
			++handledCPCount;
		}

		++delta;
		++n;
	}

	return output.join('');
};
374
/**
 * Converts a Punycode string representing a domain name or an email address
 * to Unicode. Labels that do not start with `xn--` are passed through
 * untouched, so calling this on an already-converted string is harmless.
 * @memberOf punycode
 * @param {String} input The Punycoded domain name or email address to
 * convert to Unicode.
 * @returns {String} The Unicode representation of the given Punycode
 * string.
 */
const toUnicode = function(input) {
	return mapDomain(input, label => {
		if (!regexPunycode.test(label)) {
			return label;
		}
		// Drop the 4-character `xn--` prefix before decoding.
		const encodedPart = label.slice(4).toLowerCase();
		return decode(encodedPart);
	});
};
393
/**
 * Converts a Unicode string representing a domain name or an email address to
 * Punycode. Only labels matched by `regexNonASCII` are converted, so calling
 * this with a domain that is already in ASCII is harmless.
 * @memberOf punycode
 * @param {String} input The domain name or email address to convert, as a
 * Unicode string.
 * @returns {String} The Punycode representation of the given domain name or
 * email address.
 */
const toASCII = function(input) {
	return mapDomain(input, label => {
		if (!regexNonASCII.test(label)) {
			return label;
		}
		return 'xn--' + encode(label);
	});
};
412
413 /*--------------------------------------------------------------------------*/
414
415 /** Define the public API */
416 const punycode = {
4 /** 417 /**
5 » * The `punycode` object. 418 » * A string representing the current Punycode.js version number.
6 » * @name punycode 419 » * @memberOf punycode
420 » * @type String
421 » */
422 » 'version': '2.0.0',
423 » /**
424 » * An object of methods to convert from JavaScript's internal character
425 » * representation (UCS-2) to Unicode code points, and back.
426 » * @see <https://mathiasbynens.be/notes/javascript-encoding>
427 » * @memberOf punycode
7 * @type Object 428 * @type Object
8 */ 429 */
9 » var punycode, 430 » 'ucs2': {
10 431 » » 'decode': ucs2decode,
11 » /** Highest positive signed 32-bit integer value */ 432 » » 'encode': ucs2encode
12 » maxInt = 2147483647, // aka. 0x7FFFFFFF or 2^31-1
13
14 » /** Bootstring parameters */
15 » base = 36,
16 » tMin = 1,
17 » tMax = 26,
18 » skew = 38,
19 » damp = 700,
20 » initialBias = 72,
21 » initialN = 128, // 0x80
22 » delimiter = '-', // '\x2D'
23
24 » /** Regular expressions */
25 » regexPunycode = /^xn--/,
26 » regexNonASCII = /[^ -~]/, // unprintable ASCII chars + non-ASCII chars
27 » regexSeparators = /\x2E|\u3002|\uFF0E|\uFF61/g, // RFC 3490 separators
28
29 » /** Error messages */
30 » errors = {
31 » » 'overflow': 'Overflow: input needs wider integers to process',
32 » » 'not-basic': 'Illegal input >= 0x80 (not a basic code point)',
33 » » 'invalid-input': 'Invalid input'
34 }, 433 },
35 434 'decode': decode,
36 /** Convenience shortcuts */ 435 'encode': encode,
37 baseMinusTMin = base - tMin, 436 'toASCII': toASCII,
38 floor = Math.floor, 437 'toUnicode': toUnicode
39 stringFromCharCode = String.fromCharCode, 438 };
40 439
41 /** Temporary variable */ 440 module.exports = punycode;
42 key;
43
44 /*--------------------------------------------------------------------------*/
45
/**
 * Raises the error registered under `type` in the `errors` table.
 * @private
 * @param {String} type The key of the message in `errors`.
 * @throws {RangeError} Always; the message is `errors[type]`.
 */
function error(type) {
	const message = errors[type];
	throw new RangeError(message);
}
55
/**
 * A generic `Array#map` utility function.
 * @private
 * @param {Array} array The array to iterate over. It is left unmodified.
 * @param {Function} fn The function that gets called for every array item.
 * @returns {Array} A new array of values returned by the callback function.
 */
function map(array, fn) {
	// Collect into a fresh array: writing the results back into `array` would
	// corrupt data the caller still owns (e.g. the code points handed to
	// `ucs2encode`).
	const result = [];
	let length = array.length;
	while (length--) {
		result[length] = fn(array[length]);
	}
	return result;
}
71
/**
 * Applies `fn` to every label of a domain name and reassembles the result.
 * @private
 * @param {String} string The domain name.
 * @param {Function} fn The function that gets called for every label.
 * @returns {String} The converted labels joined with `.`.
 */
function mapDomain(string, fn) {
	// Labels are delimited by any of the separators in `regexSeparators`.
	const labels = string.split(regexSeparators);
	const converted = map(labels, fn);
	return converted.join('.');
}
84
/**
 * Converts a string into an array of numeric Unicode code points. A
 * well-formed surrogate pair becomes one code point; a surrogate half without
 * its partner is emitted on its own as a single code unit.
 * @see `punycode.ucs2.encode`
 * @see <http://mathiasbynens.be/notes/javascript-encoding>
 * @memberOf punycode.ucs2
 * @name decode
 * @param {String} string The Unicode input string (UCS-2).
 * @returns {Array} The new array of code points.
 */
function ucs2decode(string) {
	const output = [];
	// String iteration walks by code point: a valid pair arrives as one
	// two-unit chunk, an unmatched surrogate arrives alone.
	for (const symbol of string) {
		output.push(symbol.codePointAt(0));
	}
	return output;
}
123
/**
 * Creates a string based on an array of numeric code points.
 * @see `punycode.ucs2.decode`
 * @memberOf punycode.ucs2
 * @name encode
 * @param {Array} array The array of numeric code points. It is left
 * unmodified.
 * @returns {String} The new Unicode string (UCS-2).
 */
function ucs2encode(array) {
	// Build the string locally instead of routing through `map`, which writes
	// its results back into the array it is given.
	let output = '';
	for (let index = 0; index < array.length; ++index) {
		let value = array[index];
		if (value > 0xFFFF) {
			// Split an astral code point into a high and a low surrogate.
			value -= 0x10000;
			output += stringFromCharCode(value >>> 10 & 0x3FF | 0xD800);
			value = 0xDC00 | value & 0x3FF;
		}
		output += stringFromCharCode(value);
	}
	return output;
}
144
/**
 * Converts a basic code point into a digit/integer.
 * @see `digitToBasic()`
 * @private
 * @param {Number} codePoint The basic numeric code point value.
 * @returns {Number} The numeric value of a basic code point (for use in
 * representing integers) in the range `0` to `base - 1`, or `base` if
 * the code point does not represent a value.
 */
function basicToDigit(codePoint) {
	// Both ends of every range are checked: a subtraction-only test would also
	// accept every code point below the range start (e.g. `-` or `!`).
	if (codePoint >= 48 && codePoint < 58) {
		// `0`..`9` map to 26..35
		return codePoint - 22;
	}
	if (codePoint >= 65 && codePoint < 91) {
		// `A`..`Z` map to 0..25
		return codePoint - 65;
	}
	if (codePoint >= 97 && codePoint < 123) {
		// `a`..`z` map to 0..25
		return codePoint - 97;
	}
	return base;
}
166
/**
 * Converts a digit/integer into a basic code point.
 * @see `basicToDigit()`
 * @private
 * @param {Number} digit The numeric value of a basic code point, expected in
 * the range `0` to `base - 1`.
 * @param {Number} flag If non-zero, the uppercase form is used; else, the
 * lowercase form is used. The behavior is undefined if `flag` is non-zero
 * and `digit` has no uppercase form.
 * @returns {Number} The basic code point whose value (when used for
 * representing integers) is `digit`.
 */
function digitToBasic(digit, flag) {
	// 0..25 map to ASCII a..z (offset 97); 26..35 map to ASCII 0..9 (offset 22).
	const offset = digit < 26 ? 97 : 22;
	// Uppercase letters sit 32 code points below their lowercase forms.
	const caseShift = flag != 0 ? 32 : 0;
	return digit + offset - caseShift;
}
183
/**
 * Bias adaptation function as per section 3.4 of RFC 3492.
 * http://tools.ietf.org/html/rfc3492#section-3.4
 * @private
 * @param {Number} delta The delta that was just encoded or decoded.
 * @param {Number} numPoints Divisor used to scale the delta.
 * @param {Boolean} firstTime Whether this is the first adaptation; if so the
 * delta is divided by `damp` instead of being halved.
 * @returns {Number} The new bias.
 */
function adapt(delta, numPoints, firstTime) {
	let scaled = firstTime ? floor(delta / damp) : delta >> 1;
	scaled += floor(scaled / numPoints);

	const threshold = baseMinusTMin * tMax >> 1;
	let k = 0;
	while (scaled > threshold) {
		scaled = floor(scaled / baseMinusTMin);
		k += base;
	}
	return floor(k + (baseMinusTMin + 1) * scaled / (scaled + skew));
}
198
/**
 * Converts a Punycode string of ASCII-only symbols to a string of Unicode
 * symbols.
 * @memberOf punycode
 * @param {String} input The Punycode string of ASCII-only symbols.
 * @returns {String} The resulting string of Unicode symbols.
 * @throws {RangeError} `not-basic` if a code point >= 0x80 appears before the
 * last delimiter, `invalid-input` if the encoded part is truncated or
 * contains a character that is not a Punycode digit, `overflow` if the
 * decoded values exceed `maxInt`.
 */
function decode(input) {
	// Don't use UCS-2
	const output = [];
	const inputLength = input.length;
	let i = 0;
	let n = initialN;
	let bias = initialBias;

	// Handle the basic code points: let `basic` be the number of input code
	// points before the last delimiter, or `0` if there is none, then copy
	// the first basic code points to the output.
	let basic = input.lastIndexOf(delimiter);
	if (basic < 0) {
		basic = 0;
	}

	for (let j = 0; j < basic; ++j) {
		const codeUnit = input.charCodeAt(j);
		// if it's not a basic code point
		if (codeUnit >= 0x80) {
			error('not-basic');
		}
		output.push(codeUnit);
	}

	// Main decoding loop: start just after the last delimiter if any basic code
	// points were copied; start at the beginning otherwise.
	for (let index = basic > 0 ? basic + 1 : 0; index < inputLength; /* no final expression */) {

		// `index` is the index of the next character to be consumed.
		// Decode a generalized variable-length integer into `delta`,
		// which gets added to `i`. The overflow checking is easier
		// if we increase `i` as we go, then subtract off its starting
		// value at the end to obtain `delta`.
		const oldi = i;
		for (let w = 1, k = base; /* no condition */; k += base) {

			if (index >= inputLength) {
				error('invalid-input');
			}

			const digit = basicToDigit(input.charCodeAt(index++));

			// A character that is not a digit is malformed input, not an
			// arithmetic overflow; report the two conditions separately.
			if (digit >= base) {
				error('invalid-input');
			}
			if (digit > floor((maxInt - i) / w)) {
				error('overflow');
			}

			i += digit * w;
			const t = k <= bias ? tMin : (k >= bias + tMax ? tMax : k - bias);

			if (digit < t) {
				break;
			}

			const baseMinusT = base - t;
			if (w > floor(maxInt / baseMinusT)) {
				error('overflow');
			}

			w *= baseMinusT;
		}

		const out = output.length + 1;
		bias = adapt(i - oldi, out, oldi === 0);

		// `i` was supposed to wrap around from `out` to `0`,
		// incrementing `n` each time, so we'll fix that now:
		if (floor(i / out) > maxInt - n) {
			error('overflow');
		}

		n += floor(i / out);
		i %= out;

		// Insert `n` at position `i` of the output
		output.splice(i++, 0, n);
	}

	return ucs2encode(output);
}
300
/**
 * Converts a string of Unicode symbols to a Punycode string of ASCII-only
 * symbols.
 * @memberOf punycode
 * @param {String} input The string of Unicode symbols.
 * @returns {String} The resulting Punycode string of ASCII-only symbols.
 * @throws {RangeError} `overflow` if a delta would exceed `maxInt`.
 */
function encode(input) {
	// Work on Unicode code points rather than on UCS-2 code units.
	const codePoints = ucs2decode(input);
	const totalCount = codePoints.length;
	const output = [];

	// Basic code points are copied through verbatim, in input order.
	for (const codePoint of codePoints) {
		if (codePoint < 0x80) {
			output.push(stringFromCharCode(codePoint));
		}
	}

	// `basicLength` is the number of basic code points;
	// `handledCPCount` is the number of code points handled so far.
	const basicLength = output.length;
	let handledCPCount = basicLength;

	// A non-empty basic part is terminated by the delimiter.
	if (basicLength) {
		output.push(delimiter);
	}

	// Encoder state.
	let n = initialN;
	let delta = 0;
	let bias = initialBias;

	while (handledCPCount < totalCount) {

		// Every non-basic code point below `n` is already handled; pick the
		// smallest one that is still pending.
		const m = codePoints.reduce(
			(smallest, codePoint) => (codePoint >= n && codePoint < smallest ? codePoint : smallest),
			maxInt
		);

		// Advance the decoder's <n,i> state to <m,0>, guarding against overflow.
		const handledCPCountPlusOne = handledCPCount + 1;
		if (m - n > floor((maxInt - delta) / handledCPCountPlusOne)) {
			error('overflow');
		}
		delta += (m - n) * handledCPCountPlusOne;
		n = m;

		for (const codePoint of codePoints) {
			if (codePoint < n && ++delta > maxInt) {
				error('overflow');
			}
			if (codePoint != n) {
				continue;
			}

			// Emit `delta` as a generalized variable-length integer.
			let q = delta;
			let k = base;
			while (true) {
				let t = k - bias;
				if (k <= bias) {
					t = tMin;
				} else if (k >= bias + tMax) {
					t = tMax;
				}
				if (q < t) {
					break;
				}
				const qMinusT = q - t;
				const baseMinusT = base - t;
				const digit = t + qMinusT % baseMinusT;
				output.push(stringFromCharCode(digitToBasic(digit, 0)));
				q = floor(qMinusT / baseMinusT);
				k += base;
			}

			output.push(stringFromCharCode(digitToBasic(q, 0)));
			bias = adapt(delta, handledCPCountPlusOne, handledCPCount == basicLength);
			delta = 0;
			++handledCPCount;
		}

		++delta;
		++n;
	}

	return output.join('');
}
414
/**
 * Converts a Punycode string representing a domain name to Unicode. Labels
 * that do not start with `xn--` are passed through untouched, so calling this
 * on an already-converted string is harmless.
 * @memberOf punycode
 * @param {String} domain The Punycode domain name to convert to Unicode.
 * @returns {String} The Unicode representation of the given Punycode
 * string.
 */
function toUnicode(domain) {
	return mapDomain(domain, label => {
		if (!regexPunycode.test(label)) {
			return label;
		}
		// Drop the 4-character `xn--` prefix before decoding.
		const encodedPart = label.slice(4).toLowerCase();
		return decode(encodedPart);
	});
}
432
/**
 * Converts a Unicode string representing a domain name to Punycode. Only
 * labels matched by `regexNonASCII` are converted, so calling this with a
 * domain that is already in ASCII is harmless.
 * @memberOf punycode
 * @param {String} domain The domain name to convert, as a Unicode string.
 * @returns {String} The Punycode representation of the given domain name.
 */
function toASCII(domain) {
	return mapDomain(domain, label => {
		if (!regexNonASCII.test(label)) {
			return label;
		}
		return 'xn--' + encode(label);
	});
}
448
449 /*--------------------------------------------------------------------------*/
450
/** Public API: version string, UCS-2 helpers and the four converters. */
exports = {
	/**
	 * The Punycode.js version number, as a string.
	 * @memberOf punycode
	 * @type String
	 */
	version: '1.2.3',
	/**
	 * Helpers that convert between JavaScript's internal character
	 * representation (UCS-2) and arrays of Unicode code points.
	 * @see <http://mathiasbynens.be/notes/javascript-encoding>
	 * @memberOf punycode
	 * @type Object
	 */
	ucs2: {
		decode: ucs2decode,
		encode: ucs2encode
	},
	decode,
	encode,
	toASCII,
	toUnicode
};
475 }());
OLDNEW
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld