lib/punycode.js - Issue 29350076: Issue 4350 - Update punycode.js to 2.0.0

Side by Side Diff: lib/punycode.js

Issue 29350076: Issue 4350 - Update punycode.js to 2.0.0 (Closed)

Patch Set: Add license header and fix for our use Created Aug. 23, 2016, 3:29 p.m.

Left:
Right:

Use n/p to move between diff chunks; N/P to move between comments.

Jump to:

View unified diff | Download patch

OLD	NEW
1 /*! http://mths.be/punycode v1.2.3 by @mathias */	1 /*

2 /* Used under GPL 2.0, see https://github.com/bestiejs/punycode.js/blob/master/LICENSE-GPL.txt */	2 * Copyright Mathias Bynens <https://mathiasbynens.be/>
	Sebastian Noack 2016/08/23 16:27:06: The MIT license header seems to require a copyright year. Also I guess we'd have to add ourselves as copyright holder since we modify some parts of the code.
	kzar 2016/08/23 16:51:13 (in reply to Sebastian Noack, 2016/08/23 16:27:06): Done. (I put 2011-2016 after looking at his repository's commit history https://github.com/bestiejs/punycode.js/commits/master?page=4 )
3 ;(function() {	3 *

	4 * Permission is hereby granted, free of charge, to any person obtaining

	5 * a copy of this software and associated documentation files (the

	6 * "Software"), to deal in the Software without restriction, including

	7 * without limitation the rights to use, copy, modify, merge, publish,

	8 * distribute, sublicense, and/or sell copies of the Software, and to

	9 * permit persons to whom the Software is furnished to do so, subject to

	10 * the following conditions:

	11 *

	12 * The above copyright notice and this permission notice shall be

	13 * included in all copies or substantial portions of the Software.

	14 *

	15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,

	16 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF

	17 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND

	18 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE

	19 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION

	20 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION

	21 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

	22 */

	23

	24 'use strict';

	25

	26 /** Highest positive signed 32-bit float value */

	27 const maxInt = 2147483647; // aka. 0x7FFFFFFF or 2^31-1

	28

	29 /** Bootstring parameters */

	30 const base = 36;

	31 const tMin = 1;

	32 const tMax = 26;

	33 const skew = 38;

	34 const damp = 700;

	35 const initialBias = 72;

	36 const initialN = 128; // 0x80

	37 const delimiter = '-'; // '\x2D'

	38

	39 /** Regular expressions */

	40 const regexPunycode = /^xn--/;

	41 const regexNonASCII = /[^\x20-\x7E]/; // unprintable ASCII chars + non-ASCII chars

	42 const regexSeparators = /[\x2E\u3002\uFF0E\uFF61]/g; // RFC 3490 separators

	43

	44 /** Error messages */

	45 const errors = {

	46 'overflow': 'Overflow: input needs wider integers to process',

	47 'not-basic': 'Illegal input >= 0x80 (not a basic code point)',

	48 'invalid-input': 'Invalid input'

	49 };

	50

	51 /** Convenience shortcuts */

	52 const baseMinusTMin = base - tMin;

	53 const floor = Math.floor;

	54 const stringFromCharCode = String.fromCharCode;

	55

	56 /*--------------------------------------------------------------------------*/

	57

	58 /**

	59 * A generic error utility function.

	60 * @private

	61 * @param {String} type The error type.

	62 * @returns {Error} Throws a `RangeError` with the applicable error message.

	63 */

	64 function error(type) {

	65 throw new RangeError(errors[type]);

	66 }

	67

	68 /**

	69 * A generic `Array#map` utility function.

	70 * @private

	71 * @param {Array} array The array to iterate over.

	72 * @param {Function} callback The function that gets called for every array

	73 * item.

	74 * @returns {Array} A new array of values returned by the callback function.

	75 */

	76 function map(array, fn) {

	77 const result = [];

	78 let length = array.length;

	79 while (length--) {

	80 result[length] = fn(array[length]);

	81 }

	82 return result;

	83 }

	84

	85 /**

	86 * A simple `Array#map`-like wrapper to work with domain name strings or email

	87 * addresses.

	88 * @private

	89 * @param {String} domain The domain name or email address.

	90 * @param {Function} callback The function that gets called for every

	91 * character.

	92 * @returns {Array} A new string of characters returned by the callback

	93 * function.

	94 */

	95 function mapDomain(string, fn) {

	96 const parts = string.split('@');

	97 let result = '';

	98 if (parts.length > 1) {

	99 // In email addresses, only the domain name should be punycoded. Leave

	100 // the local part (i.e. everything up to `@`) intact.

	101 result = parts[0] + '@';

	102 string = parts[1];

	103 }

	104 // Avoid `split(regex)` for IE8 compatibility. See #17.

	105 string = string.replace(regexSeparators, '\x2E');

	106 const labels = string.split('.');

	107 const encoded = map(labels, fn).join('.');

	108 return result + encoded;

	109 }

	110

	111 /**

	112 * Creates an array containing the numeric code points of each Unicode

	113 * character in the string. While JavaScript uses UCS-2 internally,

	114 * this function will convert a pair of surrogate halves (each of which

	115 * UCS-2 exposes as separate characters) into a single code point,

	116 * matching UTF-16.

	117 * @see `punycode.ucs2.encode`

	118 * @see <https://mathiasbynens.be/notes/javascript-encoding>

	119 * @memberOf punycode.ucs2

	120 * @name decode

	121 * @param {String} string The Unicode input string (UCS-2).

	122 * @returns {Array} The new array of code points.

	123 */

	124 function ucs2decode(string) {

	125 const output = [];

	126 let counter = 0;

	127 const length = string.length;

	128 while (counter < length) {

	129 const value = string.charCodeAt(counter++);

	130 if (value >= 0xD800 && value <= 0xDBFF && counter < length) {

	131 // It's a high surrogate, and there is a next character.

	132 const extra = string.charCodeAt(counter++);

	133 if ((extra & 0xFC00) == 0xDC00) { // Low surrogate.

	134 output.push(((value & 0x3FF) << 10) + (extra & 0x3FF) + 0x10000);

	135 } else {

	136 // It's an unmatched surrogate; only append this code unit, in case the

	137 // next code unit is the high surrogate of a surrogate pair.

	138 output.push(value);

	139 counter--;

	140 }

	141 } else {

	142 output.push(value);

	143 }

	144 }

	145 return output;

	146 }

	147

	148 /**

	149 * Creates a string based on an array of numeric code points.

	150 * @see `punycode.ucs2.decode`

	151 * @memberOf punycode.ucs2

	152 * @name encode

	153 * @param {Array} codePoints The array of numeric code points.

	154 * @returns {String} The new Unicode string (UCS-2).

	155 */

	156 const ucs2encode = array => String.fromCodePoint.apply(null, array);

	157

	158 /**

	159 * Converts a basic code point into a digit/integer.

	160 * @see `digitToBasic()`

	161 * @private

	162 * @param {Number} codePoint The basic numeric code point value.

	163 * @returns {Number} The numeric value of a basic code point (for use in

	164 * representing integers) in the range `0` to `base - 1`, or `base` if

	165 * the code point does not represent a value.

	166 */

	167 const basicToDigit = function(codePoint) {

	168 if (codePoint - 0x30 < 0x0A) {

	169 return codePoint - 0x16;

	170 }

	171 if (codePoint - 0x41 < 0x1A) {

	172 return codePoint - 0x41;

	173 }

	174 if (codePoint - 0x61 < 0x1A) {

	175 return codePoint - 0x61;

	176 }

	177 return base;

	178 };

	179

	180 /**

	181 * Converts a digit/integer into a basic code point.

	182 * @see `basicToDigit()`

	183 * @private

	184 * @param {Number} digit The numeric value of a basic code point.

	185 * @returns {Number} The basic code point whose value (when used for

	186 * representing integers) is `digit`, which needs to be in the range

	187 * `0` to `base - 1`. If `flag` is non-zero, the uppercase form is

	188 * used; else, the lowercase form is used. The behavior is undefined

	189 * if `flag` is non-zero and `digit` has no uppercase form.

	190 */

	191 const digitToBasic = function(digit, flag) {

	192 // 0..25 map to ASCII a..z or A..Z

	193 // 26..35 map to ASCII 0..9

	194 return digit + 22 + 75 * (digit < 26) - ((flag != 0) << 5);

	195 };

	196

	197 /**

	198 * Bias adaptation function as per section 3.4 of RFC 3492.

	199 * https://tools.ietf.org/html/rfc3492#section-3.4

	200 * @private

	201 */

	202 const adapt = function(delta, numPoints, firstTime) {

	203 let k = 0;

	204 delta = firstTime ? floor(delta / damp) : delta >> 1;

	205 delta += floor(delta / numPoints);

	206 for (/* no initialization */; delta > baseMinusTMin * tMax >> 1; k += base) {

	207 delta = floor(delta / baseMinusTMin);

	208 }

	209 return floor(k + (baseMinusTMin + 1) * delta / (delta + skew));

	210 };

	211

	212 /**

	213 * Converts a Punycode string of ASCII-only symbols to a string of Unicode

	214 * symbols.

	215 * @memberOf punycode

	216 * @param {String} input The Punycode string of ASCII-only symbols.

	217 * @returns {String} The resulting string of Unicode symbols.

	218 */

	219 const decode = function(input) {

	220 // Don't use UCS-2.

	221 const output = [];

	222 const inputLength = input.length;

	223 let i = 0;

	224 let n = initialN;

	225 let bias = initialBias;

	226

	227 // Handle the basic code points: let `basic` be the number of input code

	228 // points before the last delimiter, or `0` if there is none, then copy

	229 // the first basic code points to the output.

	230

	231 let basic = input.lastIndexOf(delimiter);

	232 if (basic < 0) {

	233 basic = 0;

	234 }

	235

	236 for (let j = 0; j < basic; ++j) {

	237 // if it's not a basic code point

	238 if (input.charCodeAt(j) >= 0x80) {

	239 error('not-basic');

	240 }

	241 output.push(input.charCodeAt(j));

	242 }

	243

	244 // Main decoding loop: start just after the last delimiter if any basic code

	245 // points were copied; start at the beginning otherwise.

	246

	247 for (let index = basic > 0 ? basic + 1 : 0; index < inputLength; /* no final expression */) {

	248

	249 // `index` is the index of the next character to be consumed.

	250 // Decode a generalized variable-length integer into `delta`,

	251 // which gets added to `i`. The overflow checking is easier

	252 // if we increase `i` as we go, then subtract off its starting

	253 // value at the end to obtain `delta`.

	254 let oldi = i;

	255 for (let w = 1, k = base; /* no condition */; k += base) {

	256

	257 if (index >= inputLength) {

	258 error('invalid-input');

	259 }

	260

	261 const digit = basicToDigit(input.charCodeAt(index++));

	262

	263 if (digit >= base || digit > floor((maxInt - i) / w)) {

	264 error('overflow');

	265 }

	266

	267 i += digit * w;

	268 const t = k <= bias ? tMin : (k >= bias + tMax ? tMax : k - bias);

	269

	270 if (digit < t) {

	271 break;

	272 }

	273

	274 const baseMinusT = base - t;

	275 if (w > floor(maxInt / baseMinusT)) {

	276 error('overflow');

	277 }

	278

	279 w *= baseMinusT;

	280

	281 }

	282

	283 const out = output.length + 1;

	284 bias = adapt(i - oldi, out, oldi == 0);

	285

	286 // `i` was supposed to wrap around from `out` to `0`,

	287 // incrementing `n` each time, so we'll fix that now:

	288 if (floor(i / out) > maxInt - n) {

	289 error('overflow');

	290 }

	291

	292 n += floor(i / out);

	293 i %= out;

	294

	295 // Insert `n` at position `i` of the output.

	296 output.splice(i++, 0, n);

	297

	298 }

	299

	300 return String.fromCodePoint.apply(null, output);

	301 };

	302

	303 /**

	304 * Converts a string of Unicode symbols (e.g. a domain name label) to a

	305 * Punycode string of ASCII-only symbols.

	306 * @memberOf punycode

	307 * @param {String} input The string of Unicode symbols.

	308 * @returns {String} The resulting Punycode string of ASCII-only symbols.

	309 */

	310 const encode = function(input) {

	311 const output = [];

	312

	313 // Convert the input in UCS-2 to an array of Unicode code points.

	314 input = ucs2decode(input);

	315

	316 // Cache the length.

	317 let inputLength = input.length;

	318

	319 // Initialize the state.

	320 let n = initialN;

	321 let delta = 0;

	322 let bias = initialBias;

	323

	324 // Handle the basic code points.

	325 for (let currentValue of input) {

	326 if (currentValue < 0x80) {

	327 output.push(stringFromCharCode(currentValue));

	328 }

	329 }

	330

	331 let basicLength = output.length;

	332 let handledCPCount = basicLength;

	333

	334 // `handledCPCount` is the number of code points that have been handled;

	335 // `basicLength` is the number of basic code points.

	336

	337 // Finish the basic string with a delimiter unless it's empty.

	338 if (basicLength) {

	339 output.push(delimiter);

	340 }

	341

	342 // Main encoding loop:

	343 while (handledCPCount < inputLength) {

	344

	345 // All non-basic code points < n have been handled already. Find the next

	346 // larger one:

	347 let m = maxInt;

	348 for (let currentValue of input) {

	349 if (currentValue >= n && currentValue < m) {

	350 m = currentValue;

	351 }

	352 }

	353

	354 // Increase `delta` enough to advance the decoder's <n,i> state to <m,0>,

	355 // but guard against overflow.

	356 const handledCPCountPlusOne = handledCPCount + 1;

	357 if (m - n > floor((maxInt - delta) / handledCPCountPlusOne)) {

	358 error('overflow');

	359 }

	360

	361 delta += (m - n) * handledCPCountPlusOne;

	362 n = m;

	363

	364 for (let currentValue of input) {

	365 if (currentValue < n && ++delta > maxInt) {

	366 error('overflow');

	367 }

	368 if (currentValue == n) {

	369 // Represent delta as a generalized variable-length integer.

	370 let q = delta;

	371 for (let k = base; /* no condition */; k += base) {

	372 const t = k <= bias ? tMin : (k >= bias + tMax ? tMax : k - bias);

	373 if (q < t) {

	374 break;

	375 }

	376 const qMinusT = q - t;

	377 const baseMinusT = base - t;

	378 output.push(

	379 stringFromCharCode(digitToBasic(t + qMinusT % baseMinusT, 0))

	380 );

	381 q = floor(qMinusT / baseMinusT);

	382 }

	383

	384 output.push(stringFromCharCode(digitToBasic(q, 0)));

	385 bias = adapt(delta, handledCPCountPlusOne, handledCPCount == basicLength);

	386 delta = 0;

	387 ++handledCPCount;

	388 }

	389 }

	390

	391 ++delta;

	392 ++n;

	393

	394 }

	395 return output.join('');

	396 };

	397

	398 /**

	399 * Converts a Punycode string representing a domain name or an email address

	400 * to Unicode. Only the Punycoded parts of the input will be converted, i.e.

	401 * it doesn't matter if you call it on a string that has already been

	402 * converted to Unicode.

	403 * @memberOf punycode

	404 * @param {String} input The Punycoded domain name or email address to

	405 * convert to Unicode.

	406 * @returns {String} The Unicode representation of the given Punycode

	407 * string.

	408 */

	409 const toUnicode = function(input) {

	410 return mapDomain(input, function(string) {

	411 return regexPunycode.test(string)

	412 ? decode(string.slice(4).toLowerCase())

	413 : string;

	414 });

	415 };

	416

	417 /**

	418 * Converts a Unicode string representing a domain name or an email address to

	419 * Punycode. Only the non-ASCII parts of the domain name will be converted,

	420 * i.e. it doesn't matter if you call it with a domain that's already in

	421 * ASCII.

	422 * @memberOf punycode

	423 * @param {String} input The domain name or email address to convert, as a

	424 * Unicode string.

	425 * @returns {String} The Punycode representation of the given domain name or

	426 * email address.

	427 */

	428 const toASCII = function(input) {

	429 return mapDomain(input, function(string) {

	430 return regexNonASCII.test(string)

	431 ? 'xn--' + encode(string)

	432 : string;

	433 });

	434 };

	435

	436 /*--------------------------------------------------------------------------*/

	437

	438 /** Define the public API */

	439 const punycode = exports = {
	Sebastian Noack 2016/08/23 16:27:06: It seems the punycode object isn't used anywhere inside this module. So just assigning to exports, without creating the punycode variable, will do.
	kzar 2016/08/23 16:51:13 (in reply to Sebastian Noack, 2016/08/23 16:27:06): Done.
4 /**	440 /**

5 » * The `punycode` object.	441 » * A string representing the current Punycode.js version number.

6 » * @name punycode	442 » * @memberOf punycode

	443 » * @type String

	444 » */

	445 » 'version': '2.0.0',

	446 » /**

	447 » * An object of methods to convert from JavaScript's internal character

	448 » * representation (UCS-2) to Unicode code points, and back.

	449 » * @see <https://mathiasbynens.be/notes/javascript-encoding>

	450 » * @memberOf punycode

7 * @type Object	451 * @type Object

8 */	452 */

9 » var punycode,	453 » 'ucs2': {

10	454 » » 'decode': ucs2decode,

11 » /** Highest positive signed 32-bit float value */	455 » » 'encode': ucs2encode

12 » maxInt = 2147483647, // aka. 0x7FFFFFFF or 2^31-1

13

14 » /** Bootstring parameters */

15 » base = 36,

16 » tMin = 1,

17 » tMax = 26,

18 » skew = 38,

19 » damp = 700,

20 » initialBias = 72,

21 » initialN = 128, // 0x80

22 » delimiter = '-', // '\x2D'

23

24 » /** Regular expressions */

25 » regexPunycode = /^xn--/,

26 » regexNonASCII = /[^ -~]/, // unprintable ASCII chars + non-ASCII chars

27 » regexSeparators = /\x2E|\u3002|\uFF0E|\uFF61/g, // RFC 3490 separators

28

29 » /** Error messages */

30 » errors = {

31 » » 'overflow': 'Overflow: input needs wider integers to process',

32 » » 'not-basic': 'Illegal input >= 0x80 (not a basic code point)',

33 » » 'invalid-input': 'Invalid input'

34 },	456 },

35	457 'decode': decode,

36 /** Convenience shortcuts */	458 'encode': encode,

37 baseMinusTMin = base - tMin,	459 'toASCII': toASCII,

38 floor = Math.floor,	460 'toUnicode': toUnicode

39 stringFromCharCode = String.fromCharCode,	461 };

40

41 /** Temporary variable */

42 key;

43

44 /*--------------------------------------------------------------------------*/

45

46 /**

47 * A generic error utility function.

48 * @private

49 * @param {String} type The error type.

50 * @returns {Error} Throws a `RangeError` with the applicable error message.

51 */

52 function error(type) {

53 throw RangeError(errors[type]);

54 }

55

56 /**

57 * A generic `Array#map` utility function.

58 * @private

59 * @param {Array} array The array to iterate over.

60 * @param {Function} callback The function that gets called for every array

61 * item.

62 * @returns {Array} A new array of values returned by the callback function.

63 */

64 function map(array, fn) {

65 var length = array.length;

66 while (length--) {

67 array[length] = fn(array[length]);

68 }

69 return array;

70 }

71

72 /**

73 * A simple `Array#map`-like wrapper to work with domain name strings.

74 * @private

75 * @param {String} domain The domain name.

76 * @param {Function} callback The function that gets called for every

77 * character.

78 * @returns {Array} A new string of characters returned by the callback

79 * function.

80 */

81 function mapDomain(string, fn) {

82 return map(string.split(regexSeparators), fn).join('.');

83 }

84

85 /**

86 * Creates an array containing the numeric code points of each Unicode

87 * character in the string. While JavaScript uses UCS-2 internally,

88 * this function will convert a pair of surrogate halves (each of which

89 * UCS-2 exposes as separate characters) into a single code point,

90 * matching UTF-16.

91 * @see `punycode.ucs2.encode`

92 * @see <http://mathiasbynens.be/notes/javascript-encoding>

93 * @memberOf punycode.ucs2

94 * @name decode

95 * @param {String} string The Unicode input string (UCS-2).

96 * @returns {Array} The new array of code points.

97 */

98 function ucs2decode(string) {

99 var output = [],

100 counter = 0,

101 length = string.length,

102 value,

103 extra;

104 while (counter < length) {

105 value = string.charCodeAt(counter++);

106 if (value >= 0xD800 && value <= 0xDBFF && counter < length) {

107 // high surrogate, and there is a next character

108 extra = string.charCodeAt(counter++);

109 if ((extra & 0xFC00) == 0xDC00) { // low surrogate

110 output.push(((value & 0x3FF) << 10) + (extra & 0x3FF) + 0x10000);

111 } else {

112 // unmatched surrogate; only append this code unit, in case the next

113 // code unit is the high surrogate of a surrogate pair

114 output.push(value);

115 counter--;

116 }

117 } else {

118 output.push(value);

119 }

120 }

121 return output;

122 }

123

124 /**

125 * Creates a string based on an array of numeric code points.

126 * @see `punycode.ucs2.decode`

127 * @memberOf punycode.ucs2

128 * @name encode

129 * @param {Array} codePoints The array of numeric code points.

130 * @returns {String} The new Unicode string (UCS-2).

131 */

132 function ucs2encode(array) {

133 return map(array, function(value) {

134 var output = '';

135 if (value > 0xFFFF) {

136 value -= 0x10000;

137 output += stringFromCharCode(value >>> 10 & 0x3FF | 0xD800);

138 value = 0xDC00 | value & 0x3FF;

139 }

140 output += stringFromCharCode(value);

141 return output;

142 }).join('');

143 }

144

145 /**

146 * Converts a basic code point into a digit/integer.

147 * @see `digitToBasic()`

148 * @private

149 * @param {Number} codePoint The basic numeric code point value.

150 * @returns {Number} The numeric value of a basic code point (for use in

151 * representing integers) in the range `0` to `base - 1`, or `base` if

152 * the code point does not represent a value.

153 */

154 function basicToDigit(codePoint) {

155 if (codePoint - 48 < 10) {

156 return codePoint - 22;

157 }

158 if (codePoint - 65 < 26) {

159 return codePoint - 65;

160 }

161 if (codePoint - 97 < 26) {

162 return codePoint - 97;

163 }

164 return base;

165 }

166

167 /**

168 * Converts a digit/integer into a basic code point.

169 * @see `basicToDigit()`

170 * @private

171 * @param {Number} digit The numeric value of a basic code point.

172 * @returns {Number} The basic code point whose value (when used for

173 * representing integers) is `digit`, which needs to be in the range

174 * `0` to `base - 1`. If `flag` is non-zero, the uppercase form is

175 * used; else, the lowercase form is used. The behavior is undefined

176 * if `flag` is non-zero and `digit` has no uppercase form.

177 */

178 function digitToBasic(digit, flag) {

179 // 0..25 map to ASCII a..z or A..Z

180 // 26..35 map to ASCII 0..9

181 return digit + 22 + 75 * (digit < 26) - ((flag != 0) << 5);

182 }

183

184 /**

185 * Bias adaptation function as per section 3.4 of RFC 3492.

186 * http://tools.ietf.org/html/rfc3492#section-3.4

187 * @private

188 */

189 function adapt(delta, numPoints, firstTime) {

190 var k = 0;

191 delta = firstTime ? floor(delta / damp) : delta >> 1;

192 delta += floor(delta / numPoints);

193 for (/* no initialization */; delta > baseMinusTMin * tMax >> 1; k += base) {

194 delta = floor(delta / baseMinusTMin);

195 }

196 return floor(k + (baseMinusTMin + 1) * delta / (delta + skew));

197 }

198

199 /**

200 * Converts a Punycode string of ASCII-only symbols to a string of Unicode

201 * symbols.

202 * @memberOf punycode

203 * @param {String} input The Punycode string of ASCII-only symbols.

204 * @returns {String} The resulting string of Unicode symbols.

205 */

206 function decode(input) {

207 // Don't use UCS-2

208 var output = [],

209 inputLength = input.length,

210 out,

211 i = 0,

212 n = initialN,

213 bias = initialBias,

214 basic,

215 j,

216 index,

217 oldi,

218 w,

219 k,

220 digit,

221 t,

222 length,

223 /** Cached calculation results */

224 baseMinusT;

225

226 // Handle the basic code points: let `basic` be the number of input code

227 // points before the last delimiter, or `0` if there is none, then copy

228 // the first basic code points to the output.

229

230 basic = input.lastIndexOf(delimiter);

231 if (basic < 0) {

232 basic = 0;

233 }

234

235 for (j = 0; j < basic; ++j) {

236 // if it's not a basic code point

237 if (input.charCodeAt(j) >= 0x80) {

238 error('not-basic');

239 }

240 output.push(input.charCodeAt(j));

241 }

242

243 // Main decoding loop: start just after the last delimiter if any basic code

244 // points were copied; start at the beginning otherwise.

245

246 for (index = basic > 0 ? basic + 1 : 0; index < inputLength; /* no final expression */) {

247

248 // `index` is the index of the next character to be consumed.

249 // Decode a generalized variable-length integer into `delta`,

250 // which gets added to `i`. The overflow checking is easier

251 // if we increase `i` as we go, then subtract off its starting

252 // value at the end to obtain `delta`.

253 for (oldi = i, w = 1, k = base; /* no condition */; k += base) {

254

255 if (index >= inputLength) {

256 error('invalid-input');

257 }

258

259 digit = basicToDigit(input.charCodeAt(index++));

260

261 if (digit >= base || digit > floor((maxInt - i) / w)) {

262 error('overflow');

263 }

264

265 i += digit * w;

266 t = k <= bias ? tMin : (k >= bias + tMax ? tMax : k - bias);

267

268 if (digit < t) {

269 break;

270 }

271

272 baseMinusT = base - t;

273 if (w > floor(maxInt / baseMinusT)) {

274 error('overflow');

275 }

276

277 w *= baseMinusT;

278

279 }

280

281 out = output.length + 1;

282 bias = adapt(i - oldi, out, oldi == 0);

283

284 // `i` was supposed to wrap around from `out` to `0`,

285 // incrementing `n` each time, so we'll fix that now:

286 if (floor(i / out) > maxInt - n) {

287 error('overflow');

288 }

289

290 n += floor(i / out);

291 i %= out;

292

293 // Insert `n` at position `i` of the output

294 output.splice(i++, 0, n);

295

296 }

297

298 return ucs2encode(output);

299 }

300

301 /**

302 * Converts a string of Unicode symbols to a Punycode string of ASCII-only

303 * symbols.

304 * @memberOf punycode

305 * @param {String} input The string of Unicode symbols.

306 * @returns {String} The resulting Punycode string of ASCII-only symbols.

307 */

308 function encode(input) {

309 var n,

310 delta,

311 handledCPCount,

312 basicLength,

313 bias,

314 j,

315 m,

316 q,

317 k,

318 t,

319 currentValue,

320 output = [],

321 /** `inputLength` will hold the number of code points in `input`. */

322 inputLength,

323 /** Cached calculation results */

324 handledCPCountPlusOne,

325 baseMinusT,

326 qMinusT;

327

328 // Convert the input in UCS-2 to Unicode

329 input = ucs2decode(input);

330

331 // Cache the length

332 inputLength = input.length;

333

334 // Initialize the state

335 n = initialN;

336 delta = 0;

337 bias = initialBias;

338

339 // Handle the basic code points

340 for (j = 0; j < inputLength; ++j) {

341 currentValue = input[j];

342 if (currentValue < 0x80) {

343 output.push(stringFromCharCode(currentValue));

344 }

345 }

346

347 handledCPCount = basicLength = output.length;

348

349 // `handledCPCount` is the number of code points that have been handled;

350 // `basicLength` is the number of basic code points.

351

352 // Finish the basic string - if it is not empty - with a delimiter

353 if (basicLength) {

354 output.push(delimiter);

355 }

356

357 // Main encoding loop:

358 while (handledCPCount < inputLength) {

359

360 // All non-basic code points < n have been handled already. Find the next

361 // larger one:

362 for (m = maxInt, j = 0; j < inputLength; ++j) {

363 currentValue = input[j];

364 if (currentValue >= n && currentValue < m) {

365 m = currentValue;

366 }

367 }

368

369 // Increase `delta` enough to advance the decoder's <n,i> state to <m,0>,

370 // but guard against overflow

371 handledCPCountPlusOne = handledCPCount + 1;

372 if (m - n > floor((maxInt - delta) / handledCPCountPlusOne)) {

373 error('overflow');

374 }

375

376 delta += (m - n) * handledCPCountPlusOne;

377 n = m;

378

379 for (j = 0; j < inputLength; ++j) {

380 currentValue = input[j];

381

382 if (currentValue < n && ++delta > maxInt) {

383 error('overflow');

384 }

385

386 if (currentValue == n) {

387 // Represent delta as a generalized variable-length integer

388 for (q = delta, k = base; /* no condition */; k += base) {

389 t = k <= bias ? tMin : (k >= bias + tMax ? tMax : k - bias);

390 if (q < t) {

391 break;

392 }

393 qMinusT = q - t;

394 baseMinusT = base - t;

395 output.push(

396 stringFromCharCode(digitToBasic(t + qMinusT % baseMinusT, 0))

397 );

398 q = floor(qMinusT / baseMinusT);

399 }

400

401 output.push(stringFromCharCode(digitToBasic(q, 0)));

402 bias = adapt(delta, handledCPCountPlusOne, handledCPCount == basicLength);

403 delta = 0;

404 ++handledCPCount;

405 }

406 }

407

408 ++delta;

409 ++n;

410

411 }

412 return output.join('');

413 }

414

415 /**

416 * Converts a Punycode string representing a domain name to Unicode. Only the

417 * Punycoded parts of the domain name will be converted, i.e. it doesn't

418 * matter if you call it on a string that has already been converted to

419 * Unicode.

420 * @memberOf punycode

421 * @param {String} domain The Punycode domain name to convert to Unicode.

422 * @returns {String} The Unicode representation of the given Punycode

423 * string.

424 */

425 function toUnicode(domain) {

426 return mapDomain(domain, function(string) {

427 return regexPunycode.test(string)

428 ? decode(string.slice(4).toLowerCase())

429 : string;

430 });

431 }

432

433 /**

434 * Converts a Unicode string representing a domain name to Punycode. Only the

435 * non-ASCII parts of the domain name will be converted, i.e. it doesn't

436 * matter if you call it with a domain that's already in ASCII.

437 * @memberOf punycode

438 * @param {String} domain The domain name to convert, as a Unicode string.

439 * @returns {String} The Punycode representation of the given domain name.

440 */

441 function toASCII(domain) {

442 return mapDomain(domain, function(string) {

443 return regexNonASCII.test(string)

444 ? 'xn--' + encode(string)

445 : string;

446 });

447 }

448

449 /*--------------------------------------------------------------------------*/

450

451 /** Define the public API */

452 exports = {

453 /**

454 * A string representing the current Punycode.js version number.

455 * @memberOf punycode

456 * @type String

457 */

458 'version': '1.2.3',

459 /**

460 * An object of methods to convert from JavaScript's internal character

461 * representation (UCS-2) to Unicode code points, and back.

462 * @see <http://mathiasbynens.be/notes/javascript-encoding>

463 * @memberOf punycode

464 * @type Object

465 */

466 'ucs2': {

467 'decode': ucs2decode,

468 'encode': ucs2encode

469 },

470 'decode': decode,

471 'encode': encode,

472 'toASCII': toASCII,

473 'toUnicode': toUnicode

474 };

475 }());

OLD	NEW

« no previous file with comments | « no previous file | no next file » | no next file with comments »