lib/punycode.js - Issue 29350076: Issue 4350 - Update punycode.js to 2.0.0

Side by Side Diff: lib/punycode.js

Issue 29350076: Issue 4350 - Update punycode.js to 2.0.0 (Closed)

Patch Set: Addressed feedback Created Aug. 23, 2016, 4:49 p.m.

Left:
Right:

Use n/p to move between diff chunks; N/P to move between comments.

Jump to:

View unified diff | Download patch

OLD	NEW
1 /*! http://mths.be/punycode v1.2.3 by @mathias */	1 /*

2 /* Used under GPL 2.0, see https://github.com/bestiejs/punycode.js/blob/master/LICENSE-GPL.txt */	2 * Copyright (C) 2011-2016 Mathias Bynens <https://mathiasbynens.be/>

3 ;(function() {	3 * Copyright (C) 2016 Eyeo GmbH (Minor modifications for compatibility.)

	4 *

	5 * Permission is hereby granted, free of charge, to any person obtaining

	6 * a copy of this software and associated documentation files (the

	7 * "Software"), to deal in the Software without restriction, including

	8 * without limitation the rights to use, copy, modify, merge, publish,

	9 * distribute, sublicense, and/or sell copies of the Software, and to

	10 * permit persons to whom the Software is furnished to do so, subject to

	11 * the following conditions:

	12 *

	13 * The above copyright notice and this permission notice shall be

	14 * included in all copies or substantial portions of the Software.

	15 *

	16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,

	17 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF

	18 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND

	19 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE

	20 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION

	21 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION

	22 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

	23 */

	24

	25 'use strict';

	26

	27 /** Highest positive signed 32-bit float value */

	28 const maxInt = 2147483647; // aka. 0x7FFFFFFF or 2^31-1

	29

	30 /** Bootstring parameters */

	31 const base = 36;

	32 const tMin = 1;

	33 const tMax = 26;

	34 const skew = 38;

	35 const damp = 700;

	36 const initialBias = 72;

	37 const initialN = 128; // 0x80

	38 const delimiter = '-'; // '\x2D'

	39

	40 /** Regular expressions */

	41 const regexPunycode = /^xn--/;

	42 const regexNonASCII = /[^\x20-\x7E]/; // unprintable ASCII chars + non-ASCII chars

	43 const regexSeparators = /[\x2E\u3002\uFF0E\uFF61]/g; // RFC 3490 separators

	44

	45 /** Error messages */

	46 const errors = {

	47 'overflow': 'Overflow: input needs wider integers to process',

	48 'not-basic': 'Illegal input >= 0x80 (not a basic code point)',

	49 'invalid-input': 'Invalid input'

	50 };

	51

	52 /** Convenience shortcuts */

	53 const baseMinusTMin = base - tMin;

	54 const floor = Math.floor;

	55 const stringFromCharCode = String.fromCharCode;

	56

	57 /*--------------------------------------------------------------------------*/

	58

	59 /**

	60 * A generic error utility function.

	61 * @private

	62 * @param {String} type The error type.

	63 * @returns {Error} Throws a `RangeError` with the applicable error message.

	64 */

	65 function error(type) {

	66 throw new RangeError(errors[type]);

	67 }

	68

	69 /**

	70 * A generic `Array#map` utility function.

	71 * @private

	72 * @param {Array} array The array to iterate over.

	73 * @param {Function} callback The function that gets called for every array

	74 * item.

	75 * @returns {Array} A new array of values returned by the callback function.

	76 */

	77 function map(array, fn) {

	78 const result = [];

	79 let length = array.length;

	80 while (length--) {

	81 result[length] = fn(array[length]);

	82 }

	83 return result;

	84 }

	85

	86 /**

	87 * A simple `Array#map`-like wrapper to work with domain name strings or email

	88 * addresses.

	89 * @private

	90 * @param {String} domain The domain name or email address.

	91 * @param {Function} callback The function that gets called for every

	92 * character.

	93 * @returns {Array} A new string of characters returned by the callback

	94 * function.

	95 */

	96 function mapDomain(string, fn) {

	97 const parts = string.split('@');

	98 let result = '';

	99 if (parts.length > 1) {

	100 // In email addresses, only the domain name should be punycoded. Leave

	101 // the local part (i.e. everything up to `@`) intact.

	102 result = parts[0] + '@';

	103 string = parts[1];

	104 }

	105 // Avoid `split(regex)` for IE8 compatibility. See #17.

	106 string = string.replace(regexSeparators, '\x2E');

	107 const labels = string.split('.');

	108 const encoded = map(labels, fn).join('.');

	109 return result + encoded;

	110 }

	111

	112 /**

	113 * Creates an array containing the numeric code points of each Unicode

	114 * character in the string. While JavaScript uses UCS-2 internally,

	115 * this function will convert a pair of surrogate halves (each of which

	116 * UCS-2 exposes as separate characters) into a single code point,

	117 * matching UTF-16.

	118 * @see `punycode.ucs2.encode`

	119 * @see <https://mathiasbynens.be/notes/javascript-encoding>

	120 * @memberOf punycode.ucs2

	121 * @name decode

	122 * @param {String} string The Unicode input string (UCS-2).

	123 * @returns {Array} The new array of code points.

	124 */

	125 function ucs2decode(string) {

	126 const output = [];

	127 let counter = 0;

	128 const length = string.length;

	129 while (counter < length) {

	130 const value = string.charCodeAt(counter++);

	131 if (value >= 0xD800 && value <= 0xDBFF && counter < length) {

	132 // It's a high surrogate, and there is a next character.

	133 const extra = string.charCodeAt(counter++);

	134 if ((extra & 0xFC00) == 0xDC00) { // Low surrogate.

	135 output.push(((value & 0x3FF) << 10) + (extra & 0x3FF) + 0x10000);

	136 } else {

	137 // It's an unmatched surrogate; only append this code unit, in case the

	138 // next code unit is the high surrogate of a surrogate pair.

	139 output.push(value);

	140 counter--;

	141 }

	142 } else {

	143 output.push(value);

	144 }

	145 }

	146 return output;

	147 }

	148

	149 /**

	150 * Creates a string based on an array of numeric code points.

	151 * @see `punycode.ucs2.decode`

	152 * @memberOf punycode.ucs2

	153 * @name encode

	154 * @param {Array} codePoints The array of numeric code points.

	155 * @returns {String} The new Unicode string (UCS-2).

	156 */

	157 const ucs2encode = array => String.fromCodePoint.apply(null, array);

	158

	159 /**

	160 * Converts a basic code point into a digit/integer.

	161 * @see `digitToBasic()`

	162 * @private

	163 * @param {Number} codePoint The basic numeric code point value.

	164 * @returns {Number} The numeric value of a basic code point (for use in

	165 * representing integers) in the range `0` to `base - 1`, or `base` if

	166 * the code point does not represent a value.

	167 */

	168 const basicToDigit = function(codePoint) {

	169 if (codePoint - 0x30 < 0x0A) {

	170 return codePoint - 0x16;

	171 }

	172 if (codePoint - 0x41 < 0x1A) {

	173 return codePoint - 0x41;

	174 }

	175 if (codePoint - 0x61 < 0x1A) {

	176 return codePoint - 0x61;

	177 }

	178 return base;

	179 };

	180

	181 /**

	182 * Converts a digit/integer into a basic code point.

	183 * @see `basicToDigit()`

	184 * @private

	185 * @param {Number} digit The numeric value of a basic code point.

	186 * @returns {Number} The basic code point whose value (when used for

	187 * representing integers) is `digit`, which needs to be in the range

	188 * `0` to `base - 1`. If `flag` is non-zero, the uppercase form is

	189 * used; else, the lowercase form is used. The behavior is undefined

	190 * if `flag` is non-zero and `digit` has no uppercase form.

	191 */

	192 const digitToBasic = function(digit, flag) {

	193 // 0..25 map to ASCII a..z or A..Z

	194 // 26..35 map to ASCII 0..9

	195 return digit + 22 + 75 * (digit < 26) - ((flag != 0) << 5);

	196 };

	197

	198 /**

	199 * Bias adaptation function as per section 3.4 of RFC 3492.

	200 * https://tools.ietf.org/html/rfc3492#section-3.4

	201 * @private

	202 */

	203 const adapt = function(delta, numPoints, firstTime) {

	204 let k = 0;

	205 delta = firstTime ? floor(delta / damp) : delta >> 1;

	206 delta += floor(delta / numPoints);

	207 for (/* no initialization */; delta > baseMinusTMin * tMax >> 1; k += base) {

	208 delta = floor(delta / baseMinusTMin);

	209 }

	210 return floor(k + (baseMinusTMin + 1) * delta / (delta + skew));

	211 };

	212

	213 /**

	214 * Converts a Punycode string of ASCII-only symbols to a string of Unicode

	215 * symbols.

	216 * @memberOf punycode

	217 * @param {String} input The Punycode string of ASCII-only symbols.

	218 * @returns {String} The resulting string of Unicode symbols.

	219 */

	220 const decode = function(input) {

	221 // Don't use UCS-2.

	222 const output = [];

	223 const inputLength = input.length;

	224 let i = 0;

	225 let n = initialN;

	226 let bias = initialBias;

	227

	228 // Handle the basic code points: let `basic` be the number of input code

	229 // points before the last delimiter, or `0` if there is none, then copy

	230 // the first basic code points to the output.

	231

	232 let basic = input.lastIndexOf(delimiter);

	233 if (basic < 0) {

	234 basic = 0;

	235 }

	236

	237 for (let j = 0; j < basic; ++j) {

	238 // if it's not a basic code point

	239 if (input.charCodeAt(j) >= 0x80) {

	240 error('not-basic');

	241 }

	242 output.push(input.charCodeAt(j));

	243 }

	244

	245 // Main decoding loop: start just after the last delimiter if any basic code

	246 // points were copied; start at the beginning otherwise.

	247

	248 for (let index = basic > 0 ? basic + 1 : 0; index < inputLength; /* no final expression */) {

	249

	250 // `index` is the index of the next character to be consumed.

	251 // Decode a generalized variable-length integer into `delta`,

	252 // which gets added to `i`. The overflow checking is easier

	253 // if we increase `i` as we go, then subtract off its starting

	254 // value at the end to obtain `delta`.

	255 let oldi = i;

	256 for (let w = 1, k = base; /* no condition */; k += base) {

	257

	258 if (index >= inputLength) {

	259 error('invalid-input');

	260 }

	261

	262 const digit = basicToDigit(input.charCodeAt(index++));

	263

	264 if (digit >= base || digit > floor((maxInt - i) / w)) {

	265 error('overflow');

	266 }

	267

	268 i += digit * w;

	269 const t = k <= bias ? tMin : (k >= bias + tMax ? tMax : k - bias);

	270

	271 if (digit < t) {

	272 break;

	273 }

	274

	275 const baseMinusT = base - t;

	276 if (w > floor(maxInt / baseMinusT)) {

	277 error('overflow');

	278 }

	279

	280 w *= baseMinusT;

	281

	282 }

	283

	284 const out = output.length + 1;

	285 bias = adapt(i - oldi, out, oldi == 0);

	286

	287 // `i` was supposed to wrap around from `out` to `0`,

	288 // incrementing `n` each time, so we'll fix that now:

	289 if (floor(i / out) > maxInt - n) {

	290 error('overflow');

	291 }

	292

	293 n += floor(i / out);

	294 i %= out;

	295

	296 // Insert `n` at position `i` of the output.

	297 output.splice(i++, 0, n);

	298

	299 }

	300

	301 return String.fromCodePoint.apply(null, output);

	302 };

	303

	304 /**

	305 * Converts a string of Unicode symbols (e.g. a domain name label) to a

	306 * Punycode string of ASCII-only symbols.

	307 * @memberOf punycode

	308 * @param {String} input The string of Unicode symbols.

	309 * @returns {String} The resulting Punycode string of ASCII-only symbols.

	310 */

	311 const encode = function(input) {

	312 const output = [];

	313

	314 // Convert the input in UCS-2 to an array of Unicode code points.

	315 input = ucs2decode(input);

	316

	317 // Cache the length.

	318 let inputLength = input.length;

	319

	320 // Initialize the state.

	321 let n = initialN;

	322 let delta = 0;

	323 let bias = initialBias;

	324

	325 // Handle the basic code points.

	326 for (let currentValue of input) {

	327 if (currentValue < 0x80) {

	328 output.push(stringFromCharCode(currentValue));

	329 }

	330 }

	331

	332 let basicLength = output.length;

	333 let handledCPCount = basicLength;

	334

	335 // `handledCPCount` is the number of code points that have been handled;

	336 // `basicLength` is the number of basic code points.

	337

	338 // Finish the basic string with a delimiter unless it's empty.

	339 if (basicLength) {

	340 output.push(delimiter);

	341 }

	342

	343 // Main encoding loop:

	344 while (handledCPCount < inputLength) {

	345

	346 // All non-basic code points < n have been handled already. Find the next

	347 // larger one:

	348 let m = maxInt;

	349 for (let currentValue of input) {

	350 if (currentValue >= n && currentValue < m) {

	351 m = currentValue;

	352 }

	353 }

	354

	355 // Increase `delta` enough to advance the decoder's <n,i> state to <m,0>,

	356 // but guard against overflow.

	357 const handledCPCountPlusOne = handledCPCount + 1;

	358 if (m - n > floor((maxInt - delta) / handledCPCountPlusOne)) {

	359 error('overflow');

	360 }

	361

	362 delta += (m - n) * handledCPCountPlusOne;

	363 n = m;

	364

	365 for (let currentValue of input) {

	366 if (currentValue < n && ++delta > maxInt) {

	367 error('overflow');

	368 }

	369 if (currentValue == n) {

	370 // Represent delta as a generalized variable-length integer.

	371 let q = delta;

	372 for (let k = base; /* no condition */; k += base) {

	373 const t = k <= bias ? tMin : (k >= bias + tMax ? tMax : k - bias);

	374 if (q < t) {

	375 break;

	376 }

	377 const qMinusT = q - t;

	378 const baseMinusT = base - t;

	379 output.push(

	380 stringFromCharCode(digitToBasic(t + qMinusT % baseMinusT, 0))

	381 );

	382 q = floor(qMinusT / baseMinusT);

	383 }

	384

	385 output.push(stringFromCharCode(digitToBasic(q, 0)));

	386 bias = adapt(delta, handledCPCountPlusOne, handledCPCount == basicLength);

	387 delta = 0;

	388 ++handledCPCount;

	389 }

	390 }

	391

	392 ++delta;

	393 ++n;

	394

	395 }

	396 return output.join('');

	397 };

	398

	399 /**

	400 * Converts a Punycode string representing a domain name or an email address

	401 * to Unicode. Only the Punycoded parts of the input will be converted, i.e.

	402 * it doesn't matter if you call it on a string that has already been

	403 * converted to Unicode.

	404 * @memberOf punycode

	405 * @param {String} input The Punycoded domain name or email address to

	406 * convert to Unicode.

	407 * @returns {String} The Unicode representation of the given Punycode

	408 * string.

	409 */

	410 const toUnicode = function(input) {

	411 return mapDomain(input, function(string) {

	412 return regexPunycode.test(string)

	413 ? decode(string.slice(4).toLowerCase())

	414 : string;

	415 });

	416 };

	417

	418 /**

	419 * Converts a Unicode string representing a domain name or an email address to

	420 * Punycode. Only the non-ASCII parts of the domain name will be converted,

	421 * i.e. it doesn't matter if you call it with a domain that's already in

	422 * ASCII.

	423 * @memberOf punycode

	424 * @param {String} input The domain name or email address to convert, as a

	425 * Unicode string.

	426 * @returns {String} The Punycode representation of the given domain name or

	427 * email address.

	428 */

	429 const toASCII = function(input) {

	430 return mapDomain(input, function(string) {

	431 return regexNonASCII.test(string)

	432 ? 'xn--' + encode(string)

	433 : string;

	434 });

	435 };

	436

	437 /*--------------------------------------------------------------------------*/

	438

	439 /** Define the public API */

	440 exports = {

4 /**	441 /**

5 » * The `punycode` object.	442 » * A string representing the current Punycode.js version number.

6 » * @name punycode	443 » * @memberOf punycode

	444 » * @type String

	445 » */

	446 » 'version': '2.0.0',

	447 » /**

	448 » * An object of methods to convert from JavaScript's internal character

	449 » * representation (UCS-2) to Unicode code points, and back.

	450 » * @see <https://mathiasbynens.be/notes/javascript-encoding>

	451 » * @memberOf punycode

7 * @type Object	452 * @type Object

8 */	453 */

9 » var punycode,	454 » 'ucs2': {

10	455 » » 'decode': ucs2decode,

11 » /** Highest positive signed 32-bit float value */	456 » » 'encode': ucs2encode

12 » maxInt = 2147483647, // aka. 0x7FFFFFFF or 2^31-1

13

14 » /** Bootstring parameters */

15 » base = 36,

16 » tMin = 1,

17 » tMax = 26,

18 » skew = 38,

19 » damp = 700,

20 » initialBias = 72,

21 » initialN = 128, // 0x80

22 » delimiter = '-', // '\x2D'

23

24 » /** Regular expressions */

25 » regexPunycode = /^xn--/,

26 » regexNonASCII = /[^ -~]/, // unprintable ASCII chars + non-ASCII chars

27 » regexSeparators = /\x2E|\u3002|\uFF0E|\uFF61/g, // RFC 3490 separators

28

29 » /** Error messages */

30 » errors = {

31 » » 'overflow': 'Overflow: input needs wider integers to process',

32 » » 'not-basic': 'Illegal input >= 0x80 (not a basic code point)',

33 » » 'invalid-input': 'Invalid input'

34 },	457 },

35	458 'decode': decode,

36 /** Convenience shortcuts */	459 'encode': encode,

37 baseMinusTMin = base - tMin,	460 'toASCII': toASCII,

38 floor = Math.floor,	461 'toUnicode': toUnicode

39 stringFromCharCode = String.fromCharCode,	462 };

40

41 /** Temporary variable */

42 key;

43

44 /*--------------------------------------------------------------------------*/

45

46 /**

47 * A generic error utility function.

48 * @private

49 * @param {String} type The error type.

50 * @returns {Error} Throws a `RangeError` with the applicable error message.

51 */

52 function error(type) {

53 throw RangeError(errors[type]);

54 }

55

56 /**

57 * A generic `Array#map` utility function.

58 * @private

59 * @param {Array} array The array to iterate over.

60 * @param {Function} callback The function that gets called for every array

61 * item.

62 * @returns {Array} A new array of values returned by the callback function.

63 */

64 function map(array, fn) {

65 var length = array.length;

66 while (length--) {

67 array[length] = fn(array[length]);

68 }

69 return array;

70 }

71

72 /**

73 * A simple `Array#map`-like wrapper to work with domain name strings.

74 * @private

75 * @param {String} domain The domain name.

76 * @param {Function} callback The function that gets called for every

77 * character.

78 * @returns {Array} A new string of characters returned by the callback

79 * function.

80 */

81 function mapDomain(string, fn) {

82 return map(string.split(regexSeparators), fn).join('.');

83 }

84

85 /**

86 * Creates an array containing the numeric code points of each Unicode

87 * character in the string. While JavaScript uses UCS-2 internally,

88 * this function will convert a pair of surrogate halves (each of which

89 * UCS-2 exposes as separate characters) into a single code point,

90 * matching UTF-16.

91 * @see `punycode.ucs2.encode`

92 * @see <http://mathiasbynens.be/notes/javascript-encoding>

93 * @memberOf punycode.ucs2

94 * @name decode

95 * @param {String} string The Unicode input string (UCS-2).

96 * @returns {Array} The new array of code points.

97 */

98 function ucs2decode(string) {

99 var output = [],

100 counter = 0,

101 length = string.length,

102 value,

103 extra;

104 while (counter < length) {

105 value = string.charCodeAt(counter++);

106 if (value >= 0xD800 && value <= 0xDBFF && counter < length) {

107 // high surrogate, and there is a next character

108 extra = string.charCodeAt(counter++);

109 if ((extra & 0xFC00) == 0xDC00) { // low surrogate

110 output.push(((value & 0x3FF) << 10) + (extra & 0x3FF) + 0x10000);

111 } else {

112 // unmatched surrogate; only append this code unit, in case the next

113 // code unit is the high surrogate of a surrogate pair

114 output.push(value);

115 counter--;

116 }

117 } else {

118 output.push(value);

119 }

120 }

121 return output;

122 }

123

124 /**

125 * Creates a string based on an array of numeric code points.

126 * @see `punycode.ucs2.decode`

127 * @memberOf punycode.ucs2

128 * @name encode

129 * @param {Array} codePoints The array of numeric code points.

130 * @returns {String} The new Unicode string (UCS-2).

131 */

132 function ucs2encode(array) {

133 return map(array, function(value) {

134 var output = '';

135 if (value > 0xFFFF) {

136 value -= 0x10000;

137 output += stringFromCharCode(value >>> 10 & 0x3FF | 0xD800);

138 value = 0xDC00 | value & 0x3FF;

139 }

140 output += stringFromCharCode(value);

141 return output;

142 }).join('');

143 }

144

145 /**

146 * Converts a basic code point into a digit/integer.

147 * @see `digitToBasic()`

148 * @private

149 * @param {Number} codePoint The basic numeric code point value.

150 * @returns {Number} The numeric value of a basic code point (for use in

151 * representing integers) in the range `0` to `base - 1`, or `base` if

152 * the code point does not represent a value.

153 */

154 function basicToDigit(codePoint) {

155 if (codePoint - 48 < 10) {

156 return codePoint - 22;

157 }

158 if (codePoint - 65 < 26) {

159 return codePoint - 65;

160 }

161 if (codePoint - 97 < 26) {

162 return codePoint - 97;

163 }

164 return base;

165 }

166

167 /**

168 * Converts a digit/integer into a basic code point.

169 * @see `basicToDigit()`

170 * @private

171 * @param {Number} digit The numeric value of a basic code point.

172 * @returns {Number} The basic code point whose value (when used for

173 * representing integers) is `digit`, which needs to be in the range

174 * `0` to `base - 1`. If `flag` is non-zero, the uppercase form is

175 * used; else, the lowercase form is used. The behavior is undefined

176 * if `flag` is non-zero and `digit` has no uppercase form.

177 */

178 function digitToBasic(digit, flag) {

179 // 0..25 map to ASCII a..z or A..Z

180 // 26..35 map to ASCII 0..9

181 return digit + 22 + 75 * (digit < 26) - ((flag != 0) << 5);

182 }

183

184 /**

185 * Bias adaptation function as per section 3.4 of RFC 3492.

186 * http://tools.ietf.org/html/rfc3492#section-3.4

187 * @private

188 */

189 function adapt(delta, numPoints, firstTime) {

190 var k = 0;

191 delta = firstTime ? floor(delta / damp) : delta >> 1;

192 delta += floor(delta / numPoints);

193 for (/* no initialization */; delta > baseMinusTMin * tMax >> 1; k += base) {

194 delta = floor(delta / baseMinusTMin);

195 }

196 return floor(k + (baseMinusTMin + 1) * delta / (delta + skew));

197 }

198

199 /**

200 * Converts a Punycode string of ASCII-only symbols to a string of Unicode

201 * symbols.

202 * @memberOf punycode

203 * @param {String} input The Punycode string of ASCII-only symbols.

204 * @returns {String} The resulting string of Unicode symbols.

205 */

206 function decode(input) {

207 // Don't use UCS-2

208 var output = [],

209 inputLength = input.length,

210 out,

211 i = 0,

212 n = initialN,

213 bias = initialBias,

214 basic,

215 j,

216 index,

217 oldi,

218 w,

219 k,

220 digit,

221 t,

222 length,

223 /** Cached calculation results */

224 baseMinusT;

225

226 // Handle the basic code points: let `basic` be the number of input code

227 // points before the last delimiter, or `0` if there is none, then copy

228 // the first basic code points to the output.

229

230 basic = input.lastIndexOf(delimiter);

231 if (basic < 0) {

232 basic = 0;

233 }

234

235 for (j = 0; j < basic; ++j) {

236 // if it's not a basic code point

237 if (input.charCodeAt(j) >= 0x80) {

238 error('not-basic');

239 }

240 output.push(input.charCodeAt(j));

241 }

242

243 // Main decoding loop: start just after the last delimiter if any basic code

244 // points were copied; start at the beginning otherwise.

245

246 for (index = basic > 0 ? basic + 1 : 0; index < inputLength; /* no final expression */) {

247

248 // `index` is the index of the next character to be consumed.

249 // Decode a generalized variable-length integer into `delta`,

250 // which gets added to `i`. The overflow checking is easier

251 // if we increase `i` as we go, then subtract off its starting

252 // value at the end to obtain `delta`.

253 for (oldi = i, w = 1, k = base; /* no condition */; k += base) {

254

255 if (index >= inputLength) {

256 error('invalid-input');

257 }

258

259 digit = basicToDigit(input.charCodeAt(index++));

260

261 if (digit >= base || digit > floor((maxInt - i) / w)) {

262 error('overflow');

263 }

264

265 i += digit * w;

266 t = k <= bias ? tMin : (k >= bias + tMax ? tMax : k - bias);

267

268 if (digit < t) {

269 break;

270 }

271

272 baseMinusT = base - t;

273 if (w > floor(maxInt / baseMinusT)) {

274 error('overflow');

275 }

276

277 w *= baseMinusT;

278

279 }

280

281 out = output.length + 1;

282 bias = adapt(i - oldi, out, oldi == 0);

283

284 // `i` was supposed to wrap around from `out` to `0`,

285 // incrementing `n` each time, so we'll fix that now:

286 if (floor(i / out) > maxInt - n) {

287 error('overflow');

288 }

289

290 n += floor(i / out);

291 i %= out;

292

293 // Insert `n` at position `i` of the output

294 output.splice(i++, 0, n);

295

296 }

297

298 return ucs2encode(output);

299 }

300

301 /**

302 * Converts a string of Unicode symbols to a Punycode string of ASCII-only

303 * symbols.

304 * @memberOf punycode

305 * @param {String} input The string of Unicode symbols.

306 * @returns {String} The resulting Punycode string of ASCII-only symbols.

307 */

308 function encode(input) {

309 var n,

310 delta,

311 handledCPCount,

312 basicLength,

313 bias,

314 j,

315 m,

316 q,

317 k,

318 t,

319 currentValue,

320 output = [],

321 /** `inputLength` will hold the number of code points in `input`. */

322 inputLength,

323 /** Cached calculation results */

324 handledCPCountPlusOne,

325 baseMinusT,

326 qMinusT;

327

328 // Convert the input in UCS-2 to Unicode

329 input = ucs2decode(input);

330

331 // Cache the length

332 inputLength = input.length;

333

334 // Initialize the state

335 n = initialN;

336 delta = 0;

337 bias = initialBias;

338

339 // Handle the basic code points

340 for (j = 0; j < inputLength; ++j) {

341 currentValue = input[j];

342 if (currentValue < 0x80) {

343 output.push(stringFromCharCode(currentValue));

344 }

345 }

346

347 handledCPCount = basicLength = output.length;

348

349 // `handledCPCount` is the number of code points that have been handled;

350 // `basicLength` is the number of basic code points.

351

352 // Finish the basic string - if it is not empty - with a delimiter

353 if (basicLength) {

354 output.push(delimiter);

355 }

356

357 // Main encoding loop:

358 while (handledCPCount < inputLength) {

359

360 // All non-basic code points < n have been handled already. Find the next

361 // larger one:

362 for (m = maxInt, j = 0; j < inputLength; ++j) {

363 currentValue = input[j];

364 if (currentValue >= n && currentValue < m) {

365 m = currentValue;

366 }

367 }

368

369 // Increase `delta` enough to advance the decoder's <n,i> state to <m,0>,

370 // but guard against overflow

371 handledCPCountPlusOne = handledCPCount + 1;

372 if (m - n > floor((maxInt - delta) / handledCPCountPlusOne)) {

373 error('overflow');

374 }

375

376 delta += (m - n) * handledCPCountPlusOne;

377 n = m;

378

379 for (j = 0; j < inputLength; ++j) {

380 currentValue = input[j];

381

382 if (currentValue < n && ++delta > maxInt) {

383 error('overflow');

384 }

385

386 if (currentValue == n) {

387 // Represent delta as a generalized variable-length integer

388 for (q = delta, k = base; /* no condition */; k += base) {

389 t = k <= bias ? tMin : (k >= bias + tMax ? tMax : k - bias);

390 if (q < t) {

391 break;

392 }

393 qMinusT = q - t;

394 baseMinusT = base - t;

395 output.push(

396 stringFromCharCode(digitToBasic(t + qMinusT % baseMinusT, 0))

397 );

398 q = floor(qMinusT / baseMinusT);

399 }

400

401 output.push(stringFromCharCode(digitToBasic(q, 0)));

402 bias = adapt(delta, handledCPCountPlusOne, handledCPCount == basicLength);

403 delta = 0;

404 ++handledCPCount;

405 }

406 }

407

408 ++delta;

409 ++n;

410

411 }

412 return output.join('');

413 }

414

415 /**

416 * Converts a Punycode string representing a domain name to Unicode. Only the

417 * Punycoded parts of the domain name will be converted, i.e. it doesn't

418 * matter if you call it on a string that has already been converted to

419 * Unicode.

420 * @memberOf punycode

421 * @param {String} domain The Punycode domain name to convert to Unicode.

422 * @returns {String} The Unicode representation of the given Punycode

423 * string.

424 */

425 function toUnicode(domain) {

426 return mapDomain(domain, function(string) {

427 return regexPunycode.test(string)

428 ? decode(string.slice(4).toLowerCase())

429 : string;

430 });

431 }

432

433 /**

434 * Converts a Unicode string representing a domain name to Punycode. Only the

435 * non-ASCII parts of the domain name will be converted, i.e. it doesn't

436 * matter if you call it with a domain that's already in ASCII.

437 * @memberOf punycode

438 * @param {String} domain The domain name to convert, as a Unicode string.

439 * @returns {String} The Punycode representation of the given domain name.

440 */

441 function toASCII(domain) {

442 return mapDomain(domain, function(string) {

443 return regexNonASCII.test(string)

444 ? 'xn--' + encode(string)

445 : string;

446 });

447 }

448

449 /*--------------------------------------------------------------------------*/

450

451 /** Define the public API */

452 exports = {

453 /**

454 * A string representing the current Punycode.js version number.

455 * @memberOf punycode

456 * @type String

457 */

458 'version': '1.2.3',

459 /**

460 * An object of methods to convert from JavaScript's internal character

461 * representation (UCS-2) to Unicode code points, and back.

462 * @see <http://mathiasbynens.be/notes/javascript-encoding>

463 * @memberOf punycode

464 * @type Object

465 */

466 'ucs2': {

467 'decode': ucs2decode,

468 'encode': ucs2encode

469 },

470 'decode': decode,

471 'encode': encode,

472 'toASCII': toASCII,

473 'toUnicode': toUnicode

474 };

475 }());

OLD	NEW

« no previous file with comments | « no previous file | no next file » | no next file with comments »