lib/punycode.js - Issue 29350076: Issue 4350 - Update punycode.js to 2.0.0

Side by Side Diff: lib/punycode.js

Issue 29350076: Issue 4350 - Update punycode.js to 2.0.0 (Closed)

Patch Set: Created Aug. 23, 2016, 3:29 p.m.

Left:
Right:

Use n/p to move between diff chunks; N/P to move between comments.

Jump to:

View unified diff | Download patch

OLD	NEW
1 /*! http://mths.be/punycode v1.2.3 by @mathias */	1 'use strict';

2 /* Used under GPL 2.0, see https://github.com/bestiejs/punycode.js/blob/master/LICENSE-GPL.txt */	2

3 ;(function() {	3 /** Highest positive signed 32-bit float value */

	4 const maxInt = 2147483647; // aka. 0x7FFFFFFF or 2^31-1

	5

	6 /** Bootstring parameters */

	7 const base = 36;

	8 const tMin = 1;

	9 const tMax = 26;

	10 const skew = 38;

	11 const damp = 700;

	12 const initialBias = 72;

	13 const initialN = 128; // 0x80

	14 const delimiter = '-'; // '\x2D'

	15

	16 /** Regular expressions */

	17 const regexPunycode = /^xn--/;

	18 const regexNonASCII = /[^\x20-\x7E]/; // unprintable ASCII chars + non-ASCII chars

	19 const regexSeparators = /[\x2E\u3002\uFF0E\uFF61]/g; // RFC 3490 separators

	20

	21 /** Error messages */

	22 const errors = {

	23 'overflow': 'Overflow: input needs wider integers to process',

	24 'not-basic': 'Illegal input >= 0x80 (not a basic code point)',

	25 'invalid-input': 'Invalid input'

	26 };

	27

	28 /** Convenience shortcuts */

	29 const baseMinusTMin = base - tMin;

	30 const floor = Math.floor;

	31 const stringFromCharCode = String.fromCharCode;

	32

	33 /*--------------------------------------------------------------------------*/

	34

	35 /**

	36 * A generic error utility function.

	37 * @private

	38 * @param {String} type The error type.

	39 * @returns {Error} Throws a `RangeError` with the applicable error message.

	40 */

	41 function error(type) {

	42 throw new RangeError(errors[type]);

	43 }

	44

	45 /**

	46 * A generic `Array#map` utility function.

	47 * @private

	48 * @param {Array} array The array to iterate over.

	49 * @param {Function} callback The function that gets called for every array

	50 * item.

	51 * @returns {Array} A new array of values returned by the callback function.

	52 */

	53 function map(array, fn) {

	54 const result = [];

	55 let length = array.length;

	56 while (length--) {

	57 result[length] = fn(array[length]);

	58 }

	59 return result;

	60 }

	61

	62 /**

	63 * A simple `Array#map`-like wrapper to work with domain name strings or email

	64 * addresses.

	65 * @private

	66 * @param {String} domain The domain name or email address.

	67 * @param {Function} callback The function that gets called for every

	68 * character.

	69 * @returns {Array} A new string of characters returned by the callback

	70 * function.

	71 */

	72 function mapDomain(string, fn) {

	73 const parts = string.split('@');

	74 let result = '';

	75 if (parts.length > 1) {

	76 // In email addresses, only the domain name should be punycoded. Leave

	77 // the local part (i.e. everything up to `@`) intact.

	78 result = parts[0] + '@';

	79 string = parts[1];

	80 }

	81 // Avoid `split(regex)` for IE8 compatibility. See #17.

	82 string = string.replace(regexSeparators, '\x2E');

	83 const labels = string.split('.');

	84 const encoded = map(labels, fn).join('.');

	85 return result + encoded;

	86 }

	87

	88 /**

	89 * Creates an array containing the numeric code points of each Unicode

	90 * character in the string. While JavaScript uses UCS-2 internally,

	91 * this function will convert a pair of surrogate halves (each of which

	92 * UCS-2 exposes as separate characters) into a single code point,

	93 * matching UTF-16.

	94 * @see `punycode.ucs2.encode`

	95 * @see <https://mathiasbynens.be/notes/javascript-encoding>

	96 * @memberOf punycode.ucs2

	97 * @name decode

	98 * @param {String} string The Unicode input string (UCS-2).

	99 * @returns {Array} The new array of code points.

	100 */

	101 function ucs2decode(string) {

	102 const output = [];

	103 let counter = 0;

	104 const length = string.length;

	105 while (counter < length) {

	106 const value = string.charCodeAt(counter++);

	107 if (value >= 0xD800 && value <= 0xDBFF && counter < length) {

	108 // It's a high surrogate, and there is a next character.

	109 const extra = string.charCodeAt(counter++);

	110 if ((extra & 0xFC00) == 0xDC00) { // Low surrogate.

	111 output.push(((value & 0x3FF) << 10) + (extra & 0x3FF) + 0x10000);

	112 } else {

	113 // It's an unmatched surrogate; only append this code unit, in case the

	114 // next code unit is the high surrogate of a surrogate pair.

	115 output.push(value);

	116 counter--;

	117 }

	118 } else {

	119 output.push(value);

	120 }

	121 }

	122 return output;

	123 }

	124

	125 /**

	126 * Creates a string based on an array of numeric code points.

	127 * @see `punycode.ucs2.decode`

	128 * @memberOf punycode.ucs2

	129 * @name encode

	130 * @param {Array} codePoints The array of numeric code points.

	131 * @returns {String} The new Unicode string (UCS-2).

	132 */

	133 const ucs2encode = array => String.fromCodePoint(...array);

	134

	135 /**

	136 * Converts a basic code point into a digit/integer.

	137 * @see `digitToBasic()`

	138 * @private

	139 * @param {Number} codePoint The basic numeric code point value.

	140 * @returns {Number} The numeric value of a basic code point (for use in

	141 * representing integers) in the range `0` to `base - 1`, or `base` if

	142 * the code point does not represent a value.

	143 */

	144 const basicToDigit = function(codePoint) {

	145 if (codePoint - 0x30 < 0x0A) {

	146 return codePoint - 0x16;

	147 }

	148 if (codePoint - 0x41 < 0x1A) {

	149 return codePoint - 0x41;

	150 }

	151 if (codePoint - 0x61 < 0x1A) {

	152 return codePoint - 0x61;

	153 }

	154 return base;

	155 };

	156

	157 /**

	158 * Converts a digit/integer into a basic code point.

	159 * @see `basicToDigit()`

	160 * @private

	161 * @param {Number} digit The numeric value of a basic code point.

	162 * @returns {Number} The basic code point whose value (when used for

	163 * representing integers) is `digit`, which needs to be in the range

	164 * `0` to `base - 1`. If `flag` is non-zero, the uppercase form is

	165 * used; else, the lowercase form is used. The behavior is undefined

	166 * if `flag` is non-zero and `digit` has no uppercase form.

	167 */

	168 const digitToBasic = function(digit, flag) {

	169 // 0..25 map to ASCII a..z or A..Z

	170 // 26..35 map to ASCII 0..9

	171 return digit + 22 + 75 * (digit < 26) - ((flag != 0) << 5);

	172 };

	173

	174 /**

	175 * Bias adaptation function as per section 3.4 of RFC 3492.

	176 * https://tools.ietf.org/html/rfc3492#section-3.4

	177 * @private

	178 */

	179 const adapt = function(delta, numPoints, firstTime) {

	180 let k = 0;

	181 delta = firstTime ? floor(delta / damp) : delta >> 1;

	182 delta += floor(delta / numPoints);

	183 for (/* no initialization /; delta > baseMinusTMin tMax >> 1; k += ba se) {

	184 delta = floor(delta / baseMinusTMin);

	185 }

	186 return floor(k + (baseMinusTMin + 1) * delta / (delta + skew));

	187 };

	188

	189 /**

	190 * Converts a Punycode string of ASCII-only symbols to a string of Unicode

	191 * symbols.

	192 * @memberOf punycode

	193 * @param {String} input The Punycode string of ASCII-only symbols.

	194 * @returns {String} The resulting string of Unicode symbols.

	195 */

	196 const decode = function(input) {

	197 // Don't use UCS-2.

	198 const output = [];

	199 const inputLength = input.length;

	200 let i = 0;

	201 let n = initialN;

	202 let bias = initialBias;

	203

	204 // Handle the basic code points: let `basic` be the number of input code

	205 // points before the last delimiter, or `0` if there is none, then copy

	206 // the first basic code points to the output.

	207

	208 let basic = input.lastIndexOf(delimiter);

	209 if (basic < 0) {

	210 basic = 0;

	211 }

	212

	213 for (let j = 0; j < basic; ++j) {

	214 // if it's not a basic code point

	215 if (input.charCodeAt(j) >= 0x80) {

	216 error('not-basic');

	217 }

	218 output.push(input.charCodeAt(j));

	219 }

	220

	221 // Main decoding loop: start just after the last delimiter if any basic code

	222 // points were copied; start at the beginning otherwise.

	223

	224 for (let index = basic > 0 ? basic + 1 : 0; index < inputLength; /* no final expression */) {

	225

	226 // `index` is the index of the next character to be consumed.

	227 // Decode a generalized variable-length integer into `delta`,

	228 // which gets added to `i`. The overflow checking is easier

	229 // if we increase `i` as we go, then subtract off its starting

	230 // value at the end to obtain `delta`.

	231 let oldi = i;

	232 for (let w = 1, k = base; /* no condition */; k += base) {

	233

	234 if (index >= inputLength) {

	235 error('invalid-input');

	236 }

	237

	238 const digit = basicToDigit(input.charCodeAt(index++));

	239

	240 if (digit >= base || digit > floor((maxInt - i) / w)) {

	241 error('overflow');

	242 }

	243

	244 i += digit * w;

	245 const t = k <= bias ? tMin : (k >= bias + tMax ? tMax : k - bias);

	246

	247 if (digit < t) {

	248 break;

	249 }

	250

	251 const baseMinusT = base - t;

	252 if (w > floor(maxInt / baseMinusT)) {

	253 error('overflow');

	254 }

	255

	256 w *= baseMinusT;

	257

	258 }

	259

	260 const out = output.length + 1;

	261 bias = adapt(i - oldi, out, oldi == 0);

	262

	263 // `i` was supposed to wrap around from `out` to `0`,

	264 // incrementing `n` each time, so we'll fix that now:

	265 if (floor(i / out) > maxInt - n) {

	266 error('overflow');

	267 }

	268

	269 n += floor(i / out);

	270 i %= out;

	271

	272 // Insert `n` at position `i` of the output.

	273 output.splice(i++, 0, n);

	274

	275 }

	276

	277 return String.fromCodePoint(...output);

	278 };

	279

	280 /**

	281 * Converts a string of Unicode symbols (e.g. a domain name label) to a

	282 * Punycode string of ASCII-only symbols.

	283 * @memberOf punycode

	284 * @param {String} input The string of Unicode symbols.

	285 * @returns {String} The resulting Punycode string of ASCII-only symbols.

	286 */

	287 const encode = function(input) {

	288 const output = [];

	289

	290 // Convert the input in UCS-2 to an array of Unicode code points.

	291 input = ucs2decode(input);

	292

	293 // Cache the length.

	294 let inputLength = input.length;

	295

	296 // Initialize the state.

	297 let n = initialN;

	298 let delta = 0;

	299 let bias = initialBias;

	300

	301 // Handle the basic code points.

	302 for (const currentValue of input) {

	303 if (currentValue < 0x80) {

	304 output.push(stringFromCharCode(currentValue));

	305 }

	306 }

	307

	308 let basicLength = output.length;

	309 let handledCPCount = basicLength;

	310

	311 // `handledCPCount` is the number of code points that have been handled;

	312 // `basicLength` is the number of basic code points.

	313

	314 // Finish the basic string with a delimiter unless it's empty.

	315 if (basicLength) {

	316 output.push(delimiter);

	317 }

	318

	319 // Main encoding loop:

	320 while (handledCPCount < inputLength) {

	321

	322 // All non-basic code points < n have been handled already. Find the next

	323 // larger one:

	324 let m = maxInt;

	325 for (const currentValue of input) {

	326 if (currentValue >= n && currentValue < m) {

	327 m = currentValue;

	328 }

	329 }

	330

	331 // Increase `delta` enough to advance the decoder's <n,i> state to <m,0>,

	332 // but guard against overflow.

	333 const handledCPCountPlusOne = handledCPCount + 1;

	334 if (m - n > floor((maxInt - delta) / handledCPCountPlusOne)) {

	335 error('overflow');

	336 }

	337

	338 delta += (m - n) * handledCPCountPlusOne;

	339 n = m;

	340

	341 for (const currentValue of input) {

	342 if (currentValue < n && ++delta > maxInt) {

	343 error('overflow');

	344 }

	345 if (currentValue == n) {

	346 // Represent delta as a generalized variable-length integer.

	347 let q = delta;

	348 for (let k = base; /* no condition */; k += base) {

	349 const t = k <= bias ? tMin : (k >= bias + tMax ? tMax : k - bias);

	350 if (q < t) {

	351 break;

	352 }

	353 const qMinusT = q - t;

	354 const baseMinusT = base - t;

	355 output.push(

	356 stringFromCharCode(digitToBasic(t + qMinusT % baseMinusT, 0))

	357 );

	358 q = floor(qMinusT / baseMinusT);

	359 }

	360

	361 output.push(stringFromCharCode(digitToBasic(q, 0)));

	362 bias = adapt(delta, handledCPCountPlusOne, handledCPCount == basicLength);

	363 delta = 0;

	364 ++handledCPCount;

	365 }

	366 }

	367

	368 ++delta;

	369 ++n;

	370

	371 }

	372 return output.join('');

	373 };

	374

	375 /**

	376 * Converts a Punycode string representing a domain name or an email address

	377 * to Unicode. Only the Punycoded parts of the input will be converted, i.e.

	378 * it doesn't matter if you call it on a string that has already been

	379 * converted to Unicode.

	380 * @memberOf punycode

	381 * @param {String} input The Punycoded domain name or email address to

	382 * convert to Unicode.

	383 * @returns {String} The Unicode representation of the given Punycode

	384 * string.

	385 */

	386 const toUnicode = function(input) {

	387 return mapDomain(input, function(string) {

	388 return regexPunycode.test(string)

	389 ? decode(string.slice(4).toLowerCase())

	390 : string;

	391 });

	392 };

	393

	394 /**

	395 * Converts a Unicode string representing a domain name or an email address to

	396 * Punycode. Only the non-ASCII parts of the domain name will be converted,

	397 * i.e. it doesn't matter if you call it with a domain that's already in

	398 * ASCII.

	399 * @memberOf punycode

	400 * @param {String} input The domain name or email address to convert, as a

	401 * Unicode string.

	402 * @returns {String} The Punycode representation of the given domain name or

	403 * email address.

	404 */

	405 const toASCII = function(input) {

	406 return mapDomain(input, function(string) {

	407 return regexNonASCII.test(string)

	408 ? 'xn--' + encode(string)

	409 : string;

	410 });

	411 };

	412

	413 /*--------------------------------------------------------------------------*/

	414

	415 /** Define the public API */

	416 const punycode = {

4 /**	417 /**

5 » * The `punycode` object.	418 » * A string representing the current Punycode.js version number.

6 » * @name punycode	419 » * @memberOf punycode

	420 » * @type String

	421 » */

	422 » 'version': '2.0.0',

	423 » /**

	424 » * An object of methods to convert from JavaScript's internal character

	425 » * representation (UCS-2) to Unicode code points, and back.

	426 » * @see <https://mathiasbynens.be/notes/javascript-encoding>

	427 » * @memberOf punycode

7 * @type Object	428 * @type Object

8 */	429 */

9 » var punycode,	430 » 'ucs2': {

10	431 » » 'decode': ucs2decode,

11 » /** Highest positive signed 32-bit float value */	432 » » 'encode': ucs2encode

12 » maxInt = 2147483647, // aka. 0x7FFFFFFF or 2^31-1

13

14 » /** Bootstring parameters */

15 » base = 36,

16 » tMin = 1,

17 » tMax = 26,

18 » skew = 38,

19 » damp = 700,

20 » initialBias = 72,

21 » initialN = 128, // 0x80

22 » delimiter = '-', // '\x2D'

23

24 » /** Regular expressions */

25 » regexPunycode = /^xn--/,

26 » regexNonASCII = /[^ -~]/, // unprintable ASCII chars + non-ASCII chars

27 » regexSeparators = /\x2E|\u3002|\uFF0E|\uFF61/g, // RFC 3490 separators

28

29 » /** Error messages */

30 » errors = {

31 » » 'overflow': 'Overflow: input needs wider integers to process',

32 » » 'not-basic': 'Illegal input >= 0x80 (not a basic code point)',

33 » » 'invalid-input': 'Invalid input'

34 },	433 },

35	434 'decode': decode,

36 /** Convenience shortcuts */	435 'encode': encode,

37 baseMinusTMin = base - tMin,	436 'toASCII': toASCII,

38 floor = Math.floor,	437 'toUnicode': toUnicode

39 stringFromCharCode = String.fromCharCode,	438 };

40	439

41 /** Temporary variable */	440 module.exports = punycode;

42 key;

43

44 /*--------------------------------------------------------------------------*/

45

46 /**

47 * A generic error utility function.

48 * @private

49 * @param {String} type The error type.

50 * @returns {Error} Throws a `RangeError` with the applicable error message.

51 */

52 function error(type) {

53 throw RangeError(errors[type]);

54 }

55

56 /**

57 * A generic `Array#map` utility function.

58 * @private

59 * @param {Array} array The array to iterate over.

60 * @param {Function} callback The function that gets called for every array

61 * item.

62 * @returns {Array} A new array of values returned by the callback function.

63 */

64 function map(array, fn) {

65 var length = array.length;

66 while (length--) {

67 array[length] = fn(array[length]);

68 }

69 return array;

70 }

71

72 /**

73 * A simple `Array#map`-like wrapper to work with domain name strings.

74 * @private

75 * @param {String} domain The domain name.

76 * @param {Function} callback The function that gets called for every

77 * character.

78 * @returns {Array} A new string of characters returned by the callback

79 * function.

80 */

81 function mapDomain(string, fn) {

82 return map(string.split(regexSeparators), fn).join('.');

83 }

84

85 /**

86 * Creates an array containing the numeric code points of each Unicode

87 * character in the string. While JavaScript uses UCS-2 internally,

88 * this function will convert a pair of surrogate halves (each of which

89 * UCS-2 exposes as separate characters) into a single code point,

90 * matching UTF-16.

91 * @see `punycode.ucs2.encode`

92 * @see <http://mathiasbynens.be/notes/javascript-encoding>

93 * @memberOf punycode.ucs2

94 * @name decode

95 * @param {String} string The Unicode input string (UCS-2).

96 * @returns {Array} The new array of code points.

97 */

98 function ucs2decode(string) {

99 var output = [],

100 counter = 0,

101 length = string.length,

102 value,

103 extra;

104 while (counter < length) {

105 value = string.charCodeAt(counter++);

106 if (value >= 0xD800 && value <= 0xDBFF && counter < length) {

107 // high surrogate, and there is a next character

108 extra = string.charCodeAt(counter++);

109 if ((extra & 0xFC00) == 0xDC00) { // low surrogate

110 output.push(((value & 0x3FF) << 10) + (extra & 0x3FF) + 0x10000);

111 } else {

112 // unmatched surrogate; only append this code unit, in case the next

113 // code unit is the high surrogate of a surrogate pair

114 output.push(value);

115 counter--;

116 }

117 } else {

118 output.push(value);

119 }

120 }

121 return output;

122 }

123

124 /**

125 * Creates a string based on an array of numeric code points.

126 * @see `punycode.ucs2.decode`

127 * @memberOf punycode.ucs2

128 * @name encode

129 * @param {Array} codePoints The array of numeric code points.

130 * @returns {String} The new Unicode string (UCS-2).

131 */

132 function ucs2encode(array) {

133 return map(array, function(value) {

134 var output = '';

135 if (value > 0xFFFF) {

136 value -= 0x10000;

137 output += stringFromCharCode(value >>> 10 & 0x3FF | 0xD800);

138 value = 0xDC00 | value & 0x3FF;

139 }

140 output += stringFromCharCode(value);

141 return output;

142 }).join('');

143 }

144

145 /**

146 * Converts a basic code point into a digit/integer.

147 * @see `digitToBasic()`

148 * @private

149 * @param {Number} codePoint The basic numeric code point value.

150 * @returns {Number} The numeric value of a basic code point (for use in

151 * representing integers) in the range `0` to `base - 1`, or `base` if

152 * the code point does not represent a value.

153 */

154 function basicToDigit(codePoint) {

155 if (codePoint - 48 < 10) {

156 return codePoint - 22;

157 }

158 if (codePoint - 65 < 26) {

159 return codePoint - 65;

160 }

161 if (codePoint - 97 < 26) {

162 return codePoint - 97;

163 }

164 return base;

165 }

166

167 /**

168 * Converts a digit/integer into a basic code point.

169 * @see `basicToDigit()`

170 * @private

171 * @param {Number} digit The numeric value of a basic code point.

172 * @returns {Number} The basic code point whose value (when used for

173 * representing integers) is `digit`, which needs to be in the range

174 * `0` to `base - 1`. If `flag` is non-zero, the uppercase form is

175 * used; else, the lowercase form is used. The behavior is undefined

176 * if `flag` is non-zero and `digit` has no uppercase form.

177 */

178 function digitToBasic(digit, flag) {

179 // 0..25 map to ASCII a..z or A..Z

180 // 26..35 map to ASCII 0..9

181 return digit + 22 + 75 * (digit < 26) - ((flag != 0) << 5);

182 }

183

184 /**

185 * Bias adaptation function as per section 3.4 of RFC 3492.

186 * http://tools.ietf.org/html/rfc3492#section-3.4

187 * @private

188 */

189 function adapt(delta, numPoints, firstTime) {

190 var k = 0;

191 delta = firstTime ? floor(delta / damp) : delta >> 1;

192 delta += floor(delta / numPoints);

193 for (/* no initialization */; delta > baseMinusTMin * tMax >> 1; k += base) {

194 delta = floor(delta / baseMinusTMin);

195 }

196 return floor(k + (baseMinusTMin + 1) * delta / (delta + skew));

197 }

198

199 /**

200 * Converts a Punycode string of ASCII-only symbols to a string of Unicode

201 * symbols.

202 * @memberOf punycode

203 * @param {String} input The Punycode string of ASCII-only symbols.

204 * @returns {String} The resulting string of Unicode symbols.

205 */

206 function decode(input) {

207 // Don't use UCS-2

208 var output = [],

209 inputLength = input.length,

210 out,

211 i = 0,

212 n = initialN,

213 bias = initialBias,

214 basic,

215 j,

216 index,

217 oldi,

218 w,

219 k,

220 digit,

221 t,

222 length,

223 /** Cached calculation results */

224 baseMinusT;

225

226 // Handle the basic code points: let `basic` be the number of input code

227 // points before the last delimiter, or `0` if there is none, then copy

228 // the first basic code points to the output.

229

230 basic = input.lastIndexOf(delimiter);

231 if (basic < 0) {

232 basic = 0;

233 }

234

235 for (j = 0; j < basic; ++j) {

236 // if it's not a basic code point

237 if (input.charCodeAt(j) >= 0x80) {

238 error('not-basic');

239 }

240 output.push(input.charCodeAt(j));

241 }

242

243 // Main decoding loop: start just after the last delimiter if any basic code

244 // points were copied; start at the beginning otherwise.

245

246 for (index = basic > 0 ? basic + 1 : 0; index < inputLength; /* no final expression */) {

247

248 // `index` is the index of the next character to be consumed.

249 // Decode a generalized variable-length integer into `delta`,

250 // which gets added to `i`. The overflow checking is easier

251 // if we increase `i` as we go, then subtract off its starting

252 // value at the end to obtain `delta`.

253 for (oldi = i, w = 1, k = base; /* no condition */; k += base) {

254

255 if (index >= inputLength) {

256 error('invalid-input');

257 }

258

259 digit = basicToDigit(input.charCodeAt(index++));

260

261 if (digit >= base || digit > floor((maxInt - i) / w)) {

262 error('overflow');

263 }

264

265 i += digit * w;

266 t = k <= bias ? tMin : (k >= bias + tMax ? tMax : k - bias);

267

268 if (digit < t) {

269 break;

270 }

271

272 baseMinusT = base - t;

273 if (w > floor(maxInt / baseMinusT)) {

274 error('overflow');

275 }

276

277 w *= baseMinusT;

278

279 }

280

281 out = output.length + 1;

282 bias = adapt(i - oldi, out, oldi == 0);

283

284 // `i` was supposed to wrap around from `out` to `0`,

285 // incrementing `n` each time, so we'll fix that now:

286 if (floor(i / out) > maxInt - n) {

287 error('overflow');

288 }

289

290 n += floor(i / out);

291 i %= out;

292

293 // Insert `n` at position `i` of the output

294 output.splice(i++, 0, n);

295

296 }

297

298 return ucs2encode(output);

299 }

300

301 /**

302 * Converts a string of Unicode symbols to a Punycode string of ASCII-only

303 * symbols.

304 * @memberOf punycode

305 * @param {String} input The string of Unicode symbols.

306 * @returns {String} The resulting Punycode string of ASCII-only symbols.

307 */

308 function encode(input) {

309 var n,

310 delta,

311 handledCPCount,

312 basicLength,

313 bias,

314 j,

315 m,

316 q,

317 k,

318 t,

319 currentValue,

320 output = [],

321 /** `inputLength` will hold the number of code points in `input`. */

322 inputLength,

323 /** Cached calculation results */

324 handledCPCountPlusOne,

325 baseMinusT,

326 qMinusT;

327

328 // Convert the input in UCS-2 to Unicode

329 input = ucs2decode(input);

330

331 // Cache the length

332 inputLength = input.length;

333

334 // Initialize the state

335 n = initialN;

336 delta = 0;

337 bias = initialBias;

338

339 // Handle the basic code points

340 for (j = 0; j < inputLength; ++j) {

341 currentValue = input[j];

342 if (currentValue < 0x80) {

343 output.push(stringFromCharCode(currentValue));

344 }

345 }

346

347 handledCPCount = basicLength = output.length;

348

349 // `handledCPCount` is the number of code points that have been handled;

350 // `basicLength` is the number of basic code points.

351

352 // Finish the basic string - if it is not empty - with a delimiter

353 if (basicLength) {

354 output.push(delimiter);

355 }

356

357 // Main encoding loop:

358 while (handledCPCount < inputLength) {

359

360 // All non-basic code points < n have been handled already. Find the next

361 // larger one:

362 for (m = maxInt, j = 0; j < inputLength; ++j) {

363 currentValue = input[j];

364 if (currentValue >= n && currentValue < m) {

365 m = currentValue;

366 }

367 }

368

369 // Increase `delta` enough to advance the decoder's <n,i> state to <m,0>,

370 // but guard against overflow

371 handledCPCountPlusOne = handledCPCount + 1;

372 if (m - n > floor((maxInt - delta) / handledCPCountPlusOne)) {

373 error('overflow');

374 }

375

376 delta += (m - n) * handledCPCountPlusOne;

377 n = m;

378

379 for (j = 0; j < inputLength; ++j) {

380 currentValue = input[j];

381

382 if (currentValue < n && ++delta > maxInt) {

383 error('overflow');

384 }

385

386 if (currentValue == n) {

387 // Represent delta as a generalized variable-length integer

388 for (q = delta, k = base; /* no condition */; k += base) {

389 t = k <= bias ? tMin : (k >= bias + tMax ? tMax : k - bias);

390 if (q < t) {

391 break;

392 }

393 qMinusT = q - t;

394 baseMinusT = base - t;

395 output.push(

396 stringFromCharCode(digitToBasic(t + qMinusT % baseMinusT, 0))

397 );

398 q = floor(qMinusT / baseMinusT);

399 }

400

401 output.push(stringFromCharCode(digitToBasic(q, 0)));

402 bias = adapt(delta, handledCPCountPlusOne, handledCPCount == basicLength);

403 delta = 0;

404 ++handledCPCount;

405 }

406 }

407

408 ++delta;

409 ++n;

410

411 }

412 return output.join('');

413 }

414

415 /**

416 * Converts a Punycode string representing a domain name to Unicode. Only the

417 * Punycoded parts of the domain name will be converted, i.e. it doesn't

418 * matter if you call it on a string that has already been converted to

419 * Unicode.

420 * @memberOf punycode

421 * @param {String} domain The Punycode domain name to convert to Unicode.

422 * @returns {String} The Unicode representation of the given Punycode

423 * string.

424 */

425 function toUnicode(domain) {

426 return mapDomain(domain, function(string) {

427 return regexPunycode.test(string)

428 ? decode(string.slice(4).toLowerCase())

429 : string;

430 });

431 }

432

433 /**

434 * Converts a Unicode string representing a domain name to Punycode. Only the

435 * non-ASCII parts of the domain name will be converted, i.e. it doesn't

436 * matter if you call it with a domain that's already in ASCII.

437 * @memberOf punycode

438 * @param {String} domain The domain name to convert, as a Unicode string.

439 * @returns {String} The Punycode representation of the given domain name.

440 */

441 function toASCII(domain) {

442 return mapDomain(domain, function(string) {

443 return regexNonASCII.test(string)

444 ? 'xn--' + encode(string)

445 : string;

446 });

447 }

448

449 /*--------------------------------------------------------------------------*/

450

451 /** Define the public API */

452 exports = {

453 /**

454 * A string representing the current Punycode.js version number.

455 * @memberOf punycode

456 * @type String

457 */

458 'version': '1.2.3',

459 /**

460 * An object of methods to convert from JavaScript's internal character

461 * representation (UCS-2) to Unicode code points, and back.

462 * @see <http://mathiasbynens.be/notes/javascript-encoding>

463 * @memberOf punycode

464 * @type Object

465 */

466 'ucs2': {

467 'decode': ucs2decode,

468 'encode': ucs2encode

469 },

470 'decode': decode,

471 'encode': encode,

472 'toASCII': toASCII,

473 'toUnicode': toUnicode

474 };

475 }());

OLD	NEW

« no previous file with comments | « no previous file | no next file » | no next file with comments »