Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Side by Side Diff: lib/punycode.js

Issue 29772555: Issue 6647 - Stop converting domains from punycode to unicode (Closed)
Patch Set: Created May 6, 2018, 2:42 p.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 /*
2 * Copyright (C) 2011-2016 Mathias Bynens <https://mathiasbynens.be/>
3 * Copyright (C) 2016-present eyeo GmbH (Minor modifications for compatibility.)
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining
6 * a copy of this software and associated documentation files (the
7 * "Software"), to deal in the Software without restriction, including
8 * without limitation the rights to use, copy, modify, merge, publish,
9 * distribute, sublicense, and/or sell copies of the Software, and to
10 * permit persons to whom the Software is furnished to do so, subject to
11 * the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be
14 * included in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25 'use strict';
26
/** Highest positive signed 32-bit integer value */
const maxInt = 2147483647; // aka. 0x7FFFFFFF or 2^31-1

/** Bootstring parameters */
const base = 36;
const tMin = 1;
const tMax = 26;
const skew = 38;
const damp = 700;
const initialBias = 72;
const initialN = 128; // 0x80
const delimiter = '-'; // '\x2D'

/** Regular expressions */
const regexPunycode = /^xn--/; // label starts with the `xn--` prefix
const regexNonASCII = /[^\x20-\x7E]/; // unprintable ASCII chars + non-ASCII chars
const regexSeparators = /[\x2E\u3002\uFF0E\uFF61]/g; // RFC 3490 separators

/** Error messages, keyed by the error type passed to `error()` */
const errors = {
  'overflow': 'Overflow: input needs wider integers to process',
  'not-basic': 'Illegal input >= 0x80 (not a basic code point)',
  'invalid-input': 'Invalid input'
};

/** Convenience shortcuts */
const baseMinusTMin = base - tMin;
const floor = Math.floor;
const stringFromCharCode = String.fromCharCode;
56
57 /*--------------------------------------------------------------------------*/
58
/**
 * Raises the error registered for the given error type.
 * @private
 * @param {String} type The error type; used as a key into `errors`.
 * @throws {RangeError} Always. The message is the `errors` entry for `type`.
 */
function error(type) {
  const message = errors[type];
  throw new RangeError(message);
}
68
/**
 * Minimal stand-in for `Array#map`.
 * @private
 * @param {Array} array The array to iterate over.
 * @param {Function} fn Called with each item (and nothing else); its return
 * value is stored at the same index of the result.
 * @returns {Array} A new array holding the values returned by `fn`.
 */
function map(array, fn) {
  const mapped = [];
  // Items are visited from the last index down to the first.
  for (let index = array.length - 1; index >= 0; index--) {
    mapped[index] = fn(array[index]);
  }
  return mapped;
}
85
/**
 * A simple `Array#map`-like wrapper to work with domain name strings or email
 * addresses. The callback is applied to every dot-separated label of the
 * domain; the labels are then re-joined with `.`.
 * @private
 * @param {String} string The domain name or email address.
 * @param {Function} fn The function that gets called for every label.
 * @returns {String} The input with each domain label replaced by the value
 * `fn` returned for it. For email addresses the local part is kept as is.
 */
function mapDomain(string, fn) {
  // In email addresses, only the domain name should be punycoded. Split on
  // the LAST `@` so that a local part which itself contains `@` is left
  // intact instead of being mistaken for the domain.
  const atIndex = string.lastIndexOf('@');
  // With no `@` (`atIndex === -1`) the local part is empty and the domain
  // is the whole input.
  const localPart = string.slice(0, atIndex + 1);
  const domain = string.slice(atIndex + 1);
  // Normalize every separator to `.` first, then split on the plain string
  // (avoids `split(regex)`).
  const labels = domain.replace(regexSeparators, '\x2E').split('.');
  return localPart + map(labels, fn).join('.');
}
111
/**
 * Creates an array containing the numeric code points of each Unicode
 * character in the string. While JavaScript uses UCS-2 internally,
 * this function will convert a pair of surrogate halves (each of which
 * UCS-2 exposes as separate characters) into a single code point,
 * matching UTF-16.
 * @see `punycode.ucs2.encode`
 * @see <https://mathiasbynens.be/notes/javascript-encoding>
 * @memberOf punycode.ucs2
 * @name decode
 * @param {String} string The Unicode input string (UCS-2).
 * @returns {Array} The new array of code points.
 */
function ucs2decode(string) {
  const output = [];
  const length = string.length;
  let counter = 0;
  while (counter < length) {
    const value = string.charCodeAt(counter++);
    if (value >= 0xD800 && value <= 0xDBFF && counter < length) {
      // It's a high surrogate, and there is a next character.
      const extra = string.charCodeAt(counter++);
      if ((extra & 0xFC00) === 0xDC00) { // Low surrogate.
        output.push(((value & 0x3FF) << 10) + (extra & 0x3FF) + 0x10000);
      } else {
        // It's an unmatched surrogate; only append this code unit, in case
        // the next code unit is the high surrogate of a surrogate pair.
        output.push(value);
        counter--;
      }
    } else {
      output.push(value);
    }
  }
  return output;
}
148
/**
 * Builds a string from a list of numeric code points.
 * @see `punycode.ucs2.decode`
 * @memberOf punycode.ucs2
 * @name encode
 * @param {Array} array The array of numeric code points.
 * @returns {String} The new Unicode string (UCS-2).
 */
const ucs2encode = (array) => {
  // Every code point is passed to `String.fromCodePoint` as its own argument.
  const fromCodePoint = String.fromCodePoint;
  return fromCodePoint.apply(null, array);
};
158
/**
 * Converts a basic code point into a digit/integer.
 * @see `digitToBasic()`
 * @private
 * @param {Number} codePoint The basic numeric code point value.
 * @returns {Number} The numeric value of a basic code point (for use in
 * representing integers) in the range `0` to `base - 1`, or `base` if
 * the code point does not represent a value.
 */
const basicToDigit = function(codePoint) {
  // Each range is checked on BOTH ends. A one-sided test such as
  // `codePoint - 0x30 < 0x0A` is also true for every code point below the
  // range, which would turn characters like `-` or `:` into bogus digits.
  if (codePoint >= 0x30 && codePoint < 0x3A) {
    // '0'..'9' map to 26..35
    return 26 + (codePoint - 0x30);
  }
  if (codePoint >= 0x41 && codePoint < 0x5B) {
    // 'A'..'Z' map to 0..25
    return codePoint - 0x41;
  }
  if (codePoint >= 0x61 && codePoint < 0x7B) {
    // 'a'..'z' map to 0..25
    return codePoint - 0x61;
  }
  return base;
};
180
/**
 * Converts a digit/integer into a basic code point.
 * @see `basicToDigit()`
 * @private
 * @param {Number} digit The numeric value of a basic code point; needs to be
 * in the range `0` to `base - 1`.
 * @param {Number} flag If non-zero, the uppercase form is used; else, the
 * lowercase form is used. The behavior is undefined if `flag` is non-zero
 * and `digit` has no uppercase form.
 * @returns {Number} The basic code point whose value (when used for
 * representing integers) is `digit`.
 */
const digitToBasic = function(digit, flag) {
  //  0..25 map to ASCII a..z or A..Z
  // 26..35 map to ASCII 0..9
  const letterShift = digit < 26 ? 75 : 0;
  // Loose comparison kept on purpose so that `flag` is coerced as before.
  const uppercaseShift = flag != 0 ? 32 : 0;
  return digit + 22 + letterShift - uppercaseShift;
};
197
/**
 * Bias adaptation function as per section 3.4 of RFC 3492.
 * https://tools.ietf.org/html/rfc3492#section-3.4
 * @private
 * @param {Number} delta The delta that was just encoded or decoded.
 * @param {Number} numPoints Divisor for the proportional increase of
 * `delta`; callers pass the number of code points handled so far, plus one.
 * @param {Boolean} firstTime Whether this is the first adaptation; if so,
 * `delta` is divided by `damp` instead of being halved.
 * @returns {Number} The new bias.
 */
const adapt = function(delta, numPoints, firstTime) {
  let k = 0;
  delta = firstTime ? floor(delta / damp) : delta >> 1;
  delta += floor(delta / numPoints);
  // `*` binds tighter than `>>`: the threshold is (baseMinusTMin * tMax) / 2.
  for (/* no initialization */; delta > baseMinusTMin * tMax >> 1; k += base) {
    delta = floor(delta / baseMinusTMin);
  }
  return floor(k + (baseMinusTMin + 1) * delta / (delta + skew));
};
212
/**
 * Converts a Punycode string of ASCII-only symbols to a string of Unicode
 * symbols.
 * @memberOf punycode
 * @param {String} input The Punycode string of ASCII-only symbols.
 * @returns {String} The resulting string of Unicode symbols.
 * @throws {RangeError} Via `error()`: `'not-basic'` if a code point >= 0x80
 * precedes the last delimiter, `'invalid-input'` if the input ends in the
 * middle of a variable-length integer or contains a character that is not
 * a digit, `'overflow'` if a value would exceed `maxInt`.
 */
const decode = function(input) {
  // Don't use UCS-2.
  const output = [];
  const inputLength = input.length;
  let i = 0;
  let n = initialN;
  let bias = initialBias;

  // Handle the basic code points: let `basic` be the number of input code
  // points before the last delimiter, or `0` if there is none, then copy
  // the first basic code points to the output.

  let basic = input.lastIndexOf(delimiter);
  if (basic < 0) {
    basic = 0;
  }

  for (let j = 0; j < basic; ++j) {
    const codePoint = input.charCodeAt(j);
    // if it's not a basic code point
    if (codePoint >= 0x80) {
      error('not-basic');
    }
    output.push(codePoint);
  }

  // Main decoding loop: start just after the last delimiter if any basic code
  // points were copied; start at the beginning otherwise.

  for (let index = basic > 0 ? basic + 1 : 0; index < inputLength; /* no final expression */) {

    // `index` is the index of the next character to be consumed.
    // Decode a generalized variable-length integer into `delta`,
    // which gets added to `i`. The overflow checking is easier
    // if we increase `i` as we go, then subtract off its starting
    // value at the end to obtain `delta`.
    const oldi = i;
    for (let w = 1, k = base; /* no condition */; k += base) {

      if (index >= inputLength) {
        error('invalid-input');
      }

      const digit = basicToDigit(input.charCodeAt(index++));

      // A character that is not a digit is malformed input, not an
      // arithmetic overflow; report the two cases separately.
      if (digit >= base) {
        error('invalid-input');
      }
      if (digit > floor((maxInt - i) / w)) {
        error('overflow');
      }

      i += digit * w;
      const t = k <= bias ? tMin : (k >= bias + tMax ? tMax : k - bias);

      if (digit < t) {
        break;
      }

      const baseMinusT = base - t;
      if (w > floor(maxInt / baseMinusT)) {
        error('overflow');
      }

      w *= baseMinusT;

    }

    const out = output.length + 1;
    bias = adapt(i - oldi, out, oldi === 0);

    // `i` was supposed to wrap around from `out` to `0`,
    // incrementing `n` each time, so we'll fix that now:
    if (floor(i / out) > maxInt - n) {
      error('overflow');
    }

    n += floor(i / out);
    i %= out;

    // Insert `n` at position `i` of the output.
    output.splice(i++, 0, n);

  }

  return String.fromCodePoint.apply(null, output);
};
303
/**
 * Converts a string of Unicode symbols (e.g. a domain name label) to a
 * Punycode string of ASCII-only symbols.
 * @memberOf punycode
 * @param {String} input The string of Unicode symbols.
 * @returns {String} The resulting Punycode string of ASCII-only symbols.
 * @throws {RangeError} Via `error('overflow')` if a delta would exceed
 * `maxInt`.
 */
const encode = function(input) {
  const output = [];

  // Convert the input in UCS-2 to an array of Unicode code points.
  input = ucs2decode(input);

  // Cache the length.
  const inputLength = input.length;

  // Initialize the state.
  let n = initialN;
  let delta = 0;
  let bias = initialBias;

  // Handle the basic code points.
  for (const currentValue of input) {
    if (currentValue < 0x80) {
      output.push(stringFromCharCode(currentValue));
    }
  }

  const basicLength = output.length;
  let handledCPCount = basicLength;

  // `handledCPCount` is the number of code points that have been handled;
  // `basicLength` is the number of basic code points.

  // Finish the basic string with a delimiter unless it's empty.
  if (basicLength) {
    output.push(delimiter);
  }

  // Main encoding loop:
  while (handledCPCount < inputLength) {

    // All non-basic code points < n have been handled already. Find the next
    // larger one:
    let m = maxInt;
    for (const currentValue of input) {
      if (currentValue >= n && currentValue < m) {
        m = currentValue;
      }
    }

    // Increase `delta` enough to advance the decoder's <n,i> state to <m,0>,
    // but guard against overflow.
    const handledCPCountPlusOne = handledCPCount + 1;
    if (m - n > floor((maxInt - delta) / handledCPCountPlusOne)) {
      error('overflow');
    }

    delta += (m - n) * handledCPCountPlusOne;
    n = m;

    for (const currentValue of input) {
      if (currentValue < n && ++delta > maxInt) {
        error('overflow');
      }
      if (currentValue === n) {
        // Represent delta as a generalized variable-length integer.
        let q = delta;
        for (let k = base; /* no condition */; k += base) {
          const t = k <= bias ? tMin : (k >= bias + tMax ? tMax : k - bias);
          if (q < t) {
            break;
          }
          const qMinusT = q - t;
          const baseMinusT = base - t;
          output.push(
            stringFromCharCode(digitToBasic(t + qMinusT % baseMinusT, 0))
          );
          q = floor(qMinusT / baseMinusT);
        }

        output.push(stringFromCharCode(digitToBasic(q, 0)));
        // The first adaptation is the one made while no non-basic code
        // point has been handled yet.
        bias = adapt(delta, handledCPCountPlusOne, handledCPCount === basicLength);
        delta = 0;
        ++handledCPCount;
      }
    }

    ++delta;
    ++n;

  }
  return output.join('');
};
398
/**
 * Converts a Punycode string representing a domain name or an email address
 * to Unicode. Only labels matching `regexPunycode` are converted; every
 * other label is passed through unchanged, so calling this on a string that
 * has already been converted to Unicode does no harm.
 * @memberOf punycode
 * @param {String} input The Punycoded domain name or email address to
 * convert to Unicode.
 * @returns {String} The Unicode representation of the given Punycode
 * string.
 */
const toUnicode = function(input) {
  const decodeLabel = (label) => {
    if (!regexPunycode.test(label)) {
      return label;
    }
    // Drop the 4-character prefix and lowercase the rest before decoding.
    const payload = label.slice(4).toLowerCase();
    return decode(payload);
  };
  return mapDomain(input, decodeLabel);
};
417
/**
 * Converts a Unicode string representing a domain name or an email address to
 * Punycode. Only labels matching `regexNonASCII` are converted; every other
 * label is passed through unchanged, so calling this with a domain that is
 * already in ASCII does no harm.
 * @memberOf punycode
 * @param {String} input The domain name or email address to convert, as a
 * Unicode string.
 * @returns {String} The Punycode representation of the given domain name or
 * email address.
 */
const toASCII = function(input) {
  const encodeLabel = (label) => {
    if (!regexNonASCII.test(label)) {
      return label;
    }
    const encoded = encode(label);
    return 'xn--' + encoded;
  };
  return mapDomain(input, encodeLabel);
};
436
437 /*--------------------------------------------------------------------------*/
438
439 /** Define the public API */
440 module.exports = {
441 /**
442 * A string representing the current Punycode.js version number.
443 * @memberOf punycode
444 * @type String
445 */
446 'version': '2.0.0',
447 /**
448 * An object of methods to convert from JavaScript's internal character
449 * representation (UCS-2) to Unicode code points, and back.
450 * @see <https://mathiasbynens.be/notes/javascript-encoding>
451 * @memberOf punycode
452 * @type Object
453 */
454 'ucs2': {
455 'decode': ucs2decode,
456 'encode': ucs2encode
457 },
458 'decode': decode,
459 'encode': encode,
460 'toASCII': toASCII,
461 'toUnicode': toUnicode
462 };
OLDNEW
« lib/options.js ('K') | « lib/popupBlocker.js ('k') | lib/requestBlocker.js » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld