| Index: lib/abp2blocklist.js |
| =================================================================== |
| --- a/lib/abp2blocklist.js |
| +++ b/lib/abp2blocklist.js |
| @@ -104,52 +104,61 @@ |
| * {regexp: "...", |
| * canSafelyMatchAsLowercase: true/false, |
| * hostname: "...", |
| * justHostname: true/false} |
| */ |
| function parseFilterRegexpSource(text) |
| { |
| let regexp = []; |
| - let lastIndex = text.length - 1; |
| + |
| + // Convert the text into an array of Unicode code points. |
| + // |
| + // In the case of surrogate pairs (the smiley emoji, for example), one |
| + // Unicode code point is represented by two UTF-16 code units, i.e. two |
| + // JavaScript string indices. We want to iterate over Unicode code points |
| + // rather than UTF-16 code units. |
| + let characters = Array.from(text); |
| + |
| + let lastIndex = characters.length - 1; |
| let hostname; |
| let hostnameStart = null; |
| let hostnameFinished = false; |
| let justHostname = false; |
| let canSafelyMatchAsLowercase = false; |
| - for (let i = 0; i < text.length; i++) |
| + for (let i = 0; i < characters.length; i++) |
| { |
| - let c = text[i]; |
| + let c = characters[i]; |
| if (hostnameFinished) |
| justHostname = false; |
| // If we're currently inside the hostname we have to be careful not to |
| // escape any characters until after we have converted it to punycode. |
| if (hostnameStart != null && !hostnameFinished) |
| { |
| let endingChar = (c == "*" || c == "^" || |
| c == "?" || c == "/" || c == "|"); |
| if (!endingChar && i != lastIndex) |
| continue; |
| hostname = punycode.toASCII( |
| - text.substring(hostnameStart, endingChar ? i : i + 1) |
| + characters.slice(hostnameStart, endingChar ? i : i + 1).join("") |
| ); |
| hostnameFinished = justHostname = true; |
| regexp.push(escapeRegExp(hostname)); |
| if (!endingChar) |
| break; |
| } |
| switch (c) |
| { |
| case "*": |
| - if (regexp.length > 0 && i < lastIndex && text[i + 1] != "*") |
| + if (regexp.length > 0 && i < lastIndex && characters[i + 1] != "*") |
| regexp.push(".*"); |
| break; |
| case "^": |
| if (i < lastIndex) |
| regexp.push("."); |
| break; |
| case "|": |
| if (i == 0) |
| @@ -157,44 +166,44 @@ |
| regexp.push("^"); |
| break; |
| } |
| if (i == lastIndex) |
| { |
| regexp.push("$"); |
| break; |
| } |
| - if (i == 1 && text[0] == "|") |
| + if (i == 1 && characters[0] == "|") |
| { |
| hostnameStart = i + 1; |
| canSafelyMatchAsLowercase = true; |
| regexp.push("https?://([^/]+\\.)?"); |
| break; |
| } |
| regexp.push("\\|"); |
| break; |
| case "/": |
| if (!hostnameFinished && |
| - text.charAt(i-2) == ":" && text.charAt(i-1) == "/") |
| + characters[i - 2] == ":" && characters[i - 1] == "/") |
| { |
| hostnameStart = i + 1; |
| canSafelyMatchAsLowercase = true; |
| } |
| regexp.push("/"); |
| break; |
| case ".": case "+": case "$": case "?": |
| case "{": case "}": case "(": case ")": |
| case "[": case "]": case "\\": |
| regexp.push("\\", c); |
| break; |
| default: |
| if (hostnameFinished && (c >= "a" && c <= "z" || |
| c >= "A" && c <= "Z")) |
| canSafelyMatchAsLowercase = false; |
| - regexp.push(c); |
| + regexp.push(c == "%" ? c : encodeURI(c)); |
| } |
| } |
| return { |
| regexp: regexp.join(""), |
| canSafelyMatchAsLowercase: canSafelyMatchAsLowercase, |
| hostname: hostname, |
| justHostname: justHostname |