Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Unified Diff: lib/abp2blocklist.js

Issue 29468575: Issue 5332 - Support Unicode characters in path (Closed) Base URL: https://hg.adblockplus.org/abp2blocklist
Patch Set: Add comment about Unicode surrogate pairs Created July 8, 2017, 6 a.m.
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « no previous file | test/abp2blocklist.js » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: lib/abp2blocklist.js
===================================================================
--- a/lib/abp2blocklist.js
+++ b/lib/abp2blocklist.js
@@ -104,52 +104,61 @@
* {regexp: "...",
* canSafelyMatchAsLowercase: true/false,
* hostname: "...",
* justHostname: true/false}
*/
function parseFilterRegexpSource(text)
{
let regexp = [];
- let lastIndex = text.length - 1;
+
+ // Convert the text into an array of Unicode characters.
+ //
+ // In the case of surrogate pairs (the smiley emoji, for example), one
+ // Unicode code point is represented by two JavaScript characters together.
+ // We want to iterate over Unicode code points rather than JavaScript
+ // characters.
+ let characters = Array.from(text);
+
+ let lastIndex = characters.length - 1;
let hostname;
let hostnameStart = null;
let hostnameFinished = false;
let justHostname = false;
let canSafelyMatchAsLowercase = false;
- for (let i = 0; i < text.length; i++)
+ for (let i = 0; i < characters.length; i++)
{
- let c = text[i];
+ let c = characters[i];
if (hostnameFinished)
justHostname = false;
// If we're currently inside the hostname we have to be careful not to
// escape any characters until after we have converted it to punycode.
if (hostnameStart != null && !hostnameFinished)
{
let endingChar = (c == "*" || c == "^" ||
c == "?" || c == "/" || c == "|");
if (!endingChar && i != lastIndex)
continue;
hostname = punycode.toASCII(
- text.substring(hostnameStart, endingChar ? i : i + 1)
+ characters.slice(hostnameStart, endingChar ? i : i + 1).join("")
);
hostnameFinished = justHostname = true;
regexp.push(escapeRegExp(hostname));
if (!endingChar)
break;
}
switch (c)
{
case "*":
- if (regexp.length > 0 && i < lastIndex && text[i + 1] != "*")
+ if (regexp.length > 0 && i < lastIndex && characters[i + 1] != "*")
regexp.push(".*");
break;
case "^":
if (i < lastIndex)
regexp.push(".");
break;
case "|":
if (i == 0)
@@ -157,44 +166,44 @@
regexp.push("^");
break;
}
if (i == lastIndex)
{
regexp.push("$");
break;
}
- if (i == 1 && text[0] == "|")
+ if (i == 1 && characters[0] == "|")
{
hostnameStart = i + 1;
canSafelyMatchAsLowercase = true;
regexp.push("https?://([^/]+\\.)?");
break;
}
regexp.push("\\|");
break;
case "/":
if (!hostnameFinished &&
- text.charAt(i-2) == ":" && text.charAt(i-1) == "/")
+ characters[i - 2] == ":" && characters[i - 1] == "/")
{
hostnameStart = i + 1;
canSafelyMatchAsLowercase = true;
}
regexp.push("/");
break;
case ".": case "+": case "$": case "?":
case "{": case "}": case "(": case ")":
case "[": case "]": case "\\":
regexp.push("\\", c);
break;
default:
if (hostnameFinished && (c >= "a" && c <= "z" ||
c >= "A" && c <= "Z"))
canSafelyMatchAsLowercase = false;
- regexp.push(c);
+ regexp.push(c == "%" ? c : encodeURI(c));
}
}
return {
regexp: regexp.join(""),
canSafelyMatchAsLowercase: canSafelyMatchAsLowercase,
hostname: hostname,
justHostname: justHostname
« no previous file with comments | « no previous file | test/abp2blocklist.js » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld