lib/abp2blocklist.js - Issue 29468575: Issue 5332 - Support Unicode characters in path

Keyboard Shortcuts

	File
u :	up to issue
m :	publish + mail comments
M :	edit review message
j / k :	jump to file after / before current file
J / K :	jump to next file with a comment after / before current file
	Side-by-side diff
i :	toggle intra-line diffs
e :	expand all comments
c :	collapse all comments
s :	toggle showing all comments
n / p :	next / previous diff chunk or comment
N / P :	next / previous comment
<Up> / <Down> :	next / previous line
<Enter> :	respond to / edit current comment
d :	mark current comment as done

	Issue
u :	up to list of issues
m :	publish + mail comments
j / k :	jump to patch after / before current patch
o / <Enter> :	open current patch in side-by-side view
i :	open current patch in unified diff view

	Issue List
j / k :	jump to issue after / before current issue
o / <Enter> :	open current issue
# :	close issue

	Comment/message editing
<Ctrl> + s or <Ctrl> + Enter :	save comment
<Esc> :	cancel edit

Unified Diff: lib/abp2blocklist.js

Issue 29468575: Issue 5332 - Support Unicode characters in path (Closed) Base URL: https://hg.adblockplus.org/abp2blocklist

Patch Set: Add comment about Unicode surrogate pairs Created July 8, 2017, 6 a.m.

Use n/p to move between diff chunks; N/P to move between comments.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: lib/abp2blocklist.js

===================================================================

--- a/lib/abp2blocklist.js

+++ b/lib/abp2blocklist.js

@@ -104,52 +104,61 @@

* {regexp: "...",

* canSafelyMatchAsLowercase: true/false,

* hostname: "...",

* justHostname: true/false}

function parseFilterRegexpSource(text)

{

let regexp = [];

- let lastIndex = text.length - 1;

+ // Convert the text into an array of Unicode characters.

+ //

+ // In the case of surrogate pairs (the smiley emoji, for example), one

+ // Unicode code point is represented by two JavaScript characters together.

+ // We want to iterate over Unicode code points rather than JavaScript

+ // characters.

+ let characters = Array.from(text);

+ let lastIndex = characters.length - 1;

let hostname;

let hostnameStart = null;

let hostnameFinished = false;

let justHostname = false;

let canSafelyMatchAsLowercase = false;

- for (let i = 0; i < text.length; i++)

+ for (let i = 0; i < characters.length; i++)

{

- let c = text[i];

+ let c = characters[i];

if (hostnameFinished)

justHostname = false;

// If we're currently inside the hostname we have to be careful not to

// escape any characters until after we have converted it to punycode.

if (hostnameStart != null && !hostnameFinished)

{

let endingChar = (c == "*" || c == "^" ||

c == "?" || c == "/" || c == "|");

if (!endingChar && i != lastIndex)

continue;

hostname = punycode.toASCII(

- text.substring(hostnameStart, endingChar ? i : i + 1)

+ characters.slice(hostnameStart, endingChar ? i : i + 1).join("")

);

hostnameFinished = justHostname = true;

regexp.push(escapeRegExp(hostname));

if (!endingChar)

break;

}

switch (c)

{

case "*":

- if (regexp.length > 0 && i < lastIndex && text[i + 1] != "*")

+ if (regexp.length > 0 && i < lastIndex && characters[i + 1] != "*")

regexp.push(".*");

break;

case "^":

if (i < lastIndex)

regexp.push(".");

break;

case "|":

if (i == 0)

@@ -157,44 +166,44 @@

regexp.push("^");

break;

}

if (i == lastIndex)

{

regexp.push("$");

break;

}

- if (i == 1 && text[0] == "|")

+ if (i == 1 && characters[0] == "|")

{

hostnameStart = i + 1;

canSafelyMatchAsLowercase = true;

regexp.push("https?://([^/]+\\.)?");

break;

}

regexp.push("\\|");

break;

case "/":

if (!hostnameFinished &&

- text.charAt(i-2) == ":" && text.charAt(i-1) == "/")

+ characters[i - 2] == ":" && characters[i - 1] == "/")

{

hostnameStart = i + 1;

canSafelyMatchAsLowercase = true;

}

regexp.push("/");

break;

case ".": case "+": case "$": case "?":

case "{": case "}": case "(": case ")":

case "[": case "]": case "\\":

regexp.push("\\", c);

break;

default:

if (hostnameFinished && (c >= "a" && c <= "z" ||

c >= "A" && c <= "Z"))

canSafelyMatchAsLowercase = false;

- regexp.push(c);

+ regexp.push(c == "%" ? c : encodeURI(c));

}

return {

regexp: regexp.join(""),

canSafelyMatchAsLowercase: canSafelyMatchAsLowercase,

hostname: hostname,

justHostname: justHostname

« no previous file with comments | « no previous file | test/abp2blocklist.js » ('j') | no next file with comments »