| Index: lib/abp2blocklist.js |
| =================================================================== |
| --- a/lib/abp2blocklist.js |
| +++ b/lib/abp2blocklist.js |
| @@ -143,18 +143,36 @@ |
| switch (c) |
| { |
| case "*": |
| if (regexp.length > 0 && i < lastIndex && text[i + 1] != "*") |
| regexp.push(".*"); |
| break; |
| case "^": |
| - if (i < lastIndex) |
| - regexp.push("."); |
| + let alphabet = "a-z"; |
| + // If justHostname is true when we encounter a "^", we are still in the |
| + // hostname part of the URL. Since hostnames are always lower case |
| + // (Punycode), there is no need to include "A-Z" in the pattern. |
| + // Furthermore, subsequent code may lower-case the entire regular |
| + // expression (if the URL contains only the hostname part), which would |
| + // turn "A-Za-z" into "a-za-z", a redundant character class. |
| + if (!justHostname) |
| + alphabet = "A-Z" + alphabet; |
| + let digits = "0-9"; |
| + // Note that the "-" must appear first here in order to retain its |
| + // literal meaning within the brackets. |
| + let specialCharacters = "-_.%"; |
| + let separator = "[^" + specialCharacters + alphabet + digits + "]"; |
| + if (i == 0) |
| + regexp.push("^https?://(.*" + separator + ")?"); |
| + else if (i == lastIndex) |
| + regexp.push("(" + separator + ".*)?$"); |
| + else |
| + regexp.push(separator); |
| break; |
| case "|": |
| if (i == 0) |
| { |
| regexp.push("^"); |
| break; |
| } |
| if (i == lastIndex) |