Index: lib/abp2blocklist.js |
=================================================================== |
--- a/lib/abp2blocklist.js |
+++ b/lib/abp2blocklist.js |
@@ -143,18 +143,36 @@ |
switch (c) |
{ |
case "*": |
if (regexp.length > 0 && i < lastIndex && text[i + 1] != "*") |
regexp.push(".*"); |
break; |
case "^": |
- if (i < lastIndex) |
- regexp.push("."); |
+ let alphabet = "a-z"; |
+ // If justHostname is true and we've encountered a "^", it means we're |
+ // still in the hostname part of the URL. Since hostnames are always |
+ // lower case (Punycode), there's no need to include "A-Z" in the |
+ // pattern. Further, subsequent code may lower-case the entire regular |
+ // expression (if the URL contains only the hostname part), leaving us |
+ // with "a-za-z", which would be redundant. |
+ if (!justHostname) |
+ alphabet = "A-Z" + alphabet; |
+ let digits = "0-9"; |
+ // Note that the "-" must appear first here in order to retain its |
+ // literal meaning within the brackets. |
+ let specialCharacters = "-_.%"; |
+ let separator = "[^" + specialCharacters + alphabet + digits + "]"; |
+ if (i == 0) |
+ regexp.push("^https?://(.*" + separator + ")?"); |
+ else if (i == lastIndex) |
+ regexp.push("(" + separator + ".*)?$"); |
+ else |
+ regexp.push(separator); |
break; |
case "|": |
if (i == 0) |
{ |
regexp.push("^"); |
break; |
} |
if (i == lastIndex) |