Index: lib/abp2blocklist.js |
=================================================================== |
--- a/lib/abp2blocklist.js |
+++ b/lib/abp2blocklist.js |
@@ -28,16 +28,18 @@ |
| typeMap.STYLESHEET |
| typeMap.SCRIPT |
| typeMap.FONT |
| typeMap.MEDIA |
| typeMap.POPUP |
| typeMap.OBJECT |
| typeMap.OBJECT_SUBREQUEST |
| typeMap.XMLHTTPREQUEST |
+ | typeMap.WEBSOCKET |
+ | typeMap.WEBRTC |
| typeMap.PING |
| typeMap.SUBDOCUMENT |
| typeMap.OTHER); |
function parseDomains(domains, included, excluded) |
{ |
for (let domain in domains) |
{ |
@@ -59,16 +61,41 @@ |
return s.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); |
} |
+/** |
+ * Builds a regular expression source string matching HTTP(S) URLs whose host |
+ * is the given domain or one of its subdomains. |
+ * |
+ * @param {string} domain Domain name; it is escaped and lowercased before use |
+ * @returns {string} Regular expression source, anchored at the URL start and |
+ *                   ending at the "/" or ":" that follows the host |
+ */ |
function matchDomain(domain) |
{ |
return "^https?://([^/:]*\\.)?" + escapeRegExp(domain).toLowerCase() + "[/:]"; |
} |
+/** |
+ * Returns the URL scheme patterns that rules for the given content type |
+ * have to start with. |
+ * |
+ * @param {number} [contentType] Bit mask of typeMap content types |
+ * @returns {string[]} Regular expression fragments matching the beginning of |
+ *                     a URL, one per pattern; never empty |
+ */ |
+function getURLSchemes(contentType) |
+{ |
+  // If the given content type includes all supported URL schemes, simply |
+  // return a single generic URL scheme pattern. This minimizes the size of the |
+  // generated rule set. The downside to this is that it will also match |
+  // schemes that we do not want to match (e.g. "ftp://"), but this can be |
+  // mitigated by adding exceptions for those schemes. |
+  if (contentType & typeMap.WEBSOCKET && contentType & typeMap.WEBRTC && |
+      contentType & ~(typeMap.WEBSOCKET | typeMap.WEBRTC)) |
+    return ["[^:]+:(//)?"]; |
+ |
+  let urlSchemes = []; |
+ |
+  if (contentType & typeMap.WEBSOCKET) |
+    urlSchemes.push("wss?://"); |
+ |
+  if (contentType & typeMap.WEBRTC) |
+    urlSchemes.push("stuns?:", "turns?:"); |
+ |
+  if (contentType & ~(typeMap.WEBSOCKET | typeMap.WEBRTC)) |
+    urlSchemes.push("https?://"); |
+ |
+  // A missing or zero content type sets none of the bits tested above. Fall |
+  // back to HTTP(S), the only scheme that was matched before WebSocket and |
+  // WebRTC support, so that callers reading the first element never get |
+  // undefined and end up with the text "undefined" inside a pattern. |
+  if (urlSchemes.length == 0) |
+    urlSchemes.push("https?://"); |
+ |
+  return urlSchemes; |
+} |
+ |
function findSubdomainsInList(domain, list) |
{ |
let subdomains = []; |
let suffixLength = domain.length + 1; |
for (let name of list) |
{ |
if (name.length > suffixLength && name.slice(-suffixLength) == "." + domain) |
@@ -104,26 +131,27 @@ |
/** |
* Parse the given filter "regexpSource" string. Producing a regular expression, |
* extracting the hostname (if any), deciding if the regular expression is safe |
* to be converted + matched as lower case and noting if the source contains |
* anything after the hostname.) |
* |
* @param {string} text regexpSource property of a filter |
+ * @param {string} [urlScheme] The URL scheme pattern to use in the regular |
+ *                             expression (e.g. "https?://") |
* @returns {object} An object containing a regular expression string, a bool |
* indicating if the filter can be safely matched as lower |
* case, a hostname string (or undefined) and a bool |
* indicating if the source only contains a hostname or not: |
* {regexp: "...", |
* canSafelyMatchAsLowercase: true/false, |
* hostname: "...", |
* justHostname: true/false} |
*/ |
-function parseFilterRegexpSource(text) |
+function parseFilterRegexpSource(text, urlScheme) |
{ |
let regexp = []; |
// Convert the text into an array of Unicode characters. |
// |
// In the case of surrogate pairs (the smiley emoji, for example), one |
// Unicode code point is represented by two JavaScript characters together. |
// We want to iterate over Unicode code points rather than JavaScript |
@@ -132,16 +160,19 @@ |
let lastIndex = characters.length - 1; |
let hostname; |
let hostnameStart = null; |
let hostnameFinished = false; |
let justHostname = false; |
let canSafelyMatchAsLowercase = false; |
+ if (!urlScheme) |
+ urlScheme = getURLSchemes()[0]; |
+ |
for (let i = 0; i < characters.length; i++) |
{ |
let c = characters[i]; |
if (hostnameFinished) |
justHostname = false; |
// If we're currently inside the hostname we have to be careful not to |
@@ -180,17 +211,17 @@ |
if (!justHostname) |
alphabet = "A-Z" + alphabet; |
let digits = "0-9"; |
// Note that the "-" must appear first here in order to retain its |
// literal meaning within the brackets. |
let specialCharacters = "-_.%"; |
let separator = "[^" + specialCharacters + alphabet + digits + "]"; |
if (i == 0) |
- regexp.push("^https?://(.*" + separator + ")?"); |
+ regexp.push("^" + urlScheme + "(.*" + separator + ")?"); |
else if (i == lastIndex) |
regexp.push("(" + separator + ".*)?$"); |
else |
regexp.push(separator); |
break; |
case "|": |
if (i == 0) |
{ |
@@ -201,17 +232,17 @@ |
{ |
regexp.push("$"); |
break; |
} |
if (i == 1 && characters[0] == "|") |
{ |
hostnameStart = i + 1; |
canSafelyMatchAsLowercase = true; |
- regexp.push("https?://([^/]+\\.)?"); |
+ regexp.push(urlScheme + "([^/]+\\.)?"); |
break; |
} |
regexp.push("\\|"); |
break; |
case "/": |
if (!hostnameFinished && |
characters[i - 2] == ":" && characters[i - 1] == "/") |
{ |
@@ -236,72 +267,177 @@ |
return { |
regexp: regexp.join(""), |
canSafelyMatchAsLowercase: canSafelyMatchAsLowercase, |
hostname: hostname, |
justHostname: justHostname |
}; |
} |
-function getResourceTypes(filter) |
+/** |
+ * Maps a content type bit mask to the resource type names used for a rule's |
+ * "resource-type" trigger field. |
+ * |
+ * @param {number} contentType Bit mask of typeMap content types |
+ * @returns {string[]} Resource type names; empty if none of the tested bits |
+ *                     is set |
+ */ |
+function getResourceTypes(contentType) |
{ |
let types = []; |
- if (filter.contentType & typeMap.IMAGE) |
+ if (contentType & typeMap.IMAGE) |
types.push("image"); |
- if (filter.contentType & typeMap.STYLESHEET) |
+ if (contentType & typeMap.STYLESHEET) |
types.push("style-sheet"); |
- if (filter.contentType & typeMap.SCRIPT) |
+ if (contentType & typeMap.SCRIPT) |
types.push("script"); |
- if (filter.contentType & typeMap.FONT) |
+ if (contentType & typeMap.FONT) |
types.push("font"); |
- if (filter.contentType & (typeMap.MEDIA | typeMap.OBJECT)) |
+ if (contentType & (typeMap.MEDIA | typeMap.OBJECT)) |
types.push("media"); |
- if (filter.contentType & typeMap.POPUP) |
+ if (contentType & typeMap.POPUP) |
types.push("popup"); |
- if (filter.contentType & (typeMap.XMLHTTPREQUEST | |
- typeMap.OBJECT_SUBREQUEST | |
- typeMap.PING | |
- typeMap.OTHER)) |
+ if (contentType & (typeMap.XMLHTTPREQUEST | |
+ typeMap.WEBSOCKET | |
+ typeMap.WEBRTC | |
+ typeMap.OBJECT_SUBREQUEST | |
+ typeMap.PING | |
+ typeMap.OTHER)) |
+ { |
types.push("raw"); |
- if (filter.contentType & typeMap.SUBDOCUMENT) |
+ } |
+ if (contentType & typeMap.SUBDOCUMENT) |
types.push("document"); |
return types; |
} |
+/** |
+ * Creates copies of a rule for its alternative URL scheme patterns. |
+ * |
+ * @param {object} trigger The rule's trigger; the first character of its |
+ *                         "url-filter" is dropped, assuming it is "^" |
+ * @param {object} action The rule's action, shared by all copies |
+ * @param {string[]} urlSchemes URL scheme patterns to create copies for |
+ * @returns {object[]} The copied rules; the given trigger is not modified |
+ */ |
+function makeRuleCopies(trigger, action, urlSchemes) |
+{ |
+  // Serialize once. Each copy is parsed from this string, which gives it a |
+  // deep copy of the trigger that can be manipulated individually later on. |
+  let serializedTrigger = JSON.stringify(trigger); |
+ |
+  let urlFilter = trigger["url-filter"]; |
+  let firstScheme = urlSchemes[0]; |
+ |
+  // A URL filter that already begins with the first URL scheme pattern needs |
+  // no copy for that scheme, the pattern is merely swapped out for the |
+  // remaining ones. Otherwise every scheme gets a copy in which the filter |
+  // may occur anywhere after the scheme. |
+  let coveredByOriginal = urlFilter.startsWith("^" + firstScheme); |
+  let remainder = coveredByOriginal ? |
+                    urlFilter.substring(1 + firstScheme.length) : |
+                    ".*" + urlFilter.substring(1); |
+ |
+  let copies = []; |
+  for (let scheme of urlSchemes.slice(coveredByOriginal ? 1 : 0)) |
+  { |
+    let copyTrigger = JSON.parse(serializedTrigger); |
+    copyTrigger["url-filter"] = "^" + scheme + remainder; |
+    copies.push({trigger: copyTrigger, action}); |
+  } |
+ |
+  return copies; |
+} |
+ |
+/** |
+ * Adds an exception to a trigger, in place, for top-level URLs that match |
+ * the trigger's own URL filter. |
+ * |
+ * @param {object} trigger The trigger to modify |
+ */ |
+function excludeTopURLFromTrigger(trigger) |
+{ |
+  let urlFilter = trigger["url-filter"]; |
+  trigger["unless-top-url"] = [urlFilter]; |
+ |
+  // The top URL is only matched case-sensitively if the URL filter is. |
+  if (!trigger["url-filter-is-case-sensitive"]) |
+    return; |
+ |
+  trigger["top-url-filter-is-case-sensitive"] = true; |
+} |
+ |
function convertFilterAddRules(rules, filter, action, withResourceTypes, |
- exceptionDomains) |
+ exceptionDomains, contentType) |
{ |
- let parsed = parseFilterRegexpSource(filter.regexpSource); |
+ if (!contentType) |
+ contentType = filter.contentType; |
+ |
+ // If WebSocket or WebRTC are given along with other options but not |
+ // including all three of WebSocket, WebRTC, and XMLHttpRequest, we must |
+ // generate multiple rules. For example, for the filter |
+ // "foo$websocket,image", we must generate one rule with "^wss?://" and "raw" |
+ // and another rule with "^https?://" and "image". If we merge the two, we |
+ // end up blocking requests of type XMLHttpRequest inadvertently. |
+ if ((contentType & typeMap.WEBSOCKET && contentType != typeMap.WEBSOCKET && |
+ !(contentType & typeMap.WEBRTC && |
+ contentType & typeMap.XMLHTTPREQUEST)) || |
+ (contentType & typeMap.WEBRTC && contentType != typeMap.WEBRTC && |
+ !(contentType & typeMap.WEBSOCKET && |
+ contentType & typeMap.XMLHTTPREQUEST))) |
+ { |
+ if (contentType & typeMap.WEBSOCKET) |
+ { |
+ convertFilterAddRules(rules, filter, action, withResourceTypes, |
+ exceptionDomains, typeMap.WEBSOCKET); |
+ } |
+ |
+ if (contentType & typeMap.WEBRTC) |
+ { |
+ convertFilterAddRules(rules, filter, action, withResourceTypes, |
+ exceptionDomains, typeMap.WEBRTC); |
+ } |
+ |
+ contentType &= ~(typeMap.WEBSOCKET | typeMap.WEBRTC); |
+ |
+ if (!contentType) |
+ return; |
+ } |
+ |
+ let urlSchemes = getURLSchemes(contentType); |
+ let parsed = parseFilterRegexpSource(filter.regexpSource, urlSchemes[0]); |
// For the special case of $document whitelisting filters with just a domain |
// we can generate an equivalent blocking rule exception using if-domain. |
if (filter instanceof filterClasses.WhitelistFilter && |
- filter.contentType & typeMap.DOCUMENT && |
+ contentType & typeMap.DOCUMENT && |
parsed.justHostname) |
{ |
rules.push({ |
trigger: { |
"url-filter": ".*", |
"if-domain": ["*" + parsed.hostname] |
}, |
action: {type: "ignore-previous-rules"} |
}); |
// If the filter contains other supported options we'll need to generate |
// further rules for it, but if not we can simply return now. |
- if (!(filter.contentType & whitelistableRequestTypes)) |
+ if (!(contentType & whitelistableRequestTypes)) |
return; |
} |
let trigger = {"url-filter": parsed.regexp}; |
- // Limit rules to HTTP(S) URLs |
- if (!/^(\^|http)/i.test(trigger["url-filter"])) |
- trigger["url-filter"] = "^https?://.*" + trigger["url-filter"]; |
+ // If the URL filter begins with one of the URL schemes for this content |
+ // type, we generate additional rules for all the URL scheme patterns; |
+ // otherwise, if the start of the URL filter literally matches the first URL |
+ // scheme pattern, we just generate additional rules for the remaining URL |
+ // scheme patterns. |
+ // |
+ // For example, "stun:foo$webrtc" will give us "stun:foo", then we add a "^" |
+ // in front of this and generate two additional rules for |
+ // "^stuns?:.*stun:foo" and "^turns?:.*stun:foo". On the other hand, |
+ // "||foo$webrtc" will give us "^stuns?:([^/]+\\.)?foo", so we just generate |
+ // "^turns?:([^/]+\\.)?foo" in addition. |
+ // |
+ // Note that the filter can be already anchored to the beginning |
+ // (e.g. "|stun:foo$webrtc"), in which case we do not generate any additional |
+ // rules. |
+ let needAltRules = trigger["url-filter"][0] != "^" || |
+ trigger["url-filter"].startsWith("^" + urlSchemes[0]); |
+ |
+ if (trigger["url-filter"][0] != "^") |
+ { |
+ if (!urlSchemes.some(scheme => new RegExp("^" + scheme) |
+ .test(trigger["url-filter"]))) |
+ { |
+ trigger["url-filter"] = urlSchemes[0] + ".*" + trigger["url-filter"]; |
+ } |
+ |
+ trigger["url-filter"] = "^" + trigger["url-filter"]; |
+ } |
// For rules containing only a hostname we know that we're matching against |
// a lowercase string unless the matchCase option was passed. |
if (parsed.canSafelyMatchAsLowercase && !filter.matchCase) |
trigger["url-filter"] = trigger["url-filter"].toLowerCase(); |
if (parsed.canSafelyMatchAsLowercase || filter.matchCase) |
trigger["url-filter-is-case-sensitive"] = true; |
@@ -311,17 +447,17 @@ |
parseDomains(filter.domains, included, excluded); |
if (exceptionDomains) |
excluded = excluded.concat(exceptionDomains); |
if (withResourceTypes) |
{ |
- let resourceTypes = getResourceTypes(filter); |
+ let resourceTypes = getResourceTypes(contentType); |
// Content blocker rules can't differentiate between sub-document requests |
// (iframes) and top-level document requests. To avoid too many false |
// positives, we prevent rules with no hostname part from blocking document |
// requests. |
// |
// Once Safari 11 becomes our minimum supported version, we could change |
// our approach here to use the new "unless-top-url" property instead. |
@@ -332,16 +468,18 @@ |
return; |
trigger["resource-type"] = resourceTypes; |
} |
if (filter.thirdParty != null) |
trigger["load-type"] = [filter.thirdParty ? "third-party" : "first-party"]; |
+ let addTopLevelException = false; |
+ |
if (included.length > 0) |
{ |
trigger["if-domain"] = []; |
for (let name of included) |
{ |
// If this is a blocking filter or an element hiding filter, add the |
// subdomain wildcard only if no subdomains have been excluded. |
@@ -371,22 +509,33 @@ |
{ |
// Rules with a hostname part are still allowed to block document requests, |
// but we add an exception for top-level documents. |
// |
// Note that we can only do this if there's no "unless-domain" property for |
// now. This also only works in Safari 11 onwards, while older versions |
// simply ignore this property. Once Safari 11 becomes our minimum |
// supported version, we can merge "unless-domain" into "unless-top-url". |
- trigger["unless-top-url"] = [trigger["url-filter"]]; |
- if (trigger["url-filter-is-case-sensitive"]) |
- trigger["top-url-filter-is-case-sensitive"] = true; |
+ addTopLevelException = true; |
+ excludeTopURLFromTrigger(trigger); |
} |
rules.push({trigger: trigger, action: {type: action}}); |
+ |
+ if (needAltRules) |
+ { |
+ // Generate additional rules for any alternative URL schemes. |
+ for (let altRule of makeRuleCopies(trigger, {type: action}, urlSchemes)) |
+ { |
+ if (addTopLevelException) |
+ excludeTopURLFromTrigger(altRule.trigger); |
+ |
+ rules.push(altRule); |
+ } |
+ } |
} |
function convertIDSelectorsToAttributeSelectors(selector) |
{ |
// First we figure out where all the IDs are |
let sep = ""; |
let start = null; |
let positions = []; |