Index: lib/abp2blocklist.js |
=================================================================== |
--- a/lib/abp2blocklist.js |
+++ b/lib/abp2blocklist.js |
@@ -28,16 +28,18 @@ |
| typeMap.STYLESHEET |
| typeMap.SCRIPT |
| typeMap.FONT |
| typeMap.MEDIA |
| typeMap.POPUP |
| typeMap.OBJECT |
| typeMap.OBJECT_SUBREQUEST |
| typeMap.XMLHTTPREQUEST |
+ | typeMap.WEBSOCKET |
+ | typeMap.WEBRTC |
| typeMap.PING |
| typeMap.SUBDOCUMENT |
| typeMap.OTHER); |
function parseDomains(domains, included, excluded) |
{ |
for (let domain in domains) |
{ |
@@ -59,16 +61,27 @@ |
return s.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); |
} |
+/**
+ * Builds the source of a regular expression that matches HTTP(S) URLs whose
+ * host is the given domain or one of its subdomains.
+ *
+ * The domain is escaped with escapeRegExp() and lowercased, and it has to be
+ * followed by "/" or ":" in the URL for the expression to match.
+ *
+ * @param {string} domain The domain name to match
+ * @returns {string} The regular expression source, anchored at the start of
+ *                   the URL
+ */
function matchDomain(domain)
{
return "^https?://([^/:]*\\.)?" + escapeRegExp(domain).toLowerCase() + "[/:]";
}
+/**
+ * Returns the URL scheme patterns, as regular expression source fragments,
+ * to use for the given content type.
+ *
+ * Only an exact match against typeMap.WEBSOCKET or typeMap.WEBRTC selects a
+ * non-HTTP scheme; any other value (including no argument at all) falls back
+ * to HTTP(S).
+ *
+ * @param {number} [contentType] The content type to look up
+ * @returns {string[]} ["wss?://"] for WebSocket, ["stuns?:", "turns?:"] for
+ *                     WebRTC and ["https?://"] otherwise
+ */
+function getURLSchemes(contentType)
+{
+  // Start from the HTTP(S) default and override it for the two special cases.
+  let schemes = ["https?://"];
+
+  if (contentType == typeMap.WEBSOCKET)
+    schemes = ["wss?://"];
+  else if (contentType == typeMap.WEBRTC)
+    schemes = ["stuns?:", "turns?:"];
+
+  return schemes;
+}
+ |
function findSubdomainsInList(domain, list) |
{ |
let subdomains = []; |
let suffixLength = domain.length + 1; |
for (let name of list) |
{ |
if (name.length > suffixLength && name.slice(-suffixLength) == "." + domain) |
@@ -92,35 +105,39 @@ |
/** |
* Parse the given filter "regexpSource" string. Producing a regular expression, |
* extracting the hostname (if any), deciding if the regular expression is safe |
* to be converted + matched as lower case and noting if the source contains |
* anything after the hostname.) |
* |
* @param {string} text regexpSource property of a filter |
+ * @param {string} urlScheme The URL scheme to use in the regular expression |
* @returns {object} An object containing a regular expression string, a bool |
* indicating if the filter can be safely matched as lower |
* case, a hostname string (or undefined) and a bool |
* indicating if the source only contains a hostname or not: |
* {regexp: "...", |
* canSafelyMatchAsLowercase: true/false, |
* hostname: "...", |
* justHostname: true/false} |
*/ |
-function parseFilterRegexpSource(text) |
+function parseFilterRegexpSource(text, urlScheme) |
{ |
let regexp = []; |
let lastIndex = text.length - 1; |
let hostname; |
let hostnameStart = null; |
let hostnameFinished = false; |
let justHostname = false; |
let canSafelyMatchAsLowercase = false; |
+ if (!urlScheme) |
+ urlScheme = getURLSchemes()[0]; |
+ |
for (let i = 0; i < text.length; i++) |
{ |
let c = text[i]; |
if (hostnameFinished) |
justHostname = false; |
// If we're currently inside the hostname we have to be careful not to |
@@ -161,17 +178,17 @@ |
{ |
regexp.push("$"); |
break; |
} |
if (i == 1 && text[0] == "|") |
{ |
hostnameStart = i + 1; |
canSafelyMatchAsLowercase = true; |
- regexp.push("https?://([^/]+\\.)?"); |
+ regexp.push(urlScheme + "([^/]+\\.)?"); |
break; |
} |
regexp.push("\\|"); |
break; |
case "/": |
if (!hostnameFinished && |
text.charAt(i-2) == ":" && text.charAt(i-1) == "/") |
{ |
@@ -196,72 +213,148 @@ |
return { |
regexp: regexp.join(""), |
canSafelyMatchAsLowercase: canSafelyMatchAsLowercase, |
hostname: hostname, |
justHostname: justHostname |
}; |
} |
-function getResourceTypes(filter)
+/**
+ * Translates a content type bit mask into the list of "resource-type" names
+ * used in a rule's trigger.
+ *
+ * Several content types share one name: MEDIA and OBJECT both yield "media",
+ * while XMLHTTPREQUEST, WEBSOCKET, WEBRTC, OBJECT_SUBREQUEST, PING and OTHER
+ * all yield "raw". Each name is added at most once and the names always come
+ * out in the same order.
+ *
+ * @param {number} contentType Bit mask of typeMap content types
+ * @returns {string[]} The matching resource type names; empty if none of the
+ *                     bits handled here is set
+ */
+function getResourceTypes(contentType)
{
let types = [];
- if (filter.contentType & typeMap.IMAGE)
+ if (contentType & typeMap.IMAGE)
types.push("image");
- if (filter.contentType & typeMap.STYLESHEET)
+ if (contentType & typeMap.STYLESHEET)
types.push("style-sheet");
- if (filter.contentType & typeMap.SCRIPT)
+ if (contentType & typeMap.SCRIPT)
types.push("script");
- if (filter.contentType & typeMap.FONT)
+ if (contentType & typeMap.FONT)
types.push("font");
- if (filter.contentType & (typeMap.MEDIA | typeMap.OBJECT))
+ if (contentType & (typeMap.MEDIA | typeMap.OBJECT))
types.push("media");
- if (filter.contentType & typeMap.POPUP)
+ if (contentType & typeMap.POPUP)
types.push("popup");
- if (filter.contentType & (typeMap.XMLHTTPREQUEST |
+ if (contentType & (typeMap.XMLHTTPREQUEST |
+ typeMap.WEBSOCKET |
+ typeMap.WEBRTC |
typeMap.OBJECT_SUBREQUEST |
typeMap.PING |
typeMap.OTHER))
+ {
types.push("raw");
- if (filter.contentType & typeMap.SUBDOCUMENT)
+ }
+ if (contentType & typeMap.SUBDOCUMENT)
types.push("document");
return types;
}
+/**
+ * Generates copies of a rule for the alternative URL schemes of its content
+ * type.
+ *
+ * The trigger's "url-filter" is expected to start with "^". If it literally
+ * starts with "^" followed by the first URL scheme pattern, one copy is made
+ * for each of the remaining patterns, with the scheme swapped. Otherwise one
+ * copy is made for every pattern (including the first), each consisting of
+ * "^", the scheme pattern, ".*" and the rest of the original filter.
+ *
+ * @param {object} trigger The trigger of the rule to copy; not modified
+ * @param {object} action The action of the rule to copy; not modified
+ * @param {string[]} urlSchemes The URL scheme patterns for the content type
+ * @returns {object[]} The new rules, each with its own trigger and action
+ *                     objects
+ */
+function makeRuleCopies(trigger, action, urlSchemes)
+{
+  let copies = [];
+
+  // Always make a deep copy of the rule, since rules may have to be
+  // manipulated individually at a later stage. The action is serialized too,
+  // so that no two copies end up sharing the same action object.
+  let stringifiedTrigger = JSON.stringify(trigger);
+  let stringifiedAction = JSON.stringify(action);
+
+  let filterPattern = trigger["url-filter"].substring(1);
+  let startIndex = 0;
+
+  // If the URL filter already begins with the first URL scheme pattern, skip
+  // it.
+  if (trigger["url-filter"].startsWith("^" + urlSchemes[0]))
+  {
+    filterPattern = filterPattern.substring(urlSchemes[0].length);
+    startIndex = 1;
+  }
+  else
+  {
+    filterPattern = ".*" + filterPattern;
+  }
+
+  for (let i = startIndex; i < urlSchemes.length; i++)
+  {
+    let copyTrigger = Object.assign(JSON.parse(stringifiedTrigger), {
+      "url-filter": "^" + urlSchemes[i] + filterPattern
+    });
+    // JSON.stringify() yields undefined for an undefined action; pass that
+    // through unchanged rather than handing it to JSON.parse().
+    let copyAction = stringifiedAction === undefined ?
+                       action : JSON.parse(stringifiedAction);
+    copies.push({trigger: copyTrigger, action: copyAction});
+  }
+
+  return copies;
+}
+ |
function convertFilterAddRules(rules, filter, action, withResourceTypes, |
exceptionDomains) |
{ |
- let parsed = parseFilterRegexpSource(filter.regexpSource); |
+ let contentType = filter.contentType; |
+ |
+ // Support WebSocket and WebRTC only if they're the only option. If we try to |
+ // support them otherwise (e.g. $xmlhttprequest,websocket,webrtc), we end up |
+ // having to generate multiple rules, which bloats the rule set and is not |
+ // really necessary in practice. |
+ if ((contentType & typeMap.WEBSOCKET && contentType != typeMap.WEBSOCKET) || |
+ (contentType & typeMap.WEBRTC && contentType != typeMap.WEBRTC)) |
+ { |
+ contentType &= ~(typeMap.WEBSOCKET | typeMap.WEBRTC); |
+ } |
+ |
+ let urlSchemes = getURLSchemes(contentType); |
+ let parsed = parseFilterRegexpSource(filter.regexpSource, urlSchemes[0]); |
// For the special case of $document whitelisting filters with just a domain |
// we can generate an equivalent blocking rule exception using if-domain. |
if (filter instanceof filterClasses.WhitelistFilter && |
- filter.contentType & typeMap.DOCUMENT && |
+ contentType & typeMap.DOCUMENT && |
parsed.justHostname) |
{ |
rules.push({ |
trigger: { |
"url-filter": ".*", |
"if-domain": ["*" + parsed.hostname] |
}, |
action: {type: "ignore-previous-rules"} |
}); |
// If the filter contains other supported options we'll need to generate |
// further rules for it, but if not we can simply return now. |
- if (!(filter.contentType & whitelistableRequestTypes)) |
+ if (!(contentType & whitelistableRequestTypes)) |
return; |
} |
let trigger = {"url-filter": parsed.regexp}; |
- // Limit rules to HTTP(S) URLs |
- if (!/^(\^|http)/i.test(trigger["url-filter"])) |
- trigger["url-filter"] = "^https?://.*" + trigger["url-filter"]; |
+ // If the URL filter begins with one of the URL schemes for this content |
+ // type, we generate additional rules for all the URL scheme patterns; |
+ // otherwise, if the start of the URL filter literally matches the first URL |
+ // scheme pattern, we just generate additional rules for the remaining URL |
+ // scheme patterns. |
+ // |
+ // For example, "stun:foo$webrtc" will give us "stun:foo", then we add a "^" |
+ // in front of this and generate two additional rules for |
+ // "^stuns?:.*stun:foo" and "^turns?:.*stun:foo". On the other hand, |
+ // "||foo$webrtc" will give us "^stuns?:([^/]+\\.)?foo", so we just generate |
+ // "^turns?:([^/]+\\.)?foo" in addition. |
+ // |
+ // Note that the filter can be already anchored to the beginning |
+ // (e.g. "|stun:foo$webrtc"), in which case we do not generate any additional |
+ // rules. |
+ let needAltRules = trigger["url-filter"][0] != "^" || |
+ trigger["url-filter"].startsWith("^" + urlSchemes[0]); |
+ |
+ if (trigger["url-filter"][0] != "^") |
+ { |
+ if (!urlSchemes.some(scheme => new RegExp("^" + scheme) |
+ .test(trigger["url-filter"]))) |
+ { |
+ trigger["url-filter"] = urlSchemes[0] + ".*" + trigger["url-filter"]; |
+ } |
+ |
+ trigger["url-filter"] = "^" + trigger["url-filter"]; |
+ } |
// For rules containing only a hostname we know that we're matching against |
// a lowercase string unless the matchCase option was passed. |
if (parsed.canSafelyMatchAsLowercase && !filter.matchCase) |
trigger["url-filter"] = trigger["url-filter"].toLowerCase(); |
if (parsed.canSafelyMatchAsLowercase || filter.matchCase) |
trigger["url-filter-is-case-sensitive"] = true; |
@@ -271,17 +364,17 @@ |
parseDomains(filter.domains, included, excluded); |
if (exceptionDomains) |
excluded = excluded.concat(exceptionDomains); |
if (withResourceTypes) |
{ |
- trigger["resource-type"] = getResourceTypes(filter); |
+ trigger["resource-type"] = getResourceTypes(contentType); |
if (trigger["resource-type"].length == 0) |
return; |
} |
if (filter.thirdParty != null) |
trigger["load-type"] = [filter.thirdParty ? "third-party" : "first-party"]; |
@@ -311,16 +404,23 @@ |
} |
} |
else if (excluded.length > 0) |
{ |
trigger["unless-domain"] = excluded.map(name => "*" + name); |
} |
rules.push({trigger: trigger, action: {type: action}}); |
+ |
+ if (needAltRules) |
+ { |
+ // Generate additional rules for any alternative URL schemes. |
+ for (let altRule of makeRuleCopies(trigger, {type: action}, urlSchemes)) |
+ rules.push(altRule); |
+ } |
} |
function hasNonASCI(obj) |
{ |
if (typeof obj == "string") |
{ |
if (/[^\x00-\x7F]/.test(obj)) |
return true; |