| Index: lib/abp2blocklist.js |
| =================================================================== |
| --- a/lib/abp2blocklist.js |
| +++ b/lib/abp2blocklist.js |
| @@ -28,16 +28,18 @@ |
| | typeMap.STYLESHEET |
| | typeMap.SCRIPT |
| | typeMap.FONT |
| | typeMap.MEDIA |
| | typeMap.POPUP |
| | typeMap.OBJECT |
| | typeMap.OBJECT_SUBREQUEST |
| | typeMap.XMLHTTPREQUEST |
| + | typeMap.WEBSOCKET |
| + | typeMap.WEBRTC |
| | typeMap.PING |
| | typeMap.SUBDOCUMENT |
| | typeMap.OTHER); |
| function parseDomains(domains, included, excluded) |
| { |
| for (let domain in domains) |
| { |
| @@ -59,16 +61,41 @@ |
| return s.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); |
| } |
| function matchDomain(domain) |
| { |
| return "^https?://([^/:]*\\.)?" + escapeRegExp(domain).toLowerCase() + "[/:]"; |
| } |
| +function getURLSchemes(contentType = typeMap.OTHER) |
| +{ |
| + // If the given content type includes all supported URL schemes, simply |
| + // return a single generic URL scheme pattern. This minimizes the size of the |
| + // generated rule set. The downside to this is that it will also match |
| + // schemes that we do not want to match (e.g. "ftp://"), but this can be |
| + // mitigated by adding exceptions for those schemes. |
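| + // For example, a filter such as "foo$websocket,webrtc,xmlhttprequest" |
| + // (an illustrative case) covers all three scheme groups, so this returns |
| + // ["[^:]+:(//)?"] rather than |
| + // ["wss?://", "stuns?:", "turns?:", "https?://"]. |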
| + if (contentType & typeMap.WEBSOCKET && contentType & typeMap.WEBRTC && |
| + contentType & ~(typeMap.WEBSOCKET | typeMap.WEBRTC)) |
| + return ["[^:]+:(//)?"]; |
| + |
| + let urlSchemes = []; |
| + |
| + if (contentType & typeMap.WEBSOCKET) |
| + urlSchemes.push("wss?://"); |
| + |
| + if (contentType & typeMap.WEBRTC) |
| + urlSchemes.push("stuns?:", "turns?:"); |
| + |
| + if (contentType & ~(typeMap.WEBSOCKET | typeMap.WEBRTC)) |
| + urlSchemes.push("https?://"); |
| + |
| + return urlSchemes; |
| +} |
| + |
| function findSubdomainsInList(domain, list) |
| { |
| let subdomains = []; |
| let suffixLength = domain.length + 1; |
| for (let name of list) |
| { |
| if (name.length > suffixLength && name.slice(-suffixLength) == "." + domain) |
| @@ -92,35 +119,39 @@ |
| /** |
| * Parse the given filter "regexpSource" string, producing a regular expression, |
| * extracting the hostname (if any), deciding if the regular expression is safe |
| * to be converted and matched as lower case, and noting if the source contains |
| * anything after the hostname. |
| * |
| * @param {string} text regexpSource property of a filter |
| + * @param {string} urlScheme The URL scheme to use in the regular expression |
| * @returns {object} An object containing a regular expression string, a bool |
| * indicating if the filter can be safely matched as lower |
| * case, a hostname string (or undefined) and a bool |
| * indicating if the source only contains a hostname or not: |
| * {regexp: "...", |
| * canSafelyMatchAsLowercase: true/false, |
| * hostname: "...", |
| * justHostname: true/false} |
| */ |
| -function parseFilterRegexpSource(text) |
| +function parseFilterRegexpSource(text, urlScheme) |
| { |
| let regexp = []; |
| let lastIndex = text.length - 1; |
| let hostname; |
| let hostnameStart = null; |
| let hostnameFinished = false; |
| let justHostname = false; |
| let canSafelyMatchAsLowercase = false; |
| + if (!urlScheme) |
| + urlScheme = getURLSchemes()[0]; |
| + |
| for (let i = 0; i < text.length; i++) |
| { |
| let c = text[i]; |
| if (hostnameFinished) |
| justHostname = false; |
| // If we're currently inside the hostname we have to be careful not to |
| @@ -161,17 +192,17 @@ |
| { |
| regexp.push("$"); |
| break; |
| } |
| if (i == 1 && text[0] == "|") |
| { |
| hostnameStart = i + 1; |
| canSafelyMatchAsLowercase = true; |
| - regexp.push("https?://([^/]+\\.)?"); |
| + regexp.push("^" + urlScheme + "([^/]+\\.)?"); |
| break; |
| } |
| regexp.push("\\|"); |
| break; |
| case "/": |
| if (!hostnameFinished && |
| text.charAt(i-2) == ":" && text.charAt(i-1) == "/") |
| { |
| @@ -196,72 +227,170 @@ |
| return { |
| regexp: regexp.join(""), |
| canSafelyMatchAsLowercase: canSafelyMatchAsLowercase, |
| hostname: hostname, |
| justHostname: justHostname |
| }; |
| } |
| -function getResourceTypes(filter) |
| +function getResourceTypes(contentType) |
| { |
| let types = []; |
| - if (filter.contentType & typeMap.IMAGE) |
| + if (contentType & typeMap.IMAGE) |
| types.push("image"); |
| - if (filter.contentType & typeMap.STYLESHEET) |
| + if (contentType & typeMap.STYLESHEET) |
| types.push("style-sheet"); |
| - if (filter.contentType & typeMap.SCRIPT) |
| + if (contentType & typeMap.SCRIPT) |
| types.push("script"); |
| - if (filter.contentType & typeMap.FONT) |
| + if (contentType & typeMap.FONT) |
| types.push("font"); |
| - if (filter.contentType & (typeMap.MEDIA | typeMap.OBJECT)) |
| + if (contentType & (typeMap.MEDIA | typeMap.OBJECT)) |
| types.push("media"); |
| - if (filter.contentType & typeMap.POPUP) |
| + if (contentType & typeMap.POPUP) |
| types.push("popup"); |
| - if (filter.contentType & (typeMap.XMLHTTPREQUEST | |
| + if (contentType & (typeMap.XMLHTTPREQUEST | |
| + typeMap.WEBSOCKET | |
| + typeMap.WEBRTC | |
| typeMap.OBJECT_SUBREQUEST | |
| typeMap.PING | |
| typeMap.OTHER)) |
| + { |
| types.push("raw"); |
| - if (filter.contentType & typeMap.SUBDOCUMENT) |
| + } |
| + if (contentType & typeMap.SUBDOCUMENT) |
| types.push("document"); |
| return types; |
| } |
| +function makeRuleCopies(trigger, action, urlSchemes) |
| +{ |
| + let copies = []; |
| + |
| + // Always make a deep copy of the rule, since rules may have to be |
| + // manipulated individually at a later stage. |
| + let stringifiedTrigger = JSON.stringify(trigger); |
| + |
| + let filterPattern = trigger["url-filter"].substring(1); |
| + let startIndex = 0; |
| + |
| + // If the URL filter already begins with the first URL scheme pattern, |
| + // strip that prefix and start the copies from the second scheme. |
| + if (trigger["url-filter"].startsWith("^" + urlSchemes[0])) |
| + { |
| + filterPattern = filterPattern.substring(urlSchemes[0].length); |
| + startIndex = 1; |
| + } |
| + else |
| + { |
| + filterPattern = ".*" + filterPattern; |
| + } |
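| + // For example, with urlSchemes ["stuns?:", "turns?:"] (illustrative): a |
| + // trigger whose URL filter is "^stuns?:([^/]+\\.)?foo" yields the single |
| + // copy "^turns?:([^/]+\\.)?foo", while "^stun:foo" (no literal scheme |
| + // prefix) yields "^stuns?:.*stun:foo" and "^turns?:.*stun:foo". |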
| + |
| + for (let i = startIndex; i < urlSchemes.length; i++) |
| + { |
| + let copyTrigger = Object.assign(JSON.parse(stringifiedTrigger), { |
| + "url-filter": "^" + urlSchemes[i] + filterPattern |
| + }); |
| + copies.push({trigger: copyTrigger, action}); |
| + } |
| + |
| + return copies; |
| +} |
| + |
| function convertFilterAddRules(rules, filter, action, withResourceTypes, |
| - exceptionDomains) |
| + exceptionDomains, contentType) |
| { |
| - let parsed = parseFilterRegexpSource(filter.regexpSource); |
| + if (!contentType) |
| + contentType = filter.contentType; |
| + |
| + // If WebSocket or WebRTC is given along with other options, but not all |
| + // three of WebSocket, WebRTC, and XMLHttpRequest together, we must |
| + // generate multiple rules. For example, for the filter |
| + // "foo$websocket,image", we must generate one rule with "^wss?://" and |
| + // "raw" and another rule with "^https?://" and "image". If we merged the |
| + // two, we would inadvertently block requests of type XMLHttpRequest. |
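| + // Roughly, for a blocking filter "foo$websocket,image" (an illustrative |
| + // case) this yields the pair: |
| + // {trigger: {"url-filter": "^wss?://.*foo", "resource-type": ["raw"]}, |
| + // action: {type: "block"}} |
| + // {trigger: {"url-filter": "^https?://.*foo", "resource-type": ["image"]}, |
| + // action: {type: "block"}} |
| + // (assuming the filter has no domain or third-party options). |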
| + if ((contentType & typeMap.WEBSOCKET && contentType != typeMap.WEBSOCKET && |
| + !(contentType & typeMap.WEBRTC && |
| + contentType & typeMap.XMLHTTPREQUEST)) || |
| + (contentType & typeMap.WEBRTC && contentType != typeMap.WEBRTC && |
| + !(contentType & typeMap.WEBSOCKET && |
| + contentType & typeMap.XMLHTTPREQUEST))) |
| + { |
| + if (contentType & typeMap.WEBSOCKET) |
| + { |
| + convertFilterAddRules(rules, filter, action, withResourceTypes, |
| + exceptionDomains, typeMap.WEBSOCKET); |
| + } |
| + |
| + if (contentType & typeMap.WEBRTC) |
| + { |
| + convertFilterAddRules(rules, filter, action, withResourceTypes, |
| + exceptionDomains, typeMap.WEBRTC); |
| + } |
| + |
| + contentType &= ~(typeMap.WEBSOCKET | typeMap.WEBRTC); |
| + |
| + if (!contentType) |
| + return; |
| + } |
| + |
| + let urlSchemes = getURLSchemes(contentType); |
| + let parsed = parseFilterRegexpSource(filter.regexpSource, urlSchemes[0]); |
| // For the special case of $document whitelisting filters with just a domain |
| // we can generate an equivalent blocking rule exception using if-domain. |
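| + // For instance, "@@||example.com^$document" (an illustrative filter) |
| + // becomes {trigger: {"url-filter": ".*", "if-domain": ["*example.com"]}, |
| + // action: {type: "ignore-previous-rules"}}. |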
| if (filter instanceof filterClasses.WhitelistFilter && |
| - filter.contentType & typeMap.DOCUMENT && |
| + contentType & typeMap.DOCUMENT && |
| parsed.justHostname) |
| { |
| rules.push({ |
| trigger: { |
| "url-filter": ".*", |
| "if-domain": ["*" + parsed.hostname] |
| }, |
| action: {type: "ignore-previous-rules"} |
| }); |
| // If the filter contains other supported options we'll need to generate |
| // further rules for it, but if not we can simply return now. |
| - if (!(filter.contentType & whitelistableRequestTypes)) |
| + if (!(contentType & whitelistableRequestTypes)) |
| return; |
| } |
| let trigger = {"url-filter": parsed.regexp}; |
| - // Limit rules to HTTP(S) URLs |
| - if (!/^(\^|http)/i.test(trigger["url-filter"])) |
| - trigger["url-filter"] = "^https?://.*" + trigger["url-filter"]; |
| + // If the URL filter begins with one of the URL schemes for this content |
| + // type, we generate additional rules for all the URL scheme patterns; |
| + // otherwise, if the start of the URL filter literally matches the first URL |
| + // scheme pattern, we just generate additional rules for the remaining URL |
| + // scheme patterns. |
| + // |
| + // For example, "stun:foo$webrtc" will give us "stun:foo", then we add a "^" |
| + // in front of this and generate two additional rules for |
| + // "^stuns?:.*stun:foo" and "^turns?:.*stun:foo". On the other hand, |
| + // "||foo$webrtc" will give us "^stuns?:([^/]+\\.)?foo", so we just generate |
| + // "^turns?:([^/]+\\.)?foo" in addition. |
| + // |
| + // Note that the filter can already be anchored to the beginning |
| + // (e.g. "|stun:foo$webrtc"), in which case we do not generate any additional |
| + // rules. |
| + let needAltRules = trigger["url-filter"][0] != "^" || |
| + trigger["url-filter"].startsWith("^" + urlSchemes[0]); |
| + |
| + if (trigger["url-filter"][0] != "^") |
| + { |
| + if (!urlSchemes.some(scheme => new RegExp("^" + scheme) |
| + .test(trigger["url-filter"]))) |
| + { |
| + trigger["url-filter"] = urlSchemes[0] + ".*" + trigger["url-filter"]; |
| + } |
| + |
| + trigger["url-filter"] = "^" + trigger["url-filter"]; |
| + } |
| // For rules containing only a hostname we know that we're matching against |
| // a lowercase string unless the matchCase option was passed. |
| if (parsed.canSafelyMatchAsLowercase && !filter.matchCase) |
| trigger["url-filter"] = trigger["url-filter"].toLowerCase(); |
| if (parsed.canSafelyMatchAsLowercase || filter.matchCase) |
| trigger["url-filter-is-case-sensitive"] = true; |
| @@ -271,17 +400,17 @@ |
| parseDomains(filter.domains, included, excluded); |
| if (exceptionDomains) |
| excluded = excluded.concat(exceptionDomains); |
| if (withResourceTypes) |
| { |
| - trigger["resource-type"] = getResourceTypes(filter); |
| + trigger["resource-type"] = getResourceTypes(contentType); |
| if (trigger["resource-type"].length == 0) |
| return; |
| } |
| if (filter.thirdParty != null) |
| trigger["load-type"] = [filter.thirdParty ? "third-party" : "first-party"]; |
| @@ -309,25 +438,25 @@ |
| trigger["if-domain"].push("*" + name); |
| } |
| } |
| } |
| else if (excluded.length > 0) |
| { |
| trigger["unless-domain"] = excluded.map(name => "*" + name); |
| } |
| - else if (filter instanceof filterClasses.BlockingFilter && |
| - filter.contentType & typeMap.SUBDOCUMENT) |
| - { |
| - trigger["unless-top-url"] = [trigger["url-filter"]]; |
| - if (trigger["url-filter-is-case-sensitive"]) |
| - trigger["top-url-filter-is-case-sensitive"] = true; |
| - } |
| rules.push({trigger: trigger, action: {type: action}}); |
| + |
| + if (needAltRules) |
| + { |
| + // Generate additional rules for any alternative URL schemes. |
| + for (let altRule of makeRuleCopies(trigger, {type: action}, urlSchemes)) |
| + rules.push(altRule); |
| + } |
| } |
| function hasNonASCI(obj) |
| { |
| if (typeof obj == "string") |
| { |
| if (/[^\x00-\x7F]/.test(obj)) |
| return true; |