| Index: lib/abp2blocklist.js | 
| =================================================================== | 
| --- a/lib/abp2blocklist.js | 
| +++ b/lib/abp2blocklist.js | 
| @@ -28,16 +28,18 @@ | 
| | typeMap.STYLESHEET | 
| | typeMap.SCRIPT | 
| | typeMap.FONT | 
| | typeMap.MEDIA | 
| | typeMap.POPUP | 
| | typeMap.OBJECT | 
| | typeMap.OBJECT_SUBREQUEST | 
| | typeMap.XMLHTTPREQUEST | 
| +                                   | typeMap.WEBSOCKET | 
| +                                   | typeMap.WEBRTC | 
| | typeMap.PING | 
| | typeMap.SUBDOCUMENT | 
| | typeMap.OTHER); | 
|  | 
| function parseDomains(domains, included, excluded) | 
| { | 
| for (let domain in domains) | 
| { | 
| @@ -59,16 +61,41 @@ | 
| return s.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); | 
| } | 
|  | 
| function matchDomain(domain) | 
| { | 
| return "^https?://([^/:]*\\.)?" + escapeRegExp(domain).toLowerCase() + "[/:]"; | 
| } | 
|  | 
| +function getURLSchemes(contentType) | 
| +{ | 
| +  // Maps a content type bit mask to the URL scheme patterns its rules need. | 
| +  // When every supported scheme is included, return one generic pattern to | 
| +  // minimize the rule set; it also matches unwanted schemes (e.g. "ftp://"), | 
| +  // which can be mitigated by adding exceptions for those schemes. A missing | 
| +  // or empty content type falls back to "https?://" so the result is never []. | 
| +  if (contentType & typeMap.WEBSOCKET && contentType & typeMap.WEBRTC && | 
| +      contentType & ~(typeMap.WEBSOCKET | typeMap.WEBRTC)) | 
| +    return ["[^:]+:(//)?"]; | 
| + | 
| +  let urlSchemes = []; | 
| + | 
| +  if (contentType & typeMap.WEBSOCKET) | 
| +    urlSchemes.push("wss?://"); | 
| + | 
| +  if (contentType & typeMap.WEBRTC) | 
| +    urlSchemes.push("stuns?:", "turns?:"); | 
| + | 
| +  if (!contentType || contentType & ~(typeMap.WEBSOCKET | typeMap.WEBRTC)) | 
| +    urlSchemes.push("https?://"); | 
| + | 
| +  return urlSchemes; | 
| +} | 
| + | 
| function findSubdomainsInList(domain, list) | 
| { | 
| let subdomains = []; | 
| let suffixLength = domain.length + 1; | 
|  | 
| for (let name of list) | 
| { | 
| if (name.length > suffixLength && name.slice(-suffixLength) == "." + domain) | 
| @@ -104,26 +131,27 @@ | 
|  | 
| /** | 
| * Parse the given filter "regexpSource" string. Producing a regular expression, | 
| * extracting the hostname (if any), deciding if the regular expression is safe | 
| * to be converted + matched as lower case and noting if the source contains | 
| * anything after the hostname.) | 
| * | 
| * @param   {string} text regexpSource property of a filter | 
| + * @param   {string} urlScheme The URL scheme to use in the regular expression | 
| * @returns {object} An object containing a regular expression string, a bool | 
| *                   indicating if the filter can be safely matched as lower | 
| *                   case, a hostname string (or undefined) and a bool | 
| *                   indicating if the source only contains a hostname or not: | 
| *                     {regexp: "...", | 
| *                      canSafelyMatchAsLowercase: true/false, | 
| *                      hostname: "...", | 
| *                      justHostname: true/false} | 
| */ | 
| -function parseFilterRegexpSource(text) | 
| +function parseFilterRegexpSource(text, urlScheme) | 
| { | 
| let regexp = []; | 
|  | 
| // Convert the text into an array of Unicode characters. | 
| // | 
| // In the case of surrogate pairs (the smiley emoji, for example), one | 
| // Unicode code point is represented by two JavaScript characters together. | 
| // We want to iterate over Unicode code points rather than JavaScript | 
| @@ -132,16 +160,19 @@ | 
|  | 
| let lastIndex = characters.length - 1; | 
| let hostname; | 
| let hostnameStart = null; | 
| let hostnameFinished = false; | 
| let justHostname = false; | 
| let canSafelyMatchAsLowercase = false; | 
|  | 
| +  if (!urlScheme) | 
| +    urlScheme = getURLSchemes()[0]; | 
| + | 
| for (let i = 0; i < characters.length; i++) | 
| { | 
| let c = characters[i]; | 
|  | 
| if (hostnameFinished) | 
| justHostname = false; | 
|  | 
| // If we're currently inside the hostname we have to be careful not to | 
| @@ -180,17 +211,17 @@ | 
| if (!justHostname) | 
| alphabet = "A-Z" + alphabet; | 
| let digits = "0-9"; | 
| // Note that the "-" must appear first here in order to retain its | 
| // literal meaning within the brackets. | 
| let specialCharacters = "-_.%"; | 
| let separator = "[^" + specialCharacters + alphabet + digits + "]"; | 
| if (i == 0) | 
| -          regexp.push("^https?://(.*" + separator + ")?"); | 
| +          regexp.push("^" + urlScheme + "(.*" + separator + ")?"); | 
| else if (i == lastIndex) | 
| regexp.push("(" + separator + ".*)?$"); | 
| else | 
| regexp.push(separator); | 
| break; | 
| case "|": | 
| if (i == 0) | 
| { | 
| @@ -201,17 +232,17 @@ | 
| { | 
| regexp.push("$"); | 
| break; | 
| } | 
| if (i == 1 && characters[0] == "|") | 
| { | 
| hostnameStart = i + 1; | 
| canSafelyMatchAsLowercase = true; | 
| -          regexp.push("https?://([^/]+\\.)?"); | 
| +          regexp.push(urlScheme + "([^/]+\\.)?"); | 
| break; | 
| } | 
| regexp.push("\\|"); | 
| break; | 
| case "/": | 
| if (!hostnameFinished && | 
| characters[i - 2] == ":" && characters[i - 1] == "/") | 
| { | 
| @@ -236,72 +267,177 @@ | 
| return { | 
| regexp: regexp.join(""), | 
| canSafelyMatchAsLowercase: canSafelyMatchAsLowercase, | 
| hostname: hostname, | 
| justHostname: justHostname | 
| }; | 
| } | 
|  | 
| -function getResourceTypes(filter) | 
| +function getResourceTypes(contentType) | 
| { | 
| let types = []; | 
|  | 
| -  if (filter.contentType & typeMap.IMAGE) | 
| +  if (contentType & typeMap.IMAGE) | 
| types.push("image"); | 
| -  if (filter.contentType & typeMap.STYLESHEET) | 
| +  if (contentType & typeMap.STYLESHEET) | 
| types.push("style-sheet"); | 
| -  if (filter.contentType & typeMap.SCRIPT) | 
| +  if (contentType & typeMap.SCRIPT) | 
| types.push("script"); | 
| -  if (filter.contentType & typeMap.FONT) | 
| +  if (contentType & typeMap.FONT) | 
| types.push("font"); | 
| -  if (filter.contentType & (typeMap.MEDIA | typeMap.OBJECT)) | 
| +  if (contentType & (typeMap.MEDIA | typeMap.OBJECT)) | 
| types.push("media"); | 
| -  if (filter.contentType & typeMap.POPUP) | 
| +  if (contentType & typeMap.POPUP) | 
| types.push("popup"); | 
| -  if (filter.contentType & (typeMap.XMLHTTPREQUEST | | 
| -                            typeMap.OBJECT_SUBREQUEST | | 
| -                            typeMap.PING | | 
| -                            typeMap.OTHER)) | 
| +  if (contentType & (typeMap.XMLHTTPREQUEST | | 
| +                     typeMap.WEBSOCKET | | 
| +                     typeMap.WEBRTC | | 
| +                     typeMap.OBJECT_SUBREQUEST | | 
| +                     typeMap.PING | | 
| +                     typeMap.OTHER)) | 
| +  { | 
| types.push("raw"); | 
| -  if (filter.contentType & typeMap.SUBDOCUMENT) | 
| +  } | 
| +  if (contentType & typeMap.SUBDOCUMENT) | 
| types.push("document"); | 
|  | 
| return types; | 
| } | 
|  | 
| +/** | 
| + * Creates deep copies of a rule for alternative URL scheme patterns. | 
| + * @param   {object}   trigger    Trigger to copy; the first character of its | 
| + *                                "url-filter" (expected to be "^") is dropped | 
| + * @param   {object}   action     Action object shared by all the copies | 
| + * @param   {string[]} urlSchemes URL scheme patterns to make copies for | 
| + * @returns {object[]} New rules, each of the form {trigger: ..., action: ...} | 
| + */ | 
| +function makeRuleCopies(trigger, action, urlSchemes) | 
| +{ | 
| +  // Serialize once so that every copy gets its own deep copy of the trigger, | 
| +  // since rules may have to be manipulated individually at a later stage. | 
| +  let serializedTrigger = JSON.stringify(trigger); | 
| +  let urlFilter = trigger["url-filter"]; | 
| +  let firstScheme = urlSchemes[0]; | 
| + | 
| +  // If the URL filter already begins with the first URL scheme pattern, skip | 
| +  // that scheme; otherwise match the filter anywhere after each scheme. | 
| +  let coversFirstScheme = urlFilter.startsWith("^" + firstScheme); | 
| +  let remainingSchemes = coversFirstScheme ? urlSchemes.slice(1) : urlSchemes; | 
| +  let filterPattern = coversFirstScheme ? | 
| +                        urlFilter.substring(1 + firstScheme.length) : | 
| +                        ".*" + urlFilter.substring(1); | 
| + | 
| +  return remainingSchemes.map(scheme => | 
| +  { | 
| +    let copyTrigger = JSON.parse(serializedTrigger); | 
| +    copyTrigger["url-filter"] = "^" + scheme + filterPattern; | 
| +    return {trigger: copyTrigger, action}; | 
| +  }); | 
| +} | 
| + | 
| +function excludeTopURLFromTrigger(trigger) | 
| +{ | 
| +  // Exempt top-level documents that the trigger's own URL filter matches, | 
| +  // marking the exemption case sensitive whenever the URL filter is. | 
| +  Object.assign(trigger, {"unless-top-url": [trigger["url-filter"]]}, | 
| +                trigger["url-filter-is-case-sensitive"] ? | 
| +                  {"top-url-filter-is-case-sensitive": true} : null); | 
| +} | 
| + | 
| function convertFilterAddRules(rules, filter, action, withResourceTypes, | 
| -                               exceptionDomains) | 
| +                               exceptionDomains, contentType) | 
| { | 
| -  let parsed = parseFilterRegexpSource(filter.regexpSource); | 
| +  if (!contentType) | 
| +    contentType = filter.contentType; | 
| + | 
| +  // If WebSocket or WebRTC are given along with other options but not | 
| +  // including all three of WebSocket, WebRTC, and XMLHttpRequest, we must | 
| +  // generate multiple rules. For example, for the filter | 
| +  // "foo$websocket,image", we must generate one rule with "^wss?://" and "raw" | 
| +  // and another rule with "^https?://" and "image". If we merge the two, we | 
| +  // end up blocking requests of type XMLHttpRequest inadvertently. | 
| +  if ((contentType & typeMap.WEBSOCKET && contentType != typeMap.WEBSOCKET && | 
| +       !(contentType & typeMap.WEBRTC && | 
| +         contentType & typeMap.XMLHTTPREQUEST)) || | 
| +      (contentType & typeMap.WEBRTC && contentType != typeMap.WEBRTC && | 
| +       !(contentType & typeMap.WEBSOCKET && | 
| +         contentType & typeMap.XMLHTTPREQUEST))) | 
| +  { | 
| +    if (contentType & typeMap.WEBSOCKET) | 
| +    { | 
| +      convertFilterAddRules(rules, filter, action, withResourceTypes, | 
| +                            exceptionDomains, typeMap.WEBSOCKET); | 
| +    } | 
| + | 
| +    if (contentType & typeMap.WEBRTC) | 
| +    { | 
| +      convertFilterAddRules(rules, filter, action, withResourceTypes, | 
| +                            exceptionDomains, typeMap.WEBRTC); | 
| +    } | 
| + | 
| +    contentType &= ~(typeMap.WEBSOCKET | typeMap.WEBRTC); | 
| + | 
| +    if (!contentType) | 
| +      return; | 
| +  } | 
| + | 
| +  let urlSchemes = getURLSchemes(contentType); | 
| +  let parsed = parseFilterRegexpSource(filter.regexpSource, urlSchemes[0]); | 
|  | 
| // For the special case of $document whitelisting filters with just a domain | 
| // we can generate an equivalent blocking rule exception using if-domain. | 
| if (filter instanceof filterClasses.WhitelistFilter && | 
| -      filter.contentType & typeMap.DOCUMENT && | 
| +      contentType & typeMap.DOCUMENT && | 
| parsed.justHostname) | 
| { | 
| rules.push({ | 
| trigger: { | 
| "url-filter": ".*", | 
| "if-domain": ["*" + parsed.hostname] | 
| }, | 
| action: {type: "ignore-previous-rules"} | 
| }); | 
| // If the filter contains other supported options we'll need to generate | 
| // further rules for it, but if not we can simply return now. | 
| -    if (!(filter.contentType & whitelistableRequestTypes)) | 
| +    if (!(contentType & whitelistableRequestTypes)) | 
| return; | 
| } | 
|  | 
| let trigger = {"url-filter": parsed.regexp}; | 
|  | 
| -  // Limit rules to HTTP(S) URLs | 
| -  if (!/^(\^|http)/i.test(trigger["url-filter"])) | 
| -    trigger["url-filter"] = "^https?://.*" + trigger["url-filter"]; | 
| +  // If the URL filter begins with one of the URL schemes for this content | 
| +  // type, we generate additional rules for all the URL scheme patterns; | 
| +  // otherwise, if the start of the URL filter literally matches the first URL | 
| +  // scheme pattern, we just generate additional rules for the remaining URL | 
| +  // scheme patterns. | 
| +  // | 
| +  // For example, "stun:foo$webrtc" will give us "stun:foo", then we add a "^" | 
| +  // in front of this and generate two additional rules for | 
| +  // "^stuns?:.*stun:foo" and "^turns?:.*stun:foo". On the other hand, | 
| +  // "||foo$webrtc" will give us "^stuns?:([^/]+\\.)?foo", so we just generate | 
| +  // "^turns?:([^/]+\\.)?foo" in addition. | 
| +  // | 
| +  // Note that the filter can be already anchored to the beginning | 
| +  // (e.g. "|stun:foo$webrtc"), in which case we do not generate any additional | 
| +  // rules. | 
| +  let needAltRules = trigger["url-filter"][0] != "^" || | 
| +                     trigger["url-filter"].startsWith("^" + urlSchemes[0]); | 
| + | 
| +  if (trigger["url-filter"][0] != "^") | 
| +  { | 
| +    if (!urlSchemes.some(scheme => new RegExp("^" + scheme) | 
| +                                   .test(trigger["url-filter"]))) | 
| +    { | 
| +      trigger["url-filter"] = urlSchemes[0] + ".*" + trigger["url-filter"]; | 
| +    } | 
| + | 
| +    trigger["url-filter"] = "^" + trigger["url-filter"]; | 
| +  } | 
|  | 
| // For rules containing only a hostname we know that we're matching against | 
| // a lowercase string unless the matchCase option was passed. | 
| if (parsed.canSafelyMatchAsLowercase && !filter.matchCase) | 
| trigger["url-filter"] = trigger["url-filter"].toLowerCase(); | 
|  | 
| if (parsed.canSafelyMatchAsLowercase || filter.matchCase) | 
| trigger["url-filter-is-case-sensitive"] = true; | 
| @@ -311,17 +447,17 @@ | 
|  | 
| parseDomains(filter.domains, included, excluded); | 
|  | 
| if (exceptionDomains) | 
| excluded = excluded.concat(exceptionDomains); | 
|  | 
| if (withResourceTypes) | 
| { | 
| -    let resourceTypes = getResourceTypes(filter); | 
| +    let resourceTypes = getResourceTypes(contentType); | 
|  | 
| // Content blocker rules can't differentiate between sub-document requests | 
| // (iframes) and top-level document requests. To avoid too many false | 
| // positives, we prevent rules with no hostname part from blocking document | 
| // requests. | 
| // | 
| // Once Safari 11 becomes our minimum supported version, we could change | 
| // our approach here to use the new "unless-top-url" property instead. | 
| @@ -332,16 +468,18 @@ | 
| return; | 
|  | 
| trigger["resource-type"] = resourceTypes; | 
| } | 
|  | 
| if (filter.thirdParty != null) | 
| trigger["load-type"] = [filter.thirdParty ? "third-party" : "first-party"]; | 
|  | 
| +  let addTopLevelException = false; | 
| + | 
| if (included.length > 0) | 
| { | 
| trigger["if-domain"] = []; | 
|  | 
| for (let name of included) | 
| { | 
| // If this is a blocking filter or an element hiding filter, add the | 
| // subdomain wildcard only if no subdomains have been excluded. | 
| @@ -371,22 +509,33 @@ | 
| { | 
| // Rules with a hostname part are still allowed to block document requests, | 
| // but we add an exception for top-level documents. | 
| // | 
| // Note that we can only do this if there's no "unless-domain" property for | 
| // now. This also only works in Safari 11 onwards, while older versions | 
| // simply ignore this property. Once Safari 11 becomes our minimum | 
| // supported version, we can merge "unless-domain" into "unless-top-url". | 
| -    trigger["unless-top-url"] = [trigger["url-filter"]]; | 
| -    if (trigger["url-filter-is-case-sensitive"]) | 
| -      trigger["top-url-filter-is-case-sensitive"] = true; | 
| +    addTopLevelException = true; | 
| +    excludeTopURLFromTrigger(trigger); | 
| } | 
|  | 
| rules.push({trigger: trigger, action: {type: action}}); | 
| + | 
| +  if (needAltRules) | 
| +  { | 
| +    // Generate additional rules for any alternative URL schemes. | 
| +    for (let altRule of makeRuleCopies(trigger, {type: action}, urlSchemes)) | 
| +    { | 
| +      if (addTopLevelException) | 
| +        excludeTopURLFromTrigger(altRule.trigger); | 
| + | 
| +      rules.push(altRule); | 
| +    } | 
| +  } | 
| } | 
|  | 
| function convertIDSelectorsToAttributeSelectors(selector) | 
| { | 
| // First we figure out where all the IDs are | 
| let sep = ""; | 
| let start = null; | 
| let positions = []; | 
|  |