lib/abp2blocklist.js - Issue 29452289: Issue 5283 - Add support for $websocket and $webrtc

Unified Diff: lib/abp2blocklist.js

Issue 29452289: Issue 5283 - Add support for $websocket and $webrtc (Closed) Base URL: https://hg.adblockplus.org/abp2blocklist

Patch Set: Rebase Created July 13, 2017, 11:41 a.m.

Use n/p to move between diff chunks; N/P to move between comments.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: lib/abp2blocklist.js

===================================================================

--- a/lib/abp2blocklist.js

+++ b/lib/abp2blocklist.js

@@ -28,16 +28,18 @@

| typeMap.STYLESHEET

| typeMap.SCRIPT

| typeMap.FONT

| typeMap.MEDIA

| typeMap.POPUP

| typeMap.OBJECT

| typeMap.OBJECT_SUBREQUEST

| typeMap.XMLHTTPREQUEST

+ | typeMap.WEBSOCKET

+ | typeMap.WEBRTC

| typeMap.PING

| typeMap.SUBDOCUMENT

| typeMap.OTHER);

function parseDomains(domains, included, excluded)

{

for (let domain in domains)

{

@@ -59,16 +61,41 @@

return s.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");

}

function matchDomain(domain)

{

return "^https?://([^/:]*\\.)?" + escapeRegExp(domain).toLowerCase() + "[/:]";

}

+function getURLSchemes(contentType)

+ // If the given content type includes all supported URL schemes, simply

+ // return a single generic URL scheme pattern. This minimizes the size of the

+ // generated rule set. The downside to this is that it will also match

+ // schemes that we do not want to match (e.g. "ftp://"), but this can be

+ // mitigated by adding exceptions for those schemes.

+ if (contentType & typeMap.WEBSOCKET && contentType & typeMap.WEBRTC &&

+ contentType & ~(typeMap.WEBSOCKET | typeMap.WEBRTC))

+ return ["[^:]+:(//)?"];

+ let urlSchemes = [];

+ if (contentType & typeMap.WEBSOCKET)

+ urlSchemes.push("wss?://");

+ if (contentType & typeMap.WEBRTC)

+ urlSchemes.push("stuns?:", "turns?:");

+ if (contentType & ~(typeMap.WEBSOCKET | typeMap.WEBRTC))

+ urlSchemes.push("https?://");

+ return urlSchemes;

function findSubdomainsInList(domain, list)

{

let subdomains = [];

let suffixLength = domain.length + 1;

for (let name of list)

{

if (name.length > suffixLength && name.slice(-suffixLength) == "." + domain)

@@ -104,26 +131,27 @@

/**

* Parse the given filter "regexpSource" string, producing a regular expression,

* extracting the hostname (if any), deciding if the regular expression is safe

* to be converted + matched as lower case and noting if the source contains

* anything after the hostname.

* @param {string} text regexpSource property of a filter

+ * @param {string} urlScheme The URL scheme to use in the regular expression

* @returns {object} An object containing a regular expression string, a bool

* indicating if the filter can be safely matched as lower

* case, a hostname string (or undefined) and a bool

* indicating if the source only contains a hostname or not:

* {regexp: "...",

* canSafelyMatchAsLowercase: true/false,

* hostname: "...",

* justHostname: true/false}

-function parseFilterRegexpSource(text)

+function parseFilterRegexpSource(text, urlScheme)

{

let regexp = [];

// Convert the text into an array of Unicode characters.

// In the case of surrogate pairs (the smiley emoji, for example), one

// Unicode code point is represented by two JavaScript characters together.

// We want to iterate over Unicode code points rather than JavaScript

@@ -132,16 +160,19 @@

let lastIndex = characters.length - 1;

let hostname;

let hostnameStart = null;

let hostnameFinished = false;

let justHostname = false;

let canSafelyMatchAsLowercase = false;

+ if (!urlScheme)

+ urlScheme = getURLSchemes()[0];

for (let i = 0; i < characters.length; i++)

{

let c = characters[i];

if (hostnameFinished)

justHostname = false;

// If we're currently inside the hostname we have to be careful not to

@@ -180,17 +211,17 @@

if (!justHostname)

alphabet = "A-Z" + alphabet;

let digits = "0-9";

// Note that the "-" must appear first here in order to retain its

// literal meaning within the brackets.

let specialCharacters = "-_.%";

let separator = "[^" + specialCharacters + alphabet + digits + "]";

if (i == 0)

- regexp.push("^https?://(.*" + separator + ")?");

+ regexp.push("^" + urlScheme + "(.*" + separator + ")?");

else if (i == lastIndex)

regexp.push("(" + separator + ".*)?$");

else

regexp.push(separator);

break;

case "|":

if (i == 0)

{

@@ -201,17 +232,17 @@

{

regexp.push("$");

break;

}

if (i == 1 && characters[0] == "|")

{

hostnameStart = i + 1;

canSafelyMatchAsLowercase = true;

- regexp.push("https?://([^/]+\\.)?");

+ regexp.push(urlScheme + "([^/]+\\.)?");

break;

}

regexp.push("\\|");

break;

case "/":

if (!hostnameFinished &&

characters[i - 2] == ":" && characters[i - 1] == "/")

{

@@ -236,72 +267,177 @@

return {

regexp: regexp.join(""),

canSafelyMatchAsLowercase: canSafelyMatchAsLowercase,

hostname: hostname,

justHostname: justHostname

};

}

-function getResourceTypes(filter)

+function getResourceTypes(contentType)

{

let types = [];

- if (filter.contentType & typeMap.IMAGE)

+ if (contentType & typeMap.IMAGE)

types.push("image");

- if (filter.contentType & typeMap.STYLESHEET)

+ if (contentType & typeMap.STYLESHEET)

types.push("style-sheet");

- if (filter.contentType & typeMap.SCRIPT)

+ if (contentType & typeMap.SCRIPT)

types.push("script");

- if (filter.contentType & typeMap.FONT)

+ if (contentType & typeMap.FONT)

types.push("font");

- if (filter.contentType & (typeMap.MEDIA | typeMap.OBJECT))

+ if (contentType & (typeMap.MEDIA | typeMap.OBJECT))

types.push("media");

- if (filter.contentType & typeMap.POPUP)

+ if (contentType & typeMap.POPUP)

types.push("popup");

- if (filter.contentType & (typeMap.XMLHTTPREQUEST |

- typeMap.OBJECT_SUBREQUEST |

- typeMap.PING |

- typeMap.OTHER))

+ if (contentType & (typeMap.XMLHTTPREQUEST |

+ typeMap.WEBSOCKET |

+ typeMap.WEBRTC |

+ typeMap.OBJECT_SUBREQUEST |

+ typeMap.PING |

+ typeMap.OTHER))

+ {

types.push("raw");

- if (filter.contentType & typeMap.SUBDOCUMENT)

+ }

+ if (contentType & typeMap.SUBDOCUMENT)

types.push("document");

return types;

}

+function makeRuleCopies(trigger, action, urlSchemes)

+ let copies = [];

+ // Always make a deep copy of the rule, since rules may have to be

+ // manipulated individually at a later stage.

+ let stringifiedTrigger = JSON.stringify(trigger);

+ let filterPattern = trigger["url-filter"].substring(1);

+ let startIndex = 0;

+ // If the URL filter already begins with the first URL scheme pattern, skip

+ // it.

+ if (trigger["url-filter"].startsWith("^" + urlSchemes[0]))

+ {

+ filterPattern = filterPattern.substring(urlSchemes[0].length);

+ startIndex = 1;

+ }

+ else

+ {

+ filterPattern = ".*" + filterPattern;

+ }

+ for (let i = startIndex; i < urlSchemes.length; i++)

+ {

+ let copyTrigger = Object.assign(JSON.parse(stringifiedTrigger), {

+ "url-filter": "^" + urlSchemes[i] + filterPattern

+ });

+ copies.push({trigger: copyTrigger, action});

+ }

+ return copies;

+function excludeTopURLFromTrigger(trigger)

+ trigger["unless-top-url"] = [trigger["url-filter"]];

+ if (trigger["url-filter-is-case-sensitive"])

+ trigger["top-url-filter-is-case-sensitive"] = true;

function convertFilterAddRules(rules, filter, action, withResourceTypes,

- exceptionDomains)

+ exceptionDomains, contentType)

{

- let parsed = parseFilterRegexpSource(filter.regexpSource);

+ if (!contentType)

+ contentType = filter.contentType;

+ // If WebSocket or WebRTC are given along with other options but not

+ // including all three of WebSocket, WebRTC, and XMLHttpRequest, we must

+ // generate multiple rules. For example, for the filter

+ // "foo$websocket,image", we must generate one rule with "^wss?://" and "raw"

+ // and another rule with "^https?://" and "image". If we merge the two, we

+ // end up blocking requests of type XMLHttpRequest inadvertently.

+ if ((contentType & typeMap.WEBSOCKET && contentType != typeMap.WEBSOCKET &&

+ !(contentType & typeMap.WEBRTC &&

+ contentType & typeMap.XMLHTTPREQUEST)) ||

+ (contentType & typeMap.WEBRTC && contentType != typeMap.WEBRTC &&

+ !(contentType & typeMap.WEBSOCKET &&

+ contentType & typeMap.XMLHTTPREQUEST)))

+ {

+ if (contentType & typeMap.WEBSOCKET)

+ {

+ convertFilterAddRules(rules, filter, action, withResourceTypes,

+ exceptionDomains, typeMap.WEBSOCKET);

+ }

+ if (contentType & typeMap.WEBRTC)

+ {

+ convertFilterAddRules(rules, filter, action, withResourceTypes,

+ exceptionDomains, typeMap.WEBRTC);

+ }

+ contentType &= ~(typeMap.WEBSOCKET | typeMap.WEBRTC);

+ if (!contentType)

+ return;

+ }

+ let urlSchemes = getURLSchemes(contentType);

+ let parsed = parseFilterRegexpSource(filter.regexpSource, urlSchemes[0]);

// For the special case of $document whitelisting filters with just a domain

// we can generate an equivalent blocking rule exception using if-domain.

if (filter instanceof filterClasses.WhitelistFilter &&

- filter.contentType & typeMap.DOCUMENT &&

+ contentType & typeMap.DOCUMENT &&

parsed.justHostname)

{

rules.push({

trigger: {

"url-filter": ".*",

"if-domain": ["*" + parsed.hostname]

action: {type: "ignore-previous-rules"}

});

// If the filter contains other supported options we'll need to generate

// further rules for it, but if not we can simply return now.

- if (!(filter.contentType & whitelistableRequestTypes))

+ if (!(contentType & whitelistableRequestTypes))

return;

}

let trigger = {"url-filter": parsed.regexp};

- // Limit rules to HTTP(S) URLs

- if (!/^(\^|http)/i.test(trigger["url-filter"]))

- trigger["url-filter"] = "^https?://.*" + trigger["url-filter"];

+ // If the URL filter begins with one of the URL schemes for this content

+ // type, we generate additional rules for all the URL scheme patterns;

+ // otherwise, if the start of the URL filter literally matches the first URL

+ // scheme pattern, we just generate additional rules for the remaining URL

+ // scheme patterns.

+ //

+ // For example, "stun:foo$webrtc" will give us "stun:foo", then we add a "^"

+ // in front of this and generate two additional rules for

+ // "^stuns?:.*stun:foo" and "^turns?:.*stun:foo". On the other hand,

+ // "||foo$webrtc" will give us "^stuns?:([^/]+\\.)?foo", so we just generate

+ // "^turns?:([^/]+\\.)?foo" in addition.

+ //

+ // Note that the filter can be already anchored to the beginning

+ // (e.g. "|stun:foo$webrtc"), in which case we do not generate any additional

+ // rules.

+ let needAltRules = trigger["url-filter"][0] != "^" ||

+ trigger["url-filter"].startsWith("^" + urlSchemes[0]);

+ if (trigger["url-filter"][0] != "^")

+ {

+ if (!urlSchemes.some(scheme => new RegExp("^" + scheme)

+ .test(trigger["url-filter"])))

+ {

+ trigger["url-filter"] = urlSchemes[0] + ".*" + trigger["url-filter"];

+ }

+ trigger["url-filter"] = "^" + trigger["url-filter"];

+ }

// For rules containing only a hostname we know that we're matching against

// a lowercase string unless the matchCase option was passed.

if (parsed.canSafelyMatchAsLowercase && !filter.matchCase)

trigger["url-filter"] = trigger["url-filter"].toLowerCase();

if (parsed.canSafelyMatchAsLowercase || filter.matchCase)

trigger["url-filter-is-case-sensitive"] = true;

@@ -311,17 +447,17 @@

parseDomains(filter.domains, included, excluded);

if (exceptionDomains)

excluded = excluded.concat(exceptionDomains);

if (withResourceTypes)

{

- let resourceTypes = getResourceTypes(filter);

+ let resourceTypes = getResourceTypes(contentType);

// Content blocker rules can't differentiate between sub-document requests

// (iframes) and top-level document requests. To avoid too many false

// positives, we prevent rules with no hostname part from blocking document

// requests.

// Once Safari 11 becomes our minimum supported version, we could change

// our approach here to use the new "unless-top-url" property instead.

@@ -332,16 +468,18 @@

return;

trigger["resource-type"] = resourceTypes;

}

if (filter.thirdParty != null)

trigger["load-type"] = [filter.thirdParty ? "third-party" : "first-party"];

+ let addTopLevelException = false;

if (included.length > 0)

{

trigger["if-domain"] = [];

for (let name of included)

{

// If this is a blocking filter or an element hiding filter, add the

// subdomain wildcard only if no subdomains have been excluded.

@@ -371,22 +509,33 @@

{

// Rules with a hostname part are still allowed to block document requests,

// but we add an exception for top-level documents.

// Note that we can only do this if there's no "unless-domain" property for

// now. This also only works in Safari 11 onwards, while older versions

// simply ignore this property. Once Safari 11 becomes our minimum

// supported version, we can merge "unless-domain" into "unless-top-url".

- trigger["unless-top-url"] = [trigger["url-filter"]];

- if (trigger["url-filter-is-case-sensitive"])

- trigger["top-url-filter-is-case-sensitive"] = true;

+ addTopLevelException = true;

+ excludeTopURLFromTrigger(trigger);

}

rules.push({trigger: trigger, action: {type: action}});

+ if (needAltRules)

+ {

+ // Generate additional rules for any alternative URL schemes.

+ for (let altRule of makeRuleCopies(trigger, {type: action}, urlSchemes))

+ {

+ if (addTopLevelException)

+ excludeTopURLFromTrigger(altRule.trigger);

+ rules.push(altRule);

+ }

}

function convertIDSelectorsToAttributeSelectors(selector)

{

// First we figure out where all the IDs are

let sep = "";

let start = null;

let positions = [];

« no previous file with comments | « no previous file | node_modules/filterClasses.js » ('j') | no next file with comments »