Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Unified Diff: lib/abp2blocklist.js

Issue 29452289: Issue 5283 - Add support for $websocket and $webrtc (Closed) Base URL: https://hg.adblockplus.org/abp2blocklist
Patch Set: Rebase Created July 13, 2017, 11:41 a.m.
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « no previous file | node_modules/filterClasses.js » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: lib/abp2blocklist.js
===================================================================
--- a/lib/abp2blocklist.js
+++ b/lib/abp2blocklist.js
@@ -28,16 +28,18 @@
| typeMap.STYLESHEET
| typeMap.SCRIPT
| typeMap.FONT
| typeMap.MEDIA
| typeMap.POPUP
| typeMap.OBJECT
| typeMap.OBJECT_SUBREQUEST
| typeMap.XMLHTTPREQUEST
+ | typeMap.WEBSOCKET
+ | typeMap.WEBRTC
| typeMap.PING
| typeMap.SUBDOCUMENT
| typeMap.OTHER);
function parseDomains(domains, included, excluded)
{
for (let domain in domains)
{
@@ -59,16 +61,41 @@
return s.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
}
function matchDomain(domain)
{
return "^https?://([^/:]*\\.)?" + escapeRegExp(domain).toLowerCase() + "[/:]";
}
+function getURLSchemes(contentType)
+{
+ // Returns the URL scheme patterns to match for the given content type mask,
+ // falling back to HTTP(S) if none is given so the list is never empty. If
+ // all supported schemes are included, a single generic pattern is returned
+ // to minimize the rule set size; it also matches unwanted schemes (e.g.
+ // "ftp://"), but this can be mitigated by adding exceptions for those.
+ if (contentType & typeMap.WEBSOCKET && contentType & typeMap.WEBRTC &&
+ contentType & ~(typeMap.WEBSOCKET | typeMap.WEBRTC))
+ return ["[^:]+:(//)?"];
+
+ let urlSchemes = [];
+
+ if (contentType & typeMap.WEBSOCKET)
+ urlSchemes.push("wss?://");
+
+ if (contentType & typeMap.WEBRTC)
+ urlSchemes.push("stuns?:", "turns?:");
+
+ if (!contentType || contentType & ~(typeMap.WEBSOCKET | typeMap.WEBRTC))
+ urlSchemes.push("https?://");
+
+ return urlSchemes;
+}
+
function findSubdomainsInList(domain, list)
{
let subdomains = [];
let suffixLength = domain.length + 1;
for (let name of list)
{
if (name.length > suffixLength && name.slice(-suffixLength) == "." + domain)
@@ -104,26 +131,27 @@
/**
* Parse the given filter "regexpSource" string, producing a regular expression,
* extracting the hostname (if any), deciding if the regular expression is safe
* to be converted + matched as lower case and noting if the source contains
* anything after the hostname.
*
* @param {string} text regexpSource property of a filter
+ * @param {string} urlScheme The URL scheme to use in the regular expression
* @returns {object} An object containing a regular expression string, a bool
* indicating if the filter can be safely matched as lower
* case, a hostname string (or undefined) and a bool
* indicating if the source only contains a hostname or not:
* {regexp: "...",
* canSafelyMatchAsLowercase: true/false,
* hostname: "...",
* justHostname: true/false}
*/
-function parseFilterRegexpSource(text)
+function parseFilterRegexpSource(text, urlScheme)
{
let regexp = [];
// Convert the text into an array of Unicode characters.
//
// In the case of surrogate pairs (the smiley emoji, for example), one
// Unicode code point is represented by two JavaScript characters together.
// We want to iterate over Unicode code points rather than JavaScript
@@ -132,16 +160,19 @@
let lastIndex = characters.length - 1;
let hostname;
let hostnameStart = null;
let hostnameFinished = false;
let justHostname = false;
let canSafelyMatchAsLowercase = false;
+ if (!urlScheme)
+ urlScheme = getURLSchemes()[0];
+
for (let i = 0; i < characters.length; i++)
{
let c = characters[i];
if (hostnameFinished)
justHostname = false;
// If we're currently inside the hostname we have to be careful not to
@@ -180,17 +211,17 @@
if (!justHostname)
alphabet = "A-Z" + alphabet;
let digits = "0-9";
// Note that the "-" must appear first here in order to retain its
// literal meaning within the brackets.
let specialCharacters = "-_.%";
let separator = "[^" + specialCharacters + alphabet + digits + "]";
if (i == 0)
- regexp.push("^https?://(.*" + separator + ")?");
+ regexp.push("^" + urlScheme + "(.*" + separator + ")?");
else if (i == lastIndex)
regexp.push("(" + separator + ".*)?$");
else
regexp.push(separator);
break;
case "|":
if (i == 0)
{
@@ -201,17 +232,17 @@
{
regexp.push("$");
break;
}
if (i == 1 && characters[0] == "|")
{
hostnameStart = i + 1;
canSafelyMatchAsLowercase = true;
- regexp.push("https?://([^/]+\\.)?");
+ regexp.push(urlScheme + "([^/]+\\.)?");
break;
}
regexp.push("\\|");
break;
case "/":
if (!hostnameFinished &&
characters[i - 2] == ":" && characters[i - 1] == "/")
{
@@ -236,72 +267,177 @@
return {
regexp: regexp.join(""),
canSafelyMatchAsLowercase: canSafelyMatchAsLowercase,
hostname: hostname,
justHostname: justHostname
};
}
-function getResourceTypes(filter)
+function getResourceTypes(contentType)
{
let types = [];
- if (filter.contentType & typeMap.IMAGE)
+ if (contentType & typeMap.IMAGE)
types.push("image");
- if (filter.contentType & typeMap.STYLESHEET)
+ if (contentType & typeMap.STYLESHEET)
types.push("style-sheet");
- if (filter.contentType & typeMap.SCRIPT)
+ if (contentType & typeMap.SCRIPT)
types.push("script");
- if (filter.contentType & typeMap.FONT)
+ if (contentType & typeMap.FONT)
types.push("font");
- if (filter.contentType & (typeMap.MEDIA | typeMap.OBJECT))
+ if (contentType & (typeMap.MEDIA | typeMap.OBJECT))
types.push("media");
- if (filter.contentType & typeMap.POPUP)
+ if (contentType & typeMap.POPUP)
types.push("popup");
- if (filter.contentType & (typeMap.XMLHTTPREQUEST |
- typeMap.OBJECT_SUBREQUEST |
- typeMap.PING |
- typeMap.OTHER))
+ if (contentType & (typeMap.XMLHTTPREQUEST |
+ typeMap.WEBSOCKET |
+ typeMap.WEBRTC |
+ typeMap.OBJECT_SUBREQUEST |
+ typeMap.PING |
+ typeMap.OTHER))
+ {
types.push("raw");
- if (filter.contentType & typeMap.SUBDOCUMENT)
+ }
+ if (contentType & typeMap.SUBDOCUMENT)
types.push("document");
return types;
}
+/**
+ * Builds copies of a rule, one per URL scheme pattern, skipping the first
+ * pattern when the trigger's URL filter already starts with it.
+ *
+ * @param {object} trigger Rule trigger; the first character of its
+ * "url-filter" (expected to be the "^" anchor) is dropped
+ * before the scheme pattern is prepended
+ * @param {object} action Rule action; every copy references this same object
+ * @param {string[]} urlSchemes URL scheme patterns to generate copies for
+ * @returns {object[]} The new rules, each of the form {trigger, action}
+ */
+function makeRuleCopies(trigger, action, urlSchemes)
+{
+ let copies = [];
+
+ // Serialize the trigger once so that every copy gets its own deep clone,
+ // since rules may have to be manipulated individually at a later stage.
+ // Note that the action object is not cloned; all copies share it.
+ let stringifiedTrigger = JSON.stringify(trigger);
+
+ let filterPattern = trigger["url-filter"].substring(1);
+ let startIndex = 0;
+
+ // If the URL filter already begins with the first URL scheme pattern, strip
+ // that pattern from the filter and skip it when generating copies.
+ if (trigger["url-filter"].startsWith("^" + urlSchemes[0]))
+ {
+ filterPattern = filterPattern.substring(urlSchemes[0].length);
+ startIndex = 1;
+ }
+ else
+ {
+ // Otherwise let anything appear between the scheme and the original
+ // pattern.
+ filterPattern = ".*" + filterPattern;
+ }
+
+ for (let i = startIndex; i < urlSchemes.length; i++)
+ {
+ // Clone the trigger and override only its URL filter.
+ let copyTrigger = Object.assign(JSON.parse(stringifiedTrigger), {
+ "url-filter": "^" + urlSchemes[i] + filterPattern
+ });
+ copies.push({trigger: copyTrigger, action});
+ }
+
+ return copies;
+}
+
+/**
+ * Sets the trigger's "unless-top-url" property to its own URL filter, and
+ * carries over the case sensitivity flag if it is set. The trigger is
+ * modified in place.
+ *
+ * @param {object} trigger Rule trigger with a "url-filter" property
+ */
+function excludeTopURLFromTrigger(trigger)
+{
+ trigger["unless-top-url"] = [trigger["url-filter"]];
+ // The top URL pattern is a copy of the URL filter, so it has to be matched
+ // with the same case sensitivity.
+ if (trigger["url-filter-is-case-sensitive"])
+ trigger["top-url-filter-is-case-sensitive"] = true;
+}
+
function convertFilterAddRules(rules, filter, action, withResourceTypes,
- exceptionDomains)
+ exceptionDomains, contentType)
{
- let parsed = parseFilterRegexpSource(filter.regexpSource);
+ if (!contentType)
+ contentType = filter.contentType;
+
+ // If WebSocket or WebRTC are given along with other options but not
+ // including all three of WebSocket, WebRTC, and XMLHttpRequest, we must
+ // generate multiple rules. For example, for the filter
+ // "foo$websocket,image", we must generate one rule with "^wss?://" and "raw"
+ // and another rule with "^https?://" and "image". If we merge the two, we
+ // end up blocking requests of type XMLHttpRequest inadvertently.
+ if ((contentType & typeMap.WEBSOCKET && contentType != typeMap.WEBSOCKET &&
+ !(contentType & typeMap.WEBRTC &&
+ contentType & typeMap.XMLHTTPREQUEST)) ||
+ (contentType & typeMap.WEBRTC && contentType != typeMap.WEBRTC &&
+ !(contentType & typeMap.WEBSOCKET &&
+ contentType & typeMap.XMLHTTPREQUEST)))
+ {
+ if (contentType & typeMap.WEBSOCKET)
+ {
+ convertFilterAddRules(rules, filter, action, withResourceTypes,
+ exceptionDomains, typeMap.WEBSOCKET);
+ }
+
+ if (contentType & typeMap.WEBRTC)
+ {
+ convertFilterAddRules(rules, filter, action, withResourceTypes,
+ exceptionDomains, typeMap.WEBRTC);
+ }
+
+ contentType &= ~(typeMap.WEBSOCKET | typeMap.WEBRTC);
+
+ if (!contentType)
+ return;
+ }
+
+ let urlSchemes = getURLSchemes(contentType);
+ let parsed = parseFilterRegexpSource(filter.regexpSource, urlSchemes[0]);
// For the special case of $document whitelisting filters with just a domain
// we can generate an equivalent blocking rule exception using if-domain.
if (filter instanceof filterClasses.WhitelistFilter &&
- filter.contentType & typeMap.DOCUMENT &&
+ contentType & typeMap.DOCUMENT &&
parsed.justHostname)
{
rules.push({
trigger: {
"url-filter": ".*",
"if-domain": ["*" + parsed.hostname]
},
action: {type: "ignore-previous-rules"}
});
// If the filter contains other supported options we'll need to generate
// further rules for it, but if not we can simply return now.
- if (!(filter.contentType & whitelistableRequestTypes))
+ if (!(contentType & whitelistableRequestTypes))
return;
}
let trigger = {"url-filter": parsed.regexp};
- // Limit rules to HTTP(S) URLs
- if (!/^(\^|http)/i.test(trigger["url-filter"]))
- trigger["url-filter"] = "^https?://.*" + trigger["url-filter"];
+ // If the URL filter begins with one of the URL schemes for this content
+ // type, we generate additional rules for all the URL scheme patterns;
+ // otherwise, if the start of the URL filter literally matches the first URL
+ // scheme pattern, we just generate additional rules for the remaining URL
+ // scheme patterns.
+ //
+ // For example, "stun:foo$webrtc" will give us "stun:foo", then we add a "^"
+ // in front of this and generate two additional rules for
+ // "^stuns?:.*stun:foo" and "^turns?:.*stun:foo". On the other hand,
+ // "||foo$webrtc" will give us "^stuns?:([^/]+\\.)?foo", so we just generate
+ // "^turns?:([^/]+\\.)?foo" in addition.
+ //
+ // Note that the filter can be already anchored to the beginning
+ // (e.g. "|stun:foo$webrtc"), in which case we do not generate any additional
+ // rules.
+ let needAltRules = trigger["url-filter"][0] != "^" ||
+ trigger["url-filter"].startsWith("^" + urlSchemes[0]);
+
+ if (trigger["url-filter"][0] != "^")
+ {
+ if (!urlSchemes.some(scheme => new RegExp("^" + scheme)
+ .test(trigger["url-filter"])))
+ {
+ trigger["url-filter"] = urlSchemes[0] + ".*" + trigger["url-filter"];
+ }
+
+ trigger["url-filter"] = "^" + trigger["url-filter"];
+ }
// For rules containing only a hostname we know that we're matching against
// a lowercase string unless the matchCase option was passed.
if (parsed.canSafelyMatchAsLowercase && !filter.matchCase)
trigger["url-filter"] = trigger["url-filter"].toLowerCase();
if (parsed.canSafelyMatchAsLowercase || filter.matchCase)
trigger["url-filter-is-case-sensitive"] = true;
@@ -311,17 +447,17 @@
parseDomains(filter.domains, included, excluded);
if (exceptionDomains)
excluded = excluded.concat(exceptionDomains);
if (withResourceTypes)
{
- let resourceTypes = getResourceTypes(filter);
+ let resourceTypes = getResourceTypes(contentType);
// Content blocker rules can't differentiate between sub-document requests
// (iframes) and top-level document requests. To avoid too many false
// positives, we prevent rules with no hostname part from blocking document
// requests.
//
// Once Safari 11 becomes our minimum supported version, we could change
// our approach here to use the new "unless-top-url" property instead.
@@ -332,16 +468,18 @@
return;
trigger["resource-type"] = resourceTypes;
}
if (filter.thirdParty != null)
trigger["load-type"] = [filter.thirdParty ? "third-party" : "first-party"];
+ let addTopLevelException = false;
+
if (included.length > 0)
{
trigger["if-domain"] = [];
for (let name of included)
{
// If this is a blocking filter or an element hiding filter, add the
// subdomain wildcard only if no subdomains have been excluded.
@@ -371,22 +509,33 @@
{
// Rules with a hostname part are still allowed to block document requests,
// but we add an exception for top-level documents.
//
// Note that we can only do this if there's no "unless-domain" property for
// now. This also only works in Safari 11 onwards, while older versions
// simply ignore this property. Once Safari 11 becomes our minimum
// supported version, we can merge "unless-domain" into "unless-top-url".
- trigger["unless-top-url"] = [trigger["url-filter"]];
- if (trigger["url-filter-is-case-sensitive"])
- trigger["top-url-filter-is-case-sensitive"] = true;
+ addTopLevelException = true;
+ excludeTopURLFromTrigger(trigger);
}
rules.push({trigger: trigger, action: {type: action}});
+
+ if (needAltRules)
+ {
+ // Generate additional rules for any alternative URL schemes.
+ for (let altRule of makeRuleCopies(trigger, {type: action}, urlSchemes))
+ {
+ if (addTopLevelException)
+ excludeTopURLFromTrigger(altRule.trigger);
+
+ rules.push(altRule);
+ }
+ }
}
function convertIDSelectorsToAttributeSelectors(selector)
{
// First we figure out where all the IDs are
let sep = "";
let start = null;
let positions = [];
« no previous file with comments | « no previous file | node_modules/filterClasses.js » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld