lib/contentBlockerList.js - Issue 29336753: Issue 3671 - Split out contentBlockerList API

Side by Side Diff: lib/contentBlockerList.js

Issue 29336753: Issue 3671 - Split out contentBlockerList API (Closed)

Patch Set: Reworked API into ContentBlockerList class and addressed other feedback Created Feb. 22, 2016, 12:21 p.m.

Left:
Right:

Use n/p to move between diff chunks; N/P to move between comments.

Jump to:

View unified diff | Download patch

OLD	NEW
(Empty)
	1 /*

	2 * This file is part of Adblock Plus <https://adblockplus.org/>,

	3 * Copyright (C) 2006-2016 Eyeo GmbH

	4 *

	5 * Adblock Plus is free software: you can redistribute it and/or modify

	6 * it under the terms of the GNU General Public License version 3 as

	7 * published by the Free Software Foundation.

	8 *

	9 * Adblock Plus is distributed in the hope that it will be useful,

	10 * but WITHOUT ANY WARRANTY; without even the implied warranty of

	11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

	12 * GNU General Public License for more details.

	13 *

	14 * You should have received a copy of the GNU General Public License

	15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.

	16 */

	17

	18 /** @module contentBlockerList */

	19

	20 "use strict";

	21

	22 let filterClasses = require("filterClasses");

	23 let getBaseDomain = require("urlHelpers").getBaseDomain;

	24 let punycode = require("punycode");

	25

	// Upper bound on how many selectors are joined into a single
	// css-display-none rule (see the splice() call in generateRules).
	26 const selectorLimit = 5000;

	// Content type flags from RegExpFilter; they are bitwise-tested against
	// filter.contentType throughout this file.
	27 const typeMap = filterClasses.RegExpFilter.typeMap;

	28

	/**
	 * Sort the entries of a filter's domain map into two caller-supplied lists.
	 *
	 * Every non-empty key is lower-cased and converted to punycode. Keys mapped
	 * to a falsy value are appended to `excluded`. Keys mapped to a truthy value
	 * are appended to `included`, but only when the "" entry of the map is falsy.
	 *
	 * @param {Object} domains Map of domain name to enabled flag
	 * @param {string[]} included Receives the included domains (mutated)
	 * @param {string[]} excluded Receives the excluded domains (mutated)
	 */
	function parseDomains(domains, included, excluded)
	{
	  for (let name in domains)
	  {
	    // The empty key is only consulted below, never emitted as a domain
	    if (name == "")
	      continue;

	    let isEnabled = domains[name];
	    let asciiName = punycode.toASCII(name.toLowerCase());

	    if (!isEnabled)
	      excluded.push(asciiName);
	    else if (!domains[""])
	      included.push(asciiName);
	  }
	}

	45

	/**
	 * Escape every regular expression metacharacter in a string with a
	 * backslash so the string can be embedded literally in a pattern.
	 *
	 * @param {string} s Text to escape
	 * @returns {string} The text with . * + ? ^ $ { } ( ) | [ ] \ escaped
	 */
	function escapeRegExp(s)
	{
	  let metaCharacters = /[.*+?^${}()|[\]\\]/g;

	  // "$&" re-inserts the matched character after the added backslash
	  return s.replace(metaCharacters, "\\$&");
	}

	50

	/**
	 * Build a regular expression source that matches HTTP(S) URLs whose host
	 * is the given domain or any subdomain of it.
	 *
	 * @param {string} domain Domain name, inserted literally (escaped)
	 * @returns {string} Regular expression source
	 */
	function matchDomain(domain)
	{
	  // Scheme followed by an optional run of subdomain labels
	  let prefix = "^https?://([^/:]*\\.)?";
	  // The host must be followed by a port or path separator
	  let suffix = "[/:]";

	  return prefix + escapeRegExp(domain) + suffix;
	}

	55

	/**
	 * Convert an element hiding filter into the data needed for a
	 * css-display-none rule.
	 *
	 * Nothing is returned (undefined) when the filter excludes any domain or
	 * when an exception exists for its selector.
	 *
	 * @param {Object} filter Filter providing `domains` and `selector`
	 * @param {Object|Map} elemhideSelectorExceptions Container keyed by selector
	 * @returns {Object|undefined} `{matchDomains, selector}` where matchDomains
	 *                             holds one URL pattern per included domain
	 */
	function convertElemHideFilter(filter, elemhideSelectorExceptions)
	{
	  let included = [];
	  let excluded = [];

	  parseDomains(filter.domains, included, excluded);

	  // Only own properties (or real Map entries) count as exceptions. A plain
	  // `in` test would also match inherited names such as "constructor" or, on
	  // a Map, "size" and "get".
	  let selector = filter.selector;
	  let hasException =
	    Object.prototype.hasOwnProperty.call(elemhideSelectorExceptions,
	                                         selector) ||
	    (elemhideSelectorExceptions instanceof Map &&
	     elemhideSelectorExceptions.has(selector));

	  if (excluded.length == 0 && !hasException)
	    return {matchDomains: included.map(matchDomain), selector: selector};
	}

	67

	/**
	 * Translate Adblock Plus filter syntax into regular expression source.
	 *
	 * - "*" becomes ".*", except at the start or end of the text or directly
	 *   before another "*".
	 * - "^" becomes "." (any single character), except at the end of the text
	 *   where it is dropped.
	 * - "|" becomes "^" at the start and "$" at the end, a leading "||" becomes
	 *   "^https?://", and any other "|" is escaped.
	 * - Regular expression metacharacters are escaped with a backslash.
	 *
	 * @param {string} text Filter text
	 * @returns {string} Regular expression source
	 */
	function toRegExp(text)
	{
	  let result = [];
	  let lastIndex = text.length - 1;

	  for (let i = 0; i < text.length; i++)
	  {
	    let c = text[i];

	    switch (c)
	    {
	      case "*":
	        if (result.length > 0 && i < lastIndex && text[i + 1] != "*")
	          result.push(".*");
	        break;
	      case "^":
	        if (i < lastIndex)
	          result.push(".");
	        break;
	      case "|":
	        if (i == 0)
	          result.push("^");
	        else if (i == lastIndex)
	          result.push("$");
	        else if (i == 1 && text[0] == "|")
	          result.push("https?://");
	        else
	          result.push("\\", c);
	        break;
	      case ".": case "+": case "?": case "$":
	      case "{": case "}": case "(": case ")":
	      case "[": case "]": case "\\":
	        // Review note (Sebastian Noack, 2016/02/22): push the backslash and
	        // the character together and break explicitly; the earlier implicit
	        // fall-through into "default" was easy to overlook.
	        result.push("\\", c);
	        break;
	      default:
	        result.push(c);
	    }
	  }

	  return result.join("");
	}

	114

	/**
	 * Build the "url-filter" regular expression source for a request filter.
	 *
	 * The domain part directly after a leading "||", "|http(s)://" or
	 * "http(s)://" is converted to punycode, the result is translated with
	 * toRegExp(), and patterns that are not already anchored or do not start
	 * with "http" are prefixed so that they only apply to HTTP(S) URLs.
	 *
	 * @param {Object} filter Filter providing `regexpSource`
	 * @returns {string} Regular expression source
	 */
	function getRegExpSource(filter)
	{
	  // Safari expects punycode, filter lists use unicode
	  let asciiSource = filter.regexpSource.replace(
	    /^(\|\||\|?https?:\/\/)([\w\-.*\u0080-\uFFFF]+)/i,
	    function (match, prefix, domain)
	    {
	      return prefix + punycode.toASCII(domain);
	    }
	  );
	  let source = toRegExp(asciiSource);

	  // Limit rules to HTTP(S) URLs
	  if (!/^(\^|http)/i.test(source))
	    source = "^https?://.*" + source;

	  return source;
	}

	132

	/**
	 * List the content blocker "resource-type" values that correspond to the
	 * content type flags set on a filter.
	 *
	 * @param {Object} filter Filter providing the `contentType` bit mask
	 * @returns {string[]} Resource type names, in a fixed order
	 */
	function getResourceTypes(filter)
	{
	  let types = [];

	  if (filter.contentType & typeMap.IMAGE)
	    types.push("image");
	  if (filter.contentType & typeMap.STYLESHEET)
	    types.push("style-sheet");
	  if (filter.contentType & typeMap.SCRIPT)
	    types.push("script");
	  if (filter.contentType & typeMap.FONT)
	    types.push("font");
	  if (filter.contentType & (typeMap.MEDIA | typeMap.OBJECT))
	    types.push("media");
	  if (filter.contentType & typeMap.POPUP)
	    types.push("popup");
	  // Review note (Sebastian Noack, 2016/02/22): keep all types aligned,
	  // one per line, for consistency.
	  if (filter.contentType & (typeMap.XMLHTTPREQUEST |
	                            typeMap.OBJECT_SUBREQUEST |
	                            typeMap.PING |
	                            typeMap.OTHER))
	    types.push("raw");
	  if (filter.contentType & typeMap.SUBDOCUMENT)
	    types.push("document");

	  return types;
	}

	158

	/**
	 * Expand a list of domains so that every domain which is its own base
	 * domain (as reported by getBaseDomain) is followed by its "www." variant.
	 *
	 * @param {string[]} domains Domain names
	 * @returns {string[]} New list containing the domains and added variants
	 */
	function addDomainPrefix(domains)
	{
	  let expanded = [];

	  for (let name of domains)
	  {
	    expanded.push(name);

	    let isBaseDomain = getBaseDomain(name) == name;
	    if (isBaseDomain)
	      expanded.push("www." + name);
	  }

	  return expanded;
	}

	173

	/**
	 * Convert a request filter into a content blocker rule.
	 *
	 * @param {Object} filter Filter to convert
	 * @param {string} action Value for the rule's action type
	 * @param {boolean} withResourceTypes Whether to add a "resource-type" list
	 * @returns {Object} Rule of the form `{trigger, action: {type}}`
	 */
	function convertFilter(filter, action, withResourceTypes)
	{
	  let trigger = {};
	  trigger["url-filter"] = getRegExpSource(filter);

	  let includedDomains = [];
	  let excludedDomains = [];
	  parseDomains(filter.domains, includedDomains, excludedDomains);

	  if (withResourceTypes)
	    trigger["resource-type"] = getResourceTypes(filter);

	  if (filter.thirdParty != null)
	  {
	    let loadType = filter.thirdParty ? "third-party" : "first-party";
	    trigger["load-type"] = [loadType];
	  }

	  // Included domains take precedence; exclusions are only emitted when
	  // there are no inclusions.
	  if (includedDomains.length > 0)
	    trigger["if-domain"] = addDomainPrefix(includedDomains);
	  else if (excludedDomains.length > 0)
	    trigger["unless-domain"] = addDomainPrefix(excludedDomains);

	  let rule = {trigger: trigger, action: {type: action}};
	  return rule;
	}

	194

	/**
	 * Check whether a value contains any non-ASCII character.
	 *
	 * Strings are tested directly. Objects and arrays are searched recursively
	 * through the values of all their own properties (property names
	 * themselves are not tested). Every other value, including null, yields
	 * false.
	 *
	 * @param {*} obj Value to inspect
	 * @returns {boolean} True if a string containing a character outside
	 *                    \x00-\x7F was found
	 */
	function hasNonASCI(obj)
	{
	  if (typeof obj == "string")
	    return /[^\x00-\x7F]/.test(obj);

	  // typeof null is "object", but null has no properties to enumerate;
	  // Object.getOwnPropertyNames(null) would throw a TypeError.
	  if (obj === null || typeof obj != "object")
	    return false;

	  // For arrays the own property names include every index, so a separate
	  // pass over the elements is not needed.
	  for (let name of Object.getOwnPropertyNames(obj))
	  {
	    if (hasNonASCI(obj[name]))
	      return true;
	  }

	  return false;
	}

	218

	/**
	 * Rewrite every ID selector (#someID) in a CSS selector into the
	 * attribute selector form [id=someID].
	 *
	 * "#" characters inside quoted text, e.g. [attr="#Hello"], and characters
	 * following a backslash are left alone.
	 *
	 * @param {string} selector CSS selector (may be a comma separated list)
	 * @returns {string} Selector with the ID selectors replaced
	 */
	function convertIDSelectorsToAttributeSelectors(selector)
	{
	  // Characters that may continue an ID: "-", "_", ASCII letters and digits,
	  // and everything from \x80 upwards
	  let idCharacter = /[-\w\u0080-\uFFFF]/;

	  // First we figure out where all the IDs are
	  let sep = "";
	  let start = null;
	  let positions = [];
	  for (let i = 0; i < selector.length; i++)
	  {
	    let chr = selector[i];

	    if (chr == "\\")        // ignore escaped characters
	      i++;
	    else if (chr == sep)    // don't match IDs within quoted text
	      sep = "";             // e.g. [attr="#Hello"]
	    else if (sep == "")
	    {
	      if (chr == '"' || chr == "'")
	        sep = chr;
	      else if (start == null)  // look for the start of an ID
	      {
	        if (chr == "#")
	          start = i;
	      }
	      else if (!idCharacter.test(chr))  // look for the end of the ID
	      {
	        positions.push({start: start, end: i});
	        start = null;
	      }
	    }
	  }
	  // An ID that runs to the end of the selector is still open here
	  if (start != null)
	    positions.push({start: start, end: selector.length});

	  // Now replace them all with the [id=someID] form
	  let newSelector = [];
	  let i = 0;
	  for (let pos of positions)
	  {
	    newSelector.push(selector.substring(i, pos.start));
	    newSelector.push('[id=', selector.substring(pos.start + 1, pos.end), ']');
	    i = pos.end;
	  }
	  newSelector.push(selector.substring(i));

	  return newSelector.join("");
	}

	269

	270 let ContentBlockerList =

	271 /**

	272 * Create a new Adblock Plus filter to content blocker list converter

	273 *

	274 * @constructor

	275 */

	276 exports.ContentBlockerList = function ()

	277 {

	278 this.requestFilters = [];

	279 this.requestExceptions = [];

	280 this.elemhideFilters = [];

	281 this.elemhideExceptions = [];

	282 this.elemhideSelectorExceptions = new Map();

	283 };

	284

	285 ContentBlockerList.prototype = {};
	Sebastian Noack 2016/02/22 17:35:28 There is no need to set the prototype to an empty There is no need to set the prototype to an empty object. Each function automatically gets a prototype object setup. kzar 2016/02/22 18:09:29 Done. Show quoted text On 2016/02/22 17:35:28, Sebastian Noack wrote: > There is no need to set the prototype to an empty object. Each function > automatically gets a prototype object setup. Done.
	286

	/**
	 * Add Adblock Plus filter to be converted
	 *
	 * Filters with sitekeys, and RegExpFilters without a regexpSource, are
	 * ignored. All other filters are sorted into the lists that
	 * generateRules() reads.
	 *
	 * @param {Filter} filter Filter to convert
	 */
	ContentBlockerList.prototype.addFilter = function(filter)
	{
	  if (filter.sitekeys)
	    return;
	  if (filter instanceof filterClasses.RegExpFilter && !filter.regexpSource)
	    return;

	  if (filter instanceof filterClasses.BlockingFilter)
	    this.requestFilters.push(filter);

	  if (filter instanceof filterClasses.WhitelistFilter)
	  {
	    // Content types for which a whitelist filter acts as a request exception
	    let requestTypes = typeMap.IMAGE |
	                       typeMap.STYLESHEET |
	                       typeMap.SCRIPT |
	                       typeMap.FONT |
	                       typeMap.MEDIA |
	                       typeMap.POPUP |
	                       typeMap.OBJECT |
	                       typeMap.OBJECT_SUBREQUEST |
	                       typeMap.XMLHTTPREQUEST |
	                       typeMap.PING |
	                       typeMap.SUBDOCUMENT |
	                       typeMap.OTHER;

	    if (filter.contentType & requestTypes)
	      this.requestExceptions.push(filter);

	    if (filter.contentType & typeMap.ELEMHIDE)
	      this.elemhideExceptions.push(filter);
	  }

	  if (filter instanceof filterClasses.ElemHideFilter)
	    this.elemhideFilters.push(filter);

	  if (filter instanceof filterClasses.ElemHideException)
	  {
	    // The exceptions are stored as properties keyed by selector (not as Map
	    // entries) because convertElemHideFilter looks them up by property name.
	    let domains = this.elemhideSelectorExceptions[filter.selector];
	    if (!domains)
	      domains = this.elemhideSelectorExceptions[filter.selector] = [];

	    // Only the included domains are recorded; exclusions are discarded
	    parseDomains(filter.domains, domains, []);
	  }
	};

	334

	/**
	 * Generate content blocker list for all filters that were added
	 *
	 * Rules are emitted in this order: element hiding rules, element hiding
	 * exceptions, blocking rules, request exceptions. Rules that contain
	 * non-ASCII characters are left out.
	 *
	 * @param {*} [filter] Unused; kept so the existing signature is unchanged
	 * @returns {Object[]} Array of rule objects of the form {trigger, action}
	 */
	ContentBlockerList.prototype.generateRules = function(filter)
	{
	  let rules = [];

	  let addRule = rule =>
	  {
	    if (!hasNonASCI(rule))
	      rules.push(rule);
	  };

	  // Collect the selectors of all convertible element hiding filters,
	  // grouped by the URL pattern they apply to
	  let groupedElemhideFilters = new Map();
	  for (let elemhideFilter of this.elemhideFilters)
	  {
	    let result = convertElemHideFilter(elemhideFilter,
	                                       this.elemhideSelectorExceptions);
	    if (!result)
	      continue;

	    // Filters without included domains apply to every HTTP(S) URL
	    if (result.matchDomains.length == 0)
	      result.matchDomains = ["^https?://"];

	    for (let urlFilter of result.matchDomains)
	    {
	      let group = groupedElemhideFilters.get(urlFilter) || [];
	      group.push(result.selector);
	      groupedElemhideFilters.set(urlFilter, group);
	    }
	  }

	  groupedElemhideFilters.forEach((selectors, urlFilter) =>
	  {
	    // Emit one rule per chunk of at most selectorLimit selectors
	    while (selectors.length)
	    {
	      let selector = selectors.splice(0, selectorLimit).join(", ");

	      // As of Safari 9.0 element IDs are matched as lowercase. We work around
	      // this by converting to the attribute format [id="elementID"]
	      selector = convertIDSelectorsToAttributeSelectors(selector);

	      addRule({
	        trigger: {"url-filter": urlFilter},
	        action: {type: "css-display-none",
	                 selector: selector}
	      });
	    }
	  });

	  for (let exception of this.elemhideExceptions)
	    addRule(convertFilter(exception, "ignore-previous-rules", false));
	  for (let blocking of this.requestFilters)
	    addRule(convertFilter(blocking, "block", true));
	  for (let exception of this.requestExceptions)
	    addRule(convertFilter(exception, "ignore-previous-rules", true));

	  return rules;
	};

OLD	NEW

« no previous file with comments | « adblockplus.js ('k') | node_modules/filterClasses.js » ('j') | no next file with comments »