 Issue 29336753:
  Issue 3671 - Split out contentBlockerList API  (Closed)
    
  
    Issue 29336753:
  Issue 3671 - Split out contentBlockerList API  (Closed) 
  | Left: | ||
| Right: | 
| OLD | NEW | 
|---|---|
| (Empty) | |
| 1 /* | |
| 2 * This file is part of Adblock Plus <https://adblockplus.org/>, | |
| 3 * Copyright (C) 2006-2016 Eyeo GmbH | |
| 4 * | |
| 5 * Adblock Plus is free software: you can redistribute it and/or modify | |
| 6 * it under the terms of the GNU General Public License version 3 as | |
| 7 * published by the Free Software Foundation. | |
| 8 * | |
| 9 * Adblock Plus is distributed in the hope that it will be useful, | |
| 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
| 12 * GNU General Public License for more details. | |
| 13 * | |
| 14 * You should have received a copy of the GNU General Public License | |
| 15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. | |
| 16 */ | |
| 17 | |
| 18 /** @module contentBlockerList */ | |
| 19 | |
| 20 "use strict"; | |
| 21 | |
| 22 let filterClasses = require("filterClasses"); | |
| 23 let getBaseDomain = require("urlHelpers").getBaseDomain; | |
| 24 let punycode = require("punycode"); | |
| 25 | |
| 26 const selectorLimit = 5000; | |
| 27 const typeMap = filterClasses.RegExpFilter.typeMap; | |
| 28 | |
| 29 function parseDomains(domains, included, excluded) | |
| 30 { | |
| 31 for (let domain in domains) | |
| 32 { | |
| 33 if (domain != "") | |
| 34 { | |
| 35 let enabled = domains[domain]; | |
| 36 domain = punycode.toASCII(domain.toLowerCase()); | |
| 37 | |
| 38 if (!enabled) | |
| 39 excluded.push(domain); | |
| 40 else if (!domains[""]) | |
| 41 included.push(domain); | |
| 42 } | |
| 43 } | |
| 44 } | |
| 45 | |
| 46 function escapeRegExp(s) | |
| 47 { | |
| 48 return s.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); | |
| 49 } | |
| 50 | |
| 51 function matchDomain(domain) | |
| 52 { | |
| 53 return "^https?://([^/:]*\\.)?" + escapeRegExp(domain) + "[/:]"; | |
| 54 } | |
| 55 | |
| 56 function convertElemHideFilter(filter, elemhideSelectorExceptions) | |
| 57 { | |
| 58 let included = []; | |
| 59 let excluded = []; | |
| 60 let rules = []; | |
| 61 | |
| 62 parseDomains(filter.domains, included, excluded); | |
| 63 | |
| 64 if (excluded.length == 0 && !(filter.selector in elemhideSelectorExceptions)) | |
| 65 return {matchDomains: included.map(matchDomain), selector: filter.selector}; | |
| 66 } | |
| 67 | |
| 68 function toRegExp(text) | |
| 69 { | |
| 70 let result = []; | |
| 71 let lastIndex = text.length - 1; | |
| 72 | |
| 73 for (let i = 0; i < text.length; i++) | |
| 74 { | |
| 75 let c = text[i]; | |
| 76 | |
| 77 switch (c) | |
| 78 { | |
| 79 case "*": | |
| 80 if (result.length > 0 && i < lastIndex && text[i + 1] != "*") | |
| 81 result.push(".*"); | |
| 82 break; | |
| 83 case "^": | |
| 84 if (i < lastIndex) | |
| 85 result.push("."); | |
| 86 break; | |
| 87 case "|": | |
| 88 if (i == 0) | |
| 89 { | |
| 90 result.push("^"); | |
| 91 break; | |
| 92 } | |
| 93 if (i == lastIndex) | |
| 94 { | |
| 95 result.push("$"); | |
| 96 break; | |
| 97 } | |
| 98 if (i == 1 && text[0] == "|") | |
| 99 { | |
| 100 result.push("https?://"); | |
| 101 break; | |
| 102 } | |
| 103 case ".": case "+": case "?": case "$": | |
| 104 case "{": case "}": case "(": case ")": | |
| 105 case "[": case "]": case "\\": | |
| 106 result.push("\\"); | |
| 
Sebastian Noack
2016/02/22 17:35:28
Nit: Perhaps we should change the code here to:
 
Sebastian Noack
2016/02/22 17:37:31
s/missing drop-through/missing break/
 
kzar
2016/02/22 18:09:29
Done.
 | |
| 107 default: | |
| 108 result.push(c); | |
| 109 } | |
| 110 } | |
| 111 | |
| 112 return result.join(""); | |
| 113 } | |
| 114 | |
| 115 function getRegExpSource(filter) | |
| 116 { | |
| 117 let source = toRegExp(filter.regexpSource.replace( | |
| 118 // Safari expects punycode, filter lists use unicode | |
| 119 /^(\|\||\|?https?:\/\/)([\w\-.*\u0080-\uFFFF]+)/i, | |
| 120 function (match, prefix, domain) | |
| 121 { | |
| 122 return prefix + punycode.toASCII(domain); | |
| 123 } | |
| 124 )); | |
| 125 | |
| 126 // Limit rules to HTTP(S) URLs | |
| 127 if (!/^(\^|http)/i.test(source)) | |
| 128 source = "^https?://.*" + source; | |
| 129 | |
| 130 return source; | |
| 131 } | |
| 132 | |
| 133 function getResourceTypes(filter) | |
| 134 { | |
| 135 let types = []; | |
| 136 | |
| 137 if (filter.contentType & typeMap.IMAGE) | |
| 138 types.push("image"); | |
| 139 if (filter.contentType & typeMap.STYLESHEET) | |
| 140 types.push("style-sheet"); | |
| 141 if (filter.contentType & typeMap.SCRIPT) | |
| 142 types.push("script"); | |
| 143 if (filter.contentType & typeMap.FONT) | |
| 144 types.push("font"); | |
| 145 if (filter.contentType & (typeMap.MEDIA | typeMap.OBJECT)) | |
| 146 types.push("media"); | |
| 147 if (filter.contentType & typeMap.POPUP) | |
| 148 types.push("popup"); | |
| 149 if (filter.contentType & (typeMap.XMLHTTPREQUEST | | |
| 150 typeMap.OBJECT_SUBREQUEST | | |
| 151 typeMap.PING | typeMap.OTHER)) | |
| 
Sebastian Noack
2016/02/22 17:35:28
Nit: I think it reads slightly better, and looks m
 
kzar
2016/02/22 18:09:29
Done.
 | |
| 152 types.push("raw"); | |
| 153 if (filter.contentType & typeMap.SUBDOCUMENT) | |
| 154 types.push("document"); | |
| 155 | |
| 156 return types; | |
| 157 } | |
| 158 | |
| 159 function addDomainPrefix(domains) | |
| 160 { | |
| 161 let result = []; | |
| 162 | |
| 163 for (let domain of domains) | |
| 164 { | |
| 165 result.push(domain); | |
| 166 | |
| 167 if (getBaseDomain(domain) == domain) | |
| 168 result.push("www." + domain); | |
| 169 } | |
| 170 | |
| 171 return result; | |
| 172 } | |
| 173 | |
| 174 function convertFilter(filter, action, withResourceTypes) | |
| 175 { | |
| 176 let trigger = {"url-filter": getRegExpSource(filter)}; | |
| 177 let included = []; | |
| 178 let excluded = []; | |
| 179 | |
| 180 parseDomains(filter.domains, included, excluded); | |
| 181 | |
| 182 if (withResourceTypes) | |
| 183 trigger["resource-type"] = getResourceTypes(filter); | |
| 184 if (filter.thirdParty != null) | |
| 185 trigger["load-type"] = [filter.thirdParty ? "third-party" : "first-party"]; | |
| 186 | |
| 187 if (included.length > 0) | |
| 188 trigger["if-domain"] = addDomainPrefix(included); | |
| 189 else if (excluded.length > 0) | |
| 190 trigger["unless-domain"] = addDomainPrefix(excluded); | |
| 191 | |
| 192 return {trigger: trigger, action: {type: action}}; | |
| 193 } | |
| 194 | |
| 195 function hasNonASCI(obj) | |
| 196 { | |
| 197 if (typeof obj == "string") | |
| 198 { | |
| 199 if (/[^\x00-\x7F]/.test(obj)) | |
| 200 return true; | |
| 201 } | |
| 202 | |
| 203 if (typeof obj == "object") | |
| 204 { | |
| 205 if (obj instanceof Array) | |
| 206 for (let item of obj) | |
| 207 if (hasNonASCI(item)) | |
| 208 return true; | |
| 209 | |
| 210 let names = Object.getOwnPropertyNames(obj); | |
| 211 for (let name of names) | |
| 212 if (hasNonASCI(obj[name])) | |
| 213 return true; | |
| 214 } | |
| 215 | |
| 216 return false; | |
| 217 } | |
| 218 | |
| 219 function convertIDSelectorsToAttributeSelectors(selector) | |
| 220 { | |
| 221 // First we figure out where all the IDs are | |
| 222 let sep = ""; | |
| 223 let start = null; | |
| 224 let positions = []; | |
| 225 for (let i = 0; i < selector.length; i++) | |
| 226 { | |
| 227 let chr = selector[i]; | |
| 228 | |
| 229 if (chr == "\\") // ignore escaped characters | |
| 230 i++; | |
| 231 else if (chr == sep) // don't match IDs within quoted text | |
| 232 sep = ""; // e.g. [attr="#Hello"] | |
| 233 else if (sep == "") | |
| 234 { | |
| 235 if (chr == '"' || chr == "'") | |
| 236 sep = chr; | |
| 237 else if (start == null) // look for the start of an ID | |
| 238 { | |
| 239 if (chr == "#") | |
| 240 start = i; | |
| 241 } | |
| 242 else if (chr != "-" && chr != "_" && | |
| 243 (chr < "0" || | |
| 244 chr > "9" && chr < "A" || | |
| 245 chr > "Z" && chr < "a" || | |
| 246 chr > "z" && chr < "\x80")) // look for the end of the ID | |
| 247 { | |
| 248 positions.push({start: start, end: i}); | |
| 249 start = null; | |
| 250 } | |
| 251 } | |
| 252 } | |
| 253 if (start != null) | |
| 254 positions.push({start: start, end: selector.length}); | |
| 255 | |
| 256 // Now replace them all with the [id="someID"] form | |
| 257 let newSelector = []; | |
| 258 let i = 0; | |
| 259 for (let pos of positions) | |
| 260 { | |
| 261 newSelector.push(selector.substring(i, pos.start)); | |
| 262 newSelector.push('[id=', selector.substring(pos.start + 1, pos.end), ']'); | |
| 263 i = pos.end; | |
| 264 } | |
| 265 newSelector.push(selector.substring(i)); | |
| 266 | |
| 267 return newSelector.join(""); | |
| 268 } | |
| 269 | |
| 270 let ContentBlockerList = | |
| 271 /** | |
| 272 * Create a new Adblock Plus filter to content blocker list converter | |
| 273 * | |
| 274 * @constructor | |
| 275 */ | |
| 276 exports.ContentBlockerList = function () | |
| 277 { | |
| 278 this.requestFilters = []; | |
| 279 this.requestExceptions = []; | |
| 280 this.elemhideFilters = []; | |
| 281 this.elemhideExceptions = []; | |
| 282 this.elemhideSelectorExceptions = new Map(); | |
| 283 }; | |
| 284 | |
| 285 ContentBlockerList.prototype = {}; | |
| 
Sebastian Noack
2016/02/22 17:35:28
There is no need to set the prototype to an empty
 
kzar
2016/02/22 18:09:29
Done.
 | |
| 286 | |
| 287 /** | |
| 288 * Add Adblock Plus filter to be converted | |
| 289 * | |
| 290 * @param {Filter} filter Filter to convert | |
| 291 */ | |
| 292 ContentBlockerList.prototype.addFilter = function(filter) | |
| 293 { | |
| 294 if (filter.sitekeys) | |
| 295 return; | |
| 296 if (filter instanceof filterClasses.RegExpFilter && !filter.regexpSource) | |
| 297 return; | |
| 298 | |
| 299 if (filter instanceof filterClasses.BlockingFilter) | |
| 300 this.requestFilters.push(filter); | |
| 301 | |
| 302 if (filter instanceof filterClasses.WhitelistFilter) | |
| 303 { | |
| 304 if (filter.contentType & (typeMap.IMAGE | |
| 305 | typeMap.STYLESHEET | |
| 306 | typeMap.SCRIPT | |
| 307 | typeMap.FONT | |
| 308 | typeMap.MEDIA | |
| 309 | typeMap.POPUP | |
| 310 | typeMap.OBJECT | |
| 311 | typeMap.OBJECT_SUBREQUEST | |
| 312 | typeMap.XMLHTTPREQUEST | |
| 313 | typeMap.PING | |
| 314 | typeMap.SUBDOCUMENT | |
| 315 | typeMap.OTHER)) | |
| 316 this.requestExceptions.push(filter); | |
| 317 | |
| 318 if (filter.contentType & typeMap.ELEMHIDE) | |
| 319 this.elemhideExceptions.push(filter); | |
| 320 } | |
| 321 | |
| 322 if (filter instanceof filterClasses.ElemHideFilter) | |
| 323 this.elemhideFilters.push(filter); | |
| 324 | |
| 325 if (filter instanceof filterClasses.ElemHideException) | |
| 326 { | |
| 327 let domains = this.elemhideSelectorExceptions[filter.selector]; | |
| 328 if (!domains) | |
| 329 domains = this.elemhideSelectorExceptions[filter.selector] = []; | |
| 330 | |
| 331 parseDomains(filter.domains, domains, []); | |
| 332 } | |
| 333 }; | |
| 334 | |
| 335 /** | |
| 336 * Generate content blocker list for all filters that were added | |
| 337 * | |
| 338 * @returns {Object[]} rules Content blocker rules generated from the added filters | |
| 339 */ | |
| 340 ContentBlockerList.prototype.generateRules = function(filter) | |
| 341 { | |
| 342 let rules = []; | |
| 343 | |
| 344 function addRule(rule) | |
| 345 { | |
| 346 if (!hasNonASCI(rule)) | |
| 347 rules.push(rule); | |
| 348 } | |
| 349 | |
| 350 let groupedElemhideFilters = new Map(); | |
| 351 for (let filter of this.elemhideFilters) | |
| 352 { | |
| 353 let result = convertElemHideFilter(filter, this.elemhideSelectorExceptions); | |
| 354 if (!result) | |
| 355 continue; | |
| 356 | |
| 357 if (result.matchDomains.length == 0) | |
| 358 result.matchDomains = ["^https?://"]; | |
| 359 | |
| 360 for (let matchDomain of result.matchDomains) | |
| 361 { | |
| 362 let group = groupedElemhideFilters.get(matchDomain) || []; | |
| 363 group.push(result.selector); | |
| 364 groupedElemhideFilters.set(matchDomain, group); | |
| 365 } | |
| 366 } | |
| 367 | |
| 368 groupedElemhideFilters.forEach((selectors, matchDomain) => | |
| 369 { | |
| 370 while (selectors.length) | |
| 371 { | |
| 372 let selector = selectors.splice(0, selectorLimit).join(", "); | |
| 373 | |
| 374 // As of Safari 9.0 element IDs are matched as lowercase. We work around | |
| 375 // this by converting to the attribute format [id="elementID"] | |
| 376 selector = convertIDSelectorsToAttributeSelectors(selector); | |
| 377 | |
| 378 addRule({ | |
| 379 trigger: {"url-filter": matchDomain}, | |
| 380 action: {type: "css-display-none", | |
| 381 selector: selector} | |
| 382 }); | |
| 383 } | |
| 384 }); | |
| 385 | |
| 386 for (let filter of this.elemhideExceptions) | |
| 387 addRule(convertFilter(filter, "ignore-previous-rules", false)); | |
| 388 for (let filter of this.requestFilters) | |
| 389 addRule(convertFilter(filter, "block", true)); | |
| 390 for (let filter of this.requestExceptions) | |
| 391 addRule(convertFilter(filter, "ignore-previous-rules", true)); | |
| 392 | |
| 393 return rules; | |
| 394 }; | |
| OLD | NEW |