OLD | NEW |
(Empty) | |
| 1 /* |
| 2 * This file is part of Adblock Plus <https://adblockplus.org/>, |
| 3 * Copyright (C) 2006-2016 Eyeo GmbH |
| 4 * |
| 5 * Adblock Plus is free software: you can redistribute it and/or modify |
| 6 * it under the terms of the GNU General Public License version 3 as |
| 7 * published by the Free Software Foundation. |
| 8 * |
| 9 * Adblock Plus is distributed in the hope that it will be useful, |
| 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 12 * GNU General Public License for more details. |
| 13 * |
| 14 * You should have received a copy of the GNU General Public License |
| 15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. |
| 16 */ |
| 17 |
| 18 /** @module contentBlockerList */ |
| 19 |
| 20 "use strict"; |
| 21 |
| 22 let filterClasses = require("filterClasses"); |
| 23 let tldjs = require("tldjs"); |
| 24 let punycode = require("punycode"); |
| 25 |
| 26 const selectorLimit = 5000; |
| 27 const typeMap = filterClasses.RegExpFilter.typeMap; |
| 28 |
/**
 * Sort the entries of a filter's domain map into inclusion and exclusion
 * lists. Domain names are lower-cased and converted with punycode.toASCII
 * before being appended.
 *
 * @param {?Object} domains  Map of domain name to a truthy/falsy flag; the
 *                           entry under the empty-string key is never
 *                           appended itself
 * @param {string[]} included  Receives domains with a truthy flag, but only
 *                             while the empty-string entry is falsy
 * @param {string[]} excluded  Receives domains with a falsy flag
 */
function parseDomains(domains, included, excluded)
{
  for (const name in domains)
  {
    if (name == "")
      continue;

    const asciiName = punycode.toASCII(name.toLowerCase());

    if (domains[name])
    {
      // An enabled domain is only listed when the empty-string entry is not
      // itself enabled
      if (!domains[""])
        included.push(asciiName);
    }
    else
      excluded.push(asciiName);
  }
}
| 45 |
/**
 * Backslash-escape every regular expression metacharacter in a string so
 * that it can be embedded literally in a pattern.
 *
 * @param {string} s  Text to escape
 * @returns {string}  The text with each metacharacter preceded by a backslash
 */
function escapeRegExp(s)
{
  const metaCharacters = /[.*+?^${}()|[\]\\]/g;
  return s.replace(metaCharacters, "\\$&");
}
| 50 |
/**
 * Build a url-filter pattern matching HTTP(S) URLs whose host is the given
 * domain or any of its subdomains.
 *
 * @param {string} domain  Domain name, inserted literally (escaped)
 * @returns {string}       Regular expression source
 */
function matchDomain(domain)
{
  // Scheme, then an optional run of subdomain labels ending in a dot
  const schemeAndSubdomains = "^https?://([^/:]*\\.)?";
  // The host has to end right after the domain: path or port separator
  const hostTerminator = "[/:]";

  return schemeAndSubdomains + escapeRegExp(domain) + hostTerminator;
}
| 55 |
/**
 * Convert an element hiding filter into the url-filter patterns it applies
 * to plus its selector.
 *
 * The filter is dropped (undefined is returned) when it excludes any domain
 * or when an exception has been recorded for its selector.
 *
 * @param {Object} filter  Element hiding filter; its `domains` and
 *                         `selector` properties are read
 * @param {Object|Map} elemhideSelectorExceptions
 *   Exceptions keyed by selector. Both own properties set through bracket
 *   assignment and genuine Map entries are recognised; inherited members
 *   (for example Map.prototype.size or Object.prototype.constructor) are not
 *   mistaken for exceptions.
 * @returns {{matchDomains: string[], selector: string}|undefined}
 *   `matchDomains` is empty when the filter is not restricted to specific
 *   domains.
 */
function convertElemHideFilter(filter, elemhideSelectorExceptions)
{
  let included = [];
  let excluded = [];

  parseDomains(filter.domains, included, excluded);

  if (excluded.length > 0)
    return undefined;

  let selector = filter.selector;
  // Own-property test instead of `in`, which would also report names found
  // on the prototype chain
  let hasException =
    Object.prototype.hasOwnProperty.call(elemhideSelectorExceptions, selector) ||
    (elemhideSelectorExceptions instanceof Map &&
     elemhideSelectorExceptions.has(selector));
  if (hasException)
    return undefined;

  return {matchDomains: included.map(matchDomain), selector: selector};
}
| 67 |
/**
 * Translate Adblock Plus filter syntax into regular expression source.
 *
 * - "*" becomes ".*", except at the very start of the output, at the end of
 *   the text, or directly before another "*", where it is dropped
 * - "^" becomes "." unless it is the last character, where it is dropped
 * - a leading "|" becomes "^", a trailing "|" becomes "$", and the second
 *   character of a leading "||" becomes "https?://"; any other "|" is
 *   escaped
 * - the remaining regular expression metacharacters are backslash-escaped
 *
 * @param {string} text  Filter text
 * @returns {string}     Regular expression source
 */
function toRegExp(text)
{
  const escapable = ".+?${}()[]\\";
  const end = text.length - 1;
  let pattern = "";

  for (let pos = 0; pos <= end; pos++)
  {
    const ch = text[pos];

    if (ch == "*")
    {
      if (pattern != "" && pos < end && text[pos + 1] != "*")
        pattern += ".*";
    }
    else if (ch == "^")
    {
      if (pos < end)
        pattern += ".";
    }
    else if (ch == "|")
    {
      if (pos == 0)
        pattern += "^";
      else if (pos == end)
        pattern += "$";
      else if (pos == 1 && text[0] == "|")
        pattern += "https?://";
      else
        pattern += "\\|";
    }
    else if (escapable.indexOf(ch) != -1)
      pattern += "\\" + ch;
    else
      pattern += ch;
  }

  return pattern;
}
| 115 |
/**
 * Produce the url-filter regular expression source for a request filter.
 *
 * @param {Object} filter  Filter whose `regexpSource` string is converted
 * @returns {string}       Regular expression source that starts with "^" or
 *                         "http" (case-insensitively)
 */
function getRegExpSource(filter)
{
  // Safari expects punycode, filter lists use unicode
  const leadingHost = /^(\|\||\|?https?:\/\/)([\w\-.*\u0080-\uFFFF]+)/i;
  const asciiSource = filter.regexpSource.replace(
    leadingHost,
    (whole, prefix, domain) => prefix + punycode.toASCII(domain)
  );

  const converted = toRegExp(asciiSource);

  // Limit rules to HTTP(S) URLs
  if (/^(\^|http)/i.test(converted))
    return converted;

  return "^https?://.*" + converted;
}
| 133 |
/**
 * List the resource-type names for a rule trigger that correspond to the
 * content type bits set on a filter.
 *
 * @param {Object} filter  Filter whose `contentType` bit mask is inspected
 * @returns {string[]}     Resource type names, in a fixed order
 */
function getResourceTypes(filter)
{
  // Each entry pairs a content type mask with the name emitted when any of
  // its bits is set; the order of this table is the order of the result
  const table = [
    [typeMap.IMAGE, "image"],
    [typeMap.STYLESHEET, "style-sheet"],
    [typeMap.SCRIPT, "script"],
    [typeMap.FONT, "font"],
    [typeMap.MEDIA | typeMap.OBJECT, "media"],
    [typeMap.POPUP, "popup"],
    [typeMap.XMLHTTPREQUEST | typeMap.OBJECT_SUBREQUEST |
     typeMap.PING | typeMap.OTHER, "raw"],
    [typeMap.SUBDOCUMENT, "document"]
  ];

  return table
    .filter(entry => filter.contentType & entry[0])
    .map(entry => entry[1]);
}
| 160 |
/**
 * Expand a list of domains so that every domain which tldjs.getDomain
 * returns unchanged is followed by its "www." variant.
 *
 * @param {string[]} domains  Domain names
 * @returns {string[]}        New list; the input is not modified
 */
function addDomainPrefix(domains)
{
  const expanded = [];

  for (const name of domains)
  {
    const isOwnBaseDomain = tldjs.getDomain(name) == name;

    if (isOwnBaseDomain)
      expanded.push(name, "www." + name);
    else
      expanded.push(name);
  }

  return expanded;
}
| 175 |
/**
 * Build a content blocker rule from a request filter.
 *
 * @param {Object} filter   Filter to convert; `domains` and `thirdParty`
 *                          are read here, the rest by the helpers called
 * @param {string} action   Value for the rule's action type
 * @param {boolean} withResourceTypes
 *                          Whether to add a "resource-type" trigger field
 * @returns {{trigger: Object, action: {type: string}}}
 */
function convertFilter(filter, action, withResourceTypes)
{
  const trigger = {"url-filter": getRegExpSource(filter)};
  const onlyOn = [];
  const exceptOn = [];

  parseDomains(filter.domains, onlyOn, exceptOn);

  if (withResourceTypes)
    trigger["resource-type"] = getResourceTypes(filter);

  // A null/undefined thirdParty means the filter does not care
  if (filter.thirdParty != null)
  {
    if (filter.thirdParty)
      trigger["load-type"] = ["third-party"];
    else
      trigger["load-type"] = ["first-party"];
  }

  // Only one of the two domain fields is ever set; inclusions win
  if (onlyOn.length > 0)
    trigger["if-domain"] = addDomainPrefix(onlyOn);
  else if (exceptOn.length > 0)
    trigger["unless-domain"] = addDomainPrefix(exceptOn);

  return {trigger: trigger, action: {type: action}};
}
| 196 |
/**
 * Check whether a value contains any non-ASCII character.
 *
 * Strings are tested directly. Objects and arrays are searched recursively
 * through the values of all their own properties; property names themselves
 * are not inspected. Every other value, including null and undefined, is
 * reported as clean.
 *
 * @param {*} obj     Value to inspect
 * @returns {boolean} true if a character outside \x00-\x7F was found
 */
function hasNonASCI(obj)
{
  if (typeof obj == "string")
    return /[^\x00-\x7F]/.test(obj);

  // typeof null is "object" too, and Object.getOwnPropertyNames(null) throws
  if (obj === null || typeof obj != "object")
    return false;

  // Own property names cover array indices as well, so arrays need no
  // separate pass
  return Object.getOwnPropertyNames(obj).some(name => hasNonASCI(obj[name]));
}
| 220 |
/**
 * Rewrite every ID selector (#someID) in a CSS selector as the attribute
 * selector [id=someID]. The ID text is copied verbatim and left unquoted.
 *
 * A "#" inside quoted text, or preceded by a backslash, is left alone.
 *
 * @param {string} selector  CSS selector, possibly a comma-separated group
 * @returns {string}         Selector with ID selectors replaced
 */
function convertIDSelectorsToAttributeSelectors(selector)
{
  // First we figure out where all the IDs are
  let sep = "";
  let start = null;
  let positions = [];
  for (let i = 0; i < selector.length; i++)
  {
    let chr = selector[i];

    if (chr == "\\")        // ignore escaped characters
      i++;
    else if (chr == sep)    // don't match IDs within quoted text
      sep = "";             // e.g. [attr="#Hello"]
    else if (sep == "")
    {
      if (chr == '"' || chr == "'")
        sep = chr;
      else if (start == null)  // look for the start of an ID
      {
        if (chr == "#")
          start = i;
      }
      else if (chr != "-" && chr != "_" &&
               (chr < "0" ||
                chr > "9" && chr < "A" ||
                chr > "Z" && chr < "a" ||
                chr > "z" && chr < "\x80")) // look for the end of the ID
      {
        positions.push({start: start, end: i});
        // The character ending this ID may itself open the next one, as in
        // "#a#b"; without this the second ID would be left unconverted
        start = chr == "#" ? i : null;
      }
    }
  }
  if (start != null)
    positions.push({start: start, end: selector.length});

  // Now replace them all with the [id=someID] form
  let newSelector = [];
  let i = 0;
  for (let pos of positions)
  {
    newSelector.push(selector.substring(i, pos.start));
    // pos.start + 1 skips the leading "#"
    newSelector.push("[id=", selector.substring(pos.start + 1, pos.end), "]");
    i = pos.end;
  }
  newSelector.push(selector.substring(i));

  return newSelector.join("");
}
| 271 |
| 272 let ContentBlockerList = |
| 273 /** |
| 274 * Create a new Adblock Plus filter to content blocker list converter |
| 275 * |
| 276 * @constructor |
| 277 */ |
| 278 exports.ContentBlockerList = function () |
| 279 { |
| 280 this.requestFilters = []; |
| 281 this.requestExceptions = []; |
| 282 this.elemhideFilters = []; |
| 283 this.elemhideExceptions = []; |
| 284 this.elemhideSelectorExceptions = new Map(); |
| 285 }; |
| 286 |
/**
 * Add Adblock Plus filter to be converted
 *
 * The filter is sorted into the internal lists according to its class.
 * Filters with sitekeys, and RegExpFilter instances whose regexpSource is
 * null, are ignored.
 *
 * @param {Filter} filter Filter to convert
 */
ContentBlockerList.prototype.addFilter = function(filter)
{
  if (filter.sitekeys)
    return;
  if (filter instanceof filterClasses.RegExpFilter &&
      filter.regexpSource == null)
    return;

  if (filter instanceof filterClasses.BlockingFilter)
    this.requestFilters.push(filter);

  if (filter instanceof filterClasses.WhitelistFilter)
  {
    // Exception applying to at least one request type
    if (filter.contentType & (typeMap.IMAGE
                              | typeMap.STYLESHEET
                              | typeMap.SCRIPT
                              | typeMap.FONT
                              | typeMap.MEDIA
                              | typeMap.POPUP
                              | typeMap.OBJECT
                              | typeMap.OBJECT_SUBREQUEST
                              | typeMap.XMLHTTPREQUEST
                              | typeMap.PING
                              | typeMap.SUBDOCUMENT
                              | typeMap.OTHER))
      this.requestExceptions.push(filter);

    if (filter.contentType & typeMap.ELEMHIDE)
      this.elemhideExceptions.push(filter);
  }

  if (filter instanceof filterClasses.ElemHideFilter)
    this.elemhideFilters.push(filter);

  if (filter instanceof filterClasses.ElemHideException)
  {
    let exceptions = this.elemhideSelectorExceptions;
    let selector = filter.selector;

    // Only reuse a list this method stored earlier. A plain lookup could
    // return an inherited member for selectors such as "constructor", which
    // would then be handed to parseDomains as if it were an array.
    if (!Object.prototype.hasOwnProperty.call(exceptions, selector))
      exceptions[selector] = [];

    parseDomains(filter.domains, exceptions[selector], []);
  }
};
| 335 |
/**
 * Generate content blocker list for all filters that were added
 *
 * Rules are emitted in this order: element hiding rules, element hiding
 * exceptions, blocking rules, request exceptions. Rules for which
 * hasNonASCI() reports a non-ASCII string are left out.
 *
 * @param {*} [filter] Not read by this method
 * @returns {Object[]} Rules, each with a `trigger` and an `action` object
 */
ContentBlockerList.prototype.generateRules = function(filter)
{
  const output = [];
  const pushIfASCII = rule =>
  {
    if (!hasNonASCI(rule))
      output.push(rule);
  };

  // Collect the selectors of all usable element hiding filters per
  // url-filter pattern, keeping the order in which patterns first appear
  const selectorsByPattern = new Map();
  for (const elemhideFilter of this.elemhideFilters)
  {
    const converted = convertElemHideFilter(elemhideFilter,
                                            this.elemhideSelectorExceptions);
    if (!converted)
      continue;

    // Filters without domain restriction apply to every HTTP(S) page
    const patterns = converted.matchDomains.length == 0 ?
                     ["^https?://"] : converted.matchDomains;

    for (const pattern of patterns)
    {
      if (!selectorsByPattern.has(pattern))
        selectorsByPattern.set(pattern, []);
      selectorsByPattern.get(pattern).push(converted.selector);
    }
  }

  for (const entry of selectorsByPattern)
  {
    const pattern = entry[0];
    const selectors = entry[1];

    // One rule per batch of at most selectorLimit selectors
    for (let offset = 0; offset < selectors.length; offset += selectorLimit)
    {
      const joined = selectors.slice(offset, offset + selectorLimit).join(", ");

      pushIfASCII({
        trigger: {"url-filter": pattern},
        action: {type: "css-display-none",
                 // As of Safari 9.0 element IDs are matched as lowercase. We
                 // work around this by converting to the attribute format
                 // [id=elementID]
                 selector: convertIDSelectorsToAttributeSelectors(joined)}
      });
    }
  }

  this.elemhideExceptions.forEach(exception =>
    pushIfASCII(convertFilter(exception, "ignore-previous-rules", false)));
  this.requestFilters.forEach(blocking =>
    pushIfASCII(convertFilter(blocking, "block", true)));
  this.requestExceptions.forEach(exception =>
    pushIfASCII(convertFilter(exception, "ignore-previous-rules", true)));

  return output;
};
OLD | NEW |