| OLD | NEW | 
 |    1 /* | 
 |    2  * This file is part of Adblock Plus <https://adblockplus.org/>, | 
 |    3  * Copyright (C) 2006-2016 Eyeo GmbH | 
 |    4  * | 
 |    5  * Adblock Plus is free software: you can redistribute it and/or modify | 
 |    6  * it under the terms of the GNU General Public License version 3 as | 
 |    7  * published by the Free Software Foundation. | 
 |    8  * | 
 |    9  * Adblock Plus is distributed in the hope that it will be useful, | 
 |   10  * but WITHOUT ANY WARRANTY; without even the implied warranty of | 
 |   11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | 
 |   12  * GNU General Public License for more details. | 
 |   13  * | 
 |   14  * You should have received a copy of the GNU General Public License | 
 |   15  * along with Adblock Plus.  If not, see <http://www.gnu.org/licenses/>. | 
 |   16  */ | 
 |   17  | 
|    1 "use strict"; |   18 "use strict"; | 
|    2  |   19  | 
|    3 let readline = require("readline"); |   20 let readline = require("readline"); | 
|    4 let punycode = require("punycode"); |   21 let Filter = require("filterClasses").Filter; | 
|    5 let tldjs = require("tldjs"); |   22 let ContentBlockerList = require("./lib/contentBlockerList.js").ContentBlockerLi
     st; | 
|    6 let filterClasses = require("./adblockplus.js"); |  | 
|    7  |   23  | 
|    8 let typeMap = filterClasses.RegExpFilter.typeMap; |   24 var rl = readline.createInterface({input: process.stdin, terminal: false}); | 
 |   25 var blockerList = new ContentBlockerList(); | 
|    9  |   26  | 
|   10 const selectorLimit = 5000; |   27 rl.on("line", line => | 
 |   28 { | 
 |   29   let firstNonWhitespace = line.search(/\S/); | 
 |   30   if (firstNonWhitespace > -1 && line.charAt(firstNonWhitespace) != "[") | 
 |   31     blockerList.addFilter(Filter.fromText(line)); | 
 |   32 }); | 
|   11  |   33  | 
|   12 let requestFilters = []; |   34 rl.on("close", () => | 
|   13 let requestExceptions = []; |  | 
|   14 let elemhideFilters = []; |  | 
|   15 let elemhideExceptions = []; |  | 
|   16 let elemhideSelectorExceptions = new Map(); |  | 
|   17  |  | 
|   18 function recordException(filter) |  | 
|   19 { |   35 { | 
|   20   if (filter.contentType & (typeMap.IMAGE |   36   console.log(JSON.stringify(blockerList.generateRules(), | 
|   21                             | typeMap.STYLESHEET |   37                              null, "\t")); | 
|   22                             | typeMap.SCRIPT |   38 }); | 
|   23                             | typeMap.FONT |  | 
|   24                             | typeMap.MEDIA |  | 
|   25                             | typeMap.POPUP |  | 
|   26                             | typeMap.OBJECT |  | 
|   27                             | typeMap.OBJECT_SUBREQUEST |  | 
|   28                             | typeMap.XMLHTTPREQUEST |  | 
|   29                             | typeMap.PING |  | 
|   30                             | typeMap.SUBDOCUMENT |  | 
|   31                             | typeMap.OTHER)) |  | 
|   32     requestExceptions.push(filter); |  | 
|   33  |  | 
|   34     if (filter.contentType & typeMap.ELEMHIDE) |  | 
|   35       elemhideExceptions.push(filter); |  | 
|   36 } |  | 
|   37  |  | 
/**
 * Sorts the keys of a domain map into included and excluded lists.
 *
 * Every non-empty key is lowercased and converted to its ASCII (punycode)
 * form. Keys with a falsy value go to `excluded`. Keys with a truthy value
 * go to `included`, but only when the entry stored under the empty-string
 * key is falsy.
 *
 * @param {Object} domains   map of domain name to a truthy/falsy flag
 * @param {string[]} included  array that receives the included domains
 * @param {string[]} excluded  array that receives the excluded domains
 */
function parseDomains(domains, included, excluded)
{
  for (let key in domains)
  {
    // The empty key is only consulted as a flag, never emitted itself
    if (key == "")
      continue;

    let asciiName = punycode.toASCII(key.toLowerCase());

    if (domains[key])
    {
      if (!domains[""])
        included.push(asciiName);
    }
    else
      excluded.push(asciiName);
  }
}
|   54  |  | 
/**
 * Remembers the domains of an element hiding exception, keyed by the
 * exception's selector.
 *
 * The domain list for a selector is created on first use and shared by all
 * later exceptions with the same selector. Note that the container is
 * addressed through property brackets here, i.e. as a plain dictionary.
 *
 * @param {Object} filter  object providing `selector` and `domains`
 */
function recordSelectorException(filter)
{
  let selector = filter.selector;
  let collected = elemhideSelectorExceptions[selector];

  if (!collected)
  {
    collected = [];
    elemhideSelectorExceptions[selector] = collected;
  }

  // Only the included domains are kept; excluded ones go to a throwaway array
  parseDomains(filter.domains, collected, []);
}
|   63  |  | 
/**
 * Parses one input line and files the resulting filter into the
 * module-level collections.
 *
 * Lines whose first character is "[" are ignored, as are filters carrying
 * `sitekeys` and RegExpFilter instances with an empty `regexpSource`.
 *
 * @param {string} line  a single line of filter text
 */
function parseFilter(line)
{
  if (line.charAt(0) == "[")
    return;

  let filter = filterClasses.Filter.fromText(line);

  let unsupported = filter.sitekeys ||
                    (filter instanceof filterClasses.RegExpFilter &&
                     !filter.regexpSource);
  if (unsupported)
    return;

  // The checks below are independent of each other on purpose: each one
  // is evaluated regardless of whether an earlier one matched
  if (filter instanceof filterClasses.BlockingFilter)
    requestFilters.push(filter);

  if (filter instanceof filterClasses.WhitelistFilter)
    recordException(filter);

  if (filter instanceof filterClasses.ElemHideFilter)
    elemhideFilters.push(filter);

  if (filter instanceof filterClasses.ElemHideException)
    recordSelectorException(filter);
}
|   85  |  | 
/**
 * Escapes regular expression metacharacters in a string by putting a
 * backslash in front of each of them.
 *
 * @param {string} s  text to be matched literally
 * @return {string} the text with every metacharacter backslash-escaped
 */
function escapeRegExp(s)
{
  let metaCharacters = /[.*+?^${}()|[\]\\]/g;
  return s.replace(metaCharacters, found => "\\" + found);
}
|   90  |  | 
/**
 * Builds a regular expression source matching http and https URLs whose
 * host is the given domain, optionally preceded by further labels, and
 * followed by "/" or ":".
 *
 * @param {string} domain  domain name, escaped before being embedded
 * @return {string} regular expression source
 */
function matchDomain(domain)
{
  let escapedDomain = escapeRegExp(domain);
  return `^https?://([^/:]*\\.)?${escapedDomain}[/:]`;
}
|   95  |  | 
/**
 * Converts an element hiding filter into a selector plus the list of URL
 * patterns it applies to.
 *
 * No result is produced when the filter has excluded domains or when its
 * selector is a property of the selector exception container.
 *
 * @param {Object} filter  object providing `selector` and `domains`
 * @return {?Object} `{matchDomains, selector}`, or undefined when the
 *                   filter is skipped
 */
function convertElemHideFilter(filter)
{
  let included = [];
  let excluded = [];

  parseDomains(filter.domains, included, excluded);

  // NOTE(review): `in` also finds inherited property names, so a selector
  // equal to such a name would be treated as having an exception
  if (excluded.length > 0 || (filter.selector in elemhideSelectorExceptions))
    return undefined;

  return {matchDomains: included.map(matchDomain), selector: filter.selector};
}
|  107  |  | 
/**
 * Translates filter pattern text into regular expression source, one
 * character at a time:
 *
 * - "*" becomes ".*", unless nothing has been emitted yet, it is the last
 *   character, or another "*" follows (then it is dropped)
 * - "^" becomes "." unless it is the last character (then it is dropped)
 * - "|" becomes "^" at index 0, "$" at the last index, "https?://" at
 *   index 1 after a leading "|", and an escaped "\|" anywhere else
 * - . + ? $ { } ( ) [ ] \ are backslash-escaped
 * - everything else is copied unchanged
 *
 * @param {string} text  filter pattern
 * @return {string} regular expression source
 */
function toRegExp(text)
{
  let escapable = ".+?${}()[]\\";
  let last = text.length - 1;
  let output = "";

  for (let pos = 0; pos <= last; pos++)
  {
    let chr = text[pos];

    if (chr == "*")
    {
      if (output.length > 0 && pos < last && text[pos + 1] != "*")
        output += ".*";
    }
    else if (chr == "^")
    {
      if (pos < last)
        output += ".";
    }
    else if (chr == "|")
    {
      // The order of these tests matters: position 0 wins over "last",
      // which in turn wins over the "||" prefix case
      if (pos == 0)
        output += "^";
      else if (pos == last)
        output += "$";
      else if (pos == 1 && text[0] == "|")
        output += "https?://";
      else
        output += "\\|";
    }
    else if (escapable.indexOf(chr) != -1)
      output += "\\" + chr;
    else
      output += chr;
  }

  return output;
}
|  154  |  | 
/**
 * Produces the regular expression source for a filter.
 *
 * The host part at the start of `filter.regexpSource` is converted to
 * punycode, the pattern is translated with toRegExp(), and the result is
 * prefixed with "^https?://.*" unless it already begins with "^" or
 * "http" (case-insensitive).
 *
 * @param {Object} filter  object providing `regexpSource`
 * @return {string} regular expression source
 */
function getRegExpSource(filter)
{
  // Safari expects punycode, filter lists use unicode
  let hostPattern = /^(\|\||\|?https?:\/\/)([\w\-.*\u0080-\uFFFF]+)/i;
  let asciiPattern = filter.regexpSource.replace(
    hostPattern,
    (match, prefix, domain) => prefix + punycode.toASCII(domain)
  );

  let source = toRegExp(asciiPattern);

  // Limit rules to HTTP(S) URLs
  if (/^(\^|http)/i.test(source))
    return source;

  return "^https?://.*" + source;
}
|  172  |  | 
/**
 * Lists the resource type names that correspond to the bits set in a
 * filter's content type mask.
 *
 * The names are always emitted in the fixed order of the table below; a
 * name is included when any of its associated bits is set.
 *
 * @param {Object} filter  object providing the `contentType` bit mask
 * @return {string[]} resource type names
 */
function getResourceTypes(filter)
{
  let table = [
    [typeMap.IMAGE, "image"],
    [typeMap.STYLESHEET, "style-sheet"],
    [typeMap.SCRIPT, "script"],
    [typeMap.FONT, "font"],
    [typeMap.MEDIA | typeMap.OBJECT, "media"],
    [typeMap.POPUP, "popup"],
    [typeMap.XMLHTTPREQUEST | typeMap.OBJECT_SUBREQUEST |
     typeMap.PING | typeMap.OTHER, "raw"],
    [typeMap.SUBDOCUMENT, "document"]
  ];

  let names = [];
  for (let entry of table)
  {
    if (filter.contentType & entry[0])
      names.push(entry[1]);
  }

  return names;
}
|  197  |  | 
/**
 * Expands a list of domains: each domain is kept, and a "www."-prefixed
 * copy is added right after it whenever tldjs reports an empty subdomain
 * for that domain.
 *
 * @param {string[]} domains  domain names
 * @return {string[]} a new array with the expanded list
 */
function addDomainPrefix(domains)
{
  let expanded = [];

  for (let name of domains)
  {
    let hasSubdomain = tldjs.getSubdomain(name) != "";

    if (hasSubdomain)
      expanded.push(name);
    else
      expanded.push(name, "www." + name);
  }

  return expanded;
}
|  212  |  | 
/**
 * Converts a request filter into a rule object consisting of a trigger
 * and an action.
 *
 * The trigger always carries "url-filter". "resource-type" is added when
 * requested, "load-type" when `filter.thirdParty` is neither null nor
 * undefined, and either "if-domain" (included domains present) or
 * "unless-domain" (only excluded domains present).
 *
 * @param {Object} filter  object providing `regexpSource`, `domains`,
 *                         `contentType` and `thirdParty`
 * @param {string} action  value stored as the action type
 * @param {boolean} withResourceTypes  whether to add "resource-type"
 * @return {Object} `{trigger, action: {type}}`
 */
function convertFilter(filter, action, withResourceTypes)
{
  let trigger = {"url-filter": getRegExpSource(filter)};

  let included = [];
  let excluded = [];
  parseDomains(filter.domains, included, excluded);

  // Property insertion order below determines the order in the JSON output
  if (withResourceTypes)
    trigger["resource-type"] = getResourceTypes(filter);

  if (filter.thirdParty != null)
  {
    let loadType = filter.thirdParty ? "third-party" : "first-party";
    trigger["load-type"] = [loadType];
  }

  // Included domains take precedence; exclusions are used only without them
  if (included.length > 0)
    trigger["if-domain"] = addDomainPrefix(included);
  else if (excluded.length > 0)
    trigger["unless-domain"] = addDomainPrefix(excluded);

  return {trigger, action: {type: action}};
}
|  233  |  | 
/**
 * Checks recursively whether a value contains any character outside the
 * ASCII range (code units above 0x7F).
 *
 * Strings are tested directly. Objects and arrays are searched through all
 * of their own properties, enumerable or not. Everything else, including
 * null and undefined, contains no such character.
 *
 * @param {*} obj  value to inspect
 * @return {boolean} true if a non-ASCII character was found
 */
function hasNonASCI(obj)
{
  if (typeof obj == "string")
    return /[^\x00-\x7F]/.test(obj);

  // typeof null is "object", but null has no properties to look at and
  // Object.getOwnPropertyNames(null) would throw
  if (obj === null || typeof obj != "object")
    return false;

  // Own property names include array indices, so arrays need no extra pass
  return Object.getOwnPropertyNames(obj).some(name => hasNonASCI(obj[name]));
}
|  256  |  | 
/**
 * Rewrites every ID selector (#someID) in a CSS selector string into the
 * attribute form [id=someID].
 *
 * "#" characters inside single- or double-quoted text are left alone, and
 * a backslash causes the character after it to be skipped. An ID ends at
 * the first character that is not "-", "_", an ASCII letter or digit, or a
 * character at or above U+0080.
 *
 * @param {string} selector  CSS selector text
 * @return {string} selector text with ID selectors replaced
 */
function convertIDSelectorsToAttributeSelectors(selector)
{
  // First we figure out where all the IDs are
  let sep = "";
  let start = null;
  let positions = [];
  for (let i = 0; i < selector.length; i++)
  {
    let chr = selector[i];

    if (chr == "\\")        // ignore escaped characters
      i++;
    else if (chr == sep)    // don't match IDs within quoted text
      sep = "";             // e.g. [attr="#Hello"]
    else if (sep == "")
    {
      if (chr == '"' || chr == "'")
        sep = chr;
      else if (start == null)  // look for the start of an ID
      {
        if (chr == "#")
          start = i;
      }
      else if (chr != "-" && chr != "_" &&
               (chr < "0" ||
                chr > "9" && chr < "A" ||
                chr > "Z" && chr < "a" ||
                chr > "z" && chr < "\x80")) // look for the end of the ID
      {
        positions.push({start: start, end: i});

        // A "#" that terminates one ID starts the next one right away
        // (e.g. "#a#b"); any other terminator leaves us outside of an ID
        start = (chr == "#" ? i : null);
      }
    }
  }
  if (start != null)
    positions.push({start: start, end: selector.length});

  // Now replace them all with the [id=someID] form
  let newSelector = [];
  let i = 0;
  for (let pos of positions)
  {
    newSelector.push(selector.substring(i, pos.start));
    newSelector.push('[id=' + selector.substring(pos.start + 1, pos.end) + ']');
    i = pos.end;
  }
  newSelector.push(selector.substring(i));

  return newSelector.join("");
}
|  307  |  | 
/**
 * Converts all recorded filters into rule objects and prints them to
 * standard output as tab-indented JSON.
 *
 * Rules appear in this order: element hiding rules (grouped by URL
 * pattern), element hiding exceptions, request blocking rules, request
 * exceptions. Rules containing non-ASCII characters are left out.
 */
function logRules()
{
  let rules = [];

  let addRule = rule =>
  {
    if (hasNonASCI(rule))
      return;
    rules.push(rule);
  };

  // Collect the selectors of all element hiding filters per URL pattern
  let selectorsByPattern = new Map();
  for (let filter of elemhideFilters)
  {
    let converted = convertElemHideFilter(filter);
    if (!converted)
      continue;

    // Filters without included domains apply to every http(s) URL
    let patterns = converted.matchDomains;
    if (patterns.length == 0)
      patterns = ["^https?://"];

    for (let pattern of patterns)
    {
      if (!selectorsByPattern.has(pattern))
        selectorsByPattern.set(pattern, []);
      selectorsByPattern.get(pattern).push(converted.selector);
    }
  }

  for (let [pattern, selectors] of selectorsByPattern)
  {
    // Emit one rule per chunk of at most selectorLimit selectors
    for (let offset = 0; offset < selectors.length; offset += selectorLimit)
    {
      let chunk = selectors.slice(offset, offset + selectorLimit);

      // As of Safari 9.0 element IDs are matched as lowercase. We work around
      // this by converting to the attribute format
      let selector = convertIDSelectorsToAttributeSelectors(chunk.join(", "));

      addRule({
        trigger: {"url-filter": pattern},
        action: {type: "css-display-none",
                 selector: selector}
      });
    }
  }

  for (let filter of elemhideExceptions)
    addRule(convertFilter(filter, "ignore-previous-rules", false));

  for (let filter of requestFilters)
    addRule(convertFilter(filter, "block", true));

  for (let filter of requestExceptions)
    addRule(convertFilter(filter, "ignore-previous-rules", true));

  console.log(JSON.stringify(rules, null, "\t"));
}
|  364  |  | 
// Hand every line read from standard input to parseFilter, and print the
// rules via logRules once the input has been closed
let rl = readline.createInterface({
  input: process.stdin,
  terminal: false
});
rl.on("line", parseFilter).on("close", logRules);
| OLD | NEW |