| Left: | ||
| Right: |
| LEFT | RIGHT |
|---|---|
| 1 "use strict"; | 1 "use strict"; |
| 2 | 2 |
| 3 var readline = require("readline"); | 3 let readline = require("readline"); |
| 4 var punycode = require("punycode"); | 4 let punycode = require("punycode"); |
| 5 var tldjs = require("tldjs"); | 5 let tldjs = require("tldjs"); |
| 6 var filterClasses = require("./adblockplus.js"); | 6 let filterClasses = require("./adblockplus.js"); |
| 7 | 7 |
| 8 var typeMap = filterClasses.RegExpFilter.typeMap; | 8 let typeMap = filterClasses.RegExpFilter.typeMap; |
| 9 | 9 |
| 10 var requestFilters = []; | 10 const selectorLimit = 5000; |
| 11 var requestExceptions = []; | 11 |
| 12 var elemhideFilters = []; | 12 let requestFilters = []; |
| 13 var elemhideExceptions = []; | 13 let requestExceptions = []; |
| 14 var elemhideSelectorExceptions = Object.create(null); | 14 let elemhideFilters = []; |
| 15 let elemhideExceptions = []; | |
| 16 let elemhideSelectorExceptions = new Map(); | |
| 15 | 17 |
| 16 function recordException(filter) | 18 function recordException(filter) |
| 17 { | 19 { |
| 18 if (filter.contentType & (typeMap.IMAGE | 20 if (filter.contentType & (typeMap.IMAGE |
| 19 | typeMap.STYLESHEET | 21 | typeMap.STYLESHEET |
| 20 | typeMap.SCRIPT | 22 | typeMap.SCRIPT |
| 21 | typeMap.FONT | 23 | typeMap.FONT |
| 22 | typeMap.MEDIA | 24 | typeMap.MEDIA |
| 23 | typeMap.POPUP | 25 | typeMap.POPUP |
| 24 | typeMap.OBJECT | 26 | typeMap.OBJECT |
| 25 | typeMap.OBJECT_SUBREQUEST | 27 | typeMap.OBJECT_SUBREQUEST |
| 26 | typeMap.XMLHTTPREQUEST | 28 | typeMap.XMLHTTPREQUEST |
| 27 | typeMap.PING | 29 | typeMap.PING |
| 28 | typeMap.SUBDOCUMENT | 30 | typeMap.SUBDOCUMENT |
| 29 | typeMap.OTHER)) | 31 | typeMap.OTHER)) |
| 30 requestExceptions.push(filter); | 32 requestExceptions.push(filter); |
| 31 | 33 |
| 32 if (filter.contentType & typeMap.ELEMHIDE) | 34 if (filter.contentType & typeMap.ELEMHIDE) |
| 33 elemhideExceptions.push(filter); | 35 elemhideExceptions.push(filter); |
| 34 } | 36 } |
| 35 | 37 |
| 36 function parseDomains(domains, included, excluded) | 38 function parseDomains(domains, included, excluded) |
| 37 { | 39 { |
| 38 for (var domain in domains) | 40 for (let domain in domains) |
| 39 { | 41 { |
| 40 if (domain != "") | 42 if (domain != "") |
| 41 { | 43 { |
| 42 var enabled = domains[domain]; | 44 let enabled = domains[domain]; |
| 43 domain = punycode.toASCII(domain.toLowerCase()); | 45 domain = punycode.toASCII(domain.toLowerCase()); |
| 44 | 46 |
| 45 if (!enabled) | 47 if (!enabled) |
| 46 excluded.push(domain); | 48 excluded.push(domain); |
| 47 else if (!domains[""]) | 49 else if (!domains[""]) |
| 48 included.push(domain); | 50 included.push(domain); |
| 49 } | 51 } |
| 50 } | 52 } |
| 51 } | 53 } |
| 52 | 54 |
| 53 function recordSelectorException(filter) | 55 function recordSelectorException(filter) |
| 54 { | 56 { |
| 55 var domains = elemhideSelectorExceptions[filter.selector]; | 57 let domains = elemhideSelectorExceptions[filter.selector]; |
| 56 if (!domains) | 58 if (!domains) |
| 57 domains = elemhideSelectorExceptions[filter.selector] = []; | 59 domains = elemhideSelectorExceptions[filter.selector] = []; |
| 58 | 60 |
| 59 parseDomains(filter.domains, domains, []); | 61 parseDomains(filter.domains, domains, []); |
| 60 } | 62 } |
| 61 | 63 |
| 62 function parseFilter(line) | 64 function parseFilter(line) |
| 63 { | 65 { |
| 64 if (line.charAt(0) == "[") | 66 if (line.charAt(0) == "[") |
| 65 return; | 67 return; |
| 66 | 68 |
| 67 var filter = filterClasses.Filter.fromText(line); | 69 let filter = filterClasses.Filter.fromText(line); |
| 68 | 70 |
| 69 if (filter.sitekeys) | 71 if (filter.sitekeys) |
| 70 return; | 72 return; |
| 71 if (filter instanceof filterClasses.RegExpFilter && !filter.regexpSource) | 73 if (filter instanceof filterClasses.RegExpFilter && !filter.regexpSource) |
| 72 return; | 74 return; |
| 73 | 75 |
| 74 if (filter instanceof filterClasses.BlockingFilter) | 76 if (filter instanceof filterClasses.BlockingFilter) |
| 75 requestFilters.push(filter); | 77 requestFilters.push(filter); |
| 76 if (filter instanceof filterClasses.WhitelistFilter) | 78 if (filter instanceof filterClasses.WhitelistFilter) |
| 77 recordException(filter); | 79 recordException(filter); |
| 78 if (filter instanceof filterClasses.ElemHideFilter) | 80 if (filter instanceof filterClasses.ElemHideFilter) |
| 79 elemhideFilters.push(filter); | 81 elemhideFilters.push(filter); |
| 80 if (filter instanceof filterClasses.ElemHideException) | 82 if (filter instanceof filterClasses.ElemHideException) |
| 81 recordSelectorException(filter); | 83 recordSelectorException(filter); |
| 82 } | 84 } |
| 83 | 85 |
| 84 function escapeRegExp(s) | 86 function escapeRegExp(s) |
| 85 { | 87 { |
| 86 return s.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); | 88 return s.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); |
| 87 } | 89 } |
| 88 | 90 |
| 89 function matchDomain(domain) | 91 function matchDomain(domain) |
| 90 { | 92 { |
| 91 return "^https?://([^/:]*\\.)?" + escapeRegExp(domain) + "[/:]"; | 93 return "^https?://([^/:]*\\.)?" + escapeRegExp(domain) + "[/:]"; |
| 92 } | 94 } |
| 93 | 95 |
| 94 function convertElemHideFilter(filter) | 96 function convertElemHideFilter(filter) |
| 95 { | 97 { |
| 96 var included = []; | 98 let included = []; |
| 97 var excluded = []; | 99 let excluded = []; |
| 98 var rules = []; | 100 let rules = []; |
| 99 | 101 |
| 100 parseDomains(filter.domains, included, excluded); | 102 parseDomains(filter.domains, included, excluded); |
| 101 | 103 |
| 102 if (excluded.length == 0 && !(filter.selector in elemhideSelectorExceptions)) | 104 if (excluded.length == 0 && !(filter.selector in elemhideSelectorExceptions)) |
| 103 return [included.map(matchDomain), filter.selector]; | 105 return {matchDomains: included.map(matchDomain), selector: filter.selector}; |
| 104 } | 106 } |
| 105 | 107 |
| 106 function toRegExp(text) | 108 function toRegExp(text) |
| 107 { | 109 { |
| 108 var result = ""; | 110 let result = ""; |
| 109 var lastIndex = text.length - 1; | 111 let lastIndex = text.length - 1; |
| 110 | 112 |
| 111 for (var i = 0; i < text.length; i++) | 113 for (let i = 0; i < text.length; i++) |
| 112 { | 114 { |
| 113 var c = text[i]; | 115 let c = text[i]; |
| 114 | 116 |
| 115 switch (c) | 117 switch (c) |
| 116 { | 118 { |
| 117 case "*": | 119 case "*": |
| 118 if (result.length > 0 && i < lastIndex && text[i + 1] != "*") | 120 if (result.length > 0 && i < lastIndex && text[i + 1] != "*") |
| 119 result += ".*"; | 121 result += ".*"; |
| 120 break; | 122 break; |
| 121 case "^": | 123 case "^": |
| 122 if (i < lastIndex) | 124 if (i < lastIndex) |
| 123 result += "."; | 125 result += "."; |
| (... 21 unchanged lines skipped: left 124-144, right 126-146 ...) | |
| 145 default: | 147 default: |
| 146 result += c; | 148 result += c; |
| 147 } | 149 } |
| 148 } | 150 } |
| 149 | 151 |
| 150 return result; | 152 return result; |
| 151 } | 153 } |
| 152 | 154 |
| 153 function getRegExpSource(filter) | 155 function getRegExpSource(filter) |
| 154 { | 156 { |
| 155 var source = toRegExp(filter.regexpSource.replace( | 157 let source = toRegExp(filter.regexpSource.replace( |
| 156 // Safari expects punycode, filter lists use unicode | 158 // Safari expects punycode, filter lists use unicode |
| 157 /^(\|\||\|?https?:\/\/)([\w\-.*\u0080-\uFFFF]+)/i, | 159 /^(\|\||\|?https?:\/\/)([\w\-.*\u0080-\uFFFF]+)/i, |
| 158 function (match, prefix, domain) | 160 function (match, prefix, domain) |
| 159 { | 161 { |
| 160 return prefix + punycode.toASCII(domain); | 162 return prefix + punycode.toASCII(domain); |
| 161 } | 163 } |
| 162 )); | 164 )); |
| 163 | 165 |
| 164 // Limit rules to to HTTP(S) URLs | 166 // Limit rules to to HTTP(S) URLs |
| 165 if (!/^(\^|http)/i.test(source)) | 167 if (!/^(\^|http)/i.test(source)) |
| 166 source = "^https?://.*" + source; | 168 source = "^https?://.*" + source; |
| 167 | 169 |
| 168 return source; | 170 return source; |
| 169 } | 171 } |
| 170 | 172 |
| 171 function getResourceTypes(filter) | 173 function getResourceTypes(filter) |
| 172 { | 174 { |
| 173 var types = []; | 175 let types = []; |
| 174 | 176 |
| 175 if (filter.contentType & typeMap.IMAGE) | 177 if (filter.contentType & typeMap.IMAGE) |
| 176 types.push("image"); | 178 types.push("image"); |
| 177 if (filter.contentType & typeMap.STYLESHEET) | 179 if (filter.contentType & typeMap.STYLESHEET) |
| 178 types.push("style-sheet"); | 180 types.push("style-sheet"); |
| 179 if (filter.contentType & typeMap.SCRIPT) | 181 if (filter.contentType & typeMap.SCRIPT) |
| 180 types.push("script"); | 182 types.push("script"); |
| 181 if (filter.contentType & typeMap.FONT) | 183 if (filter.contentType & typeMap.FONT) |
| 182 types.push("font"); | 184 types.push("font"); |
| 183 if (filter.contentType & (typeMap.MEDIA | typeMap.OBJECT)) | 185 if (filter.contentType & (typeMap.MEDIA | typeMap.OBJECT)) |
| 184 types.push("media"); | 186 types.push("media"); |
| 185 if (filter.contentType & typeMap.POPUP) | 187 if (filter.contentType & typeMap.POPUP) |
| 186 types.push("popup"); | 188 types.push("popup"); |
| 187 if (filter.contentType & (typeMap.XMLHTTPREQUEST | typeMap.OBJECT_SUBREQUEST | 189 if (filter.contentType & (typeMap.XMLHTTPREQUEST | typeMap.OBJECT_SUBREQUEST |
| 188 | typeMap.PING | typeMap.OTHER)) | 190 | typeMap.PING | typeMap.OTHER)) |
| 189 types.push("raw"); | 191 types.push("raw"); |
| 190 if (filter.contentType & typeMap.SUBDOCUMENT) | 192 if (filter.contentType & typeMap.SUBDOCUMENT) |
| 191 types.push("document"); | 193 types.push("document"); |
| 192 | 194 |
| 193 return types; | 195 return types; |
| 194 } | 196 } |
| 195 | 197 |
| 196 function addDomainPrefix(domains) | 198 function addDomainPrefix(domains) |
| 197 { | 199 { |
| 198 var result = []; | 200 let result = []; |
| 199 | 201 |
| 200 for (var i = 0; i < domains.length; i++) | 202 for (let domain of domains) |
| 201 { | 203 { |
| 202 var domain = domains[i]; | |
| 203 result.push(domain); | 204 result.push(domain); |
| 204 | 205 |
| 205 if (tldjs.getSubdomain(domain) == "") | 206 if (tldjs.getSubdomain(domain) == "") |
| 206 result.push("www." + domain); | 207 result.push("www." + domain); |
| 207 } | 208 } |
| 208 | 209 |
| 209 return result; | 210 return result; |
| 210 } | 211 } |
| 211 | 212 |
| 212 function convertFilter(filter, action, withResourceTypes) | 213 function convertFilter(filter, action, withResourceTypes) |
| 213 { | 214 { |
| 214 var trigger = {"url-filter": getRegExpSource(filter)}; | 215 let trigger = {"url-filter": getRegExpSource(filter)}; |
| 215 var included = []; | 216 let included = []; |
| 216 var excluded = []; | 217 let excluded = []; |
| 217 | 218 |
| 218 parseDomains(filter.domains, included, excluded); | 219 parseDomains(filter.domains, included, excluded); |
| 219 | 220 |
| 220 if (withResourceTypes) | 221 if (withResourceTypes) |
| 221 trigger["resource-type"] = getResourceTypes(filter); | 222 trigger["resource-type"] = getResourceTypes(filter); |
| 222 if (filter.thirdParty != null) | 223 if (filter.thirdParty != null) |
| 223 trigger["load-type"] = [filter.thirdParty ? "third-party" : "first-party"]; | 224 trigger["load-type"] = [filter.thirdParty ? "third-party" : "first-party"]; |
| 224 | 225 |
| 225 if (included.length > 0) | 226 if (included.length > 0) |
| 226 trigger["if-domain"] = addDomainPrefix(included); | 227 trigger["if-domain"] = addDomainPrefix(included); |
| 227 else if (excluded.length > 0) | 228 else if (excluded.length > 0) |
| 228 trigger["unless-domain"] = addDomainPrefix(excluded); | 229 trigger["unless-domain"] = addDomainPrefix(excluded); |
| 229 | 230 |
| 230 return {trigger: trigger, action: {type: action}}; | 231 return {trigger: trigger, action: {type: action}}; |
| 231 } | 232 } |
| 232 | 233 |
| 233 function hasNonASCI(obj) | 234 function hasNonASCI(obj) |
| 234 { | 235 { |
| 235 if (typeof obj == "string") | 236 if (typeof obj == "string") |
| 236 { | 237 { |
| 237 if (/[^\x00-\x7F]/.test(obj)) | 238 if (/[^\x00-\x7F]/.test(obj)) |
| 238 return true; | 239 return true; |
| 239 } | 240 } |
| 240 | 241 |
| 241 if (typeof obj == "object") | 242 if (typeof obj == "object") |
| 242 { | 243 { |
| 243 var i; | |
| 244 if (obj instanceof Array) | 244 if (obj instanceof Array) |
| 245 for (i = 0; i < obj.length; i++) | 245 for (let item of obj) |
| 246 if (hasNonASCI(obj[i])) | 246 if (hasNonASCI(item)) |
| 247 return true; | 247 return true; |
| 248 | 248 |
| 249 var names = Object.getOwnPropertyNames(obj); | 249 for (let name of Object.getOwnPropertyNames(obj)) |
| 250 for (i = 0; i < names.length; i++) | 250 if (hasNonASCI(obj[name])) |
| 251 if (hasNonASCI(obj[names[i]])) | |
| 252 return true; | 251 return true; |
| 253 } | 252 } |
| 254 | 253 |
| 255 return false; | 254 return false; |
| 256 } | 255 } |
| 257 | 256 |
| 258 function logRules() | 257 function logRules() |
| 259 { | 258 { |
| 260 var rules = []; | 259 let rules = []; |
| 261 var i; | |
| 262 | 260 |
| 263 function addRule(rule) | 261 function addRule(rule) |
| 264 { | 262 { |
| 265 if (!hasNonASCI(rule)) | 263 if (!hasNonASCI(rule)) |
| 266 rules.push(rule); | 264 rules.push(rule); |
| 267 } | 265 } |
| 268 | 266 |
| 269 let groupedElemhideFilters = new Map(); | 267 let groupedElemhideFilters = new Map(); |
| 270 for (let filter of elemhideFilters) | 268 for (let filter of elemhideFilters) |
|
Sebastian Noack
2016/02/15 14:33:11
Do these ES2015 features even work in node.js wi…
kzar
2016/02/15 18:19:20
(As discussed on IRC, we will use ES2015 features t…
| |
| 271 { | 269 { |
| 272 let result = convertElemHideFilter(filter); | 270 let result = convertElemHideFilter(filter); |
| 273 if (!result) | 271 if (!result) |
| 274 continue; | 272 continue; |
| 275 let targetDomains = result[0]; | 273 |
| 276 let selector = result[1]; | 274 if (result.matchDomains.length == 0) |
| 277 | 275 result.matchDomains = ["^https?://"]; |
| 278 if (targetDomains.length == 0) | 276 |
| 279 targetDomains = ["^https?://"]; | 277 for (let matchDomain of result.matchDomains) |
| 280 | 278 { |
| 281 for (let domain of targetDomains) | 279 let group = groupedElemhideFilters.get(matchDomain) || []; |
| 282 { | 280 group.push(result.selector); |
| 283 if (!groupedElemhideFilters.has(domain)) | 281 groupedElemhideFilters.set(matchDomain, group); |
| 284 groupedElemhideFilters.set(domain, []); | 282 } |
| 285 groupedElemhideFilters.get(domain).push(selector); | 283 } |
| 286 } | 284 |
| 287 } | 285 groupedElemhideFilters.forEach((selectors, matchDomain) => |
| 288 | 286 { |
| 289 groupedElemhideFilters.forEach((selectors, domain) => | 287 while (selectors.length) |
| 290 { | 288 { |
| 291 let rule = {trigger: {"url-filter": domain}, | 289 addRule({ |
| 292 action: {type: "css-display-none"}}; | 290 trigger: {"url-filter": matchDomain}, |
| 293 | 291 action: {type: "css-display-none", |
| 294 if (selectors.length == 1) | 292 selector: selectors.splice(0, selectorLimit).join(", ")} |
| 295 rule["action"]["selector"] = selectors[0]; | 293 }); |
| 296 else | 294 } |
| 297 rule["action"]["selector"] = ":matches(" + selectors.join(", ") + ")"; | |
|
kzar
2016/02/13 19:33:24
Note: I wasn't sure if we need to escape the selec
Sebastian Noack
2016/02/15 14:33:11
Well, if any CSS selector is invalid it will break
kzar
2016/02/15 18:19:20
Acknowledged.
| |
| 298 | |
| 299 addRule(rule); | |
| 300 }); | 295 }); |
| 301 | 296 |
| 302 for (i = 0; i < elemhideExceptions.length; i++) | 297 for (let filter of elemhideExceptions) |
| 303 addRule(convertFilter(elemhideExceptions[i], "ignore-previous-rules", false) ); | 298 addRule(convertFilter(filter, "ignore-previous-rules", false)); |
| 304 | 299 |
| 305 for (i = 0; i < requestFilters.length; i++) | 300 for (let filter of requestFilters) |
| 306 addRule(convertFilter(requestFilters[i], "block", true)); | 301 addRule(convertFilter(filter, "block", true)); |
| 307 for (i = 0; i < requestExceptions.length; i++) | 302 for (let filter of requestExceptions) |
| 308 addRule(convertFilter(requestExceptions[i], "ignore-previous-rules", true)); | 303 addRule(convertFilter(filter, "ignore-previous-rules", true)); |
| 309 | 304 |
| 310 console.log(JSON.stringify(rules, null, "\t")); | 305 console.log(JSON.stringify(rules, null, "\t")); |
| 311 } | 306 } |
| 312 | 307 |
| 313 var rl = readline.createInterface({input: process.stdin, terminal: false}); | 308 let rl = readline.createInterface({input: process.stdin, terminal: false}); |
| 314 rl.on("line", parseFilter); | 309 rl.on("line", parseFilter); |
| 315 rl.on("close", logRules); | 310 rl.on("close", logRules); |
| LEFT | RIGHT |