| OLD | NEW |
| 1 /* | 1 /* |
| 2 * This file is part of Adblock Plus <https://adblockplus.org/>, | 2 * This file is part of Adblock Plus <https://adblockplus.org/>, |
| 3 * Copyright (C) 2006-2017 eyeo GmbH | 3 * Copyright (C) 2006-2017 eyeo GmbH |
| 4 * | 4 * |
| 5 * Adblock Plus is free software: you can redistribute it and/or modify | 5 * Adblock Plus is free software: you can redistribute it and/or modify |
| 6 * it under the terms of the GNU General Public License version 3 as | 6 * it under the terms of the GNU General Public License version 3 as |
| 7 * published by the Free Software Foundation. | 7 * published by the Free Software Foundation. |
| 8 * | 8 * |
| 9 * Adblock Plus is distributed in the hope that it will be useful, | 9 * Adblock Plus is distributed in the hope that it will be useful, |
| 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of | 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| (...skipping 15 matching lines...) Expand all Loading... |
| 26 const typeMap = filterClasses.RegExpFilter.typeMap; | 26 const typeMap = filterClasses.RegExpFilter.typeMap; |
| 27 const whitelistableRequestTypes = (typeMap.IMAGE | 27 const whitelistableRequestTypes = (typeMap.IMAGE |
| 28 | typeMap.STYLESHEET | 28 | typeMap.STYLESHEET |
| 29 | typeMap.SCRIPT | 29 | typeMap.SCRIPT |
| 30 | typeMap.FONT | 30 | typeMap.FONT |
| 31 | typeMap.MEDIA | 31 | typeMap.MEDIA |
| 32 | typeMap.POPUP | 32 | typeMap.POPUP |
| 33 | typeMap.OBJECT | 33 | typeMap.OBJECT |
| 34 | typeMap.OBJECT_SUBREQUEST | 34 | typeMap.OBJECT_SUBREQUEST |
| 35 | typeMap.XMLHTTPREQUEST | 35 | typeMap.XMLHTTPREQUEST |
| 36 | typeMap.WEBSOCKET |
| 37 | typeMap.WEBRTC |
| 36 | typeMap.PING | 38 | typeMap.PING |
| 37 | typeMap.SUBDOCUMENT | 39 | typeMap.SUBDOCUMENT |
| 38 | typeMap.OTHER); | 40 | typeMap.OTHER); |
| 39 | 41 |
| 40 function parseDomains(domains, included, excluded) | 42 function parseDomains(domains, included, excluded) |
| 41 { | 43 { |
| 42 for (let domain in domains) | 44 for (let domain in domains) |
| 43 { | 45 { |
| 44 if (domain != "") | 46 if (domain != "") |
| 45 { | 47 { |
| (...skipping 11 matching lines...) Expand all Loading... |
| 57 function escapeRegExp(s) | 59 function escapeRegExp(s) |
| 58 { | 60 { |
| 59 return s.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); | 61 return s.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); |
| 60 } | 62 } |
| 61 | 63 |
| 62 function matchDomain(domain) | 64 function matchDomain(domain) |
| 63 { | 65 { |
| 64 return "^https?://([^/:]*\\.)?" + escapeRegExp(domain).toLowerCase() + "[/:]"; | 66 return "^https?://([^/:]*\\.)?" + escapeRegExp(domain).toLowerCase() + "[/:]"; |
| 65 } | 67 } |
| 66 | 68 |
| 69 function getURLSchemes(contentType) |
| 70 { |
| 71 if (contentType == typeMap.WEBSOCKET) |
| 72 return ["wss?://"]; |
| 73 |
| 74 if (contentType == typeMap.WEBRTC) |
| 75 return ["stuns?:", "turns?:"]; |
| 76 |
| 77 return ["https?://"]; |
| 78 } |
| 79 |
| 67 function findSubdomainsInList(domain, list) | 80 function findSubdomainsInList(domain, list) |
| 68 { | 81 { |
| 69 let subdomains = []; | 82 let subdomains = []; |
| 70 let suffixLength = domain.length + 1; | 83 let suffixLength = domain.length + 1; |
| 71 | 84 |
| 72 for (let name of list) | 85 for (let name of list) |
| 73 { | 86 { |
| 74 if (name.length > suffixLength && name.slice(-suffixLength) == "." + domain) | 87 if (name.length > suffixLength && name.slice(-suffixLength) == "." + domain) |
| 75 subdomains.push(name.slice(0, -suffixLength)); | 88 subdomains.push(name.slice(0, -suffixLength)); |
| 76 } | 89 } |
| (...skipping 13 matching lines...) Expand all Loading... |
| 90 return {matchDomains: included.map(matchDomain), selector: filter.selector}; | 103 return {matchDomains: included.map(matchDomain), selector: filter.selector}; |
| 91 } | 104 } |
| 92 | 105 |
| 93 /** | 106 /** |
| 94 * Parse the given filter "regexpSource" string. Producing a regular expression, | 107 * Parse the given filter "regexpSource" string. Producing a regular expression, |
| 95 * extracting the hostname (if any), deciding if the regular expression is safe | 108 * extracting the hostname (if any), deciding if the regular expression is safe |
| 96 * to be converted + matched as lower case and noting if the source contains | 109 * to be converted + matched as lower case and noting if the source contains |
| 97 * anything after the hostname.) | 110 * anything after the hostname.) |
| 98 * | 111 * |
| 99 * @param {string} text regexpSource property of a filter | 112 * @param {string} text regexpSource property of a filter |
| 113 * @param {string} urlScheme The URL scheme to use in the regular expression |
| 100 * @returns {object} An object containing a regular expression string, a bool | 114 * @returns {object} An object containing a regular expression string, a bool |
| 101 * indicating if the filter can be safely matched as lower | 115 * indicating if the filter can be safely matched as lower |
| 102 * case, a hostname string (or undefined) and a bool | 116 * case, a hostname string (or undefined) and a bool |
| 103 * indicating if the source only contains a hostname or not: | 117 * indicating if the source only contains a hostname or not: |
| 104 * {regexp: "...", | 118 * {regexp: "...", |
| 105 * canSafelyMatchAsLowercase: true/false, | 119 * canSafelyMatchAsLowercase: true/false, |
| 106 * hostname: "...", | 120 * hostname: "...", |
| 107 * justHostname: true/false} | 121 * justHostname: true/false} |
| 108 */ | 122 */ |
| 109 function parseFilterRegexpSource(text) | 123 function parseFilterRegexpSource(text, urlScheme) |
| 110 { | 124 { |
| 111 let regexp = []; | 125 let regexp = []; |
| 112 let lastIndex = text.length - 1; | 126 let lastIndex = text.length - 1; |
| 113 let hostname; | 127 let hostname; |
| 114 let hostnameStart = null; | 128 let hostnameStart = null; |
| 115 let hostnameFinished = false; | 129 let hostnameFinished = false; |
| 116 let justHostname = false; | 130 let justHostname = false; |
| 117 let canSafelyMatchAsLowercase = false; | 131 let canSafelyMatchAsLowercase = false; |
| 118 | 132 |
| 133 if (!urlScheme) |
| 134 urlScheme = getURLSchemes()[0]; |
| 135 |
| 119 for (let i = 0; i < text.length; i++) | 136 for (let i = 0; i < text.length; i++) |
| 120 { | 137 { |
| 121 let c = text[i]; | 138 let c = text[i]; |
| 122 | 139 |
| 123 if (hostnameFinished) | 140 if (hostnameFinished) |
| 124 justHostname = false; | 141 justHostname = false; |
| 125 | 142 |
| 126 // If we're currently inside the hostname we have to be careful not to | 143 // If we're currently inside the hostname we have to be careful not to |
| 127 // escape any characters until after we have converted it to punycode. | 144 // escape any characters until after we have converted it to punycode. |
| 128 if (hostnameStart != null && !hostnameFinished) | 145 if (hostnameStart != null && !hostnameFinished) |
| (...skipping 30 matching lines...) Expand all Loading... |
| 159 } | 176 } |
| 160 if (i == lastIndex) | 177 if (i == lastIndex) |
| 161 { | 178 { |
| 162 regexp.push("$"); | 179 regexp.push("$"); |
| 163 break; | 180 break; |
| 164 } | 181 } |
| 165 if (i == 1 && text[0] == "|") | 182 if (i == 1 && text[0] == "|") |
| 166 { | 183 { |
| 167 hostnameStart = i + 1; | 184 hostnameStart = i + 1; |
| 168 canSafelyMatchAsLowercase = true; | 185 canSafelyMatchAsLowercase = true; |
| 169 regexp.push("https?://([^/]+\\.)?"); | 186 regexp.push(urlScheme + "([^/]+\\.)?"); |
| 170 break; | 187 break; |
| 171 } | 188 } |
| 172 regexp.push("\\|"); | 189 regexp.push("\\|"); |
| 173 break; | 190 break; |
| 174 case "/": | 191 case "/": |
| 175 if (!hostnameFinished && | 192 if (!hostnameFinished && |
| 176 text.charAt(i-2) == ":" && text.charAt(i-1) == "/") | 193 text.charAt(i-2) == ":" && text.charAt(i-1) == "/") |
| 177 { | 194 { |
| 178 hostnameStart = i + 1; | 195 hostnameStart = i + 1; |
| 179 canSafelyMatchAsLowercase = true; | 196 canSafelyMatchAsLowercase = true; |
| (...skipping 14 matching lines...) Expand all Loading... |
| 194 } | 211 } |
| 195 | 212 |
| 196 return { | 213 return { |
| 197 regexp: regexp.join(""), | 214 regexp: regexp.join(""), |
| 198 canSafelyMatchAsLowercase: canSafelyMatchAsLowercase, | 215 canSafelyMatchAsLowercase: canSafelyMatchAsLowercase, |
| 199 hostname: hostname, | 216 hostname: hostname, |
| 200 justHostname: justHostname | 217 justHostname: justHostname |
| 201 }; | 218 }; |
| 202 } | 219 } |
| 203 | 220 |
| 204 function getResourceTypes(filter) | 221 function getResourceTypes(contentType) |
| 205 { | 222 { |
| 206 let types = []; | 223 let types = []; |
| 207 | 224 |
| 208 if (filter.contentType & typeMap.IMAGE) | 225 if (contentType & typeMap.IMAGE) |
| 209 types.push("image"); | 226 types.push("image"); |
| 210 if (filter.contentType & typeMap.STYLESHEET) | 227 if (contentType & typeMap.STYLESHEET) |
| 211 types.push("style-sheet"); | 228 types.push("style-sheet"); |
| 212 if (filter.contentType & typeMap.SCRIPT) | 229 if (contentType & typeMap.SCRIPT) |
| 213 types.push("script"); | 230 types.push("script"); |
| 214 if (filter.contentType & typeMap.FONT) | 231 if (contentType & typeMap.FONT) |
| 215 types.push("font"); | 232 types.push("font"); |
| 216 if (filter.contentType & (typeMap.MEDIA | typeMap.OBJECT)) | 233 if (contentType & (typeMap.MEDIA | typeMap.OBJECT)) |
| 217 types.push("media"); | 234 types.push("media"); |
| 218 if (filter.contentType & typeMap.POPUP) | 235 if (contentType & typeMap.POPUP) |
| 219 types.push("popup"); | 236 types.push("popup"); |
| 220 if (filter.contentType & (typeMap.XMLHTTPREQUEST | | 237 if (contentType & (typeMap.XMLHTTPREQUEST | |
| 238 typeMap.WEBSOCKET | |
| 239 typeMap.WEBRTC | |
| 221 typeMap.OBJECT_SUBREQUEST | | 240 typeMap.OBJECT_SUBREQUEST | |
| 222 typeMap.PING | | 241 typeMap.PING | |
| 223 typeMap.OTHER)) | 242 typeMap.OTHER)) |
| 243 { |
| 224 types.push("raw"); | 244 types.push("raw"); |
| 225 if (filter.contentType & typeMap.SUBDOCUMENT) | 245 } |
| 246 if (contentType & typeMap.SUBDOCUMENT) |
| 226 types.push("document"); | 247 types.push("document"); |
| 227 | 248 |
| 228 return types; | 249 return types; |
| 229 } | 250 } |
| 230 | 251 |
| 252 function makeRuleCopies(trigger, action, urlSchemes) |
| 253 { |
| 254 let copies = []; |
| 255 |
| 256 // Always make a deep copy of the rule, since rules may have to be |
| 257 // manipulated individually at a later stage. |
| 258 let stringifiedTrigger = JSON.stringify(trigger); |
| 259 |
| 260 let filterPattern = trigger["url-filter"].substring(1); |
| 261 let startIndex = 0; |
| 262 |
| 263 // If the URL filter already begins with the first URL scheme pattern, skip |
| 264 // it. |
| 265 if (trigger["url-filter"].startsWith("^" + urlSchemes[0])) |
| 266 { |
| 267 filterPattern = filterPattern.substring(urlSchemes[0].length); |
| 268 startIndex = 1; |
| 269 } |
| 270 else |
| 271 { |
| 272 filterPattern = ".*" + filterPattern; |
| 273 } |
| 274 |
| 275 for (let i = startIndex; i < urlSchemes.length; i++) |
| 276 { |
| 277 let copyTrigger = Object.assign(JSON.parse(stringifiedTrigger), { |
| 278 "url-filter": "^" + urlSchemes[i] + filterPattern |
| 279 }); |
| 280 copies.push({trigger: copyTrigger, action}); |
| 281 } |
| 282 |
| 283 return copies; |
| 284 } |
| 285 |
| 231 function convertFilterAddRules(rules, filter, action, withResourceTypes, | 286 function convertFilterAddRules(rules, filter, action, withResourceTypes, |
| 232 exceptionDomains) | 287 exceptionDomains) |
| 233 { | 288 { |
| 234 let parsed = parseFilterRegexpSource(filter.regexpSource); | 289 let contentType = filter.contentType; |
| 290 |
| 291 // Support WebSocket and WebRTC only if they're the only option. If we try to |
| 292 // support them otherwise (e.g. $xmlhttprequest,websocket,webrtc), we end up |
| 293 // having to generate multiple rules, which bloats the rule set and is not |
| 294 // really necessary in practice. |
| 295 if ((contentType & typeMap.WEBSOCKET && contentType != typeMap.WEBSOCKET) || |
| 296 (contentType & typeMap.WEBRTC && contentType != typeMap.WEBRTC)) |
| 297 { |
| 298 contentType &= ~(typeMap.WEBSOCKET | typeMap.WEBRTC); |
| 299 } |
| 300 |
| 301 let urlSchemes = getURLSchemes(contentType); |
| 302 let parsed = parseFilterRegexpSource(filter.regexpSource, urlSchemes[0]); |
| 235 | 303 |
| 236 // For the special case of $document whitelisting filters with just a domain | 304 // For the special case of $document whitelisting filters with just a domain |
| 237 // we can generate an equivalent blocking rule exception using if-domain. | 305 // we can generate an equivalent blocking rule exception using if-domain. |
| 238 if (filter instanceof filterClasses.WhitelistFilter && | 306 if (filter instanceof filterClasses.WhitelistFilter && |
| 239 filter.contentType & typeMap.DOCUMENT && | 307 contentType & typeMap.DOCUMENT && |
| 240 parsed.justHostname) | 308 parsed.justHostname) |
| 241 { | 309 { |
| 242 rules.push({ | 310 rules.push({ |
| 243 trigger: { | 311 trigger: { |
| 244 "url-filter": ".*", | 312 "url-filter": ".*", |
| 245 "if-domain": ["*" + parsed.hostname] | 313 "if-domain": ["*" + parsed.hostname] |
| 246 }, | 314 }, |
| 247 action: {type: "ignore-previous-rules"} | 315 action: {type: "ignore-previous-rules"} |
| 248 }); | 316 }); |
| 249 // If the filter contains other supported options we'll need to generate | 317 // If the filter contains other supported options we'll need to generate |
| 250 // further rules for it, but if not we can simply return now. | 318 // further rules for it, but if not we can simply return now. |
| 251 if (!(filter.contentType & whitelistableRequestTypes)) | 319 if (!(contentType & whitelistableRequestTypes)) |
| 252 return; | 320 return; |
| 253 } | 321 } |
| 254 | 322 |
| 255 let trigger = {"url-filter": parsed.regexp}; | 323 let trigger = {"url-filter": parsed.regexp}; |
| 256 | 324 |
| 257 // Limit rules to HTTP(S) URLs | 325 // If the URL filter begins with one of the URL schemes for this content |
| 258 if (!/^(\^|http)/i.test(trigger["url-filter"])) | 326 // type, we generate additional rules for all the URL scheme patterns; |
| 259 trigger["url-filter"] = "^https?://.*" + trigger["url-filter"]; | 327 // otherwise, if the start of the URL filter literally matches the first URL |
| 328 // scheme pattern, we just generate additional rules for the remaining URL |
| 329 // scheme patterns. |
| 330 // |
| 331 // For example, "stun:foo$webrtc" will give us "stun:foo", then we add a "^" |
| 332 // in front of this and generate two additional rules for |
| 333 // "^stuns?:.*stun:foo" and "^turns?:.*stun:foo". On the other hand, |
| 334 // "||foo$webrtc" will give us "^stuns?:([^/]+\\.)?foo", so we just generate |
| 335 // "^turns?:([^/]+\\.)?foo" in addition. |
| 336 // |
| 337 // Note that the filter can be already anchored to the beginning |
| 338 // (e.g. "|stun:foo$webrtc"), in which case we do not generate any additional |
| 339 // rules. |
| 340 let needAltRules = trigger["url-filter"][0] != "^" || |
| 341 trigger["url-filter"].startsWith("^" + urlSchemes[0]); |
| 342 |
| 343 if (trigger["url-filter"][0] != "^") |
| 344 { |
| 345 if (!urlSchemes.some(scheme => new RegExp("^" + scheme) |
| 346 .test(trigger["url-filter"]))) |
| 347 { |
| 348 trigger["url-filter"] = urlSchemes[0] + ".*" + trigger["url-filter"]; |
| 349 } |
| 350 |
| 351 trigger["url-filter"] = "^" + trigger["url-filter"]; |
| 352 } |
| 260 | 353 |
| 261 // For rules containing only a hostname we know that we're matching against | 354 // For rules containing only a hostname we know that we're matching against |
| 262 // a lowercase string unless the matchCase option was passed. | 355 // a lowercase string unless the matchCase option was passed. |
| 263 if (parsed.canSafelyMatchAsLowercase && !filter.matchCase) | 356 if (parsed.canSafelyMatchAsLowercase && !filter.matchCase) |
| 264 trigger["url-filter"] = trigger["url-filter"].toLowerCase(); | 357 trigger["url-filter"] = trigger["url-filter"].toLowerCase(); |
| 265 | 358 |
| 266 if (parsed.canSafelyMatchAsLowercase || filter.matchCase) | 359 if (parsed.canSafelyMatchAsLowercase || filter.matchCase) |
| 267 trigger["url-filter-is-case-sensitive"] = true; | 360 trigger["url-filter-is-case-sensitive"] = true; |
| 268 | 361 |
| 269 let included = []; | 362 let included = []; |
| 270 let excluded = []; | 363 let excluded = []; |
| 271 | 364 |
| 272 parseDomains(filter.domains, included, excluded); | 365 parseDomains(filter.domains, included, excluded); |
| 273 | 366 |
| 274 if (exceptionDomains) | 367 if (exceptionDomains) |
| 275 excluded = excluded.concat(exceptionDomains); | 368 excluded = excluded.concat(exceptionDomains); |
| 276 | 369 |
| 277 if (withResourceTypes) | 370 if (withResourceTypes) |
| 278 { | 371 { |
| 279 trigger["resource-type"] = getResourceTypes(filter); | 372 trigger["resource-type"] = getResourceTypes(contentType); |
| 280 | 373 |
| 281 if (trigger["resource-type"].length == 0) | 374 if (trigger["resource-type"].length == 0) |
| 282 return; | 375 return; |
| 283 } | 376 } |
| 284 | 377 |
| 285 if (filter.thirdParty != null) | 378 if (filter.thirdParty != null) |
| 286 trigger["load-type"] = [filter.thirdParty ? "third-party" : "first-party"]; | 379 trigger["load-type"] = [filter.thirdParty ? "third-party" : "first-party"]; |
| 287 | 380 |
| 288 if (included.length > 0) | 381 if (included.length > 0) |
| 289 { | 382 { |
| (...skipping 19 matching lines...) Expand all Loading... |
| 309 trigger["if-domain"].push("*" + name); | 402 trigger["if-domain"].push("*" + name); |
| 310 } | 403 } |
| 311 } | 404 } |
| 312 } | 405 } |
| 313 else if (excluded.length > 0) | 406 else if (excluded.length > 0) |
| 314 { | 407 { |
| 315 trigger["unless-domain"] = excluded.map(name => "*" + name); | 408 trigger["unless-domain"] = excluded.map(name => "*" + name); |
| 316 } | 409 } |
| 317 | 410 |
| 318 rules.push({trigger: trigger, action: {type: action}}); | 411 rules.push({trigger: trigger, action: {type: action}}); |
| 412 |
| 413 if (needAltRules) |
| 414 { |
| 415 // Generate additional rules for any alternative URL schemes. |
| 416 for (let altRule of makeRuleCopies(trigger, {type: action}, urlSchemes)) |
| 417 rules.push(altRule); |
| 418 } |
| 319 } | 419 } |
| 320 | 420 |
| 321 function hasNonASCI(obj) | 421 function hasNonASCI(obj) |
| 322 { | 422 { |
| 323 if (typeof obj == "string") | 423 if (typeof obj == "string") |
| 324 { | 424 { |
| 325 if (/[^\x00-\x7F]/.test(obj)) | 425 if (/[^\x00-\x7F]/.test(obj)) |
| 326 return true; | 426 return true; |
| 327 } | 427 } |
| 328 | 428 |
| (...skipping 203 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 532 { | 632 { |
| 533 convertFilterAddRules(rules, filter, "block", true, | 633 convertFilterAddRules(rules, filter, "block", true, |
| 534 requestFilterExceptionDomains); | 634 requestFilterExceptionDomains); |
| 535 } | 635 } |
| 536 | 636 |
| 537 for (let filter of this.requestExceptions) | 637 for (let filter of this.requestExceptions) |
| 538 convertFilterAddRules(rules, filter, "ignore-previous-rules", true); | 638 convertFilterAddRules(rules, filter, "ignore-previous-rules", true); |
| 539 | 639 |
| 540 return rules.filter(rule => !hasNonASCI(rule)); | 640 return rules.filter(rule => !hasNonASCI(rule)); |
| 541 }; | 641 }; |
| OLD | NEW |