lib/abp2blocklist.js - Issue 29473555: Issue 5345 - Whitelist $elemhide and $generichide domains where possible

Delta Between Two Patch Sets: lib/abp2blocklist.js

Issue 29473555: Issue 5345 - Whitelist $elemhide and $generichide domains where possible (Closed) Base URL: https://hg.adblockplus.org/abp2blocklist

Left Patch Set: Generate unless-domain value outside while loop Created July 11, 2017, 11:14 a.m.

Right Patch Set: Rebase Created July 11, 2017, 5:28 p.m.

Left:
Right:

Use n/p to move between diff chunks; N/P to move between comments.

Jump to:

Left: Side by side diff | Download
Right: Side by side diff | Download

LEFT	RIGHT
1 /*	1 /*

2 * This file is part of Adblock Plus <https://adblockplus.org/>,	2 * This file is part of Adblock Plus <https://adblockplus.org/>,

3 * Copyright (C) 2006-2017 eyeo GmbH	3 * Copyright (C) 2006-2017 eyeo GmbH

4 *	4 *

5 * Adblock Plus is free software: you can redistribute it and/or modify	5 * Adblock Plus is free software: you can redistribute it and/or modify

6 * it under the terms of the GNU General Public License version 3 as	6 * it under the terms of the GNU General Public License version 3 as

7 * published by the Free Software Foundation.	7 * published by the Free Software Foundation.

8 *	8 *

9 * Adblock Plus is distributed in the hope that it will be useful,	9 * Adblock Plus is distributed in the hope that it will be useful,

10 * but WITHOUT ANY WARRANTY; without even the implied warranty of	10 * but WITHOUT ANY WARRANTY; without even the implied warranty of

(...skipping 103 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
114 * case, a hostname string (or undefined) and a bool	114 * case, a hostname string (or undefined) and a bool

115 * indicating if the source only contains a hostname or not:	115 * indicating if the source only contains a hostname or not:

116 * {regexp: "...",	116 * {regexp: "...",

117 * canSafelyMatchAsLowercase: true/false,	117 * canSafelyMatchAsLowercase: true/false,

118 * hostname: "...",	118 * hostname: "...",

119 * justHostname: true/false}	119 * justHostname: true/false}

120 */	120 */

121 function parseFilterRegexpSource(text)	121 function parseFilterRegexpSource(text)

122 {	122 {

123 let regexp = [];	123 let regexp = [];

124 let lastIndex = text.length - 1;	124

	125 // Convert the text into an array of Unicode characters.

	126 //

	127 // In the case of surrogate pairs (the smiley emoji, for example), one

	128 // Unicode code point is represented by two JavaScript characters together.

	129 // We want to iterate over Unicode code points rather than JavaScript

	130 // characters.

	131 let characters = Array.from(text);

	132

	133 let lastIndex = characters.length - 1;

125 let hostname;	134 let hostname;

126 let hostnameStart = null;	135 let hostnameStart = null;

127 let hostnameFinished = false;	136 let hostnameFinished = false;

128 let justHostname = false;	137 let justHostname = false;

129 let canSafelyMatchAsLowercase = false;	138 let canSafelyMatchAsLowercase = false;

130	139

131 for (let i = 0; i < text.length; i++)	140 for (let i = 0; i < characters.length; i++)

132 {	141 {

133 let c = text[i];	142 let c = characters[i];

134	143

135 if (hostnameFinished)	144 if (hostnameFinished)

136 justHostname = false;	145 justHostname = false;

137	146

138 // If we're currently inside the hostname we have to be careful not to	147 // If we're currently inside the hostname we have to be careful not to

139 // escape any characters until after we have converted it to punycode.	148 // escape any characters until after we have converted it to punycode.

140 if (hostnameStart != null && !hostnameFinished)	149 if (hostnameStart != null && !hostnameFinished)

141 {	150 {

142 let endingChar = (c == "*" \|\| c == "^" \|\|	151 let endingChar = (c == "*" \|\| c == "^" \|\|

143 c == "?" \|\| c == "/" \|\| c == "\|");	152 c == "?" \|\| c == "/" \|\| c == "\|");

144 if (!endingChar && i != lastIndex)	153 if (!endingChar && i != lastIndex)

145 continue;	154 continue;

146	155

147 hostname = punycode.toASCII(	156 hostname = punycode.toASCII(

148 text.substring(hostnameStart, endingChar ? i : i + 1).toLowerCase()	157 characters.slice(hostnameStart, endingChar ? i : i + 1).join("")
	Manish Jethani 2017/07/12 08:59:56 This was the only conflict during rebase, since text (String) changed to characters (Array) in another commit.
	158 .toLowerCase()

149 );	159 );

150 hostnameFinished = justHostname = true;	160 hostnameFinished = justHostname = true;

151 regexp.push(escapeRegExp(hostname));	161 regexp.push(escapeRegExp(hostname));

152 if (!endingChar)	162 if (!endingChar)

153 break;	163 break;

154 }	164 }

155	165

156 switch (c)	166 switch (c)

157 {	167 {

158 case "*":	168 case "*":

159 if (regexp.length > 0 && i < lastIndex && text[i + 1] != "*")	169 if (regexp.length > 0 && i < lastIndex && characters[i + 1] != "*")

160 regexp.push(".*");	170 regexp.push(".*");

161 break;	171 break;

162 case "^":	172 case "^":

163 if (i < lastIndex)	173 if (i < lastIndex)

164 regexp.push(".");	174 regexp.push(".");

165 break;	175 break;

166 case "\|":	176 case "\|":

167 if (i == 0)	177 if (i == 0)

168 {	178 {

169 regexp.push("^");	179 regexp.push("^");

170 break;	180 break;

171 }	181 }

172 if (i == lastIndex)	182 if (i == lastIndex)

173 {	183 {

174 regexp.push("$");	184 regexp.push("$");

175 break;	185 break;

176 }	186 }

177 if (i == 1 && text[0] == "\|")	187 if (i == 1 && characters[0] == "\|")

178 {	188 {

179 hostnameStart = i + 1;	189 hostnameStart = i + 1;

180 canSafelyMatchAsLowercase = true;	190 canSafelyMatchAsLowercase = true;

181 regexp.push("https?://([^/]+\\.)?");	191 regexp.push("https?://([^/]+\\.)?");

182 break;	192 break;

183 }	193 }

184 regexp.push("\\\|");	194 regexp.push("\\\|");

185 break;	195 break;

186 case "/":	196 case "/":

187 if (!hostnameFinished &&	197 if (!hostnameFinished &&

188 text.charAt(i-2) == ":" && text.charAt(i-1) == "/")	198 characters[i - 2] == ":" && characters[i - 1] == "/")

189 {	199 {

190 hostnameStart = i + 1;	200 hostnameStart = i + 1;

191 canSafelyMatchAsLowercase = true;	201 canSafelyMatchAsLowercase = true;

192 }	202 }

193 regexp.push("/");	203 regexp.push("/");

194 break;	204 break;

195 case ".": case "+": case "$": case "?":	205 case ".": case "+": case "$": case "?":

196 case "{": case "}": case "(": case ")":	206 case "{": case "}": case "(": case ")":

197 case "[": case "]": case "\\":	207 case "[": case "]": case "\\":

198 regexp.push("\\", c);	208 regexp.push("\\", c);

199 break;	209 break;

200 default:	210 default:

201 if (hostnameFinished && (c >= "a" && c <= "z" \|\|	211 if (hostnameFinished && (c >= "a" && c <= "z" \|\|

202 c >= "A" && c <= "Z"))	212 c >= "A" && c <= "Z"))

203 canSafelyMatchAsLowercase = false;	213 canSafelyMatchAsLowercase = false;

204 regexp.push(c);	214 regexp.push(c == "%" ? c : encodeURI(c));

205 }	215 }

206 }	216 }

207	217

208 return {	218 return {

209 regexp: regexp.join(""),	219 regexp: regexp.join(""),

210 canSafelyMatchAsLowercase: canSafelyMatchAsLowercase,	220 canSafelyMatchAsLowercase: canSafelyMatchAsLowercase,

211 hostname: hostname,	221 hostname: hostname,

212 justHostname: justHostname	222 justHostname: justHostname

213 };	223 };

214 }	224 }

(...skipping 66 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
281 let included = [];	291 let included = [];

282 let excluded = [];	292 let excluded = [];

283	293

284 parseDomains(filter.domains, included, excluded);	294 parseDomains(filter.domains, included, excluded);

285	295

286 if (exceptionDomains)	296 if (exceptionDomains)

287 excluded = excluded.concat(exceptionDomains);	297 excluded = excluded.concat(exceptionDomains);

288	298

289 if (withResourceTypes)	299 if (withResourceTypes)

290 {	300 {

291 trigger["resource-type"] = getResourceTypes(filter);	301 let resourceTypes = getResourceTypes(filter);

292	302

293 if (trigger["resource-type"].length == 0)	303 // Content blocker rules can't differentiate between sub-document requests

	304 // (iframes) and top-level document requests. To avoid too many false

	305 // positives, we prevent rules with no hostname part from blocking document

	306 // requests.

	307 //

	308 // Once Safari 11 becomes our minimum supported version, we could change

	309 // our approach here to use the new "unless-top-url" property instead.

	310 if (filter instanceof filterClasses.BlockingFilter && !parsed.hostname)

	311 resourceTypes = resourceTypes.filter(type => type != "document");

	312

	313 if (resourceTypes.length == 0)

294 return;	314 return;

	315

	316 trigger["resource-type"] = resourceTypes;

295 }	317 }

296	318

297 if (filter.thirdParty != null)	319 if (filter.thirdParty != null)

298 trigger["load-type"] = [filter.thirdParty ? "third-party" : "first-party"];	320 trigger["load-type"] = [filter.thirdParty ? "third-party" : "first-party"];

299	321

300 if (included.length > 0)	322 if (included.length > 0)

301 {	323 {

302 trigger["if-domain"] = [];	324 trigger["if-domain"] = [];

303	325

304 for (let name of included)	326 for (let name of included)

(...skipping 15 matching lines...) Expand all Loading...
320 {	342 {

321 trigger["if-domain"].push("*" + name);	343 trigger["if-domain"].push("*" + name);

322 }	344 }

323 }	345 }

324 }	346 }

325 else if (excluded.length > 0)	347 else if (excluded.length > 0)

326 {	348 {

327 trigger["unless-domain"] = excluded.map(name => "*" + name);	349 trigger["unless-domain"] = excluded.map(name => "*" + name);

328 }	350 }

329 else if (filter instanceof filterClasses.BlockingFilter &&	351 else if (filter instanceof filterClasses.BlockingFilter &&

330 filter.contentType & typeMap.SUBDOCUMENT)	352 filter.contentType & typeMap.SUBDOCUMENT && parsed.hostname)

331 {	353 {

	354 // Rules with a hostname part are still allowed to block document requests,

	355 // but we add an exception for top-level documents.

	356 //

	357 // Note that we can only do this if there's no "unless-domain" property for

	358 // now. This also only works in Safari 11 onwards, while older versions

	359 // simply ignore this property. Once Safari 11 becomes our minimum

	360 // supported version, we can merge "unless-domain" into "unless-top-url".

332 trigger["unless-top-url"] = [trigger["url-filter"]];	361 trigger["unless-top-url"] = [trigger["url-filter"]];

333 if (trigger["url-filter-is-case-sensitive"])	362 if (trigger["url-filter-is-case-sensitive"])

334 trigger["top-url-filter-is-case-sensitive"] = true;	363 trigger["top-url-filter-is-case-sensitive"] = true;

335 }	364 }

336	365

337 rules.push({trigger: trigger, action: {type: action}});	366 rules.push({trigger: trigger, action: {type: action}});

338 }

339

340 function hasNonASCI(obj)

341 {

342 if (typeof obj == "string")

343 {

344 if (/[^\x00-\x7F]/.test(obj))

345 return true;

346 }

347

348 if (typeof obj == "object")

349 {

350 if (obj instanceof Array)

351 for (let item of obj)

352 if (hasNonASCI(item))

353 return true;

354

355 let names = Object.getOwnPropertyNames(obj);

356 for (let name of names)

357 if (hasNonASCI(obj[name]))

358 return true;

359 }

360

361 return false;

362 }	367 }

363	368

364 function convertIDSelectorsToAttributeSelectors(selector)	369 function convertIDSelectorsToAttributeSelectors(selector)

365 {	370 {

366 // First we figure out where all the IDs are	371 // First we figure out where all the IDs are

367 let sep = "";	372 let sep = "";

368 let start = null;	373 let start = null;

369 let positions = [];	374 let positions = [];

370 for (let i = 0; i < selector.length; i++)	375 for (let i = 0; i < selector.length; i++)

371 {	376 {

(...skipping 35 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
407 newSelector.push('[id=', selector.substring(pos.start + 1, pos.end), ']');	412 newSelector.push('[id=', selector.substring(pos.start + 1, pos.end), ']');

408 i = pos.end;	413 i = pos.end;

409 }	414 }

410 newSelector.push(selector.substring(i));	415 newSelector.push(selector.substring(i));

411	416

412 return newSelector.join("");	417 return newSelector.join("");

413 }	418 }

414	419

415 function addCSSRules(rules, selectors, matchDomain, exceptionDomains)	420 function addCSSRules(rules, selectors, matchDomain, exceptionDomains)

416 {	421 {

417 let unlessDomain = exceptionDomains.size > 0 ? [] : null;	422 let unlessDomain = exceptionDomains.size > 0 ? [] : null;
kzar 2017/07/11 12:20:03 Nit: This seems like overkill since `[]` is falsey anyway.
Manish Jethani 2017/07/11 16:28:39 (in reply to kzar, 2017/07/11 12:20:03) [] evaluates to true. If you mean checking the length property, we could do that, but then I'm just wondering why since it's inside the loop.
kzar 2017/07/11 16:30:36 (in reply to Manish Jethani, 2017/07/11 16:28:39) Whoops, you're right.
418	423

419 exceptionDomains.forEach(name => unlessDomain.push("*" + name));	424 exceptionDomains.forEach(name => unlessDomain.push("*" + name));

420	425

421 while (selectors.length)	426 while (selectors.length)

422 {	427 {

423 let selector = selectors.splice(0, selectorLimit).join(", ");	428 let selector = selectors.splice(0, selectorLimit).join(", ");

424	429

425 // As of Safari 9.0 element IDs are matched as lowercase. We work around	430 // As of Safari 9.0 element IDs are matched as lowercase. We work around

426 // this by converting to the attribute format [id="elementID"]	431 // this by converting to the attribute format [id="elementID"]

427 selector = convertIDSelectorsToAttributeSelectors(selector);	432 selector = convertIDSelectorsToAttributeSelectors(selector);

(...skipping 144 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
572	577

573 for (let filter of this.requestFilters)	578 for (let filter of this.requestFilters)

574 {	579 {

575 convertFilterAddRules(rules, filter, "block", true,	580 convertFilterAddRules(rules, filter, "block", true,

576 requestFilterExceptionDomains);	581 requestFilterExceptionDomains);

577 }	582 }

578	583

579 for (let filter of this.requestExceptions)	584 for (let filter of this.requestExceptions)

580 convertFilterAddRules(rules, filter, "ignore-previous-rules", true);	585 convertFilterAddRules(rules, filter, "ignore-previous-rules", true);

581	586

582 return rules.filter(rule => !hasNonASCI(rule));	587 return rules;

583 };	588 };

LEFT	RIGHT