lib/abp2blocklist.js - Issue 29337803: Issue 3710 - Unify hostname logic

Side by Side Diff: lib/abp2blocklist.js

Issue 29337803: Issue 3710 - Unify hostname logic (Closed)

Patch Set: Created Feb. 27, 2016, 2:23 p.m.

Left:
Right:

Use n/p to move between diff chunks; N/P to move between comments.

Jump to:

View unified diff | Download patch

OLD	NEW
1 /*	1 /*

2 * This file is part of Adblock Plus <https://adblockplus.org/>,	2 * This file is part of Adblock Plus <https://adblockplus.org/>,

3 * Copyright (C) 2006-2016 Eyeo GmbH	3 * Copyright (C) 2006-2016 Eyeo GmbH

4 *	4 *

5 * Adblock Plus is free software: you can redistribute it and/or modify	5 * Adblock Plus is free software: you can redistribute it and/or modify

6 * it under the terms of the GNU General Public License version 3 as	6 * it under the terms of the GNU General Public License version 3 as

7 * published by the Free Software Foundation.	7 * published by the Free Software Foundation.

8 *	8 *

9 * Adblock Plus is distributed in the hope that it will be useful,	9 * Adblock Plus is distributed in the hope that it will be useful,

10 * but WITHOUT ANY WARRANTY; without even the implied warranty of	10 * but WITHOUT ANY WARRANTY; without even the implied warranty of

(...skipping 48 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
59 let excluded = [];	59 let excluded = [];

60 let rules = [];	60 let rules = [];

61	61

62 parseDomains(filter.domains, included, excluded);	62 parseDomains(filter.domains, included, excluded);

63	63

64 if (excluded.length == 0 && !(filter.selector in elemhideSelectorExceptions))	64 if (excluded.length == 0 && !(filter.selector in elemhideSelectorExceptions))

65 return {matchDomains: included.map(matchDomain), selector: filter.selector};	65 return {matchDomains: included.map(matchDomain), selector: filter.selector};

66 }	66 }

67	67

68 /**	68 /**

69 * Convert the given filter "regexpSource" string into a regular expression.	69 * Convert the given filter "regexpSource" string into a regular expression,

	70 * handling the conversion of unicode inside hostnames to punycode.

70 * (Also deciding if the regular expression can be safely converted to and	71 * (Also deciding if the regular expression can be safely converted to and

71 * matched as lower case or not.)	72 * matched as lower case or not.)

72 *	73 *

73 * @param {string} text regexpSource property of a filter	74 * @param {string} text regexpSource property of a filter

74 * @returns {object} An object containing a regular expression string and a bool	75 * @returns {object} An object containing a regular expression string and a bool

75 * indicating if the filter can be safely matched as lower	76 * indicating if the filter can be safely matched as lower

76 * case: {regexp: "...", caseSenstive: true/false}	77 * case: {regexp: "...", canSafelyMatchAsLowercase: true/false }

77 */	78 */

78 function toRegExp(text)	79 function toRegExp(text)

79 {	80 {

80 let result = [];	81 let result = [];

81 let lastIndex = text.length - 1;	82 let lastIndex = text.length - 1;

82 let hostnameStarted = false;	83 let hostnameStart = null;

83 let hostnameFinished = false;	84 let hostnameFinished = false;

84 let caseSensitive = false;	85 let canSafelyMatchAsLowercase = false;

85	86

86 for (let i = 0; i < text.length; i++)	87 for (let i = 0; i < text.length; i++)

87 {	88 {

88 let c = text[i];	89 let c = text[i];

89	90

	91 // If we're currently inside the hostname we have to be careful not to

	92 // escape any characters until after we have converted it to punycode.

	93 if (hostnameStart != null && !hostnameFinished)

	94 {

	95 if (c == "*" \|\| c == "^" \|\| c == "?" \|\| c == "/" \|\| i == lastIndex)
	Sebastian Noack 2016/02/27 20:30:38
	If you turn the logic here the other way around, you don't need an else block.
	    if (c != "*" && c != "^" && c != "?" && c != "/" && i < lastIndex)
	      continue;
	Sebastian Noack 2016/02/27 20:30:38
	I'm not entirely sure if the case of last index is handled correctly. So if I get the logic right you'd consider just "example.co" part of the hostname in "\|\|example.com".
	kzar 2016/02/27 21:28:53
	On 2016/02/27 20:30:38, Sebastian Noack wrote:
	> If you turn the logic here the other way around, you don't need an else block.
	>
	> if (c != "*" && c != "^" && c != "?" && c != "/" && i < lastIndex)
	>   continue;
	Done.
	kzar 2016/02/27 21:28:53
	On 2016/02/27 20:30:38, Sebastian Noack wrote:
	> I'm not entirely sure if the case of last index is handled correctly. So if I
	> get the logic right you'd consider just "example.co" part of the hostname in
	> "\|\|example.com".
	Good point, but it's even more complicated: what if we're at the last index but also the current character is one that would have ended a hostname?! I've done my best to keep the logic simple, but complexity crept in a bit.
	96 {

	97 hostnameFinished = true;

	98 let hostname = text.substring(hostnameStart, i);

	99 result.push(escapeRegExp(punycode.toASCII(hostname)));

	100 }

	101 else

	102 continue;

	103 }

	104

90 switch (c)	105 switch (c)

91 {	106 {

92 case "*":	107 case "*":

93 if (hostnameStarted)

94 hostnameFinished = true;

95 if (result.length > 0 && i < lastIndex && text[i + 1] != "*")	108 if (result.length > 0 && i < lastIndex && text[i + 1] != "*")

96 result.push(".*");	109 result.push(".*");

97 break;	110 break;

98 case "^":	111 case "^":

99 if (hostnameStarted)

100 hostnameFinished = true;

101 if (i < lastIndex)	112 if (i < lastIndex)

102 result.push(".");	113 result.push(".");

103 break;	114 break;

104 case "\|":	115 case "\|":

105 if (i == 0)	116 if (i == 0)

106 {	117 {

107 result.push("^");	118 result.push("^");

108 break;	119 break;

109 }	120 }

110 if (i == lastIndex)	121 if (i == lastIndex)

111 {	122 {

112 result.push("$");	123 result.push("$");

113 break;	124 break;

114 }	125 }

115 if (i == 1 && text[0] == "\|")	126 if (i == 1 && text[0] == "\|")

116 {	127 {

117 hostnameStarted = caseSensitive = true;

118 result.push("https?://");	128 result.push("https?://");
Sebastian Noack 2016/02/27 20:30:38
Nit: Mind moving that line to just above the break, for consistency?
kzar 2016/02/27 21:28:53
On 2016/02/27 20:30:38, Sebastian Noack wrote:
> Nit: Mind moving that line to just above the break, for consistency?
Done.
	129 hostnameStart = i + 1;

	130 canSafelyMatchAsLowercase = true;

119 break;	131 break;

120 }	132 }

121 result.push("\\", c);	133 result.push("\\\|");

122 break;	134 break;

123 case "?":	135 case "/":

124 if (hostnameStarted)	136 result.push("/");
	Sebastian Noack 2016/02/27 20:30:38
	Nit: Mind moving that line to just above the break, for consistency?
	kzar 2016/02/27 21:28:53
	On 2016/02/27 20:30:38, Sebastian Noack wrote:
	> Nit: Mind moving that line to just above the break, for consistency?
	Done.
125 hostnameFinished = true;	137 if (!hostnameFinished &&
	Sebastian Noack 2016/02/27 20:30:38
	Nit: It doesn't matter, but I personally find that it reads slightly better if you have similar code aligned when wrapping:
	    if (!hostnameFinished && text.charAt(i-2) == ":"
	                          && text.charAt(i-1) == "/")
	kzar 2016/02/27 21:28:53
	On 2016/02/27 20:30:38, Sebastian Noack wrote:
	> Nit: It doesn't matter, but I personally find that it reads slightly better if
	> you have similar code aligned when wrapping:
	>
	> if (!hostnameFinished && text.charAt(i-2) == ":"
	>                       && text.charAt(i-1) == "/")
	I'd rather leave this one as it is.
126 case ".": case "+": case "$": case "{": case "}":	138 text.charAt(i-2) == ":" && text.charAt(i-1) == "/")

	139 {

	140 hostnameStart = i + 1;

	141 canSafelyMatchAsLowercase = true;

	142 }

	143 break;

	144 case ".": case "+": case "$": case "{": case "}": case "?":
	Sebastian Noack 2016/02/27 20:30:38
	Nit: I think the way this block was originally wrapped was a little nicer:
	    case ".": case "+": case "$": case "?":
	    case "{": case "}": case "(": case ")":
	    case "[": case "]": case "\\":
	But I guess it's personal preference, so I leave it up to you.
	kzar 2016/02/27 21:28:53
	On 2016/02/27 20:30:38, Sebastian Noack wrote:
	> Nit: I think the way this block was originally wrapped was a little nicer:
	>
	> case ".": case "+": case "$": case "?":
	> case "{": case "}": case "(": case ")":
	> case "[": case "]": case "\\":
	>
	> But I guess it's personal preference, so I leave it up to you.
	Done.
127 case "(": case ")": case "[": case "]": case "\\":	145 case "(": case ")": case "[": case "]": case "\\":

128 result.push("\\", c);	146 result.push("\\", c);

129 break;	147 break;

130 case "/":

131 if (hostnameStarted)

132 hostnameFinished = true;

133 else if (text.charAt(i-2) == ":" && text.charAt(i-1) == "/")

134 hostnameStarted = caseSensitive = true;
kzar 2016/02/27 14:29:34
(I've switched this around as I decided that sneaky fall-throughs in this big switch were making things more confusing.)
135 default:	148 default:

136 if (hostnameFinished && (c >= "a" && c <= "z" \|\|	149 if (hostnameFinished && (c >= "a" && c <= "z" \|\|

137 c >= "A" && c <= "Z"))	150 c >= "A" && c <= "Z"))

138 caseSensitive = false;	151 canSafelyMatchAsLowercase = false;

139 result.push(c);	152 result.push(c);

140 }	153 }

141 }	154 }

142	155

143 return {regexp: result.join(""), caseSensitive: caseSensitive};	156 return {regexp: result.join(""),

	157 canSafelyMatchAsLowercase: canSafelyMatchAsLowercase};

144 }	158 }

145	159

146 function getRegExpTrigger(filter)	160 function getRegExpTrigger(filter)

147 {	161 {

148 let result = toRegExp(filter.regexpSource.replace(	162 let result = toRegExp(filter.regexpSource);

149 // Safari expects punycode, filter lists use unicode

150 /^(\\|\\|\|\\|?https?:\/\/)([\w\-.*\u0080-\uFFFF]+)/i,

151 function (match, prefix, domain)

152 {

153 return prefix + punycode.toASCII(domain);

154 }

155 ));

156	163

157 let trigger = {"url-filter": result.regexp};	164 let trigger = {"url-filter": result.regexp};

158	165

159 // Limit rules to to HTTP(S) URLs	166 // Limit rules to to HTTP(S) URLs

160 if (!/^(\^\|http)/i.test(trigger["url-filter"]))	167 if (!/^(\^\|http)/i.test(trigger["url-filter"]))

161 trigger["url-filter"] = "^https?://.*" + trigger["url-filter"];	168 trigger["url-filter"] = "^https?://.*" + trigger["url-filter"];

162	169

163 // For rules containing only a hostname we know that we're matching against	170 // For rules containing only a hostname we know that we're matching against

164 // a lowercase string unless the matchCase option was passed.	171 // a lowercase string unless the matchCase option was passed.

165 if (result.caseSensitive && !filter.matchCase)	172 if (result.canSafelyMatchAsLowercase && !filter.matchCase)

166 trigger["url-filter"] = trigger["url-filter"].toLowerCase();	173 trigger["url-filter"] = trigger["url-filter"].toLowerCase();

167	174

168 if (result.caseSensitive \|\| filter.matchCase)	175 if (result.canSafelyMatchAsLowercase \|\| filter.matchCase)

169 trigger["url-filter-is-case-sensitive"] = true;	176 trigger["url-filter-is-case-sensitive"] = true;

170	177

171 return trigger;	178 return trigger;

172 }	179 }

173	180

174 function getResourceTypes(filter)	181 function getResourceTypes(filter)

175 {	182 {

176 let types = [];	183 let types = [];

177	184

178 if (filter.contentType & typeMap.IMAGE)	185 if (filter.contentType & typeMap.IMAGE)

(...skipping 248 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
427	434

428 for (let filter of this.elemhideExceptions)	435 for (let filter of this.elemhideExceptions)

429 addRule(convertFilter(filter, "ignore-previous-rules", false));	436 addRule(convertFilter(filter, "ignore-previous-rules", false));

430 for (let filter of this.requestFilters)	437 for (let filter of this.requestFilters)

431 addRule(convertFilter(filter, "block", true));	438 addRule(convertFilter(filter, "block", true));

432 for (let filter of this.requestExceptions)	439 for (let filter of this.requestExceptions)

433 addRule(convertFilter(filter, "ignore-previous-rules", true));	440 addRule(convertFilter(filter, "ignore-previous-rules", true));

434	441

435 return rules;	442 return rules;

436 };	443 };

OLD	NEW

« no previous file with comments | « abp2blocklist.js ('k') | no next file » | no next file with comments »