compiled/filter/ElemHideBase.cpp - Issue 29595633: Issue 5870 - Implement the new ElemHideEmulation filter type

Delta Between Two Patch Sets: compiled/filter/ElemHideBase.cpp

Issue 29595633: Issue 5870 - Implement the new ElemHideEmulation filter type (Closed) Base URL: https://hg.adblockplus.org/adblockpluscore/

Left Patch Set: Reworked the logic and added tests Created Jan. 31, 2018, 9:07 p.m.

Right Patch Set: Deal with ill formed filters. Created Feb. 14, 2018, 5:05 p.m.

Left:
Right:

Use n/p to move between diff chunks; N/P to move between comments.

Jump to:

Left: Side by side diff | Download
Right: Side by side diff | Download

LEFT	RIGHT
1 /*	1 /*

2 * This file is part of Adblock Plus <https://adblockplus.org/>,	2 * This file is part of Adblock Plus <https://adblockplus.org/>,

3 * Copyright (C) 2006-present eyeo GmbH	3 * Copyright (C) 2006-present eyeo GmbH

4 *	4 *

5 * Adblock Plus is free software: you can redistribute it and/or modify	5 * Adblock Plus is free software: you can redistribute it and/or modify

6 * it under the terms of the GNU General Public License version 3 as	6 * it under the terms of the GNU General Public License version 3 as

7 * published by the Free Software Foundation.	7 * published by the Free Software Foundation.

8 *	8 *

9 * Adblock Plus is distributed in the hope that it will be useful,	9 * Adblock Plus is distributed in the hope that it will be useful,

10 * but WITHOUT ANY WARRANTY; without even the implied warranty of	10 * but WITHOUT ANY WARRANTY; without even the implied warranty of

11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the	11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

12 * GNU General Public License for more details.	12 * GNU General Public License for more details.

13 *	13 *

14 * You should have received a copy of the GNU General Public License	14 * You should have received a copy of the GNU General Public License

15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.	15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.

16 */	16 */

17	17

18 #include <cstring>	18 #include <cstring>

19	19

20 #include "ElemHideBase.h"	20 #include "ElemHideBase.h"

21 #include "../StringScanner.h"	21 #include "../StringScanner.h"

	22 #include "../Utils.h"

	23

	24 ABP_NS_USING

22	25

23 namespace	26 namespace

24 {	27 {

25 void NormalizeWhitespace(DependentString& text, String::size_type& domainsEnd,	28 void NormalizeWhitespace(DependentString& text, String::size_type& domainsEnd,

26 String::size_type& selectorStart)	29 String::size_type& selectorStart)

27 {	30 {

28 // For element hiding filters we only want to remove spaces preceding the	31 // For element hiding filters we only want to remove spaces preceding the

29 // selector part. The positions we've determined already have to be adjusted	32 // selector part. The positions we've determined already have to be adjusted

30 // accordingly.	33 // accordingly.

31	34

(...skipping 10 matching lines...) Expand all Loading...
42 // Only spaces before selectorStart position should be removed.	45 // Only spaces before selectorStart position should be removed.

43 if (pos < selectorStart && text[pos] == ' ')	46 if (pos < selectorStart && text[pos] == ' ')

44 delta++;	47 delta++;

45 else	48 else

46 text[pos - delta] = text[pos];	49 text[pos - delta] = text[pos];

47 }	50 }

48 selectorStart -= delta;	51 selectorStart -= delta;

49	52

50 text.reset(text, 0, len - delta);	53 text.reset(text, 0, len - delta);

51 }	54 }

	55

	56 static constexpr String::value_type ELEM_HIDE_DELIMITER[] = u"##";

	57 static constexpr String::size_type ELEM_HIDE_DELIMITER_LEN = str_length_of(ELE M_HIDE_DELIMITER);

	58

	59 static constexpr String::value_type ELEM_HIDE_EMULATION_DELIMITER[] = u"#?#";

	60 static constexpr String::size_type ELEM_HIDE_EMULATION_DELIMITER_LEN = str_len gth_of(ELEM_HIDE_EMULATION_DELIMITER);

	61

	62 static constexpr String::value_type OLD_PROPS_SELECTOR[] = u"[-abp-properties= ";

	63 static constexpr String::size_type OLD_PROPS_SELECTOR_LEN = str_length_of(OLD_ PROPS_SELECTOR);

	64

	65 static constexpr String::value_type PROPS_SELECTOR[] = u":-abp-properties(";

	66 static constexpr String::size_type PROPS_SELECTOR_LEN = str_length_of(PROPS_SE LECTOR);

52 }	67 }

53	68

54 ElemHideBase::ElemHideBase(Type type, const String& text, const ElemHideData& da ta)	69 ElemHideBase::ElemHideBase(Type type, const String& text, const ElemHideData& da ta)

55 : ActiveFilter(type, text, false),	70 : ActiveFilter(type, text, false), mData(data)

56 mData(data)

57 {	71 {

58 if (mData.HasDomains())	72 if (mData.HasDomains())

59 ParseDomains(mData.GetDomainsSource(mText), u',');	73 ParseDomains(mData.GetDomainsSource(mText), u',');

60 }	74 }

61	75

62 Filter::Type ElemHideBase::Parse(DependentString& text, ElemHideData& data)	76 Filter::Type ElemHideBase::Parse(DependentString& text, ElemHideData& data, bool & needConversion)

63 {	77 {

	78 needConversion = false;

	79

64 StringScanner scanner(text);	80 StringScanner scanner(text);

65	81

66 // Domains part	82 // Domains part

67 bool seenSpaces = false;	83 bool seenSpaces = false;

68 while (!scanner.done())	84 while (!scanner.done())

69 {	85 {

70 String::value_type next = scanner.next();	86 String::value_type next = scanner.next();

71 if (next == u'#')	87 if (next == u'#')

72 {	88 {

73 data.mDomainsEnd = scanner.position();	89 data.mDomainsEnd = scanner.position();

(...skipping 28 matching lines...) Expand all Loading...
102 return Type::UNKNOWN;	118 return Type::UNKNOWN;

103	119

104 // Selector part	120 // Selector part

105	121

106 // Selector shouldn't be empty	122 // Selector shouldn't be empty

107 seenSpaces \|= scanner.skip(u' ');	123 seenSpaces \|= scanner.skip(u' ');

108 if (scanner.done())	124 if (scanner.done())

109 return Type::UNKNOWN;	125 return Type::UNKNOWN;

110	126

111 data.mSelectorStart = scanner.position() + 1;	127 data.mSelectorStart = scanner.position() + 1;

112 data.mNeedConversion = false;

113	128

114 // We are done validating, now we can normalize whitespace and the domain part	129 // We are done validating, now we can normalize whitespace and the domain part

115 if (seenSpaces)	130 if (seenSpaces)

116 NormalizeWhitespace(text, data.mDomainsEnd, data.mSelectorStart);	131 NormalizeWhitespace(text, data.mDomainsEnd, data.mSelectorStart);

117 DependentString(text, 0, data.mDomainsEnd).toLower();	132 DependentString(text, 0, data.mDomainsEnd).toLower();

118	133

119 // We still need to check the old syntax. It will be converted when	134 // We still need to check the old syntax. It will be converted when

120 // we instantiate the filter.	135 // we instantiate the filter.

121 if (!emulation &&	136 if (!emulation &&

122 text.find(u"[-abp-properties="_str, data.mSelectorStart) != text.npos)	137 text.find(OLD_PROPS_SELECTOR, data.mSelectorStart, OLD_PROPS_SELECTOR_LEN) != text.npos)

123 {	138 {

124 data.mNeedConversion = true;	139 needConversion = true;

125 emulation = !exception;	140 emulation = !exception;
sergei 2018/02/05 14:51:04: Should it be an invalid filter if it's an exception with emulation? hub 2018/02/07 04:13:35: On 2018/02/05 14:51:04, sergei wrote: > Should it be an invalid filter if it's exception with emulation? No. This is actually how exceptions for element hiding emulation are specified.
126 }	141 }

127	142

128 if (exception)	143 if (exception)

129 return Type::ELEMHIDEEXCEPTION;	144 return Type::ELEMHIDEEXCEPTION;

130	145

131 if (emulation)	146 if (emulation)

132 return Type::ELEMHIDEEMULATION;	147 return Type::ELEMHIDEEMULATION;

133	148

134 return Type::ELEMHIDE;	149 return Type::ELEMHIDE;

135 }	150 }

136	151

	152 namespace

	153 {

	154 struct Range

	155 {

	156 String::size_type start;

	157 String::size_type end;

	158 String::size_type len() const

	159 {

	160 return end - start;

	161 }

	162 String::size_type byte_len() const

	163 {

	164 return len() * sizeof(String::value_type);

	165 }

	166 };

	167 }

	168

137 // Convert filter from the old syntax to the new.	169 // Convert filter from the old syntax to the new.

138 OwnedString ElemHideBase::ConvertFilter(const String& text, String::size_type& a t)	170 DependentString ElemHideBase::ConvertFilter(String& text, String::size_type& at)

139 {	171 {

140 static const auto propsSelector = u"[-abp-properties="_str;	172 Range prefix = {at, text.find(OLD_PROPS_SELECTOR, at, OLD_PROPS_SELECTOR_LEN)} ;

141 static const auto newPropsSelector = u":-abp-properties("_str;	173 if (prefix.end == text.npos)

142 static const auto elemHideDelimiter = u"##"_str;	174 return DependentString(text);

143 auto selectorPos = text.find(propsSelector, at);	175

144 if (selectorPos != text.npos)	176 auto length = text.length();

145 {	177 Range suffix = {at, length};

146 auto length = text.length();	178 Range properties = { prefix.end + OLD_PROPS_SELECTOR_LEN, 0 };

147 auto properties = selectorPos + propsSelector.length();	179 String::value_type quote = 0;

148 String::value_type quote = 0;	180 for (auto index = properties.start;

149 bool escape = false;	181 index < length && (suffix.start == at); index++)

150 String::size_type removed = 0; // how many chars we remove	182 {

151 String::size_type end = properties;	183 auto c = text[index];

152 String::size_type quote_start = 0;	184 switch (c)

153 String::size_type quote_end = 0;	185 {

154 for (auto index = properties;	186 case u'"':

155 index < length && end == properties; index++)	187 case u'\'':

156 {	188 if (quote == 0)

157 if (escape)	189 {

158 {	190 // syntax error: we already have a quoted section.

159 escape = false;	191 if (properties.end)

160 continue;	192 return DependentString();

161 }	193

162	194 if (properties.start != index)

163 auto c = text[index];	195 return DependentString();

164 switch (c)	196

165 {	197 quote = c;

166 case '\\':	198 properties.start = index + 1;

167 escape = true;	199 }

168 break;	200 else if (quote == c)

169 case '"':	201 {

170 case '\'':	202 // end of quoted.

171 if (quote == 0)	203 quote = 0;

172 {	204 properties.end = index;

173 quote = c;	205 }

174 quote_start = index + 1;	206 break;

175 }	207 case u']':

176 else if (quote == c)	208 if (quote == 0)

177 {	209 {

178 // end of quoted.	210 if (properties.end == 0)

179 quote = 0;	211 return DependentString();

180 removed += 2;	212 if (properties.end + 1 != index)

181 quote_end = index;	213 return DependentString();

182 }	214 suffix.start = index + 1;

183 break;	215 }

184 case ']':	216 break;

185 if (quote == 0)	217 default:

186 end = index + 1; // end of properties (after ])	218 break;

187 break;	219 }

188 default:	220 }

189 break;	221

190 }	222 if (suffix.start == at)
	sergei 2018/02/27 10:56:53: Just for reference, I think it (what the whole for-loop above is doing) can be written in a simpler form, but since it's some sort of interim function it's fine.
191 }	223 return DependentString();

192	224

193 if (quote != 0)	225 String::size_type delimiter = text.find(ELEM_HIDE_DELIMITER, 0,

194 quote_end = end - 1;	226 ELEM_HIDE_DELIMITER_LEN);

195 else if (quote_end <= quote_start)	227 // +1 for the replacement of "##" by "#?#"

196 {	228 if (delimiter != text.npos)

197 // we likely didn't find a quoted content so we just take it as is.	229 at++;

198 quote_start = properties;	230 auto new_len = at + prefix.len() + PROPS_SELECTOR_LEN + properties.len() + 1 / * ) */ + suffix.len();

199 quote_end = end - 1;	231

200 }	232 assert2(new_len + 1 == length \|\| (delimiter == text.npos && new_len + 2 == len gth), u"Inconsistent length in filter conversion."_str);
	sergei 2018/02/27 10:56:53: Not important, just for reference: `length == new_len + (delimiter == text.npos ? 2 : 1)` looks shorter and maybe even more logical. hub 2018/02/27 13:32:30: On 2018/02/27 10:56:53, sergei wrote: > not important just for reference > length == new_len + (delimiter == text.npos ? 2 : 1) > looks shorted and maybe even more logical. Acknowledged.
201	233

202 // +1 for the replacement of "##" by "#?#"	234 DependentString converted(text, 0, new_len);

203 String::size_type offset = 0;	235

204	236 if (suffix.len())

205 String::size_type delimiter = text.find(elemHideDelimiter);	237 {

206 OwnedString converted(length + ((delimiter != text.npos) ? 1 : 0) - removed) ;	238 new_len -= suffix.len();

207 if (delimiter != text.npos)	239 std::memmove(converted.data() + new_len,

208 {	240 text.data() + suffix.start,

209 if (delimiter >= selectorPos)	241 suffix.byte_len());

210 return OwnedString(text);	242 }

211	243 new_len--;

212 at++;	244 // here we need to move the properties before inserting the ')'

213 std::memcpy(converted.data(), text.data(),	245 auto parens = new_len;

214 delimiter * sizeof(String::value_type));	246 if (properties.len())

215 offset += delimiter;	247 {

216 std::memcpy(converted.data() + offset, u"#?#",	248 new_len -= properties.len();

217 3 * sizeof(String::value_type));	249 std::memmove(converted.data() + new_len,

218 offset += 3;	250 text.data() + properties.start, properties.byte_len());

219 delimiter += 2;	251 }

220 // we have already parsed to past the delimiter.	252 converted[parens] = u')';

221 selectorPos -= delimiter;	253

222 }	254 new_len -= PROPS_SELECTOR_LEN;

223 else	255 std::memcpy(converted.data() + new_len,

224 delimiter = 0;	256 PROPS_SELECTOR,

225	257 PROPS_SELECTOR_LEN * sizeof(String::value_type));

226	258 if (prefix.len())

227 std::memcpy(converted.data() + offset, text.data() + delimiter,	259 {

228 selectorPos * sizeof(String::value_type));	260 new_len -= prefix.len();

229 offset += selectorPos;	261 std::memmove(converted.data() + new_len,

230	262 text.data() + prefix.start, prefix.byte_len());

231 std::memcpy(converted.data() + offset, newPropsSelector.data(),	263 }

232 newPropsSelector.length() * sizeof(String::value_type));	264

233 offset += newPropsSelector.length();	265 if (delimiter != String::npos)

234	266 {

235 std::memcpy(converted.data() + offset, text.data() + quote_start,	267 std::memcpy(converted.data() + delimiter, ELEM_HIDE_EMULATION_DELIMITER,

236 (quote_end - quote_start) * sizeof(String::value_type));	268 ELEM_HIDE_EMULATION_DELIMITER_LEN * sizeof(String::value_type));

237 offset += quote_end - quote_start;	269 }

238	270

239 std::memcpy(converted.data() + offset, u")", sizeof(String::value_type));	271 return converted;

240 offset++;

241

242 std::memcpy(converted.data() + offset, text.data() + end,

243 (length - end) * sizeof(String::value_type));

244 offset += (length - end) * sizeof(String::value_type);

245

246 return converted;

247 }

248

249 return OwnedString(text);

250 }	272 }

251	273

252 namespace	274 namespace

253 {	275 {

254 static constexpr String::value_type OPENING_CURLY_REPLACEMENT[] = u"\\7B ";	276 static constexpr String::value_type OPENING_CURLY_REPLACEMENT[] = u"\\7B ";

255 static constexpr String::value_type CLOSING_CURLY_REPLACEMENT[] = u"\\7D ";	277 static constexpr String::value_type CLOSING_CURLY_REPLACEMENT[] = u"\\7D ";

256 static constexpr String::size_type CURLY_REPLACEMENT_SIZE = sizeof(OPENING_CUR LY_REPLACEMENT) / sizeof(OPENING_CURLY_REPLACEMENT[0]) - 1;	278 static constexpr String::size_type CURLY_REPLACEMENT_SIZE = str_length_of(OPEN ING_CURLY_REPLACEMENT);

257	279

258 OwnedString EscapeCurlies(String::size_type replacementCount,	280 OwnedString EscapeCurlies(String::size_type replacementCount,

259 const DependentString& str)	281 const DependentString& str)

260 {	282 {

261 OwnedString result(str.length() + replacementCount * (CURLY_REPLACEMENT_SIZE - 1));	283 OwnedString result(str.length() + replacementCount * (CURLY_REPLACEMENT_SIZE - 1));

262	284

263 String::value_type* current = result.data();	285 String::value_type* current = result.data();

264 for (String::size_type i = 0; i < str.length(); i++)	286 for (String::size_type i = 0; i < str.length(); i++)

265 {	287 {

266 switch(str[i])	288 switch(str[i])

(...skipping 43 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
310 if (item.second && !item.first.empty())	332 if (item.second && !item.first.empty())

311 {	333 {

312 if (!result.empty())	334 if (!result.empty())

313 result.append(u',');	335 result.append(u',');

314 result.append(item.first);	336 result.append(item.first);

315 }	337 }

316 }	338 }

317 }	339 }

318 return result;	340 return result;

319 }	341 }

LEFT	RIGHT