compiled/filter/ElemHideBase.cpp - Issue 29595633: Issue 5870 - Implement the new ElemHideEmulation filter type

Side by Side Diff: compiled/filter/ElemHideBase.cpp

Issue 29595633: Issue 5870 - Implement the new ElemHideEmulation filter type (Closed) Base URL: https://hg.adblockplus.org/adblockpluscore/

Patch Set: Added missing ABP_NS macros Created Feb. 9, 2018, 8:34 p.m.

Left:
Right:

Use n/p to move between diff chunks; N/P to move between comments.

Jump to:

View unified diff | Download patch

OLD	NEW
1 /*	1 /*

2 * This file is part of Adblock Plus <https://adblockplus.org/>,	2 * This file is part of Adblock Plus <https://adblockplus.org/>,

3 * Copyright (C) 2006-present eyeo GmbH	3 * Copyright (C) 2006-present eyeo GmbH

4 *	4 *

5 * Adblock Plus is free software: you can redistribute it and/or modify	5 * Adblock Plus is free software: you can redistribute it and/or modify

6 * it under the terms of the GNU General Public License version 3 as	6 * it under the terms of the GNU General Public License version 3 as

7 * published by the Free Software Foundation.	7 * published by the Free Software Foundation.

8 *	8 *

9 * Adblock Plus is distributed in the hope that it will be useful,	9 * Adblock Plus is distributed in the hope that it will be useful,

10 * but WITHOUT ANY WARRANTY; without even the implied warranty of	10 * but WITHOUT ANY WARRANTY; without even the implied warranty of

11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the	11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

12 * GNU General Public License for more details.	12 * GNU General Public License for more details.

13 *	13 *

14 * You should have received a copy of the GNU General Public License	14 * You should have received a copy of the GNU General Public License

15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.	15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.

16 */	16 */

17	17

18 #include <cstring>	18 #include <cstring>

19	19

20 #include "ElemHideBase.h"	20 #include "ElemHideBase.h"

21 #include "../StringScanner.h"	21 #include "../StringScanner.h"

	22 #include "../Utils.h"

22	23

23 ABP_NS_USING	24 ABP_NS_USING

24	25

25 namespace	26 namespace

26 {	27 {

27 void NormalizeWhitespace(DependentString& text, String::size_type& domainsEnd,	28 void NormalizeWhitespace(DependentString& text, String::size_type& domainsEnd,

28 String::size_type& selectorStart)	29 String::size_type& selectorStart)

29 {	30 {

30 // For element hiding filters we only want to remove spaces preceding the	31 // For element hiding filters we only want to remove spaces preceding the

31 // selector part. The positions we've determined already have to be adjusted	32 // selector part. The positions we've determined already have to be adjusted

(...skipping 12 matching lines...) Expand all Loading...
44 // Only spaces before selectorStart position should be removed.	45 // Only spaces before selectorStart position should be removed.

45 if (pos < selectorStart && text[pos] == ' ')	46 if (pos < selectorStart && text[pos] == ' ')

46 delta++;	47 delta++;

47 else	48 else

48 text[pos - delta] = text[pos];	49 text[pos - delta] = text[pos];

49 }	50 }

50 selectorStart -= delta;	51 selectorStart -= delta;

51	52

52 text.reset(text, 0, len - delta);	53 text.reset(text, 0, len - delta);

53 }	54 }

	55

	56 static constexpr String::value_type ELEM_HIDE_DELIMITER[] = u"##";

	57 static constexpr String::size_type ELEM_HIDE_DELIMITER_LEN = LENGTH_OF(ELEM_HIDE_DELIMITER);

	58

	59 static constexpr String::value_type ELEM_HIDE_EMULATION_DELIMITER[] = u"#?#";

	60 static constexpr String::size_type ELEM_HIDE_EMULATION_DELIMITER_LEN = LENGTH_OF(ELEM_HIDE_EMULATION_DELIMITER);

	61

	62 static constexpr String::value_type OLD_PROPS_SELECTOR[] = u"[-abp-properties=";

	63 static constexpr String::size_type OLD_PROPS_SELECTOR_LEN = LENGTH_OF(OLD_PROPS_SELECTOR);

	64

	65 static constexpr String::value_type PROPS_SELECTOR[] = u":-abp-properties(";

	66 static constexpr String::size_type PROPS_SELECTOR_LEN = LENGTH_OF(PROPS_SELECTOR);

54 }	67 }

55	68

56 ElemHideBase::ElemHideBase(Type type, const String& text, const ElemHideData& data)	69 ElemHideBase::ElemHideBase(Type type, const String& text, const ElemHideData& data)

57 : ActiveFilter(type, text, false), mData(data)	70 : ActiveFilter(type, text, false), mData(data)

58 {	71 {

59 if (mData.HasDomains())	72 if (mData.HasDomains())

60 ParseDomains(mData.GetDomainsSource(mText), u',');	73 ParseDomains(mData.GetDomainsSource(mText), u',');

61 }	74 }

62	75

63 Filter::Type ElemHideBase::Parse(DependentString& text, ElemHideData& data)	76 Filter::Type ElemHideBase::Parse(DependentString& text, ElemHideData& data, bool& needConversion)

64 {	77 {

65 StringScanner scanner(text);	78 StringScanner scanner(text);

66	79

67 // Domains part	80 // Domains part

68 bool seenSpaces = false;	81 bool seenSpaces = false;

69 while (!scanner.done())	82 while (!scanner.done())

70 {	83 {

71 String::value_type next = scanner.next();	84 String::value_type next = scanner.next();

72 if (next == u'#')	85 if (next == u'#')

73 {	86 {

(...skipping 10 matching lines...) Expand all Loading...
84 case u'"':	97 case u'"':

85 case u'!':	98 case u'!':

86 return Type::UNKNOWN;	99 return Type::UNKNOWN;

87 case u' ':	100 case u' ':

88 seenSpaces = true;	101 seenSpaces = true;

89 break;	102 break;

90 }	103 }

91 }	104 }

92	105

93 seenSpaces |= scanner.skip(u' ');	106 seenSpaces |= scanner.skip(u' ');

	107 bool emulation = false;

94 bool exception = scanner.skipOne(u'@');	108 bool exception = scanner.skipOne(u'@');

95 if (exception)	109 if (exception)

96 seenSpaces |= scanner.skip(u' ');	110 seenSpaces |= scanner.skip(u' ');

	111 else

	112 emulation = scanner.skipOne(u'?');

97	113

98 String::value_type next = scanner.next();	114 String::value_type next = scanner.next();

99 if (next != u'#')	115 if (next != u'#')

100 return Type::UNKNOWN;	116 return Type::UNKNOWN;

101	117

102 // Selector part	118 // Selector part

103	119

104 // Selector shouldn't be empty	120 // Selector shouldn't be empty

105 seenSpaces |= scanner.skip(u' ');	121 seenSpaces |= scanner.skip(u' ');

106 if (scanner.done())	122 if (scanner.done())

107 return Type::UNKNOWN;	123 return Type::UNKNOWN;

108	124

109 data.mSelectorStart = scanner.position() + 1;	125 data.mSelectorStart = scanner.position() + 1;

110	126

111 // We are done validating, now we can normalize whitespace and the domain part	127 // We are done validating, now we can normalize whitespace and the domain part

112 if (seenSpaces)	128 if (seenSpaces)

113 NormalizeWhitespace(text, data.mDomainsEnd, data.mSelectorStart);	129 NormalizeWhitespace(text, data.mDomainsEnd, data.mSelectorStart);

114 DependentString(text, 0, data.mDomainsEnd).toLower();	130 DependentString(text, 0, data.mDomainsEnd).toLower();

115	131

	132 // We still need to check the old syntax. It will be converted when

	133 // we instantiate the filter.

	134 if (!emulation &&

	135 text.find(OLD_PROPS_SELECTOR, data.mSelectorStart, OLD_PROPS_SELECTOR_LEN) != text.npos)

	136 {

	137 needConversion = true;

	138 emulation = !exception;

	139 }

	140

116 if (exception)	141 if (exception)

117 return Type::ELEMHIDEEXCEPTION;	142 return Type::ELEMHIDEEXCEPTION;

118	143

119 if (text.find(u"[-abp-properties="_str, data.mSelectorStart) != text.npos)	144 if (emulation)

120 return Type::ELEMHIDEEMULATION;	145 return Type::ELEMHIDEEMULATION;

121	146

122 return Type::ELEMHIDE;	147 return Type::ELEMHIDE;

123 }	148 }

124	149

125 namespace	150 namespace

126 {	151 {

	152 struct Range

	153 {

	154 String::size_type start;

	155 String::size_type end;

	156 String::size_type len() const

	157 {

	158 return end - start;

	159 }

	160 String::size_type byte_len() const

	161 {

	162 return len() * sizeof(String::value_type);

	163 }

	164 };

	165 }

	166

	167 // Convert filter from the old syntax to the new.

	168 DependentString ElemHideBase::ConvertFilter(String& text, String::size_type& at)

	169 {

	170 auto length = text.length();

	171 Range prefix = {at, 0};

	172 Range suffix = {at, length};

	173 prefix.end = text.find(OLD_PROPS_SELECTOR, at, OLD_PROPS_SELECTOR_LEN);
	sergei 2018/02/12 12:53:18: Not important, but perhaps it would be better to say `Range prefix = {at, text.find(OLD_PROPS_SELECTOR, at, OLD_PROPS_SELECTOR_LEN)};`
	hub 2018/02/12 18:14:39: Done.
	174 if (prefix.end != text.npos)
	sergei 2018/02/12 12:53:19: What about returning `DependentString(text);` here in order to reduce the indentation below? Subjectively it also improves readability. BTW, `suffix` and `length` will be declared after that `if`.
	hub 2018/02/12 18:14:39: Done.
	175 {

	176 Range properties = { prefix.end + OLD_PROPS_SELECTOR_LEN, 0 };

	177 String::value_type quote = 0;

	178 bool closing = false;

	179 for (auto index = properties.start;

	180 index < length && !closing; index++)

	181 {

	182 auto c = text[index];

	183 switch (c)

	184 {

	185 case u'"':

	186 case u'\'':

	187 if (quote == 0)

	188 {

	189 // syntax error: we already have a quoted section.

	190 if (properties.end)

	191 return DependentString();

	192

	193 quote = c;

	194 properties.start = index + 1;

	195 }

	196 else if (quote == c)

	197 {

	198 // end of quoted.

	199 quote = 0;

	200 properties.end = index;

	201 }

	202 break;

	203 case u']':

	204 if (quote == 0)

	205 {

	206 if (properties.end == 0)

	207 return DependentString();

	208 suffix.start = index + 1;

	209 closing = true;
	sergei 2018/02/12 12:53:19: What about using `suffix.start` instead of `closing`? We could initialize it to zero. In addition to that, if `suffix.start` is still zero after finishing the loop then the filter is malformed because there is no closing ']'. Continuing without taking it into account can result in bad sizes below.
	hub 2018/02/12 18:14:39: `suffix.start` is initialised to `at`. So I'll use that. And I'll return an invalid string.
	210 }

	211 break;

	212 default:

	213 break;

	214 }

	215 }

	216

	217 String::size_type delimiter = text.find(ELEM_HIDE_DELIMITER, 0,

	218 ELEM_HIDE_DELIMITER_LEN);

	219 // +1 for the replacement of "##" by "#?#"

	220 if (delimiter != text.npos)

	221 at++;
	sergei 2018/02/12 12:53:20: BTW, if there is no such delimiter then is it a malformed filter string?
	hub 2018/02/12 18:14:40: No. `#@#` is valid too. We could check if it is `#@#` eventually but I don't think it is part of this.
	222 auto new_len = prefix.len() + suffix.len() + properties.len()

	223 + PROPS_SELECTOR_LEN + 1 + at;
	sergei 2018/02/12 12:53:19: What about `at + prefix.len() + PROPS_SELECTOR_LEN + properties.len() + 1 /* ')' */ + suffix.len()`?
	hub 2018/02/12 18:14:39: Done.
	224
	sergei 2018/02/12 12:53:18: Could you please add the assert here that `new_len + 1 == length || (delimiter == text.npos && new_len + 2 == length)` just in case? Despite there being tests ensuring that nothing is lost, it just looks scary :)).
	hub 2018/02/12 18:14:40: Done.
	225 DependentString converted(text, 0, new_len);

	226

	227 if (suffix.len())

	228 {

	229 new_len -= suffix.len();

	230 std::memmove(converted.data() + new_len,

	231 text.data() + suffix.start,

	232 suffix.byte_len());

	233 }

	234 new_len--;

	235 // here we need to move the properties before inserting the ')'

	236 auto parens = new_len;

	237 if (properties.len())

	238 {

	239 new_len -= properties.len();

	240 std::memmove(converted.data() + new_len,

	241 text.data() + properties.start, properties.byte_len());

	242 }

	243 converted[parens] = u')';

	244

	245 new_len -= PROPS_SELECTOR_LEN;

	246 std::memmove(converted.data() + new_len,
	sergei 2018/02/12 12:53:18: Logically it should be memcpy, but since memmove does not modify the memory accessed by the `src` argument it's fine. The same for ELEM_HIDE_EMULATION_DELIMITER below.
	hub 2018/02/12 18:14:39: You are right. Changing it to memcpy().
	247 PROPS_SELECTOR,

	248 PROPS_SELECTOR_LEN * sizeof(String::value_type));

	249 if (prefix.len())

	250 {

	251 new_len -= prefix.len();

	252 std::memmove(converted.data() + new_len,

	253 text.data() + prefix.start, prefix.byte_len());

	254 }

	255

	256 if (delimiter != String::npos)

	257 {

	258 std::memmove(converted.data() + delimiter, ELEM_HIDE_EMULATION_DELIMITER,

	259 ELEM_HIDE_EMULATION_DELIMITER_LEN * sizeof(String::value_type));

	260 }

	261

	262 return converted;

	263 }

	264

	265 return DependentString(text);

	266 }

	267

	268 namespace

	269 {

127 static constexpr String::value_type OPENING_CURLY_REPLACEMENT[] = u"\\7B ";	270 static constexpr String::value_type OPENING_CURLY_REPLACEMENT[] = u"\\7B ";

128 static constexpr String::value_type CLOSING_CURLY_REPLACEMENT[] = u"\\7D ";	271 static constexpr String::value_type CLOSING_CURLY_REPLACEMENT[] = u"\\7D ";

129 static constexpr String::size_type CURLY_REPLACEMENT_SIZE = sizeof(OPENING_CURLY_REPLACEMENT) / sizeof(OPENING_CURLY_REPLACEMENT[0]) - 1;	272 static constexpr String::size_type CURLY_REPLACEMENT_SIZE = LENGTH_OF(OPENING_CURLY_REPLACEMENT);

130	273

131 OwnedString EscapeCurlies(String::size_type replacementCount,	274 OwnedString EscapeCurlies(String::size_type replacementCount,

132 const DependentString& str)	275 const DependentString& str)

133 {	276 {

134 OwnedString result(str.length() + replacementCount * (CURLY_REPLACEMENT_SIZE - 1));	277 OwnedString result(str.length() + replacementCount * (CURLY_REPLACEMENT_SIZE - 1));

135	278

136 String::value_type* current = result.data();	279 String::value_type* current = result.data();

137 for (String::size_type i = 0; i < str.length(); i++)	280 for (String::size_type i = 0; i < str.length(); i++)

138 {	281 {

139 switch(str[i])	282 switch(str[i])

(...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
183 if (item.second && !item.first.empty())	326 if (item.second && !item.first.empty())

184 {	327 {

185 if (!result.empty())	328 if (!result.empty())

186 result.append(u',');	329 result.append(u',');

187 result.append(item.first);	330 result.append(item.first);

188 }	331 }

189 }	332 }

190 }	333 }

191 return result;	334 return result;

192 }	335 }

OLD	NEW

« compiled/filter/ElemHideBase.h ('K') | « compiled/filter/ElemHideBase.h ('k') | compiled/filter/Filter.cpp » ('j') | compiled/filter/Filter.cpp » ('J')