compiled/filter/ElemHideBase.cpp - Issue 29600641: Issue 5175 - Reject element hiding filter with empty domain names

Delta Between Two Patch Sets: compiled/filter/ElemHideBase.cpp

Issue 29600641: Issue 5175 - Reject element hiding filter with empty domain names (Closed) Base URL: https://hg.adblockplus.org/adblockpluscore/

Left Patch Set: Created Nov. 7, 2017, 11:17 p.m.

Right Patch Set: Make the new test more functional Created March 6, 2018, 7:47 p.m.

Left:
Right:

Use n/p to move between diff chunks; N/P to move between comments.

Jump to:

Left: Side by side diff | Download
Right: Side by side diff | Download

LEFT	RIGHT
1 /*	1 /*

2 * This file is part of Adblock Plus <https://adblockplus.org/>,	2 * This file is part of Adblock Plus <https://adblockplus.org/>,

3 * Copyright (C) 2006-present eyeo GmbH	3 * Copyright (C) 2006-present eyeo GmbH

4 *	4 *

5 * Adblock Plus is free software: you can redistribute it and/or modify	5 * Adblock Plus is free software: you can redistribute it and/or modify

6 * it under the terms of the GNU General Public License version 3 as	6 * it under the terms of the GNU General Public License version 3 as

7 * published by the Free Software Foundation.	7 * published by the Free Software Foundation.

8 *	8 *

9 * Adblock Plus is distributed in the hope that it will be useful,	9 * Adblock Plus is distributed in the hope that it will be useful,

10 * but WITHOUT ANY WARRANTY; without even the implied warranty of	10 * but WITHOUT ANY WARRANTY; without even the implied warranty of

11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the	11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

12 * GNU General Public License for more details.	12 * GNU General Public License for more details.

13 *	13 *

14 * You should have received a copy of the GNU General Public License	14 * You should have received a copy of the GNU General Public License

15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.	15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.

16 */	16 */

17	17

18 #include <cstring>	18 #include <cstring>

19	19

20 #include "ElemHideBase.h"	20 #include "ElemHideBase.h"

21 #include "../StringScanner.h"	21 #include "../StringScanner.h"

	22 #include "../Utils.h"

	23

	24 ABP_NS_USING

22	25

23 namespace	26 namespace

24 {	27 {

25 void NormalizeWhitespace(DependentString& text, String::size_type& domainsEnd,	28 void NormalizeWhitespace(DependentString& text, String::size_type& domainsEnd,

26 String::size_type& selectorStart)	29 String::size_type& selectorStart)

27 {	30 {

28 // For element hiding filters we only want to remove spaces preceding the	31 // For element hiding filters we only want to remove spaces preceding the

29 // selector part. The positions we've determined already have to be adjusted	32 // selector part. The positions we've determined already have to be adjusted

30 // accordingly.	33 // accordingly.

31	34

(...skipping 11 matching lines...) Expand all Loading...
43 if (pos < selectorStart && text[pos] == ' ')	46 if (pos < selectorStart && text[pos] == ' ')

44 delta++;	47 delta++;

45 else	48 else

46 text[pos - delta] = text[pos];	49 text[pos - delta] = text[pos];

47 }	50 }

48 selectorStart -= delta;	51 selectorStart -= delta;

49	52

50 text.reset(text, 0, len - delta);	53 text.reset(text, 0, len - delta);

51 }	54 }

52	55

53 // Convert filter from the old syntax to the new.	56 static constexpr String::value_type ELEM_HIDE_DELIMITER[] = u"##";

54 OwnedString ConvertFilter(const String& text, String::size_type at)	57 static constexpr String::size_type ELEM_HIDE_DELIMITER_LEN = str_length_of(ELEM_HIDE_DELIMITER);

55 {	58

56 static const auto propsSelector = u"[-abp-properties="_str;	59 static constexpr String::value_type ELEM_HIDE_EMULATION_DELIMITER[] = u"#?#";

57 static const auto newPropsSelector = u":-abp-properties("_str;	60 static constexpr String::size_type ELEM_HIDE_EMULATION_DELIMITER_LEN = str_length_of(ELEM_HIDE_EMULATION_DELIMITER);

58 auto selectorPos = text.find(propsSelector, at);	61

59 if (selectorPos != text.npos)	62 static constexpr String::value_type OLD_PROPS_SELECTOR[] = u"[-abp-properties=";

60 {	63 static constexpr String::size_type OLD_PROPS_SELECTOR_LEN = str_length_of(OLD_PROPS_SELECTOR);

61 auto length = text.length();	64

62 auto properties = selectorPos + propsSelector.length();	65 static constexpr String::value_type PROPS_SELECTOR[] = u":-abp-properties(";

63 String::value_type quote = 0;	66 static constexpr String::size_type PROPS_SELECTOR_LEN = str_length_of(PROPS_SELECTOR);

64 bool escape = false;	67 }

65 String::size_type removed = 0; // how many chars we remove	68

66 String::size_type end = properties;	69 ElemHideBase::ElemHideBase(Type type, const String& text, const ElemHideData& data, const ParsedDomains& parsedDomains)

67 String::size_type quote_start = 0;	70 : ActiveFilter(type, text, false), mData(data)

68 String::size_type quote_end = 0;

69 for (auto index = properties;

70 index < length && end == properties; index++)

71 {

72 if (escape)

73 {

74 escape = false;

75 continue;

76 }

77

78 auto c = text[index];

79 switch (c)

80 {

81 case '\\':

82 escape = true;

83 break;

84 case '"':

85 case '\'':

86 if (quote == 0)

87 {

88 quote = c;

89 quote_start = index + 1;

90 }

91 else if (quote == c)

92 {

93 // end of quoted.

94 quote = 0;

95 removed += 2;

96 quote_end = index;

97 }

98 break;

99 case ']':

100 if (quote == 0)

101 end = index + 1; // end of properties (after ])

102 break;

103 default:

104 break;

105 }

106 }

107

108 if (quote != 0)

109 quote_end = end - 1;

110 else if (quote_end <= quote_start)

111 {

112 // we likely didn't find a quoted content so we just take it as is.

113 quote_start = properties;

114 quote_end = end - 1;

115 }

116

117 OwnedString converted(length - removed);

118 String::size_type offset = 0;

119 std::memcpy(converted.data(), text.data(),

120 selectorPos * sizeof(String::value_type));

121 offset += selectorPos;

122

123 std::memcpy(converted.data() + offset, newPropsSelector.data(),

124 newPropsSelector.length() * sizeof(String::value_type));

125 offset += newPropsSelector.length();

126

127 std::memcpy(converted.data() + offset, text.data() + quote_start,

128 (quote_end - quote_start) * sizeof(String::value_type));

129 offset += quote_end - quote_start;

130

131 std::memcpy(converted.data() + offset, u")", sizeof(String::value_type));

132 offset++;

133

134 std::memcpy(converted.data() + offset, text.data() + end,

135 (length - end) * sizeof(String::value_type));

136 offset += (length - end) * sizeof(String::value_type);

137

138 return converted;

139 }

140

141 return OwnedString(text);

142 }

143 }

144

145 ElemHideBase::ElemHideBase(Type type, const String& text,

146 const ElemHideData& data, const ParsedDomains& parsedDomains)

147 : ActiveFilter(type, ConvertFilter(text, data.mSelectorStart), false),

148 mData(data)

149 {	71 {

150 if (mData.HasDomains())	72 if (mData.HasDomains())

151 FillDomains(mData.GetDomainsSource(mText), parsedDomains);	73 FillDomains(mData.GetDomainsSource(mText), parsedDomains);

152 }	74 }

153	75

154 Filter::Type ElemHideBase::Parse(DependentString& text, DependentString& error,	76 Filter::Type ElemHideBase::Parse(DependentString& text, DependentString& error,

155 ElemHideData& data, ParsedDomains& parsedDomains)	77 ElemHideData& data, bool& needConversion,

156 {	78 ParsedDomains& parsedDomains)

	79 {

	80 needConversion = false;

	81

157 StringScanner scanner(text);	82 StringScanner scanner(text);

158	83

159 // Domains part	84 // Domains part

160 bool seenSpaces = false;	85 bool seenSpaces = false;

161 while (!scanner.done())	86 while (!scanner.done())

162 {	87 {

163 String::value_type next = scanner.next();	88 String::value_type next = scanner.next();

164 if (next == u'#')	89 if (next == u'#')

165 {	90 {

166 data.mDomainsEnd = scanner.position();	91 data.mDomainsEnd = scanner.position();

(...skipping 28 matching lines...) Expand all Loading...
195 return Type::UNKNOWN;	120 return Type::UNKNOWN;

196	121

197 // Selector part	122 // Selector part

198	123

199 // Selector shouldn't be empty	124 // Selector shouldn't be empty

200 seenSpaces |= scanner.skip(u' ');	125 seenSpaces |= scanner.skip(u' ');

201 if (scanner.done())	126 if (scanner.done())

202 return Type::UNKNOWN;	127 return Type::UNKNOWN;

203	128

204 data.mSelectorStart = scanner.position() + 1;	129 data.mSelectorStart = scanner.position() + 1;

205 while (!scanner.done())

206 {

207 switch (scanner.next())

208 {

209 case u'{':

210 case u'}':

211 return Type::UNKNOWN;

212 }

213 }

214	130

215 // We are done validating, now we can normalize whitespace and the domain part	131 // We are done validating, now we can normalize whitespace and the domain part

216 if (seenSpaces)	132 if (seenSpaces)

217 NormalizeWhitespace(text, data.mDomainsEnd, data.mSelectorStart);	133 NormalizeWhitespace(text, data.mDomainsEnd, data.mSelectorStart);

218 DependentString(text, 0, data.mDomainsEnd).toLower();	134 DependentString(text, 0, data.mDomainsEnd).toLower();

219	135

220 parsedDomains =	136 parsedDomains =

221 ParseDomainsInternal(data.GetDomainsSource(text), u',', false);	137 ParseDomainsInternal(data.GetDomainsSource(text), u',', false);

222 if (parsedDomains.hasEmpty)	138 if (parsedDomains.hasEmpty)

223 {	139 {

224 error = u"filter_invalid_domain"_str;	140 error = u"filter_invalid_domain"_str;

225 return Type::INVALID;	141 return Type::INVALID;

226 }	142 }

	143 // We still need to check the old syntax. It will be converted when

	144 // we instantiate the filter.

	145 if (!emulation &&

	146 text.find(OLD_PROPS_SELECTOR, data.mSelectorStart, OLD_PROPS_SELECTOR_LEN) != text.npos)

	147 {

	148 needConversion = true;

	149 emulation = !exception;

	150 }

227	151

228 if (exception)	152 if (exception)

229 return Type::ELEMHIDEEXCEPTION;	153 return Type::ELEMHIDEEXCEPTION;

230	154

231 if (emulation)	155 if (emulation)

232 return Type::ELEMHIDEEMULATION;	156 return Type::ELEMHIDEEMULATION;

233	157

234 return Type::ELEMHIDE;	158 return Type::ELEMHIDE;

	159 }

	160

	161 namespace

	162 {

	163 struct Range

	164 {

	165 String::size_type start;

	166 String::size_type end;

	167 String::size_type len() const

	168 {

	169 return end - start;

	170 }

	171 String::size_type byte_len() const

	172 {

	173 return len() * sizeof(String::value_type);

	174 }

	175 };

	176 }

	177

	178 // Convert filter from the old syntax to the new.

	179 DependentString ElemHideBase::ConvertFilter(String& text, String::size_type& at)

	180 {

	181 Range prefix = {at, text.find(OLD_PROPS_SELECTOR, at, OLD_PROPS_SELECTOR_LEN)};

	182 if (prefix.end == text.npos)

	183 return DependentString(text);

	184

	185 auto length = text.length();

	186 Range suffix = {at, length};

	187 Range properties = { prefix.end + OLD_PROPS_SELECTOR_LEN, 0 };

	188 String::value_type quote = 0;

	189 for (auto index = properties.start;

	190 index < length && (suffix.start == at); index++)

	191 {

	192 auto c = text[index];

	193 switch (c)

	194 {

	195 case u'"':

	196 case u'\'':

	197 if (quote == 0)

	198 {

	199 // syntax error: we already have a quoted section.

	200 if (properties.end)

	201 return DependentString();

	202

	203 if (properties.start != index)

	204 return DependentString();

	205

	206 quote = c;

	207 properties.start = index + 1;

	208 }

	209 else if (quote == c)

	210 {

	211 // end of quoted.

	212 quote = 0;

	213 properties.end = index;

	214 }

	215 break;

	216 case u']':

	217 if (quote == 0)

	218 {

	219 if (properties.end == 0)

	220 return DependentString();

	221 if (properties.end + 1 != index)

	222 return DependentString();

	223 suffix.start = index + 1;

	224 }

	225 break;

	226 default:

	227 break;

	228 }

	229 }

	230

	231 if (suffix.start == at)

	232 return DependentString();

	233

	234 String::size_type delimiter = text.find(ELEM_HIDE_DELIMITER, 0,

	235 ELEM_HIDE_DELIMITER_LEN);

	236 // +1 for the replacement of "##" by "#?#"

	237 if (delimiter != text.npos)

	238 at++;

	239 auto new_len = at + prefix.len() + PROPS_SELECTOR_LEN + properties.len() + 1 /* ) */ + suffix.len();

	240

	241 assert2(length == new_len + (delimiter == text.npos ? 2 : 1), u"Inconsistent length in filter conversion."_str);

	242

	243 DependentString converted(text, 0, new_len);

	244

	245 if (suffix.len())

	246 {

	247 new_len -= suffix.len();

	248 std::memmove(converted.data() + new_len,

	249 text.data() + suffix.start,

	250 suffix.byte_len());

	251 }

	252 new_len--;

	253 // here we need to move the properties before inserting the ')'

	254 auto parens = new_len;

	255 if (properties.len())

	256 {

	257 new_len -= properties.len();

	258 std::memmove(converted.data() + new_len,

	259 text.data() + properties.start, properties.byte_len());

	260 }

	261 converted[parens] = u')';

	262

	263 new_len -= PROPS_SELECTOR_LEN;

	264 std::memcpy(converted.data() + new_len,

	265 PROPS_SELECTOR,

	266 PROPS_SELECTOR_LEN * sizeof(String::value_type));

	267 if (prefix.len())

	268 {

	269 new_len -= prefix.len();

	270 std::memmove(converted.data() + new_len,

	271 text.data() + prefix.start, prefix.byte_len());

	272 }

	273

	274 if (delimiter != String::npos)

	275 {

	276 std::memcpy(converted.data() + delimiter, ELEM_HIDE_EMULATION_DELIMITER,

	277 ELEM_HIDE_EMULATION_DELIMITER_LEN * sizeof(String::value_type));

	278 }

	279

	280 return converted;

	281 }

	282

	283 namespace

	284 {

	285 static constexpr String::value_type OPENING_CURLY_REPLACEMENT[] = u"\\7B ";

	286 static constexpr String::value_type CLOSING_CURLY_REPLACEMENT[] = u"\\7D ";

	287 static constexpr String::size_type CURLY_REPLACEMENT_SIZE = str_length_of(OPENING_CURLY_REPLACEMENT);

	288

	289 OwnedString EscapeCurlies(String::size_type replacementCount,

	290 const DependentString& str)

	291 {

	292 OwnedString result(str.length() + replacementCount * (CURLY_REPLACEMENT_SIZE - 1));

	293

	294 String::value_type* current = result.data();

	295 for (String::size_type i = 0; i < str.length(); i++)

	296 {

	297 switch(str[i])

	298 {

	299 case u'}':

	300 std::memcpy(current, CLOSING_CURLY_REPLACEMENT,

	301 sizeof(String::value_type) * CURLY_REPLACEMENT_SIZE);

	302 current += CURLY_REPLACEMENT_SIZE;

	303 break;

	304 case u'{':

	305 std::memcpy(current, OPENING_CURLY_REPLACEMENT,

	306 sizeof(String::value_type) * CURLY_REPLACEMENT_SIZE);

	307 current += CURLY_REPLACEMENT_SIZE;

	308 break;

	309 default:

	310 *current = str[i];

	311 current++;

	312 break;

	313 }

	314 }

	315

	316 return result;

	317 }

	318 }

	319

	320 OwnedString ElemHideBase::GetSelector() const

	321 {

	322 const DependentString selector = mData.GetSelector(mText);

	323 String::size_type replacementCount = 0;

	324 for (String::size_type i = 0; i < selector.length(); i++)

	325 if (selector[i] == '}' || selector[i] == '{')

	326 replacementCount++;

	327 if (replacementCount)

	328 return EscapeCurlies(replacementCount, selector);

	329

	330 return OwnedString(selector);

235 }	331 }

236	332

237 OwnedString ElemHideBase::GetSelectorDomain() const	333 OwnedString ElemHideBase::GetSelectorDomain() const

238 {	334 {

239 /* TODO this is inefficient */	335 /* TODO this is inefficient */

240 OwnedString result;	336 OwnedString result;

241 if (mDomains)	337 if (mDomains)

242 {	338 {

243 for (const auto& item : *mDomains)	339 for (const auto& item : *mDomains)

244 {	340 {

245 if (item.second && !item.first.empty())	341 if (item.second && !item.first.empty())

246 {	342 {

247 if (!result.empty())	343 if (!result.empty())

248 result.append(u',');	344 result.append(u',');

249 result.append(item.first);	345 result.append(item.first);

250 }	346 }

251 }	347 }

252 }	348 }

253 return result;	349 return result;

254 }	350 }

LEFT	RIGHT