Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Delta Between Two Patch Sets: compiled/filter/ElemHideBase.cpp

Issue 29595633: Issue 5870 - Implement the new ElemHideEmulation filter type (Closed) Base URL: https://hg.adblockplus.org/adblockpluscore/
Left Patch Set: Refactor consts Created Feb. 1, 2018, 11:30 p.m.
Right Patch Set: Deal with ill formed filters. Created Feb. 14, 2018, 5:05 p.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
Left: Side by side diff | Download
Right: Side by side diff | Download
« no previous file with change/comment | « compiled/filter/ElemHideBase.h ('k') | compiled/filter/Filter.cpp » ('j') | no next file with change/comment »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
LEFT | RIGHT
1 /* 1 /*
2 * This file is part of Adblock Plus <https://adblockplus.org/>, 2 * This file is part of Adblock Plus <https://adblockplus.org/>,
3 * Copyright (C) 2006-present eyeo GmbH 3 * Copyright (C) 2006-present eyeo GmbH
4 * 4 *
5 * Adblock Plus is free software: you can redistribute it and/or modify 5 * Adblock Plus is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 3 as 6 * it under the terms of the GNU General Public License version 3 as
7 * published by the Free Software Foundation. 7 * published by the Free Software Foundation.
8 * 8 *
9 * Adblock Plus is distributed in the hope that it will be useful, 9 * Adblock Plus is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details. 12 * GNU General Public License for more details.
13 * 13 *
14 * You should have received a copy of the GNU General Public License 14 * You should have received a copy of the GNU General Public License
15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. 15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.
16 */ 16 */
17 17
18 #include <cstring> 18 #include <cstring>
19 19
20 #include "ElemHideBase.h" 20 #include "ElemHideBase.h"
21 #include "../StringScanner.h" 21 #include "../StringScanner.h"
22 22 #include "../Utils.h"
23 // the length of a static string array 23
24 #define LENGTH_OF(x) ((sizeof(x) / sizeof(x[0])) - 1) 24 ABP_NS_USING
sergei 2018/02/05 14:51:06 I think we may use constexpr function here, and ma
hub 2018/02/07 04:13:37 No. This has to be a macro because of sizeof() and
sergei 2018/02/12 12:53:17 The following does work template<typename T, size_
hub 2018/02/12 18:14:38 Done.
sergei 2018/02/13 09:12:49 What about replacing of "static string array" by s
sergei 2018/02/13 16:05:59 It seems only this is left.
hub 2018/02/13 16:23:36 oops. Done
25 25
26 namespace 26 namespace
27 { 27 {
28 void NormalizeWhitespace(DependentString& text, String::size_type& domainsEnd, 28 void NormalizeWhitespace(DependentString& text, String::size_type& domainsEnd,
29 String::size_type& selectorStart) 29 String::size_type& selectorStart)
30 { 30 {
31 // For element hiding filters we only want to remove spaces preceding the 31 // For element hiding filters we only want to remove spaces preceding the
32 // selector part. The positions we've determined already have to be adjusted 32 // selector part. The positions we've determined already have to be adjusted
33 // accordingly. 33 // accordingly.
34 34
(...skipping 12 matching lines...) Expand all
47 delta++; 47 delta++;
48 else 48 else
49 text[pos - delta] = text[pos]; 49 text[pos - delta] = text[pos];
50 } 50 }
51 selectorStart -= delta; 51 selectorStart -= delta;
52 52
53 text.reset(text, 0, len - delta); 53 text.reset(text, 0, len - delta);
54 } 54 }
55 55
56 static constexpr String::value_type ELEM_HIDE_DELIMITER[] = u"##"; 56 static constexpr String::value_type ELEM_HIDE_DELIMITER[] = u"##";
57 static constexpr String::size_type ELEM_HIDE_DELIMITER_LEN = LENGTH_OF(ELEM_HIDE_DELIMITER); 57 static constexpr String::size_type ELEM_HIDE_DELIMITER_LEN = str_length_of(ELEM_HIDE_DELIMITER);
58 58
59 static constexpr String::value_type ELEM_HIDE_EMULATION_DELIMITER[] = u"#?#"; 59 static constexpr String::value_type ELEM_HIDE_EMULATION_DELIMITER[] = u"#?#";
60 static constexpr String::size_type ELEM_HIDE_EMULATION_DELIMITER_LEN = LENGTH_OF(ELEM_HIDE_EMULATION_DELIMITER); 60 static constexpr String::size_type ELEM_HIDE_EMULATION_DELIMITER_LEN = str_length_of(ELEM_HIDE_EMULATION_DELIMITER);
61 61
62 static constexpr String::value_type PROPS_SELECTOR[] = u"[-abp-properties="; 62 static constexpr String::value_type OLD_PROPS_SELECTOR[] = u"[-abp-properties=";
63 static constexpr String::size_type PROPS_SELECTOR_LEN = LENGTH_OF(PROPS_SELECTOR); 63 static constexpr String::size_type OLD_PROPS_SELECTOR_LEN = str_length_of(OLD_PROPS_SELECTOR);
64 64
65 static constexpr String::value_type NEW_PROPS_SELECTOR[] = u":-abp-properties("; 65 static constexpr String::value_type PROPS_SELECTOR[] = u":-abp-properties(";
sergei 2018/02/05 14:51:05 I would rather prefer to call it PROPS_SELECTOR an
hub 2018/02/07 04:13:36 Done.
66 static constexpr String::size_type NEW_PROPS_SELECTOR_LEN = LENGTH_OF(NEW_PROPS_SELECTOR); 66 static constexpr String::size_type PROPS_SELECTOR_LEN = str_length_of(PROPS_SELECTOR);
67 } 67 }
68 68
69 ElemHideBase::ElemHideBase(Type type, const String& text, const ElemHideData& data) 69 ElemHideBase::ElemHideBase(Type type, const String& text, const ElemHideData& data)
70 : ActiveFilter(type, text, false), mData(data) 70 : ActiveFilter(type, text, false), mData(data)
71 { 71 {
72 if (mData.HasDomains()) 72 if (mData.HasDomains())
73 ParseDomains(mData.GetDomainsSource(mText), u','); 73 ParseDomains(mData.GetDomainsSource(mText), u',');
74 } 74 }
75 75
76 Filter::Type ElemHideBase::Parse(DependentString& text, ElemHideData& data) 76 Filter::Type ElemHideBase::Parse(DependentString& text, ElemHideData& data, bool& needConversion)
77 { 77 {
78 needConversion = false;
79
78 StringScanner scanner(text); 80 StringScanner scanner(text);
79 81
80 // Domains part 82 // Domains part
81 bool seenSpaces = false; 83 bool seenSpaces = false;
82 while (!scanner.done()) 84 while (!scanner.done())
83 { 85 {
84 String::value_type next = scanner.next(); 86 String::value_type next = scanner.next();
85 if (next == u'#') 87 if (next == u'#')
86 { 88 {
87 data.mDomainsEnd = scanner.position(); 89 data.mDomainsEnd = scanner.position();
(...skipping 28 matching lines...) Expand all
116 return Type::UNKNOWN; 118 return Type::UNKNOWN;
117 119
118 // Selector part 120 // Selector part
119 121
120 // Selector shouldn't be empty 122 // Selector shouldn't be empty
121 seenSpaces |= scanner.skip(u' '); 123 seenSpaces |= scanner.skip(u' ');
122 if (scanner.done()) 124 if (scanner.done())
123 return Type::UNKNOWN; 125 return Type::UNKNOWN;
124 126
125 data.mSelectorStart = scanner.position() + 1; 127 data.mSelectorStart = scanner.position() + 1;
126 data.mNeedConversion = false;
127 128
128 // We are done validating, now we can normalize whitespace and the domain part 129 // We are done validating, now we can normalize whitespace and the domain part
129 if (seenSpaces) 130 if (seenSpaces)
130 NormalizeWhitespace(text, data.mDomainsEnd, data.mSelectorStart); 131 NormalizeWhitespace(text, data.mDomainsEnd, data.mSelectorStart);
131 DependentString(text, 0, data.mDomainsEnd).toLower(); 132 DependentString(text, 0, data.mDomainsEnd).toLower();
132 133
133 // We still need to check the old syntax. It will be converted when 134 // We still need to check the old syntax. It will be converted when
134 // we instantiate the filter. 135 // we instantiate the filter.
135 if (!emulation && 136 if (!emulation &&
136 text.find(PROPS_SELECTOR, data.mSelectorStart, PROPS_SELECTOR_LEN) != text.npos) 137 text.find(OLD_PROPS_SELECTOR, data.mSelectorStart, OLD_PROPS_SELECTOR_LEN) != text.npos)
137 { 138 {
138 data.mNeedConversion = true; 139 needConversion = true;
139 emulation = !exception; 140 emulation = !exception;
140 } 141 }
141 142
142 if (exception) 143 if (exception)
143 return Type::ELEMHIDEEXCEPTION; 144 return Type::ELEMHIDEEXCEPTION;
144 145
145 if (emulation) 146 if (emulation)
146 return Type::ELEMHIDEEMULATION; 147 return Type::ELEMHIDEEMULATION;
147 148
148 return Type::ELEMHIDE; 149 return Type::ELEMHIDE;
149 } 150 }
150 151
152 namespace
153 {
154 struct Range
155 {
156 String::size_type start;
157 String::size_type end;
158 String::size_type len() const
159 {
160 return end - start;
161 }
162 String::size_type byte_len() const
163 {
164 return len() * sizeof(String::value_type);
165 }
166 };
167 }
168
151 // Convert filter from the old syntax to the new. 169 // Convert filter from the old syntax to the new.
152 OwnedString ElemHideBase::ConvertFilter(const String& text, String::size_type& at) 170 DependentString ElemHideBase::ConvertFilter(String& text, String::size_type& at)
153 { 171 {
154 auto selectorPos = text.find(PROPS_SELECTOR, at, PROPS_SELECTOR_LEN); 172 Range prefix = {at, text.find(OLD_PROPS_SELECTOR, at, OLD_PROPS_SELECTOR_LEN)};
155 if (selectorPos != text.npos) 173 if (prefix.end == text.npos)
156 { 174 return DependentString(text);
157 auto length = text.length(); 175
158 auto properties = selectorPos + PROPS_SELECTOR_LEN; 176 auto length = text.length();
sergei 2018/02/05 14:51:06 should it be something like propertiesPos?
hub 2018/02/07 04:13:36 Done.
159 String::value_type quote = 0; 177 Range suffix = {at, length};
160 bool escape = false; 178 Range properties = { prefix.end + OLD_PROPS_SELECTOR_LEN, 0 };
161 String::size_type removed = 0; // how many chars we remove 179 String::value_type quote = 0;
162 String::size_type end = properties; 180 for (auto index = properties.start;
163 String::size_type quote_start = 0; 181 index < length && (suffix.start == at); index++)
164 String::size_type quote_end = 0; 182 {
165 for (auto index = properties; 183 auto c = text[index];
166 index < length && end == properties; index++) 184 switch (c)
167 { 185 {
168 if (escape) 186 case u'"':
169 { 187 case u'\'':
170 escape = false; 188 if (quote == 0)
171 continue; 189 {
172 } 190 // syntax error: we already have a quoted section.
173 191 if (properties.end)
174 auto c = text[index]; 192 return DependentString();
175 switch (c) 193
176 { 194 if (properties.start != index)
177 case '\\': 195 return DependentString();
sergei 2018/02/05 14:51:05 should they be u'\\', u'"', etc? Since we keep in
hub 2018/02/07 04:13:36 yes it should be u'', but with C it works without
178 escape = true; 196
179 break; 197 quote = c;
180 case '"': 198 properties.start = index + 1;
181 case '\'': 199 }
182 if (quote == 0) 200 else if (quote == c)
183 { 201 {
184 quote = c; 202 // end of quoted.
185 quote_start = index + 1; 203 quote = 0;
186 } 204 properties.end = index;
187 else if (quote == c) 205 }
188 { 206 break;
189 // end of quoted. 207 case u']':
190 quote = 0; 208 if (quote == 0)
191 removed += 2; 209 {
192 quote_end = index; 210 if (properties.end == 0)
193 } 211 return DependentString();
194 break; 212 if (properties.end + 1 != index)
195 case ']': 213 return DependentString();
196 if (quote == 0) 214 suffix.start = index + 1;
197 end = index + 1; // end of properties (after ]) 215 }
198 break; 216 break;
199 default: 217 default:
200 break; 218 break;
201 } 219 }
202 } 220 }
sergei 2018/02/05 14:51:05 It also differs from the regexp in current js impl
sergei 2018/02/05 14:51:06 What if filter is malformed and `end` is equal to
hub 2018/02/07 04:13:36 end cannot be equal to zero. It will be at least e
hub 2018/02/07 04:13:36 It's actually worse: the parser doesn't like it at
203 221
204 if (quote != 0) 222 if (suffix.start == at)
sergei 2018/02/27 10:56:53 Just for reference, I think it (what the whole for
205 quote_end = end - 1; 223 return DependentString();
206 else if (quote_end <= quote_start) 224
207 { 225 String::size_type delimiter = text.find(ELEM_HIDE_DELIMITER, 0,
208 // we likely didn't find a quoted content so we just take it as is. 226 ELEM_HIDE_DELIMITER_LEN);
209 quote_start = properties; 227 // +1 for the replacement of "##" by "#?#"
210 quote_end = end - 1; 228 if (delimiter != text.npos)
211 } 229 at++;
212 230 auto new_len = at + prefix.len() + PROPS_SELECTOR_LEN + properties.len() + 1 /* ) */ + suffix.len();
213 // +1 for the replacement of "##" by "#?#" 231
214 String::size_type offset = 0; 232 assert2(new_len + 1 == length || (delimiter == text.npos && new_len + 2 == length), u"Inconsistent length in filter conversion."_str);
sergei 2018/02/27 10:56:53 not important just for reference length == new_len
hub 2018/02/27 13:32:30 Acknowledged.
215 233
216 String::size_type delimiter = text.find(ELEM_HIDE_DELIMITER, 0, 234 DependentString converted(text, 0, new_len);
217 ELEM_HIDE_DELIMITER_LEN); 235
218 OwnedString converted(length + ((delimiter != text.npos) ? 1 : 0) - removed); 236 if (suffix.len())
sergei 2018/02/05 14:51:06 Since removed cannot be negative (no growth), `tex
hub 2018/02/07 04:13:36 I'm not really fond of modifying in place, but we
219 if (delimiter != text.npos) 237 {
220 { 238 new_len -= suffix.len();
221 if (delimiter >= selectorPos) 239 std::memmove(converted.data() + new_len,
222 return OwnedString(text); 240 text.data() + suffix.start,
223 241 suffix.byte_len());
224 at++; 242 }
225 std::memcpy(converted.data(), text.data(), 243 new_len--;
226 delimiter * sizeof(String::value_type)); 244 // here we need to move the properties before inserting the ')'
227 offset += delimiter; 245 auto parens = new_len;
228 std::memcpy(converted.data() + offset, ELEM_HIDE_EMULATION_DELIMITER, 246 if (properties.len())
229 ELEM_HIDE_EMULATION_DELIMITER_LEN * sizeof(String::value_type)); 247 {
230 offset += ELEM_HIDE_EMULATION_DELIMITER_LEN; 248 new_len -= properties.len();
231 delimiter += ELEM_HIDE_DELIMITER_LEN; 249 std::memmove(converted.data() + new_len,
232 // we have already parsed to past the delimiter. 250 text.data() + properties.start, properties.byte_len());
233 selectorPos -= delimiter; 251 }
234 } 252 converted[parens] = u')';
235 else 253
236 delimiter = 0; 254 new_len -= PROPS_SELECTOR_LEN;
237 255 std::memcpy(converted.data() + new_len,
238 256 PROPS_SELECTOR,
239 std::memcpy(converted.data() + offset, text.data() + delimiter, 257 PROPS_SELECTOR_LEN * sizeof(String::value_type));
240 selectorPos * sizeof(String::value_type)); 258 if (prefix.len())
241 offset += selectorPos; 259 {
242 260 new_len -= prefix.len();
243 std::memcpy(converted.data() + offset, NEW_PROPS_SELECTOR, 261 std::memmove(converted.data() + new_len,
244 NEW_PROPS_SELECTOR_LEN * sizeof(String::value_type)); 262 text.data() + prefix.start, prefix.byte_len());
245 offset += NEW_PROPS_SELECTOR_LEN; 263 }
246 264
247 std::memcpy(converted.data() + offset, text.data() + quote_start, 265 if (delimiter != String::npos)
248 (quote_end - quote_start) * sizeof(String::value_type)); 266 {
249 offset += quote_end - quote_start; 267 std::memcpy(converted.data() + delimiter, ELEM_HIDE_EMULATION_DELIMITER,
250 268 ELEM_HIDE_EMULATION_DELIMITER_LEN * sizeof(String::value_type));
251 std::memcpy(converted.data() + offset, u")", sizeof(String::value_type)); 269 }
252 offset++; 270
253 271 return converted;
254 std::memcpy(converted.data() + offset, text.data() + end,
255 (length - end) * sizeof(String::value_type));
256 offset += (length - end) * sizeof(String::value_type);
257
258 return converted;
259 }
260
261 return OwnedString(text);
262 } 272 }
263 273
264 namespace 274 namespace
265 { 275 {
266 static constexpr String::value_type OPENING_CURLY_REPLACEMENT[] = u"\\7B "; 276 static constexpr String::value_type OPENING_CURLY_REPLACEMENT[] = u"\\7B ";
267 static constexpr String::value_type CLOSING_CURLY_REPLACEMENT[] = u"\\7D "; 277 static constexpr String::value_type CLOSING_CURLY_REPLACEMENT[] = u"\\7D ";
268 static constexpr String::size_type CURLY_REPLACEMENT_SIZE = LENGTH_OF(OPENING_CURLY_REPLACEMENT); 278 static constexpr String::size_type CURLY_REPLACEMENT_SIZE = str_length_of(OPENING_CURLY_REPLACEMENT);
269 279
270 OwnedString EscapeCurlies(String::size_type replacementCount, 280 OwnedString EscapeCurlies(String::size_type replacementCount,
271 const DependentString& str) 281 const DependentString& str)
272 { 282 {
273 OwnedString result(str.length() + replacementCount * (CURLY_REPLACEMENT_SIZE - 1)); 283 OwnedString result(str.length() + replacementCount * (CURLY_REPLACEMENT_SIZE - 1));
274 284
275 String::value_type* current = result.data(); 285 String::value_type* current = result.data();
276 for (String::size_type i = 0; i < str.length(); i++) 286 for (String::size_type i = 0; i < str.length(); i++)
277 { 287 {
278 switch(str[i]) 288 switch(str[i])
(...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after
322 if (item.second && !item.first.empty()) 332 if (item.second && !item.first.empty())
323 { 333 {
324 if (!result.empty()) 334 if (!result.empty())
325 result.append(u','); 335 result.append(u',');
326 result.append(item.first); 336 result.append(item.first);
327 } 337 }
328 } 338 }
329 } 339 }
330 return result; 340 return result;
331 } 341 }
LEFT | RIGHT

Powered by Google App Engine
This is Rietveld