Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Side by Side Diff: compiled/filter/ElemHideBase.cpp

Issue 29595633: Issue 5870 - Implement the new ElemHideEmulation filter type (Closed) Base URL: https://hg.adblockplus.org/adblockpluscore/
Patch Set: Refactor consts Created Feb. 1, 2018, 11:30 p.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* 1 /*
2 * This file is part of Adblock Plus <https://adblockplus.org/>, 2 * This file is part of Adblock Plus <https://adblockplus.org/>,
3 * Copyright (C) 2006-present eyeo GmbH 3 * Copyright (C) 2006-present eyeo GmbH
4 * 4 *
5 * Adblock Plus is free software: you can redistribute it and/or modify 5 * Adblock Plus is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 3 as 6 * it under the terms of the GNU General Public License version 3 as
7 * published by the Free Software Foundation. 7 * published by the Free Software Foundation.
8 * 8 *
9 * Adblock Plus is distributed in the hope that it will be useful, 9 * Adblock Plus is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details. 12 * GNU General Public License for more details.
13 * 13 *
14 * You should have received a copy of the GNU General Public License 14 * You should have received a copy of the GNU General Public License
15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. 15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.
16 */ 16 */
17 17
18 #include <cstring> 18 #include <cstring>
19 19
20 #include "ElemHideBase.h" 20 #include "ElemHideBase.h"
21 #include "../StringScanner.h" 21 #include "../StringScanner.h"
22 22
23 // the length of a static string array
24 #define LENGTH_OF(x) ((sizeof(x) / sizeof(x[0])) - 1)
sergei 2018/02/05 14:51:06 I think we may use constexpr function here, and ma
hub 2018/02/07 04:13:37 No. This has to be a macro because of sizeof() and
sergei 2018/02/12 12:53:17 The following does work template<typename T, size_
hub 2018/02/12 18:14:38 Done.
sergei 2018/02/13 09:12:49 What about replacing of "static string array" by s
sergei 2018/02/13 16:05:59 It seems only this is left.
hub 2018/02/13 16:23:36 oops. Done
25
23 namespace 26 namespace
24 { 27 {
25 void NormalizeWhitespace(DependentString& text, String::size_type& domainsEnd, 28 void NormalizeWhitespace(DependentString& text, String::size_type& domainsEnd,
26 String::size_type& selectorStart) 29 String::size_type& selectorStart)
27 { 30 {
28 // For element hiding filters we only want to remove spaces preceding the 31 // For element hiding filters we only want to remove spaces preceding the
29 // selector part. The positions we've determined already have to be adjusted 32 // selector part. The positions we've determined already have to be adjusted
30 // accordingly. 33 // accordingly.
31 34
32 String::size_type delta = 0; 35 String::size_type delta = 0;
33 String::size_type len = text.length(); 36 String::size_type len = text.length();
34 37
35 // The first character is guaranteed to be a non-space, the string has been 38 // The first character is guaranteed to be a non-space, the string has been
36 // trimmed earlier. 39 // trimmed earlier.
37 for (String::size_type pos = 1; pos < len; pos++) 40 for (String::size_type pos = 1; pos < len; pos++)
38 { 41 {
39 if (pos == domainsEnd) 42 if (pos == domainsEnd)
40 domainsEnd -= delta; 43 domainsEnd -= delta;
41 44
42 // Only spaces before selectorStart position should be removed. 45 // Only spaces before selectorStart position should be removed.
43 if (pos < selectorStart && text[pos] == ' ') 46 if (pos < selectorStart && text[pos] == ' ')
44 delta++; 47 delta++;
45 else 48 else
46 text[pos - delta] = text[pos]; 49 text[pos - delta] = text[pos];
47 } 50 }
48 selectorStart -= delta; 51 selectorStart -= delta;
49 52
50 text.reset(text, 0, len - delta); 53 text.reset(text, 0, len - delta);
51 } 54 }
55
56 static constexpr String::value_type ELEM_HIDE_DELIMITER[] = u"##";
57 static constexpr String::size_type ELEM_HIDE_DELIMITER_LEN = LENGTH_OF(ELEM_HIDE_DELIMITER);
58
59 static constexpr String::value_type ELEM_HIDE_EMULATION_DELIMITER[] = u"#?#";
60 static constexpr String::size_type ELEM_HIDE_EMULATION_DELIMITER_LEN = LENGTH_OF(ELEM_HIDE_EMULATION_DELIMITER);
61
62 static constexpr String::value_type PROPS_SELECTOR[] = u"[-abp-properties=";
63 static constexpr String::size_type PROPS_SELECTOR_LEN = LENGTH_OF(PROPS_SELECTOR);
64
65 static constexpr String::value_type NEW_PROPS_SELECTOR[] = u":-abp-properties(";
sergei 2018/02/05 14:51:05 I would rather prefer to call it PROPS_SELECTOR an
hub 2018/02/07 04:13:36 Done.
66 static constexpr String::size_type NEW_PROPS_SELECTOR_LEN = LENGTH_OF(NEW_PROPS_SELECTOR);
52 } 67 }
53 68
54 ElemHideBase::ElemHideBase(Type type, const String& text, const ElemHideData& data) 69 ElemHideBase::ElemHideBase(Type type, const String& text, const ElemHideData& data)
55 : ActiveFilter(type, text, false), mData(data) 70 : ActiveFilter(type, text, false), mData(data)
56 { 71 {
57 if (mData.HasDomains()) 72 if (mData.HasDomains())
58 ParseDomains(mData.GetDomainsSource(mText), u','); 73 ParseDomains(mData.GetDomainsSource(mText), u',');
59 } 74 }
60 75
61 Filter::Type ElemHideBase::Parse(DependentString& text, ElemHideData& data) 76 Filter::Type ElemHideBase::Parse(DependentString& text, ElemHideData& data)
(...skipping 20 matching lines...) Expand all
82 case u'"': 97 case u'"':
83 case u'!': 98 case u'!':
84 return Type::UNKNOWN; 99 return Type::UNKNOWN;
85 case u' ': 100 case u' ':
86 seenSpaces = true; 101 seenSpaces = true;
87 break; 102 break;
88 } 103 }
89 } 104 }
90 105
91 seenSpaces |= scanner.skip(u' '); 106 seenSpaces |= scanner.skip(u' ');
107 bool emulation = false;
92 bool exception = scanner.skipOne(u'@'); 108 bool exception = scanner.skipOne(u'@');
93 if (exception) 109 if (exception)
94 seenSpaces |= scanner.skip(u' '); 110 seenSpaces |= scanner.skip(u' ');
111 else
112 emulation = scanner.skipOne(u'?');
95 113
96 String::value_type next = scanner.next(); 114 String::value_type next = scanner.next();
97 if (next != u'#') 115 if (next != u'#')
98 return Type::UNKNOWN; 116 return Type::UNKNOWN;
99 117
100 // Selector part 118 // Selector part
101 119
102 // Selector shouldn't be empty 120 // Selector shouldn't be empty
103 seenSpaces |= scanner.skip(u' '); 121 seenSpaces |= scanner.skip(u' ');
104 if (scanner.done()) 122 if (scanner.done())
105 return Type::UNKNOWN; 123 return Type::UNKNOWN;
106 124
107 data.mSelectorStart = scanner.position() + 1; 125 data.mSelectorStart = scanner.position() + 1;
126 data.mNeedConversion = false;
108 127
109 // We are done validating, now we can normalize whitespace and the domain part 128 // We are done validating, now we can normalize whitespace and the domain part
110 if (seenSpaces) 129 if (seenSpaces)
111 NormalizeWhitespace(text, data.mDomainsEnd, data.mSelectorStart); 130 NormalizeWhitespace(text, data.mDomainsEnd, data.mSelectorStart);
112 DependentString(text, 0, data.mDomainsEnd).toLower(); 131 DependentString(text, 0, data.mDomainsEnd).toLower();
113 132
133 // We still need to check the old syntax. It will be converted when
134 // we instantiate the filter.
135 if (!emulation &&
136 text.find(PROPS_SELECTOR, data.mSelectorStart, PROPS_SELECTOR_LEN) != text.npos)
137 {
138 data.mNeedConversion = true;
139 emulation = !exception;
140 }
141
114 if (exception) 142 if (exception)
115 return Type::ELEMHIDEEXCEPTION; 143 return Type::ELEMHIDEEXCEPTION;
116 144
117 if (text.find(u"[-abp-properties="_str, data.mSelectorStart) != text.npos) 145 if (emulation)
118 return Type::ELEMHIDEEMULATION; 146 return Type::ELEMHIDEEMULATION;
119 147
120 return Type::ELEMHIDE; 148 return Type::ELEMHIDE;
121 } 149 }
122 150
151 // Convert filter from the old syntax to the new.
152 OwnedString ElemHideBase::ConvertFilter(const String& text, String::size_type& at)
153 {
154 auto selectorPos = text.find(PROPS_SELECTOR, at, PROPS_SELECTOR_LEN);
155 if (selectorPos != text.npos)
156 {
157 auto length = text.length();
158 auto properties = selectorPos + PROPS_SELECTOR_LEN;
sergei 2018/02/05 14:51:06 should it be something like propertiesPos?
hub 2018/02/07 04:13:36 Done.
159 String::value_type quote = 0;
160 bool escape = false;
161 String::size_type removed = 0; // how many chars we remove
162 String::size_type end = properties;
163 String::size_type quote_start = 0;
164 String::size_type quote_end = 0;
165 for (auto index = properties;
166 index < length && end == properties; index++)
167 {
168 if (escape)
169 {
170 escape = false;
171 continue;
172 }
173
174 auto c = text[index];
175 switch (c)
176 {
177 case '\\':
sergei 2018/02/05 14:51:05 should they be u'\\', u'"', etc? Since we keep in
hub 2018/02/07 04:13:36 yes it should be u'', but with C it works without
178 escape = true;
179 break;
180 case '"':
181 case '\'':
182 if (quote == 0)
183 {
184 quote = c;
185 quote_start = index + 1;
186 }
187 else if (quote == c)
188 {
189 // end of quoted.
190 quote = 0;
191 removed += 2;
192 quote_end = index;
193 }
194 break;
195 case ']':
196 if (quote == 0)
197 end = index + 1; // end of properties (after ])
198 break;
199 default:
200 break;
201 }
202 }
sergei 2018/02/05 14:51:05 It also differs from the regexp in current js impl
sergei 2018/02/05 14:51:06 What if filter is malformed and `end` is equal to
hub 2018/02/07 04:13:36 end cannot be equal to zero. It will be at least e
hub 2018/02/07 04:13:36 It's actually worse: the parser doesn't like it at
203
204 if (quote != 0)
205 quote_end = end - 1;
206 else if (quote_end <= quote_start)
207 {
208 // we likely didn't find a quoted content so we just take it as is.
209 quote_start = properties;
210 quote_end = end - 1;
211 }
212
213 // +1 for the replacement of "##" by "#?#"
214 String::size_type offset = 0;
215
216 String::size_type delimiter = text.find(ELEM_HIDE_DELIMITER, 0,
217 ELEM_HIDE_DELIMITER_LEN);
218 OwnedString converted(length + ((delimiter != text.npos) ? 1 : 0) - removed);
sergei 2018/02/05 14:51:06 Since removed cannot be negative (no growth), `tex
hub 2018/02/07 04:13:36 I'm not really fond of modifying in place, but we
219 if (delimiter != text.npos)
220 {
221 if (delimiter >= selectorPos)
222 return OwnedString(text);
223
224 at++;
225 std::memcpy(converted.data(), text.data(),
226 delimiter * sizeof(String::value_type));
227 offset += delimiter;
228 std::memcpy(converted.data() + offset, ELEM_HIDE_EMULATION_DELIMITER,
229 ELEM_HIDE_EMULATION_DELIMITER_LEN * sizeof(String::value_type));
230 offset += ELEM_HIDE_EMULATION_DELIMITER_LEN;
231 delimiter += ELEM_HIDE_DELIMITER_LEN;
232 // we have already parsed to past the delimiter.
233 selectorPos -= delimiter;
234 }
235 else
236 delimiter = 0;
237
238
239 std::memcpy(converted.data() + offset, text.data() + delimiter,
240 selectorPos * sizeof(String::value_type));
241 offset += selectorPos;
242
243 std::memcpy(converted.data() + offset, NEW_PROPS_SELECTOR,
244 NEW_PROPS_SELECTOR_LEN * sizeof(String::value_type));
245 offset += NEW_PROPS_SELECTOR_LEN;
246
247 std::memcpy(converted.data() + offset, text.data() + quote_start,
248 (quote_end - quote_start) * sizeof(String::value_type));
249 offset += quote_end - quote_start;
250
251 std::memcpy(converted.data() + offset, u")", sizeof(String::value_type));
252 offset++;
253
254 std::memcpy(converted.data() + offset, text.data() + end,
255 (length - end) * sizeof(String::value_type));
256 offset += (length - end) * sizeof(String::value_type);
257
258 return converted;
259 }
260
261 return OwnedString(text);
262 }
263
123 namespace 264 namespace
124 { 265 {
125 static constexpr String::value_type OPENING_CURLY_REPLACEMENT[] = u"\\7B "; 266 static constexpr String::value_type OPENING_CURLY_REPLACEMENT[] = u"\\7B ";
126 static constexpr String::value_type CLOSING_CURLY_REPLACEMENT[] = u"\\7D "; 267 static constexpr String::value_type CLOSING_CURLY_REPLACEMENT[] = u"\\7D ";
127 static constexpr String::size_type CURLY_REPLACEMENT_SIZE = sizeof(OPENING_CURLY_REPLACEMENT) / sizeof(OPENING_CURLY_REPLACEMENT[0]) - 1; 268 static constexpr String::size_type CURLY_REPLACEMENT_SIZE = LENGTH_OF(OPENING_CURLY_REPLACEMENT);
128 269
129 OwnedString EscapeCurlies(String::size_type replacementCount, 270 OwnedString EscapeCurlies(String::size_type replacementCount,
130 const DependentString& str) 271 const DependentString& str)
131 { 272 {
132 OwnedString result(str.length() + replacementCount * (CURLY_REPLACEMENT_SIZE - 1)); 273 OwnedString result(str.length() + replacementCount * (CURLY_REPLACEMENT_SIZE - 1));
133 274
134 String::value_type* current = result.data(); 275 String::value_type* current = result.data();
135 for (String::size_type i = 0; i < str.length(); i++) 276 for (String::size_type i = 0; i < str.length(); i++)
136 { 277 {
137 switch(str[i]) 278 switch(str[i])
(...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after
181 if (item.second && !item.first.empty()) 322 if (item.second && !item.first.empty())
182 { 323 {
183 if (!result.empty()) 324 if (!result.empty())
184 result.append(u','); 325 result.append(u',');
185 result.append(item.first); 326 result.append(item.first);
186 } 327 }
187 } 328 }
188 } 329 }
189 return result; 330 return result;
190 } 331 }
OLDNEW

Powered by Google App Engine
This is Rietveld