Left: | ||
Right: |
LEFT | RIGHT |
---|---|
1 /* | 1 /* |
2 * This file is part of Adblock Plus <https://adblockplus.org/>, | 2 * This file is part of Adblock Plus <https://adblockplus.org/>, |
3 * Copyright (C) 2006-present eyeo GmbH | 3 * Copyright (C) 2006-present eyeo GmbH |
4 * | 4 * |
5 * Adblock Plus is free software: you can redistribute it and/or modify | 5 * Adblock Plus is free software: you can redistribute it and/or modify |
6 * it under the terms of the GNU General Public License version 3 as | 6 * it under the terms of the GNU General Public License version 3 as |
7 * published by the Free Software Foundation. | 7 * published by the Free Software Foundation. |
8 * | 8 * |
9 * Adblock Plus is distributed in the hope that it will be useful, | 9 * Adblock Plus is distributed in the hope that it will be useful, |
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of | 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
12 * GNU General Public License for more details. | 12 * GNU General Public License for more details. |
13 * | 13 * |
14 * You should have received a copy of the GNU General Public License | 14 * You should have received a copy of the GNU General Public License |
15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. | 15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. |
16 */ | 16 */ |
17 | 17 |
18 #include <cstring> | 18 #include <cstring> |
19 | 19 |
20 #include "ElemHideBase.h" | 20 #include "ElemHideBase.h" |
21 #include "../StringScanner.h" | 21 #include "../StringScanner.h" |
22 | 22 #include "../Utils.h" |
23 // the length of a static string array | 23 |
24 #define LENGTH_OF(x) ((sizeof(x) / sizeof(x[0])) - 1) | 24 ABP_NS_USING |
sergei
2018/02/05 14:51:06
I think we may use constexpr function here, and ma
hub
2018/02/07 04:13:37
No. This has to be a macro because of sizeof() and
sergei
2018/02/12 12:53:17
The following does work
template<typename T, size_
hub
2018/02/12 18:14:38
Done.
sergei
2018/02/13 09:12:49
What about replacing of "static string array" by s
sergei
2018/02/13 16:05:59
It seems only this is left.
hub
2018/02/13 16:23:36
oops.
Done
| |
25 | 25 |
26 namespace | 26 namespace |
27 { | 27 { |
28 void NormalizeWhitespace(DependentString& text, String::size_type& domainsEnd, | 28 void NormalizeWhitespace(DependentString& text, String::size_type& domainsEnd, |
29 String::size_type& selectorStart) | 29 String::size_type& selectorStart) |
30 { | 30 { |
31 // For element hiding filters we only want to remove spaces preceding the | 31 // For element hiding filters we only want to remove spaces preceding the |
32 // selector part. The positions we've determined already have to be adjusted | 32 // selector part. The positions we've determined already have to be adjusted |
33 // accordingly. | 33 // accordingly. |
34 | 34 |
(...skipping 12 matching lines...) Expand all Loading... | |
47 delta++; | 47 delta++; |
48 else | 48 else |
49 text[pos - delta] = text[pos]; | 49 text[pos - delta] = text[pos]; |
50 } | 50 } |
51 selectorStart -= delta; | 51 selectorStart -= delta; |
52 | 52 |
53 text.reset(text, 0, len - delta); | 53 text.reset(text, 0, len - delta); |
54 } | 54 } |
55 | 55 |
56 static constexpr String::value_type ELEM_HIDE_DELIMITER[] = u"##"; | 56 static constexpr String::value_type ELEM_HIDE_DELIMITER[] = u"##"; |
57 static constexpr String::size_type ELEM_HIDE_DELIMITER_LEN = LENGTH_OF(ELEM_HIDE_DELIMITER); | 57 static constexpr String::size_type ELEM_HIDE_DELIMITER_LEN = str_length_of(ELEM_HIDE_DELIMITER); |
58 | 58 |
59 static constexpr String::value_type ELEM_HIDE_EMULATION_DELIMITER[] = u"#?#"; | 59 static constexpr String::value_type ELEM_HIDE_EMULATION_DELIMITER[] = u"#?#"; |
60 static constexpr String::size_type ELEM_HIDE_EMULATION_DELIMITER_LEN = LENGTH_OF(ELEM_HIDE_EMULATION_DELIMITER); | 60 static constexpr String::size_type ELEM_HIDE_EMULATION_DELIMITER_LEN = str_length_of(ELEM_HIDE_EMULATION_DELIMITER); |
61 | 61 |
62 static constexpr String::value_type PROPS_SELECTOR[] = u"[-abp-properties="; | 62 static constexpr String::value_type OLD_PROPS_SELECTOR[] = u"[-abp-properties="; |
63 static constexpr String::size_type PROPS_SELECTOR_LEN = LENGTH_OF(PROPS_SELECTOR); | 63 static constexpr String::size_type OLD_PROPS_SELECTOR_LEN = str_length_of(OLD_PROPS_SELECTOR); |
64 | 64 |
65 static constexpr String::value_type NEW_PROPS_SELECTOR[] = u":-abp-properties("; | 65 static constexpr String::value_type PROPS_SELECTOR[] = u":-abp-properties("; |
sergei
2018/02/05 14:51:05
I would rather prefer to call it PROPS_SELECTOR an
hub
2018/02/07 04:13:36
Done.
| |
66 static constexpr String::size_type NEW_PROPS_SELECTOR_LEN = LENGTH_OF(NEW_PROPS_SELECTOR); | 66 static constexpr String::size_type PROPS_SELECTOR_LEN = str_length_of(PROPS_SELECTOR); |
67 } | 67 } |
68 | 68 |
69 ElemHideBase::ElemHideBase(Type type, const String& text, const ElemHideData& data) | 69 ElemHideBase::ElemHideBase(Type type, const String& text, const ElemHideData& data) |
70 : ActiveFilter(type, text, false), mData(data) | 70 : ActiveFilter(type, text, false), mData(data) |
71 { | 71 { |
72 if (mData.HasDomains()) | 72 if (mData.HasDomains()) |
73 ParseDomains(mData.GetDomainsSource(mText), u','); | 73 ParseDomains(mData.GetDomainsSource(mText), u','); |
74 } | 74 } |
75 | 75 |
76 Filter::Type ElemHideBase::Parse(DependentString& text, ElemHideData& data) | 76 Filter::Type ElemHideBase::Parse(DependentString& text, ElemHideData& data, bool& needConversion) |
77 { | 77 { |
78 needConversion = false; | |
79 | |
78 StringScanner scanner(text); | 80 StringScanner scanner(text); |
79 | 81 |
80 // Domains part | 82 // Domains part |
81 bool seenSpaces = false; | 83 bool seenSpaces = false; |
82 while (!scanner.done()) | 84 while (!scanner.done()) |
83 { | 85 { |
84 String::value_type next = scanner.next(); | 86 String::value_type next = scanner.next(); |
85 if (next == u'#') | 87 if (next == u'#') |
86 { | 88 { |
87 data.mDomainsEnd = scanner.position(); | 89 data.mDomainsEnd = scanner.position(); |
(...skipping 28 matching lines...) Expand all Loading... | |
116 return Type::UNKNOWN; | 118 return Type::UNKNOWN; |
117 | 119 |
118 // Selector part | 120 // Selector part |
119 | 121 |
120 // Selector shouldn't be empty | 122 // Selector shouldn't be empty |
121 seenSpaces |= scanner.skip(u' '); | 123 seenSpaces |= scanner.skip(u' '); |
122 if (scanner.done()) | 124 if (scanner.done()) |
123 return Type::UNKNOWN; | 125 return Type::UNKNOWN; |
124 | 126 |
125 data.mSelectorStart = scanner.position() + 1; | 127 data.mSelectorStart = scanner.position() + 1; |
126 data.mNeedConversion = false; | |
127 | 128 |
128 // We are done validating, now we can normalize whitespace and the domain part | 129 // We are done validating, now we can normalize whitespace and the domain part |
129 if (seenSpaces) | 130 if (seenSpaces) |
130 NormalizeWhitespace(text, data.mDomainsEnd, data.mSelectorStart); | 131 NormalizeWhitespace(text, data.mDomainsEnd, data.mSelectorStart); |
131 DependentString(text, 0, data.mDomainsEnd).toLower(); | 132 DependentString(text, 0, data.mDomainsEnd).toLower(); |
132 | 133 |
133 // We still need to check the old syntax. It will be converted when | 134 // We still need to check the old syntax. It will be converted when |
134 // we instantiate the filter. | 135 // we instantiate the filter. |
135 if (!emulation && | 136 if (!emulation && |
136 text.find(PROPS_SELECTOR, data.mSelectorStart, PROPS_SELECTOR_LEN) != text.npos) | 137 text.find(OLD_PROPS_SELECTOR, data.mSelectorStart, OLD_PROPS_SELECTOR_LEN) != text.npos) |
137 { | 138 { |
138 data.mNeedConversion = true; | 139 needConversion = true; |
139 emulation = !exception; | 140 emulation = !exception; |
140 } | 141 } |
141 | 142 |
142 if (exception) | 143 if (exception) |
143 return Type::ELEMHIDEEXCEPTION; | 144 return Type::ELEMHIDEEXCEPTION; |
144 | 145 |
145 if (emulation) | 146 if (emulation) |
146 return Type::ELEMHIDEEMULATION; | 147 return Type::ELEMHIDEEMULATION; |
147 | 148 |
148 return Type::ELEMHIDE; | 149 return Type::ELEMHIDE; |
149 } | 150 } |
150 | 151 |
152 namespace | |
153 { | |
154 struct Range | |
155 { | |
156 String::size_type start; | |
157 String::size_type end; | |
158 String::size_type len() const | |
159 { | |
160 return end - start; | |
161 } | |
162 String::size_type byte_len() const | |
163 { | |
164 return len() * sizeof(String::value_type); | |
165 } | |
166 }; | |
167 } | |
168 | |
151 // Convert filter from the old syntax to the new. | 169 // Convert filter from the old syntax to the new. |
152 OwnedString ElemHideBase::ConvertFilter(const String& text, String::size_type& at) | 170 DependentString ElemHideBase::ConvertFilter(String& text, String::size_type& at) |
153 { | 171 { |
154 auto selectorPos = text.find(PROPS_SELECTOR, at, PROPS_SELECTOR_LEN); | 172 Range prefix = {at, text.find(OLD_PROPS_SELECTOR, at, OLD_PROPS_SELECTOR_LEN)}; |
155 if (selectorPos != text.npos) | 173 if (prefix.end == text.npos) |
156 { | 174 return DependentString(text); |
157 auto length = text.length(); | 175 |
158 auto properties = selectorPos + PROPS_SELECTOR_LEN; | 176 auto length = text.length(); |
sergei
2018/02/05 14:51:06
should it be something like propertiesPos?
hub
2018/02/07 04:13:36
Done.
| |
159 String::value_type quote = 0; | 177 Range suffix = {at, length}; |
160 bool escape = false; | 178 Range properties = { prefix.end + OLD_PROPS_SELECTOR_LEN, 0 }; |
161 String::size_type removed = 0; // how many chars we remove | 179 String::value_type quote = 0; |
162 String::size_type end = properties; | 180 for (auto index = properties.start; |
163 String::size_type quote_start = 0; | 181 index < length && (suffix.start == at); index++) |
164 String::size_type quote_end = 0; | 182 { |
165 for (auto index = properties; | 183 auto c = text[index]; |
166 index < length && end == properties; index++) | 184 switch (c) |
167 { | 185 { |
168 if (escape) | 186 case u'"': |
169 { | 187 case u'\'': |
170 escape = false; | 188 if (quote == 0) |
171 continue; | 189 { |
172 } | 190 // syntax error: we already have a quoted section. |
173 | 191 if (properties.end) |
174 auto c = text[index]; | 192 return DependentString(); |
175 switch (c) | 193 |
176 { | 194 if (properties.start != index) |
177 case '\\': | 195 return DependentString(); |
sergei
2018/02/05 14:51:05
should they be u'\\', u'"', etc? Since we keep in
hub
2018/02/07 04:13:36
yes it should be u'', but with C it works without
| |
178 escape = true; | 196 |
179 break; | 197 quote = c; |
180 case '"': | 198 properties.start = index + 1; |
181 case '\'': | 199 } |
182 if (quote == 0) | 200 else if (quote == c) |
183 { | 201 { |
184 quote = c; | 202 // end of quoted. |
185 quote_start = index + 1; | 203 quote = 0; |
186 } | 204 properties.end = index; |
187 else if (quote == c) | 205 } |
188 { | 206 break; |
189 // end of quoted. | 207 case u']': |
190 quote = 0; | 208 if (quote == 0) |
191 removed += 2; | 209 { |
192 quote_end = index; | 210 if (properties.end == 0) |
193 } | 211 return DependentString(); |
194 break; | 212 if (properties.end + 1 != index) |
195 case ']': | 213 return DependentString(); |
196 if (quote == 0) | 214 suffix.start = index + 1; |
197 end = index + 1; // end of properties (after ]) | 215 } |
198 break; | 216 break; |
199 default: | 217 default: |
200 break; | 218 break; |
201 } | 219 } |
202 } | 220 } |
sergei
2018/02/05 14:51:05
It also differs from the regexp in current js impl
sergei
2018/02/05 14:51:06
What if filter is malformed and `end` is equal to
hub
2018/02/07 04:13:36
end cannot be equal to zero. It will be at least e
hub
2018/02/07 04:13:36
It's actually worse: the parser doesn't like it at
| |
203 | 221 |
204 if (quote != 0) | 222 if (suffix.start == at) |
sergei
2018/02/27 10:56:53
Just for reference, I think it (what the whole for
| |
205 quote_end = end - 1; | 223 return DependentString(); |
206 else if (quote_end <= quote_start) | 224 |
207 { | 225 String::size_type delimiter = text.find(ELEM_HIDE_DELIMITER, 0, |
208 // we likely didn't find a quoted content so we just take it as is. | 226 ELEM_HIDE_DELIMITER_LEN); |
209 quote_start = properties; | 227 // +1 for the replacement of "##" by "#?#" |
210 quote_end = end - 1; | 228 if (delimiter != text.npos) |
211 } | 229 at++; |
212 | 230 auto new_len = at + prefix.len() + PROPS_SELECTOR_LEN + properties.len() + 1 /* ) */ + suffix.len(); |
213 // +1 for the replacement of "##" by "#?#" | 231 |
214 String::size_type offset = 0; | 232 assert2(new_len + 1 == length || (delimiter == text.npos && new_len + 2 == length), u"Inconsistent length in filter conversion."_str); |
sergei
2018/02/27 10:56:53
not important just for reference
length == new_len
hub
2018/02/27 13:32:30
Acknowledged.
| |
215 | 233 |
216 String::size_type delimiter = text.find(ELEM_HIDE_DELIMITER, 0, | 234 DependentString converted(text, 0, new_len); |
217 ELEM_HIDE_DELIMITER_LEN); | 235 |
218 OwnedString converted(length + ((delimiter != text.npos) ? 1 : 0) - removed); | 236 if (suffix.len()) |
sergei
2018/02/05 14:51:06
Since removed cannot be negative (no growth), `tex
hub
2018/02/07 04:13:36
I'm not really fond of modifying in place, but we
| |
219 if (delimiter != text.npos) | 237 { |
220 { | 238 new_len -= suffix.len(); |
221 if (delimiter >= selectorPos) | 239 std::memmove(converted.data() + new_len, |
222 return OwnedString(text); | 240 text.data() + suffix.start, |
223 | 241 suffix.byte_len()); |
224 at++; | 242 } |
225 std::memcpy(converted.data(), text.data(), | 243 new_len--; |
226 delimiter * sizeof(String::value_type)); | 244 // here we need to move the properties before inserting the ')' |
227 offset += delimiter; | 245 auto parens = new_len; |
228 std::memcpy(converted.data() + offset, ELEM_HIDE_EMULATION_DELIMITER, | 246 if (properties.len()) |
229 ELEM_HIDE_EMULATION_DELIMITER_LEN * sizeof(String::value_type)); | 247 { |
230 offset += ELEM_HIDE_EMULATION_DELIMITER_LEN; | 248 new_len -= properties.len(); |
231 delimiter += ELEM_HIDE_DELIMITER_LEN; | 249 std::memmove(converted.data() + new_len, |
232 // we have already parsed to past the delimiter. | 250 text.data() + properties.start, properties.byte_len()); |
233 selectorPos -= delimiter; | 251 } |
234 } | 252 converted[parens] = u')'; |
235 else | 253 |
236 delimiter = 0; | 254 new_len -= PROPS_SELECTOR_LEN; |
237 | 255 std::memcpy(converted.data() + new_len, |
238 | 256 PROPS_SELECTOR, |
239 std::memcpy(converted.data() + offset, text.data() + delimiter, | 257 PROPS_SELECTOR_LEN * sizeof(String::value_type)); |
240 selectorPos * sizeof(String::value_type)); | 258 if (prefix.len()) |
241 offset += selectorPos; | 259 { |
242 | 260 new_len -= prefix.len(); |
243 std::memcpy(converted.data() + offset, NEW_PROPS_SELECTOR, | 261 std::memmove(converted.data() + new_len, |
244 NEW_PROPS_SELECTOR_LEN * sizeof(String::value_type)); | 262 text.data() + prefix.start, prefix.byte_len()); |
245 offset += NEW_PROPS_SELECTOR_LEN; | 263 } |
246 | 264 |
247 std::memcpy(converted.data() + offset, text.data() + quote_start, | 265 if (delimiter != String::npos) |
248 (quote_end - quote_start) * sizeof(String::value_type)); | 266 { |
249 offset += quote_end - quote_start; | 267 std::memcpy(converted.data() + delimiter, ELEM_HIDE_EMULATION_DELIMITER, |
250 | 268 ELEM_HIDE_EMULATION_DELIMITER_LEN * sizeof(String::value_type)); |
251 std::memcpy(converted.data() + offset, u")", sizeof(String::value_type)); | 269 } |
252 offset++; | 270 |
253 | 271 return converted; |
254 std::memcpy(converted.data() + offset, text.data() + end, | |
255 (length - end) * sizeof(String::value_type)); | |
256 offset += (length - end) * sizeof(String::value_type); | |
257 | |
258 return converted; | |
259 } | |
260 | |
261 return OwnedString(text); | |
262 } | 272 } |
263 | 273 |
264 namespace | 274 namespace |
265 { | 275 { |
266 static constexpr String::value_type OPENING_CURLY_REPLACEMENT[] = u"\\7B "; | 276 static constexpr String::value_type OPENING_CURLY_REPLACEMENT[] = u"\\7B "; |
267 static constexpr String::value_type CLOSING_CURLY_REPLACEMENT[] = u"\\7D "; | 277 static constexpr String::value_type CLOSING_CURLY_REPLACEMENT[] = u"\\7D "; |
268 static constexpr String::size_type CURLY_REPLACEMENT_SIZE = LENGTH_OF(OPENING_CURLY_REPLACEMENT); | 278 static constexpr String::size_type CURLY_REPLACEMENT_SIZE = str_length_of(OPENING_CURLY_REPLACEMENT); |
269 | 279 |
270 OwnedString EscapeCurlies(String::size_type replacementCount, | 280 OwnedString EscapeCurlies(String::size_type replacementCount, |
271 const DependentString& str) | 281 const DependentString& str) |
272 { | 282 { |
273 OwnedString result(str.length() + replacementCount * (CURLY_REPLACEMENT_SIZE - 1)); | 283 OwnedString result(str.length() + replacementCount * (CURLY_REPLACEMENT_SIZE - 1)); |
274 | 284 |
275 String::value_type* current = result.data(); | 285 String::value_type* current = result.data(); |
276 for (String::size_type i = 0; i < str.length(); i++) | 286 for (String::size_type i = 0; i < str.length(); i++) |
277 { | 287 { |
278 switch(str[i]) | 288 switch(str[i]) |
(...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
322 if (item.second && !item.first.empty()) | 332 if (item.second && !item.first.empty()) |
323 { | 333 { |
324 if (!result.empty()) | 334 if (!result.empty()) |
325 result.append(u','); | 335 result.append(u','); |
326 result.append(item.first); | 336 result.append(item.first); |
327 } | 337 } |
328 } | 338 } |
329 } | 339 } |
330 return result; | 340 return result; |
331 } | 341 } |
LEFT | RIGHT |