Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Delta Between Two Patch Sets: compiled/filter/ElemHideBase.cpp

Issue 29595633: Issue 5870 - Implement the new ElemHideEmulation filter type (Closed) Base URL: https://hg.adblockplus.org/adblockpluscore/
Left Patch Set: Rebased Created Jan. 30, 2018, 6:16 p.m.
Right Patch Set: Deal with ill formed filters. Created Feb. 14, 2018, 5:05 p.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
Left: Side by side diff | Download
Right: Side by side diff | Download
« no previous file with change/comment | « compiled/filter/ElemHideBase.h ('k') | compiled/filter/Filter.cpp » ('j') | no next file with change/comment »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
LEFTRIGHT
1 /* 1 /*
2 * This file is part of Adblock Plus <https://adblockplus.org/>, 2 * This file is part of Adblock Plus <https://adblockplus.org/>,
3 * Copyright (C) 2006-present eyeo GmbH 3 * Copyright (C) 2006-present eyeo GmbH
4 * 4 *
5 * Adblock Plus is free software: you can redistribute it and/or modify 5 * Adblock Plus is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 3 as 6 * it under the terms of the GNU General Public License version 3 as
7 * published by the Free Software Foundation. 7 * published by the Free Software Foundation.
8 * 8 *
9 * Adblock Plus is distributed in the hope that it will be useful, 9 * Adblock Plus is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details. 12 * GNU General Public License for more details.
13 * 13 *
14 * You should have received a copy of the GNU General Public License 14 * You should have received a copy of the GNU General Public License
15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. 15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.
16 */ 16 */
17 17
18 #include <cstring> 18 #include <cstring>
19 19
20 #include "ElemHideBase.h" 20 #include "ElemHideBase.h"
21 #include "../StringScanner.h" 21 #include "../StringScanner.h"
22 #include "../Utils.h"
23
24 ABP_NS_USING
22 25
23 namespace 26 namespace
24 { 27 {
25 void NormalizeWhitespace(DependentString& text, String::size_type& domainsEnd, 28 void NormalizeWhitespace(DependentString& text, String::size_type& domainsEnd,
26 String::size_type& selectorStart) 29 String::size_type& selectorStart)
27 { 30 {
28 // For element hiding filters we only want to remove spaces preceding the 31 // For element hiding filters we only want to remove spaces preceding the
29 // selector part. The positions we've determined already have to be adjusted 32 // selector part. The positions we've determined already have to be adjusted
30 // accordingly. 33 // accordingly.
31 34
(...skipping 11 matching lines...) Expand all
43 if (pos < selectorStart && text[pos] == ' ') 46 if (pos < selectorStart && text[pos] == ' ')
44 delta++; 47 delta++;
45 else 48 else
46 text[pos - delta] = text[pos]; 49 text[pos - delta] = text[pos];
47 } 50 }
48 selectorStart -= delta; 51 selectorStart -= delta;
49 52
50 text.reset(text, 0, len - delta); 53 text.reset(text, 0, len - delta);
51 } 54 }
52 55
53 // Convert filter from the old syntax to the new. 56 static constexpr String::value_type ELEM_HIDE_DELIMITER[] = u"##";
54 OwnedString ConvertFilter(const String& text, String::size_type at) 57 static constexpr String::size_type ELEM_HIDE_DELIMITER_LEN = str_length_of(ELEM_HIDE_DELIMITER);
sergei 2018/01/30 19:02:40 I'm not sure that we need to build OwnedString for
hub 2018/01/30 22:53:13 Since we copy the string at the end in the Filter
55 { 58
56 static const auto propsSelector = u"[-abp-properties="_str; 59 static constexpr String::value_type ELEM_HIDE_EMULATION_DELIMITER[] = u"#?#";
57 static const auto newPropsSelector = u":-abp-properties("_str; 60 static constexpr String::size_type ELEM_HIDE_EMULATION_DELIMITER_LEN = str_length_of(ELEM_HIDE_EMULATION_DELIMITER);
58 auto selectorPos = text.find(propsSelector, at); 61
59 if (selectorPos != text.npos) 62 static constexpr String::value_type OLD_PROPS_SELECTOR[] = u"[-abp-properties=";
60 { 63 static constexpr String::size_type OLD_PROPS_SELECTOR_LEN = str_length_of(OLD_PROPS_SELECTOR);
61 auto length = text.length(); 64
62 auto properties = selectorPos + propsSelector.length(); 65 static constexpr String::value_type PROPS_SELECTOR[] = u":-abp-properties(";
63 String::value_type quote = 0; 66 static constexpr String::size_type PROPS_SELECTOR_LEN = str_length_of(PROPS_SELECTOR);
64 bool escape = false;
65 String::size_type removed = 0; // how many chars we remove
66 String::size_type end = properties;
67 String::size_type quote_start = 0;
68 String::size_type quote_end = 0;
69 for (auto index = properties;
70 index < length && end == properties; index++)
71 {
72 if (escape)
73 {
74 escape = false;
75 continue;
76 }
77
78 auto c = text[index];
79 switch (c)
80 {
81 case '\\':
82 escape = true;
83 break;
84 case '"':
85 case '\'':
86 if (quote == 0)
87 {
88 quote = c;
89 quote_start = index + 1;
90 }
91 else if (quote == c)
92 {
93 // end of quoted.
94 quote = 0;
95 removed += 2;
96 quote_end = index;
97 }
98 break;
99 case ']':
100 if (quote == 0)
101 end = index + 1; // end of properties (after ])
102 break;
103 default:
104 break;
105 }
106 }
107
108 if (quote != 0)
109 quote_end = end - 1;
110 else if (quote_end <= quote_start)
111 {
112 // we likely didn't find a quoted content so we just take it as is.
113 quote_start = properties;
114 quote_end = end - 1;
115 }
116
117 OwnedString converted(length - removed);
118 String::size_type offset = 0;
119 std::memcpy(converted.data(), text.data(),
120 selectorPos * sizeof(String::value_type));
121 offset += selectorPos;
122
123 std::memcpy(converted.data() + offset, newPropsSelector.data(),
124 newPropsSelector.length() * sizeof(String::value_type));
125 offset += newPropsSelector.length();
126
127 std::memcpy(converted.data() + offset, text.data() + quote_start,
128 (quote_end - quote_start) * sizeof(String::value_type));
129 offset += quote_end - quote_start;
130
131 std::memcpy(converted.data() + offset, u")", sizeof(String::value_type));
132 offset++;
133
134 std::memcpy(converted.data() + offset, text.data() + end,
135 (length - end) * sizeof(String::value_type));
136 offset += (length - end) * sizeof(String::value_type);
137
138 return converted;
139 }
140
141 return OwnedString(text);
142 }
143 } 67 }
144 68
145 ElemHideBase::ElemHideBase(Type type, const String& text, const ElemHideData& data) 69 ElemHideBase::ElemHideBase(Type type, const String& text, const ElemHideData& data)
146 : ActiveFilter(type, ConvertFilter(text, data.mSelectorStart), false), 70 : ActiveFilter(type, text, false), mData(data)
147 mData(data)
148 { 71 {
149 if (mData.HasDomains()) 72 if (mData.HasDomains())
150 ParseDomains(mData.GetDomainsSource(mText), u','); 73 ParseDomains(mData.GetDomainsSource(mText), u',');
151 } 74 }
152 75
153 Filter::Type ElemHideBase::Parse(DependentString& text, ElemHideData& data) 76 Filter::Type ElemHideBase::Parse(DependentString& text, ElemHideData& data, bool& needConversion)
154 { 77 {
78 needConversion = false;
79
155 StringScanner scanner(text); 80 StringScanner scanner(text);
156 81
157 // Domains part 82 // Domains part
158 bool seenSpaces = false; 83 bool seenSpaces = false;
159 while (!scanner.done()) 84 while (!scanner.done())
160 { 85 {
161 String::value_type next = scanner.next(); 86 String::value_type next = scanner.next();
162 if (next == u'#') 87 if (next == u'#')
163 { 88 {
164 data.mDomainsEnd = scanner.position(); 89 data.mDomainsEnd = scanner.position();
(...skipping 14 matching lines...) Expand all
179 break; 104 break;
180 } 105 }
181 } 106 }
182 107
183 seenSpaces |= scanner.skip(u' '); 108 seenSpaces |= scanner.skip(u' ');
184 bool emulation = false; 109 bool emulation = false;
185 bool exception = scanner.skipOne(u'@'); 110 bool exception = scanner.skipOne(u'@');
186 if (exception) 111 if (exception)
187 seenSpaces |= scanner.skip(u' '); 112 seenSpaces |= scanner.skip(u' ');
188 else 113 else
189 emulation = scanner.skipOne(u'?'); 114 emulation = scanner.skipOne(u'?');
sergei 2018/01/30 19:02:40 It's actually different from "Filter.elemhideRegEx
hub 2018/01/30 22:53:13 #@?# is "rejected". actually according to issue 62
190 115
191 String::value_type next = scanner.next(); 116 String::value_type next = scanner.next();
192 if (next != u'#') 117 if (next != u'#')
193 return Type::UNKNOWN; 118 return Type::UNKNOWN;
194 119
195 // Selector part 120 // Selector part
196 121
197 // Selector shouldn't be empty 122 // Selector shouldn't be empty
198 seenSpaces |= scanner.skip(u' '); 123 seenSpaces |= scanner.skip(u' ');
199 if (scanner.done()) 124 if (scanner.done())
200 return Type::UNKNOWN; 125 return Type::UNKNOWN;
201 126
202 data.mSelectorStart = scanner.position() + 1; 127 data.mSelectorStart = scanner.position() + 1;
203 128
204 // We are done validating, now we can normalize whitespace and the domain part 129 // We are done validating, now we can normalize whitespace and the domain part
205 if (seenSpaces) 130 if (seenSpaces)
206 NormalizeWhitespace(text, data.mDomainsEnd, data.mSelectorStart); 131 NormalizeWhitespace(text, data.mDomainsEnd, data.mSelectorStart);
207 DependentString(text, 0, data.mDomainsEnd).toLower(); 132 DependentString(text, 0, data.mDomainsEnd).toLower();
208 133
134 // We still need to check the old syntax. It will be converted when
135 // we instantiate the filter.
136 if (!emulation &&
137 text.find(OLD_PROPS_SELECTOR, data.mSelectorStart, OLD_PROPS_SELECTOR_LEN) != text.npos)
138 {
139 needConversion = true;
140 emulation = !exception;
141 }
142
209 if (exception) 143 if (exception)
210 return Type::ELEMHIDEEXCEPTION; 144 return Type::ELEMHIDEEXCEPTION;
211 145
212 if (emulation) 146 if (emulation)
213 return Type::ELEMHIDEEMULATION; 147 return Type::ELEMHIDEEMULATION;
214 148
215 return Type::ELEMHIDE; 149 return Type::ELEMHIDE;
216 } 150 }
217 151
218 namespace 152 namespace
219 { 153 {
154 struct Range
155 {
156 String::size_type start;
157 String::size_type end;
158 String::size_type len() const
159 {
160 return end - start;
161 }
162 String::size_type byte_len() const
163 {
164 return len() * sizeof(String::value_type);
165 }
166 };
167 }
168
169 // Convert filter from the old syntax to the new.
170 DependentString ElemHideBase::ConvertFilter(String& text, String::size_type& at)
171 {
172 Range prefix = {at, text.find(OLD_PROPS_SELECTOR, at, OLD_PROPS_SELECTOR_LEN)};
173 if (prefix.end == text.npos)
174 return DependentString(text);
175
176 auto length = text.length();
177 Range suffix = {at, length};
178 Range properties = { prefix.end + OLD_PROPS_SELECTOR_LEN, 0 };
179 String::value_type quote = 0;
180 for (auto index = properties.start;
181 index < length && (suffix.start == at); index++)
182 {
183 auto c = text[index];
184 switch (c)
185 {
186 case u'"':
187 case u'\'':
188 if (quote == 0)
189 {
190 // syntax error: we already have a quoted section.
191 if (properties.end)
192 return DependentString();
193
194 if (properties.start != index)
195 return DependentString();
196
197 quote = c;
198 properties.start = index + 1;
199 }
200 else if (quote == c)
201 {
202 // end of quoted.
203 quote = 0;
204 properties.end = index;
205 }
206 break;
207 case u']':
208 if (quote == 0)
209 {
210 if (properties.end == 0)
211 return DependentString();
212 if (properties.end + 1 != index)
213 return DependentString();
214 suffix.start = index + 1;
215 }
216 break;
217 default:
218 break;
219 }
220 }
221
222 if (suffix.start == at)
sergei 2018/02/27 10:56:53 Just for reference, I think it (what the whole for
223 return DependentString();
224
225 String::size_type delimiter = text.find(ELEM_HIDE_DELIMITER, 0,
226 ELEM_HIDE_DELIMITER_LEN);
227 // +1 for the replacement of "##" by "#?#"
228 if (delimiter != text.npos)
229 at++;
230 auto new_len = at + prefix.len() + PROPS_SELECTOR_LEN + properties.len() + 1 /* ) */ + suffix.len();
231
232 assert2(new_len + 1 == length || (delimiter == text.npos && new_len + 2 == length), u"Inconsistent length in filter conversion."_str);
sergei 2018/02/27 10:56:53 not important just for reference length == new_len
hub 2018/02/27 13:32:30 Acknowledged.
233
234 DependentString converted(text, 0, new_len);
235
236 if (suffix.len())
237 {
238 new_len -= suffix.len();
239 std::memmove(converted.data() + new_len,
240 text.data() + suffix.start,
241 suffix.byte_len());
242 }
243 new_len--;
244 // here we need to move the properties before inserting the ')'
245 auto parens = new_len;
246 if (properties.len())
247 {
248 new_len -= properties.len();
249 std::memmove(converted.data() + new_len,
250 text.data() + properties.start, properties.byte_len());
251 }
252 converted[parens] = u')';
253
254 new_len -= PROPS_SELECTOR_LEN;
255 std::memcpy(converted.data() + new_len,
256 PROPS_SELECTOR,
257 PROPS_SELECTOR_LEN * sizeof(String::value_type));
258 if (prefix.len())
259 {
260 new_len -= prefix.len();
261 std::memmove(converted.data() + new_len,
262 text.data() + prefix.start, prefix.byte_len());
263 }
264
265 if (delimiter != String::npos)
266 {
267 std::memcpy(converted.data() + delimiter, ELEM_HIDE_EMULATION_DELIMITER,
268 ELEM_HIDE_EMULATION_DELIMITER_LEN * sizeof(String::value_type));
269 }
270
271 return converted;
272 }
273
274 namespace
275 {
220 static constexpr String::value_type OPENING_CURLY_REPLACEMENT[] = u"\\7B "; 276 static constexpr String::value_type OPENING_CURLY_REPLACEMENT[] = u"\\7B ";
221 static constexpr String::value_type CLOSING_CURLY_REPLACEMENT[] = u"\\7D "; 277 static constexpr String::value_type CLOSING_CURLY_REPLACEMENT[] = u"\\7D ";
222 static constexpr String::size_type CURLY_REPLACEMENT_SIZE = sizeof(OPENING_CURLY_REPLACEMENT) / sizeof(OPENING_CURLY_REPLACEMENT[0]) - 1; 278 static constexpr String::size_type CURLY_REPLACEMENT_SIZE = str_length_of(OPENING_CURLY_REPLACEMENT);
223 279
224 OwnedString EscapeCurlies(String::size_type replacementCount, 280 OwnedString EscapeCurlies(String::size_type replacementCount,
225 const DependentString& str) 281 const DependentString& str)
226 { 282 {
227 OwnedString result(str.length() + replacementCount * (CURLY_REPLACEMENT_SIZE - 1)); 283 OwnedString result(str.length() + replacementCount * (CURLY_REPLACEMENT_SIZE - 1));
228 284
229 String::value_type* current = result.data(); 285 String::value_type* current = result.data();
230 for (String::size_type i = 0; i < str.length(); i++) 286 for (String::size_type i = 0; i < str.length(); i++)
231 { 287 {
232 switch(str[i]) 288 switch(str[i])
(...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after
276 if (item.second && !item.first.empty()) 332 if (item.second && !item.first.empty())
277 { 333 {
278 if (!result.empty()) 334 if (!result.empty())
279 result.append(u','); 335 result.append(u',');
280 result.append(item.first); 336 result.append(item.first);
281 } 337 }
282 } 338 }
283 } 339 }
284 return result; 340 return result;
285 } 341 }
LEFTRIGHT

Powered by Google App Engine
This is Rietveld