 Issue 29595633:
  Issue 5870 - Implement the new ElemHideEmulation filter type  (Closed) 
  Base URL: https://hg.adblockplus.org/adblockpluscore/
    
  
    Issue 29595633:
  Issue 5870 - Implement the new ElemHideEmulation filter type  (Closed) 
  Base URL: https://hg.adblockplus.org/adblockpluscore/

| Left: | ||
| Right: | 
| OLD | NEW | 
|---|---|
| 1 /* | 1 /* | 
| 2 * This file is part of Adblock Plus <https://adblockplus.org/>, | 2 * This file is part of Adblock Plus <https://adblockplus.org/>, | 
| 3 * Copyright (C) 2006-present eyeo GmbH | 3 * Copyright (C) 2006-present eyeo GmbH | 
| 4 * | 4 * | 
| 5 * Adblock Plus is free software: you can redistribute it and/or modify | 5 * Adblock Plus is free software: you can redistribute it and/or modify | 
| 6 * it under the terms of the GNU General Public License version 3 as | 6 * it under the terms of the GNU General Public License version 3 as | 
| 7 * published by the Free Software Foundation. | 7 * published by the Free Software Foundation. | 
| 8 * | 8 * | 
| 9 * Adblock Plus is distributed in the hope that it will be useful, | 9 * Adblock Plus is distributed in the hope that it will be useful, | 
| 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of | 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of | 
| 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 
| 12 * GNU General Public License for more details. | 12 * GNU General Public License for more details. | 
| 13 * | 13 * | 
| 14 * You should have received a copy of the GNU General Public License | 14 * You should have received a copy of the GNU General Public License | 
| 15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. | 15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. | 
| 16 */ | 16 */ | 
| 17 | 17 | 
| 18 #include <cstring> | 18 #include <cstring> | 
| 19 | 19 | 
| 20 #include "ElemHideBase.h" | 20 #include "ElemHideBase.h" | 
| 21 #include "../StringScanner.h" | 21 #include "../StringScanner.h" | 
| 22 | 22 | 
| 23 // the length of a static string array | |
| 24 #define LENGTH_OF(x) ((sizeof(x) / sizeof(x[0])) - 1) | |
| 
sergei
2018/02/05 14:51:06
I think we may use constexpr function here, and ma
 
hub
2018/02/07 04:13:37
No. This has to be a macro because of sizeof() and
 
sergei
2018/02/12 12:53:17
The following does work
template<typename T, size_
 
hub
2018/02/12 18:14:38
Done.
 
sergei
2018/02/13 09:12:49
What about replacing of "static string array" by s
 
sergei
2018/02/13 16:05:59
It seems only this is left.
 
hub
2018/02/13 16:23:36
oops.
Done
 | |
| 25 | |
| 23 namespace | 26 namespace | 
| 24 { | 27 { | 
| 25 void NormalizeWhitespace(DependentString& text, String::size_type& domainsEnd, | 28 void NormalizeWhitespace(DependentString& text, String::size_type& domainsEnd, | 
| 26 String::size_type& selectorStart) | 29 String::size_type& selectorStart) | 
| 27 { | 30 { | 
| 28 // For element hiding filters we only want to remove spaces preceding the | 31 // For element hiding filters we only want to remove spaces preceding the | 
| 29 // selector part. The positions we've determined already have to be adjusted | 32 // selector part. The positions we've determined already have to be adjusted | 
| 30 // accordingly. | 33 // accordingly. | 
| 31 | 34 | 
| 32 String::size_type delta = 0; | 35 String::size_type delta = 0; | 
| 33 String::size_type len = text.length(); | 36 String::size_type len = text.length(); | 
| 34 | 37 | 
| 35 // The first character is guaranteed to be a non-space, the string has been | 38 // The first character is guaranteed to be a non-space, the string has been | 
| 36 // trimmed earlier. | 39 // trimmed earlier. | 
| 37 for (String::size_type pos = 1; pos < len; pos++) | 40 for (String::size_type pos = 1; pos < len; pos++) | 
| 38 { | 41 { | 
| 39 if (pos == domainsEnd) | 42 if (pos == domainsEnd) | 
| 40 domainsEnd -= delta; | 43 domainsEnd -= delta; | 
| 41 | 44 | 
| 42 // Only spaces before selectorStart position should be removed. | 45 // Only spaces before selectorStart position should be removed. | 
| 43 if (pos < selectorStart && text[pos] == ' ') | 46 if (pos < selectorStart && text[pos] == ' ') | 
| 44 delta++; | 47 delta++; | 
| 45 else | 48 else | 
| 46 text[pos - delta] = text[pos]; | 49 text[pos - delta] = text[pos]; | 
| 47 } | 50 } | 
| 48 selectorStart -= delta; | 51 selectorStart -= delta; | 
| 49 | 52 | 
| 50 text.reset(text, 0, len - delta); | 53 text.reset(text, 0, len - delta); | 
| 51 } | 54 } | 
| 55 | |
| 56 static constexpr String::value_type ELEM_HIDE_DELIMITER[] = u"##"; | |
| 57 static constexpr String::size_type ELEM_HIDE_DELIMITER_LEN = LENGTH_OF(ELEM_HIDE_DELIMITER); | |
| 58 | |
| 59 static constexpr String::value_type ELEM_HIDE_EMULATION_DELIMITER[] = u"#?#"; | |
| 60 static constexpr String::size_type ELEM_HIDE_EMULATION_DELIMITER_LEN = LENGTH_OF(ELEM_HIDE_EMULATION_DELIMITER); | |
| 61 | |
| 62 static constexpr String::value_type PROPS_SELECTOR[] = u"[-abp-properties="; | |
| 63 static constexpr String::size_type PROPS_SELECTOR_LEN = LENGTH_OF(PROPS_SELECTOR); | |
| 64 | |
| 65 static constexpr String::value_type NEW_PROPS_SELECTOR[] = u":-abp-properties("; | |
| 
sergei
2018/02/05 14:51:05
I would rather prefer to call it PROPS_SELECTOR an
 
hub
2018/02/07 04:13:36
Done.
 | |
| 66 static constexpr String::size_type NEW_PROPS_SELECTOR_LEN = LENGTH_OF(NEW_PROPS_SELECTOR); | |
| 52 } | 67 } | 
| 53 | 68 | 
| 54 ElemHideBase::ElemHideBase(Type type, const String& text, const ElemHideData& data) | 69 ElemHideBase::ElemHideBase(Type type, const String& text, const ElemHideData& data) | 
| 55 : ActiveFilter(type, text, false), mData(data) | 70 : ActiveFilter(type, text, false), mData(data) | 
| 56 { | 71 { | 
| 57 if (mData.HasDomains()) | 72 if (mData.HasDomains()) | 
| 58 ParseDomains(mData.GetDomainsSource(mText), u','); | 73 ParseDomains(mData.GetDomainsSource(mText), u','); | 
| 59 } | 74 } | 
| 60 | 75 | 
| 61 Filter::Type ElemHideBase::Parse(DependentString& text, ElemHideData& data) | 76 Filter::Type ElemHideBase::Parse(DependentString& text, ElemHideData& data) | 
| (...skipping 20 matching lines...) Expand all Loading... | |
| 82 case u'"': | 97 case u'"': | 
| 83 case u'!': | 98 case u'!': | 
| 84 return Type::UNKNOWN; | 99 return Type::UNKNOWN; | 
| 85 case u' ': | 100 case u' ': | 
| 86 seenSpaces = true; | 101 seenSpaces = true; | 
| 87 break; | 102 break; | 
| 88 } | 103 } | 
| 89 } | 104 } | 
| 90 | 105 | 
| 91 seenSpaces |= scanner.skip(u' '); | 106 seenSpaces |= scanner.skip(u' '); | 
| 107 bool emulation = false; | |
| 92 bool exception = scanner.skipOne(u'@'); | 108 bool exception = scanner.skipOne(u'@'); | 
| 93 if (exception) | 109 if (exception) | 
| 94 seenSpaces |= scanner.skip(u' '); | 110 seenSpaces |= scanner.skip(u' '); | 
| 111 else | |
| 112 emulation = scanner.skipOne(u'?'); | |
| 95 | 113 | 
| 96 String::value_type next = scanner.next(); | 114 String::value_type next = scanner.next(); | 
| 97 if (next != u'#') | 115 if (next != u'#') | 
| 98 return Type::UNKNOWN; | 116 return Type::UNKNOWN; | 
| 99 | 117 | 
| 100 // Selector part | 118 // Selector part | 
| 101 | 119 | 
| 102 // Selector shouldn't be empty | 120 // Selector shouldn't be empty | 
| 103 seenSpaces |= scanner.skip(u' '); | 121 seenSpaces |= scanner.skip(u' '); | 
| 104 if (scanner.done()) | 122 if (scanner.done()) | 
| 105 return Type::UNKNOWN; | 123 return Type::UNKNOWN; | 
| 106 | 124 | 
| 107 data.mSelectorStart = scanner.position() + 1; | 125 data.mSelectorStart = scanner.position() + 1; | 
| 126 data.mNeedConversion = false; | |
| 108 | 127 | 
| 109 // We are done validating, now we can normalize whitespace and the domain part | 128 // We are done validating, now we can normalize whitespace and the domain part | 
| 110 if (seenSpaces) | 129 if (seenSpaces) | 
| 111 NormalizeWhitespace(text, data.mDomainsEnd, data.mSelectorStart); | 130 NormalizeWhitespace(text, data.mDomainsEnd, data.mSelectorStart); | 
| 112 DependentString(text, 0, data.mDomainsEnd).toLower(); | 131 DependentString(text, 0, data.mDomainsEnd).toLower(); | 
| 113 | 132 | 
| 133 // We still need to check the old syntax. It will be converted when | |
| 134 // we instantiate the filter. | |
| 135 if (!emulation && | |
| 136 text.find(PROPS_SELECTOR, data.mSelectorStart, PROPS_SELECTOR_LEN) != text.npos) | |
| 137 { | |
| 138 data.mNeedConversion = true; | |
| 139 emulation = !exception; | |
| 140 } | |
| 141 | |
| 114 if (exception) | 142 if (exception) | 
| 115 return Type::ELEMHIDEEXCEPTION; | 143 return Type::ELEMHIDEEXCEPTION; | 
| 116 | 144 | 
| 117 if (text.find(u"[-abp-properties="_str, data.mSelectorStart) != text.npos) | 145 if (emulation) | 
| 118 return Type::ELEMHIDEEMULATION; | 146 return Type::ELEMHIDEEMULATION; | 
| 119 | 147 | 
| 120 return Type::ELEMHIDE; | 148 return Type::ELEMHIDE; | 
| 121 } | 149 } | 
| 122 | 150 | 
| 151 // Convert filter from the old syntax to the new. | |
| 152 OwnedString ElemHideBase::ConvertFilter(const String& text, String::size_type& at) | |
| 153 { | |
| 154 auto selectorPos = text.find(PROPS_SELECTOR, at, PROPS_SELECTOR_LEN); | |
| 155 if (selectorPos != text.npos) | |
| 156 { | |
| 157 auto length = text.length(); | |
| 158 auto properties = selectorPos + PROPS_SELECTOR_LEN; | |
| 
sergei
2018/02/05 14:51:06
should it be something like propertiesPos?
 
hub
2018/02/07 04:13:36
Done.
 | |
| 159 String::value_type quote = 0; | |
| 160 bool escape = false; | |
| 161 String::size_type removed = 0; // how many chars we remove | |
| 162 String::size_type end = properties; | |
| 163 String::size_type quote_start = 0; | |
| 164 String::size_type quote_end = 0; | |
| 165 for (auto index = properties; | |
| 166 index < length && end == properties; index++) | |
| 167 { | |
| 168 if (escape) | |
| 169 { | |
| 170 escape = false; | |
| 171 continue; | |
| 172 } | |
| 173 | |
| 174 auto c = text[index]; | |
| 175 switch (c) | |
| 176 { | |
| 177 case '\\': | |
| 
sergei
2018/02/05 14:51:05
should they be u'\\', u'"', etc? Since we keep in
 
hub
2018/02/07 04:13:36
yes it should be u'', but with C it works without
 | |
| 178 escape = true; | |
| 179 break; | |
| 180 case '"': | |
| 181 case '\'': | |
| 182 if (quote == 0) | |
| 183 { | |
| 184 quote = c; | |
| 185 quote_start = index + 1; | |
| 186 } | |
| 187 else if (quote == c) | |
| 188 { | |
| 189 // end of quoted. | |
| 190 quote = 0; | |
| 191 removed += 2; | |
| 192 quote_end = index; | |
| 193 } | |
| 194 break; | |
| 195 case ']': | |
| 196 if (quote == 0) | |
| 197 end = index + 1; // end of properties (after ]) | |
| 198 break; | |
| 199 default: | |
| 200 break; | |
| 201 } | |
| 202 } | |
| 
sergei
2018/02/05 14:51:05
It also differs from the regexp in current js impl
 
sergei
2018/02/05 14:51:06
What if filter is malformed and `end` is equal to
 
hub
2018/02/07 04:13:36
end cannot be equal to zero. It will be at least e
 
hub
2018/02/07 04:13:36
It's actually worse: the parser doesn't like it at
 | |
| 203 | |
| 204 if (quote != 0) | |
| 205 quote_end = end - 1; | |
| 206 else if (quote_end <= quote_start) | |
| 207 { | |
| 208 // we likely didn't find a quoted content so we just take it as is. | |
| 209 quote_start = properties; | |
| 210 quote_end = end - 1; | |
| 211 } | |
| 212 | |
| 213 // +1 for the replacement of "##" by "#?#" | |
| 214 String::size_type offset = 0; | |
| 215 | |
| 216 String::size_type delimiter = text.find(ELEM_HIDE_DELIMITER, 0, | |
| 217 ELEM_HIDE_DELIMITER_LEN); | |
| 218 OwnedString converted(length + ((delimiter != text.npos) ? 1 : 0) - removed); | |
| 
sergei
2018/02/05 14:51:06
Since removed cannot be negative (no growth), `tex
 
hub
2018/02/07 04:13:36
I'm not really fond of modifying in place, but we
 | |
| 219 if (delimiter != text.npos) | |
| 220 { | |
| 221 if (delimiter >= selectorPos) | |
| 222 return OwnedString(text); | |
| 223 | |
| 224 at++; | |
| 225 std::memcpy(converted.data(), text.data(), | |
| 226 delimiter * sizeof(String::value_type)); | |
| 227 offset += delimiter; | |
| 228 std::memcpy(converted.data() + offset, ELEM_HIDE_EMULATION_DELIMITER, | |
| 229 ELEM_HIDE_EMULATION_DELIMITER_LEN * sizeof(String::value_type)); | |
| 230 offset += ELEM_HIDE_EMULATION_DELIMITER_LEN; | |
| 231 delimiter += ELEM_HIDE_DELIMITER_LEN; | |
| 232 // we have already parsed to past the delimiter. | |
| 233 selectorPos -= delimiter; | |
| 234 } | |
| 235 else | |
| 236 delimiter = 0; | |
| 237 | |
| 238 | |
| 239 std::memcpy(converted.data() + offset, text.data() + delimiter, | |
| 240 selectorPos * sizeof(String::value_type)); | |
| 241 offset += selectorPos; | |
| 242 | |
| 243 std::memcpy(converted.data() + offset, NEW_PROPS_SELECTOR, | |
| 244 NEW_PROPS_SELECTOR_LEN * sizeof(String::value_type)); | |
| 245 offset += NEW_PROPS_SELECTOR_LEN; | |
| 246 | |
| 247 std::memcpy(converted.data() + offset, text.data() + quote_start, | |
| 248 (quote_end - quote_start) * sizeof(String::value_type)); | |
| 249 offset += quote_end - quote_start; | |
| 250 | |
| 251 std::memcpy(converted.data() + offset, u")", sizeof(String::value_type)); | |
| 252 offset++; | |
| 253 | |
| 254 std::memcpy(converted.data() + offset, text.data() + end, | |
| 255 (length - end) * sizeof(String::value_type)); | |
| 256 offset += (length - end) * sizeof(String::value_type); | |
| 257 | |
| 258 return converted; | |
| 259 } | |
| 260 | |
| 261 return OwnedString(text); | |
| 262 } | |
| 263 | |
| 123 namespace | 264 namespace | 
| 124 { | 265 { | 
| 125 static constexpr String::value_type OPENING_CURLY_REPLACEMENT[] = u"\\7B "; | 266 static constexpr String::value_type OPENING_CURLY_REPLACEMENT[] = u"\\7B "; | 
| 126 static constexpr String::value_type CLOSING_CURLY_REPLACEMENT[] = u"\\7D "; | 267 static constexpr String::value_type CLOSING_CURLY_REPLACEMENT[] = u"\\7D "; | 
| 127 static constexpr String::size_type CURLY_REPLACEMENT_SIZE = sizeof(OPENING_CURLY_REPLACEMENT) / sizeof(OPENING_CURLY_REPLACEMENT[0]) - 1; | 268 static constexpr String::size_type CURLY_REPLACEMENT_SIZE = LENGTH_OF(OPENING_CURLY_REPLACEMENT); | 
| 128 | 269 | 
| 129 OwnedString EscapeCurlies(String::size_type replacementCount, | 270 OwnedString EscapeCurlies(String::size_type replacementCount, | 
| 130 const DependentString& str) | 271 const DependentString& str) | 
| 131 { | 272 { | 
| 132 OwnedString result(str.length() + replacementCount * (CURLY_REPLACEMENT_SIZE - 1)); | 273 OwnedString result(str.length() + replacementCount * (CURLY_REPLACEMENT_SIZE - 1)); | 
| 133 | 274 | 
| 134 String::value_type* current = result.data(); | 275 String::value_type* current = result.data(); | 
| 135 for (String::size_type i = 0; i < str.length(); i++) | 276 for (String::size_type i = 0; i < str.length(); i++) | 
| 136 { | 277 { | 
| 137 switch(str[i]) | 278 switch(str[i]) | 
| (...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 181 if (item.second && !item.first.empty()) | 322 if (item.second && !item.first.empty()) | 
| 182 { | 323 { | 
| 183 if (!result.empty()) | 324 if (!result.empty()) | 
| 184 result.append(u','); | 325 result.append(u','); | 
| 185 result.append(item.first); | 326 result.append(item.first); | 
| 186 } | 327 } | 
| 187 } | 328 } | 
| 188 } | 329 } | 
| 189 return result; | 330 return result; | 
| 190 } | 331 } | 
| OLD | NEW |