 Issue 29595633:
  Issue 5870 - Implement the new ElemHideEmulation filter type  (Closed) 
  Base URL: https://hg.adblockplus.org/adblockpluscore/
    
  
    Issue 29595633:
  Issue 5870 - Implement the new ElemHideEmulation filter type  (Closed) 
  Base URL: https://hg.adblockplus.org/adblockpluscore/| Index: compiled/filter/ElemHideBase.cpp | 
| =================================================================== | 
| --- a/compiled/filter/ElemHideBase.cpp | 
| +++ b/compiled/filter/ElemHideBase.cpp | 
| @@ -15,16 +15,19 @@ | 
| * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. | 
| */ | 
| #include <cstring> | 
| #include "ElemHideBase.h" | 
| #include "../StringScanner.h" | 
| +// the length of a static string array | 
| +#define LENGTH_OF(x) ((sizeof(x) / sizeof(x[0])) - 1) | 
| 
sergei
2018/02/05 14:51:06
I think we may use constexpr function here, and ma
 
hub
2018/02/07 04:13:37
No. This has to be a macro because of sizeof() and
 
sergei
2018/02/12 12:53:17
The following does work
template<typename T, size_
 
hub
2018/02/12 18:14:38
Done.
 
sergei
2018/02/13 09:12:49
What about replacing of "static string array" by s
 
sergei
2018/02/13 16:05:59
It seems only this is left.
 
hub
2018/02/13 16:23:36
oops.
Done
 | 
| + | 
| namespace | 
| { | 
| void NormalizeWhitespace(DependentString& text, String::size_type& domainsEnd, | 
| String::size_type& selectorStart) | 
| { | 
| // For element hiding filters we only want to remove spaces preceding the | 
| // selector part. The positions we've determined already have to be adjusted | 
| // accordingly. | 
| @@ -44,16 +47,28 @@ | 
| delta++; | 
| else | 
| text[pos - delta] = text[pos]; | 
| } | 
| selectorStart -= delta; | 
| text.reset(text, 0, len - delta); | 
| } | 
| + | 
| + static constexpr String::value_type ELEM_HIDE_DELIMITER[] = u"##"; | 
| + static constexpr String::size_type ELEM_HIDE_DELIMITER_LEN = LENGTH_OF(ELEM_HIDE_DELIMITER); | 
| + | 
| + static constexpr String::value_type ELEM_HIDE_EMULATION_DELIMITER[] = u"#?#"; | 
| + static constexpr String::size_type ELEM_HIDE_EMULATION_DELIMITER_LEN = LENGTH_OF(ELEM_HIDE_EMULATION_DELIMITER); | 
| + | 
| + static constexpr String::value_type PROPS_SELECTOR[] = u"[-abp-properties="; | 
| + static constexpr String::size_type PROPS_SELECTOR_LEN = LENGTH_OF(PROPS_SELECTOR); | 
| + | 
| + static constexpr String::value_type NEW_PROPS_SELECTOR[] = u":-abp-properties("; | 
| 
sergei
2018/02/05 14:51:05
I would rather prefer to call it PROPS_SELECTOR an
 
hub
2018/02/07 04:13:36
Done.
 | 
| + static constexpr String::size_type NEW_PROPS_SELECTOR_LEN = LENGTH_OF(NEW_PROPS_SELECTOR); | 
| } | 
| ElemHideBase::ElemHideBase(Type type, const String& text, const ElemHideData& data) | 
| : ActiveFilter(type, text, false), mData(data) | 
| { | 
| if (mData.HasDomains()) | 
| ParseDomains(mData.GetDomainsSource(mText), u','); | 
| } | 
| @@ -84,52 +99,178 @@ | 
| return Type::UNKNOWN; | 
| case u' ': | 
| seenSpaces = true; | 
| break; | 
| } | 
| } | 
| seenSpaces |= scanner.skip(u' '); | 
| + bool emulation = false; | 
| bool exception = scanner.skipOne(u'@'); | 
| if (exception) | 
| seenSpaces |= scanner.skip(u' '); | 
| + else | 
| + emulation = scanner.skipOne(u'?'); | 
| String::value_type next = scanner.next(); | 
| if (next != u'#') | 
| return Type::UNKNOWN; | 
| // Selector part | 
| // Selector shouldn't be empty | 
| seenSpaces |= scanner.skip(u' '); | 
| if (scanner.done()) | 
| return Type::UNKNOWN; | 
| data.mSelectorStart = scanner.position() + 1; | 
| + data.mNeedConversion = false; | 
| // We are done validating, now we can normalize whitespace and the domain part | 
| if (seenSpaces) | 
| NormalizeWhitespace(text, data.mDomainsEnd, data.mSelectorStart); | 
| DependentString(text, 0, data.mDomainsEnd).toLower(); | 
| + // We still need to check the old syntax. It will be converted when | 
| + // we instantiate the filter. | 
| + if (!emulation && | 
| + text.find(PROPS_SELECTOR, data.mSelectorStart, PROPS_SELECTOR_LEN) != text.npos) | 
| + { | 
| + data.mNeedConversion = true; | 
| + emulation = !exception; | 
| + } | 
| + | 
| if (exception) | 
| return Type::ELEMHIDEEXCEPTION; | 
| - if (text.find(u"[-abp-properties="_str, data.mSelectorStart) != text.npos) | 
| + if (emulation) | 
| return Type::ELEMHIDEEMULATION; | 
| return Type::ELEMHIDE; | 
| } | 
| +// Convert filter from the old syntax to the new. | 
| +OwnedString ElemHideBase::ConvertFilter(const String& text, String::size_type& at) | 
| +{ | 
| + auto selectorPos = text.find(PROPS_SELECTOR, at, PROPS_SELECTOR_LEN); | 
| + if (selectorPos != text.npos) | 
| + { | 
| + auto length = text.length(); | 
| + auto properties = selectorPos + PROPS_SELECTOR_LEN; | 
| 
sergei
2018/02/05 14:51:06
should it be something like propertiesPos?
 
hub
2018/02/07 04:13:36
Done.
 | 
| + String::value_type quote = 0; | 
| + bool escape = false; | 
| + String::size_type removed = 0; // how many chars we remove | 
| + String::size_type end = properties; | 
| + String::size_type quote_start = 0; | 
| + String::size_type quote_end = 0; | 
| + for (auto index = properties; | 
| + index < length && end == properties; index++) | 
| + { | 
| + if (escape) | 
| + { | 
| + escape = false; | 
| + continue; | 
| + } | 
| + | 
| + auto c = text[index]; | 
| + switch (c) | 
| + { | 
| + case '\\': | 
| 
sergei
2018/02/05 14:51:05
should they be u'\\', u'"', etc? Since we keep in
 
hub
2018/02/07 04:13:36
yes it should be u'', but with C it works without
 | 
| + escape = true; | 
| + break; | 
| + case '"': | 
| + case '\'': | 
| + if (quote == 0) | 
| + { | 
| + quote = c; | 
| + quote_start = index + 1; | 
| + } | 
| + else if (quote == c) | 
| + { | 
| + // end of quoted. | 
| + quote = 0; | 
| + removed += 2; | 
| + quote_end = index; | 
| + } | 
| + break; | 
| + case ']': | 
| + if (quote == 0) | 
| + end = index + 1; // end of properties (after ]) | 
| + break; | 
| + default: | 
| + break; | 
| + } | 
| + } | 
| 
sergei
2018/02/05 14:51:05
It also differs from the regexp in current js impl
 
sergei
2018/02/05 14:51:06
What if filter is malformed and `end` is equal to
 
hub
2018/02/07 04:13:36
end cannot be equal to zero. It will be at least e
 
hub
2018/02/07 04:13:36
It's actually worse: the parser doesn't like it at
 | 
| + | 
| + if (quote != 0) | 
| + quote_end = end - 1; | 
| + else if (quote_end <= quote_start) | 
| + { | 
| + // we likely didn't find a quoted content so we just take it as is. | 
| + quote_start = properties; | 
| + quote_end = end - 1; | 
| + } | 
| + | 
| + // +1 for the replacement of "##" by "#?#" | 
| + String::size_type offset = 0; | 
| + | 
| + String::size_type delimiter = text.find(ELEM_HIDE_DELIMITER, 0, | 
| + ELEM_HIDE_DELIMITER_LEN); | 
| + OwnedString converted(length + ((delimiter != text.npos) ? 1 : 0) - removed); | 
| 
sergei
2018/02/05 14:51:06
Since removed cannot be negative (no growth), `tex
 
hub
2018/02/07 04:13:36
I'm not really fond of modifying in place, but we
 | 
| + if (delimiter != text.npos) | 
| + { | 
| + if (delimiter >= selectorPos) | 
| + return OwnedString(text); | 
| + | 
| + at++; | 
| + std::memcpy(converted.data(), text.data(), | 
| + delimiter * sizeof(String::value_type)); | 
| + offset += delimiter; | 
| + std::memcpy(converted.data() + offset, ELEM_HIDE_EMULATION_DELIMITER, | 
| + ELEM_HIDE_EMULATION_DELIMITER_LEN * sizeof(String::value_type)); | 
| + offset += ELEM_HIDE_EMULATION_DELIMITER_LEN; | 
| + delimiter += ELEM_HIDE_DELIMITER_LEN; | 
| + // we have already parsed to past the delimiter. | 
| + selectorPos -= delimiter; | 
| + } | 
| + else | 
| + delimiter = 0; | 
| + | 
| + | 
| + std::memcpy(converted.data() + offset, text.data() + delimiter, | 
| + selectorPos * sizeof(String::value_type)); | 
| + offset += selectorPos; | 
| + | 
| + std::memcpy(converted.data() + offset, NEW_PROPS_SELECTOR, | 
| + NEW_PROPS_SELECTOR_LEN * sizeof(String::value_type)); | 
| + offset += NEW_PROPS_SELECTOR_LEN; | 
| + | 
| + std::memcpy(converted.data() + offset, text.data() + quote_start, | 
| + (quote_end - quote_start) * sizeof(String::value_type)); | 
| + offset += quote_end - quote_start; | 
| + | 
| + std::memcpy(converted.data() + offset, u")", sizeof(String::value_type)); | 
| + offset++; | 
| + | 
| + std::memcpy(converted.data() + offset, text.data() + end, | 
| + (length - end) * sizeof(String::value_type)); | 
| + offset += (length - end) * sizeof(String::value_type); | 
| + | 
| + return converted; | 
| + } | 
| + | 
| + return OwnedString(text); | 
| +} | 
| + | 
| namespace | 
| { | 
| static constexpr String::value_type OPENING_CURLY_REPLACEMENT[] = u"\\7B "; | 
| static constexpr String::value_type CLOSING_CURLY_REPLACEMENT[] = u"\\7D "; | 
| - static constexpr String::size_type CURLY_REPLACEMENT_SIZE = sizeof(OPENING_CURLY_REPLACEMENT) / sizeof(OPENING_CURLY_REPLACEMENT[0]) - 1; | 
| + static constexpr String::size_type CURLY_REPLACEMENT_SIZE = LENGTH_OF(OPENING_CURLY_REPLACEMENT); | 
| OwnedString EscapeCurlies(String::size_type replacementCount, | 
| const DependentString& str) | 
| { | 
| OwnedString result(str.length() + replacementCount * (CURLY_REPLACEMENT_SIZE - 1)); | 
| String::value_type* current = result.data(); | 
| for (String::size_type i = 0; i < str.length(); i++) |