compiled/filter/ElemHideBase.cpp - Issue 29595633: Issue 5870 - Implement the new ElemHideEmulation filter type

Unified Diff: compiled/filter/ElemHideBase.cpp

Issue 29595633: Issue 5870 - Implement the new ElemHideEmulation filter type (Closed) Base URL: https://hg.adblockplus.org/adblockpluscore/

Patch Set: Refactor consts Created Feb. 1, 2018, 11:30 p.m.

Use n/p to move between diff chunks; N/P to move between comments.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: compiled/filter/ElemHideBase.cpp

===================================================================

--- a/compiled/filter/ElemHideBase.cpp

+++ b/compiled/filter/ElemHideBase.cpp

@@ -15,16 +15,19 @@

* along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.

#include <cstring>

#include "ElemHideBase.h"

#include "../StringScanner.h"

+// the length of a static string array

+#define LENGTH_OF(x) ((sizeof(x) / sizeof(x[0])) - 1)

sergei 2018/02/05 14:51:06 I think we may use constexpr function here, and ma

hub 2018/02/07 04:13:37 No. This has to be a macro because of sizeof() and

sergei 2018/02/12 12:53:17 The following does work template<typename T, size_

hub 2018/02/12 18:14:38 Done.

sergei 2018/02/13 09:12:49 What about replacing of "static string array" by s

sergei 2018/02/13 16:05:59 It seems only this is left.

hub 2018/02/13 16:23:36 oops. Done

namespace

{

void NormalizeWhitespace(DependentString& text, String::size_type& domainsEnd,

String::size_type& selectorStart)

{

// For element hiding filters we only want to remove spaces preceding the

// selector part. The positions we've determined already have to be adjusted

// accordingly.

@@ -44,16 +47,28 @@

delta++;

else

text[pos - delta] = text[pos];

}

selectorStart -= delta;

text.reset(text, 0, len - delta);

}

+ static constexpr String::value_type ELEM_HIDE_DELIMITER[] = u"##";

+ static constexpr String::size_type ELEM_HIDE_DELIMITER_LEN = LENGTH_OF(ELEM_HIDE_DELIMITER);

+ static constexpr String::value_type ELEM_HIDE_EMULATION_DELIMITER[] = u"#?#";

+ static constexpr String::size_type ELEM_HIDE_EMULATION_DELIMITER_LEN = LENGTH_OF(ELEM_HIDE_EMULATION_DELIMITER);

+ static constexpr String::value_type PROPS_SELECTOR[] = u"[-abp-properties=";

+ static constexpr String::size_type PROPS_SELECTOR_LEN = LENGTH_OF(PROPS_SELECTOR);

+ static constexpr String::value_type NEW_PROPS_SELECTOR[] = u":-abp-properties(";

sergei 2018/02/05 14:51:05 I would rather prefer to call it PROPS_SELECTOR an

hub 2018/02/07 04:13:36 Done.

+ static constexpr String::size_type NEW_PROPS_SELECTOR_LEN = LENGTH_OF(NEW_PROPS_SELECTOR);

}

ElemHideBase::ElemHideBase(Type type, const String& text, const ElemHideData& data)

: ActiveFilter(type, text, false), mData(data)

{

if (mData.HasDomains())

ParseDomains(mData.GetDomainsSource(mText), u',');

}

@@ -84,52 +99,178 @@

return Type::UNKNOWN;

case u' ':

seenSpaces = true;

break;

}

seenSpaces |= scanner.skip(u' ');

+ bool emulation = false;

bool exception = scanner.skipOne(u'@');

if (exception)

seenSpaces |= scanner.skip(u' ');

+ else

+ emulation = scanner.skipOne(u'?');

String::value_type next = scanner.next();

if (next != u'#')

return Type::UNKNOWN;

// Selector part

// Selector shouldn't be empty

seenSpaces |= scanner.skip(u' ');

if (scanner.done())

return Type::UNKNOWN;

data.mSelectorStart = scanner.position() + 1;

+ data.mNeedConversion = false;

// We are done validating, now we can normalize whitespace and the domain part

if (seenSpaces)

NormalizeWhitespace(text, data.mDomainsEnd, data.mSelectorStart);

DependentString(text, 0, data.mDomainsEnd).toLower();

+ // We still need to check the old syntax. It will be converted when

+ // we instantiate the filter.

+ if (!emulation &&

+ text.find(PROPS_SELECTOR, data.mSelectorStart, PROPS_SELECTOR_LEN) != text.npos)

+ {

+ data.mNeedConversion = true;

+ emulation = !exception;

+ }

if (exception)

return Type::ELEMHIDEEXCEPTION;

- if (text.find(u"[-abp-properties="_str, data.mSelectorStart) != text.npos)

+ if (emulation)

return Type::ELEMHIDEEMULATION;

return Type::ELEMHIDE;

}

+// Convert filter from the old syntax to the new.

+OwnedString ElemHideBase::ConvertFilter(const String& text, String::size_type& at)

+ auto selectorPos = text.find(PROPS_SELECTOR, at, PROPS_SELECTOR_LEN);

+ if (selectorPos != text.npos)

+ {

+ auto length = text.length();

+ auto properties = selectorPos + PROPS_SELECTOR_LEN;

sergei 2018/02/05 14:51:06 should it be something like propertiesPos?

hub 2018/02/07 04:13:36 Done.

+ String::value_type quote = 0;

+ bool escape = false;

+ String::size_type removed = 0; // how many chars we remove

+ String::size_type end = properties;

+ String::size_type quote_start = 0;

+ String::size_type quote_end = 0;

+ for (auto index = properties;

+ index < length && end == properties; index++)

+ {

+ if (escape)

+ {

+ escape = false;

+ continue;

+ }

+ auto c = text[index];

+ switch (c)

+ {

+ case '\\':

sergei 2018/02/05 14:51:05 should they be u'\\', u'"', etc? Since we keep in

hub 2018/02/07 04:13:36 yes it should be u'', but with C it works without

+ escape = true;

+ break;

+ case '"':

+ case '\'':

+ if (quote == 0)

+ {

+ quote = c;

+ quote_start = index + 1;

+ }

+ else if (quote == c)

+ {

+ // end of quoted.

+ quote = 0;

+ removed += 2;

+ quote_end = index;

+ }

+ break;

+ case ']':

+ if (quote == 0)

+ end = index + 1; // end of properties (after ])

+ break;

+ default:

+ break;

+ }

sergei 2018/02/05 14:51:05 It also differs from the regexp in current js impl

sergei 2018/02/05 14:51:06 What if filter is malformed and `end` is equal to

hub 2018/02/07 04:13:36 end cannot be equal to zero. It will be at least e

hub 2018/02/07 04:13:36 It's actually worse: the parser doesn't like it at

+ if (quote != 0)

+ quote_end = end - 1;

+ else if (quote_end <= quote_start)

+ {

+ // we likely didn't find a quoted content so we just take it as is.

+ quote_start = properties;

+ quote_end = end - 1;

+ }

+ // +1 for the replacement of "##" by "#?#"

+ String::size_type offset = 0;

+ String::size_type delimiter = text.find(ELEM_HIDE_DELIMITER, 0,

+ ELEM_HIDE_DELIMITER_LEN);

+ OwnedString converted(length + ((delimiter != text.npos) ? 1 : 0) - removed);

sergei 2018/02/05 14:51:06 Since removed cannot be negative (no growth), `tex

hub 2018/02/07 04:13:36 I'm not really fond of modifying in place, but we

+ if (delimiter != text.npos)

+ {

+ if (delimiter >= selectorPos)

+ return OwnedString(text);

+ at++;

+ std::memcpy(converted.data(), text.data(),

+ delimiter * sizeof(String::value_type));

+ offset += delimiter;

+ std::memcpy(converted.data() + offset, ELEM_HIDE_EMULATION_DELIMITER,

+ ELEM_HIDE_EMULATION_DELIMITER_LEN * sizeof(String::value_type));

+ offset += ELEM_HIDE_EMULATION_DELIMITER_LEN;

+ delimiter += ELEM_HIDE_DELIMITER_LEN;

+ // we have already parsed to past the delimiter.

+ selectorPos -= delimiter;

+ }

+ else

+ delimiter = 0;

+ std::memcpy(converted.data() + offset, text.data() + delimiter,

+ selectorPos * sizeof(String::value_type));

+ offset += selectorPos;

+ std::memcpy(converted.data() + offset, NEW_PROPS_SELECTOR,

+ NEW_PROPS_SELECTOR_LEN * sizeof(String::value_type));

+ offset += NEW_PROPS_SELECTOR_LEN;

+ std::memcpy(converted.data() + offset, text.data() + quote_start,

+ (quote_end - quote_start) * sizeof(String::value_type));

+ offset += quote_end - quote_start;

+ std::memcpy(converted.data() + offset, u")", sizeof(String::value_type));

+ offset++;

+ std::memcpy(converted.data() + offset, text.data() + end,

+ (length - end) * sizeof(String::value_type));

+ offset += (length - end) * sizeof(String::value_type);

+ return converted;

+ }

+ return OwnedString(text);

namespace

{

static constexpr String::value_type OPENING_CURLY_REPLACEMENT[] = u"\\7B ";

static constexpr String::value_type CLOSING_CURLY_REPLACEMENT[] = u"\\7D ";

- static constexpr String::size_type CURLY_REPLACEMENT_SIZE = sizeof(OPENING_CURLY_REPLACEMENT) / sizeof(OPENING_CURLY_REPLACEMENT[0]) - 1;

+ static constexpr String::size_type CURLY_REPLACEMENT_SIZE = LENGTH_OF(OPENING_CURLY_REPLACEMENT);

OwnedString EscapeCurlies(String::size_type replacementCount,

const DependentString& str)

{

OwnedString result(str.length() + replacementCount * (CURLY_REPLACEMENT_SIZE - 1));

String::value_type* current = result.data();

for (String::size_type i = 0; i < str.length(); i++)

« compiled/filter/ElemHideBase.h ('K') | « compiled/filter/ElemHideBase.h ('k') | compiled/filter/Filter.cpp » ('j') | no next file with comments »