Index: compiled/filter/ElemHideBase.cpp |
=================================================================== |
--- a/compiled/filter/ElemHideBase.cpp |
+++ b/compiled/filter/ElemHideBase.cpp |
@@ -15,16 +15,19 @@ |
* along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. |
*/ |
#include <cstring> |
#include "ElemHideBase.h" |
#include "../StringScanner.h" |
+// the length of a static string array |
+#define LENGTH_OF(x) ((sizeof(x) / sizeof(x[0])) - 1) |
sergei
2018/02/05 14:51:06
I think we may use constexpr function here, and ma
hub
2018/02/07 04:13:37
No. This has to be a macro because of sizeof() and
sergei
2018/02/12 12:53:17
The following does work
template<typename T, size_
hub
2018/02/12 18:14:38
Done.
sergei
2018/02/13 09:12:49
What about replacing of "static string array" by s
sergei
2018/02/13 16:05:59
It seems only this is left.
hub
2018/02/13 16:23:36
oops.
Done
|
+ |
namespace |
{ |
void NormalizeWhitespace(DependentString& text, String::size_type& domainsEnd, |
String::size_type& selectorStart) |
{ |
// For element hiding filters we only want to remove spaces preceding the |
// selector part. The positions we've determined already have to be adjusted |
// accordingly. |
@@ -44,16 +47,28 @@ |
delta++; |
else |
text[pos - delta] = text[pos]; |
} |
selectorStart -= delta; |
text.reset(text, 0, len - delta); |
} |
+ |
+ static constexpr String::value_type ELEM_HIDE_DELIMITER[] = u"##"; |
+ static constexpr String::size_type ELEM_HIDE_DELIMITER_LEN = LENGTH_OF(ELEM_HIDE_DELIMITER); |
+ |
+ static constexpr String::value_type ELEM_HIDE_EMULATION_DELIMITER[] = u"#?#"; |
+ static constexpr String::size_type ELEM_HIDE_EMULATION_DELIMITER_LEN = LENGTH_OF(ELEM_HIDE_EMULATION_DELIMITER); |
+ |
+ static constexpr String::value_type PROPS_SELECTOR[] = u"[-abp-properties="; |
+ static constexpr String::size_type PROPS_SELECTOR_LEN = LENGTH_OF(PROPS_SELECTOR); |
+ |
+ static constexpr String::value_type NEW_PROPS_SELECTOR[] = u":-abp-properties("; |
sergei
2018/02/05 14:51:05
I would rather prefer to call it PROPS_SELECTOR an
hub
2018/02/07 04:13:36
Done.
|
+ static constexpr String::size_type NEW_PROPS_SELECTOR_LEN = LENGTH_OF(NEW_PROPS_SELECTOR); |
} |
ElemHideBase::ElemHideBase(Type type, const String& text, const ElemHideData& data) |
: ActiveFilter(type, text, false), mData(data) |
{ |
if (mData.HasDomains()) |
ParseDomains(mData.GetDomainsSource(mText), u','); |
} |
@@ -84,52 +99,178 @@ |
return Type::UNKNOWN; |
case u' ': |
seenSpaces = true; |
break; |
} |
} |
seenSpaces |= scanner.skip(u' '); |
+ bool emulation = false; |
bool exception = scanner.skipOne(u'@'); |
if (exception) |
seenSpaces |= scanner.skip(u' '); |
+ else |
+ emulation = scanner.skipOne(u'?'); |
String::value_type next = scanner.next(); |
if (next != u'#') |
return Type::UNKNOWN; |
// Selector part |
// Selector shouldn't be empty |
seenSpaces |= scanner.skip(u' '); |
if (scanner.done()) |
return Type::UNKNOWN; |
data.mSelectorStart = scanner.position() + 1; |
+ data.mNeedConversion = false; |
// We are done validating, now we can normalize whitespace and the domain part |
if (seenSpaces) |
NormalizeWhitespace(text, data.mDomainsEnd, data.mSelectorStart); |
DependentString(text, 0, data.mDomainsEnd).toLower(); |
+ // We still need to check the old syntax. It will be converted when |
+ // we instantiate the filter. |
+ if (!emulation && |
+ text.find(PROPS_SELECTOR, data.mSelectorStart, PROPS_SELECTOR_LEN) != text.npos) |
+ { |
+ data.mNeedConversion = true; |
+ emulation = !exception; |
+ } |
+ |
if (exception) |
return Type::ELEMHIDEEXCEPTION; |
- if (text.find(u"[-abp-properties="_str, data.mSelectorStart) != text.npos) |
+ if (emulation) |
return Type::ELEMHIDEEMULATION; |
return Type::ELEMHIDE; |
} |
+// Convert filter from the old syntax to the new. |
+OwnedString ElemHideBase::ConvertFilter(const String& text, String::size_type& at) |
+{ |
+ auto selectorPos = text.find(PROPS_SELECTOR, at, PROPS_SELECTOR_LEN); |
+ if (selectorPos != text.npos) |
+ { |
+ auto length = text.length(); |
+ auto properties = selectorPos + PROPS_SELECTOR_LEN; |
sergei
2018/02/05 14:51:06
should it be something like propertiesPos?
hub
2018/02/07 04:13:36
Done.
|
+ String::value_type quote = 0; |
+ bool escape = false; |
+ String::size_type removed = 0; // how many chars we remove |
+ String::size_type end = properties; |
+ String::size_type quote_start = 0; |
+ String::size_type quote_end = 0; |
+ for (auto index = properties; |
+ index < length && end == properties; index++) |
+ { |
+ if (escape) |
+ { |
+ escape = false; |
+ continue; |
+ } |
+ |
+ auto c = text[index]; |
+ switch (c) |
+ { |
+ case '\\': |
sergei
2018/02/05 14:51:05
should they be u'\\', u'"', etc? Since we keep in
hub
2018/02/07 04:13:36
yes it should be u'', but with C it works without
|
+ escape = true; |
+ break; |
+ case '"': |
+ case '\'': |
+ if (quote == 0) |
+ { |
+ quote = c; |
+ quote_start = index + 1; |
+ } |
+ else if (quote == c) |
+ { |
+ // end of quoted. |
+ quote = 0; |
+ removed += 2; |
+ quote_end = index; |
+ } |
+ break; |
+ case ']': |
+ if (quote == 0) |
+ end = index + 1; // end of properties (after ]) |
+ break; |
+ default: |
+ break; |
+ } |
+ } |
sergei
2018/02/05 14:51:05
It also differs from the regexp in current js impl
sergei
2018/02/05 14:51:06
What if filter is malformed and `end` is equal to
hub
2018/02/07 04:13:36
end cannot be equal to zero. It will be at least e
hub
2018/02/07 04:13:36
It's actually worse: the parser doesn't like it at
|
+ |
+ if (quote != 0) |
+ quote_end = end - 1; |
+ else if (quote_end <= quote_start) |
+ { |
+ // we likely didn't find a quoted content so we just take it as is. |
+ quote_start = properties; |
+ quote_end = end - 1; |
+ } |
+ |
+ // +1 for the replacement of "##" by "#?#" |
+ String::size_type offset = 0; |
+ |
+ String::size_type delimiter = text.find(ELEM_HIDE_DELIMITER, 0, |
+ ELEM_HIDE_DELIMITER_LEN); |
+ OwnedString converted(length + ((delimiter != text.npos) ? 1 : 0) - removed); |
sergei
2018/02/05 14:51:06
Since removed cannot be negative (no growth), `tex
hub
2018/02/07 04:13:36
I'm not really fond of modifying in place, but we
|
+ if (delimiter != text.npos) |
+ { |
+ if (delimiter >= selectorPos) |
+ return OwnedString(text); |
+ |
+ at++; |
+ std::memcpy(converted.data(), text.data(), |
+ delimiter * sizeof(String::value_type)); |
+ offset += delimiter; |
+ std::memcpy(converted.data() + offset, ELEM_HIDE_EMULATION_DELIMITER, |
+ ELEM_HIDE_EMULATION_DELIMITER_LEN * sizeof(String::value_type)); |
+ offset += ELEM_HIDE_EMULATION_DELIMITER_LEN; |
+ delimiter += ELEM_HIDE_DELIMITER_LEN; |
+ // we have already parsed to past the delimiter. |
+ selectorPos -= delimiter; |
+ } |
+ else |
+ delimiter = 0; |
+ |
+ |
+ std::memcpy(converted.data() + offset, text.data() + delimiter, |
+ selectorPos * sizeof(String::value_type)); |
+ offset += selectorPos; |
+ |
+ std::memcpy(converted.data() + offset, NEW_PROPS_SELECTOR, |
+ NEW_PROPS_SELECTOR_LEN * sizeof(String::value_type)); |
+ offset += NEW_PROPS_SELECTOR_LEN; |
+ |
+ std::memcpy(converted.data() + offset, text.data() + quote_start, |
+ (quote_end - quote_start) * sizeof(String::value_type)); |
+ offset += quote_end - quote_start; |
+ |
+ std::memcpy(converted.data() + offset, u")", sizeof(String::value_type)); |
+ offset++; |
+ |
+ std::memcpy(converted.data() + offset, text.data() + end, |
+ (length - end) * sizeof(String::value_type)); |
+ offset += (length - end) * sizeof(String::value_type); |
+ |
+ return converted; |
+ } |
+ |
+ return OwnedString(text); |
+} |
+ |
namespace |
{ |
static constexpr String::value_type OPENING_CURLY_REPLACEMENT[] = u"\\7B "; |
static constexpr String::value_type CLOSING_CURLY_REPLACEMENT[] = u"\\7D "; |
- static constexpr String::size_type CURLY_REPLACEMENT_SIZE = sizeof(OPENING_CURLY_REPLACEMENT) / sizeof(OPENING_CURLY_REPLACEMENT[0]) - 1; |
+ static constexpr String::size_type CURLY_REPLACEMENT_SIZE = LENGTH_OF(OPENING_CURLY_REPLACEMENT); |
OwnedString EscapeCurlies(String::size_type replacementCount, |
const DependentString& str) |
{ |
OwnedString result(str.length() + replacementCount * (CURLY_REPLACEMENT_SIZE - 1)); |
String::value_type* current = result.data(); |
for (String::size_type i = 0; i < str.length(); i++) |