Index: compiled/RegExpFilter.cpp |
=================================================================== |
--- a/compiled/RegExpFilter.cpp |
+++ b/compiled/RegExpFilter.cpp |
@@ -49,16 +49,75 @@ namespace |
{u"genericblock"_str, TYPE_GENERICBLOCK}, |
{u"generichide"_str, TYPE_GENERICHIDE}, |
{u"elemhide"_str, TYPE_ELEMHIDE}, |
}; |
const int defaultTypeMask = INT_MAX & ~(TYPE_DOCUMENT | TYPE_ELEMHIDE | |
TYPE_POPUP | TYPE_GENERICBLOCK | TYPE_GENERICHIDE); |
+ OwnedString RegExpFromSource(const String& source) |
Wladimir Palant
2017/03/14 14:44:43
This function was moved into an anonymous namespac
|
+ { |
+ /* TODO: this is very inefficient */ |
+ |
+ // Note: This doesn't remove trailing wildcards, otherwise the result should |
+ // be identical to Filter.toRegExp(). |
+ OwnedString result; |
+ String::value_type prevChar = u'*'; |
+ for (String::size_type i = 0; i < source.length(); ++i) |
+ { |
+ String::value_type currChar = source[i]; |
+ switch (currChar) |
+ { |
+ case u'*': |
+ if (prevChar != u'*') |
+ result.append(u".*"_str); |
+ break; |
+ case u'^': |
+ result.append(u"(?:[\\x00-\\x24\\x26-\\x2C\\x2F\\x3A-\\x40\\x5B-\\x5E\\x60\\x7B-\\x7F]|$)"_str); |
+ break; |
+ case u'|': |
+ if (i == 0) |
+ { |
+ // Anchor at expression start, maybe extended anchor? |
+ if (i + 1 < source.length() && source[i + 1] == u'|') |
+ { |
+ result.append(u"^[\\w\\-]+:\\/+(?!\\/)(?:[^\\/]+\\.)?"_str); |
+ ++i; |
+ } |
+ else |
+ result.append(u'^'); |
+ } |
+ else if (i == source.length() - 1) |
+ { |
+ // Anchor at expression end, ignore if following separator placeholder |
+ if (prevChar != u'^') |
+ result.append(u'$'); |
+ } |
+ else |
+ { |
+ // Not actually an anchor, escape it |
+ result.append(u"\\|"_str); |
+ } |
+ break; |
+ default: |
+ if (!(currChar >= u'a' && currChar <= u'z') && |
+ !(currChar >= u'A' && currChar <= u'Z') && |
+ !(currChar >= u'0' && currChar <= u'9') && |
+ currChar < 128) |
+ { |
+ result.append(u'\\'); |
+ } |
+ result.append(currChar); |
+ } |
+ prevChar = currChar; |
+ } |
+ return result; |
+ } |
+ |
int GenerateRegExp(const String& regexp, bool matchCase) |
{ |
return EM_ASM_INT(return regexps.create($0, $1), ®exp, matchCase); |
} |
void NormalizeWhitespace(DependentString& text) |
{ |
// We want to remove all spaces but bail out early in the common scenario |
@@ -268,75 +327,16 @@ void RegExpFilter::ParseSitekeys(const S |
void RegExpFilter::InitJSTypes() |
{ |
EM_ASM(exports.RegExpFilter.typeMap = {};); |
for (auto it = typeMap.begin(); it != typeMap.end(); ++it) |
EM_ASM_ARGS(exports.RegExpFilter.typeMap[readString($0).replace("-", "_").toUpperCase()] = $1, &(it->first), it->second); |
} |
-OwnedString RegExpFilter::RegExpFromSource(const String& source) |
-{ |
- /* TODO: this is very inefficient */ |
- |
- // Note: This doesn't remove trailing wildcards, otherwise the result should |
- // be identical to Filter.toRegExp(). |
- OwnedString result; |
- String::value_type prevChar = u'*'; |
- for (String::size_type i = 0; i < source.length(); ++i) |
- { |
- String::value_type currChar = source[i]; |
- switch (currChar) |
- { |
- case u'*': |
- if (prevChar != u'*') |
- result.append(u".*"_str); |
- break; |
- case u'^': |
- result.append(u"(?:[\\x00-\\x24\\x26-\\x2C\\x2F\\x3A-\\x40\\x5B-\\x5E\\x60\\x7B-\\x7F]|$)"_str); |
- break; |
- case u'|': |
- if (i == 0) |
- { |
- // Anchor at expression start, maybe extended anchor? |
- if (i + 1 < source.length() && source[i + 1] == u'|') |
- { |
- result.append(u"^[\\w\\-]+:\\/+(?!\\/)(?:[^\\/]+\\.)?"_str); |
- ++i; |
- } |
- else |
- result.append(u'^'); |
- } |
- else if (i == source.length() - 1) |
- { |
- // Anchor at expression end, ignore if following separator placeholder |
- if (prevChar != u'^') |
- result.append(u'$'); |
- } |
- else |
- { |
- // Not actually an anchor, escape it |
- result.append(u"\\|"_str); |
- } |
- break; |
- default: |
- if (!(currChar >= u'a' && currChar <= u'z') && |
- !(currChar >= u'A' && currChar <= u'Z') && |
- !(currChar >= u'0' && currChar <= u'9') && |
- currChar < 128) |
- { |
- result.append(u'\\'); |
- } |
- result.append(currChar); |
- } |
- prevChar = currChar; |
- } |
- return result; |
-} |
- |
RegExpFilter::DomainMap* RegExpFilter::GetDomains() const |
{ |
if (!mData.DomainsParsingDone()) |
{ |
ParseDomains(mData.GetDomainsSource(mText), u'|'); |
mData.SetDomainsParsingDone(); |
} |
return ActiveFilter::GetDomains(); |