| OLD | NEW |
| 1 #include <climits> | 1 #include <climits> |
| 2 | 2 |
| 3 #include <emscripten.h> | 3 #include <emscripten.h> |
| 4 | 4 |
| 5 #include "RegExpFilter.h" | 5 #include "RegExpFilter.h" |
| 6 #include "StringScanner.h" | 6 #include "StringScanner.h" |
| 7 #include "StringMap.h" | 7 #include "StringMap.h" |
| 8 | 8 |
| 9 namespace | 9 namespace |
| 10 { | 10 { |
| (...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 47 | 47 |
| 48 {u"popup"_str, TYPE_POPUP}, | 48 {u"popup"_str, TYPE_POPUP}, |
| 49 {u"genericblock"_str, TYPE_GENERICBLOCK}, | 49 {u"genericblock"_str, TYPE_GENERICBLOCK}, |
| 50 {u"generichide"_str, TYPE_GENERICHIDE}, | 50 {u"generichide"_str, TYPE_GENERICHIDE}, |
| 51 {u"elemhide"_str, TYPE_ELEMHIDE}, | 51 {u"elemhide"_str, TYPE_ELEMHIDE}, |
| 52 }; | 52 }; |
| 53 | 53 |
| 54 const int defaultTypeMask = INT_MAX & ~(TYPE_DOCUMENT | TYPE_ELEMHIDE | | 54 const int defaultTypeMask = INT_MAX & ~(TYPE_DOCUMENT | TYPE_ELEMHIDE | |
| 55 TYPE_POPUP | TYPE_GENERICBLOCK | TYPE_GENERICHIDE); | 55 TYPE_POPUP | TYPE_GENERICBLOCK | TYPE_GENERICHIDE); |
| 56 | 56 |
| 57 OwnedString RegExpFromSource(const String& source) |
| 58 { |
| 59 /* TODO: this is very inefficient */ |
| 60 |
| 61 // Note: This doesn't remove trailing wildcards, otherwise the result should |
| 62 // be identical to Filter.toRegExp(). |
| 63 OwnedString result; |
| 64 String::value_type prevChar = u'*'; |
| 65 for (String::size_type i = 0; i < source.length(); ++i) |
| 66 { |
| 67 String::value_type currChar = source[i]; |
| 68 switch (currChar) |
| 69 { |
| 70 case u'*': |
| 71 if (prevChar != u'*') |
| 72 result.append(u".*"_str); |
| 73 break; |
| 74 case u'^': |
| 75 result.append(u"(?:[\\x00-\\x24\\x26-\\x2C\\x2F\\x3A-\\x40\\x5B-\\x5E\\x60\\x7B-\\x7F]|$)"_str); |
| 76 break; |
| 77 case u'|': |
| 78 if (i == 0) |
| 79 { |
| 80 // Anchor at expression start, maybe extended anchor? |
| 81 if (i + 1 < source.length() && source[i + 1] == u'|') |
| 82 { |
| 83 result.append(u"^[\\w\\-]+:\\/+(?!\\/)(?:[^\\/]+\\.)?"_str); |
| 84 ++i; |
| 85 } |
| 86 else |
| 87 result.append(u'^'); |
| 88 } |
| 89 else if (i == source.length() - 1) |
| 90 { |
| 91 // Anchor at expression end, ignore if following separator placeholder |
| 92 if (prevChar != u'^') |
| 93 result.append(u'$'); |
| 94 } |
| 95 else |
| 96 { |
| 97 // Not actually an anchor, escape it |
| 98 result.append(u"\\|"_str); |
| 99 } |
| 100 break; |
| 101 default: |
| 102 if (!(currChar >= u'a' && currChar <= u'z') && |
| 103 !(currChar >= u'A' && currChar <= u'Z') && |
| 104 !(currChar >= u'0' && currChar <= u'9') && |
| 105 currChar < 128) |
| 106 { |
| 107 result.append(u'\\'); |
| 108 } |
| 109 result.append(currChar); |
| 110 } |
| 111 prevChar = currChar; |
| 112 } |
| 113 return result; |
| 114 } |
| 115 |
| 57 int GenerateRegExp(const String& regexp, bool matchCase) | 116 int GenerateRegExp(const String& regexp, bool matchCase) |
| 58 { | 117 { |
| 59 return EM_ASM_INT(return regexps.create($0, $1), &regexp, matchCase); | 118 return EM_ASM_INT(return regexps.create($0, $1), &regexp, matchCase); |
| 60 } | 119 } |
| 61 | 120 |
| 62 void NormalizeWhitespace(DependentString& text) | 121 void NormalizeWhitespace(DependentString& text) |
| 63 { | 122 { |
| 64 // We want to remove all spaces but bail out early in the common scenario | 123 // We want to remove all spaces but bail out early in the common scenario |
| 65 // that the string contains no spaces. | 124 // that the string contains no spaces. |
| 66 | 125 |
| (...skipping 199 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 266 } | 325 } |
| 267 } | 326 } |
| 268 | 327 |
| 269 void RegExpFilter::InitJSTypes() | 328 void RegExpFilter::InitJSTypes() |
| 270 { | 329 { |
| 271 EM_ASM(exports.RegExpFilter.typeMap = {};); | 330 EM_ASM(exports.RegExpFilter.typeMap = {};); |
| 272 for (auto it = typeMap.begin(); it != typeMap.end(); ++it) | 331 for (auto it = typeMap.begin(); it != typeMap.end(); ++it) |
| 273 EM_ASM_ARGS(exports.RegExpFilter.typeMap[readString($0).replace("-", "_").toUpperCase()] = $1, &(it->first), it->second); | 332 EM_ASM_ARGS(exports.RegExpFilter.typeMap[readString($0).replace("-", "_").toUpperCase()] = $1, &(it->first), it->second); |
| 274 } | 333 } |
| 275 | 334 |
| 276 OwnedString RegExpFilter::RegExpFromSource(const String& source) | |
| 277 { | |
| 278 /* TODO: this is very inefficient */ | |
| 279 | |
| 280 // Note: This doesn't remove trailing wildcards, otherwise the result should | |
| 281 // be identical to Filter.toRegExp(). | |
| 282 OwnedString result; | |
| 283 String::value_type prevChar = u'*'; | |
| 284 for (String::size_type i = 0; i < source.length(); ++i) | |
| 285 { | |
| 286 String::value_type currChar = source[i]; | |
| 287 switch (currChar) | |
| 288 { | |
| 289 case u'*': | |
| 290 if (prevChar != u'*') | |
| 291 result.append(u".*"_str); | |
| 292 break; | |
| 293 case u'^': | |
| 294 result.append(u"(?:[\\x00-\\x24\\x26-\\x2C\\x2F\\x3A-\\x40\\x5B-\\x5E\\x60\\x7B-\\x7F]|$)"_str); | |
| 295 break; | |
| 296 case u'|': | |
| 297 if (i == 0) | |
| 298 { | |
| 299 // Anchor at expression start, maybe extended anchor? | |
| 300 if (i + 1 < source.length() && source[i + 1] == u'|') | |
| 301 { | |
| 302 result.append(u"^[\\w\\-]+:\\/+(?!\\/)(?:[^\\/]+\\.)?"_str); | |
| 303 ++i; | |
| 304 } | |
| 305 else | |
| 306 result.append(u'^'); | |
| 307 } | |
| 308 else if (i == source.length() - 1) | |
| 309 { | |
| 310 // Anchor at expression end, ignore if following separator placeholder | |
| 311 if (prevChar != u'^') | |
| 312 result.append(u'$'); | |
| 313 } | |
| 314 else | |
| 315 { | |
| 316 // Not actually an anchor, escape it | |
| 317 result.append(u"\\|"_str); | |
| 318 } | |
| 319 break; | |
| 320 default: | |
| 321 if (!(currChar >= u'a' && currChar <= u'z') && | |
| 322 !(currChar >= u'A' && currChar <= u'Z') && | |
| 323 !(currChar >= u'0' && currChar <= u'9') && | |
| 324 currChar < 128) | |
| 325 { | |
| 326 result.append(u'\\'); | |
| 327 } | |
| 328 result.append(currChar); | |
| 329 } | |
| 330 prevChar = currChar; | |
| 331 } | |
| 332 return result; | |
| 333 } | |
| 334 | |
| 335 RegExpFilter::DomainMap* RegExpFilter::GetDomains() const | 335 RegExpFilter::DomainMap* RegExpFilter::GetDomains() const |
| 336 { | 336 { |
| 337 if (!mData.DomainsParsingDone()) | 337 if (!mData.DomainsParsingDone()) |
| 338 { | 338 { |
| 339 ParseDomains(mData.GetDomainsSource(mText), u'|'); | 339 ParseDomains(mData.GetDomainsSource(mText), u'|'); |
| 340 mData.SetDomainsParsingDone(); | 340 mData.SetDomainsParsingDone(); |
| 341 } | 341 } |
| 342 return ActiveFilter::GetDomains(); | 342 return ActiveFilter::GetDomains(); |
| 343 } | 343 } |
| 344 | 344 |
| (...skipping 18 matching lines...) Expand all Loading... |
| 363 return false; | 363 return false; |
| 364 } | 364 } |
| 365 | 365 |
| 366 if (!mData.RegExpParsingDone()) | 366 if (!mData.RegExpParsingDone()) |
| 367 { | 367 { |
| 368 const OwnedString pattern(mData.GetRegExpSource(mText)); | 368 const OwnedString pattern(mData.GetRegExpSource(mText)); |
| 369 mData.SetRegExp(GenerateRegExp(RegExpFromSource(pattern), mData.mMatchCase)); | 369 mData.SetRegExp(GenerateRegExp(RegExpFromSource(pattern), mData.mMatchCase)); |
| 370 } | 370 } |
| 371 return EM_ASM_INT(return regexps.test($0, $1), mData.mRegexpId, &location); | 371 return EM_ASM_INT(return regexps.test($0, $1), mData.mRegexpId, &location); |
| 372 } | 372 } |
| OLD | NEW |