OLD | NEW |
1 #include <climits> | 1 #include <climits> |
2 | 2 |
3 #include <emscripten.h> | 3 #include <emscripten.h> |
4 | 4 |
5 #include "RegExpFilter.h" | 5 #include "RegExpFilter.h" |
6 #include "StringScanner.h" | 6 #include "StringScanner.h" |
7 #include "StringMap.h" | 7 #include "StringMap.h" |
8 | 8 |
9 namespace | 9 namespace |
10 { | 10 { |
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
47 | 47 |
48 {u"popup"_str, TYPE_POPUP}, | 48 {u"popup"_str, TYPE_POPUP}, |
49 {u"genericblock"_str, TYPE_GENERICBLOCK}, | 49 {u"genericblock"_str, TYPE_GENERICBLOCK}, |
50 {u"generichide"_str, TYPE_GENERICHIDE}, | 50 {u"generichide"_str, TYPE_GENERICHIDE}, |
51 {u"elemhide"_str, TYPE_ELEMHIDE}, | 51 {u"elemhide"_str, TYPE_ELEMHIDE}, |
52 }; | 52 }; |
53 | 53 |
54 const int defaultTypeMask = INT_MAX & ~(TYPE_DOCUMENT | TYPE_ELEMHIDE | | 54 const int defaultTypeMask = INT_MAX & ~(TYPE_DOCUMENT | TYPE_ELEMHIDE | |
55 TYPE_POPUP | TYPE_GENERICBLOCK | TYPE_GENERICHIDE); | 55 TYPE_POPUP | TYPE_GENERICBLOCK | TYPE_GENERICHIDE); |
56 | 56 |
| 57 OwnedString RegExpFromSource(const String& source) |
| 58 { |
| 59 /* TODO: this is very inefficient */ |
| 60 |
| 61 // Note: This doesn't remove trailing wildcards, otherwise the result should |
| 62 // be identical to Filter.toRegExp(). |
| 63 OwnedString result; |
| 64 String::value_type prevChar = u'*'; |
| 65 for (String::size_type i = 0; i < source.length(); ++i) |
| 66 { |
| 67 String::value_type currChar = source[i]; |
| 68 switch (currChar) |
| 69 { |
| 70 case u'*': |
| 71 if (prevChar != u'*') |
| 72 result.append(u".*"_str); |
| 73 break; |
| 74 case u'^': |
| 75 result.append(u"(?:[\\x00-\\x24\\x26-\\x2C\\x2F\\x3A-\\x40\\x5B-\\x5E\
\x60\\x7B-\\x7F]|$)"_str); |
| 76 break; |
| 77 case u'|': |
| 78 if (i == 0) |
| 79 { |
| 80 // Anchor at expression start, maybe extended anchor? |
| 81 if (i + 1 < source.length() && source[i + 1] == u'|') |
| 82 { |
| 83 result.append(u"^[\\w\\-]+:\\/+(?!\\/)(?:[^\\/]+\\.)?"_str); |
| 84 ++i; |
| 85 } |
| 86 else |
| 87 result.append(u'^'); |
| 88 } |
| 89 else if (i == source.length() - 1) |
| 90 { |
| 91 // Anchor at expression end, ignore if following separator placehold
er |
| 92 if (prevChar != u'^') |
| 93 result.append(u'$'); |
| 94 } |
| 95 else |
| 96 { |
| 97 // Not actually an anchor, escape it |
| 98 result.append(u"\\|"_str); |
| 99 } |
| 100 break; |
| 101 default: |
| 102 if (!(currChar >= u'a' && currChar <= u'z') && |
| 103 !(currChar >= u'A' && currChar <= u'Z') && |
| 104 !(currChar >= u'0' && currChar <= u'9') && |
| 105 currChar < 128) |
| 106 { |
| 107 result.append(u'\\'); |
| 108 } |
| 109 result.append(currChar); |
| 110 } |
| 111 prevChar = currChar; |
| 112 } |
| 113 return result; |
| 114 } |
| 115 |
57 int GenerateRegExp(const String& regexp, bool matchCase) | 116 int GenerateRegExp(const String& regexp, bool matchCase) |
58 { | 117 { |
59 return EM_ASM_INT(return regexps.create($0, $1), ®exp, matchCase); | 118 return EM_ASM_INT(return regexps.create($0, $1), ®exp, matchCase); |
60 } | 119 } |
61 | 120 |
62 void NormalizeWhitespace(DependentString& text) | 121 void NormalizeWhitespace(DependentString& text) |
63 { | 122 { |
64 // We want to remove all spaces but bail out early in the common scenario | 123 // We want to remove all spaces but bail out early in the common scenario |
65 // that the string contains no spaces. | 124 // that the string contains no spaces. |
66 | 125 |
(...skipping 199 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
266 } | 325 } |
267 } | 326 } |
268 | 327 |
269 void RegExpFilter::InitJSTypes() | 328 void RegExpFilter::InitJSTypes() |
270 { | 329 { |
271 EM_ASM(exports.RegExpFilter.typeMap = {};); | 330 EM_ASM(exports.RegExpFilter.typeMap = {};); |
272 for (auto it = typeMap.begin(); it != typeMap.end(); ++it) | 331 for (auto it = typeMap.begin(); it != typeMap.end(); ++it) |
273 EM_ASM_ARGS(exports.RegExpFilter.typeMap[readString($0).replace("-", "_").to
UpperCase()] = $1, &(it->first), it->second); | 332 EM_ASM_ARGS(exports.RegExpFilter.typeMap[readString($0).replace("-", "_").to
UpperCase()] = $1, &(it->first), it->second); |
274 } | 333 } |
275 | 334 |
276 OwnedString RegExpFilter::RegExpFromSource(const String& source) | |
277 { | |
278 /* TODO: this is very inefficient */ | |
279 | |
280 // Note: This doesn't remove trailing wildcards, otherwise the result should | |
281 // be identical to Filter.toRegExp(). | |
282 OwnedString result; | |
283 String::value_type prevChar = u'*'; | |
284 for (String::size_type i = 0; i < source.length(); ++i) | |
285 { | |
286 String::value_type currChar = source[i]; | |
287 switch (currChar) | |
288 { | |
289 case u'*': | |
290 if (prevChar != u'*') | |
291 result.append(u".*"_str); | |
292 break; | |
293 case u'^': | |
294 result.append(u"(?:[\\x00-\\x24\\x26-\\x2C\\x2F\\x3A-\\x40\\x5B-\\x5E\\x
60\\x7B-\\x7F]|$)"_str); | |
295 break; | |
296 case u'|': | |
297 if (i == 0) | |
298 { | |
299 // Anchor at expression start, maybe extended anchor? | |
300 if (i + 1 < source.length() && source[i + 1] == u'|') | |
301 { | |
302 result.append(u"^[\\w\\-]+:\\/+(?!\\/)(?:[^\\/]+\\.)?"_str); | |
303 ++i; | |
304 } | |
305 else | |
306 result.append(u'^'); | |
307 } | |
308 else if (i == source.length() - 1) | |
309 { | |
310 // Anchor at expression end, ignore if following separator placeholder | |
311 if (prevChar != u'^') | |
312 result.append(u'$'); | |
313 } | |
314 else | |
315 { | |
316 // Not actually an anchor, escape it | |
317 result.append(u"\\|"_str); | |
318 } | |
319 break; | |
320 default: | |
321 if (!(currChar >= u'a' && currChar <= u'z') && | |
322 !(currChar >= u'A' && currChar <= u'Z') && | |
323 !(currChar >= u'0' && currChar <= u'9') && | |
324 currChar < 128) | |
325 { | |
326 result.append(u'\\'); | |
327 } | |
328 result.append(currChar); | |
329 } | |
330 prevChar = currChar; | |
331 } | |
332 return result; | |
333 } | |
334 | |
335 RegExpFilter::DomainMap* RegExpFilter::GetDomains() const | 335 RegExpFilter::DomainMap* RegExpFilter::GetDomains() const |
336 { | 336 { |
337 if (!mData.DomainsParsingDone()) | 337 if (!mData.DomainsParsingDone()) |
338 { | 338 { |
339 ParseDomains(mData.GetDomainsSource(mText), u'|'); | 339 ParseDomains(mData.GetDomainsSource(mText), u'|'); |
340 mData.SetDomainsParsingDone(); | 340 mData.SetDomainsParsingDone(); |
341 } | 341 } |
342 return ActiveFilter::GetDomains(); | 342 return ActiveFilter::GetDomains(); |
343 } | 343 } |
344 | 344 |
(...skipping 18 matching lines...) Expand all Loading... |
363 return false; | 363 return false; |
364 } | 364 } |
365 | 365 |
366 if (!mData.RegExpParsingDone()) | 366 if (!mData.RegExpParsingDone()) |
367 { | 367 { |
368 const OwnedString pattern(mData.GetRegExpSource(mText)); | 368 const OwnedString pattern(mData.GetRegExpSource(mText)); |
369 mData.SetRegExp(GenerateRegExp(RegExpFromSource(pattern), mData.mMatchCase))
; | 369 mData.SetRegExp(GenerateRegExp(RegExpFromSource(pattern), mData.mMatchCase))
; |
370 } | 370 } |
371 return EM_ASM_INT(return regexps.test($0, $1), mData.mRegexpId, &location); | 371 return EM_ASM_INT(return regexps.test($0, $1), mData.mRegexpId, &location); |
372 } | 372 } |
OLD | NEW |