Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Side by Side Diff: compiled/RegExpFilter.cpp

Issue 29383799: Issue 4988 - [emscripten] Adjust API for Element Hiding Emulation filters (Closed) Base URL: https://hg.adblockplus.org/adblockpluscore
Patch Set: Created March 14, 2017, 2:42 p.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View unified diff | Download patch
OLDNEW
1 #include <climits> 1 #include <climits>
2 2
3 #include <emscripten.h> 3 #include <emscripten.h>
4 4
5 #include "RegExpFilter.h" 5 #include "RegExpFilter.h"
6 #include "StringScanner.h" 6 #include "StringScanner.h"
7 #include "StringMap.h" 7 #include "StringMap.h"
8 8
9 namespace 9 namespace
10 { 10 {
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after
47 47
48 {u"popup"_str, TYPE_POPUP}, 48 {u"popup"_str, TYPE_POPUP},
49 {u"genericblock"_str, TYPE_GENERICBLOCK}, 49 {u"genericblock"_str, TYPE_GENERICBLOCK},
50 {u"generichide"_str, TYPE_GENERICHIDE}, 50 {u"generichide"_str, TYPE_GENERICHIDE},
51 {u"elemhide"_str, TYPE_ELEMHIDE}, 51 {u"elemhide"_str, TYPE_ELEMHIDE},
52 }; 52 };
53 53
54 const int defaultTypeMask = INT_MAX & ~(TYPE_DOCUMENT | TYPE_ELEMHIDE | 54 const int defaultTypeMask = INT_MAX & ~(TYPE_DOCUMENT | TYPE_ELEMHIDE |
55 TYPE_POPUP | TYPE_GENERICBLOCK | TYPE_GENERICHIDE); 55 TYPE_POPUP | TYPE_GENERICBLOCK | TYPE_GENERICHIDE);
56 56
57 OwnedString RegExpFromSource(const String& source)
Wladimir Palant 2017/03/14 14:44:43 This function was moved into an anonymous namespace…
58 {
59 /* TODO: this is very inefficient */
60
61 // Note: This doesn't remove trailing wildcards, otherwise the result should
62 // be identical to Filter.toRegExp().
63 OwnedString result;
64 String::value_type prevChar = u'*';
65 for (String::size_type i = 0; i < source.length(); ++i)
66 {
67 String::value_type currChar = source[i];
68 switch (currChar)
69 {
70 case u'*':
71 if (prevChar != u'*')
72 result.append(u".*"_str);
73 break;
74 case u'^':
75 result.append(u"(?:[\\x00-\\x24\\x26-\\x2C\\x2F\\x3A-\\x40\\x5B-\\x5E\\x60\\x7B-\\x7F]|$)"_str);
76 break;
77 case u'|':
78 if (i == 0)
79 {
80 // Anchor at expression start, maybe extended anchor?
81 if (i + 1 < source.length() && source[i + 1] == u'|')
82 {
83 result.append(u"^[\\w\\-]+:\\/+(?!\\/)(?:[^\\/]+\\.)?"_str);
84 ++i;
85 }
86 else
87 result.append(u'^');
88 }
89 else if (i == source.length() - 1)
90 {
91 // Anchor at expression end, ignore if following separator placeholder
92 if (prevChar != u'^')
93 result.append(u'$');
94 }
95 else
96 {
97 // Not actually an anchor, escape it
98 result.append(u"\\|"_str);
99 }
100 break;
101 default:
102 if (!(currChar >= u'a' && currChar <= u'z') &&
103 !(currChar >= u'A' && currChar <= u'Z') &&
104 !(currChar >= u'0' && currChar <= u'9') &&
105 currChar < 128)
106 {
107 result.append(u'\\');
108 }
109 result.append(currChar);
110 }
111 prevChar = currChar;
112 }
113 return result;
114 }
115
57 int GenerateRegExp(const String& regexp, bool matchCase) 116 int GenerateRegExp(const String& regexp, bool matchCase)
58 { 117 {
59 return EM_ASM_INT(return regexps.create($0, $1), &regexp, matchCase); 118 return EM_ASM_INT(return regexps.create($0, $1), &regexp, matchCase);
60 } 119 }
61 120
62 void NormalizeWhitespace(DependentString& text) 121 void NormalizeWhitespace(DependentString& text)
63 { 122 {
64 // We want to remove all spaces but bail out early in the common scenario 123 // We want to remove all spaces but bail out early in the common scenario
65 // that the string contains no spaces. 124 // that the string contains no spaces.
66 125
(...skipping 199 matching lines...) Expand 10 before | Expand all | Expand 10 after
266 } 325 }
267 } 326 }
268 327
269 void RegExpFilter::InitJSTypes() 328 void RegExpFilter::InitJSTypes()
270 { 329 {
271 EM_ASM(exports.RegExpFilter.typeMap = {};); 330 EM_ASM(exports.RegExpFilter.typeMap = {};);
272 for (auto it = typeMap.begin(); it != typeMap.end(); ++it) 331 for (auto it = typeMap.begin(); it != typeMap.end(); ++it)
273 EM_ASM_ARGS(exports.RegExpFilter.typeMap[readString($0).replace("-", "_").toUpperCase()] = $1, &(it->first), it->second); 332 EM_ASM_ARGS(exports.RegExpFilter.typeMap[readString($0).replace("-", "_").toUpperCase()] = $1, &(it->first), it->second);
274 } 333 }
275 334
276 OwnedString RegExpFilter::RegExpFromSource(const String& source)
277 {
278 /* TODO: this is very inefficient */
279
280 // Note: This doesn't remove trailing wildcards, otherwise the result should
281 // be identical to Filter.toRegExp().
282 OwnedString result;
283 String::value_type prevChar = u'*';
284 for (String::size_type i = 0; i < source.length(); ++i)
285 {
286 String::value_type currChar = source[i];
287 switch (currChar)
288 {
289 case u'*':
290 if (prevChar != u'*')
291 result.append(u".*"_str);
292 break;
293 case u'^':
294 result.append(u"(?:[\\x00-\\x24\\x26-\\x2C\\x2F\\x3A-\\x40\\x5B-\\x5E\\x60\\x7B-\\x7F]|$)"_str);
295 break;
296 case u'|':
297 if (i == 0)
298 {
299 // Anchor at expression start, maybe extended anchor?
300 if (i + 1 < source.length() && source[i + 1] == u'|')
301 {
302 result.append(u"^[\\w\\-]+:\\/+(?!\\/)(?:[^\\/]+\\.)?"_str);
303 ++i;
304 }
305 else
306 result.append(u'^');
307 }
308 else if (i == source.length() - 1)
309 {
310 // Anchor at expression end, ignore if following separator placeholder
311 if (prevChar != u'^')
312 result.append(u'$');
313 }
314 else
315 {
316 // Not actually an anchor, escape it
317 result.append(u"\\|"_str);
318 }
319 break;
320 default:
321 if (!(currChar >= u'a' && currChar <= u'z') &&
322 !(currChar >= u'A' && currChar <= u'Z') &&
323 !(currChar >= u'0' && currChar <= u'9') &&
324 currChar < 128)
325 {
326 result.append(u'\\');
327 }
328 result.append(currChar);
329 }
330 prevChar = currChar;
331 }
332 return result;
333 }
334
335 RegExpFilter::DomainMap* RegExpFilter::GetDomains() const 335 RegExpFilter::DomainMap* RegExpFilter::GetDomains() const
336 { 336 {
337 if (!mData.DomainsParsingDone()) 337 if (!mData.DomainsParsingDone())
338 { 338 {
339 ParseDomains(mData.GetDomainsSource(mText), u'|'); 339 ParseDomains(mData.GetDomainsSource(mText), u'|');
340 mData.SetDomainsParsingDone(); 340 mData.SetDomainsParsingDone();
341 } 341 }
342 return ActiveFilter::GetDomains(); 342 return ActiveFilter::GetDomains();
343 } 343 }
344 344
(...skipping 18 matching lines...) Expand all
363 return false; 363 return false;
364 } 364 }
365 365
366 if (!mData.RegExpParsingDone()) 366 if (!mData.RegExpParsingDone())
367 { 367 {
368 const OwnedString pattern(mData.GetRegExpSource(mText)); 368 const OwnedString pattern(mData.GetRegExpSource(mText));
369 mData.SetRegExp(GenerateRegExp(RegExpFromSource(pattern), mData.mMatchCase)); 369 mData.SetRegExp(GenerateRegExp(RegExpFromSource(pattern), mData.mMatchCase));
370 } 370 }
371 return EM_ASM_INT(return regexps.test($0, $1), mData.mRegexpId, &location); 371 return EM_ASM_INT(return regexps.test($0, $1), mData.mRegexpId, &location);
372 } 372 }
OLDNEW
« compiled/ElemHideBase.cpp ('K') | « compiled/RegExpFilter.h ('k') | compiled/bindings.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld