compiled/RegExpFilter.cpp - Issue 29383799: Issue 4988 - [emscripten] Adjust API for Element Hiding Emulation filters

Side by Side Diff: compiled/RegExpFilter.cpp

Issue 29383799: Issue 4988 - [emscripten] Adjust API for Element Hiding Emulation filters (Closed) Base URL: https://hg.adblockplus.org/adblockpluscore

Patch Set: Created March 14, 2017, 2:42 p.m.

Left:
Right:

Use n/p to move between diff chunks; N/P to move between comments.

Jump to:

View unified diff | Download patch

OLD	NEW
1 #include <climits>	1 #include <climits>

2	2

3 #include <emscripten.h>	3 #include <emscripten.h>

4	4

5 #include "RegExpFilter.h"	5 #include "RegExpFilter.h"

6 #include "StringScanner.h"	6 #include "StringScanner.h"

7 #include "StringMap.h"	7 #include "StringMap.h"

8	8

9 namespace	9 namespace

10 {	10 {

(...skipping 36 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
47	47

48 {u"popup"_str, TYPE_POPUP},	48 {u"popup"_str, TYPE_POPUP},

49 {u"genericblock"_str, TYPE_GENERICBLOCK},	49 {u"genericblock"_str, TYPE_GENERICBLOCK},

50 {u"generichide"_str, TYPE_GENERICHIDE},	50 {u"generichide"_str, TYPE_GENERICHIDE},

51 {u"elemhide"_str, TYPE_ELEMHIDE},	51 {u"elemhide"_str, TYPE_ELEMHIDE},

52 };	52 };

53	53

54 const int defaultTypeMask = INT_MAX & ~(TYPE_DOCUMENT \| TYPE_ELEMHIDE \|	54 const int defaultTypeMask = INT_MAX & ~(TYPE_DOCUMENT \| TYPE_ELEMHIDE \|

55 TYPE_POPUP \| TYPE_GENERICBLOCK \| TYPE_GENERICHIDE);	55 TYPE_POPUP \| TYPE_GENERICBLOCK \| TYPE_GENERICHIDE);

56	56

	57 OwnedString RegExpFromSource(const String& source)
	Wladimir Palant 2017/03/14 14:44:43 This function was moved into an anonymous namespace without any further changes.
	58 {

	59 /* TODO: this is very inefficient */

	60

	61 // Note: This doesn't remove trailing wildcards, otherwise the result should

	62 // be identical to Filter.toRegExp().

	63 OwnedString result;

	64 String::value_type prevChar = u'*';

	65 for (String::size_type i = 0; i < source.length(); ++i)

	66 {

	67 String::value_type currChar = source[i];

	68 switch (currChar)

	69 {

	70 case u'*':

	71 if (prevChar != u'*')

	72 result.append(u".*"_str);

	73 break;

	74 case u'^':

	75 result.append(u"(?:[\\x00-\\x24\\x26-\\x2C\\x2F\\x3A-\\x40\\x5B-\\x5E\\x60\\x7B-\\x7F]\|$)"_str);

	76 break;

	77 case u'\|':

	78 if (i == 0)

	79 {

	80 // Anchor at expression start, maybe extended anchor?

	81 if (i + 1 < source.length() && source[i + 1] == u'\|')

	82 {

	83 result.append(u"^[\\w\\-]+:\\/+(?!\\/)(?:[^\\/]+\\.)?"_str);

	84 ++i;

	85 }

	86 else

	87 result.append(u'^');

	88 }

	89 else if (i == source.length() - 1)

	90 {

	91 // Anchor at expression end, ignore if following separator placeholder

	92 if (prevChar != u'^')

	93 result.append(u'$');

	94 }

	95 else

	96 {

	97 // Not actually an anchor, escape it

	98 result.append(u"\\\|"_str);

	99 }

	100 break;

	101 default:

	102 if (!(currChar >= u'a' && currChar <= u'z') &&

	103 !(currChar >= u'A' && currChar <= u'Z') &&

	104 !(currChar >= u'0' && currChar <= u'9') &&

	105 currChar < 128)

	106 {

	107 result.append(u'\\');

	108 }

	109 result.append(currChar);

	110 }

	111 prevChar = currChar;

	112 }

	113 return result;

	114 }

	115

57 int GenerateRegExp(const String& regexp, bool matchCase)	116 int GenerateRegExp(const String& regexp, bool matchCase)

58 {	117 {

59 return EM_ASM_INT(return regexps.create($0, $1), &regexp, matchCase);	118 return EM_ASM_INT(return regexps.create($0, $1), &regexp, matchCase);

60 }	119 }

61	120

62 void NormalizeWhitespace(DependentString& text)	121 void NormalizeWhitespace(DependentString& text)

63 {	122 {

64 // We want to remove all spaces but bail out early in the common scenario	123 // We want to remove all spaces but bail out early in the common scenario

65 // that the string contains no spaces.	124 // that the string contains no spaces.

66	125

(...skipping 199 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
266 }	325 }

267 }	326 }

268	327

269 void RegExpFilter::InitJSTypes()	328 void RegExpFilter::InitJSTypes()

270 {	329 {

271 EM_ASM(exports.RegExpFilter.typeMap = {};);	330 EM_ASM(exports.RegExpFilter.typeMap = {};);

272 for (auto it = typeMap.begin(); it != typeMap.end(); ++it)	331 for (auto it = typeMap.begin(); it != typeMap.end(); ++it)

273 EM_ASM_ARGS(exports.RegExpFilter.typeMap[readString($0).replace("-", "_").toUpperCase()] = $1, &(it->first), it->second);	332 EM_ASM_ARGS(exports.RegExpFilter.typeMap[readString($0).replace("-", "_").toUpperCase()] = $1, &(it->first), it->second);

274 }	333 }

275	334

276 OwnedString RegExpFilter::RegExpFromSource(const String& source)

277 {

278 /* TODO: this is very inefficient */

279

280 // Note: This doesn't remove trailing wildcards, otherwise the result should

281 // be identical to Filter.toRegExp().

282 OwnedString result;

283 String::value_type prevChar = u'*';

284 for (String::size_type i = 0; i < source.length(); ++i)

285 {

286 String::value_type currChar = source[i];

287 switch (currChar)

288 {

289 case u'*':

290 if (prevChar != u'*')

291 result.append(u".*"_str);

292 break;

293 case u'^':

294 result.append(u"(?:[\\x00-\\x24\\x26-\\x2C\\x2F\\x3A-\\x40\\x5B-\\x5E\\x60\\x7B-\\x7F]\|$)"_str);

295 break;

296 case u'\|':

297 if (i == 0)

298 {

299 // Anchor at expression start, maybe extended anchor?

300 if (i + 1 < source.length() && source[i + 1] == u'\|')

301 {

302 result.append(u"^[\\w\\-]+:\\/+(?!\\/)(?:[^\\/]+\\.)?"_str);

303 ++i;

304 }

305 else

306 result.append(u'^');

307 }

308 else if (i == source.length() - 1)

309 {

310 // Anchor at expression end, ignore if following separator placeholder

311 if (prevChar != u'^')

312 result.append(u'$');

313 }

314 else

315 {

316 // Not actually an anchor, escape it

317 result.append(u"\\\|"_str);

318 }

319 break;

320 default:

321 if (!(currChar >= u'a' && currChar <= u'z') &&

322 !(currChar >= u'A' && currChar <= u'Z') &&

323 !(currChar >= u'0' && currChar <= u'9') &&

324 currChar < 128)

325 {

326 result.append(u'\\');

327 }

328 result.append(currChar);

329 }

330 prevChar = currChar;

331 }

332 return result;

333 }

334

335 RegExpFilter::DomainMap* RegExpFilter::GetDomains() const	335 RegExpFilter::DomainMap* RegExpFilter::GetDomains() const

336 {	336 {

337 if (!mData.DomainsParsingDone())	337 if (!mData.DomainsParsingDone())

338 {	338 {

339 ParseDomains(mData.GetDomainsSource(mText), u'\|');	339 ParseDomains(mData.GetDomainsSource(mText), u'\|');

340 mData.SetDomainsParsingDone();	340 mData.SetDomainsParsingDone();

341 }	341 }

342 return ActiveFilter::GetDomains();	342 return ActiveFilter::GetDomains();

343 }	343 }

344	344

(...skipping 18 matching lines...) Expand all Loading...
363 return false;	363 return false;

364 }	364 }

365	365

366 if (!mData.RegExpParsingDone())	366 if (!mData.RegExpParsingDone())

367 {	367 {

368 const OwnedString pattern(mData.GetRegExpSource(mText));	368 const OwnedString pattern(mData.GetRegExpSource(mText));

369 mData.SetRegExp(GenerateRegExp(RegExpFromSource(pattern), mData.mMatchCase));	369 mData.SetRegExp(GenerateRegExp(RegExpFromSource(pattern), mData.mMatchCase));

370 }	370 }

371 return EM_ASM_INT(return regexps.test($0, $1), mData.mRegexpId, &location);	371 return EM_ASM_INT(return regexps.test($0, $1), mData.mRegexpId, &location);

372 }	372 }

OLD	NEW

« compiled/ElemHideBase.cpp ('K') | « compiled/RegExpFilter.h ('k') | compiled/bindings.cpp » ('j') | no next file with comments »