compiled/RegExpFilter.cpp - Issue 29333474: Issue 4125 - [emscripten] Convert filter classes to C++

Delta Between Two Patch Sets: compiled/RegExpFilter.cpp

Issue 29333474: Issue 4125 - [emscripten] Convert filter classes to C++ (Closed)

Left Patch Set: Replaced old filter classes unit tests completely Created Feb. 18, 2016, 4:40 p.m.

Right Patch Set: Addressed comments from Patch Set 28 Created March 21, 2017, 10:04 a.m.

Left:
Right:

Use n/p to move between diff chunks; N/P to move between comments.

Jump to:

Left: Side by side diff | Download
Right: Side by side diff | Download

LEFT	RIGHT
1 #include <climits>	1 #include <climits>

2	2

3 #include <emscripten.h>	3 #include <emscripten.h>

4	4

5 #include "RegExpFilter.h"	5 #include "RegExpFilter.h"

6 #include "StringScanner.h"	6 #include "StringScanner.h"

7 #include "StringMap.h"	7 #include "StringMap.h"

8	8

9 namespace	9 namespace

10 {	10 {

(...skipping 107 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
118 data.mContentType &= ~it->second;	118 data.mContentType &= ~it->second;

119 else	119 else

120 data.mContentType |= it->second;	120 data.mContentType |= it->second;

121 }	121 }

122 else if (name.equals(u"domain"_str))	122 else if (name.equals(u"domain"_str))

123 {	123 {

124 if (valueStart >= 0 && valueEnd > valueStart)	124 if (valueStart >= 0 && valueEnd > valueStart)

125 {	125 {

126 data.mDomainsStart = valueStart;	126 data.mDomainsStart = valueStart;

127 data.mDomainsEnd = valueEnd;	127 data.mDomainsEnd = valueEnd;

128 ActiveFilter::ToLower(text, data.mDomainsStart, data.mDomainsEnd);	128 DependentString(text, valueStart, valueEnd - valueStart).toLower();

129 }	129 }

130 }	130 }

131 else if (name.equals(u"sitekey"_str))	131 else if (name.equals(u"sitekey"_str))

132 {	132 {

133 if (valueStart >= 0 && valueEnd > valueStart)	133 if (valueStart >= 0 && valueEnd > valueStart)

134 {	134 {

135 data.mSitekeysStart = valueStart;	135 data.mSitekeysStart = valueStart;

136 data.mSitekeysEnd = valueEnd;	136 data.mSitekeysEnd = valueEnd;

137 }	137 }

138 }	138 }

(...skipping 54 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
193 valueStart = -1;	193 valueStart = -1;

194 break;	194 break;

195 }	195 }

196 }	196 }

197	197

198 if (data.mContentType < 0)	198 if (data.mContentType < 0)

199 data.mContentType = defaultTypeMask;	199 data.mContentType = defaultTypeMask;

200 }	200 }

201 }	201 }

202	202

203 RegExpFilter::RegExpFilter(const String& text, const RegExpFilterData& data)	203 RegExpFilter::RegExpFilter(Type type, const String& text, const RegExpFilterData & data)

204 : ActiveFilter(text, true), RegExpFilterData(data)	204 : ActiveFilter(type, text, true), mData(data)

205 {	205 {

206 }	206 }

207	207

208 RegExpFilter::~RegExpFilter()	208 RegExpFilter::~RegExpFilter()

209 {	209 {

210 if (HasRegExp())	210 if (mData.HasRegExp())

211 EM_ASM_ARGS(regexps.delete($0), mRegexpId);	211 EM_ASM_ARGS(regexps.delete($0), mData.mRegexpId);

212 }	212 }

213	213

214 Filter::Type RegExpFilter::Parse(DependentString& text, DependentString& error,	214 Filter::Type RegExpFilter::Parse(DependentString& text, DependentString& error,

215 RegExpFilterData& data)	215 RegExpFilterData& data)

216 {	216 {

217 NormalizeWhitespace(text);	217 NormalizeWhitespace(text);

218	218

219 bool blocking = true;	219 Filter::Type type = Type::BLOCKING;

220	220

221 data.mPatternStart = 0;	221 data.mPatternStart = 0;

222 if (text.length() >= 2 && text[0] == u'@' && text[1] == u'@')	222 if (text.length() >= 2 && text[0] == u'@' && text[1] == u'@')

223 {	223 {

224 blocking = false;	224 type = Type::WHITELIST;

225 data.mPatternStart = 2;	225 data.mPatternStart = 2;

226 }	226 }

227	227

228 data.mPatternEnd = text.find(u'$', data.mPatternStart);	228 data.mPatternEnd = text.find(u'$', data.mPatternStart);

229 if (data.mPatternEnd == text.npos)	229 if (data.mPatternEnd == text.npos)

230 data.mPatternEnd = text.length();	230 data.mPatternEnd = text.length();

231	231

232 ParseOptions(text, error, data, data.mPatternEnd + 1);	232 ParseOptions(text, error, data, data.mPatternEnd + 1);

233 if (!error.empty())	233 if (!error.empty())

234 return Type::INVALID;	234 return Type::INVALID;

235	235

236 if (data.mPatternEnd - data.mPatternStart >= 2 &&	236 if (data.mPatternEnd - data.mPatternStart >= 2 &&

237 text[data.mPatternStart] == u'/' &&	237 text[data.mPatternStart] == u'/' &&

238 text[data.mPatternEnd - 1] == u'/')	238 text[data.mPatternEnd - 1] == u'/')

239 {	239 {

240 data.SetRegExp(GenerateRegExp(DependentString(text, data.mPatternStart + 1,	240 data.SetRegExp(GenerateRegExp(DependentString(text, data.mPatternStart + 1,

241 data.mPatternEnd - data.mPatternStart - 2), data.mMatchCase));	241 data.mPatternEnd - data.mPatternStart - 2), data.mMatchCase));

242 if (data.mRegexpId == -1)	242 if (data.mRegexpId == -1)

243 {	243 {

244 error.reset(u"filter_invalid_regexp"_str);	244 error.reset(u"filter_invalid_regexp"_str);

245 return Type::INVALID;	245 return Type::INVALID;

246 }	246 }

247 }	247 }

248	248

249 if (blocking)	249 return type;

250 return Type::BLOCKING;

251 else

252 return Type::WHITELIST;

253 }	250 }

254	251

255 void RegExpFilter::ParseSitekeys(const String& sitekeys) const	252 void RegExpFilter::ParseSitekeys(const String& sitekeys) const

256 {	253 {

257 StringScanner scanner(sitekeys, 0, u'|');	254 StringScanner scanner(sitekeys, 0, u'|');

258 size_t start = 0;	255 size_t start = 0;

259 bool done = false;	256 bool done = false;

260 while (!done)	257 while (!done)

261 {	258 {

262 done = scanner.done();	259 done = scanner.done();

263 if (scanner.next() == u'|')	260 if (scanner.next() == u'|')

264 {	261 {

265 if (scanner.position() > start)	262 if (scanner.position() > start)

266 AddSitekey(DependentString(sitekeys, start, scanner.position() - start));	263 AddSitekey(DependentString(sitekeys, start, scanner.position() - start));

267 start = scanner.position() + 1;	264 start = scanner.position() + 1;

268 }	265 }

269 }	266 }

270 }	267 }

271	268

272 void RegExpFilter::InitJSTypes()	269 void RegExpFilter::InitJSTypes()

273 {	270 {

274 EM_ASM(exports.RegExpFilter.typeMap = {};);	271 EM_ASM(exports.RegExpFilter.typeMap = {};);

275 for (auto it = typeMap.begin(); it != typeMap.end(); ++it)	272 for (auto it = typeMap.begin(); it != typeMap.end(); ++it)

276 EM_ASM_ARGS(exports.RegExpFilter.typeMap[getStringData($0).replace("-", "_").toUpperCase()] = $1, &(it->first), it->second);	273 EM_ASM_ARGS(exports.RegExpFilter.typeMap[readString($0).replace("-", "_").toUpperCase()] = $1, &(it->first), it->second);

277 }	274 }

278	275

279 OwnedString RegExpFilter::RegExpFromSource(const String& source)	276 OwnedString RegExpFilter::RegExpFromSource(const String& source)

280 {	277 {

281 /* TODO: this is very inefficient */	278 /* TODO: this is very inefficient */

282	279

283 // Note: This doesn't remove trailing wildcards, otherwise the result should	280 // Note: This doesn't remove trailing wildcards, otherwise the result should

284 // be identical to Filter.toRegExp().	281 // be identical to Filter.toRegExp().

285 OwnedString result;	282 OwnedString result;

286 String::value_type prevChar = u'*';	283 String::value_type prevChar = u'*';

(...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
330 }	327 }

331 result.append(currChar);	328 result.append(currChar);

332 }	329 }

333 prevChar = currChar;	330 prevChar = currChar;

334 }	331 }

335 return result;	332 return result;

336 }	333 }

337	334

338 RegExpFilter::DomainMap* RegExpFilter::GetDomains() const	335 RegExpFilter::DomainMap* RegExpFilter::GetDomains() const

339 {	336 {

340 if (!DomainsParsingDone())	337 if (!mData.DomainsParsingDone())

341 {	338 {

342 ParseDomains(GetDomainsSource(mText), u'|');	339 ParseDomains(mData.GetDomainsSource(mText), u'|');

343 SetDomainsParsingDone();	340 mData.SetDomainsParsingDone();

344 }	341 }

345 return ActiveFilter::GetDomains();	342 return ActiveFilter::GetDomains();

346 }	343 }

347	344

348 RegExpFilter::SitekeySet* RegExpFilter::GetSitekeys() const	345 RegExpFilter::SitekeySet* RegExpFilter::GetSitekeys() const

349 {	346 {

350 if (!SitekeyParsingDone())	347 if (!mData.SitekeyParsingDone())

351 {	348 {

352 ParseSitekeys(GetSitekeysSource(mText));	349 ParseSitekeys(mData.GetSitekeysSource(mText));

353 SetSitekeysParsingDone();	350 mData.SetSitekeysParsingDone();

354 }	351 }

355 return ActiveFilter::GetSitekeys();	352 return ActiveFilter::GetSitekeys();

356 }	353 }

357	354

358 bool RegExpFilter::Matches(const String& location, int typeMask,	355 bool RegExpFilter::Matches(const String& location, int typeMask,

359 DependentString& docDomain, bool thirdParty, const String& sitekey) const	356 DependentString& docDomain, bool thirdParty, const String& sitekey) const

360 {	357 {

361 if (!(mContentType & typeMask) ||	358 if (!(mData.mContentType & typeMask) ||

362 (mThirdParty == TrippleState::YES && !thirdParty) ||	359 (mData.mThirdParty == TrippleState::YES && !thirdParty) ||

363 (mThirdParty == TrippleState::NO && thirdParty) ||	360 (mData.mThirdParty == TrippleState::NO && thirdParty) ||

364 !IsActiveOnDomain(docDomain, sitekey))	361 !IsActiveOnDomain(docDomain, sitekey))

365 {	362 {

366 return false;	363 return false;

367 }	364 }

368	365

369 if (!RegExpParsingDone())	366 if (!mData.RegExpParsingDone())

370 {	367 {

371 const OwnedString pattern(GetRegExpSource(mText));	368 const OwnedString pattern(mData.GetRegExpSource(mText));

372 SetRegExp(GenerateRegExp(RegExpFromSource(pattern), mMatchCase));	369 mData.SetRegExp(GenerateRegExp(RegExpFromSource(pattern), mData.mMatchCase));

373 }	370 }

374 return EM_ASM_INT(return regexps.test($0, $1), mRegexpId, &location);	371 return EM_ASM_INT(return regexps.test($0, $1), mData.mRegexpId, &location);

375 }	372 }

LEFT	RIGHT