compiled/RegExpFilter.cpp - Issue 29333474: Issue 4125 - [emscripten] Convert filter classes to C++

Delta Between Two Patch Sets: compiled/RegExpFilter.cpp

Issue 29333474: Issue 4125 - [emscripten] Convert filter classes to C++ (Closed)

Left Patch Set: Updated unit test framework to the current state of the repository Created Nov. 24, 2016, 3:40 p.m.

Right Patch Set: Addressed comments from Patch Set 28 Created March 21, 2017, 10:04 a.m.

Left:
Right:

Use n/p to move between diff chunks; N/P to move between comments.

Jump to:

Left: Side by side diff | Download
Right: Side by side diff | Download

LEFT	RIGHT
1 #include <climits>	1 #include <climits>

2	2

3 #include <emscripten.h>	3 #include <emscripten.h>

4	4

5 #include "RegExpFilter.h"	5 #include "RegExpFilter.h"

6 #include "StringScanner.h"	6 #include "StringScanner.h"

7 #include "StringMap.h"	7 #include "StringMap.h"

8	8

9 namespace	9 namespace

10 {	10 {

(...skipping 107 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
118 data.mContentType &= ~it->second;	118 data.mContentType &= ~it->second;

119 else	119 else

120 data.mContentType |= it->second;	120 data.mContentType |= it->second;

121 }	121 }

122 else if (name.equals(u"domain"_str))	122 else if (name.equals(u"domain"_str))

123 {	123 {

124 if (valueStart >= 0 && valueEnd > valueStart)	124 if (valueStart >= 0 && valueEnd > valueStart)

125 {	125 {

126 data.mDomainsStart = valueStart;	126 data.mDomainsStart = valueStart;

127 data.mDomainsEnd = valueEnd;	127 data.mDomainsEnd = valueEnd;

128 ActiveFilter::ToLower(DependentString(text, valueStart,	128 DependentString(text, valueStart, valueEnd - valueStart).toLower();

129 valueEnd - valueStart));

130 }	129 }

131 }	130 }

132 else if (name.equals(u"sitekey"_str))	131 else if (name.equals(u"sitekey"_str))

133 {	132 {

134 if (valueStart >= 0 && valueEnd > valueStart)	133 if (valueStart >= 0 && valueEnd > valueStart)

135 {	134 {

136 data.mSitekeysStart = valueStart;	135 data.mSitekeysStart = valueStart;

137 data.mSitekeysEnd = valueEnd;	136 data.mSitekeysEnd = valueEnd;

138 }	137 }

139 }	138 }

(...skipping 54 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
194 valueStart = -1;	193 valueStart = -1;

195 break;	194 break;

196 }	195 }

197 }	196 }

198	197

199 if (data.mContentType < 0)	198 if (data.mContentType < 0)

200 data.mContentType = defaultTypeMask;	199 data.mContentType = defaultTypeMask;

201 }	200 }

202 }	201 }

203	202

204 RegExpFilter::RegExpFilter(const String& text, const RegExpFilterData& data)	203 RegExpFilter::RegExpFilter(Type type, const String& text, const RegExpFilterData & data)

205 : ActiveFilter(text, true), RegExpFilterData(data)	204 : ActiveFilter(type, text, true), mData(data)

206 {	205 {

207 }	206 }

208	207

209 RegExpFilter::~RegExpFilter()	208 RegExpFilter::~RegExpFilter()

210 {	209 {

211 if (HasRegExp())	210 if (mData.HasRegExp())

212 EM_ASM_ARGS(regexps.delete($0), mRegexpId);	211 EM_ASM_ARGS(regexps.delete($0), mData.mRegexpId);

213 }	212 }

214	213

215 Filter::Type RegExpFilter::Parse(DependentString& text, DependentString& error,	214 Filter::Type RegExpFilter::Parse(DependentString& text, DependentString& error,

216 RegExpFilterData& data)	215 RegExpFilterData& data)

217 {	216 {

218 NormalizeWhitespace(text);	217 NormalizeWhitespace(text);

219	218

220 bool blocking = true;	219 Filter::Type type = Type::BLOCKING;

221	220

222 data.mPatternStart = 0;	221 data.mPatternStart = 0;

223 if (text.length() >= 2 && text[0] == u'@' && text[1] == u'@')	222 if (text.length() >= 2 && text[0] == u'@' && text[1] == u'@')

224 {	223 {

225 blocking = false;	224 type = Type::WHITELIST;

226 data.mPatternStart = 2;	225 data.mPatternStart = 2;

227 }	226 }

228	227

229 data.mPatternEnd = text.find(u'$', data.mPatternStart);	228 data.mPatternEnd = text.find(u'$', data.mPatternStart);

230 if (data.mPatternEnd == text.npos)	229 if (data.mPatternEnd == text.npos)

231 data.mPatternEnd = text.length();	230 data.mPatternEnd = text.length();

232	231

233 ParseOptions(text, error, data, data.mPatternEnd + 1);	232 ParseOptions(text, error, data, data.mPatternEnd + 1);

234 if (!error.empty())	233 if (!error.empty())

235 return Type::INVALID;	234 return Type::INVALID;

236	235

237 if (data.mPatternEnd - data.mPatternStart >= 2 &&	236 if (data.mPatternEnd - data.mPatternStart >= 2 &&

238 text[data.mPatternStart] == u'/' &&	237 text[data.mPatternStart] == u'/' &&

239 text[data.mPatternEnd - 1] == u'/')	238 text[data.mPatternEnd - 1] == u'/')

240 {	239 {

241 data.SetRegExp(GenerateRegExp(DependentString(text, data.mPatternStart + 1,	240 data.SetRegExp(GenerateRegExp(DependentString(text, data.mPatternStart + 1,

242 data.mPatternEnd - data.mPatternStart - 2), data.mMatchCase));	241 data.mPatternEnd - data.mPatternStart - 2), data.mMatchCase));

243 if (data.mRegexpId == -1)	242 if (data.mRegexpId == -1)

244 {	243 {

245 error.reset(u"filter_invalid_regexp"_str);	244 error.reset(u"filter_invalid_regexp"_str);

246 return Type::INVALID;	245 return Type::INVALID;

247 }	246 }

248 }	247 }

249	248

250 if (blocking)	249 return type;

251 return Type::BLOCKING;

252 else

253 return Type::WHITELIST;

254 }	250 }

255	251

256 void RegExpFilter::ParseSitekeys(const String& sitekeys) const	252 void RegExpFilter::ParseSitekeys(const String& sitekeys) const

257 {	253 {

258 StringScanner scanner(sitekeys, 0, u'|');	254 StringScanner scanner(sitekeys, 0, u'|');

259 size_t start = 0;	255 size_t start = 0;

260 bool done = false;	256 bool done = false;

261 while (!done)	257 while (!done)

262 {	258 {

263 done = scanner.done();	259 done = scanner.done();

264 if (scanner.next() == u'|')	260 if (scanner.next() == u'|')

265 {	261 {

266 if (scanner.position() > start)	262 if (scanner.position() > start)

267 AddSitekey(DependentString(sitekeys, start, scanner.position() - start));	263 AddSitekey(DependentString(sitekeys, start, scanner.position() - start));

268 start = scanner.position() + 1;	264 start = scanner.position() + 1;

269 }	265 }

270 }	266 }

271 }	267 }

272	268

273 void RegExpFilter::InitJSTypes()	269 void RegExpFilter::InitJSTypes()

274 {	270 {

275 EM_ASM(exports.RegExpFilter.typeMap = {};);	271 EM_ASM(exports.RegExpFilter.typeMap = {};);

276 for (auto it = typeMap.begin(); it != typeMap.end(); ++it)	272 for (auto it = typeMap.begin(); it != typeMap.end(); ++it)

277 EM_ASM_ARGS(exports.RegExpFilter.typeMap[getStringData($0).replace("-", "_").toUpperCase()] = $1, &(it->first), it->second);	273 EM_ASM_ARGS(exports.RegExpFilter.typeMap[readString($0).replace("-", "_").toUpperCase()] = $1, &(it->first), it->second);

278 }	274 }

279	275

280 OwnedString RegExpFilter::RegExpFromSource(const String& source)	276 OwnedString RegExpFilter::RegExpFromSource(const String& source)

281 {	277 {

282 /* TODO: this is very inefficient */	278 /* TODO: this is very inefficient */

283	279

284 // Note: This doesn't remove trailing wildcards, otherwise the result should	280 // Note: This doesn't remove trailing wildcards, otherwise the result should

285 // be identical to Filter.toRegExp().	281 // be identical to Filter.toRegExp().

286 OwnedString result;	282 OwnedString result;

287 String::value_type prevChar = u'*';	283 String::value_type prevChar = u'*';

(...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
331 }	327 }

332 result.append(currChar);	328 result.append(currChar);

333 }	329 }

334 prevChar = currChar;	330 prevChar = currChar;

335 }	331 }

336 return result;	332 return result;

337 }	333 }

338	334

339 RegExpFilter::DomainMap* RegExpFilter::GetDomains() const	335 RegExpFilter::DomainMap* RegExpFilter::GetDomains() const

340 {	336 {

341 if (!DomainsParsingDone())	337 if (!mData.DomainsParsingDone())

342 {	338 {

343 ParseDomains(GetDomainsSource(mText), u'|');	339 ParseDomains(mData.GetDomainsSource(mText), u'|');

344 SetDomainsParsingDone();	340 mData.SetDomainsParsingDone();

345 }	341 }

346 return ActiveFilter::GetDomains();	342 return ActiveFilter::GetDomains();

347 }	343 }

348	344

349 RegExpFilter::SitekeySet* RegExpFilter::GetSitekeys() const	345 RegExpFilter::SitekeySet* RegExpFilter::GetSitekeys() const

350 {	346 {

351 if (!SitekeyParsingDone())	347 if (!mData.SitekeyParsingDone())

352 {	348 {

353 ParseSitekeys(GetSitekeysSource(mText));	349 ParseSitekeys(mData.GetSitekeysSource(mText));

354 SetSitekeysParsingDone();	350 mData.SetSitekeysParsingDone();

355 }	351 }

356 return ActiveFilter::GetSitekeys();	352 return ActiveFilter::GetSitekeys();

357 }	353 }

358	354

359 bool RegExpFilter::Matches(const String& location, int typeMask,	355 bool RegExpFilter::Matches(const String& location, int typeMask,

360 DependentString& docDomain, bool thirdParty, const String& sitekey) const	356 DependentString& docDomain, bool thirdParty, const String& sitekey) const

361 {	357 {

362 if (!(mContentType & typeMask) ||	358 if (!(mData.mContentType & typeMask) ||

363 (mThirdParty == TrippleState::YES && !thirdParty) ||	359 (mData.mThirdParty == TrippleState::YES && !thirdParty) ||

364 (mThirdParty == TrippleState::NO && thirdParty) ||	360 (mData.mThirdParty == TrippleState::NO && thirdParty) ||

365 !IsActiveOnDomain(docDomain, sitekey))	361 !IsActiveOnDomain(docDomain, sitekey))

366 {	362 {

367 return false;	363 return false;

368 }	364 }

369	365

370 if (!RegExpParsingDone())	366 if (!mData.RegExpParsingDone())

371 {	367 {

372 const OwnedString pattern(GetRegExpSource(mText));	368 const OwnedString pattern(mData.GetRegExpSource(mText));

373 SetRegExp(GenerateRegExp(RegExpFromSource(pattern), mMatchCase));	369 mData.SetRegExp(GenerateRegExp(RegExpFromSource(pattern), mData.mMatchCase));

374 }	370 }

375 return EM_ASM_INT(return regexps.test($0, $1), mRegexpId, &location);	371 return EM_ASM_INT(return regexps.test($0, $1), mData.mRegexpId, &location);

376 }	372 }

LEFT	RIGHT