Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Delta Between Two Patch Sets: compiled/RegExpFilter.cpp

Issue 29333474: Issue 4125 - [emscripten] Convert filter classes to C++ (Closed)
Left Patch Set: Reworked JS binding generation Created Feb. 1, 2016, 9:14 p.m.
Right Patch Set: Addressed comments from Patch Set 28 Created March 21, 2017, 10:04 a.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
Left: Side by side diff | Download
Right: Side by side diff | Download
« no previous file with change/comment | « compiled/RegExpFilter.h ('k') | compiled/String.h » ('j') | no next file with change/comment »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
LEFT | RIGHT
1 #include <climits> 1 #include <climits>
2 2
3 #include <emscripten.h> 3 #include <emscripten.h>
4 4
5 #include "RegExpFilter.h" 5 #include "RegExpFilter.h"
6 #include "WhitelistFilter.h"
7 #include "InvalidFilter.h"
8 #include "StringScanner.h" 6 #include "StringScanner.h"
9 #include "StringMap.h" 7 #include "StringMap.h"
10 8
11 namespace 9 namespace
12 { 10 {
13 enum 11 enum
14 { 12 {
15 TYPE_OTHER = 0x1, 13 TYPE_OTHER = 0x1,
16 TYPE_SCRIPT = 0x2, 14 TYPE_SCRIPT = 0x2,
17 TYPE_IMAGE = 0x4, 15 TYPE_IMAGE = 0x4,
(...skipping 28 matching lines...) Expand all
46 {u"media"_str, TYPE_MEDIA}, 44 {u"media"_str, TYPE_MEDIA},
47 {u"font"_str, TYPE_FONT}, 45 {u"font"_str, TYPE_FONT},
48 {u"background"_str, TYPE_IMAGE}, // Backwards compat 46 {u"background"_str, TYPE_IMAGE}, // Backwards compat
49 47
50 {u"popup"_str, TYPE_POPUP}, 48 {u"popup"_str, TYPE_POPUP},
51 {u"genericblock"_str, TYPE_GENERICBLOCK}, 49 {u"genericblock"_str, TYPE_GENERICBLOCK},
52 {u"generichide"_str, TYPE_GENERICHIDE}, 50 {u"generichide"_str, TYPE_GENERICHIDE},
53 {u"elemhide"_str, TYPE_ELEMHIDE}, 51 {u"elemhide"_str, TYPE_ELEMHIDE},
54 }; 52 };
55 53
56 int defaultTypeMask = INT_MAX & ~(TYPE_DOCUMENT | TYPE_ELEMHIDE | TYPE_POPUP | 54 const int defaultTypeMask = INT_MAX & ~(TYPE_DOCUMENT | TYPE_ELEMHIDE |
57 TYPE_GENERICBLOCK | TYPE_GENERICHIDE); 55 TYPE_POPUP | TYPE_GENERICBLOCK | TYPE_GENERICHIDE);
58 56
59 int GenerateRegExp(const String& regexp, bool matchCase) 57 int GenerateRegExp(const String& regexp, bool matchCase)
60 { 58 {
61 return EM_ASM_INT(return regexps.create($0, $1), &regexp, matchCase); 59 return EM_ASM_INT(return regexps.create($0, $1), &regexp, matchCase);
62 } 60 }
63 } 61
64 62 void NormalizeWhitespace(DependentString& text)
65 RegExpFilter::RegExpFilter(const String& text, 63 {
66 String::size_type patternStart, String::size_type patternEnd) 64 // We want to remove all spaces but bail out early in the common scenario
67 : ActiveFilter(text, true), mRegexpId(0), 65 // that the string contains no spaces.
68 mRegexpSource(String(mText, patternStart, patternEnd - patternStart)), 66
69 mContentType(-1), mMatchCase(false), mThirdParty(TrippleState::ANY) 67 // Look for the first space
70 { 68 String::size_type len = text.length();
71 String options(mText, patternEnd + 1); 69 String::size_type pos;
72 StringScanner scanner(options, u','); 70 for (pos = 0; pos < len; pos++)
73 int optionStart = 0; 71 if (text[pos] == ' ')
74 int optionEnd = -1; 72 break;
75 int valueStart = -1; 73
74 if (pos >= len)
75 return;
76
77 // Found spaces, move characters to remove them
78 String::size_type delta = 1;
79 for (pos = pos + 1; pos < len; pos++)
80 {
81 if (text[pos] == ' ')
82 delta++;
83 else
84 text[pos - delta] = text[pos];
85 }
86 text.reset(text, 0, len - delta);
87 }
88
89 void ParseOption(String& text, DependentString& error, RegExpFilterData& data,
90 int optionStart, int optionEnd, int valueStart, int valueEnd)
91 {
92 if (optionEnd <= optionStart)
93 return;
94
95 bool reverse = false;
96 if (text[optionStart] == u'~')
97 {
98 reverse = true;
99 optionStart++;
100 }
101
102 DependentString name(text, optionStart, optionEnd - optionStart);
103 for (size_t i = 0; i < name.length(); ++i)
104 {
105 char16_t currChar = name[i];
106 if (currChar >= u'A' && currChar <= u'Z')
107 name[i] = currChar + u'a' - u'A';
108 else if (currChar == u'_')
109 name[i] = u'-';
110 }
111
112 auto it = typeMap.find(name);
113 if (it)
114 {
115 if (data.mContentType < 0)
116 data.mContentType = reverse ? defaultTypeMask : 0;
117 if (reverse)
118 data.mContentType &= ~it->second;
119 else
120 data.mContentType |= it->second;
121 }
122 else if (name.equals(u"domain"_str))
123 {
124 if (valueStart >= 0 && valueEnd > valueStart)
125 {
126 data.mDomainsStart = valueStart;
127 data.mDomainsEnd = valueEnd;
128 DependentString(text, valueStart, valueEnd - valueStart).toLower();
129 }
130 }
131 else if (name.equals(u"sitekey"_str))
132 {
133 if (valueStart >= 0 && valueEnd > valueStart)
134 {
135 data.mSitekeysStart = valueStart;
136 data.mSitekeysEnd = valueEnd;
137 }
138 }
139 else if (name.equals(u"match-case"_str))
140 data.mMatchCase = !reverse;
141 else if (name.equals(u"third-party"_str))
142 data.mThirdParty = reverse ? TrippleState::NO : TrippleState::YES;
143 else if (name.equals(u"collapse"_str))
144 data.mCollapse = reverse ? TrippleState::NO : TrippleState::YES;
145 else
146 error.reset(u"filter_unknown_option"_str);
147 }
148
149 void ParseOptions(String& text, DependentString& error, RegExpFilterData& data,
150 String::size_type optionsStart)
151 {
152 data.mMatchCase = false;
153 data.mThirdParty = TrippleState::ANY;
154 data.mCollapse = TrippleState::ANY;
155 data.mDomainsStart = String::npos;
156 data.mSitekeysStart = String::npos;
157 if (optionsStart >= text.length())
158 {
159 data.mContentType = defaultTypeMask;
160 return;
161 }
162
163 data.mContentType = -1;
164
165 int optionStart = data.mPatternEnd + 1;
166 int optionEnd = -1;
167 int valueStart = -1;
168
169 StringScanner scanner(text, optionStart, u',');
170 bool done = false;
171 while (!done)
172 {
173 done = scanner.done();
174 switch (scanner.next())
175 {
176 case u'=':
177 if (optionEnd < 0)
178 {
179 optionEnd = scanner.position();
180 valueStart = optionEnd + 1;
181 }
182 break;
183 case u',':
184 if (optionEnd < 0)
185 optionEnd = scanner.position();
186 ParseOption(text, error, data, optionStart, optionEnd, valueStart,
187 scanner.position());
188 if (!error.empty())
189 return;
190
191 optionStart = scanner.position() + 1;
192 optionEnd = -1;
193 valueStart = -1;
194 break;
195 }
196 }
197
198 if (data.mContentType < 0)
199 data.mContentType = defaultTypeMask;
200 }
201 }
202
203 RegExpFilter::RegExpFilter(Type type, const String& text, const RegExpFilterData& data)
204 : ActiveFilter(type, text, true), mData(data)
205 {
206 }
207
208 RegExpFilter::~RegExpFilter()
209 {
210 if (mData.HasRegExp())
211 EM_ASM_ARGS(regexps.delete($0), mData.mRegexpId);
212 }
213
214 Filter::Type RegExpFilter::Parse(DependentString& text, DependentString& error,
215 RegExpFilterData& data)
216 {
217 NormalizeWhitespace(text);
218
219 Filter::Type type = Type::BLOCKING;
220
221 data.mPatternStart = 0;
222 if (text.length() >= 2 && text[0] == u'@' && text[1] == u'@')
223 {
224 type = Type::WHITELIST;
225 data.mPatternStart = 2;
226 }
227
228 data.mPatternEnd = text.find(u'$', data.mPatternStart);
229 if (data.mPatternEnd == text.npos)
230 data.mPatternEnd = text.length();
231
232 ParseOptions(text, error, data, data.mPatternEnd + 1);
233 if (!error.empty())
234 return Type::INVALID;
235
236 if (data.mPatternEnd - data.mPatternStart >= 2 &&
237 text[data.mPatternStart] == u'/' &&
238 text[data.mPatternEnd - 1] == u'/')
239 {
240 data.SetRegExp(GenerateRegExp(DependentString(text, data.mPatternStart + 1,
241 data.mPatternEnd - data.mPatternStart - 2), data.mMatchCase));
242 if (data.mRegexpId == -1)
243 {
244 error.reset(u"filter_invalid_regexp"_str);
245 return Type::INVALID;
246 }
247 }
248
249 return type;
250 }
251
252 void RegExpFilter::ParseSitekeys(const String& sitekeys) const
253 {
254 StringScanner scanner(sitekeys, 0, u'|');
255 size_t start = 0;
76 bool done = false; 256 bool done = false;
77 while (!done) 257 while (!done)
78 { 258 {
79 done = scanner.done(); 259 done = scanner.done();
80 switch (scanner.next()) 260 if (scanner.next() == u'|')
81 { 261 {
82 case u'=': 262 if (scanner.position() > start)
83 if (optionEnd < 0) 263 AddSitekey(DependentString(sitekeys, start, scanner.position() - start));
84 { 264 start = scanner.position() + 1;
85 optionEnd = scanner.position(); 265 }
86 valueStart = optionEnd + 1;
87 }
88 break;
89 case u',':
90 if (optionEnd < 0)
91 optionEnd = scanner.position();
92 ProcessOption(options, optionStart, optionEnd, valueStart, scanner.position());
93 optionStart = scanner.position() + 1;
94 optionEnd = -1;
95 valueStart = -1;
96 break;
97 }
98 }
99 if (mContentType < 0)
100 mContentType = defaultTypeMask;
101
102 size_t len = mRegexpSource.length();
103 if (len >= 2 && mRegexpSource[0] == u'/' && mRegexpSource[len - 1] == u'/')
104 {
105 mRegexpSource.reset(mRegexpSource, 1 , len - 2);
106 mRegexpId = GenerateRegExp(mRegexpSource, mMatchCase);
107
108 int errorLength = EM_ASM_INT(return regexps.getErrorLength($0), mRegexpId);
109 if (errorLength >= 0)
110 {
111 String error(errorLength);
112 EM_ASM_ARGS(regexps.getError($0, $1), mRegexpId, error.data());
113 throw error;
114 }
115 }
116 }
117
118 RegExpFilter::~RegExpFilter()
119 {
120 if (mRegexpId)
121 EM_ASM_ARGS(regexps.delete($0), mRegexpId);
122 }
123
124 void RegExpFilter::ProcessOption(String& options, int optionStart,
125 int optionEnd, int valueStart, int valueEnd)
126 {
127 if (optionEnd <= optionStart)
128 return;
129
130 bool reverse = false;
131 if (options[optionStart] == u'~')
132 {
133 reverse = true;
134 optionStart++;
135 }
136
137 String name(options, optionStart, optionEnd - optionStart);
138 for (size_t i = 0; i < name.length(); ++i)
139 {
140 char16_t currChar = name[i];
141 if (currChar >= u'A' && currChar <= u'Z')
142 name[i] = currChar + u'a' - u'A';
143 else if (currChar == u'_')
144 name[i] = u'-';
145 }
146
147 auto it = typeMap.find(name);
148 if (it != typeMap.end())
149 {
150 if (mContentType < 0)
151 mContentType = reverse ? defaultTypeMask : 0;
152 if (reverse)
153 mContentType &= ~it->second;
154 else
155 mContentType |= it->second;
156 }
157 else if (name.equals(u"domain"_str))
158 {
159 if (valueStart >= 0 && valueEnd > valueStart)
160 ParseDomains(String(options, valueStart, valueEnd - valueStart), u'|');
161 }
162 else if (name.equals(u"sitekey"_str))
163 {
164 if (valueStart >= 0 && valueEnd > valueStart)
165 {
166 StringScanner scanner(String(options, valueStart, valueEnd - valueStart), u'|');
167 size_t start = 0;
168 bool done = false;
169 while (!done)
170 {
171 done = scanner.done();
172 if (scanner.next() == u'|')
173 {
174 if (scanner.position() > start)
175 AddSitekey(String(options, valueStart + start, scanner.position() - start));
176 start = scanner.position() + 1;
177 }
178 }
179 }
180 }
181 else if (name.equals(u"match-case"_str))
182 mMatchCase = !reverse;
183 else if (name.equals(u"third-party"_str))
184 mThirdParty = reverse ? TrippleState::NO : TrippleState::YES;
185 else if (name.equals(u"collapse"_str))
186 mCollapse = reverse ? TrippleState::NO : TrippleState::YES;
187 else
188 {
189 String error(u"Unknown option "_str);
190 error.append(name);
191 throw std::move(error);
192 }
193 }
194
195 Filter* RegExpFilter::Create(const String& text)
196 {
197 bool blocking = true;
198 String::size_type patternStart = 0;
199 if (text.length() >= 2 && text[0] == u'@' && text[1] == u'@')
200 {
201 blocking = false;
202 patternStart = 2;
203 }
204
205 String::size_type patternEnd = text.find(u'$', patternStart);
206 if (patternEnd == text.npos)
207 patternEnd = text.length();
208
209 try
210 {
211 if (blocking)
212 return new RegExpFilter(text, patternStart, patternEnd);
213 else
214 return new WhitelistFilter(text, patternStart, patternEnd);
215 }
216 catch (const String& reason)
217 {
218 return new InvalidFilter(text, reason);
219 } 266 }
220 } 267 }
221 268
222 void RegExpFilter::InitJSTypes() 269 void RegExpFilter::InitJSTypes()
223 { 270 {
224 EM_ASM(exports.RegExpFilter.typeMap = {};); 271 EM_ASM(exports.RegExpFilter.typeMap = {};);
225 for (auto it = typeMap.begin(); it != typeMap.end(); ++it) 272 for (auto it = typeMap.begin(); it != typeMap.end(); ++it)
226 EM_ASM_ARGS(exports.RegExpFilter.typeMap[getStringData($0).replace("-", "_").toUpperCase()] = $1, &(it->first), it->second); 273 EM_ASM_ARGS(exports.RegExpFilter.typeMap[readString($0).replace("-", "_").toUpperCase()] = $1, &(it->first), it->second);
227 } 274 }
228 275
229 String RegExpFilter::RegExpFromSource(const String& source) 276 OwnedString RegExpFilter::RegExpFromSource(const String& source)
230 { 277 {
231 /* TODO: this is very inefficient */ 278 /* TODO: this is very inefficient */
232 279
233 // Note: This doesn't remove trailing wildcards, otherwise the result should 280 // Note: This doesn't remove trailing wildcards, otherwise the result should
234 // be identical to Filter.toRegExp(). 281 // be identical to Filter.toRegExp().
235 String result; 282 OwnedString result;
236 String::value_type prevChar = u'*'; 283 String::value_type prevChar = u'*';
237 for (String::size_type i = 0; i < source.length(); ++i) 284 for (String::size_type i = 0; i < source.length(); ++i)
238 { 285 {
239 String::value_type currChar = source[i]; 286 String::value_type currChar = source[i];
240 switch (currChar) 287 switch (currChar)
241 { 288 {
242 case u'*': 289 case u'*':
243 if (prevChar != u'*') 290 if (prevChar != u'*')
244 result.append(u".*"_str); 291 result.append(u".*"_str);
245 break; 292 break;
(...skipping 29 matching lines...) Expand all
275 !(currChar >= u'A' && currChar <= u'Z') && 322 !(currChar >= u'A' && currChar <= u'Z') &&
276 !(currChar >= u'0' && currChar <= u'9') && 323 !(currChar >= u'0' && currChar <= u'9') &&
277 currChar < 128) 324 currChar < 128)
278 { 325 {
279 result.append(u'\\'); 326 result.append(u'\\');
280 } 327 }
281 result.append(currChar); 328 result.append(currChar);
282 } 329 }
283 prevChar = currChar; 330 prevChar = currChar;
284 } 331 }
285 return std::move(result); 332 return result;
286 } 333 }
287 334
288 Filter::Type RegExpFilter::GetType() const 335 RegExpFilter::DomainMap* RegExpFilter::GetDomains() const
289 { 336 {
290 return Type::BLOCKING; 337 if (!mData.DomainsParsingDone())
338 {
339 ParseDomains(mData.GetDomainsSource(mText), u'|');
340 mData.SetDomainsParsingDone();
341 }
342 return ActiveFilter::GetDomains();
343 }
344
345 RegExpFilter::SitekeySet* RegExpFilter::GetSitekeys() const
346 {
347 if (!mData.SitekeyParsingDone())
348 {
349 ParseSitekeys(mData.GetSitekeysSource(mText));
350 mData.SetSitekeysParsingDone();
351 }
352 return ActiveFilter::GetSitekeys();
291 } 353 }
292 354
293 bool RegExpFilter::Matches(const String& location, int typeMask, 355 bool RegExpFilter::Matches(const String& location, int typeMask,
294 String& docDomain, bool thirdParty, const String& sitekey) const 356 DependentString& docDomain, bool thirdParty, const String& sitekey) const
295 { 357 {
296 if (!(mContentType & typeMask) || 358 if (!(mData.mContentType & typeMask) ||
297 (mThirdParty == TrippleState::YES && !thirdParty) || 359 (mData.mThirdParty == TrippleState::YES && !thirdParty) ||
298 (mThirdParty == TrippleState::NO && thirdParty) || 360 (mData.mThirdParty == TrippleState::NO && thirdParty) ||
299 !IsActiveOnDomain(docDomain, sitekey)) 361 !IsActiveOnDomain(docDomain, sitekey))
300 { 362 {
301 return false; 363 return false;
302 } 364 }
303 365
304 if (!mRegexpId) 366 if (!mData.RegExpParsingDone())
305 mRegexpId = GenerateRegExp(RegExpFromSource(mRegexpSource), mMatchCase); 367 {
306 return EM_ASM_INT(return regexps.test($0, $1), mRegexpId, &location); 368 const OwnedString pattern(mData.GetRegExpSource(mText));
306 } 369 mData.SetRegExp(GenerateRegExp(RegExpFromSource(pattern), mData.mMatchCase));
370 }
371 return EM_ASM_INT(return regexps.test($0, $1), mData.mRegexpId, &location);
372 }
LEFT | RIGHT

Powered by Google App Engine
This is Rietveld