Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Side by Side Diff: compiled/RegExpFilter.cpp

Issue 29333474: Issue 4125 - [emscripten] Convert filter classes to C++ (Closed)
Patch Set: Improved performance Created Jan. 28, 2016, 2:31 p.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View unified diff | Download patch
« no previous file with comments | « compiled/RegExpFilter.h ('k') | compiled/String.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 #include <climits>
2
3 #include <emscripten.h>
4
5 #include "RegExpFilter.h"
6 #include "WhitelistFilter.h"
7 #include "InvalidFilter.h"
8 #include "StringScanner.h"
9 #include "StringMap.h"
10
11 namespace
12 {
13 enum
14 {
15 TYPE_OTHER = 0x1,
16 TYPE_SCRIPT = 0x2,
17 TYPE_IMAGE = 0x4,
18 TYPE_STYLESHEET = 0x8,
19 TYPE_OBJECT = 0x10,
20 TYPE_SUBDOCUMENT = 0x20,
21 TYPE_DOCUMENT = 0x40,
22 TYPE_PING = 0x400,
23 TYPE_XMLHTTPREQUEST = 0x800,
24 TYPE_OBJECT_SUBREQUEST = 0x1000,
25 TYPE_MEDIA = 0x4000,
26 TYPE_FONT = 0x8000,
27 TYPE_POPUP = 0x8000000,
28 TYPE_GENERICBLOCK = 0x10000000,
29 TYPE_GENERICHIDE = 0x20000000,
30 TYPE_ELEMHIDE = 0x40000000,
31 };
32
33 StringMap<int> typeMap {
34 {u"other"_str, TYPE_OTHER},
35 {u"script"_str, TYPE_SCRIPT},
36 {u"image"_str, TYPE_IMAGE},
37 {u"stylesheet"_str, TYPE_STYLESHEET},
38 {u"object"_str, TYPE_OBJECT},
39 {u"subdocument"_str, TYPE_SUBDOCUMENT},
40 {u"document"_str, TYPE_DOCUMENT},
41 {u"xbl"_str, TYPE_OTHER}, // Backwards compat
42 {u"ping"_str, TYPE_PING},
43 {u"xmlhttprequest"_str, TYPE_XMLHTTPREQUEST},
44 {u"object-subrequest"_str, TYPE_OBJECT_SUBREQUEST},
45 {u"dtd"_str, TYPE_OTHER}, // Backwards compat
46 {u"media"_str, TYPE_MEDIA},
47 {u"font"_str, TYPE_FONT},
48 {u"background"_str, TYPE_IMAGE}, // Backwards compat
49
50 {u"popup"_str, TYPE_POPUP},
51 {u"genericblock"_str, TYPE_GENERICBLOCK},
52 {u"generichide"_str, TYPE_GENERICHIDE},
53 {u"elemhide"_str, TYPE_ELEMHIDE},
54 };
55
56 int defaultTypeMask = INT_MAX & ~(TYPE_DOCUMENT | TYPE_ELEMHIDE | TYPE_POPUP |
57 TYPE_GENERICBLOCK | TYPE_GENERICHIDE);
58
59 int GenerateRegExp(const String& regexp, bool matchCase)
60 {
61 return EM_ASM_INT(return regexps.create($0, $1), &regexp, matchCase);
62 }
63 }
64
65 RegExpFilter::RegExpFilter(const String& text,
66 String::size_type patternStart, String::size_type patternEnd)
67 : ActiveFilter(text, true), mRegexpId(0),
68 mRegexpSource(String(mText, patternStart, patternEnd - patternStart)),
69 mContentType(-1), mMatchCase(false), mThirdParty(TrippleState::ANY)
70 {
71 String options(mText, patternEnd + 1);
72 StringScanner scanner(options, u',');
73 int optionStart = 0;
74 int optionEnd = -1;
75 int valueStart = -1;
76 bool done = false;
77 while (!done)
78 {
79 done = scanner.done();
80 switch (scanner.next())
81 {
82 case u'=':
83 if (optionEnd < 0)
84 {
85 optionEnd = scanner.position();
86 valueStart = optionEnd + 1;
87 }
88 break;
89 case u',':
90 if (optionEnd < 0)
91 optionEnd = scanner.position();
92 ProcessOption(options, optionStart, optionEnd, valueStart, scanner.posit ion());
93 optionStart = scanner.position() + 1;
94 optionEnd = -1;
95 valueStart = -1;
96 break;
97 }
98 }
99 if (mContentType < 0)
100 mContentType = defaultTypeMask;
101
102 size_t len = mRegexpSource.length();
103 if (len >= 2 && mRegexpSource[0] == u'/' && mRegexpSource[len - 1] == u'/')
104 {
105 mRegexpSource.reset(mRegexpSource, 1 , len - 2);
106 mRegexpId = GenerateRegExp(mRegexpSource, mMatchCase);
107
108 int errorLength = EM_ASM_INT(return regexps.getErrorLength($0), mRegexpId);
109 if (errorLength >= 0)
110 {
111 String error(errorLength);
112 EM_ASM_ARGS(regexps.getError($0, $1), mRegexpId, error.data());
113 throw error;
114 }
115 }
116 }
117
118 RegExpFilter::~RegExpFilter()
119 {
120 if (mRegexpId)
121 EM_ASM_ARGS(regexps.delete($0), mRegexpId);
122 }
123
124 void RegExpFilter::ProcessOption(String& options, int optionStart,
125 int optionEnd, int valueStart, int valueEnd)
126 {
127 if (optionEnd <= optionStart)
128 return;
129
130 bool reverse = false;
131 if (options[optionStart] == u'~')
132 {
133 reverse = true;
134 optionStart++;
135 }
136
137 String name(options, optionStart, optionEnd - optionStart);
138 for (size_t i = 0; i < name.length(); ++i)
139 {
140 char16_t currChar = name[i];
141 if (currChar >= u'A' && currChar <= u'Z')
142 name[i] = currChar + u'a' - u'A';
143 else if (currChar == u'_')
144 name[i] = u'-';
145 }
146
147 auto it = typeMap.find(name);
148 if (it != typeMap.end())
149 {
150 if (mContentType < 0)
151 mContentType = reverse ? defaultTypeMask : 0;
152 if (reverse)
153 mContentType &= ~it->second;
154 else
155 mContentType |= it->second;
156 }
157 else if (name.equals(u"domain"_str))
158 {
159 if (valueStart >= 0 && valueEnd > valueStart)
160 ParseDomains(String(options, valueStart, valueEnd - valueStart), u'|');
161 }
162 else if (name.equals(u"sitekey"_str))
163 {
164 if (valueStart >= 0 && valueEnd > valueStart)
165 {
166 StringScanner scanner(String(options, valueStart, valueEnd - valueStart), u'|');
167 size_t start = 0;
168 bool done = false;
169 while (!done)
170 {
171 done = scanner.done();
172 if (scanner.next() == u'|')
173 {
174 if (scanner.position() > start)
175 AddSitekey(String(options, valueStart + start, scanner.position() - start));
176 start = scanner.position() + 1;
177 }
178 }
179 }
180 }
181 else if (name.equals(u"match-case"_str))
182 mMatchCase = !reverse;
183 else if (name.equals(u"third-party"_str))
184 mThirdParty = reverse ? TrippleState::NO : TrippleState::YES;
185 else if (name.equals(u"collapse"_str))
186 mCollapse = reverse ? TrippleState::NO : TrippleState::YES;
187 else
188 {
189 String error(u"Unknown option "_str);
190 error.append(name);
191 throw std::move(error);
192 }
193 }
194
195 Filter* RegExpFilter::Create(const String& text)
196 {
197 bool blocking = true;
198 String::size_type patternStart = 0;
199 if (text.length() >= 2 && text[0] == u'@' && text[1] == u'@')
200 {
201 blocking = false;
202 patternStart = 2;
203 }
204
205 String::size_type patternEnd = text.find(u'$', patternStart);
206 if (patternEnd == text.npos)
207 patternEnd = text.length();
208
209 try
210 {
211 if (blocking)
212 return new RegExpFilter(text, patternStart, patternEnd);
213 else
214 return new WhitelistFilter(text, patternStart, patternEnd);
215 }
216 catch (const String& reason)
217 {
218 return new InvalidFilter(text, reason);
219 }
220 }
221
222 void RegExpFilter::InitJSTypes()
223 {
224 EM_ASM(exports.RegExpFilter.typeMap = {};);
225 for (auto it = typeMap.begin(); it != typeMap.end(); ++it)
226 EM_ASM_ARGS(exports.RegExpFilter.typeMap[getStringData($0).replace("-", "_") .toUpperCase()] = $1, &(it->first), it->second);
227 }
228
229 String RegExpFilter::RegExpFromSource(const String& source)
230 {
231 /* TODO: this is very inefficient */
232
233 // Note: This doesn't remove trailing wildcards, otherwise the result should
234 // be identical to Filter.toRegExp().
235 String result;
236 String::value_type prevChar = u'*';
237 for (String::size_type i = 0; i < source.length(); ++i)
238 {
239 String::value_type currChar = source[i];
240 switch (currChar)
241 {
242 case u'*':
243 if (prevChar != u'*')
244 result.append(u".*"_str);
245 break;
246 case u'^':
247 result.append(u"(?:[\\x00-\\x24\\x26-\\x2C\\x2F\\x3A-\\x40\\x5B-\\x5E\\x 60\\x7B-\\x7F]|$)"_str);
248 break;
249 case u'|':
250 if (i == 0)
251 {
252 // Anchor at expression start, maybe extended anchor?
253 if (i + 1 < source.length() && source[i + 1] == u'|')
254 {
255 result.append(u"^[\\w\\-]+:\\/+(?!\\/)(?:[^\\/]+\\.)?"_str);
256 ++i;
257 }
258 else
259 result.append(u'^');
260 }
261 else if (i == source.length() - 1)
262 {
263 // Anchor at expression end, ignore if following separator placeholder
264 if (prevChar != u'^')
265 result.append(u'$');
266 }
267 else
268 {
269 // Not actually an anchor, escape it
270 result.append(u"\\|"_str);
271 }
272 break;
273 default:
274 if (!(currChar >= u'a' && currChar <= u'z') &&
275 !(currChar >= u'A' && currChar <= u'Z') &&
276 !(currChar >= u'0' && currChar <= u'9') &&
277 currChar < 128)
278 {
279 result.append(u'\\');
280 }
281 result.append(currChar);
282 }
283 prevChar = currChar;
284 }
285 return std::move(result);
286 }
287
288 Filter::Type RegExpFilter::GetType() const
289 {
290 return Type::BLOCKING;
291 }
292
293 bool RegExpFilter::Matches(const String& location, int typeMask,
294 String& docDomain, bool thirdParty, const String& sitekey)
295 {
296 if (!(mContentType & typeMask) ||
297 (mThirdParty == TrippleState::YES && !thirdParty) ||
298 (mThirdParty == TrippleState::NO && thirdParty) ||
299 !IsActiveOnDomain(docDomain, sitekey))
300 {
301 return false;
302 }
303
304 if (!mRegexpId)
305 mRegexpId = GenerateRegExp(RegExpFromSource(mRegexpSource), mMatchCase);
306 return EM_ASM_INT(return regexps.test($0, $1), mRegexpId, &location);
307 }
OLDNEW
« no previous file with comments | « compiled/RegExpFilter.h ('k') | compiled/String.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld