Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Side by Side Diff: compiled/RegExpFilter.cpp

Issue 29333474: Issue 4125 - [emscripten] Convert filter classes to C++ (Closed)
Patch Set: Minor improvements Created Jan. 20, 2016, 2:41 p.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View unified diff | Download patch
« no previous file with comments | « compiled/RegExpFilter.h ('k') | compiled/StringScanner.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 #include <climits>
2 #include <unordered_map>
3
4 #include <emscripten.h>
5
6 #include "RegExpFilter.h"
7 #include "WhiteListFilter.h"
8 #include "InvalidFilter.h"
9 #include "StringScanner.h"
10
11 namespace
12 {
// Bit flags for the content types a filter can apply to. Each flag occupies a
// distinct bit so that several types can be combined into one integer mask.
enum
{
  TYPE_OTHER = 0x1,
  TYPE_SCRIPT = 0x2,
  TYPE_IMAGE = 0x4,
  TYPE_STYLESHEET = 0x8,
  TYPE_OBJECT = 0x10,
  TYPE_SUBDOCUMENT = 0x20,
  TYPE_DOCUMENT = 0x40,
  TYPE_PING = 0x400,
  TYPE_XMLHTTPREQUEST = 0x800,
  TYPE_OBJECT_SUBREQUEST = 0x1000,
  TYPE_MEDIA = 0x4000,
  TYPE_FONT = 0x8000,
  TYPE_POPUP = 0x8000000,
  TYPE_GENERICBLOCK = 0x10000000,
  TYPE_GENERICHIDE = 0x20000000,
  TYPE_ELEMHIDE = 0x40000000,
};

// Maps normalized (upper-case, '_' instead of '-') option names to the content
// type flag they stand for. The table is read-only after construction, hence
// const: lookups must go through find()/at(), never through an inserting
// operator[].
const std::unordered_map<std::u16string, int> typeMap({
  {u"OTHER", TYPE_OTHER},
  {u"SCRIPT", TYPE_SCRIPT},
  {u"IMAGE", TYPE_IMAGE},
  {u"STYLESHEET", TYPE_STYLESHEET},
  {u"OBJECT", TYPE_OBJECT},
  {u"SUBDOCUMENT", TYPE_SUBDOCUMENT},
  {u"DOCUMENT", TYPE_DOCUMENT},
  {u"XBL", TYPE_OTHER}, // Backwards compat
  {u"PING", TYPE_PING},
  {u"XMLHTTPREQUEST", TYPE_XMLHTTPREQUEST},
  {u"OBJECT_SUBREQUEST", TYPE_OBJECT_SUBREQUEST},
  {u"DTD", TYPE_OTHER}, // Backwards compat
  {u"MEDIA", TYPE_MEDIA},
  {u"FONT", TYPE_FONT},
  {u"BACKGROUND", TYPE_IMAGE}, // Backwards compat

  {u"POPUP", TYPE_POPUP},
  {u"GENERICBLOCK", TYPE_GENERICBLOCK},
  {u"GENERICHIDE", TYPE_GENERICHIDE},
  {u"ELEMHIDE", TYPE_ELEMHIDE},
});

// Content type mask used when a filter names no content type explicitly:
// every bit of a positive int except the five flags excluded below.
const int defaultTypeMask = INT_MAX & ~(TYPE_DOCUMENT | TYPE_ELEMHIDE |
    TYPE_POPUP | TYPE_GENERICBLOCK | TYPE_GENERICHIDE);
58
59 int GenerateRegExp(const std::u16string& regexp, bool matchCase)
60 {
61 return EM_ASM_INT(return regexps.create($0, $1), &regexp, matchCase);
62 }
63 }
64
65 RegExpFilter::RegExpFilter(const std::u16string& text,
66 const std::u16string& pattern, const std::u16string& options)
67 : ActiveFilter(text, true), regexpId(0), contentType(-1), matchCase(false),
68 thirdParty(TrippleState::ANY)
69 {
70 int optionStart = 0;
71 int optionEnd = -1;
72 int valueStart = -1;
73 StringScanner scanner(options + u",");
74 while (!scanner.done())
75 {
76 switch (scanner.next())
77 {
78 case u'=':
79 if (optionEnd < 0)
80 {
81 optionEnd = scanner.position();
82 valueStart = optionEnd + 1;
83 }
84 break;
85 case u',':
86 if (optionEnd < 0)
87 optionEnd = scanner.position();
88 ProcessOption(options, optionStart, optionEnd, valueStart, scanner.posit ion());
89 optionStart = scanner.position() + 1;
90 optionEnd = -1;
91 valueStart = -1;
92 break;
93 }
94 }
95 if (contentType < 0)
96 contentType = defaultTypeMask;
97
98 size_t len = pattern.length();
99 if (len >= 2 && pattern[0] == u'/' && pattern[len - 1] == u'/')
100 {
101 regexpId = GenerateRegExp(pattern.substr(1, len - 2), matchCase);
102
103 std::u16string* error = reinterpret_cast<std::u16string*>(EM_ASM_INT(return regexps.getError($0), regexpId));
104 if (error)
105 {
106 EM_ASM_ARGS(regexps.delete($0), regexpId);
107 throw std::u16string(*error);
108 }
109 }
110 else
111 regexpSource = pattern;
112 }
113
114 RegExpFilter::~RegExpFilter()
115 {
116 if (regexpId)
117 EM_ASM_ARGS(regexps.delete($0), regexpId);
118 }
119
120 void RegExpFilter::ProcessOption(const std::u16string& options,
121 int optionStart, int optionEnd, int valueStart, int valueEnd)
122 {
123 if (optionEnd <= optionStart)
124 return;
125
126 bool reverse = false;
127 if (options[optionStart] == u'~')
128 {
129 reverse = true;
130 optionStart++;
131 }
132
133 std::u16string name(options.substr(optionStart, optionEnd - optionStart));
134 for (size_t i = 0, l = name.length(); i < l; ++i)
135 {
136 char16_t currChar = name[i];
137 if (currChar >= u'a' && currChar <= u'z')
138 name[i] = currChar + u'A' - u'a';
139 else if (currChar == u'-')
140 name[i] = u'_';
141 }
142
143 auto it = typeMap.find(name);
144 if (it != typeMap.end())
145 {
146 if (contentType < 0)
147 contentType = reverse ? defaultTypeMask : 0;
148 if (reverse)
149 contentType &= ~it->second;
150 else
151 contentType |= it->second;
152 }
153 else if (!name.compare(u"DOMAIN"))
154 {
155 if (valueStart >= 0 && valueEnd > valueStart)
156 ParseDomains(options.substr(valueStart, valueEnd - valueStart), u'|');
157 }
158 else if (!name.compare(u"SITEKEY"))
159 {
160 if (valueStart >= 0 && valueEnd > valueStart)
161 {
162 StringScanner scanner(options.substr(valueStart, valueEnd - valueStart) + u"|");
163 size_t start = 0;
164 while (!scanner.done())
165 {
166 if (scanner.next() == u'|')
167 {
168 if (scanner.position() > start)
169 sitekeys.insert(options.substr(valueStart + start, scanner.position( ) - start));
170 start = scanner.position() + 1;
171 }
172 }
173 }
174 }
175 else if (!name.compare(u"MATCH_CASE"))
176 matchCase = !reverse;
177 else if (!name.compare(u"THIRD_PARTY"))
178 thirdParty = reverse ? TrippleState::NO : TrippleState::YES;
179 else if (!name.compare(u"COLLAPSE"))
180 collapse = reverse ? TrippleState::NO : TrippleState::YES;
181 else
182 throw std::u16string(u"Unknown option " + name);
183 }
184
185 Filter* RegExpFilter::Create(const std::u16string& text)
186 {
187 bool blocking = true;
188 size_t patternStart = 0;
189 if (!text.compare(0, 2, u"@@"))
190 {
191 blocking = false;
192 patternStart = 2;
193 }
194
195 size_t patternEnd = text.find(u'$', patternStart);
196 size_t patternLength = (patternEnd != std::u16string::npos ?
197 patternEnd - patternStart : patternEnd);
198 std::u16string pattern(text.substr(patternStart, patternLength));
199 std::u16string options(patternEnd != std::u16string::npos ?
200 text.substr(patternEnd + 1) : u"");
201
202 try
203 {
204 if (blocking)
205 return new RegExpFilter(text, pattern, options);
206 else
207 return new WhiteListFilter(text, pattern, options);
208 }
209 catch (const std::u16string& reason)
210 {
211 return new InvalidFilter(text, reason);
212 }
213 }
214
215 void RegExpFilter::InitJSTypes()
216 {
217 for (auto it = typeMap.begin(); it != typeMap.end(); ++it)
218 EM_ASM_ARGS(Module.RegExpFilter_typeMap[getStringData($0)] = $1, &(it->first ), it->second);
219 }
220
221 const std::u16string RegExpFilter::RegExpFromSource(const std::u16string& source )
222 {
223 // Note: This doesn't remove trailing wildcards, otherwise the result should
224 // be identical to Filter.toRegExp().
225 std::u16string result;
226 char16_t prevChar = u'*';
227 for (size_t i = 0, l = source.length(); i < l; ++i)
228 {
229 char16_t currChar = source[i];
230 switch (currChar)
231 {
232 case u'*':
233 if (prevChar != u'*')
234 result += u".*";
235 break;
236 case u'^':
237 result += u"(?:[\\x00-\\x24\\x26-\\x2C\\x2F\\x3A-\\x40\\x5B-\\x5E\\x60\\ x7B-\\x7F]|$)";
238 break;
239 case u'|':
240 if (i == 0)
241 {
242 // Anchor at expression start, maybe extended anchor?
243 if (i + 1 < l && source[i + 1] == u'|')
244 {
245 result += u"^[\\w\\-]+:\\/+(?!\\/)(?:[^\\/]+\\.)?";
246 ++i;
247 }
248 else
249 result += u"^";
250 }
251 else if (i == l - 1)
252 {
253 // Anchor at expression end, ignore if following separator placeholder
254 if (prevChar != u'^')
255 result += u"$";
256 }
257 else
258 {
259 // Not actually an anchor, escape it
260 result += u"\\|";
261 }
262 break;
263 default:
264 if ((currChar >= u'a' && currChar <= u'z') ||
265 (currChar >= u'A' && currChar <= u'Z') ||
266 (currChar >= u'0' && currChar <= u'9') ||
267 currChar >= 128)
268 {
269 result += currChar;
270 }
271 else
272 {
273 result += u"\\";
274 result.append(1, currChar);
275 }
276 }
277 prevChar = currChar;
278 }
279 return result;
280 }
281
282 Filter::Type RegExpFilter::GetType() const
283 {
284 return Type::BLOCKING;
285 }
286
287 bool RegExpFilter::Matches(const std::u16string& location, int typeMask,
288 const std::u16string& docDomain, bool thirdParty,
289 const std::u16string& sitekey)
290 {
291 if (!(this->contentType & typeMask) ||
292 (this->thirdParty == TrippleState::YES && !thirdParty) ||
293 (this->thirdParty == TrippleState::NO && thirdParty) ||
294 !IsActiveOnDomain(docDomain, sitekey))
295 {
296 return false;
297 }
298
299 if (!regexpId)
300 {
301 regexpId = GenerateRegExp(RegExpFromSource(regexpSource), matchCase);
302 regexpSource.resize(0);
303 }
304 return EM_ASM_INT(return regexps.test($0, $1), regexpId, &location);
305 }
OLDNEW
« no previous file with comments | « compiled/RegExpFilter.h ('k') | compiled/StringScanner.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld