Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Side by Side Diff: compiled/RegExpFilter.cpp

Issue 29333474: Issue 4125 - [emscripten] Convert filter classes to C++ (Closed)
Patch Set: Almost complete implementation, missing CSS property filters Created Jan. 20, 2016, 12:04 p.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View unified diff | Download patch
« no previous file with comments | « compiled/RegExpFilter.h ('k') | compiled/StringScanner.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 #include <climits>
2 #include <unordered_map>
3
4 #include <emscripten.h>
5
6 #include "RegExpFilter.h"
7 #include "WhiteListFilter.h"
8 #include "InvalidFilter.h"
9 #include "StringScanner.h"
10
11 namespace
12 {
// Content type flags. Every flag occupies its own bit so that several of them
// can be combined into a single type mask with bitwise operators.
enum
{
  TYPE_OTHER = 0x1,
  TYPE_SCRIPT = 0x2,
  TYPE_IMAGE = 0x4,
  TYPE_STYLESHEET = 0x8,
  TYPE_OBJECT = 0x10,
  TYPE_SUBDOCUMENT = 0x20,
  TYPE_DOCUMENT = 0x40,
  TYPE_PING = 0x400,
  TYPE_XMLHTTPREQUEST = 0x800,
  TYPE_OBJECT_SUBREQUEST = 0x1000,
  TYPE_MEDIA = 0x4000,
  TYPE_FONT = 0x8000,
  TYPE_POPUP = 0x8000000,
  TYPE_GENERICBLOCK = 0x10000000,
  TYPE_GENERICHIDE = 0x20000000,
  TYPE_ELEMHIDE = 0x40000000,
};

// Maps normalized (upper case, underscores instead of dashes) option names to
// the content type flag they stand for. The table is only ever read after its
// construction, so it is const to rule out accidental modification.
const std::unordered_map<std::u16string, int> typeMap({
  {u"OTHER", TYPE_OTHER},
  {u"SCRIPT", TYPE_SCRIPT},
  {u"IMAGE", TYPE_IMAGE},
  {u"STYLESHEET", TYPE_STYLESHEET},
  {u"OBJECT", TYPE_OBJECT},
  {u"SUBDOCUMENT", TYPE_SUBDOCUMENT},
  {u"DOCUMENT", TYPE_DOCUMENT},
  {u"XBL", TYPE_OTHER}, // Backwards compat
  {u"PING", TYPE_PING},
  {u"XMLHTTPREQUEST", TYPE_XMLHTTPREQUEST},
  {u"OBJECT_SUBREQUEST", TYPE_OBJECT_SUBREQUEST},
  {u"DTD", TYPE_OTHER}, // Backwards compat
  {u"MEDIA", TYPE_MEDIA},
  {u"FONT", TYPE_FONT},
  {u"BACKGROUND", TYPE_IMAGE}, // Backwards compat

  {u"POPUP", TYPE_POPUP},
  {u"GENERICBLOCK", TYPE_GENERICBLOCK},
  {u"GENERICHIDE", TYPE_GENERICHIDE},
  {u"ELEMHIDE", TYPE_ELEMHIDE},
});

// Type mask applied when a filter doesn't name any content type: all bits of
// a non-negative int except the flags listed here, which consequently have to
// be requested explicitly. Being a compile-time constant, it doesn't depend
// on static initialization order.
constexpr int defaultTypeMask = INT_MAX & ~(TYPE_DOCUMENT | TYPE_ELEMHIDE |
    TYPE_POPUP | TYPE_GENERICBLOCK | TYPE_GENERICHIDE);
58
59 int GenerateRegExp(const std::u16string& source, bool matchCase)
60 {
61 // Note: This doesn't remove trailing wildcards, otherwise the result should
62 // be identical to Filter.toRegExp().
63 std::u16string result;
64 char16_t prevChar = u'*';
65 for (size_t i = 0, l = source.length(); i < l; ++i)
66 {
67 char16_t currChar = source[i];
68 switch (currChar)
69 {
70 case u'*':
71 if (prevChar != u'*')
72 result += u".*";
73 break;
74 case u'^':
75 result += u"(?:[\\x00-\\x24\\x26-\\x2C\\x2F\\x3A-\\x40\\x5B-\\x5E\\x60 \\x7B-\\x7F]|$)";
76 break;
77 case u'|':
78 if (i == 0)
79 {
80 // Anchor at expression start, maybe extended anchor?
81 if (i + 1 < l && source[i + 1] == u'|')
82 {
83 result += u"^[\\w\\-]+:\\/+(?!\\/)(?:[^\\/]+\\.)?";
84 ++i;
85 }
86 else
87 result += u"^";
88 }
89 else if (i == l - 1)
90 {
91 // Anchor at expression end, ignore if following separator placehold er
92 if (prevChar != u'^')
93 result += u"$";
94 }
95 else
96 {
97 // Not actually an anchor, escape it
98 result += u"\\|";
99 }
100 break;
101 default:
102 if ((currChar >= u'a' && currChar <= u'z') ||
103 (currChar >= u'A' && currChar <= u'Z') ||
104 (currChar >= u'0' && currChar <= u'9') ||
105 currChar >= 128)
106 {
107 result += currChar;
108 }
109 else
110 {
111 result += u"\\";
112 result.append(1, currChar);
113 }
114 }
115 prevChar = currChar;
116 }
117 return EM_ASM_INT(return regexps.create($0, $1), &result, matchCase);
118 }
119 }
120
121 RegExpFilter::RegExpFilter(const std::u16string& text,
122 const std::u16string& pattern, const std::u16string& options)
123 : ActiveFilter(text, true), regexpId(0), contentType(-1), matchCase(false),
124 thirdParty(TrippleState::ANY)
125 {
126 int optionStart = 0;
127 int optionEnd = -1;
128 int valueStart = -1;
129 StringScanner scanner(options + u",");
130 while (!scanner.done())
131 {
132 switch (scanner.next())
133 {
134 case u'=':
135 if (optionEnd < 0)
136 {
137 optionEnd = scanner.position();
138 valueStart = optionEnd + 1;
139 }
140 break;
141 case u',':
142 if (optionEnd < 0)
143 optionEnd = scanner.position();
144 ProcessOption(options, optionStart, optionEnd, valueStart, scanner.posit ion());
145 optionStart = scanner.position() + 1;
146 optionEnd = -1;
147 valueStart = -1;
148 break;
149 }
150 }
151 if (contentType < 0)
152 contentType = defaultTypeMask;
153
154 size_t len = pattern.length();
155 if (len >= 2 && pattern[0] == u'/' && pattern[len - 1] == u'/')
156 {
157 std::u16string param = pattern.substr(1, len - 2);
158 regexpId = EM_ASM_INT(return regexps.create($0, $1), &param, matchCase);
159
160 std::u16string* error = reinterpret_cast<std::u16string*>(EM_ASM_INT(return regexps.getError($0), regexpId));
161 if (error)
162 {
163 EM_ASM_ARGS(regexps.delete($0), regexpId);
164 throw std::u16string(*error);
165 }
166 }
167 else
168 regexpSource = pattern;
169 }
170
171 RegExpFilter::~RegExpFilter()
172 {
173 if (regexpId)
174 EM_ASM_ARGS(regexps.delete($0), regexpId);
175 }
176
177 void RegExpFilter::ProcessOption(const std::u16string& options,
178 int optionStart, int optionEnd, int valueStart, int valueEnd)
179 {
180 if (optionEnd <= optionStart)
181 return;
182
183 bool reverse = false;
184 if (options[optionStart] == u'~')
185 {
186 reverse = true;
187 optionStart++;
188 }
189
190 std::u16string name(options.substr(optionStart, optionEnd - optionStart));
191 for (size_t i = 0, l = name.length(); i < l; ++i)
192 {
193 char16_t currChar = name[i];
194 if (currChar >= u'a' && currChar <= u'z')
195 name[i] = currChar + u'A' - u'a';
196 else if (currChar == u'-')
197 name[i] = u'_';
198 }
199
200 auto it = typeMap.find(name);
201 if (it != typeMap.end())
202 {
203 if (contentType < 0)
204 contentType = reverse ? defaultTypeMask : 0;
205 if (reverse)
206 contentType &= ~it->second;
207 else
208 contentType |= it->second;
209 }
210 else if (!name.compare(u"DOMAIN"))
211 {
212 if (valueStart >= 0 && valueEnd > valueStart)
213 ParseDomains(options.substr(valueStart, valueEnd - valueStart), u'|');
214 }
215 else if (!name.compare(u"SITEKEY"))
216 {
217 if (valueStart >= 0 && valueEnd > valueStart)
218 {
219 StringScanner scanner(options.substr(valueStart, valueEnd - valueStart) + u"|");
220 size_t start = 0;
221 while (!scanner.done())
222 {
223 if (scanner.next() == u'|')
224 {
225 if (scanner.position() > start)
226 sitekeys.insert(options.substr(valueStart + start, scanner.position( ) - start));
227 start = scanner.position() + 1;
228 }
229 }
230 }
231 }
232 else if (!name.compare(u"MATCH_CASE"))
233 matchCase = !reverse;
234 else if (!name.compare(u"THIRD_PARTY"))
235 thirdParty = reverse ? TrippleState::NO : TrippleState::YES;
236 else if (!name.compare(u"COLLAPSE"))
237 collapse = reverse ? TrippleState::NO : TrippleState::YES;
238 else
239 throw std::u16string(u"Unknown option " + name);
240 }
241
242 Filter* RegExpFilter::Create(const std::u16string& text)
243 {
244 bool blocking = true;
245 size_t patternStart = 0;
246 if (!text.compare(0, 2, u"@@"))
247 {
248 blocking = false;
249 patternStart = 2;
250 }
251
252 size_t patternEnd = text.find(u'$', patternStart);
253 size_t patternLength = (patternEnd != std::u16string::npos ?
254 patternEnd - patternStart : patternEnd);
255 std::u16string pattern(text.substr(patternStart, patternLength));
256 std::u16string options(patternEnd != std::u16string::npos ?
257 text.substr(patternEnd + 1) : u"");
258
259 try
260 {
261 if (blocking)
262 return new RegExpFilter(text, pattern, options);
263 else
264 return new WhiteListFilter(text, pattern, options);
265 }
266 catch (const std::u16string& reason)
267 {
268 return new InvalidFilter(text, reason);
269 }
270 }
271
272 void RegExpFilter::InitJSTypes()
273 {
274 for (auto it = typeMap.begin(); it != typeMap.end(); ++it)
275 EM_ASM_ARGS(Module.RegExpFilter_typeMap[getStringData($0)] = $1, &(it->first ), it->second);
276 }
277
278 Filter::Type RegExpFilter::GetType() const
279 {
280 return Type::BLOCKING;
281 }
282
283 bool RegExpFilter::Matches(const std::u16string& location, int typeMask,
284 const std::u16string& docDomain, bool thirdParty,
285 const std::u16string& sitekey)
286 {
287 if (!(this->contentType & typeMask) ||
288 (this->thirdParty == TrippleState::YES && !thirdParty) ||
289 (this->thirdParty == TrippleState::NO && thirdParty) ||
290 !IsActiveOnDomain(docDomain, sitekey))
291 {
292 return false;
293 }
294
295 if (!regexpId)
296 {
297 regexpId = GenerateRegExp(regexpSource, matchCase);
298 regexpSource.resize(0);
299 }
300 return EM_ASM_INT(return regexps.test($0, $1), regexpId, &location);
301 }
OLDNEW
« no previous file with comments | « compiled/RegExpFilter.h ('k') | compiled/StringScanner.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld