LEFT | RIGHT |
1 #include <climits> | 1 #include <climits> |
2 | 2 |
3 #include <emscripten.h> | 3 #include <emscripten.h> |
4 | 4 |
5 #include "RegExpFilter.h" | 5 #include "RegExpFilter.h" |
6 #include "StringScanner.h" | 6 #include "StringScanner.h" |
7 #include "StringMap.h" | 7 #include "StringMap.h" |
8 | 8 |
9 namespace | 9 namespace |
10 { | 10 { |
(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
44 {u"media"_str, TYPE_MEDIA}, | 44 {u"media"_str, TYPE_MEDIA}, |
45 {u"font"_str, TYPE_FONT}, | 45 {u"font"_str, TYPE_FONT}, |
46 {u"background"_str, TYPE_IMAGE}, // Backwards compat | 46 {u"background"_str, TYPE_IMAGE}, // Backwards compat |
47 | 47 |
48 {u"popup"_str, TYPE_POPUP}, | 48 {u"popup"_str, TYPE_POPUP}, |
49 {u"genericblock"_str, TYPE_GENERICBLOCK}, | 49 {u"genericblock"_str, TYPE_GENERICBLOCK}, |
50 {u"generichide"_str, TYPE_GENERICHIDE}, | 50 {u"generichide"_str, TYPE_GENERICHIDE}, |
51 {u"elemhide"_str, TYPE_ELEMHIDE}, | 51 {u"elemhide"_str, TYPE_ELEMHIDE}, |
52 }; | 52 }; |
53 | 53 |
54 int defaultTypeMask = INT_MAX & ~(TYPE_DOCUMENT | TYPE_ELEMHIDE | TYPE_POPUP | | 54 const int defaultTypeMask = INT_MAX & ~(TYPE_DOCUMENT | TYPE_ELEMHIDE | |
55 TYPE_GENERICBLOCK | TYPE_GENERICHIDE); | 55 TYPE_POPUP | TYPE_GENERICBLOCK | TYPE_GENERICHIDE); |
56 | 56 |
57 int GenerateRegExp(const String& regexp, bool matchCase) | 57 int GenerateRegExp(const String& regexp, bool matchCase) |
58 { | 58 { |
59 return EM_ASM_INT(return regexps.create($0, $1), &regexp, matchCase); | 59 return EM_ASM_INT(return regexps.create($0, $1), &regexp, matchCase); |
60 } | 60 } |
61 | 61 |
62 void NormalizeWhitespace(DependentString& text) | 62 void NormalizeWhitespace(DependentString& text) |
63 { | 63 { |
64 // We want to remove all spaces but bail out early in the common scenario | 64 // We want to remove all spaces but bail out early in the common scenario |
65 // that the string contains no spaces. | 65 // that the string contains no spaces. |
(...skipping 13 matching lines...) Expand all Loading... |
79 for (pos = pos + 1; pos < len; pos++) | 79 for (pos = pos + 1; pos < len; pos++) |
80 { | 80 { |
81 if (text[pos] == ' ') | 81 if (text[pos] == ' ') |
82 delta++; | 82 delta++; |
83 else | 83 else |
84 text[pos - delta] = text[pos]; | 84 text[pos - delta] = text[pos]; |
85 } | 85 } |
86 text.reset(text, 0, len - delta); | 86 text.reset(text, 0, len - delta); |
87 } | 87 } |
88 | 88 |
89 void ParseOption(String& text, OwnedString& error, RegExpFilterData& data, | 89 void ParseOption(String& text, DependentString& error, RegExpFilterData& data, |
90 int optionStart, int optionEnd, int valueStart, int valueEnd) | 90 int optionStart, int optionEnd, int valueStart, int valueEnd) |
91 { | 91 { |
92 if (optionEnd <= optionStart) | 92 if (optionEnd <= optionStart) |
93 return; | 93 return; |
94 | 94 |
95 bool reverse = false; | 95 bool reverse = false; |
96 if (text[optionStart] == u'~') | 96 if (text[optionStart] == u'~') |
97 { | 97 { |
98 reverse = true; | 98 reverse = true; |
99 optionStart++; | 99 optionStart++; |
100 } | 100 } |
101 | 101 |
102 DependentString name(text, optionStart, optionEnd - optionStart); | 102 DependentString name(text, optionStart, optionEnd - optionStart); |
103 for (size_t i = 0; i < name.length(); ++i) | 103 for (size_t i = 0; i < name.length(); ++i) |
104 { | 104 { |
105 char16_t currChar = name[i]; | 105 char16_t currChar = name[i]; |
106 if (currChar >= u'A' && currChar <= u'Z') | 106 if (currChar >= u'A' && currChar <= u'Z') |
107 name[i] = currChar + u'a' - u'A'; | 107 name[i] = currChar + u'a' - u'A'; |
108 else if (currChar == u'_') | 108 else if (currChar == u'_') |
109 name[i] = u'-'; | 109 name[i] = u'-'; |
110 } | 110 } |
111 | 111 |
112 auto it = typeMap.find(name); | 112 auto it = typeMap.find(name); |
113 if (it != typeMap.end()) | 113 if (it) |
114 { | 114 { |
115 if (data.mContentType < 0) | 115 if (data.mContentType < 0) |
116 data.mContentType = reverse ? defaultTypeMask : 0; | 116 data.mContentType = reverse ? defaultTypeMask : 0; |
117 if (reverse) | 117 if (reverse) |
118 data.mContentType &= ~it->second; | 118 data.mContentType &= ~it->second; |
119 else | 119 else |
120 data.mContentType |= it->second; | 120 data.mContentType |= it->second; |
121 } | 121 } |
122 else if (name.equals(u"domain"_str)) | 122 else if (name.equals(u"domain"_str)) |
123 { | 123 { |
124 if (valueStart >= 0 && valueEnd > valueStart) | 124 if (valueStart >= 0 && valueEnd > valueStart) |
125 { | 125 { |
126 data.mDomainsStart = valueStart; | 126 data.mDomainsStart = valueStart; |
127 data.mDomainsEnd = valueEnd; | 127 data.mDomainsEnd = valueEnd; |
128 ActiveFilter::ToLower(text, data.mDomainsStart, data.mDomainsEnd); | 128 DependentString(text, valueStart, valueEnd - valueStart).toLower(); |
129 } | 129 } |
130 } | 130 } |
131 else if (name.equals(u"sitekey"_str)) | 131 else if (name.equals(u"sitekey"_str)) |
132 { | 132 { |
133 if (valueStart >= 0 && valueEnd > valueStart) | 133 if (valueStart >= 0 && valueEnd > valueStart) |
134 { | 134 { |
135 data.mSitekeysStart = valueStart; | 135 data.mSitekeysStart = valueStart; |
136 data.mSitekeysEnd = valueEnd; | 136 data.mSitekeysEnd = valueEnd; |
137 } | 137 } |
138 } | 138 } |
139 else if (name.equals(u"match-case"_str)) | 139 else if (name.equals(u"match-case"_str)) |
140 data.mMatchCase = !reverse; | 140 data.mMatchCase = !reverse; |
141 else if (name.equals(u"third-party"_str)) | 141 else if (name.equals(u"third-party"_str)) |
142 data.mThirdParty = reverse ? TrippleState::NO : TrippleState::YES; | 142 data.mThirdParty = reverse ? TrippleState::NO : TrippleState::YES; |
143 else if (name.equals(u"collapse"_str)) | 143 else if (name.equals(u"collapse"_str)) |
144 data.mCollapse = reverse ? TrippleState::NO : TrippleState::YES; | 144 data.mCollapse = reverse ? TrippleState::NO : TrippleState::YES; |
145 else | 145 else |
146 { | 146 error.reset(u"filter_unknown_option"_str); |
147 error = u"Unknown option "_str; | 147 } |
148 error.append(name); | 148 |
149 } | 149 void ParseOptions(String& text, DependentString& error, RegExpFilterData& data, |
150 } | |
151 | |
152 void ParseOptions(String& text, OwnedString& error, RegExpFilterData& data, | |
153 String::size_type optionsStart) | 150 String::size_type optionsStart) |
154 { | 151 { |
155 data.mMatchCase = false; | 152 data.mMatchCase = false; |
156 data.mThirdParty = TrippleState::ANY; | 153 data.mThirdParty = TrippleState::ANY; |
157 data.mCollapse = TrippleState::ANY; | 154 data.mCollapse = TrippleState::ANY; |
158 data.mDomainsStart = String::npos; | 155 data.mDomainsStart = String::npos; |
159 data.mSitekeysStart = String::npos; | 156 data.mSitekeysStart = String::npos; |
160 if (optionsStart >= text.length()) | 157 if (optionsStart >= text.length()) |
161 { | 158 { |
162 data.mContentType = defaultTypeMask; | 159 data.mContentType = defaultTypeMask; |
(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
196 valueStart = -1; | 193 valueStart = -1; |
197 break; | 194 break; |
198 } | 195 } |
199 } | 196 } |
200 | 197 |
201 if (data.mContentType < 0) | 198 if (data.mContentType < 0) |
202 data.mContentType = defaultTypeMask; | 199 data.mContentType = defaultTypeMask; |
203 } | 200 } |
204 } | 201 } |
205 | 202 |
206 RegExpFilter::RegExpFilter(const String& text, const RegExpFilterData& data) | 203 RegExpFilter::RegExpFilter(Type type, const String& text, const RegExpFilterData& data) |
207 : ActiveFilter(text, true), RegExpFilterData(data) | 204 : ActiveFilter(type, text, true), mData(data) |
208 { | 205 { |
209 } | 206 } |
210 | 207 |
211 RegExpFilter::~RegExpFilter() | 208 RegExpFilter::~RegExpFilter() |
212 { | 209 { |
213 if (HasRegExp()) | 210 if (mData.HasRegExp()) |
214 EM_ASM_ARGS(regexps.delete($0), mRegexpId); | 211 EM_ASM_ARGS(regexps.delete($0), mData.mRegexpId); |
215 } | 212 } |
216 | 213 |
217 Filter::Type RegExpFilter::Parse(DependentString& text, OwnedString& error, | 214 Filter::Type RegExpFilter::Parse(DependentString& text, DependentString& error, |
218 RegExpFilterData& data) | 215 RegExpFilterData& data) |
219 { | 216 { |
220 NormalizeWhitespace(text); | 217 NormalizeWhitespace(text); |
221 | 218 |
222 bool blocking = true; | 219 Filter::Type type = Type::BLOCKING; |
223 | 220 |
224 data.mPatternStart = 0; | 221 data.mPatternStart = 0; |
225 if (text.length() >= 2 && text[0] == u'@' && text[1] == u'@') | 222 if (text.length() >= 2 && text[0] == u'@' && text[1] == u'@') |
226 { | 223 { |
227 blocking = false; | 224 type = Type::WHITELIST; |
228 data.mPatternStart = 2; | 225 data.mPatternStart = 2; |
229 } | 226 } |
230 | 227 |
231 data.mPatternEnd = text.find(u'$', data.mPatternStart); | 228 data.mPatternEnd = text.find(u'$', data.mPatternStart); |
232 if (data.mPatternEnd == text.npos) | 229 if (data.mPatternEnd == text.npos) |
233 data.mPatternEnd = text.length(); | 230 data.mPatternEnd = text.length(); |
234 | 231 |
235 ParseOptions(text, error, data, data.mPatternEnd + 1); | 232 ParseOptions(text, error, data, data.mPatternEnd + 1); |
236 if (!error.empty()) | 233 if (!error.empty()) |
237 return Type::INVALID; | 234 return Type::INVALID; |
238 | 235 |
239 if (data.mPatternEnd - data.mPatternStart >= 2 && | 236 if (data.mPatternEnd - data.mPatternStart >= 2 && |
240 text[data.mPatternStart] == u'/' && | 237 text[data.mPatternStart] == u'/' && |
241 text[data.mPatternEnd - 1] == u'/') | 238 text[data.mPatternEnd - 1] == u'/') |
242 { | 239 { |
243 data.SetRegExp(GenerateRegExp(DependentString(text, data.mPatternStart + 1, | 240 data.SetRegExp(GenerateRegExp(DependentString(text, data.mPatternStart + 1, |
244 data.mPatternEnd - data.mPatternStart - 2), data.mMatchCase)); | 241 data.mPatternEnd - data.mPatternStart - 2), data.mMatchCase)); |
245 | 242 if (data.mRegexpId == -1) |
246 int errorLength = EM_ASM_INT(return regexps.getErrorLength($0), | 243 { |
247 data.mRegexpId); | 244 error.reset(u"filter_invalid_regexp"_str); |
248 if (errorLength >= 0) | |
249 { | |
250 OwnedString regexpError(errorLength); | |
251 EM_ASM_ARGS(regexps.getError($0, $1), data.mRegexpId, regexpError.data()); | |
252 error = std::move(regexpError); | |
253 return Type::INVALID; | 245 return Type::INVALID; |
254 } | 246 } |
255 } | 247 } |
256 | 248 |
257 if (blocking) | 249 return type; |
258 return Type::BLOCKING; | |
259 else | |
260 return Type::WHITELIST; | |
261 } | 250 } |
262 | 251 |
263 void RegExpFilter::ParseSitekeys(const String& sitekeys) const | 252 void RegExpFilter::ParseSitekeys(const String& sitekeys) const |
264 { | 253 { |
265 StringScanner scanner(sitekeys, 0, u'|'); | 254 StringScanner scanner(sitekeys, 0, u'|'); |
266 size_t start = 0; | 255 size_t start = 0; |
267 bool done = false; | 256 bool done = false; |
268 while (!done) | 257 while (!done) |
269 { | 258 { |
270 done = scanner.done(); | 259 done = scanner.done(); |
271 if (scanner.next() == u'|') | 260 if (scanner.next() == u'|') |
272 { | 261 { |
273 if (scanner.position() > start) | 262 if (scanner.position() > start) |
274 AddSitekey(DependentString(sitekeys, start, scanner.position() - start)); | 263 AddSitekey(DependentString(sitekeys, start, scanner.position() - start)); |
275 start = scanner.position() + 1; | 264 start = scanner.position() + 1; |
276 } | 265 } |
277 } | 266 } |
278 } | 267 } |
279 | 268 |
280 void RegExpFilter::InitJSTypes() | 269 void RegExpFilter::InitJSTypes() |
281 { | 270 { |
282 EM_ASM(exports.RegExpFilter.typeMap = {};); | 271 EM_ASM(exports.RegExpFilter.typeMap = {};); |
283 for (auto it = typeMap.begin(); it != typeMap.end(); ++it) | 272 for (auto it = typeMap.begin(); it != typeMap.end(); ++it) |
284 EM_ASM_ARGS(exports.RegExpFilter.typeMap[getStringData($0).replace("-", "_").toUpperCase()] = $1, &(it->first), it->second); | 273 EM_ASM_ARGS(exports.RegExpFilter.typeMap[readString($0).replace("-", "_").toUpperCase()] = $1, &(it->first), it->second); |
285 } | 274 } |
286 | 275 |
287 OwnedString RegExpFilter::RegExpFromSource(const String& source) | 276 OwnedString RegExpFilter::RegExpFromSource(const String& source) |
288 { | 277 { |
289 /* TODO: this is very inefficient */ | 278 /* TODO: this is very inefficient */ |
290 | 279 |
291 // Note: This doesn't remove trailing wildcards, otherwise the result should | 280 // Note: This doesn't remove trailing wildcards, otherwise the result should |
292 // be identical to Filter.toRegExp(). | 281 // be identical to Filter.toRegExp(). |
293 OwnedString result; | 282 OwnedString result; |
294 String::value_type prevChar = u'*'; | 283 String::value_type prevChar = u'*'; |
(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
333 !(currChar >= u'A' && currChar <= u'Z') && | 322 !(currChar >= u'A' && currChar <= u'Z') && |
334 !(currChar >= u'0' && currChar <= u'9') && | 323 !(currChar >= u'0' && currChar <= u'9') && |
335 currChar < 128) | 324 currChar < 128) |
336 { | 325 { |
337 result.append(u'\\'); | 326 result.append(u'\\'); |
338 } | 327 } |
339 result.append(currChar); | 328 result.append(currChar); |
340 } | 329 } |
341 prevChar = currChar; | 330 prevChar = currChar; |
342 } | 331 } |
343 return std::move(result); | 332 return result; |
344 } | |
345 | |
346 Filter::Type RegExpFilter::GetType() const | |
347 { | |
348 return Type::BLOCKING; | |
349 } | 333 } |
350 | 334 |
351 RegExpFilter::DomainMap* RegExpFilter::GetDomains() const | 335 RegExpFilter::DomainMap* RegExpFilter::GetDomains() const |
352 { | 336 { |
353 if (!DomainsParsingDone()) | 337 if (!mData.DomainsParsingDone()) |
354 { | 338 { |
355 ParseDomains(GetDomainsSource(mText), u'|'); | 339 ParseDomains(mData.GetDomainsSource(mText), u'|'); |
356 SetDomainsParsingDone(); | 340 mData.SetDomainsParsingDone(); |
357 } | 341 } |
358 return ActiveFilter::GetDomains(); | 342 return ActiveFilter::GetDomains(); |
359 } | 343 } |
360 | 344 |
361 RegExpFilter::SitekeySet* RegExpFilter::GetSitekeys() const | 345 RegExpFilter::SitekeySet* RegExpFilter::GetSitekeys() const |
362 { | 346 { |
363 if (!SitekeyParsingDone()) | 347 if (!mData.SitekeyParsingDone()) |
364 { | 348 { |
365 ParseSitekeys(GetSitekeysSource(mText)); | 349 ParseSitekeys(mData.GetSitekeysSource(mText)); |
366 SetSitekeysParsingDone(); | 350 mData.SetSitekeysParsingDone(); |
367 } | 351 } |
368 return ActiveFilter::GetSitekeys(); | 352 return ActiveFilter::GetSitekeys(); |
369 } | 353 } |
370 | 354 |
371 bool RegExpFilter::Matches(const String& location, int typeMask, | 355 bool RegExpFilter::Matches(const String& location, int typeMask, |
372 DependentString& docDomain, bool thirdParty, const String& sitekey) const | 356 DependentString& docDomain, bool thirdParty, const String& sitekey) const |
373 { | 357 { |
374 if (!(mContentType & typeMask) || | 358 if (!(mData.mContentType & typeMask) || |
375 (mThirdParty == TrippleState::YES && !thirdParty) || | 359 (mData.mThirdParty == TrippleState::YES && !thirdParty) || |
376 (mThirdParty == TrippleState::NO && thirdParty) || | 360 (mData.mThirdParty == TrippleState::NO && thirdParty) || |
377 !IsActiveOnDomain(docDomain, sitekey)) | 361 !IsActiveOnDomain(docDomain, sitekey)) |
378 { | 362 { |
379 return false; | 363 return false; |
380 } | 364 } |
381 | 365 |
382 if (!RegExpParsingDone()) | 366 if (!mData.RegExpParsingDone()) |
383 { | 367 { |
384 const OwnedString pattern(GetRegExpSource(mText)); | 368 const OwnedString pattern(mData.GetRegExpSource(mText)); |
385 SetRegExp(GenerateRegExp(RegExpFromSource(pattern), mMatchCase)); | 369 mData.SetRegExp(GenerateRegExp(RegExpFromSource(pattern), mData.mMatchCase)); |
386 } | 370 } |
387 return EM_ASM_INT(return regexps.test($0, $1), mRegexpId, &location); | 371 return EM_ASM_INT(return regexps.test($0, $1), mData.mRegexpId, &location); |
388 } | 372 } |
LEFT | RIGHT |