| Left: | ||
| Right: |
| OLD | NEW |
|---|---|
| (Empty) | |
| 1 #include <climits> | |
| 2 | |
| 3 #include <emscripten.h> | |
| 4 | |
| 5 #include "RegExpFilter.h" | |
| 6 #include "StringScanner.h" | |
| 7 #include "StringMap.h" | |
| 8 | |
| 9 namespace | |
| 10 { | |
// Bit flags for the content types a filter can be restricted to. Every flag
// occupies its own bit so that several of them can be OR-ed into one mask.
enum
{
  TYPE_OTHER = 1 << 0,
  TYPE_SCRIPT = 1 << 1,
  TYPE_IMAGE = 1 << 2,
  TYPE_STYLESHEET = 1 << 3,
  TYPE_OBJECT = 1 << 4,
  TYPE_SUBDOCUMENT = 1 << 5,
  TYPE_DOCUMENT = 1 << 6,
  TYPE_PING = 1 << 10,
  TYPE_XMLHTTPREQUEST = 1 << 11,
  TYPE_OBJECT_SUBREQUEST = 1 << 12,
  TYPE_MEDIA = 1 << 14,
  TYPE_FONT = 1 << 15,
  TYPE_POPUP = 1 << 27,
  TYPE_GENERICBLOCK = 1 << 28,
  TYPE_GENERICHIDE = 1 << 29,
  TYPE_ELEMHIDE = 1 << 30,
};
| 30 | |
| 31 StringMap<int> typeMap { | |
| 32 {u"other"_str, TYPE_OTHER}, | |
| 33 {u"script"_str, TYPE_SCRIPT}, | |
| 34 {u"image"_str, TYPE_IMAGE}, | |
| 35 {u"stylesheet"_str, TYPE_STYLESHEET}, | |
| 36 {u"object"_str, TYPE_OBJECT}, | |
| 37 {u"subdocument"_str, TYPE_SUBDOCUMENT}, | |
| 38 {u"document"_str, TYPE_DOCUMENT}, | |
| 39 {u"xbl"_str, TYPE_OTHER}, // Backwards compat | |
| 40 {u"ping"_str, TYPE_PING}, | |
| 41 {u"xmlhttprequest"_str, TYPE_XMLHTTPREQUEST}, | |
| 42 {u"object-subrequest"_str, TYPE_OBJECT_SUBREQUEST}, | |
| 43 {u"dtd"_str, TYPE_OTHER}, // Backwards compat | |
| 44 {u"media"_str, TYPE_MEDIA}, | |
| 45 {u"font"_str, TYPE_FONT}, | |
| 46 {u"background"_str, TYPE_IMAGE}, // Backwards compat | |
| 47 | |
| 48 {u"popup"_str, TYPE_POPUP}, | |
| 49 {u"genericblock"_str, TYPE_GENERICBLOCK}, | |
| 50 {u"generichide"_str, TYPE_GENERICHIDE}, | |
| 51 {u"elemhide"_str, TYPE_ELEMHIDE}, | |
| 52 }; | |
| 53 | |
| 54 int defaultTypeMask = INT_MAX & ~(TYPE_DOCUMENT | TYPE_ELEMHIDE | TYPE_POPUP | | |
|
sergei
2016/02/17 12:54:38
Should not it be const?
Wladimir Palant
2016/02/18 16:06:45
Done.
| |
| 55 TYPE_GENERICBLOCK | TYPE_GENERICHIDE); | |
| 56 | |
| 57 int GenerateRegExp(const String& regexp, bool matchCase) | |
| 58 { | |
| 59 return EM_ASM_INT(return regexps.create($0, $1), ®exp, matchCase); | |
| 60 } | |
| 61 | |
| 62 void NormalizeWhitespace(DependentString& text) | |
| 63 { | |
| 64 // We want to remove all spaces but bail out early in the common scenario | |
| 65 // that the string contains no spaces. | |
| 66 | |
| 67 // Look for the first space | |
| 68 String::size_type len = text.length(); | |
| 69 String::size_type pos; | |
| 70 for (pos = 0; pos < len; pos++) | |
| 71 if (text[pos] == ' ') | |
| 72 break; | |
| 73 | |
| 74 if (pos >= len) | |
| 75 return; | |
| 76 | |
| 77 // Found spaces, move characters to remove them | |
| 78 String::size_type delta = 1; | |
| 79 for (pos = pos + 1; pos < len; pos++) | |
| 80 { | |
| 81 if (text[pos] == ' ') | |
| 82 delta++; | |
| 83 else | |
| 84 text[pos - delta] = text[pos]; | |
| 85 } | |
| 86 text.reset(text, 0, len - delta); | |
| 87 } | |
| 88 | |
| 89 void ParseOption(String& text, OwnedString& error, RegExpFilterData& data, | |
| 90 int optionStart, int optionEnd, int valueStart, int valueEnd) | |
| 91 { | |
| 92 if (optionEnd <= optionStart) | |
| 93 return; | |
| 94 | |
| 95 bool reverse = false; | |
| 96 if (text[optionStart] == u'~') | |
| 97 { | |
| 98 reverse = true; | |
| 99 optionStart++; | |
| 100 } | |
| 101 | |
| 102 DependentString name(text, optionStart, optionEnd - optionStart); | |
| 103 for (size_t i = 0; i < name.length(); ++i) | |
| 104 { | |
| 105 char16_t currChar = name[i]; | |
| 106 if (currChar >= u'A' && currChar <= u'Z') | |
| 107 name[i] = currChar + u'a' - u'A'; | |
| 108 else if (currChar == u'_') | |
| 109 name[i] = u'-'; | |
| 110 } | |
| 111 | |
| 112 auto it = typeMap.find(name); | |
| 113 if (it != typeMap.end()) | |
| 114 { | |
| 115 if (data.mContentType < 0) | |
| 116 data.mContentType = reverse ? defaultTypeMask : 0; | |
| 117 if (reverse) | |
| 118 data.mContentType &= ~it->second; | |
| 119 else | |
| 120 data.mContentType |= it->second; | |
| 121 } | |
| 122 else if (name.equals(u"domain"_str)) | |
| 123 { | |
| 124 if (valueStart >= 0 && valueEnd > valueStart) | |
| 125 { | |
| 126 data.mDomainsStart = valueStart; | |
| 127 data.mDomainsEnd = valueEnd; | |
| 128 ActiveFilter::ToLower(text, data.mDomainsStart, data.mDomainsEnd); | |
| 129 } | |
| 130 } | |
| 131 else if (name.equals(u"sitekey"_str)) | |
| 132 { | |
| 133 if (valueStart >= 0 && valueEnd > valueStart) | |
| 134 { | |
| 135 data.mSitekeysStart = valueStart; | |
| 136 data.mSitekeysEnd = valueEnd; | |
| 137 } | |
| 138 } | |
| 139 else if (name.equals(u"match-case"_str)) | |
| 140 data.mMatchCase = !reverse; | |
| 141 else if (name.equals(u"third-party"_str)) | |
| 142 data.mThirdParty = reverse ? TrippleState::NO : TrippleState::YES; | |
| 143 else if (name.equals(u"collapse"_str)) | |
| 144 data.mCollapse = reverse ? TrippleState::NO : TrippleState::YES; | |
| 145 else | |
| 146 { | |
| 147 error = u"Unknown option "_str; | |
| 148 error.append(name); | |
| 149 } | |
| 150 } | |
| 151 | |
| 152 void ParseOptions(String& text, OwnedString& error, RegExpFilterData& data, | |
| 153 String::size_type optionsStart) | |
| 154 { | |
| 155 data.mMatchCase = false; | |
| 156 data.mThirdParty = TrippleState::ANY; | |
| 157 data.mCollapse = TrippleState::ANY; | |
| 158 data.mDomainsStart = String::npos; | |
| 159 data.mSitekeysStart = String::npos; | |
| 160 if (optionsStart >= text.length()) | |
| 161 { | |
| 162 data.mContentType = defaultTypeMask; | |
| 163 return; | |
| 164 } | |
| 165 | |
| 166 data.mContentType = -1; | |
| 167 | |
| 168 int optionStart = data.mPatternEnd + 1; | |
| 169 int optionEnd = -1; | |
| 170 int valueStart = -1; | |
| 171 | |
| 172 StringScanner scanner(text, optionStart, u','); | |
| 173 bool done = false; | |
| 174 while (!done) | |
| 175 { | |
| 176 done = scanner.done(); | |
| 177 switch (scanner.next()) | |
| 178 { | |
| 179 case u'=': | |
| 180 if (optionEnd < 0) | |
| 181 { | |
| 182 optionEnd = scanner.position(); | |
| 183 valueStart = optionEnd + 1; | |
| 184 } | |
| 185 break; | |
| 186 case u',': | |
| 187 if (optionEnd < 0) | |
| 188 optionEnd = scanner.position(); | |
| 189 ParseOption(text, error, data, optionStart, optionEnd, valueStart, | |
| 190 scanner.position()); | |
| 191 if (!error.empty()) | |
| 192 return; | |
| 193 | |
| 194 optionStart = scanner.position() + 1; | |
| 195 optionEnd = -1; | |
| 196 valueStart = -1; | |
| 197 break; | |
| 198 } | |
| 199 } | |
| 200 | |
| 201 if (data.mContentType < 0) | |
| 202 data.mContentType = defaultTypeMask; | |
| 203 } | |
| 204 } | |
| 205 | |
| 206 RegExpFilter::RegExpFilter(const String& text, const RegExpFilterData& data) | |
| 207 : ActiveFilter(text, true), RegExpFilterData(data) | |
| 208 { | |
| 209 } | |
| 210 | |
| 211 RegExpFilter::~RegExpFilter() | |
| 212 { | |
| 213 if (HasRegExp()) | |
| 214 EM_ASM_ARGS(regexps.delete($0), mRegexpId); | |
| 215 } | |
| 216 | |
| 217 Filter::Type RegExpFilter::Parse(DependentString& text, OwnedString& error, | |
| 218 RegExpFilterData& data) | |
| 219 { | |
| 220 NormalizeWhitespace(text); | |
| 221 | |
| 222 bool blocking = true; | |
| 223 | |
| 224 data.mPatternStart = 0; | |
| 225 if (text.length() >= 2 && text[0] == u'@' && text[1] == u'@') | |
| 226 { | |
| 227 blocking = false; | |
| 228 data.mPatternStart = 2; | |
| 229 } | |
| 230 | |
| 231 data.mPatternEnd = text.find(u'$', data.mPatternStart); | |
| 232 if (data.mPatternEnd == text.npos) | |
| 233 data.mPatternEnd = text.length(); | |
| 234 | |
| 235 ParseOptions(text, error, data, data.mPatternEnd + 1); | |
| 236 if (!error.empty()) | |
| 237 return Type::INVALID; | |
| 238 | |
| 239 if (data.mPatternEnd - data.mPatternStart >= 2 && | |
| 240 text[data.mPatternStart] == u'/' && | |
| 241 text[data.mPatternEnd - 1] == u'/') | |
| 242 { | |
| 243 data.SetRegExp(GenerateRegExp(DependentString(text, data.mPatternStart + 1, | |
| 244 data.mPatternEnd - data.mPatternStart - 2), data.mMatchCase)); | |
| 245 | |
| 246 int errorLength = EM_ASM_INT(return regexps.getErrorLength($0), | |
| 247 data.mRegexpId); | |
| 248 if (errorLength >= 0) | |
| 249 { | |
| 250 OwnedString regexpError(errorLength); | |
| 251 EM_ASM_ARGS(regexps.getError($0, $1), data.mRegexpId, regexpError.data()); | |
| 252 error = std::move(regexpError); | |
| 253 return Type::INVALID; | |
| 254 } | |
| 255 } | |
| 256 | |
| 257 if (blocking) | |
| 258 return Type::BLOCKING; | |
| 259 else | |
| 260 return Type::WHITELIST; | |
| 261 } | |
| 262 | |
| 263 void RegExpFilter::ParseSitekeys(const String& sitekeys) const | |
| 264 { | |
| 265 StringScanner scanner(sitekeys, 0, u'|'); | |
| 266 size_t start = 0; | |
| 267 bool done = false; | |
| 268 while (!done) | |
| 269 { | |
| 270 done = scanner.done(); | |
| 271 if (scanner.next() == u'|') | |
| 272 { | |
| 273 if (scanner.position() > start) | |
| 274 AddSitekey(DependentString(sitekeys, start, scanner.position() - start)) ; | |
| 275 start = scanner.position() + 1; | |
| 276 } | |
| 277 } | |
| 278 } | |
| 279 | |
| 280 void RegExpFilter::InitJSTypes() | |
| 281 { | |
| 282 EM_ASM(exports.RegExpFilter.typeMap = {};); | |
| 283 for (auto it = typeMap.begin(); it != typeMap.end(); ++it) | |
| 284 EM_ASM_ARGS(exports.RegExpFilter.typeMap[getStringData($0).replace("-", "_") .toUpperCase()] = $1, &(it->first), it->second); | |
| 285 } | |
| 286 | |
| 287 OwnedString RegExpFilter::RegExpFromSource(const String& source) | |
| 288 { | |
| 289 /* TODO: this is very inefficient */ | |
| 290 | |
| 291 // Note: This doesn't remove trailing wildcards, otherwise the result should | |
| 292 // be identical to Filter.toRegExp(). | |
| 293 OwnedString result; | |
| 294 String::value_type prevChar = u'*'; | |
| 295 for (String::size_type i = 0; i < source.length(); ++i) | |
| 296 { | |
| 297 String::value_type currChar = source[i]; | |
| 298 switch (currChar) | |
| 299 { | |
| 300 case u'*': | |
| 301 if (prevChar != u'*') | |
| 302 result.append(u".*"_str); | |
| 303 break; | |
| 304 case u'^': | |
| 305 result.append(u"(?:[\\x00-\\x24\\x26-\\x2C\\x2F\\x3A-\\x40\\x5B-\\x5E\\x 60\\x7B-\\x7F]|$)"_str); | |
| 306 break; | |
| 307 case u'|': | |
| 308 if (i == 0) | |
| 309 { | |
| 310 // Anchor at expression start, maybe extended anchor? | |
| 311 if (i + 1 < source.length() && source[i + 1] == u'|') | |
| 312 { | |
| 313 result.append(u"^[\\w\\-]+:\\/+(?!\\/)(?:[^\\/]+\\.)?"_str); | |
| 314 ++i; | |
| 315 } | |
| 316 else | |
| 317 result.append(u'^'); | |
| 318 } | |
| 319 else if (i == source.length() - 1) | |
| 320 { | |
| 321 // Anchor at expression end, ignore if following separator placeholder | |
| 322 if (prevChar != u'^') | |
| 323 result.append(u'$'); | |
| 324 } | |
| 325 else | |
| 326 { | |
| 327 // Not actually an anchor, escape it | |
| 328 result.append(u"\\|"_str); | |
| 329 } | |
| 330 break; | |
| 331 default: | |
| 332 if (!(currChar >= u'a' && currChar <= u'z') && | |
| 333 !(currChar >= u'A' && currChar <= u'Z') && | |
| 334 !(currChar >= u'0' && currChar <= u'9') && | |
| 335 currChar < 128) | |
| 336 { | |
| 337 result.append(u'\\'); | |
| 338 } | |
| 339 result.append(currChar); | |
| 340 } | |
| 341 prevChar = currChar; | |
| 342 } | |
| 343 return std::move(result); | |
| 344 } | |
| 345 | |
| 346 Filter::Type RegExpFilter::GetType() const | |
| 347 { | |
| 348 return Type::BLOCKING; | |
| 349 } | |
| 350 | |
| 351 RegExpFilter::DomainMap* RegExpFilter::GetDomains() const | |
| 352 { | |
| 353 if (!DomainsParsingDone()) | |
| 354 { | |
| 355 ParseDomains(GetDomainsSource(mText), u'|'); | |
| 356 SetDomainsParsingDone(); | |
| 357 } | |
| 358 return ActiveFilter::GetDomains(); | |
| 359 } | |
| 360 | |
| 361 RegExpFilter::SitekeySet* RegExpFilter::GetSitekeys() const | |
| 362 { | |
| 363 if (!SitekeyParsingDone()) | |
| 364 { | |
| 365 ParseSitekeys(GetSitekeysSource(mText)); | |
| 366 SetSitekeysParsingDone(); | |
| 367 } | |
| 368 return ActiveFilter::GetSitekeys(); | |
| 369 } | |
| 370 | |
| 371 bool RegExpFilter::Matches(const String& location, int typeMask, | |
| 372 DependentString& docDomain, bool thirdParty, const String& sitekey) const | |
| 373 { | |
| 374 if (!(mContentType & typeMask) || | |
| 375 (mThirdParty == TrippleState::YES && !thirdParty) || | |
| 376 (mThirdParty == TrippleState::NO && thirdParty) || | |
| 377 !IsActiveOnDomain(docDomain, sitekey)) | |
| 378 { | |
| 379 return false; | |
| 380 } | |
| 381 | |
| 382 if (!RegExpParsingDone()) | |
| 383 { | |
| 384 const OwnedString pattern(GetRegExpSource(mText)); | |
| 385 SetRegExp(GenerateRegExp(RegExpFromSource(pattern), mMatchCase)); | |
| 386 } | |
| 387 return EM_ASM_INT(return regexps.test($0, $1), mRegexpId, &location); | |
| 388 } | |
| OLD | NEW |