| OLD | NEW | 
|---|
| (Empty) |  | 
|  | 1 #include <climits> | 
|  | 2 | 
|  | 3 #include <emscripten.h> | 
|  | 4 | 
|  | 5 #include "RegExpFilter.h" | 
|  | 6 #include "StringScanner.h" | 
|  | 7 #include "StringMap.h" | 
|  | 8 | 
|  | 9 namespace | 
|  | 10 { | 
// Content type flags. Every constant occupies its own bit, so several types
// can be combined into (and tested against) a single integer mask.
enum
{
  TYPE_OTHER = 0x1,
  TYPE_SCRIPT = 0x2,
  TYPE_IMAGE = 0x4,
  TYPE_STYLESHEET = 0x8,
  TYPE_OBJECT = 0x10,
  TYPE_SUBDOCUMENT = 0x20,
  TYPE_DOCUMENT = 0x40,
  TYPE_PING = 0x400,
  TYPE_XMLHTTPREQUEST = 0x800,
  TYPE_OBJECT_SUBREQUEST = 0x1000,
  TYPE_MEDIA = 0x4000,
  TYPE_FONT = 0x8000,
  TYPE_POPUP = 0x8000000,
  TYPE_GENERICBLOCK = 0x10000000,
  TYPE_GENERICHIDE = 0x20000000,
  TYPE_ELEMHIDE = 0x40000000,
};
|  | 30 | 
|  | 31   StringMap<int> typeMap { | 
|  | 32     {u"other"_str, TYPE_OTHER}, | 
|  | 33     {u"script"_str, TYPE_SCRIPT}, | 
|  | 34     {u"image"_str, TYPE_IMAGE}, | 
|  | 35     {u"stylesheet"_str, TYPE_STYLESHEET}, | 
|  | 36     {u"object"_str, TYPE_OBJECT}, | 
|  | 37     {u"subdocument"_str, TYPE_SUBDOCUMENT}, | 
|  | 38     {u"document"_str, TYPE_DOCUMENT}, | 
|  | 39     {u"xbl"_str, TYPE_OTHER},          // Backwards compat | 
|  | 40     {u"ping"_str, TYPE_PING}, | 
|  | 41     {u"xmlhttprequest"_str, TYPE_XMLHTTPREQUEST}, | 
|  | 42     {u"object-subrequest"_str, TYPE_OBJECT_SUBREQUEST}, | 
|  | 43     {u"dtd"_str, TYPE_OTHER},          // Backwards compat | 
|  | 44     {u"media"_str, TYPE_MEDIA}, | 
|  | 45     {u"font"_str, TYPE_FONT}, | 
|  | 46     {u"background"_str, TYPE_IMAGE},   // Backwards compat | 
|  | 47 | 
|  | 48     {u"popup"_str, TYPE_POPUP}, | 
|  | 49     {u"genericblock"_str, TYPE_GENERICBLOCK}, | 
|  | 50     {u"generichide"_str, TYPE_GENERICHIDE}, | 
|  | 51     {u"elemhide"_str, TYPE_ELEMHIDE}, | 
|  | 52   }; | 
|  | 53 | 
|  | 54   const int defaultTypeMask = INT_MAX & ~(TYPE_DOCUMENT | TYPE_ELEMHIDE | | 
|  | 55       TYPE_POPUP | TYPE_GENERICBLOCK | TYPE_GENERICHIDE); | 
|  | 56 | 
|  | 57   int GenerateRegExp(const String& regexp, bool matchCase) | 
|  | 58   { | 
|  | 59     return EM_ASM_INT(return regexps.create($0, $1), ®exp, matchCase); | 
|  | 60   } | 
|  | 61 | 
|  | 62   void NormalizeWhitespace(DependentString& text) | 
|  | 63   { | 
|  | 64     // We want to remove all spaces but bail out early in the common scenario | 
|  | 65     // that the string contains no spaces. | 
|  | 66 | 
|  | 67     // Look for the first space | 
|  | 68     String::size_type len = text.length(); | 
|  | 69     String::size_type pos; | 
|  | 70     for (pos = 0; pos < len; pos++) | 
|  | 71       if (text[pos] == ' ') | 
|  | 72         break; | 
|  | 73 | 
|  | 74     if (pos >= len) | 
|  | 75       return; | 
|  | 76 | 
|  | 77     // Found spaces, move characters to remove them | 
|  | 78     String::size_type delta = 1; | 
|  | 79     for (pos = pos + 1; pos < len; pos++) | 
|  | 80     { | 
|  | 81       if (text[pos] == ' ') | 
|  | 82         delta++; | 
|  | 83       else | 
|  | 84         text[pos - delta] = text[pos]; | 
|  | 85     } | 
|  | 86     text.reset(text, 0, len - delta); | 
|  | 87   } | 
|  | 88 | 
|  | 89   void ParseOption(String& text, DependentString& error, RegExpFilterData& data, | 
|  | 90       int optionStart, int optionEnd, int valueStart, int valueEnd) | 
|  | 91   { | 
|  | 92     if (optionEnd <= optionStart) | 
|  | 93       return; | 
|  | 94 | 
|  | 95     bool reverse = false; | 
|  | 96     if (text[optionStart] == u'~') | 
|  | 97     { | 
|  | 98       reverse = true; | 
|  | 99       optionStart++; | 
|  | 100     } | 
|  | 101 | 
|  | 102     DependentString name(text, optionStart, optionEnd - optionStart); | 
|  | 103     for (size_t i = 0; i < name.length(); ++i) | 
|  | 104     { | 
|  | 105       char16_t currChar = name[i]; | 
|  | 106       if (currChar >= u'A' && currChar <= u'Z') | 
|  | 107         name[i] = currChar + u'a' - u'A'; | 
|  | 108       else if (currChar == u'_') | 
|  | 109         name[i] = u'-'; | 
|  | 110     } | 
|  | 111 | 
|  | 112     auto it = typeMap.find(name); | 
|  | 113     if (it) | 
|  | 114     { | 
|  | 115       if (data.mContentType < 0) | 
|  | 116         data.mContentType = reverse ? defaultTypeMask : 0; | 
|  | 117       if (reverse) | 
|  | 118         data.mContentType &= ~it->second; | 
|  | 119       else | 
|  | 120         data.mContentType |= it->second; | 
|  | 121     } | 
|  | 122     else if (name.equals(u"domain"_str)) | 
|  | 123     { | 
|  | 124       if (valueStart >= 0 && valueEnd > valueStart) | 
|  | 125       { | 
|  | 126         data.mDomainsStart = valueStart; | 
|  | 127         data.mDomainsEnd = valueEnd; | 
|  | 128         DependentString(text, valueStart, valueEnd - valueStart).toLower(); | 
|  | 129       } | 
|  | 130     } | 
|  | 131     else if (name.equals(u"sitekey"_str)) | 
|  | 132     { | 
|  | 133       if (valueStart >= 0 && valueEnd > valueStart) | 
|  | 134       { | 
|  | 135         data.mSitekeysStart = valueStart; | 
|  | 136         data.mSitekeysEnd = valueEnd; | 
|  | 137       } | 
|  | 138     } | 
|  | 139     else if (name.equals(u"match-case"_str)) | 
|  | 140       data.mMatchCase = !reverse; | 
|  | 141     else if (name.equals(u"third-party"_str)) | 
|  | 142       data.mThirdParty = reverse ? TrippleState::NO : TrippleState::YES; | 
|  | 143     else if (name.equals(u"collapse"_str)) | 
|  | 144       data.mCollapse = reverse ? TrippleState::NO : TrippleState::YES; | 
|  | 145     else | 
|  | 146       error.reset(u"filter_unknown_option"_str); | 
|  | 147   } | 
|  | 148 | 
|  | 149   void ParseOptions(String& text, DependentString& error, RegExpFilterData& data
     , | 
|  | 150       String::size_type optionsStart) | 
|  | 151   { | 
|  | 152     data.mMatchCase = false; | 
|  | 153     data.mThirdParty = TrippleState::ANY; | 
|  | 154     data.mCollapse = TrippleState::ANY; | 
|  | 155     data.mDomainsStart = String::npos; | 
|  | 156     data.mSitekeysStart = String::npos; | 
|  | 157     if (optionsStart >= text.length()) | 
|  | 158     { | 
|  | 159       data.mContentType = defaultTypeMask; | 
|  | 160       return; | 
|  | 161     } | 
|  | 162 | 
|  | 163     data.mContentType = -1; | 
|  | 164 | 
|  | 165     int optionStart = data.mPatternEnd + 1; | 
|  | 166     int optionEnd = -1; | 
|  | 167     int valueStart = -1; | 
|  | 168 | 
|  | 169     StringScanner scanner(text, optionStart, u','); | 
|  | 170     bool done = false; | 
|  | 171     while (!done) | 
|  | 172     { | 
|  | 173       done = scanner.done(); | 
|  | 174       switch (scanner.next()) | 
|  | 175       { | 
|  | 176         case u'=': | 
|  | 177           if (optionEnd < 0) | 
|  | 178           { | 
|  | 179             optionEnd = scanner.position(); | 
|  | 180             valueStart = optionEnd + 1; | 
|  | 181           } | 
|  | 182           break; | 
|  | 183         case u',': | 
|  | 184           if (optionEnd < 0) | 
|  | 185             optionEnd = scanner.position(); | 
|  | 186           ParseOption(text, error, data, optionStart, optionEnd, valueStart, | 
|  | 187               scanner.position()); | 
|  | 188           if (!error.empty()) | 
|  | 189             return; | 
|  | 190 | 
|  | 191           optionStart = scanner.position() + 1; | 
|  | 192           optionEnd = -1; | 
|  | 193           valueStart = -1; | 
|  | 194           break; | 
|  | 195       } | 
|  | 196     } | 
|  | 197 | 
|  | 198     if (data.mContentType < 0) | 
|  | 199       data.mContentType = defaultTypeMask; | 
|  | 200   } | 
|  | 201 } | 
|  | 202 | 
|  | 203 RegExpFilter::RegExpFilter(Type type, const String& text, const RegExpFilterData
     & data) | 
|  | 204     : ActiveFilter(type, text, true), mData(data) | 
|  | 205 { | 
|  | 206 } | 
|  | 207 | 
|  | 208 RegExpFilter::~RegExpFilter() | 
|  | 209 { | 
|  | 210   if (mData.HasRegExp()) | 
|  | 211     EM_ASM_ARGS(regexps.delete($0), mData.mRegexpId); | 
|  | 212 } | 
|  | 213 | 
|  | 214 Filter::Type RegExpFilter::Parse(DependentString& text, DependentString& error, | 
|  | 215     RegExpFilterData& data) | 
|  | 216 { | 
|  | 217   NormalizeWhitespace(text); | 
|  | 218 | 
|  | 219   Filter::Type type = Type::BLOCKING; | 
|  | 220 | 
|  | 221   data.mPatternStart = 0; | 
|  | 222   if (text.length() >= 2 && text[0] == u'@' && text[1] == u'@') | 
|  | 223   { | 
|  | 224     type = Type::WHITELIST; | 
|  | 225     data.mPatternStart = 2; | 
|  | 226   } | 
|  | 227 | 
|  | 228   data.mPatternEnd = text.find(u'$', data.mPatternStart); | 
|  | 229   if (data.mPatternEnd == text.npos) | 
|  | 230     data.mPatternEnd = text.length(); | 
|  | 231 | 
|  | 232   ParseOptions(text, error, data, data.mPatternEnd + 1); | 
|  | 233   if (!error.empty()) | 
|  | 234     return Type::INVALID; | 
|  | 235 | 
|  | 236   if (data.mPatternEnd - data.mPatternStart >= 2 && | 
|  | 237       text[data.mPatternStart] == u'/' && | 
|  | 238       text[data.mPatternEnd - 1] == u'/') | 
|  | 239   { | 
|  | 240     data.SetRegExp(GenerateRegExp(DependentString(text, data.mPatternStart + 1, | 
|  | 241         data.mPatternEnd - data.mPatternStart - 2), data.mMatchCase)); | 
|  | 242     if (data.mRegexpId == -1) | 
|  | 243     { | 
|  | 244       error.reset(u"filter_invalid_regexp"_str); | 
|  | 245       return Type::INVALID; | 
|  | 246     } | 
|  | 247   } | 
|  | 248 | 
|  | 249   return type; | 
|  | 250 } | 
|  | 251 | 
|  | 252 void RegExpFilter::ParseSitekeys(const String& sitekeys) const | 
|  | 253 { | 
|  | 254   StringScanner scanner(sitekeys, 0, u'|'); | 
|  | 255   size_t start = 0; | 
|  | 256   bool done = false; | 
|  | 257   while (!done) | 
|  | 258   { | 
|  | 259     done = scanner.done(); | 
|  | 260     if (scanner.next() == u'|') | 
|  | 261     { | 
|  | 262       if (scanner.position() > start) | 
|  | 263         AddSitekey(DependentString(sitekeys, start, scanner.position() - start))
     ; | 
|  | 264       start = scanner.position() + 1; | 
|  | 265     } | 
|  | 266   } | 
|  | 267 } | 
|  | 268 | 
|  | 269 void RegExpFilter::InitJSTypes() | 
|  | 270 { | 
|  | 271   EM_ASM(exports.RegExpFilter.typeMap = {};); | 
|  | 272   for (auto it = typeMap.begin(); it != typeMap.end(); ++it) | 
|  | 273     EM_ASM_ARGS(exports.RegExpFilter.typeMap[readString($0).replace("-", "_").to
     UpperCase()] = $1, &(it->first), it->second); | 
|  | 274 } | 
|  | 275 | 
|  | 276 OwnedString RegExpFilter::RegExpFromSource(const String& source) | 
|  | 277 { | 
|  | 278   /* TODO: this is very inefficient */ | 
|  | 279 | 
|  | 280   // Note: This doesn't remove trailing wildcards, otherwise the result should | 
|  | 281   // be identical to Filter.toRegExp(). | 
|  | 282   OwnedString result; | 
|  | 283   String::value_type prevChar = u'*'; | 
|  | 284   for (String::size_type i = 0; i < source.length(); ++i) | 
|  | 285   { | 
|  | 286     String::value_type currChar = source[i]; | 
|  | 287     switch (currChar) | 
|  | 288     { | 
|  | 289       case u'*': | 
|  | 290         if (prevChar != u'*') | 
|  | 291           result.append(u".*"_str); | 
|  | 292         break; | 
|  | 293       case u'^': | 
|  | 294         result.append(u"(?:[\\x00-\\x24\\x26-\\x2C\\x2F\\x3A-\\x40\\x5B-\\x5E\\x
     60\\x7B-\\x7F]|$)"_str); | 
|  | 295         break; | 
|  | 296       case u'|': | 
|  | 297         if (i == 0) | 
|  | 298         { | 
|  | 299           // Anchor at expression start, maybe extended anchor? | 
|  | 300           if (i + 1 < source.length() && source[i + 1] == u'|') | 
|  | 301           { | 
|  | 302             result.append(u"^[\\w\\-]+:\\/+(?!\\/)(?:[^\\/]+\\.)?"_str); | 
|  | 303             ++i; | 
|  | 304           } | 
|  | 305           else | 
|  | 306             result.append(u'^'); | 
|  | 307         } | 
|  | 308         else if (i == source.length() - 1) | 
|  | 309         { | 
|  | 310           // Anchor at expression end, ignore if following separator placeholder | 
|  | 311           if (prevChar != u'^') | 
|  | 312             result.append(u'$'); | 
|  | 313         } | 
|  | 314         else | 
|  | 315         { | 
|  | 316           // Not actually an anchor, escape it | 
|  | 317           result.append(u"\\|"_str); | 
|  | 318         } | 
|  | 319         break; | 
|  | 320       default: | 
|  | 321         if (!(currChar >= u'a' && currChar <= u'z') && | 
|  | 322             !(currChar >= u'A' && currChar <= u'Z') && | 
|  | 323             !(currChar >= u'0' && currChar <= u'9') && | 
|  | 324             currChar < 128) | 
|  | 325         { | 
|  | 326           result.append(u'\\'); | 
|  | 327         } | 
|  | 328         result.append(currChar); | 
|  | 329     } | 
|  | 330     prevChar = currChar; | 
|  | 331   } | 
|  | 332   return result; | 
|  | 333 } | 
|  | 334 | 
|  | 335 RegExpFilter::DomainMap* RegExpFilter::GetDomains() const | 
|  | 336 { | 
|  | 337   if (!mData.DomainsParsingDone()) | 
|  | 338   { | 
|  | 339     ParseDomains(mData.GetDomainsSource(mText), u'|'); | 
|  | 340     mData.SetDomainsParsingDone(); | 
|  | 341   } | 
|  | 342   return ActiveFilter::GetDomains(); | 
|  | 343 } | 
|  | 344 | 
|  | 345 RegExpFilter::SitekeySet* RegExpFilter::GetSitekeys() const | 
|  | 346 { | 
|  | 347   if (!mData.SitekeyParsingDone()) | 
|  | 348   { | 
|  | 349     ParseSitekeys(mData.GetSitekeysSource(mText)); | 
|  | 350     mData.SetSitekeysParsingDone(); | 
|  | 351   } | 
|  | 352   return ActiveFilter::GetSitekeys(); | 
|  | 353 } | 
|  | 354 | 
|  | 355 bool RegExpFilter::Matches(const String& location, int typeMask, | 
|  | 356     DependentString& docDomain, bool thirdParty, const String& sitekey) const | 
|  | 357 { | 
|  | 358   if (!(mData.mContentType & typeMask) || | 
|  | 359       (mData.mThirdParty == TrippleState::YES && !thirdParty) || | 
|  | 360       (mData.mThirdParty == TrippleState::NO && thirdParty) || | 
|  | 361       !IsActiveOnDomain(docDomain, sitekey)) | 
|  | 362   { | 
|  | 363     return false; | 
|  | 364   } | 
|  | 365 | 
|  | 366   if (!mData.RegExpParsingDone()) | 
|  | 367   { | 
|  | 368     const OwnedString pattern(mData.GetRegExpSource(mText)); | 
|  | 369     mData.SetRegExp(GenerateRegExp(RegExpFromSource(pattern), mData.mMatchCase))
     ; | 
|  | 370   } | 
|  | 371   return EM_ASM_INT(return regexps.test($0, $1), mData.mRegexpId, &location); | 
|  | 372 } | 
| OLD | NEW | 
|---|