| OLD | NEW | 
| (Empty) |  | 
 |    1 #include <climits> | 
 |    2  | 
 |    3 #include <emscripten.h> | 
 |    4  | 
 |    5 #include "RegExpFilter.h" | 
 |    6 #include "StringScanner.h" | 
 |    7 #include "StringMap.h" | 
 |    8  | 
 |    9 namespace | 
 |   10 { | 
 // Content type flags. Every constant occupies exactly one bit, so several
 // types can be combined into (or removed from) a single integer type mask.
 // The bit positions are not contiguous; the gaps are left unassigned here.
 enum
 {
   TYPE_OTHER = 1 << 0,
   TYPE_SCRIPT = 1 << 1,
   TYPE_IMAGE = 1 << 2,
   TYPE_STYLESHEET = 1 << 3,
   TYPE_OBJECT = 1 << 4,
   TYPE_SUBDOCUMENT = 1 << 5,
   TYPE_DOCUMENT = 1 << 6,
   TYPE_PING = 1 << 10,
   TYPE_XMLHTTPREQUEST = 1 << 11,
   TYPE_OBJECT_SUBREQUEST = 1 << 12,
   TYPE_MEDIA = 1 << 14,
   TYPE_FONT = 1 << 15,
   TYPE_POPUP = 1 << 27,
   TYPE_GENERICBLOCK = 1 << 28,
   TYPE_GENERICHIDE = 1 << 29,
   TYPE_ELEMHIDE = 1 << 30,
 };
 |   30  | 
 |   31   StringMap<int> typeMap { | 
 |   32     {u"other"_str, TYPE_OTHER}, | 
 |   33     {u"script"_str, TYPE_SCRIPT}, | 
 |   34     {u"image"_str, TYPE_IMAGE}, | 
 |   35     {u"stylesheet"_str, TYPE_STYLESHEET}, | 
 |   36     {u"object"_str, TYPE_OBJECT}, | 
 |   37     {u"subdocument"_str, TYPE_SUBDOCUMENT}, | 
 |   38     {u"document"_str, TYPE_DOCUMENT}, | 
 |   39     {u"xbl"_str, TYPE_OTHER},          // Backwards compat | 
 |   40     {u"ping"_str, TYPE_PING}, | 
 |   41     {u"xmlhttprequest"_str, TYPE_XMLHTTPREQUEST}, | 
 |   42     {u"object-subrequest"_str, TYPE_OBJECT_SUBREQUEST}, | 
 |   43     {u"dtd"_str, TYPE_OTHER},          // Backwards compat | 
 |   44     {u"media"_str, TYPE_MEDIA}, | 
 |   45     {u"font"_str, TYPE_FONT}, | 
 |   46     {u"background"_str, TYPE_IMAGE},   // Backwards compat | 
 |   47  | 
 |   48     {u"popup"_str, TYPE_POPUP}, | 
 |   49     {u"genericblock"_str, TYPE_GENERICBLOCK}, | 
 |   50     {u"generichide"_str, TYPE_GENERICHIDE}, | 
 |   51     {u"elemhide"_str, TYPE_ELEMHIDE}, | 
 |   52   }; | 
 |   53  | 
 |   54   const int defaultTypeMask = INT_MAX & ~(TYPE_DOCUMENT | TYPE_ELEMHIDE | | 
 |   55       TYPE_POPUP | TYPE_GENERICBLOCK | TYPE_GENERICHIDE); | 
 |   56  | 
 |   57   int GenerateRegExp(const String& regexp, bool matchCase) | 
 |   58   { | 
 |   59     return EM_ASM_INT(return regexps.create($0, $1), ®exp, matchCase); | 
 |   60   } | 
 |   61  | 
 |   62   void NormalizeWhitespace(DependentString& text) | 
 |   63   { | 
 |   64     // We want to remove all spaces but bail out early in the common scenario | 
 |   65     // that the string contains no spaces. | 
 |   66  | 
 |   67     // Look for the first space | 
 |   68     String::size_type len = text.length(); | 
 |   69     String::size_type pos; | 
 |   70     for (pos = 0; pos < len; pos++) | 
 |   71       if (text[pos] == ' ') | 
 |   72         break; | 
 |   73  | 
 |   74     if (pos >= len) | 
 |   75       return; | 
 |   76  | 
 |   77     // Found spaces, move characters to remove them | 
 |   78     String::size_type delta = 1; | 
 |   79     for (pos = pos + 1; pos < len; pos++) | 
 |   80     { | 
 |   81       if (text[pos] == ' ') | 
 |   82         delta++; | 
 |   83       else | 
 |   84         text[pos - delta] = text[pos]; | 
 |   85     } | 
 |   86     text.reset(text, 0, len - delta); | 
 |   87   } | 
 |   88  | 
 |   89   void ParseOption(String& text, DependentString& error, RegExpFilterData& data, | 
 |   90       int optionStart, int optionEnd, int valueStart, int valueEnd) | 
 |   91   { | 
 |   92     if (optionEnd <= optionStart) | 
 |   93       return; | 
 |   94  | 
 |   95     bool reverse = false; | 
 |   96     if (text[optionStart] == u'~') | 
 |   97     { | 
 |   98       reverse = true; | 
 |   99       optionStart++; | 
 |  100     } | 
 |  101  | 
 |  102     DependentString name(text, optionStart, optionEnd - optionStart); | 
 |  103     for (size_t i = 0; i < name.length(); ++i) | 
 |  104     { | 
 |  105       char16_t currChar = name[i]; | 
 |  106       if (currChar >= u'A' && currChar <= u'Z') | 
 |  107         name[i] = currChar + u'a' - u'A'; | 
 |  108       else if (currChar == u'_') | 
 |  109         name[i] = u'-'; | 
 |  110     } | 
 |  111  | 
 |  112     auto it = typeMap.find(name); | 
 |  113     if (it) | 
 |  114     { | 
 |  115       if (data.mContentType < 0) | 
 |  116         data.mContentType = reverse ? defaultTypeMask : 0; | 
 |  117       if (reverse) | 
 |  118         data.mContentType &= ~it->second; | 
 |  119       else | 
 |  120         data.mContentType |= it->second; | 
 |  121     } | 
 |  122     else if (name.equals(u"domain"_str)) | 
 |  123     { | 
 |  124       if (valueStart >= 0 && valueEnd > valueStart) | 
 |  125       { | 
 |  126         data.mDomainsStart = valueStart; | 
 |  127         data.mDomainsEnd = valueEnd; | 
 |  128         ActiveFilter::ToLower(DependentString(text, data.mDomainsStart, | 
 |  129             valueEnd - valueStart)); | 
 |  130       } | 
 |  131     } | 
 |  132     else if (name.equals(u"sitekey"_str)) | 
 |  133     { | 
 |  134       if (valueStart >= 0 && valueEnd > valueStart) | 
 |  135       { | 
 |  136         data.mSitekeysStart = valueStart; | 
 |  137         data.mSitekeysEnd = valueEnd; | 
 |  138       } | 
 |  139     } | 
 |  140     else if (name.equals(u"match-case"_str)) | 
 |  141       data.mMatchCase = !reverse; | 
 |  142     else if (name.equals(u"third-party"_str)) | 
 |  143       data.mThirdParty = reverse ? TrippleState::NO : TrippleState::YES; | 
 |  144     else if (name.equals(u"collapse"_str)) | 
 |  145       data.mCollapse = reverse ? TrippleState::NO : TrippleState::YES; | 
 |  146     else | 
 |  147       error.reset(u"filter_unknown_option"_str); | 
 |  148   } | 
 |  149  | 
 |  150   void ParseOptions(String& text, DependentString& error, RegExpFilterData& data
     , | 
 |  151       String::size_type optionsStart) | 
 |  152   { | 
 |  153     data.mMatchCase = false; | 
 |  154     data.mThirdParty = TrippleState::ANY; | 
 |  155     data.mCollapse = TrippleState::ANY; | 
 |  156     data.mDomainsStart = String::npos; | 
 |  157     data.mSitekeysStart = String::npos; | 
 |  158     if (optionsStart >= text.length()) | 
 |  159     { | 
 |  160       data.mContentType = defaultTypeMask; | 
 |  161       return; | 
 |  162     } | 
 |  163  | 
 |  164     data.mContentType = -1; | 
 |  165  | 
 |  166     int optionStart = data.mPatternEnd + 1; | 
 |  167     int optionEnd = -1; | 
 |  168     int valueStart = -1; | 
 |  169  | 
 |  170     StringScanner scanner(text, optionStart, u','); | 
 |  171     bool done = false; | 
 |  172     while (!done) | 
 |  173     { | 
 |  174       done = scanner.done(); | 
 |  175       switch (scanner.next()) | 
 |  176       { | 
 |  177         case u'=': | 
 |  178           if (optionEnd < 0) | 
 |  179           { | 
 |  180             optionEnd = scanner.position(); | 
 |  181             valueStart = optionEnd + 1; | 
 |  182           } | 
 |  183           break; | 
 |  184         case u',': | 
 |  185           if (optionEnd < 0) | 
 |  186             optionEnd = scanner.position(); | 
 |  187           ParseOption(text, error, data, optionStart, optionEnd, valueStart, | 
 |  188               scanner.position()); | 
 |  189           if (!error.empty()) | 
 |  190             return; | 
 |  191  | 
 |  192           optionStart = scanner.position() + 1; | 
 |  193           optionEnd = -1; | 
 |  194           valueStart = -1; | 
 |  195           break; | 
 |  196       } | 
 |  197     } | 
 |  198  | 
 |  199     if (data.mContentType < 0) | 
 |  200       data.mContentType = defaultTypeMask; | 
 |  201   } | 
 |  202 } | 
 |  203  | 
 |  204 RegExpFilter::RegExpFilter(const String& text, const RegExpFilterData& data) | 
 |  205     : ActiveFilter(text, true), RegExpFilterData(data) | 
 |  206 { | 
 |  207 } | 
 |  208  | 
 |  209 RegExpFilter::~RegExpFilter() | 
 |  210 { | 
 |  211   if (HasRegExp()) | 
 |  212     EM_ASM_ARGS(regexps.delete($0), mRegexpId); | 
 |  213 } | 
 |  214  | 
 |  215 Filter::Type RegExpFilter::Parse(DependentString& text, DependentString& error, | 
 |  216     RegExpFilterData& data) | 
 |  217 { | 
 |  218   NormalizeWhitespace(text); | 
 |  219  | 
 |  220   bool blocking = true; | 
 |  221  | 
 |  222   data.mPatternStart = 0; | 
 |  223   if (text.length() >= 2 && text[0] == u'@' && text[1] == u'@') | 
 |  224   { | 
 |  225     blocking = false; | 
 |  226     data.mPatternStart = 2; | 
 |  227   } | 
 |  228  | 
 |  229   data.mPatternEnd = text.find(u'$', data.mPatternStart); | 
 |  230   if (data.mPatternEnd == text.npos) | 
 |  231     data.mPatternEnd = text.length(); | 
 |  232  | 
 |  233   ParseOptions(text, error, data, data.mPatternEnd + 1); | 
 |  234   if (!error.empty()) | 
 |  235     return Type::INVALID; | 
 |  236  | 
 |  237   if (data.mPatternEnd - data.mPatternStart >= 2 && | 
 |  238       text[data.mPatternStart] == u'/' && | 
 |  239       text[data.mPatternEnd - 1] == u'/') | 
 |  240   { | 
 |  241     data.SetRegExp(GenerateRegExp(DependentString(text, data.mPatternStart + 1, | 
 |  242         data.mPatternEnd - data.mPatternStart - 2), data.mMatchCase)); | 
 |  243     if (data.mRegexpId == -1) | 
 |  244     { | 
 |  245       error.reset(u"filter_invalid_regexp"_str); | 
 |  246       return Type::INVALID; | 
 |  247     } | 
 |  248   } | 
 |  249  | 
 |  250   if (blocking) | 
 |  251     return Type::BLOCKING; | 
 |  252   else | 
 |  253     return Type::WHITELIST; | 
 |  254 } | 
 |  255  | 
 |  256 void RegExpFilter::ParseSitekeys(const String& sitekeys) const | 
 |  257 { | 
 |  258   StringScanner scanner(sitekeys, 0, u'|'); | 
 |  259   size_t start = 0; | 
 |  260   bool done = false; | 
 |  261   while (!done) | 
 |  262   { | 
 |  263     done = scanner.done(); | 
 |  264     if (scanner.next() == u'|') | 
 |  265     { | 
 |  266       if (scanner.position() > start) | 
 |  267         AddSitekey(DependentString(sitekeys, start, scanner.position() - start))
     ; | 
 |  268       start = scanner.position() + 1; | 
 |  269     } | 
 |  270   } | 
 |  271 } | 
 |  272  | 
 |  273 void RegExpFilter::InitJSTypes() | 
 |  274 { | 
 |  275   EM_ASM(exports.RegExpFilter.typeMap = {};); | 
 |  276   for (auto it = typeMap.begin(); it != typeMap.end(); ++it) | 
 |  277     EM_ASM_ARGS(exports.RegExpFilter.typeMap[getStringData($0).replace("-", "_")
     .toUpperCase()] = $1, &(it->first), it->second); | 
 |  278 } | 
 |  279  | 
 |  280 OwnedString RegExpFilter::RegExpFromSource(const String& source) | 
 |  281 { | 
 |  282   /* TODO: this is very inefficient */ | 
 |  283  | 
 |  284   // Note: This doesn't remove trailing wildcards, otherwise the result should | 
 |  285   // be identical to Filter.toRegExp(). | 
 |  286   OwnedString result; | 
 |  287   String::value_type prevChar = u'*'; | 
 |  288   for (String::size_type i = 0; i < source.length(); ++i) | 
 |  289   { | 
 |  290     String::value_type currChar = source[i]; | 
 |  291     switch (currChar) | 
 |  292     { | 
 |  293       case u'*': | 
 |  294         if (prevChar != u'*') | 
 |  295           result.append(u".*"_str); | 
 |  296         break; | 
 |  297       case u'^': | 
 |  298         result.append(u"(?:[\\x00-\\x24\\x26-\\x2C\\x2F\\x3A-\\x40\\x5B-\\x5E\\x
     60\\x7B-\\x7F]|$)"_str); | 
 |  299         break; | 
 |  300       case u'|': | 
 |  301         if (i == 0) | 
 |  302         { | 
 |  303           // Anchor at expression start, maybe extended anchor? | 
 |  304           if (i + 1 < source.length() && source[i + 1] == u'|') | 
 |  305           { | 
 |  306             result.append(u"^[\\w\\-]+:\\/+(?!\\/)(?:[^\\/]+\\.)?"_str); | 
 |  307             ++i; | 
 |  308           } | 
 |  309           else | 
 |  310             result.append(u'^'); | 
 |  311         } | 
 |  312         else if (i == source.length() - 1) | 
 |  313         { | 
 |  314           // Anchor at expression end, ignore if following separator placeholder | 
 |  315           if (prevChar != u'^') | 
 |  316             result.append(u'$'); | 
 |  317         } | 
 |  318         else | 
 |  319         { | 
 |  320           // Not actually an anchor, escape it | 
 |  321           result.append(u"\\|"_str); | 
 |  322         } | 
 |  323         break; | 
 |  324       default: | 
 |  325         if (!(currChar >= u'a' && currChar <= u'z') && | 
 |  326             !(currChar >= u'A' && currChar <= u'Z') && | 
 |  327             !(currChar >= u'0' && currChar <= u'9') && | 
 |  328             currChar < 128) | 
 |  329         { | 
 |  330           result.append(u'\\'); | 
 |  331         } | 
 |  332         result.append(currChar); | 
 |  333     } | 
 |  334     prevChar = currChar; | 
 |  335   } | 
 |  336   return result; | 
 |  337 } | 
 |  338  | 
 |  339 RegExpFilter::DomainMap* RegExpFilter::GetDomains() const | 
 |  340 { | 
 |  341   if (!DomainsParsingDone()) | 
 |  342   { | 
 |  343     ParseDomains(GetDomainsSource(mText), u'|'); | 
 |  344     SetDomainsParsingDone(); | 
 |  345   } | 
 |  346   return ActiveFilter::GetDomains(); | 
 |  347 } | 
 |  348  | 
 |  349 RegExpFilter::SitekeySet* RegExpFilter::GetSitekeys() const | 
 |  350 { | 
 |  351   if (!SitekeyParsingDone()) | 
 |  352   { | 
 |  353     ParseSitekeys(GetSitekeysSource(mText)); | 
 |  354     SetSitekeysParsingDone(); | 
 |  355   } | 
 |  356   return ActiveFilter::GetSitekeys(); | 
 |  357 } | 
 |  358  | 
 |  359 bool RegExpFilter::Matches(const String& location, int typeMask, | 
 |  360     DependentString& docDomain, bool thirdParty, const String& sitekey) const | 
 |  361 { | 
 |  362   if (!(mContentType & typeMask) || | 
 |  363       (mThirdParty == TrippleState::YES && !thirdParty) || | 
 |  364       (mThirdParty == TrippleState::NO && thirdParty) || | 
 |  365       !IsActiveOnDomain(docDomain, sitekey)) | 
 |  366   { | 
 |  367     return false; | 
 |  368   } | 
 |  369  | 
 |  370   if (!RegExpParsingDone()) | 
 |  371   { | 
 |  372     const OwnedString pattern(GetRegExpSource(mText)); | 
 |  373     SetRegExp(GenerateRegExp(RegExpFromSource(pattern), mMatchCase)); | 
 |  374   } | 
 |  375   return EM_ASM_INT(return regexps.test($0, $1), mRegexpId, &location); | 
 |  376 } | 
| OLD | NEW |